python 中的 Ctypes 2d 字符串数组将不同的字符串存储在相同的内存地址

Ctypes 2d array of strings in python stores different strings at same memory address

提问人:God I Am Clown 提问时间:11/15/2023 更新时间:11/15/2023 访问量:40

问:

我拥有的 Python 代码非常简单:

from ctypes import *
from random import randint

class uni(Union):
    """Union that overlays a C string pointer and a long long integer.

    Both members share the same storage, so a value written through ``p``
    can be read back as a number through ``a`` (and vice versa).
    """

    _fields_ = [
        ("p", c_char_p),
        ("a", c_longlong),
    ]

# Build a 10 x 3 ctypes array of c_char_p and fill every cell with a random
# three-digit number encoded as a UTF-8 byte string.
x = ((c_char_p * 3) * 10) ()
for i in range(10):
    for j in range(3):
        x[i][j] = str(randint(100, 999)).encode('utf-8')

# Print the table row by row; every cell shows the byte string stored above.
for i in range(10):
    for j in range(3):
        print(x[i][j], end = ' ')
    print()
    
# For every cell, print an address obtained through the union and the bytes
# read back from that address.
print("addresses")
for i in range(10):
    for j in range(3):
        t = uni()
        # Write the cell's value through the pointer member so the integer
        # member `a` exposes the stored pointer as a number.
        # NOTE(review): x[i][j] presumably returns a new temporary bytes object
        # on every access, in which case this is the temporary's address and
        # not the pointer held inside x -- confirm against the ctypes docs.
        t.p = x[i][j] 
        print(hex(t.a), end = ' - ')
        print(string_at(t.a), end = ' | ')
    print()

这将输出以下内容:

b'475' b'912' b'805' 
b'107' b'986' b'191' 
b'389' b'525' b'921' 
b'441' b'869' b'452' 
b'505' b'788' b'571' 
b'111' b'974' b'758' 
b'447' b'975' b'671' 
b'322' b'633' b'332' 
b'924' b'633' b'174' 
b'677' b'611' b'431' 
addresses
0x7fdfbbbcad80 - b'475' | 0x7fdfbbbcad80 - b'912' | 0x7fdfbbbcad80 - b'805' | 
0x7fdfbbbcad80 - b'107' | 0x7fdfbbbcad80 - b'986' | 0x7fdfbbbcad80 - b'191' | 
0x7fdfbbbcad80 - b'389' | 0x7fdfbbbcad80 - b'525' | 0x7fdfbbbcad80 - b'921' | 
0x7fdfbbbcad80 - b'441' | 0x7fdfbbbcad80 - b'869' | 0x7fdfbbbcad80 - b'452' | 
0x7fdfbbbcad80 - b'505' | 0x7fdfbbbcad80 - b'788' | 0x7fdfbbbcad80 - b'571' | 
0x7fdfbbbcad80 - b'111' | 0x7fdfbbbcad80 - b'974' | 0x7fdfbbbcad80 - b'758' | 
0x7fdfbbbcad80 - b'447' | 0x7fdfbbbcad80 - b'975' | 0x7fdfbbbcad80 - b'671' | 
0x7fdfbbbcad80 - b'322' | 0x7fdfbbbcad80 - b'633' | 0x7fdfbbbcad80 - b'332' | 
0x7fdfbbbcad80 - b'924' | 0x7fdfbbbcad80 - b'633' | 0x7fdfbbbcad80 - b'174' | 
0x7fdfbbbcad80 - b'677' | 0x7fdfbbbcad80 - b'611' | 0x7fdfbbbcad80 - b'431' | 

怎么会这样?它是如何在同一地址存储不同字符串的?

注意:我在调试将 2d 字符串数组传递给 C++ 共享对象的程序时发现了这一点。C++ 函数定义为:

// Debug helper exported with C linkage: prints the pointer it receives and
// then the result of dereferencing it once and twice.
// `len` and `inner_len` are accepted but not used in this body.
extern "C" 
void print2d(char*** arr, int len, int inner_len)
{
  std::cout << arr << '\n';   // ok: prints the address held in arr
  std::cout.flush(); 
  std::cout << *arr << '\n';  // ok: prints the first pointer-sized value arr points to
  std::cout.flush();
  // **arr is a char*, so the stream tries to read it as a C string.
  // NOTE(review): presumably the caller passes a contiguous block of char*
  // rather than an array of row pointers, so *arr is not a valid char** --
  // confirm against the Python side.
  std::cout << **arr << '\n'; //this segfaults
}

如果有人有任何解决此问题的建议,我很乐意听到他们的意见

python c++ 数组 字符串 ctypes

评论

1赞 molbdnilo 11/15/2023
我认为 ctypes 文档中的“Surprises”(意外行为)一节与此相关。
0赞 Mark Tolonen 11/15/2023
是的,每次访问 ctypes 对象的内容都会构造一个新的 python 对象。在这种情况下,因为它是临时的,所以在每次打印时都会创建和销毁它,并且每次都会在同一内存块中创建它。
0赞 Mark Tolonen 11/15/2023
至于 C++ 代码,传递的 2D 对象不是 char***,而是更像 (char*)[][3](在本例中)。不确定我是否正确。现在没有编译器来检查。
0赞 God I Am Clown 11/15/2023
是的,@MarkTolonen 似乎是对的:内存是连续排列的,而我需要的是一个指针数组。修改初始化方式后一切正常;这也正是我在 C++ 中遇到段错误的原因。

答:

1赞 Adesoji Alu 11/15/2023 #1

为每个字符串分配唯一的内存地址,并将它们连同它们的值一起打印出来。

#include <iostream>
#include <cstring>

extern "C" {
    // Print a rows x cols table of C strings to stdout, one row per line,
    // with every cell followed by a single space.
    //
    // arr  - array of `rows` pointers, each pointing to `cols` char* cells
    // rows - number of row pointers in arr
    // cols - number of cells in every row
    //
    // A null `arr` prints nothing. A null row or a null cell is printed as
    // "(null)" instead of being streamed, because inserting a null char*
    // into an ostream is undefined behaviour.
    void print2d(char*** arr, int rows, int cols) {
        if (arr == nullptr) {
            return;
        }
        for (int i = 0; i < rows; ++i) {
            char** row = arr[i];
            for (int j = 0; j < cols; ++j) {
                const char* cell = (row != nullptr) ? row[j] : nullptr;
                std::cout << (cell != nullptr ? cell : "(null)") << " ";
            }
            std::cout << '\n';
        }
        // Flush once at the end so the text is emitted before control
        // returns to the caller, without paying for a flush on every row.
        std::cout.flush();
    }
}


假设您的 C++ 代码保存为 print2d.cpp,请确保您的 Python 脚本和 print2d.cpp 位于同一目录中,然后在 Ubuntu 上使用带有 -shared 和 -fPIC 标志的 g++ 将此 C++ 代码编译为共享库:

g++ -fPIC -shared -o libprint2d.so print2d.cpp

g++ 将创建一个名为 libprint2d.so 的共享库。

from ctypes import *
from random import randint

class uni(Union):
    """Union that overlays a C string pointer and a long long integer.

    Both members share the same storage, so a value written through ``p``
    can be read back as a number through ``a`` (and vice versa).
    """

    _fields_ = [
        ("p", c_char_p),
        ("a", c_longlong),
    ]

def allocate_string(s):
    """Return a ctypes char buffer holding a NUL-terminated copy of *s*.

    Args:
        s: The string contents as ``bytes`` (a ``bytearray`` is also accepted).

    Returns:
        A ctypes character array of ``len(s) + 1`` bytes: the contents of *s*
        followed by a single NUL terminator.

    Raises:
        TypeError: If *s* is neither ``bytes`` nor ``bytearray``.
    """
    if isinstance(s, bytearray):
        s = bytes(s)
    if not isinstance(s, bytes):
        raise TypeError(f"expected bytes, got {type(s).__name__}")
    # create_string_buffer(bytes) already sizes the buffer to len(s) + 1 and
    # appends the terminator, so no manual sizing or padding is required.
    return create_string_buffer(s)

# Table dimensions, shared by the Python loops and the C++ call below.
ROWS, COLS = 10, 3

# Build a ROWS x COLS table of char pointers; every cell points at its own
# separately allocated buffer holding a random three-digit number.
buffers = []  # Explicit references to every buffer that was allocated
x = ((POINTER(c_char) * COLS) * ROWS)()
for i in range(ROWS):
    for j in range(COLS):
        random_string = str(randint(100, 999)).encode('utf-8')
        buffer = allocate_string(random_string)
        buffers.append(buffer)  # Keep a reference to the buffer
        x[i][j] = cast(buffer, POINTER(c_char))  # Store the pointer to the buffer

# Print each cell's address and the bytes found at that address.
for i in range(ROWS):
    for j in range(COLS):
        t = uni()
        # Write through the pointer member, read back through the integer member.
        t.p = cast(x[i][j], c_char_p)
        print(hex(t.a), end=' - ')
        print(string_at(t.a), end=' | ')
    # Flush so Python's buffered output is written before the C++ code
    # writes to the same stdout; otherwise the two can appear out of order
    # when the output is redirected.
    print(flush=True)

# Load the shared library
lib = CDLL('./libprint2d.so')

# Declare the argument and return types of print2d
lib.print2d.argtypes = [POINTER(POINTER(POINTER(c_char))), c_int, c_int]
lib.print2d.restype = None

# x is one contiguous block of char*, while print2d takes a char***, so
# build a separate array holding one pointer per row of x.
array_type = POINTER(POINTER(c_char)) * ROWS
array = array_type(*[cast(row, POINTER(POINTER(c_char))) for row in x])

# Call the C++ function
lib.print2d(array, ROWS, COLS)

您应该获得类似于以下内容的输出

0x7fe3bae66290 - b'640' |0x7fe3bae66390 - b'626' |0x7fe3bae66410 - b'582' |

0x7fe3bae66490 - b'732' |0x7fe3bae66510 - b'184' |0x7fe3bae66590 - b'305' |

0x7fe3bae66610 - b'503' |0x7fe3bae66690 - b'441' |0x7fe3bae66710 - b'791' |

0x7fe3bae66810 - b'292' |0x7fe3bae66890 - b'133' |0x7fe3bae66910 - b'541' |

0x7fe3bae66990 - b'624' |0x7fe3bae66a10 - b'973' |0x7fe3bae66a90 - b'817' |

0x7fe3bae66b10 - b'322' |0x7fe3bae66b90 - b'314' |0x7fe3bae66790 - b'630' |

0x7fe3bae66c10 - b'834' |0x7fe3bae66c90 - b'458' |0x7fe3bae66d10 - b'489' |

0x7fe3bae66d90 - b'277' |0x7fe3bae66e10 - b'362' |0x7fe3bae66e90 - b'313' |

0x7fe3bae66f10 - b'534' |0x7fe3bae66f90 - b'131' |0x7fe3bae66210 - b'366' |

0x7fe3bae75110 - b'960' |0x7fe3bae75190 - b'594' |0x7fe3bae75210 - b'110' |

640 626 582

732 184 305

503 441 791

292 133 541

624 973 817

322 314 630

834 458 489

277 362 313

534 131 366

960 594 110