# Numba: moving arrays to the GPU and launching a CUDA kernel
import numpy as np
from numba import cuda
# Move a NumPy array to the GPU ahead of time: to_device() copies the
# 30,000,000 random float64 values into a device array.
gpu_array = cuda.to_device(np.random.random(30_000_000))
# Copy the device array back to the CPU as a NumPy array.
cpu_array = gpu_array.copy_to_host()
from numba import cuda
# Define a kernel that is compiled for CUDA
@cuda.jit
def vector_add(r, x, y):
    """Write the element-wise sum ``x[i] + y[i]`` into ``r[i]``.

    Each thread begins at its own absolute index in the 1-D grid and then
    advances by the total number of threads in the grid, so every index
    below ``len(r)`` is visited whatever launch configuration is used.

    Args:
        r: Output array; its length bounds the loop.
        x: First input array; read at every index below ``len(r)``.
        y: Second input array; read at every index below ``len(r)``.

    Note:
        Only ``len(r)`` is checked, so ``x`` and ``y`` must be at least
        as long as ``r``.
    """
    start = cuda.grid(1)      # this thread's absolute index in the grid
    step = cuda.gridsize(1)   # total number of threads in the grid
    stop = len(r)
    for i in range(start, stop, step):
        r[i] = x[i] + y[i]
# Allocate some arrays on the device and copy data
# NOTE(review): 2 * 10 evaluates to 20 elements; if a power of two such as
# 2 ** 10 was intended, confirm and update.
N = 2 * 10
x = cuda.to_device(np.arange(N))
y = cuda.to_device(np.arange(N) * 2)
# Output buffer allocated on the device, modelled on x; the kernel fills it.
r = cuda.device_array_like(x)

# Configure and launch kernel
block_dim = 256
# Ceiling division: just enough blocks to give every element a thread,
# without a spare block when len(x) is an exact multiple of block_dim.
grid_dim = (len(x) + block_dim - 1) // block_dim
vector_add[grid_dim, block_dim](r, x, y)

# Copy result back from the device
result = r.copy_to_host()