Numba

from numba import cuda

Move a NumPy array to the GPU ahead of time:

# One up-front host-to-device transfer of 30 million random samples.
# NOTE(review): `np` is not imported in the visible snippet -- presumably
# `import numpy as np` appears elsewhere; confirm.
gpu_array = cuda.to_device(np.random.random(30_000_000))

Copy it back to the CPU:

# Called without arguments, copy_to_host() returns the device data as a new
# host-side array.
cpu_array = gpu_array.copy_to_host()

from numba import cuda

# Define a kernel that is compiled for CUDA

@cuda.jit
def vector_add(r, x, y):
    """Store the element-wise sum ``x[i] + y[i]`` in ``r[i]``.

    CUDA kernel written as a grid-stride loop: each thread starts at its own
    absolute index in the 1-D grid and advances by the total number of
    threads in the grid, so every index below ``len(r)`` is processed
    whatever launch configuration is used.

    Args:
        r: Output array; its length is the iteration bound.
        x: First input array, read at every index written in ``r``.
        y: Second input array, read at every index written in ``r``.

    No bounds check is performed on ``x`` or ``y``; presumably callers pass
    arrays at least as long as ``r`` -- confirm at call sites.
    """
    start = cuda.grid(1)     # absolute index of this thread in the 1-D grid
    step = cuda.gridsize(1)  # total number of threads in the 1-D grid
    stop = len(r)

    for i in range(start, stop, step):
        r[i] = x[i] + y[i]

# Stage the kernel inputs on the device and reserve an output buffer.
# NOTE(review): `np` is not imported in the visible snippet -- presumably
# `import numpy as np` appears elsewhere; confirm.
# NOTE(review): 2 * 10 evaluates to 20; if 2 ** 10 (1024) was intended,
# confirm before changing the value.
N = 2 * 10
x = cuda.to_device(np.arange(N))
y = cuda.to_device(2 * np.arange(N))
r = cuda.device_array_like(x)  # uninitialised device buffer shaped like x

# Launch configuration: 256 threads per block, and one block more than the
# number of whole blocks that fit in len(x), so at least one block is launched.
block_dim = 256
grid_dim = len(x) // block_dim + 1
vector_add[grid_dim, block_dim](r, x, y)

# Bring the computed sums back into host memory.
result = r.copy_to_host()