One of the key advantages of GPU computing is its ability to perform parallel computations efficiently. GPU architectures consist of thousands of cores capable of executing multiple tasks simultaneously, enabling significant speedups for parallelizable algorithms.
# Example Python code snippet for parallel computing with GPUs using CUDA.
import numpy as np
from numba import cuda


@cuda.jit
def gpu_parallel_computation(input_array, output_array):
    """CUDA kernel: write double of each input element into output_array.

    Each GPU thread handles exactly one element. Arrays must have the
    same length; extra threads past the end of the array return early.
    """
    idx = cuda.grid(1)  # absolute 1-D thread index across the whole grid
    # Guard against out-of-range threads: the grid is rounded up to a
    # whole number of blocks, so some threads may fall past the array end.
    if idx < input_array.size:
        output_array[idx] = input_array[idx] * 2


def main():
    """Double one million integers on the GPU and print the result."""
    # Generate input data on the host.
    input_data = np.arange(1000000)

    # Allocate GPU memory and copy the input to the device.
    input_gpu = cuda.to_device(input_data)
    output_gpu = cuda.device_array_like(input_gpu)

    # Configure kernel launch parameters: round the grid size up so that
    # every element gets a thread even when size % block_size != 0.
    block_size = 256
    grid_size = (input_data.size + block_size - 1) // block_size

    # Launch the kernel on the GPU.
    gpu_parallel_computation[grid_size, block_size](input_gpu, output_gpu)

    # Copy results back to the host (this implicitly synchronizes with
    # the kernel launch) and display them.
    output_data = output_gpu.copy_to_host()
    print(output_data)


if __name__ == "__main__":
    main()