using LinearAlgebra
using Plots
pyplot()
using BenchmarkTools
using Printf
using CUDA
using Random

# Benchmark script: for a range of buffer sizes, time host->GPU and GPU->host copies
# and an elementwise `.+ 1` on the GPU and on the CPU, convert the times to GB/s,
# print the peaks and start the transfer-bandwidth plot panel `p1`.

# Buffer sizes in bytes, powers of two from 2^14 to 2^30.
sizes = 2 .^ (14:30);

# Elapsed times (seconds) and derived bandwidths (GB/s), one entry per size.
timeSend = Array{Float64}(undef, length(sizes));
timeGather = Array{Float64}(undef, length(sizes));
sendBandwidth = Array{Float64}(undef, length(sizes));
gatherBandwidth = Array{Float64}(undef, length(sizes));
memoryTimesCPU = Array{Float64}(undef, length(sizes));
memoryTimesGPU = Array{Float64}(undef, length(sizes));
memoryBandwidthGPU = Array{Float64}(undef, length(sizes));
memoryBandwidthCPU = Array{Float64}(undef, length(sizes));

"""
    measuretimes(nbytes::Integer)

Time four operations on `Float64` buffers holding `nbytes ÷ 8` elements and return the
elapsed seconds as the named tuple `(send, gather, gpu, cpu)`:

- `send`: `CuArray(cpuData)`, timed with `CUDA.@elapsed`
- `gather`: `Array(gpuData)`, timed with `CUDA.@elapsed`
- `gpu`: `gpuData .+ 1` wrapped in `CUDA.@sync`, timed with `CUDA.@elapsed`
- `cpu`: `cpuData .+ 1`, timed with `@elapsed`

Living in a function keeps the timed expressions off non-constant globals.
"""
function measuretimes(nbytes::Integer)
    numElements = nbytes ÷ sizeof(Float64)
    # Host and device buffers share the Float64 element type so the CPU and GPU
    # read+write figures are measured on the same kind of data.
    cpuData = Float64.(rand(0:9, (numElements, 1)))
    gpuData = CuArray{Float64}(rand(0:9, (numElements, 1)))

    send = CUDA.@elapsed CuArray(cpuData)
    gather = CUDA.@elapsed Array(gpuData)
    gpu = CUDA.@elapsed CUDA.@sync gpuData .+ 1
    cpu = @elapsed cpuData .+ 1
    return (send = send, gather = gather, gpu = gpu, cpu = cpu)
end

# Warm-up pass: compile every timed code path once so the first recorded size does
# not include JIT compilation time.
measuretimes(first(sizes));

for i in eachindex(sizes)
    GC.gc(true)
    t = measuretimes(sizes[i])

    timeSend[i] = t.send
    timeGather[i] = t.gather
    memoryTimesGPU[i] = t.gpu
    memoryTimesCPU[i] = t.cpu

    sendBandwidth[i] = sizes[i] / timeSend[i] / 1e9
    gatherBandwidth[i] = sizes[i] / timeGather[i] / 1e9
    # Factor 2: the elementwise update is counted as one read plus one write.
    memoryBandwidthGPU[i] = 2 * (sizes[i] / memoryTimesGPU[i] / 1e9)
    memoryBandwidthCPU[i] = 2 * (sizes[i] / memoryTimesCPU[i] / 1e9)
end

@printf("Achieved peak send speed of %.1f GB/s \n", maximum(sendBandwidth))
@printf("Achieved peak gather speed of %.1f GB/s\n", maximum(gatherBandwidth))
@printf("Achieved peak read+write speed on the GPU: %.1f GB/s\n", maximum(memoryBandwidthGPU))
@printf("Achieved peak read+write speed on the CPU: %.1f GB/s\n", maximum(memoryBandwidthCPU))

# Transfer-bandwidth panel. The x limits are derived from `sizes` so that the largest
# sample (2^30 bytes, slightly above 10^9) is not clipped off the axis.
p1 = plot(
    sizes,
    sendBandwidth;
    lw = 2,
    legend = :topleft,
    xaxis = ("Array Size (bytes)", :log10),
    xlims = (minimum(sizes) / 2, 2 * maximum(sizes)),
    frame = true,
    label = string(
        "Send to GPU (Peak:", round(maximum(sendBandwidth), digits = 2), " GB/s)"
    ),
);
plot!(
    p1,
    sizes,
    gatherBandwidth;
    lw = 2,
    label = string(
        "Gather from GPU (Peak:", round(maximum(gatherBandwidth), digits = 2), " GB/s)"
    ),
);
plot!(p1; yaxis = "Transfer speed (GB/s)");
plot!(p1; title = "Data Transfer Bandwidth");
# Second half of the plotting script: finish the transfer panel `p1`, build the
# read+write panel `p2`, stack both into `p3` and save the figure.
# Relies on `sizes`, the four bandwidth vectors and `p1` defined earlier in the file.

# Peak value and its index for each curve, found in a single pass per vector.
peakSend, idxSend = findmax(sendBandwidth);
peakGather, idxGather = findmax(gatherBandwidth);
peakGPU, idxGPU = findmax(memoryBandwidthGPU);
peakCPU, idxCPU = findmax(memoryBandwidthCPU);

plot!(p1; minorxgrid = true, ylims = :round);
# Mark the peaks; the target panel is named explicitly instead of relying on
# whichever plot happens to be current.
scatter!(
    p1,
    [sizes[idxSend], sizes[idxGather]],
    [peakSend, peakGather];
    label = "",
    marker = (10, 0.3, [:blue, :red]),
);

# The x limits are derived from `sizes` so that the largest sample (2^30 bytes,
# slightly above 10^9) is not clipped off the axis.
p2 = plot(
    sizes,
    memoryBandwidthGPU;
    lw = 2,
    legend = :topleft,
    xaxis = ("Array Size (bytes)", :log10),
    xlims = (minimum(sizes) / 2, 2 * maximum(sizes)),
    frame = true,
    label = string("GPU (Peak:", round(peakGPU, digits = 2), " GB/s)"),
);
plot!(
    p2,
    sizes,
    memoryBandwidthCPU;
    lw = 2,
    label = string("CPU (Peak:", round(peakCPU, digits = 2), " GB/s)"),
);
plot!(p2; yaxis = "Speed (GB/s)");
plot!(p2; title = "read+write Bandwidth");
plot!(p2; minorxgrid = true, ylims = :round);
scatter!(
    p2,
    [sizes[idxGPU], sizes[idxCPU]],
    [peakGPU, peakCPU];
    label = "",
    marker = (10, 0.3, [:blue, :red]),
);

# Two panels stacked vertically, written to a PNG file.
p3 = plot(p1, p2; layout = grid(2, 1, widths = [1]), size = (600, 700));
savefig(p3, "gpu_rw_perf_result.png")