#!/bin/bash
# Usage: ./run_sgemm_cudamemcpy.sh <run_id>
# <run_id> is appended to the output file names.
# Parameters to configure - change these as required!
NUM_KERN=100
DEVICE_ID=0
DEVICE_ID_1=1
DEVICE_ID_2=2
DEVICE_ID_3=3
SIZE=25536
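# Optional guard (assumption: "$1" is a run identifier appended to every output
# file name below); abort early if it was not supplied.
if [ -z "$1" ]; then
    echo "Usage: $0 <run_id>" >&2
    exit 1
fi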
echo "Number of kernels: ${NUM_KERN}"
echo "GPU ID: ${DEVICE_ID}"
echo "Matrix size: ${SIZE}"
# Get the UUID of the GPU the SGEMM kernels run on
#UUID_list=(`nvidia-smi -L | awk '{print $NF}' | tr -d '[)]'`)
#UUID=${UUID_list[${1}]}
#echo "GPU UUID: ${UUID}"
# Get timestamp at the time of the run (currently unused in the file names below)
ts=$(date '+%s')
# Output file names (one per GPU)
FILE_NAME=sgemm_nvidia_${SIZE}_${NUM_KERN}_${DEVICE_ID}
FILE_NAME_1=sgemm_nvidia_${SIZE}_${NUM_KERN}_${DEVICE_ID_1}
FILE_NAME_2=sgemm_nvidia_${SIZE}_${NUM_KERN}_${DEVICE_ID_2}
FILE_NAME_3=sgemm_nvidia_${SIZE}_${NUM_KERN}_${DEVICE_ID_3}
echo "Output file name: ${FILE_NAME}"
# Run application with profiling via nvprof
#echo ""
#echo "Generating 2 matrices of size ${SIZE}. This will take a few minutes."
#./gen_data ${SIZE}
#echo "Completed generating 2 matrices"
echo ""
./cudamemcpy 40000 25 0 &
ID=$!
./cudamemcpy 40000 25 1 &
ID2=$!
#./cudamemcpy 40000 100 2 &
./cudamemcpy 40000 25 3 &
ID3=$!
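# Profile the SGEMM run on GPU ${DEVICE_ID_2} with nvprof (per-kernel GPU trace,
# continuous event collection, CSV output). The equivalent runs for GPUs 0, 1,
# and 3 are left commented out.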
#echo "Running ${NUM_KERN} kernels of SGEMM on GPU ${DEVICE_ID}. This will takes a few minutes."
#{ time __PREFETCH=off nvprof --print-gpu-trace --event-collection-mode continuous --system-profiling on --kernel-latency-timestamps on --csv --log-file ${FILE_NAME}_$1.csv --device-buffer-size 128 --continuous-sampling-interval 1 -f ./sgemm ${SIZE} ${NUM_KERN} ${DEVICE_ID} > ${FILE_NAME}_$1.out ; } &>> ${FILE_NAME}_$1.out &
#GPU0_PID=$!
#{ time __PREFETCH=off nvprof --print-gpu-trace --event-collection-mode continuous --system-profiling on --kernel-latency-timestamps on --csv --log-file ${FILE_NAME_1}_$1.csv --device-buffer-size 128 --continuous-sampling-interval 1 -f ./sgemm ${SIZE} ${NUM_KERN} ${DEVICE_ID_1} > ${FILE_NAME_1}_$1.out ; } &>> ${FILE_NAME_1}_$1.out &
#GPU1_PID=$!
{ time __PREFETCH=off nvprof --print-gpu-trace --event-collection-mode continuous --system-profiling on --kernel-latency-timestamps on --csv --log-file ${FILE_NAME_2}_$1.csv --device-buffer-size 128 --continuous-sampling-interval 1 -f ./sgemm ${SIZE} ${NUM_KERN} ${DEVICE_ID_2} > ${FILE_NAME_2}_$1.out ; } &>> ${FILE_NAME_2}_$1.out &
GPU2_PID=$!
#{ time __PREFETCH=off nvprof --print-gpu-trace --event-collection-mode continuous --system-profiling on --kernel-latency-timestamps on --csv --log-file ${FILE_NAME_3}_$1.csv --device-buffer-size 128 --continuous-sampling-interval 1 -f ./sgemm ${SIZE} ${NUM_KERN} ${DEVICE_ID_3} > ${FILE_NAME_3}_$1.out ; } &>> ${FILE_NAME_3}_$1.out &
#GPU3_PID=$!
#wait $GPU0_PID
#wait $GPU1_PID
wait $GPU2_PID
#wait $GPU3_PID
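# Stop the background cudamemcpy processes once the profiled SGEMM run finishes.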
kill -9 $ID $ID2 $ID3
echo "Completed SGEMM. Outputs in ../out"