110 | | [[Image(nvidia_xserver.jpg)]] |
111 | | |
112 | | |
113 | | |
114 | | |
115 | | |
116 | | |
| 110 | [[Image(query_gpu.jpg)]] |
| 111 | |
| 112 | 8.安裝CUDA 工具套件和CUDA 軟體開發套件[[br]] |
| 113 | 進入終端機[[br]] |
| 114 | $sudo sh ./cudatoolkit_2.3_linux_64_ubuntu9.04.run[[br]] |
| 115 | $sudo sh ./cudasdk_2.3_linux.run[[br]] |
| 116 | 過程中,所有選項按Enter會使用預設的設定。[[br]] |
| 117 | 修改 etc 目錄下的 profile 檔[[br]] |
| 118 | $cd /etc/[[br]] |
| 119 | $sudo emacs profile (可用任意的編輯器)[[br]] |
| 120 | 依照CUDA所在目錄,在檔案的最後加上[[br]] |
| 121 | PATH=/usr/local/cuda/bin:$PATH [[br]] |
| 122 | LD_LIBRARY_PATH=/usr/local/cuda/lib64[[br]] |
| 123 | export PATH[[br]] |
| 124 | export LD_LIBRARY_PATH[[br]] |
| 125 | 這四行。[[br]] |
| 126 | 如果是32 位元的作業系統,第二行改成LD_LIBRARY_PATH=/usr/local/cuda/lib[[br]] |
| 127 | 如果是64 位元的作業系統,第二行改成LD_LIBRARY_PATH=/usr/local/cuda/lib64[[br]] |
| 128 | 登出,再登入, 用CUDA程式測試,如果正確無誤,即完成安裝。[[br]] |
| 129 | |
| 130 | 若出現以下錯誤訊息[[br]] |
| 131 | = ./a.out: error while loading shared libraries: libcudart.so.2: cannot open shared object file: No such file or directory = |
| 132 | |
| 133 | 若是64位元,請在命令列執行[[br]] |
| 134 | $sudo ln -sf /usr/local/cuda/lib64/libcudart.so.2.3 /lib64/libcudart.so.2[[br]] |
| 135 | 如果是32位元可能需要加此行[[br]] |
| 136 | $sudo ln -sf /usr/local/cuda/lib/libcudart.so.2.3 /lib/libcudart.so.2[[br]] |
| 137 | |
| 138 | 可以檢查(option)[[br]] |
| 139 | $emacs /etc/ld.so.conf[[br]] |
| 140 | include /usr/local/cuda/lib[[br]] |
| 141 | |
| 142 | 若32位元 include /usr/local/cuda/lib[[br]] |
| 143 | 若64位元 include /usr/local/cuda/lib64[[br]] |
| 144 | |
| 145 | 可用以下程式測試。[[br]] |
| 146 | 用nvcc 編譯cuda 程式 number_add_1.cu。[[br]] |
| 147 | 如果CPU的執行結果和GPU相同,代表GPU成功運作。[[br]] |
| 148 | |
| 149 | |
| 150 | $nvcc number_add_1.cu[[br]] |
| 151 | $./a.out[[br]] |
| 152 | |
| 153 | |
| 154 | number_add_1.cu程式碼:[[br]] |
| 155 | |
| 156 | #include <stdio.h> [[br]] |
| 157 | #include <stdlib.h>[[br]] |
| 158 | #include <math.h>[[br]] |
| 159 | #include <time.h>[[br]] |
| 160 | #include <sys/time.h>[[br]] |
| 161 | #include <iostream>[[br]] |
| 162 | #include <iomanip>[[br]] |
| 163 | using namespace std;[[br]] |
| 164 | #define DATA_SIZE 1048576[[br]] |
| 165 | int data[DATA_SIZE];[[br]] |
| 166 | |
| 167 | __global__ static void sumOfSquares(int *num, int* result){[[br]] |
| 168 | int sum = 0;[[br]] |
| 169 | int i;[[br]] |
| 170 | for(i = 0; i < DATA_SIZE; i++) {[[br]] |
| 171 | sum += num[i] * num[i];[[br]] |
| 172 | }[[br]] |
| 173 | |
| 174 | *result = sum;[[br]] |
| 175 | }[[br]] |
| 176 | void GenerateNumbers(int *number ,int size){[[br]] |
| 177 | int i;[[br]] |
| 178 | for(i=0;i<size;i++){ [[br]] |
| 179 | number[i]=rand() % 10; [[br]] //value = 0 to 9 [[br]] |
| 180 | }[[br]] |
| 181 | }[[br]] |
| 182 | |
| 183 | double wallclock(void){[[br]] |
| 184 | struct timeval tv;[[br]] |
| 185 | struct timezone tz;[[br]] |
| 186 | double t;[[br]] |
| 187 | |
| 188 | gettimeofday(&tv, &tz);[[br]] |
| 189 | t = (double)tv.tv_sec*1000;[[br]] |
| 190 | t += ((double)tv.tv_usec)/1000.0;[[br]] |
| 191 | |
| 192 | return t;[[br]] |
| 193 | }// millisecond[[br]] |
| 194 | |
| 195 | int main(){[[br]] |
| 196 | cudaSetDevice(0); // set device number[[br]] |
| 197 | GenerateNumbers(data, DATA_SIZE);[[br]] |
| 198 | |
| 199 | int* gpudata,*result,sum;[[br]] |
| 200 | double t1,t2;[[br]] |
| 201 | |
| 202 | cudaMalloc((void**) &gpudata, sizeof(int) * DATA_SIZE);[[br]] |
| 203 | cudaMalloc((void**) &result, sizeof(int));[[br]] |
| 204 | cudaMemcpy(gpudata, data, sizeof(int) * DATA_SIZE,cudaMemcpyHostToDevice);[[br]] |
| 205 | |
| 206 | t1 = wallclock();[[br]] |
| 207 | sumOfSquares<<<1, 1, 0>>>(gpudata, result);[[br]] |
| 208 | cudaMemcpy(&sum, result, sizeof(int), cudaMemcpyDeviceToHost);[[br]] |
| 209 | |
| 210 | t2 = wallclock();[[br]] |
| 211 | |
| 212 | printf("Elapsed time = %f(ms) in GPU\n",t2-t1);[[br]] |
| 213 | |
| 214 | cudaFree(gpudata);[[br]] |
| 215 | cudaFree(result);[[br]] |
| 216 | |
| 217 | printf("sum: %d\n", sum);[[br]] |
| 218 | |
| 219 | sum = 0;[[br]] |
| 220 | for(int i = 0; i < DATA_SIZE; i++) {[[br]] |
| 221 | sum += data[i] * data[i];[[br]] |
| 222 | }[[br]] |
| 223 | printf("sum (CPU): %d\n", sum);[[br]] |
| 224 | }[[br]] |
| 225 | |
| 226 | |
| 227 | |
| 228 | |
| 229 | |
| 230 | |
| 231 | |
| 232 | |