# cuda-notes

**Repository Path**: BTailCat/cuda-notes

## Basic Information

- **Project Name**: cuda-notes
- **Description**: cuda 笔记
- **Primary Language**: C++
- **License**: LGPL-3.0
- **Default Branch**: master
- **Homepage**: None
- **GVP Project**: No

## Statistics

- **Stars**: 0
- **Forks**: 0
- **Created**: 2020-03-28
- **Last Updated**: 2023-08-08

## Categories & Tags

**Categories**: Uncategorized

**Tags**: None

## README

# cuda notes

## thrust

### vectors

* `创建vector`

```c++
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <iostream>

int main(void)
{
    // 创建大小为4的vector
    thrust::host_vector<int> H(4);

    //添加数据
    H[0] = 14;
    H[1] = 20;
    H[2] = 38;
    H[3] = 46;

    // H.size() returns the size of vector H
    std::cout << "H has size " << H.size() << std::endl;

    // print contents of H
    for(int i = 0; i < H.size(); i++)
        std::cout << "H[" << i << "] = " << H[i] << std::endl;

    // resize H
    H.resize(2);
    std::cout << "H now has size " << H.size() << std::endl;

    // 将主机端的vec拷贝到设备端
    thrust::device_vector<int> D = H;

    // 设备端vector设置数值
    D[0] = 99;
    D[1] = 88;

    // 打印设备端vector数值
    for(int i = 0; i < D.size(); i++)
        std::cout << "D[" << i << "] = " << D[i] << std::endl;

    return 0;
}
```

* `根据设备地址创建vector`

```c++
size_t N = 10;

// raw pointer to device memory
int * raw_ptr;
cudaMalloc((void **) &raw_ptr, N * sizeof(int));

// wrap raw pointer with a device_ptr
thrust::device_ptr<int> dev_ptr(raw_ptr);

// use device_ptr in thrust algorithms
thrust::fill(dev_ptr, dev_ptr + N, (int) 0);
```

* `对vector数据做整体操作`

```c++
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/sequence.h>
#include <iostream>

int main(void)
{
    // initialize all ten integers of a device_vector to 1
    thrust::device_vector<int> D(10, 1);

    // set the first seven elements of a vector to 9
    thrust::fill(D.begin(), D.begin() + 7, 9);

    // initialize a host_vector with the first five elements of D
    thrust::host_vector<int> H(D.begin(), D.begin() + 5);

    // set the elements of H to 0, 1, 2, 3, ...
    thrust::sequence(H.begin(), H.end());

    // copy all of H back to the beginning of D
    thrust::copy(H.begin(), H.end(), D.begin());

    // print D
    for(int i = 0; i < D.size(); i++)
        std::cout << "D[" << i << "] = " << D[i] << std::endl;

    return 0;
}
```

### algorithms

* `系统算法`

```c++
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <iostream>

int main(void)
{
    // allocate three device_vectors with 10 elements
    thrust::device_vector<int> X(10);
    thrust::device_vector<int> Y(10);
    thrust::device_vector<int> Z(10);

    // initialize X to 0,1,2,3, ....
    thrust::sequence(X.begin(), X.end());

    // compute Y = -X
    thrust::transform(X.begin(), X.end(), Y.begin(), thrust::negate<int>());

    // fill Z with twos
    thrust::fill(Z.begin(), Z.end(), 2);

    // compute Y = X mod 2
    thrust::transform(X.begin(), X.end(), Z.begin(), Y.begin(), thrust::modulus<int>());

    // replace all the ones in Y with tens
    thrust::replace(Y.begin(), Y.end(), 1, 10);

    // print Y
    thrust::copy(Y.begin(), Y.end(), std::ostream_iterator<int>(std::cout, "\n"));

    return 0;
}
```

* `自定义算法`

```c++
struct saxpy_functor
{
    const float a;

    saxpy_functor(float _a) : a(_a) {}

    __host__ __device__
    float operator()(const float& x, const float& y) const
    {
        return a * x + y;
    }
};

void saxpy_fast(float A, thrust::device_vector<float>& X, thrust::device_vector<float>& Y)
{
    // Y <- A * X + Y
    thrust::transform(X.begin(), X.end(), Y.begin(), Y.begin(), saxpy_functor(A));
}

void saxpy_slow(float A, thrust::device_vector<float>& X, thrust::device_vector<float>& Y)
{
    thrust::device_vector<float> temp(X.size());

    // temp <- A
    thrust::fill(temp.begin(), temp.end(), A);

    // temp <- A * X
    thrust::transform(X.begin(), X.end(), temp.begin(), temp.begin(), thrust::multiplies<float>());

    // Y <- A * X + Y
    thrust::transform(temp.begin(), temp.end(), Y.begin(), Y.begin(), thrust::plus<float>());
}
```

* `求特定参数的个数`

```c++
#include <thrust/count.h>
#include <thrust/device_vector.h>
...
// put three 1s in a device_vector
thrust::device_vector<int> vec(5,0);
vec[1] = 1;
vec[3] = 1;
vec[4] = 1;

// count the 1s
int result = thrust::count(vec.begin(), vec.end(), 1);
// result is three
```

* `迭代求和`

```c++
int sum = thrust::reduce(D.begin(), D.end(), (int) 0, thrust::plus<int>());
```

* `排序`

```c++
#include <thrust/sort.h>
...
const int N = 6;
int A[N] = {1, 4, 2, 8, 5, 7};

thrust::sort(A, A + N);
// A is now {1, 2, 4, 5, 7, 8}
```

```c++
#include <thrust/sort.h>
...
const int N = 6;
int keys[N] = { 1, 4, 2, 8, 5, 7};
char values[N] = {'a', 'b', 'c', 'd', 'e', 'f'};

thrust::sort_by_key(keys, keys + N, values);
// keys is now { 1, 2, 4, 5, 7, 8}
// values is now {'a', 'c', 'b', 'e', 'f', 'd'}
```

```c++
#include <thrust/sort.h>
#include <thrust/functional.h>
...
const int N = 6;
int A[N] = {1, 4, 2, 8, 5, 7};

thrust::stable_sort(A, A + N, thrust::greater<int>());
// A is now {8, 7, 5, 4, 2, 1}
```