您的位置:首页 > 汽车 > 时评 > 半夜免费十大禁用app_长沙网上注册公司流程_关键词查询爱站网_销售管理怎么带团队

半夜免费十大禁用app_长沙网上注册公司流程_关键词查询爱站网_销售管理怎么带团队

2024/9/21 18:44:18 来源:https://blog.csdn.net/eloudy/article/details/142255896  浏览:    关键词:半夜免费十大禁用app_长沙网上注册公司流程_关键词查询爱站网_销售管理怎么带团队
半夜免费十大禁用app_长沙网上注册公司流程_关键词查询爱站网_销售管理怎么带团队

1. 不使用 roctracer 的普通场景

mt.cpp

/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE. */

// Standard headers for the names this file uses directly:
// std::abs(float) -> <cmath>, fprintf -> <cstdio>, malloc/free/abort -> <cstdlib>.
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>

// hip header file
#include <hip/hip_runtime.h>

// Evaluates a HIP runtime call exactly once. If the returned status is not
// hipSuccess, prints the HIP error string to stderr and aborts the process.
// The do { ... } while (0) wrapper makes the macro behave like one statement.
#define HIP_CALL(call)                                        \
  do {                                                        \
    hipError_t hip_call_err_ = (call);                        \
    if (hip_call_err_ != hipSuccess) {                        \
      fprintf(stderr, "%s\n", hipGetErrorString(hip_call_err_)); \
      abort();                                                \
    }                                                         \
  } while (0)

// Edge length of the square matrix and its total element count.
#define WIDTH 1024
#define NUM (WIDTH * WIDTH)

// Thread-block dimensions used for the kernel launch.
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1

// Device (Kernel) function, it must be void
// Each thread copies one element: out[y * width + x] = in[x * width + y],
// where (x, y) is derived from the thread's block and thread indices.
//
// out   - destination buffer, indexed up to width * width - 1
// in    - source buffer, indexed up to width * width - 1
// width - edge length used for both the row stride and the bounds guard
__global__ void matrixTranspose(float* out, float* in, const int width) {
  const int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
  const int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;

  // Threads that fall outside the matrix do nothing. This keeps the kernel
  // safe when the launch grid is rounded up past width (i.e. width is not a
  // multiple of the block dimensions).
  if (x >= width || y >= width) {
    return;
  }

  out[y * width + x] = in[x * width + y];
}
// CPU implementation of matrix transpose
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {for (unsigned int j = 0; j < width; j++) {for (unsigned int i = 0; i < width; i++) {output[i * width + j] = input[j * width + i];}}
}int main() {float* Matrix;float* TransposeMatrix;float* cpuTransposeMatrix;float* gpuMatrix;float* gpuTransposeMatrix;hipDeviceProp_t devProp;HIP_CALL(hipGetDeviceProperties(&devProp, 0));std::cerr << "Device name " << devProp.name << std::endl;int i;int errors;Matrix = (float*)malloc(NUM * sizeof(float));TransposeMatrix = (float*)malloc(NUM * sizeof(float));cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));// initialize the input datafor (i = 0; i < NUM; i++) {Matrix[i] = (float)i * 10.0f;}// allocate the memory on the device sideHIP_CALL(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));HIP_CALL(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));uint32_t iterations = 100;while (iterations-- > 0) {std::cerr << "## Iteration (" << iterations << ") #################" << std::endl;// Memory transfer from host to deviceHIP_CALL(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));// Lauching kernel from hosthipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);HIP_CALL(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));// CPU MatrixTranspose computationmatrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);// verify the resultserrors = 0;double eps = 1.0E-6;for (i = 0; i < NUM; i++) {if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {errors++;}}if (errors != 0) {fprintf(stderr, "FAILED: %d errors\n", errors);} else {fprintf(stderr, "PASSED!\n");}}// free the resources on device sideHIP_CALL(hipFree(gpuMatrix));HIP_CALL(hipFree(gpuTransposeMatrix));// free the resources on host sidefree(Matrix);free(TransposeMatrix);free(cpuTransposeMatrix);return errors;
}

编译:

 $ hipcc mt.cpp -o mt

$ ./mt xxx

运行后不会产生任何文件。

2. 加入 roctracer(roctx)标记的源文件

MatrixTranspose.cpp:

/* Copyright (c) 2018-2022 Advanced Micro Devices, Inc.

 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:

 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE. */

// Standard headers for the names this file uses directly:
// std::abs(float) -> <cmath>, fprintf -> <cstdio>, malloc/free/abort -> <cstdlib>.
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>

// hip header file
#include <hip/hip_runtime.h>
#include "roctracer_ext.h"
// roctx header file
#include <roctx.h>

// Evaluates a HIP runtime call exactly once. If the returned status is not
// hipSuccess, prints the HIP error string to stderr and aborts the process.
// The do { ... } while (0) wrapper makes the macro behave like one statement.
#define HIP_CALL(call)                                        \
  do {                                                        \
    hipError_t hip_call_err_ = (call);                        \
    if (hip_call_err_ != hipSuccess) {                        \
      fprintf(stderr, "%s\n", hipGetErrorString(hip_call_err_)); \
      abort();                                                \
    }                                                         \
  } while (0)

// Edge length of the square matrix and its total element count.
#define WIDTH 1024
#define NUM (WIDTH * WIDTH)

// Thread-block dimensions used for the kernel launch.
#define THREADS_PER_BLOCK_X 4
#define THREADS_PER_BLOCK_Y 4
#define THREADS_PER_BLOCK_Z 1

// Device (Kernel) function, it must be void
// Each thread copies one element: out[y * width + x] = in[x * width + y],
// where (x, y) is derived from the thread's block and thread indices.
//
// out   - destination buffer, indexed up to width * width - 1
// in    - source buffer, indexed up to width * width - 1
// width - edge length used for both the row stride and the bounds guard
__global__ void matrixTranspose(float* out, float* in, const int width) {
  const int x = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
  const int y = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;

  // Threads that fall outside the matrix do nothing. This keeps the kernel
  // safe when the launch grid is rounded up past width (i.e. width is not a
  // multiple of the block dimensions).
  if (x >= width || y >= width) {
    return;
  }

  out[y * width + x] = in[x * width + y];
}
// CPU implementation of matrix transpose
void matrixTransposeCPUReference(float* output, float* input, const unsigned int width) {for (unsigned int j = 0; j < width; j++) {for (unsigned int i = 0; i < width; i++) {output[i * width + j] = input[j * width + i];}}
}int main() {float* Matrix;float* TransposeMatrix;float* cpuTransposeMatrix;float* gpuMatrix;float* gpuTransposeMatrix;hipDeviceProp_t devProp;HIP_CALL(hipGetDeviceProperties(&devProp, 0));std::cerr << "Device name " << devProp.name << std::endl;int i;int errors;Matrix = (float*)malloc(NUM * sizeof(float));TransposeMatrix = (float*)malloc(NUM * sizeof(float));cpuTransposeMatrix = (float*)malloc(NUM * sizeof(float));// initialize the input datafor (i = 0; i < NUM; i++) {Matrix[i] = (float)i * 10.0f;}// allocate the memory on the device sideHIP_CALL(hipMalloc((void**)&gpuMatrix, NUM * sizeof(float)));HIP_CALL(hipMalloc((void**)&gpuTransposeMatrix, NUM * sizeof(float)));uint32_t iterations = 100;while (iterations-- > 0) {std::cerr << "## Iteration (" << iterations << ") #################" << std::endl;// Memory transfer from host to deviceHIP_CALL(hipMemcpy(gpuMatrix, Matrix, NUM * sizeof(float), hipMemcpyHostToDevice));roctxMark("before hipLaunchKernel");int rangeId = roctxRangeStart("hipLaunchKernel range");roctxRangePush("hipLaunchKernel");// Lauching kernel from hosthipLaunchKernelGGL(matrixTranspose, dim3(WIDTH / THREADS_PER_BLOCK_X, WIDTH / THREADS_PER_BLOCK_Y),dim3(THREADS_PER_BLOCK_X, THREADS_PER_BLOCK_Y), 0, 0, gpuTransposeMatrix, gpuMatrix, WIDTH);roctxMark("after hipLaunchKernel");// Memory transfer from device to hostroctxRangePush("hipMemcpy");HIP_CALL(hipMemcpy(TransposeMatrix, gpuTransposeMatrix, NUM * sizeof(float), hipMemcpyDeviceToHost));roctxRangePop();  // for "hipMemcpy"roctxRangePop();  // for "hipLaunchKernel"roctxRangeStop(rangeId);// CPU MatrixTranspose computationmatrixTransposeCPUReference(cpuTransposeMatrix, Matrix, WIDTH);// verify the resultserrors = 0;double eps = 1.0E-6;for (i = 0; i < NUM; i++) {if (std::abs(TransposeMatrix[i] - cpuTransposeMatrix[i]) > eps) {errors++;}}if (errors != 0) {fprintf(stderr, "FAILED: %d errors\n", errors);} else {fprintf(stderr, "PASSED!\n");}}// free the resources on device 
sideHIP_CALL(hipFree(gpuMatrix));HIP_CALL(hipFree(gpuTransposeMatrix));// free the resources on host sidefree(Matrix);free(TransposeMatrix);free(cpuTransposeMatrix);return errors;
}

编译:

仅执行 hipcc MatrixTranspose.cpp 无法直接编译这个源文件,

还需要用 -I 指定 roctracer 头文件所在的 include 目录,并用 -l 链接 roctx64 库:

$ hipcc ./MatrixTranspose.cpp  -I /opt/rocm/include/roctracer/ -lroctx64

运行:

./a.out

版权声明:

本网仅为发布的内容提供存储空间,不对发表、转载的内容提供任何形式的保证。凡本网注明“来源:XXX网络”的作品,均转载自其它媒体,著作权归作者所有,商业转载请联系作者获得授权,非商业转载请注明出处。

我们尊重并感谢每一位作者,均已注明文章来源和作者。如因作品内容、版权或其它问题,请及时与我们联系,联系邮箱:809451989@qq.com,投稿邮箱:809451989@qq.com