/* Number of worker threads used to split one copy. */
#define NUM_THREADS 4

/* Describes the contiguous byte range handled by a single worker thread. */
typedef struct {
    uint8_t *dst;       /* start of this worker's destination range */
    const uint8_t *src; /* start of this worker's source range */
    size_t size;        /* number of bytes this worker copies */
} ThreadData;

/*
 * Thread entry point: copies one ThreadData range with memcpy.
 *
 * arg must point to a ThreadData that stays valid until the thread is
 * joined. Always returns NULL.
 */
void *thread_copy(void *arg)
{
    ThreadData *data = (ThreadData *)arg;

    memcpy(data->dst, data->src, data->size);
    return NULL;
}

/*
 * Copies size bytes from src to dst, splitting the work into NUM_THREADS
 * contiguous chunks that are copied concurrently. The last chunk also takes
 * the size % NUM_THREADS leftover bytes. The function returns only after
 * every chunk has been copied.
 *
 * The regions must not overlap (each chunk is copied with memcpy).
 *
 * If a worker thread cannot be created, its chunk is copied on the calling
 * thread instead, so the copy is still complete on return.
 *
 * Example usage:
 *   parallel_memcpy((uint8_t *)g_if_v_ctl._big_yuv_frame_buf.frame_vir_addr, (const uint8_t *)YuvBigImage, _FRAME_BIG_BUFF_SIZE);
 */
void parallel_memcpy(uint8_t *dst, const uint8_t *src, size_t size)
{
    pthread_t threads[NUM_THREADS];
    ThreadData thread_data[NUM_THREADS];
    int started[NUM_THREADS] = {0};
    size_t chunk_size = size / NUM_THREADS;
    size_t remaining = size % NUM_THREADS;

    /* Too small to split: spawning threads for empty chunks is pure overhead. */
    if (chunk_size == 0) {
        if (size != 0) {
            memcpy(dst, src, size);
        }
        return;
    }

    for (size_t i = 0; i < NUM_THREADS; i++) {
        thread_data[i].dst = dst + i * chunk_size;
        thread_data[i].src = src + i * chunk_size;
        thread_data[i].size = chunk_size;
        if (i == NUM_THREADS - 1) {
            thread_data[i].size += remaining;
        }

        if (pthread_create(&threads[i], NULL, thread_copy, &thread_data[i]) == 0) {
            started[i] = 1;
        } else {
            /* Thread creation failed: do this chunk synchronously. */
            thread_copy(&thread_data[i]);
        }
    }

    /* Join only threads that were actually created; joining an
     * uninitialised pthread_t is undefined behaviour. */
    for (size_t i = 0; i < NUM_THREADS; i++) {
        if (started[i]) {
            (void)pthread_join(threads[i], NULL);
        }
    }
}
利用多线程加速大块数据的拷贝过程。
最好只在多核、大数据块的情况下使用该操作,否则线程创建和同步的开销可能会适得其反。
还可以尝试基于指令集(例如 SIMD 指令)的拷贝方式,效果也不错。