共计 1294 个字符,预计需要花费 4 分钟才能阅读完成。
归并排序
归并排序能够简略得扩大为并行模式,每个合并操作互相独立,因而咱们以归并排序为例实现一下多线程排序。
自下而上归并排序
根本示意图如下,假如 N % 2 == 0,归并排序共 log2(N) 层,共 n-1 次合并操作
代码如下,自底向上逐层合并,
grpSize
为以后层的块大小,相邻块进行合并。repo1
和repo2
别离指向输出数据和长期空间,每次合并之后进行替换secondGrpSize
确保了遇到数组开端时,第二个块大小的正确性secondGrpSize == 0
第二个块大小为零时,无需排序,间接拷贝输出数据到新空间即可-
mergeList
参数为:- 块 1 起始地位,
- 块 2 起始地位,
- 块 1 长度,
- 块 2 长度,
- 指标数组
template <class T> void mergesort(T *data, int N) {T *temp = new T[N]; | |
T *repo1, *repo2, *aux; | |
repo1 = data; | |
repo2 = temp; | |
for (int grpSize = 1; grpSize < N; grpSize <<= 1) { | |
#pragma omp parallel for | |
for (int stIdx = 0; stIdx < N; stIdx += 2 * grpSize) { | |
int nextIdx = stIdx + grpSize; | |
int secondGrpSize = min(max(0, N - nextIdx), grpSize); | |
if (secondGrpSize == 0) {for (int i = 0; i < N - stIdx; i++) | |
repo2[stIdx + i] = repo1[stIdx + i]; | |
} else | |
mergeList(repo1 + stIdx, repo1 + nextIdx, grpSize, secondGrpSize, | |
repo2 + stIdx); | |
} | |
aux = repo1; | |
repo1 = repo2; | |
repo2 = aux; | |
} | |
if (repo1 != data) | |
memcpy(data, temp, sizeof(T) * N); | |
delete[] temp;} | |
template <class T> | |
void mergeList(T *src1, T *src2, int len1, int len2, T *dest) { | |
int idx1 = 0, idx2 = 0; | |
int loc = 0; | |
while (idx1 < len1 && idx2 < len2) {if (src1[idx1] <= src2[idx2]) {dest[loc] = src1[idx1]; | |
idx1++; | |
} else {dest[loc] = src2[idx2]; | |
idx2++; | |
} | |
loc++; | |
} | |
for (int i = idx1; i < len1; i++) | |
dest[loc++] = src1[i]; | |
for (int i = idx2; i < len2; i++) | |
dest[loc++] = src2[i]; | |
} |
测试:
16 线程下
$ g++ mergesort_omp_bottomup.cpp -O3 -fopenmp -g | |
$ ./a.out 500000000 | |
12.9973 |
去掉 #pragma omp parallel for
的单线程状况
$ g++ mergesort_omp_bottomup.cpp -O3 -fopenmp -g | |
$ ./a.out 500000000 | |
34.6137 |
正文完