最強八皇后程序:位操作、SIMD、多線程、匯編
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <mmintrin.h>
#include <pthread.h>
#include <algorithm>

/*
0 Q......Q
3 ..Q.....
4 ...Q....
5 ..Q.....
7 Q......Q

Diagonals are detected with x+y and x-y.

value   range     bits
x       0..7      0..7
x+y     0..14     8..22
x-y     -7..7     (shifted to 0..14) 23..37
*/

typedef unsigned char byte;

// Per-thread boards: _b[thread][row] holds {x, x+row, x-row} in bytes 0..2.
// Bytes 3..7 are never written, so they keep their zero initialisation.
byte __attribute__((aligned(64))) _b[8][8][8];
// Per-thread, per-row bit masks encoding the same three values (layout above).
uint64_t _b2[8][8];
// Number of solutions found by each thread.
int cnt[8];
pthread_t tid[8];

// Builds the 38-bit mask for a queen at column x, row y:
// bit x, bit (x+y)+8 and bit (x-y+7)+23. The +7 makes x-y non-negative and
// the +23 moves it to its field. All shifts are done in 64 bits because the
// highest bit index is 37.
static uint64_t queen_mask (int x, int y) {
    return (uint64_t(1) << x)
         | (uint64_t(1) << (x + y + 8))
         | (uint64_t(1) << (x - y + 7 + 23));
}

// Debug helper: prints bits 37..0 of n ('1' for set, '.' for clear), ends the
// line, then waits for one character on stdin.
// 1<<n is bit n, so bits 0..n are n+1 bits in total.
void pr (uint64_t n) {
    for (uint64_t bit = 1ull << 37; bit; bit >>= 1)
        putchar(n & bit ? '1' : '.');
    puts("");
    getchar();
}

// Tries every column for row cy on board i, recursing into the next row for
// each column that does not conflict with rows 0..cy-1, and increments cnt[i]
// for every complete placement (cy == 7).
//
// i  - board / thread index
// cy - current row (current_y)
//
// Every conflict test is computed four ways (SIMD byte compare, bit masks,
// bitwise | and logical ||). They must agree; if they do not, the program
// reports it and aborts.
void search (int i, int cy) {
    byte (&b)[8][8] = _b[i];
    byte (&c)[8] = b[cy];
    for (int x = 0; x < 8; x++) {
        c[0] = x;
        c[1] = x + cy;
        c[2] = x - cy;
        _b2[i][cy] = queen_mask(x, cy);

        bool ok = true;
        for (int y = 0; y < cy; y++) {
            // Byte-wise equality of the two rows; look for pcmpeqb in the
            // assembly. Only the low three bytes carry data.
            const uint64_t r = uint64_t(_mm_cmpeq_pi8(*(__m64*)c, *(__m64*)b[y]));
            _mm_empty();
            const int f[] = {
                r & 0xFFFFFF ? 1 : 0,
                _b2[i][cy] & _b2[i][y] ? 1 : 0,
                (c[0] == b[y][0]) | (c[1] == b[y][1]) | (c[2] == b[y][2]),
                c[0] == b[y][0] || c[1] == b[y][1] || c[2] == b[y][2]
            };
            // f[1..3] compared with f[0..2]: true only when all four agree.
            if (!std::equal(f + 1, f + 4, f)) {
                fprintf(stderr,
                        "conflict tests disagree: board %d, row %d vs row %d, column %d\n",
                        i, cy, y, x);
                // abort() rather than exit(): this runs on a worker thread and
                // the failure must not look like a successful run.
                abort();
            }
            if (f[0]) {
                ok = false;
                break;
            }
        }
        if (!ok)
            continue;
        if (cy == 7)
            ++cnt[i];
        else
            search(i, cy + 1);
    }
}

// Thread entry point. arg carries the board index, which is also the column
// of the queen placed in row 0; the search then continues from row 1.
// Always returns a null pointer.
void* tfn (void* arg) {
    const int x = static_cast<int>(reinterpret_cast<intptr_t>(arg));
    const int cy = 0;
    // In row 0, x + y and x - y both equal x.
    _b[x][0][0] = _b[x][0][1] = _b[x][0][2] = x;
    _b2[x][cy] = queen_mask(x, cy);
    search(x, 1);
    return 0;
}

// Starts one search per first-row column, waits for all of them, then prints
// the running total of solutions after each board (one line per board).
int main () {
    enum { N = 8 };
    bool started[N];
    for (int i = 0; i < N; i++) {
        void* arg = reinterpret_cast<void*>(static_cast<intptr_t>(i));
        started[i] = pthread_create(tid + i, nullptr, tfn, arg) == 0;
        // If a thread cannot be created, do its share of the work here so the
        // totals are still complete.
        if (!started[i])
            tfn(arg);
    }
    for (int i = 0; i < N; i++)
        if (started[i])
            pthread_join(tid[i], nullptr);
    int n = 0;
    for (int i = 0; i < N; i++)
        printf("%d\n", n += cnt[i]);
}
與0xFFFFFF對應的匯編是andl $16777215, %eax(16777215就是0xFFFFFF的十進制寫法)——匯編輸出居然不用十六進制?!
movq %rdx, -48(%rbp) movq %rax, -56(%rbp) movq -56(%rbp), %xmm0 movq -48(%rbp), %xmm1 pcmpeqb %xmm1, %xmm0 movq %xmm0, %rax movq %rax, -40(%rbp) emms nop movq -40(%rbp), %rax andl $16777215, %eax testq %rax, %rax
太長了,再來個t.cpp看看|和||:
int main (int n, char**) { return (n | 123) ^ (n || 456); }

用-O0編譯,匯編里也找不到456(456非零,n || 456直接被折疊成1):

orl $123, %eax
xorl $1, %eax

把456換成函數調用,匯編里才出現調用和分支:

int f () { return 456; }
int main (int n, char**) { return (n | 123) ^ (n || f()); }

call _Z1fv
testl %eax, %eax
je .L5
.L4:
movl $1, %eax
jmp .L6
.L5:
movl $0, %eax
.L6:

_Z1fv:
pushq %rbp
movq %rsp, %rbp
movl $456, %eax
popq %rbp
ret

浙公網安備 33010602011771號