Index: fb/fbmmx.c =================================================================== RCS file: /cvs/xorg/xc/programs/Xserver/fb/fbmmx.c,v retrieving revision 1.20 diff -p -u -r1.20 fbmmx.c --- fb/fbmmx.c 2 Oct 2005 08:28:26 -0000 1.20 +++ fb/fbmmx.c 4 Oct 2005 03:51:44 -0000 @@ -359,14 +359,12 @@ pack565 (__m64 pixel, __m64 target, int static __inline__ __m64 pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b) { - x = _mm_mullo_pi16 (x, a); - y = _mm_mullo_pi16 (y, b); - x = _mm_srli_pi16(x, 1); - y = _mm_srli_pi16(y, 1); - x = _mm_adds_pu16 (x, y); - x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)); + x = _mm_mullo_pi16 (x, a); + y = _mm_mullo_pi16 (y, b); x = _mm_adds_pu16 (x, MC(4x0080)); - x = _mm_srli_pi16 (x, 7); + x = _mm_adds_pu16 (x, y); + x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)); + x = _mm_srli_pi16 (x, 8); return x; } @@ -565,10 +563,10 @@ mmxCombineAddU (CARD32 *dest, const CARD const CARD32 *end = dest + width; while (dest < end) { __m64 s, d; - s = load8888(*src); - d = load8888(*dest); - s = pix_add(s, d); - *dest = store8888(s); + s = load8888(*src); + d = load8888(*dest); + s = pix_add(s, d); + *dest = store8888(s); ++dest; ++src; } @@ -588,7 +586,7 @@ mmxCombineSaturateU (CARD32 *dest, const CARD32 da = ~d >> 24; if (sa > da) { - __m64 msa = load8888(FbIntDiv(da, sa)); + __m64 msa = load8888(FbIntDiv(da, sa)<<24); msa = expand_alpha(msa); ms = pix_multiply(ms, msa); } @@ -2269,117 +2267,5 @@ fbCompositeCopyAreammx (CARD8 op, width, height); } -#if !defined(__amd64__) && !defined(__x86_64__) - -enum CPUFeatures { - NoFeatures = 0, - MMX = 0x1, - MMX_Extensions = 0x2, - SSE = 0x6, - SSE2 = 0x8, - CMOV = 0x10 -}; - -static unsigned int detectCPUFeatures(void) { - unsigned int result; - char vendor[13]; - vendor[0] = 0; - vendor[12] = 0; - /* see p. 118 of amd64 instruction set manual Vol3 */ - /* We need to be careful about the handling of %ebx and - * %esp here. 
We can't declare either one as clobbered - * since they are special registers (%ebx is the "PIC - * register" holding an offset to global data, %esp the - * stack pointer), so we need to make sure they have their+ * original values when we access the output operands. - */ - __asm__ ("pushf\n" - "pop %%eax\n" - "mov %%eax, %%ecx\n" - "xor $0x00200000, %%eax\n" - "push %%eax\n" - "popf\n" - "pushf\n" - "pop %%eax\n" - "mov $0x0, %%edx\n" - "xor %%ecx, %%eax\n" - "jz 1\n" - - "mov $0x00000000, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "mov %%ebx, %%eax\n" - "pop %%ebx\n" - "mov %%eax, %1\n" - "mov %%edx, %2\n" - "mov %%ecx, %3\n" - "mov $0x00000001, %%eax\n" - "push %%ebx\n" - "cpuid\n" - "pop %%ebx\n" - "1:\n" - "mov %%edx, %0\n" - : "=r" (result), - "=m" (vendor[0]), - "=m" (vendor[4]), - "=m" (vendor[8]) - : - : "%eax", "%ecx", "%edx" - ); - - unsigned int features = 0; - if (result) { - /* result now contains the standard feature bits */ - if (result & (1 << 15)) - features |= CMOV; - if (result & (1 << 23)) - features |= MMX; - if (result & (1 << 25)) - features |= SSE; - if (result & (1 << 26)) - features |= SSE2; - if ((result & MMX) && !(result & SSE) && (strcmp(vendor, "AuthenticAMD") == 0)) { - /* check for AMD MMX extensions */ - - unsigned int result; - __asm__("push %%ebx\n" - "mov $0x80000000, %%eax\n" - "cpuid\n" - "xor %%edx, %%edx\n" - "cmp $0x1, %%eax\n" - "jge 2\n" - "mov $0x80000001, %%eax\n" - "cpuid\n" - "2:\n" - "pop %%ebx\n" - "mov %%edx, %0\n" - : "=r" (result) - : - : "%eax", "%ecx", "%edx" - ); - if (result & (1<<22)) - features |= MMX_Extensions; - } - } - return features; -} - -Bool -fbHaveMMX (void) -{ - static Bool initialized = FALSE; - static Bool mmx_present; - - if (!initialized) - { - unsigned int features = detectCPUFeatures(); - mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions); - initialized = TRUE; - } - - return mmx_present; -} -#endif /* __amd64__ */ - - #endif /* RENDER */ #endif /* USE_MMX */ Index: 
fb/fbpict.c =================================================================== RCS file: /cvs/xorg/xc/programs/Xserver/fb/fbpict.c,v retrieving revision 1.18 diff -p -u -r1.18 fbpict.c --- fb/fbpict.c 30 Aug 2005 03:01:38 -0000 1.18 +++ fb/fbpict.c 4 Oct 2005 03:51:45 -0000 @@ -1312,6 +1312,122 @@ fbComposite (CARD8 op, REGION_UNINIT (pDst->pDrawable->pScreen, &region); } +/* The CPU detection code needs to be in a file not compiled with + * "-mmmx -msse", as gcc would generate CMOV instructions otherwise + * that would lead to SIGILL instructions on old CPUs that don't have + * it. + */ +#if defined(USE_MMX) && !defined(__amd64__) && !defined(__x86_64__) + +enum CPUFeatures { + NoFeatures = 0, + MMX = 0x1, + MMX_Extensions = 0x2, + SSE = 0x6, + SSE2 = 0x8, + CMOV = 0x10 +}; + +static unsigned int detectCPUFeatures(void) { + unsigned int result; + char vendor[13]; + vendor[0] = 0; + vendor[12] = 0; + /* see p. 118 of amd64 instruction set manual Vol3 */ + /* We need to be careful about the handling of %ebx and + * %esp here. We can't declare either one as clobbered + * since they are special registers (%ebx is the "PIC + * register" holding an offset to global data, %esp the + * stack pointer), so we need to make sure they have their + * original values when we access the output operands.
+ */ + __asm__ ("pushf\n" + "pop %%eax\n" + "mov %%eax, %%ecx\n" + "xor $0x00200000, %%eax\n" + "push %%eax\n" + "popf\n" + "pushf\n" + "pop %%eax\n" + "mov $0x0, %%edx\n" + "xor %%ecx, %%eax\n" + "jz 1f\n" + + "mov $0x00000000, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "mov %%ebx, %%eax\n" + "pop %%ebx\n" + "mov %%eax, %1\n" + "mov %%edx, %2\n" + "mov %%ecx, %3\n" + "mov $0x00000001, %%eax\n" + "push %%ebx\n" + "cpuid\n" + "pop %%ebx\n" + "1:\n" + "mov %%edx, %0\n" + : "=r" (result), + "=m" (vendor[0]), + "=m" (vendor[4]), + "=m" (vendor[8]) + : + : "%eax", "%ecx", "%edx" + ); + + unsigned int features = 0; + if (result) { + /* result now contains the standard feature bits */ + if (result & (1 << 15)) + features |= CMOV; + if (result & (1 << 23)) + features |= MMX; + if (result & (1 << 25)) + features |= SSE; + if (result & (1 << 26)) + features |= SSE2; + if ((result & MMX) && !(result & SSE) && (strcmp(vendor, "AuthenticAMD") == 0)) { + /* check for AMD MMX extensions */ + + unsigned int result; + __asm__("push %%ebx\n" + "mov $0x80000000, %%eax\n" + "cpuid\n" + "xor %%edx, %%edx\n" + "cmp $0x1, %%eax\n" + "jge 1f\n" + "mov $0x80000001, %%eax\n" + "cpuid\n" + "1:\n" + "pop %%ebx\n" + "mov %%edx, %0\n" + : "=r" (result) + : + : "%eax", "%ecx", "%edx" + ); + if (result & (1<<22)) + features |= MMX_Extensions; + } + } + return features; +} + +Bool +fbHaveMMX (void) +{ + static Bool initialized = FALSE; + static Bool mmx_present; + + if (!initialized) + { + unsigned int features = detectCPUFeatures(); + mmx_present = (features & (MMX|MMX_Extensions)) == (MMX|MMX_Extensions); + initialized = TRUE; + } + + return mmx_present; +} +#endif /* USE_MMX && !amd64 */ #endif /* RENDER */ Bool @@ -1331,7 +1447,6 @@ fbPictureInit (ScreenPtr pScreen, PictFo ps->RasterizeTrapezoid = fbRasterizeTrapezoid; ps->AddTraps = fbAddTraps; ps->AddTriangles = fbAddTriangles; - #endif /* RENDER */ return TRUE;