Actual source code: sseenabled.c
2: #include <petscsys.h>
4: #if defined(PETSC_HAVE_SSE)
6: #include PETSC_HAVE_SSE
7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
9: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
10: {
11: char vendor[13];
12: char Intel[13]="GenuineIntel";
13: char AMD[13] ="AuthenticAMD";
14: char Hygon[13]="HygonGenuine";
15: PetscBool flg;
17: PetscStrncpy(vendor,"************",sizeof(vendor));
18: CPUID_GET_VENDOR(vendor);
19: PetscStrcmp(vendor,Intel,&flg);
20: if (!flg) PetscStrcmp(vendor,AMD,&flg);
21: if (!flg) {PetscStrcmp(vendor,Hygon,&flg);
22: if (flg) {
23: /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
24: /* to denote availability of SSE Support */
25: unsigned long myeax,myebx,myecx,myedx;
26: CPUID(CPUID_FEATURES,&myeax,&myebx,&myecx,&myedx);
27: if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
28: else *flag = PETSC_FALSE;
29: }
30: return 0;
31: }
33: #if defined(PETSC_HAVE_FORK)
#include <signal.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
35: /*
Early versions of the Linux kernel disable SSE hardware because
they do not know how to preserve the SSE state at a context switch.
To detect this feature, try an SSE instruction in another process.
39: If it works, great! If not, an illegal instruction signal will be thrown,
40: so catch it and return an error code.
41: */
#define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

/*
   PetscSSEDisabledHandler - SIGILL handler for the process that tries an SSE instruction.

   Input Parameter:
.  sig - the signal number (unused)

   Ignores any further SIGILL and terminates the process immediately with exit code -1,
   which the waiting parent sees as a nonzero status.  _Exit() is used instead of exit()
   because only abort(), _Exit(), quick_exit() and signal() may be called from a signal
   handler; it also avoids running atexit handlers or flushing stdio buffers that a
   forked child inherited from its parent.
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig;
  signal(SIGILL,SIG_IGN);
  _Exit(-1);
}
50: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
51: {
52: int status, pid = 0;
54: signal(SIGILL,PetscSSEDisabledHandler);
55: pid = fork();
56: if (pid==0) {
57: SSE_SCOPE_BEGIN;
58: XOR_PS(XMM0,XMM0);
59: SSE_SCOPE_END;
60: exit(0);
61: } else wait(&status);
62: if (!status) *flag = PETSC_TRUE;
63: else *flag = PETSC_FALSE;
64: return 0;
65: }
67: #else
68: /*
69: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
70: Windows ME/2000 doesn't disable SSE Hardware
71: */
72: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
73: #endif
75: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
76: {
77: if (flag) *flag = PETSC_TRUE;
78: return 0;
79: }
81: #else /* Not defined PETSC_HAVE_SSE */
83: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
84: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
86: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
87: {
88: if (flag) *flag = PETSC_FALSE;
89: return 0;
90: }
92: #endif /* defined PETSC_HAVE_SSE */
94: /*@C
95: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
96: set can be used. Some operating systems do not allow the use of these instructions despite
97: hardware availability.
99: Collective
101: Input Parameter:
102: . comm - the MPI Communicator
104: Output Parameters:
105: + lflag - Local Flag: PETSC_TRUE if enabled in this process
106: - gflag - Global Flag: PETSC_TRUE if enabled for all processes in comm
108: Notes:
109: NULL can be specified for lflag or gflag if either of these values are not desired.
111: Options Database Keys:
112: . -disable_sse - Disable use of hand tuned Intel SSE implementations
114: Level: developer
115: @*/
116: static PetscBool petsc_sse_local_is_untested = PETSC_TRUE;
117: static PetscBool petsc_sse_enabled_local = PETSC_FALSE;
118: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
119: static PetscBool petsc_sse_enabled_global = PETSC_FALSE;
120: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm,PetscBool *lflag,PetscBool *gflag)
121: {
122: PetscBool disabled_option;
124: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
125: disabled_option = PETSC_FALSE;
127: PetscOptionsGetBool(NULL,NULL,"-disable_sse",&disabled_option,NULL);
128: if (disabled_option) {
129: petsc_sse_local_is_untested = PETSC_FALSE;
130: petsc_sse_enabled_local = PETSC_FALSE;
131: petsc_sse_global_is_untested = PETSC_FALSE;
132: petsc_sse_enabled_global = PETSC_FALSE;
133: }
135: if (petsc_sse_local_is_untested) {
136: PetscSSEHardwareTest(&petsc_sse_enabled_local);
137: if (petsc_sse_enabled_local) {
138: PetscSSEOSEnabledTest(&petsc_sse_enabled_local);
139: }
140: petsc_sse_local_is_untested = PETSC_FALSE;
141: }
143: if (gflag && petsc_sse_global_is_untested) {
144: MPIU_Allreduce(&petsc_sse_enabled_local,&petsc_sse_enabled_global,1,MPIU_BOOL,MPI_LAND,comm);
146: petsc_sse_global_is_untested = PETSC_FALSE;
147: }
148: }
150: if (lflag) *lflag = petsc_sse_enabled_local;
151: if (gflag) *gflag = petsc_sse_enabled_global;
152: return 0;
153: }