mdcore
0.1.5
|
/*
 * Copyright (c) 2003, 2007-8 Matteo Frigo
 * Copyright (c) 2003, 2007-8 Massachusetts Institute of Technology
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */


/* machine-dependent cycle counters code. Needs to be inlined. */

/***************************************************************************/
/* To use the cycle counters in your code, simply #include "cycle.h" (this
   file), and then use the functions/macros:

                 ticks getticks(void);

   ticks is an opaque typedef defined below, representing the current time.
   You extract the elapsed time between two calls to getticks() via:

                 double elapsed(ticks t1, ticks t0);

   which returns a double-precision variable in arbitrary units.  You
   are not expected to convert this into human units like seconds; it
   is intended only for *comparisons* of time intervals.

   (In order to use some of the OS-dependent timer routines like
   Solaris' gethrtime, you need to paste the autoconf snippet below
   into your configure.ac file and #include "config.h" before cycle.h,
   or define the relevant macros manually if you are not using autoconf.)

   Each section below is guarded by !defined(HAVE_TICK_COUNTER), so the
   first section whose condition matches wins and defines
   HAVE_TICK_COUNTER; if none matches, neither ticks nor getticks() is
   defined and HAVE_TICK_COUNTER stays undefined.
*/

/***************************************************************************/
/* This file uses macros like HAVE_GETHRTIME that are assumed to be
   defined according to whether the corresponding function/type/header
   is available on your system.  The necessary macros are most
   conveniently defined if you are using GNU autoconf, via the tests:

   dnl ---------------------------------------------------------------------

   AC_C_INLINE
   AC_HEADER_TIME
   AC_CHECK_HEADERS([sys/time.h c_asm.h intrinsics.h mach/mach_time.h])

   AC_CHECK_TYPE([hrtime_t],[AC_DEFINE(HAVE_HRTIME_T, 1, [Define to 1 if hrtime_t is defined in <sys/time.h>])],,[#if HAVE_SYS_TIME_H
#include <sys/time.h>
#endif])

   AC_CHECK_FUNCS([gethrtime read_real_time time_base_to_time clock_gettime mach_absolute_time])

   dnl Cray UNICOS _rtc() (real-time clock) intrinsic
   AC_MSG_CHECKING([for _rtc intrinsic])
   rtc_ok=yes
   AC_TRY_LINK([#ifdef HAVE_INTRINSICS_H
#include <intrinsics.h>
#endif], [_rtc()], [AC_DEFINE(HAVE__RTC,1,[Define if you have the UNICOS _rtc() intrinsic.])], [rtc_ok=no])
   AC_MSG_RESULT($rtc_ok)

   dnl ---------------------------------------------------------------------
*/

/***************************************************************************/

#if TIME_WITH_SYS_TIME
#  include <sys/time.h>
#  include <time.h>
#else
#  if HAVE_SYS_TIME_H
#    include <sys/time.h>
#  else
#    include <time.h>
#  endif
#endif

/* Defines elapsed() for any section whose ticks type is a plain arithmetic
   type: the result is simply t1 - t0, computed in double precision.
   INL is the compiler-specific spelling of the inline keyword. */
#define INLINE_ELAPSED(INL) static INL double elapsed(ticks t1, ticks t0) \
{ \
    return (double)t1 - (double)t0; \
}

/*----------------------------------------------------------------*/
/* Solaris */
#if defined(HAVE_GETHRTIME) && defined(HAVE_HRTIME_T) && !defined(HAVE_TICK_COUNTER)
typedef hrtime_t ticks;

#define getticks gethrtime

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* AIX v. 4+ routines to read the real-time clock or time-base register */
#if defined(HAVE_READ_REAL_TIME) && defined(HAVE_TIME_BASE_TO_TIME) && !defined(HAVE_TICK_COUNTER)
typedef timebasestruct_t ticks;

static __inline ticks getticks(void)
{
    ticks t;
    read_real_time(&t, TIMEBASE_SZ);
    return t;
}

static __inline double elapsed(ticks t1, ticks t0) /* time in nanoseconds */
{
    /* t1 and t0 are by-value copies, so converting them in place is safe. */
    time_base_to_time(&t1, TIMEBASE_SZ);
    time_base_to_time(&t0, TIMEBASE_SZ);
    return (((double)t1.tb_high - (double)t0.tb_high) * 1.0e9 +
            ((double)t1.tb_low - (double)t0.tb_low));
}

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/*
 * PowerPC ``cycle'' counter using the time base register.
 */
#if ((((defined(__GNUC__) && (defined(__powerpc__) || defined(__ppc__))) || (defined(__MWERKS__) && defined(macintosh)))) || (defined(__IBM_GCC_ASM) && (defined(__powerpc__) || defined(__ppc__)))) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

static __inline__ ticks getticks(void)
{
    unsigned int tbl, tbu0, tbu1;

    /* Read upper, lower, upper; retry if the upper half changed in
       between, so that the two 32-bit halves belong to the same value. */
    do {
        __asm__ __volatile__ ("mftbu %0" : "=r"(tbu0));
        __asm__ __volatile__ ("mftb %0" : "=r"(tbl));
        __asm__ __volatile__ ("mftbu %0" : "=r"(tbu1));
    } while (tbu0 != tbu1);

    return (((unsigned long long)tbu0) << 32) | tbl;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/* MacOS/Mach (Darwin) time-base register interface (unlike UpTime,
   from Carbon, requires no additional libraries to be linked). */
#if defined(HAVE_MACH_ABSOLUTE_TIME) && defined(HAVE_MACH_MACH_TIME_H) && !defined(HAVE_TICK_COUNTER)
#include <mach/mach_time.h>
typedef uint64_t ticks;
#define getticks mach_absolute_time
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/*
 * Pentium cycle counter
 */
#if (defined(__GNUC__) || defined(__ICC)) && defined(__i386__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

static __inline__ ticks getticks(void)
{
    ticks ret;

    __asm__ __volatile__("rdtsc": "=A" (ret));
    /* no input, nothing else clobbered */
    return ret;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
#endif

/* Visual C++ -- thanks to Morten Nissov for his help with this */
#if _MSC_VER >= 1200 && _M_IX86 >= 500 && !defined(HAVE_TICK_COUNTER)
#include <windows.h>
typedef LARGE_INTEGER ticks;
#define RDTSC __asm __emit 0fh __asm __emit 031h /* hack for VC++ 5.0 */

static __inline ticks getticks(void)
{
    ticks retval;

    __asm {
        RDTSC
        mov retval.HighPart, edx
        mov retval.LowPart, eax
    }
    return retval;
}

static __inline double elapsed(ticks t1, ticks t0)
{
    return (double)t1.QuadPart - (double)t0.QuadPart;
}

#define HAVE_TICK_COUNTER
#define TIME_MIN 5000.0   /* unreliable pentium IV cycle counter */
#endif

/*----------------------------------------------------------------*/
/*
 * X86-64 cycle counter
 */
#if (defined(__GNUC__) || defined(__ICC) || defined(__SUNPRO_C)) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

static __inline__ ticks getticks(void)
{
    unsigned a, d;

    /* __asm__/__volatile__ rather than asm/volatile: the plain asm keyword
       is not available in strict ISO modes (e.g. -std=c99, -std=c11). */
    __asm__ __volatile__("rdtsc" : "=a" (a), "=d" (d));
    return ((ticks)a) | (((ticks)d) << 32);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/* PGI compiler, courtesy Cristiano Calonaci, Andrea Tarsi, & Roberto Gori.
   NOTE: this code will fail to link unless you use the -Masmkeyword compiler
   option (grrr). */
#if defined(__PGI) && defined(__x86_64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;
static ticks getticks(void)
{
    /* No return statement: the asm combines edx:eax into %rax and the
       function relies on that register being taken as its result. */
    asm(" rdtsc; shl    $0x20,%rdx; mov    %eax,%eax; or     %rdx,%rax;    ");
}
INLINE_ELAPSED(__inline__)
#define HAVE_TICK_COUNTER
#endif

/* Visual C++, courtesy of Dirk Michaelis */
#if _MSC_VER >= 1400 && (defined(_M_AMD64) || defined(_M_X64)) && !defined(HAVE_TICK_COUNTER)

#include <intrin.h>
#pragma intrinsic(__rdtsc)
typedef unsigned __int64 ticks;
#define getticks __rdtsc
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/*
 * IA64 cycle counter
 */

/* intel's icc/ecc compiler */
#if (defined(__EDG_VERSION) || defined(__ECC)) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;
#include <ia64intrin.h>

static __inline__ ticks getticks(void)
{
    return __getReg(_IA64_REG_AR_ITC);
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/* gcc */
#if defined(__GNUC__) && defined(__ia64__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

static __inline__ ticks getticks(void)
{
    ticks ret;

    __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
    return ret;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/* HP/UX IA64 compiler, courtesy Teresa L. Johnson: */
#if defined(__hpux) && defined(__ia64) && !defined(HAVE_TICK_COUNTER)
#include <machine/sys/inline.h>
typedef unsigned long ticks;

static inline ticks getticks(void)
{
    ticks ret;

    ret = _Asm_mov_from_ar (_AREG_ITC);
    return ret;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/* Microsoft Visual C++ */
#if defined(_MSC_VER) && defined(_M_IA64) && !defined(HAVE_TICK_COUNTER)
typedef unsigned __int64 ticks;

#  ifdef __cplusplus
extern "C"
#  endif
ticks __getReg(int whichReg);
#pragma intrinsic(__getReg)

static __inline ticks getticks(void)
{
    volatile ticks temp;
    /* NOTE(review): 3116 is presumably the register index of ar.itc
       (cf. _IA64_REG_AR_ITC in the icc section) -- confirm. */
    temp = __getReg(3116);
    return temp;
}

/* __inline, matching getticks() above, instead of the plain inline keyword. */
INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/*
 * PA-RISC cycle counter
 */
/* The || must be parenthesized: && binds tighter, and without the
   parentheses this section was compiled on __hppa__ even when another
   section had already defined HAVE_TICK_COUNTER. */
#if (defined(__hppa__) || defined(__hppa)) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

#  ifdef __GNUC__
static __inline__ ticks getticks(void)
{
    ticks ret;

    __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
    /* no input, nothing else clobbered */
    return ret;
}
#  else
#  include <machine/inline.h>
static inline ticks getticks(void)
{
    register ticks ret;
    _MFCTL(16, ret);
    return ret;
}
#  endif

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* S390, courtesy of James Treacy */
#if defined(__GNUC__) && defined(__s390__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long long ticks;

static __inline__ ticks getticks(void)
{
    ticks cycles;
    /* stck stores through the address operand, hence the "memory" clobber. */
    __asm__("stck 0(%0)" : : "a" (&(cycles)) : "memory", "cc");
    return cycles;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__alpha__) && !defined(HAVE_TICK_COUNTER)
/*
 * The 32-bit cycle counter on alpha overflows pretty quickly,
 * unfortunately.  A 1GHz machine overflows in 4 seconds.
 */
typedef unsigned int ticks;

static __inline__ ticks getticks(void)
{
    unsigned long cc;
    __asm__ __volatile__ ("rpcc %0" : "=r"(cc));
    return (cc & 0xFFFFFFFF);   /* keep only the low 32 bits */
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
#if defined(__GNUC__) && defined(__sparc_v9__) && !defined(HAVE_TICK_COUNTER)
typedef unsigned long ticks;

static __inline__ ticks getticks(void)
{
    ticks ret;
    __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
    return ret;
}

INLINE_ELAPSED(__inline__)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
#if (defined(__DECC) || defined(__DECCXX)) && defined(__alpha) && defined(HAVE_C_ASM_H) && !defined(HAVE_TICK_COUNTER)
#  include <c_asm.h>
typedef unsigned int ticks;

static __inline ticks getticks(void)
{
    unsigned long cc;
    cc = asm("rpcc %v0");
    return (cc & 0xFFFFFFFF);   /* keep only the low 32 bits */
}

INLINE_ELAPSED(__inline)

#define HAVE_TICK_COUNTER
#endif
/*----------------------------------------------------------------*/
/* SGI/Irix */
#if defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_SGI_CYCLE) && !defined(HAVE_TICK_COUNTER)
typedef struct timespec ticks;

static inline ticks getticks(void)
{
    struct timespec t;
    clock_gettime(CLOCK_SGI_CYCLE, &t);
    return t;
}

/* Difference t1 - t0 with seconds scaled by 1e9 and tv_nsec added. */
static inline double elapsed(ticks t1, ticks t0)
{
    return ((double)t1.tv_sec - (double)t0.tv_sec) * 1.0E9 +
           ((double)t1.tv_nsec - (double)t0.tv_nsec);
}
#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* Cray UNICOS _rtc() intrinsic function */
#if defined(HAVE__RTC) && !defined(HAVE_TICK_COUNTER)
#ifdef HAVE_INTRINSICS_H
#  include <intrinsics.h>
#endif

typedef long long ticks;

#define getticks _rtc

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif

/*----------------------------------------------------------------*/
/* MIPS ZBus */
#if HAVE_MIPS_ZBUS_TIMER
#if defined(__mips__) && !defined(HAVE_TICK_COUNTER)
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>   /* uint32_t, uint64_t */
#include <stdio.h>    /* perror */

typedef uint64_t ticks;

/* Reads the 64-bit value at the start of a page of /dev/mem mapped at
   offset 0x10030000.  The mapping is created on the first successful
   call and reused afterwards.  If /dev/mem cannot be opened or mapped,
   an error is reported with perror() and 0 is returned; the mapping is
   attempted again on the next call. */
static inline ticks getticks(void)
{
    /* volatile: every call must perform a real load from the mapping. */
    static volatile uint64_t *addr = 0;

    if (addr == 0) {
        /* NOTE(review): presumably the physical address of the ZBus
           timer register -- confirm against the hardware manual. */
        uint32_t rq_addr = 0x10030000;
        void *map;
        int fd;
        int pgsize;

        pgsize = getpagesize();
        fd = open("/dev/mem", O_RDONLY | O_SYNC, 0);
        if (fd < 0) {
            perror("open");
            return 0;
        }
        map = mmap(0, pgsize, PROT_READ, MAP_SHARED, fd, rq_addr);
        close(fd);   /* the mapping stays valid after the descriptor is closed */
        if (map == MAP_FAILED) {
            perror("mmap");
            /* addr is left at 0 so a later call never dereferences
               the MAP_FAILED sentinel. */
            return 0;
        }
        addr = (volatile uint64_t *)map;
    }

    return *addr;
}

INLINE_ELAPSED(inline)

#define HAVE_TICK_COUNTER
#endif
#endif /* HAVE_MIPS_ZBUS_TIMER */