mdcore
0.1.5
|
/*******************************************************************************
 * This file is part of mdcore.
 * Copyright (c) 2012 Pedro Gonnet (pedro.gonnet@durham.ac.uk)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 ******************************************************************************/

/* Set the max number of parts for shared buffers. */
#define cuda_maxparts                       512
#define cuda_maxdiags                       352
/* Derived from cuda_maxdiags: n*(n-1)/2, which evaluates to 61776. */
#define cuda_ndiags                         ( ( (cuda_maxdiags - 1) * cuda_maxdiags ) / 2 )
#define cuda_frame                          32
#define cuda_maxpots                        100
#define max_fingers                         1
#define cuda_maxblocks                      64
#define cuda_memcpy_chunk                   6
#define cuda_sum_chunk                      3
#define cuda_maxqueues                      30


/* Some flags that control optional behaviour */
// #define TIMERS
// #define PARTS_TEX
// #define PARTS_LOCAL
// #define FORCES_LOCAL


/**
 * Identifiers passed as the `tid` argument of the TIMER_TOC* macros below,
 * where they index the cuda_timers array.
 *
 * Values start at 0 and increase by one, so tid_count (the last entry)
 * equals the number of identifiers preceding it.
 */
enum {
    tid_mutex = 0,
    tid_queue,
    tid_gettask,
    tid_memcpy,
    tid_update,
    tid_pack,
    tid_sort,
    tid_pair,
    tid_self,
    tid_potential,
    tid_potential4,
    tid_total,
    tid_count
    };


/* Timer functions.
 *
 * With TIMERS defined, the TIC macros store clock() in `tic` (or `tic2`) on
 * the thread with threadIdx.x == 0, and the TOC macros read clock() into
 * `toc` (or `toc2`) and have that same thread atomicAdd the elapsed ticks to
 * cuda_timers[tid].
 *
 * The plain variants declare the clock_t variable themselves; the `_ND`
 * ("no declaration") variants expect `tic`/`toc` (or `tic2`/`toc2`) to have
 * been declared already in the enclosing scope.
 *
 * Every macro carries its own statement terminators, so call sites do not
 * need a trailing semicolon.
 *
 * NOTE(review): when toc is not greater than tic, the elapsed time is taken
 * as toc + (0xffffffff - tic). If the counter is 32 bits wide this is one
 * tick short of the exact modular difference; confirm whether that matters.
 *
 * Without TIMERS every macro expands to nothing. All eight macros must be
 * defined in both branches, otherwise code using them only compiles in one
 * configuration.
 */
#ifdef TIMERS
    #define TIMER_TIC_ND if ( threadIdx.x == 0 ) tic = clock();
    #define TIMER_TOC_ND(tid) toc = clock(); if ( threadIdx.x == 0 ) atomicAdd( &cuda_timers[tid] , ( toc > tic ) ? (toc - tic) : ( toc + (0xffffffff - tic) ) );
    #define TIMER_TIC clock_t tic; if ( threadIdx.x == 0 ) tic = clock();
    #define TIMER_TOC(tid) clock_t toc = clock(); if ( threadIdx.x == 0 ) atomicAdd( &cuda_timers[tid] , ( toc > tic ) ? (toc - tic) : ( toc + (0xffffffff - tic) ) );
    #define TIMER_TIC2_ND if ( threadIdx.x == 0 ) tic2 = clock();
    #define TIMER_TOC2_ND(tid) toc2 = clock(); if ( threadIdx.x == 0 ) atomicAdd( &cuda_timers[tid] , ( toc2 > tic2 ) ? (toc2 - tic2) : ( toc2 + (0xffffffff - tic2) ) );
    #define TIMER_TIC2 clock_t tic2; if ( threadIdx.x == 0 ) tic2 = clock();
    #define TIMER_TOC2(tid) clock_t toc2 = clock(); if ( threadIdx.x == 0 ) atomicAdd( &cuda_timers[tid] , ( toc2 > tic2 ) ? (toc2 - tic2) : ( toc2 + (0xffffffff - tic2) ) );
#else
    #define TIMER_TIC_ND
    #define TIMER_TOC_ND(tid)
    #define TIMER_TIC
    #define TIMER_TOC(tid)
    /* The two _ND no-ops below were previously missing from this branch. */
    #define TIMER_TIC2_ND
    #define TIMER_TOC2_ND(tid)
    #define TIMER_TIC2
    #define TIMER_TOC2(tid)
#endif


/**
 * A queue of integer entries together with a recycling list.
 *
 * NOTE(review): the counters and both data pointers are volatile, presumably
 * because they are read and written concurrently by several thread blocks;
 * confirm against the code that operates on the queue.
 */
struct queue_cuda {

    /* Indices to the first and last elements. */
    int first, last;

    /* Number of elements in this queue. */
    volatile int count;

    /* Number of elements in the recycled list. */
    volatile int rec_count;

    /* The queue data. */
    volatile int *data;

    /* The recycling list. */
    volatile int *rec_data;

    };


/**
 * A pair of integer indices plus a three-component float shift.
 *
 * NOTE(review): judging by the names, i and j presumably identify the two
 * cells of the pair and shift the spatial offset applied between them;
 * verify against the code that fills this struct.
 */
struct cellpair_cuda {

    /* The two indices making up the pair. */
    int i, j;

    /* Shift vector, one float per spatial dimension. */
    float shift[3];

    };