arch/cell/cell_lib/spert.h

Go to the documentation of this file.
00001 #ifndef __SPE_RUNTIME_H__
00002 #define __SPE_RUNTIME_H__
00003 
00004 
00005 #include "spert_common.h"
00006 
00007 
00009 // Defines
00010 
00011 // Work Request Flags (each value is a distinct single-bit mask)
00012 #define WORK_REQUEST_FLAGS_NONE            (0x00)  // No flags
00013 #define WORK_REQUEST_FLAGS_RW_IS_RO        (0x01)  // (Standard Only) Treat the buffer that readWritePtr points to as a readOnly buffer
00014 #define WORK_REQUEST_FLAGS_RW_IS_WO        (0x02)  // (Standard Only) Treat the buffer that readWritePtr points to as a writeOnly buffer
00015 #define WORK_REQUEST_FLAGS_LIST            (0x04)  // (List Only) The work request uses a DMA list instead of a single set of buffers
00016 #define WORK_REQUEST_FLAGS_BOTH_CALLBACKS  (0x08)  // (Groups Only) If the work request is part of a group, setting this flag indicates that the individual work request's callback should also be called in addition to the group callback when the entire group is finished.  By default, only the group callback will be called.
00017 
00018 // Right shift amounts to bring flag checks into lsb of register value
00019 #define WORK_REQUEST_FLAGS_RW_IS_RO_SHIFT (0)
00020 #define WORK_REQUEST_FLAGS_RW_IS_WO_SHIFT (1)
00021 #define WORK_REQUEST_FLAGS_LIST_SHIFT     (2)
00022 
00023 // NOTE : This should be "unsigned long" for 32-bit and "unsigned long long" for 64-bit
00024 #define PPU_POINTER_TYPE   unsigned long
00025 
00026 // Defines that describe the message queue between the PPU and SPE
00027 #define SPE_MESSAGE_QUEUE_LENGTH      8   // DO NOT SET ABOVE 31 (because of the way tags are used with the DMA engines)
00028 #define SPE_MESSAGE_QUEUE_BYTE_COUNT  (SIZEOF_128(SPEMessage) * SPE_MESSAGE_QUEUE_LENGTH)  // Size of the whole message queue: one SIZEOF_128(SPEMessage) slot per entry (SIZEOF_128 is defined elsewhere; presumably rounds the size up to a multiple of 128 bytes - confirm)
00029 #define DOUBLE_BUFFER_MESSAGE_QUEUE   1   // Set to non-zero to make the SPE Runtime double buffer the message queue
00030 #define SPE_NOTIFY_VIA_MAILBOX        0   // When 0, SPEData also carries a notifyQueue pointer (see SPEData); presumably a non-zero value selects mailbox notification instead - confirm
00031 #define SPE_NOTIFY_QUEUE_BYTE_COUNT   (ROUNDUP_128(sizeof(SPENotify) * SPE_MESSAGE_QUEUE_LENGTH))  // Size of SPE_MESSAGE_QUEUE_LENGTH SPENotify entries, passed through ROUNDUP_128 (defined elsewhere)
00032 
00033 // The number of dma list entries in a pre-allocated dma list.
00034 #define SPE_DMA_LIST_LENGTH                16       // Per message in message queue (NOTE: Must be an even # >= 4: 4, 10, 22, etc.)
00035 #define SPE_DMA_LIST_ENTRY_MAX_LENGTH      0x4000   // Maximum length of a buffer pointed to by a single dma list entry (should be a power of 2)
00036 
00037 // Scheduler controls
00038 #define SPE_USE_STATE_LOOKUP_TABLE  1
00039 #define LIMIT_READY  5
00040 
00041 // Memory Settings
00042 #define SPE_TOTAL_MEMORY_SIZE   (256 * 1024)  // Defined by the architecture
00043 #define SPE_USE_OWN_MEMSET               (0)  // Set to 1 to force a local version of memset to be used (to try to remove C/C++ runtime dependence)
00044 #define SPE_USE_OWN_MALLOC               (1)  // Set to 1 to force a local version of malloc and free to be used
00045 #define SPE_MEMORY_BLOCK_SIZE     (1024 * 4)  // !!! IMPORTANT !!! : NOTE : SPE_MEMORY_BLOCK_SIZE should be a power of 2.
00046 #define SPE_RESERVED_STACK_SIZE  (1024 * 48)  // Reserve this much memory for the stack
00047 #define SPE_MINIMUM_HEAP_SIZE    (1024 * 16)  // Require at least this amount of heap (or the SPE Runtime will exit)
00048 #define SPE_ZERO_WRITE_ONLY_MEMORY       (0)  // Set to non-zero if the write-only buffer should be zero-ed out on the SPE before being filled in
00049 
00050 // The maximum number of work requests that can be serviced in a single SPE scheduler loop iteration
00051 #define SPE_MAX_GET_PER_LOOP       10
00052 #define SPE_MAX_EXECUTE_PER_LOOP   2
00053 #define SPE_MAX_PUT_PER_LOOP       10
00054 
00055 // Defines for SPEMessage::state
00056 #define SPE_MESSAGE_STATE_MIN                 0
00057 #define SPE_MESSAGE_STATE_CLEAR               0
00058 #define SPE_MESSAGE_STATE_SENT                1
00059 #define SPE_MESSAGE_STATE_PRE_FETCHING        2
00060 #define SPE_MESSAGE_STATE_PRE_FETCHING_LIST   3  // NOTE: code in processMsgState_send requires 'PRE_FETCHING_LIST = PRE_FETCHING + 1'
00061 #define SPE_MESSAGE_STATE_FETCHING            4
00062 #define SPE_MESSAGE_STATE_LIST_READY_LIST     5
00063 #define SPE_MESSAGE_STATE_FETCHING_LIST       6
00064 #define SPE_MESSAGE_STATE_READY               7
00065 #define SPE_MESSAGE_STATE_EXECUTED            8
00066 #define SPE_MESSAGE_STATE_EXECUTED_LIST       9  // NOTE: code in processMsgState_ready requires 'EXECUTED_LIST = EXECUTED + 1'
00067 #define SPE_MESSAGE_STATE_COMMITTING          10
00068 #define SPE_MESSAGE_STATE_FINISHED            11
00069 #define SPE_MESSAGE_STATE_ERROR               12
00070 #define SPE_MESSAGE_STATE_MAX                 12
00071 #define SPE_MESSAGE_NUM_STATES                (SPE_MESSAGE_STATE_MAX - SPE_MESSAGE_STATE_MIN + 1)
00072 
00073 // SPE Function Indexes
00074 #define SPE_FUNC_INDEX_INIT       (-2)
00075 #define SPE_FUNC_INDEX_CLOSE      (-1)
00076 #define SPE_FUNC_INDEX_USER       (0)
00077 
00078 // SPE Commands
00079 #define SPE_MESSAGE_COMMAND_MIN          0
00080 #define SPE_MESSAGE_COMMAND_NONE         0
00081 #define SPE_MESSAGE_COMMAND_EXIT         1
00082 #define SPE_MESSAGE_COMMAND_RESET_CLOCK  2
00083 #define SPE_MESSAGE_COMMAND_MAX          2
00084 
00085 // SPE Error Codes
00086 #define SPE_MESSAGE_OK                       (0x0000)
00087 #define SPE_MESSAGE_ERROR_NOT_ENOUGH_MEMORY  (0x0001)
00088 
00089 // Tracing
00090 #define ENABLE_TRACE        0  // Set to non-zero to enable trace statements for work requests that have tracing enabled
00091 
00092 // DEBUG Display Level
00093 #define SPE_DEBUG_DISPLAY   0  // Set to 0 to save on LS memory usage (all printf's should be wrapped in this!)
00094 #define SPE_DEBUG_DISPLAY_STILL_ALIVE  0 // If > 0 then display a "still alive" message every SPE_DEBUG_DISPLAY_STILL_ALIVE iterations
00095 #define SPE_DEBUG_DISPLAY_NO_PROGRESS  0 // If non-zero, warn when no message changes state for this many iterations
00096 #define SPE_REPORT_END      1  // Have each SPE report the address of its _end variable (end of data segment; will be printed by PPE during spe thread creation)
00097 #define SPE_NOTIFY_ON_MALLOC_FAILURE   0  // Set to 1 to force the SPE to notify the user when malloc/new returns an un-usable pointer (message will retry malloc/new later)
00098 
00099 #define OFFLOAD_API_FULL_CHECK  1
00100 
00101 // STATS Data Collection
00102 #define PPE_STATS    0  // Set to have stat data collected during execution for the PPE side of the Offload API
00103 
00104 // NOTE : Only a single SPE_TIMING/STATS should be enabled at a time
00105 //   !!! (e.g. - if SPE_STATS enabled, then SPE_STATS1, SPE_STATS2, and SPE_TIMING should be disabled) !!!
00106 #define SPE_TIMING   1  // Set to have timing data on the WRs sent back to the PPE
00107 #define SPE_STATS    0  // Set to have stat data collected during execution for the SPE side of the Offload API (SPE Runtime)
00108 #define SPE_STATS1   0
00109 #define SPE_STATS2   0  // 0: unset; >0: message queue index to track; <0: track all message queue entries
00110 
00111 // The lower and upper bounds of tags that are available to the user's code (in case the user's code needs to
00112 //   do DMA transactions directly and needs to use tags in doing so).
00113 #define SPE_USER_TAG_MIN   SPE_MESSAGE_QUEUE_LENGTH  // NOTE: 0 through SPE_MESSAGE_QUEUE_LENGTH are used for work request DMA transactions (NOTE(review): presumably 0 through SPE_MESSAGE_QUEUE_LENGTH - 1, since tag SPE_MESSAGE_QUEUE_LENGTH itself is handed to the user here - confirm)
00114 #define SPE_USER_TAG_MAX   29  // NOTE: 31 and 30 are reserved for message and notify queues
00115 #define SPE_NUM_USER_TAGS  (SPE_USER_TAG_MAX - SPE_USER_TAG_MIN + 1)  // Number of user tags (both bounds inclusive); NOTE(review): evaluates to <= 0 if SPE_MESSAGE_QUEUE_LENGTH is set above 29
00116 
00117 
00119 // Data Structures
00120 
00126 // DMA list entry (element type of SPEMessage::dmaList).  NOTE: The actual layout of this data structure should include the notify and reserved fields
00127 //   (as shown below).  However, to make this a bit easier on the user (calling sendWorkRequest_list)
00128 //   size will be treated as a 32-bit number and the upper 16 bits will be zero-ed out.  This will
00129 //   also prevent the user from setting the notify flag.
00130 typedef struct __dma_list_entry {
00131   //unsigned int notify   : 1;   // Notify when finished
00132   //unsigned int reserved : 15;  // Reserved (set to all zeros)
00133   unsigned int size;             // Size for this entry; handled as one 32-bit value whose upper 16 bits get zero-ed out (they overlay the notify/reserved bits shown above)
00134   unsigned int ea;               // NOTE(review): undocumented; presumably the effective address of the buffer this entry refers to - confirm
00135 } DMAListEntry;
00136 
00137 /* @} */
00138 
00139 
00140 // SPE Message: The structure that defines a message being passed to an SPE
00141 typedef struct __SPE_MESSAGE {
00142 
00143   volatile int counter0;           // NOTE(review): undocumented; presumably the leading counterpart of counter1 - confirm
00144   volatile int state;              // Current state of the message (see SPE_MESSAGE_STATE_xxx)
00145   volatile unsigned int flags;     // presumably a combination of WORK_REQUEST_FLAGS_xxx - confirm
00146   volatile int funcIndex;          // Indicates what "function" the SPE should perform (cf. SPE_FUNC_INDEX_xxx)
00147 
00148   volatile PPU_POINTER_TYPE readWritePtr;  // Points to the read/write buffer (see WORK_REQUEST_FLAGS_RW_IS_RO and WORK_REQUEST_FLAGS_RW_IS_WO)
00149   volatile PPU_POINTER_TYPE readOnlyPtr;   // presumably points to the read-only buffer - confirm
00150   volatile PPU_POINTER_TYPE writeOnlyPtr;  // presumably points to the write-only buffer - confirm
00151   volatile int readWriteLen;               // presumably the length of the read/write buffer (units not stated here) - confirm
00152 
00153   volatile int readOnlyLen;        // presumably the length of the read-only buffer - confirm
00154   volatile int writeOnlyLen;       // presumably the length of the write-only buffer - confirm
00155   volatile unsigned int totalMem;  // The total amount of memory that will be needed on the SPE for the request
00156   volatile int traceFlag;          // DEBUG
00157 
00158   // NOTE : !!! VERY IMPORTANT !!! : The dmaList address must be 16 byte aligned in the SPE's LS.  The SPEMessage
00159   //   data structures get 16 byte aligned so the fields in this data structure must be ordered in such a way
00160   //   that dmaList starts a multiple of 16 bytes away from the start of the overall structure.  (NOTE(review): this appears to rely on PPU_POINTER_TYPE being 4 bytes wide - re-check the offset before switching to a 64-bit pointer type.)
00161   volatile DMAListEntry dmaList[SPE_DMA_LIST_LENGTH];  // Pre-allocated DMA list: SPE_DMA_LIST_LENGTH entries per message
00162 
00163   volatile int command;             // A control command that the PPU can use to send commands to the SPE runtime (see SPE_MESSAGE_COMMAND_xxx)
00164   volatile PPU_POINTER_TYPE wrPtr;  // A pointer to userData specified in the sendWorkRequest call that will be passed to the callback function
00165   volatile int counter1;            // A counter used to uniquely identify this message from the message previously held in this slot
00166   volatile int checksum;            // A checksum of the contents of the data structure (NOTE: Code assumes that checksum is the last field and is an int)
00167 
00168 } SPEMessage;
00169 
00170 
00171 // SPE Notify: The structure that defines a notification being passed from the SPE to the PPE notifying
00172 //   the PPE that a given work request has completed.
00173 // NOTE : Size of this structure should be a multiple of 16 bytes
00174 typedef struct __SPE_NOTIFY {
00175 
00176   //volatile unsigned long long int startTime;   // The time the Work Request entered user code
00177   //volatile unsigned int runTime;               // The amount of time the Work Request spent in user code
00178   //volatile unsigned short errorCode;           // The error code for the Work Request
00179   //volatile unsigned short counter;             // The counter value (when completed, should match corresponding counter in Message Queue)
00180 
00181   volatile unsigned long long int recvTimeStart; // The time the SPE Runtime first "noticed" the Work Request entry
00182   volatile unsigned int recvTimeEnd;             // NOTE(review): narrower than recvTimeStart; presumably an offset or truncated time - confirm
00183   volatile unsigned int __padding0__[1];         // Padding (presumably keeps this group of fields at 16 bytes - confirm)
00184 
00185   volatile unsigned int preFetchingTimeStart;
00186   volatile unsigned int preFetchingTimeEnd;
00187   volatile unsigned int fetchingTimeStart;
00188   volatile unsigned int fetchingTimeEnd;
00189 
00190   volatile unsigned int readyTimeStart;
00191   volatile unsigned int readyTimeEnd;
00192   volatile unsigned int userTimeStart;
00193   volatile unsigned int userTimeEnd;
00194 
00195   volatile unsigned int executedTimeStart;
00196   volatile unsigned int executedTimeEnd;
00197   //volatile unsigned int __padding1__[2];
00198   volatile unsigned int userTime0Start;        // Occupies the space of the former __padding1__ (see the commented-out line above)
00199   volatile unsigned int userTime0End;
00200 
00201   // NOTE : Important to keep the commit timing fields, errorCode, and counter fields together in the same
00202   //   cache line (they are all written at the same time and this will ensure that the other cache lines in
00203   //   the same structure are in the LS ... i.e. loads cannot go out of order).
00204   volatile unsigned int commitTimeStart;
00205   volatile unsigned int commitTimeEnd;
00206   volatile unsigned short errorCode;           // The error code for the Work Request
00207   volatile unsigned short counter;             // The counter value (when completed, should match corresponding counter in Message Queue)
00208   volatile unsigned int __padding2__[1];       // Padding (presumably keeps this group of fields at 16 bytes - confirm)
00209 
00210   volatile unsigned int userTime1Start;
00211   volatile unsigned int userTime1End;
00212   volatile unsigned int userTime2Start;
00213   volatile unsigned int userTime2End;
00214 
00215   volatile unsigned long long int userAccumTime0;
00216   volatile unsigned long long int userAccumTime1;
00217 
00218   volatile unsigned long long int userAccumTime2;
00219   volatile unsigned long long int userAccumTime3;
00220 
00221 } SPENotify;
00222 
00223 
00224 // Define a structure that will be passed to each SPE thread when it is created
00225 typedef struct __SPE_DATA {
00226   volatile PPU_POINTER_TYPE messageQueue;  // Pointer to the message queue's location in main memory
00227   #if SPE_NOTIFY_VIA_MAILBOX == 0
00228     volatile PPU_POINTER_TYPE notifyQueue;  // Only present when SPE_NOTIFY_VIA_MAILBOX is 0; presumably a pointer to the notify queue's location in main memory - confirm
00229   #endif
00230   volatile int messageQueueLength;         // Length of the message queue (the number of messages)
00231   volatile unsigned short vID;             // The virtual SPE number
00232 } SPEData;
00233 
00234 
00236 // Function Prototypes for SPE Functions
00237 // TODO : NOTE : These should probably be moved so they are only declared for the SPE (i.e. - the PPE code
00238 //   may need to include this file, but these functions aren't available, nor make sense, on the PPE).
00239 
00240 #ifdef __cplusplus
00241 extern "C" {
00242 #endif
00243 
00244 extern unsigned short getSPEID();  // NOTE(review): in C (before C23) the empty parameter lists in this group leave the arguments unspecified; consider declaring them as (void)
00245 extern int isTracing();
00246 extern void debug_dumpSPERTState();
00247 
00248 extern void startUserTime0();  // The six start/end functions presumably fill SPENotify::userTime0Start .. userTime2End - confirm against the SPE runtime
00249 extern void endUserTime0();
00250 extern void startUserTime1();
00251 extern void endUserTime1();
00252 extern void startUserTime2();
00253 extern void endUserTime2();
00254 
00255 extern void clearUserAccumTime(int index);  // index presumably selects one of SPENotify::userAccumTime0 .. userAccumTime3 - confirm the valid range
00256 extern void startUserAccumTime(int index);
00257 extern void endUserAccumTime(int index);
00258 
00259 #ifdef __cplusplus
00260 }
00261 #endif
00262 
00263 
00264 #endif //__SPE_RUNTIME_H__

Generated on Sun Jun 29 13:29:05 2008 for Charm++ by  doxygen 1.5.1