1 #ifndef CK_PAIR_CALCULATOR_H
2 #define CK_PAIR_CALCULATOR_H
8 #include "ckmulticast.h"
9 #include "ckhashtable.h"
10 #include "ckcomplex.h"
12 #include "PipeBroadcastStrategy.h"
13 #include "BroadcastStrategy.h"
14 #include "DirectMulticastStrategy.h"
15 #include "RingMulticastStrategy.h"
16 #include "MultiRingMulticast.h"
17 #include "NodeMulticast.h"
25 #include "cmidirect.h"
27 #define COLLATOR_ENABLE_RDMA
29 #ifdef DEBUG_CP_PAIRCALC_RDMA
30 #define DEBUG_MESSAGEDATACOLLATOR_RDMA
36 #ifdef FORTRANUNDERSCORE
54 extern ComlibInstanceHandle mcastInstanceCP;
// Prototype for the externally provided DGEMM routine; every argument is
// passed by pointer and the floating-point arguments are double.
// NOTE(review): the 13-argument shape matches the Fortran BLAS dgemm
// (transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); presumably
// that is the routine this binds to - confirm against the linked BLAS.
58 void DGEMM (
char *,
char *,
int *,
int *,
int *,
double *,
double *,
int *,
double *,
int *,
double *,
double *,
int * );
// Prototype for the externally provided ZGEMM routine; same argument layout
// as DGEMM above but with `complex` in place of double.
// NOTE(review): the 13-argument shape matches the Fortran BLAS zgemm
// (transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); presumably
// that is the routine this binds to - confirm that the project's `complex`
// type is layout-compatible with what the BLAS library expects.
59 void ZGEMM (
char *,
char *,
int *,
int *,
int *,
complex *,
complex *,
int *,
complex *,
int *,
complex *,
complex *,
int * );
62 #include "MessageDataCollator.h"
63 #include "pcMessages.h"
65 #include "ckPairCalculator.decl.h"
// Receive data from ortho chares and copy it into the matrix.
// Declaration only; the implementation is not part of this listing.
// NOTE(review): doMatrix1 / doMatrix2 / doOrthoT presumably select which
// destination(s) the incoming tile is copied into, and (orthoX, orthoY) with
// the two ortho grain sizes locate the tile - confirm in the definition.
336 void collectTile(
bool doMatrix1,
bool doMatrix2,
bool doOrthoT,
int orthoX,
int orthoY,
int orthoGrainSizeX,
int orthoGrainSizeY,
int numRecdBW,
int matrixSize, internalType *matrix1, internalType* matrix2);
// Performs the matrix-multiply math on the backward path; called from
// multiplyPsiV() and multiplyResult().
// Declaration only; the implementation is not part of this listing.
// NOTE(review): m_in / n_in / k_in and beta presumably feed a GEMM call, and
// the B?Aoffset / B?Coffset values presumably index into the input and
// output buffers - confirm in the definition.
352 void bwMultiplyHelper(
int size, internalType *matrix1, internalType *matrix2, internalType *amatrix, internalType *amatrix2,
bool unitcoef,
int m_in,
int n_in,
int k_in,
int BNAoffset,
int BNCoffset,
int BTAoffset,
int BTCoffset,
int orthoX,
int orthoY,
double beta,
int ogx,
int ogy);
// Called on the normal backward path (non-psiV) to set up the data sends to
// GSpace.
// Declaration only; the implementation is not part of this listing.
354 void bwSendHelper(
int orthoX,
int orthoY,
int sizeX,
int sizeY,
int ogx,
int ogy);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): takes the same (other, startGrain, endGrain) arguments as
// sendBWResultColumn(), which sends the result for a column; presumably this
// is its "direct" send variant - confirm in the definition.
362 void sendBWResultColumnDirect(
bool other,
int startGrain,
int endGrain);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): from the parameter names this presumably permutes the rows
// of `data` according to offsetMap / revOffsetMap using `scratch` as
// temporary storage - confirm in the definition (including required sizes).
368 void reorder(
int *offsetMap,
int *revOffsetMap,
double *data,
double *scratch);
// dumpMatrix overload taking a double* buffer.
// xstart, ystart, xtra1 and xtra2 all default to 0.
// Declaration only; the implementation is not part of this listing.
// NOTE(review): presumably writes the matrix out for debugging, with the
// first argument naming the output and the two unnamed ints giving its
// dimensions - confirm in the definition.
370 void dumpMatrix(
const char *,
double*,
int,
int,
int xstart=0,
int ystart=0,
int xtra1=0,
int xtra2=0);
// dumpMatrix overload taking a complex* buffer.
// xstart, ystart, xtra1 and xtra2 all default to 0.
// Declaration only; the implementation is not part of this listing.
// NOTE(review): presumably writes the matrix out for debugging, with the
// first argument naming the output and the two unnamed ints giving its
// dimensions - confirm in the definition.
372 void dumpMatrix(
const char *,
complex*,
int,
int,
int xstart=0,
int ystart=0,
int xtra1=0,
int xtra2=0);
// Takes a complex* buffer; xstart, ystart and iter all default to 0.
// Declaration only; the implementation is not part of this listing.
// NOTE(review): presumably a debugging dump like dumpMatrix(), with `iter`
// tagging the output by iteration - confirm in the definition.
374 void dumpMatrixComplex(
const char *,
complex *,
int,
int,
int xstart=0,
int ystart=0,
int iter=0);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): presumably performs the backward-path dgemm in several
// pieces split along the M dimension (see the gemmSplitBW setting) - confirm
// in the definition. Unlike the forward-path variants it takes no leading
// dimension arguments, and `bt` is presumably the GEMM beta - TODO confirm.
376 void dgemmSplitBwdM(
int m,
int n,
int k,
char *trans,
char *transT,
double *alpha,
double *A,
double *B,
double *bt,
double *C);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): presumably performs the forward-path dgemm in several pieces
// split along the M and K dimensions (see the gemmSplitFWm / gemmSplitFWk
// settings) - confirm in the definition. lda / ldb / ldc are passed by
// pointer, presumably to be forwarded to DGEMM as leading dimensions.
378 void dgemmSplitFwdStreamMK(
int m,
int n,
int k,
char *trans,
char *transT,
double *alpha,
double *A,
int *lda,
double *B,
int *ldb,
double *C,
int *ldc);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): same parameter list as dgemmSplitFwdStreamMK; presumably
// performs the forward-path dgemm in several pieces split along the N and K
// dimensions instead - confirm in the definition. lda / ldb / ldc are passed
// by pointer, presumably to be forwarded to DGEMM as leading dimensions.
380 void dgemmSplitFwdStreamNK(
int m,
int n,
int k,
char *trans,
char *transT,
double *alpha,
double *A,
int *lda,
double *B,
int *ldb,
double *C,
int *ldc);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): the name matches the Charm++ hook that the runtime invokes
// on an array element after an AtSync() load-balancing step; presumably this
// is that override - confirm against the class definition.
382 void ResumeFromSync();
386 #ifdef _PAIRCALC_DEBUG_
387 CkPrintf(
"[%d,%d,%d,%d] atsyncs\n", thisIndex.w, thisIndex.x, thisIndex.y, thisIndex.z);
// Copy the results from outdata1 and outdata2 into the tiles.
// Declaration only; the implementation is not part of this listing.
// NOTE(review): `source` is presumably a sourceRows x sourceCols matrix that
// is scattered into the tileSize-sized buffers in `dest`, with `touched`
// counting per-tile progress so full tiles can be detected - confirm in the
// definition.
497 void copyIntoTiles(
double *source,
double**dest,
int sourceRows,
int sourceCols,
int *offsetsRow,
int *offsetsCol,
int *touched,
int tileSize,
int tilesPerRow );
// Takes nMsg reduction messages and returns a single CkReductionMsg.
// Declaration only; the implementation is not part of this listing.
// NOTE(review): the (int, CkReductionMsg **) -> CkReductionMsg * shape is
// that of a Charm++ custom reducer function; presumably it sums the message
// payloads - confirm in the definition and where the reducer is registered.
502 CkReductionMsg *sumBlockGrain(
int nMsg, CkReductionMsg **msgs);
// Declaration only; the implementation is not part of this listing.
// NOTE(review): presumably a hand-written (non-BLAS) matrix multiply that
// combines A (numrowsA rows) and B (numRowsB rows), each with rows of
// rowLength elements, into C scaled by alpha - confirm in the definition,
// including whether C is overwritten or accumulated into.
504 void manmult(
int numrowsA,
int numRowsB,
int rowLength,
double *A,
double *B,
double *C,
double alpha);
507 #endif // CK_PAIR_CALCULATOR_H
bool isSymmetric
Whether this is a symmetric or an asymmetric paircalc instance.
CkSectionInfo * resultCookies
array of bw path section cookies
int streamCaughtL
number of rows caught so far L stream
bool expectOrthoT
Is true only in asymmetric, dynamics scenario. For PC instances in the asymmetric chare array...
int streamCaughtR
number of rows caught so far R stream
void sendBWResultColumn(bool other, int startGrain, int endGrain)
Send the result for this column.
int grainSizeY
number of states per chare y-axis
int orthoGrainSizeRemX
sGrainSizeX % orthoGrainSize
CkCallback * orthoCB
forward path callbacks
internalType * inDataRight
the input pair to be transformed
int numRecdBW
number of messages received BW
void bwSendHelper(int orthoX, int orthoY, int sizeX, int sizeY, int ogx, int ogy)
Called on the normal backward path (non-psiV) to set up the data sends to GSpace. ...
void bwMultiplyDynOrthoT()
Multiplies Fpsi by T (from Ortho)
CollatorType * leftHandler() const
Returns a pointer to the collator that will buffer the left matrix data (only for use by the correspo...
void sendTiles(bool flag_dp)
Contribute orthoGrainSized tiles of data (that are ready?) to the corresponding ortho chares...
double * allCaughtRight
unordered rows of FW input
int orthoGrainSizeRemY
sGrainSizeY % orthoGrainSize
inputType * othernewData
results of the symmetric off-diagonal multiply
CkReductionMsg * sumMatrixDouble(int nMsg, CkReductionMsg **msgs)
forward declaration
internalType * outData
results of fw multiply
int cb_ep
bw path callback entry point
void collectTile(bool doMatrix1, bool doMatrix2, bool doOrthoT, int orthoX, int orthoY, int orthoGrainSizeX, int orthoGrainSizeY, int numRecdBW, int matrixSize, internalType *matrix1, internalType *matrix2)
Receive data from ortho chares and copy into matrix.
int numExpectedY
number of messages expected y-axis
bool existsLeft
inDataLeft allocated
bool existsNew
newData allocated
int numRecRight
number of rows so far total right
int numExpected
number of messages expected all
bool existsRight
inDataRight allocated
void multiplyResult(multiplyResultMsg *msg)
Entry Method. Backward path multiplication.
int numRecLeft
number of rows so far total left
void sendBWResult(sendBWsignalMsg *msg)
Entry Method. Send the results via multiple reductions as triggered by a prioritized message...
int * RightOffsets
index numbers of caught stream elements
void sendBWResultDirect(sendBWsignalMsg *msg)
Entry Method.
int * columnCountOther
count of processed rows in BW by column
bool symmetricOnDiagonal
diagonal symmetric special case
int gemmSplitFWk
number of rows in split FW dgemm
inputType * mynewData
results of bw multiply
int gemmSplitBW
number of rows in split BW dgemm
void lbsync()
Entry Method. Something to sync?
void multiplyResultI(multiplyResultMsg *msg)
Entry Method. Simply forwards the call to multiplyResult(). There don't seem to be any instances in source ...
CollatorType * rightHandler() const
Returns a pointer to the collator that will buffer the right matrix data (only for use by the corresp...
paircalcInputMsg * msgRight
Incoming messages with left and right matrix data that are kept around so that we can directly comput...
CollatorType * leftCollator
Data collators for the left and right matrix blocks.
double ** outTiles
in output streaming we populate the tiles directly
CProxy_InputDataHandler< CollatorType, CollatorType > myMsgHandler
A handle to the co-located chare array that handles data input.
CkGroupID mCastGrpId
group id for multicast manager bw
int gemmSplitFWm
number of columns in split FW dgemm
int numOrthoCol
sGrainSizeX/orthoGrainSize
CkArrayID cb_aid
bw path callback array ID
CkGroupID mCastGrpIdOrtho
group id for multicast manager ortho
void initResultSection(initResultMsg *msg)
Entry Method. Initialize the section cookie for each slice of the result.
void multiplyPsiV()
Dynamics: PsiV Tolerance correction loop called on symmetric instances. Technically, backward path.
internalType * inDataLeft
or the C=-1 inRight orthoT +c in dynamics
int actionType
matrix usage control [NORMAL, KEEPORTHO, PSIV]
Class that buffers incoming data (via messages/RDMA) till it counts a pre-specified number of arrival...
int numOrthoRow
sGrainSizeY/orthoGrainSize
PairCalculator(CProxy_InputDataHandler< CollatorType, CollatorType > inProxy, const pc::pcConfig _cfg)
Entry Method. (obviously)
int * LeftOffsets
index numbers of caught stream elements
int numPoints
number of points in this chunk
int numRecd
number of messages received
int grainSizeX
number of states per chare x-axis
void multiplyForwardRDMA()
Entry Method. Simply redirects call to multiplyForward()
int cb_ep_tol
bw path callback entry point for psiV tolerance
bool amPhantom
consolidate thisIndex.x<thisIndex.y && cfg.isSymmetric && phantomsym
void acceptOrthoT(multiplyResultMsg *msg)
Entry Method. During dynamics, each Ortho calls this on the Asymm loop PC instances to send its share...
int numRecdBWOT
number of messages received BW orthoT
void bwMultiplyHelper(int size, internalType *matrix1, internalType *matrix2, internalType *amatrix, internalType *amatrix2, bool unitcoef, int m_in, int n_in, int k_in, int BNAoffset, int BNCoffset, int BTAoffset, int BTCoffset, int orthoX, int orthoY, double beta, int ogx, int ogy)
multiplyPsiV() and multiplyResult() call this to perform the matrix multiply math on the backward pat...
bool resumed
have resumed from load balancing
void launchComputations(paircalcInputMsg *aMsg)
NOT an entry method. Called locally from the acceptData* methods to launch the appropriate number-cru...
bool isLeftReady
Flags indicating if the left and right matrix blocks have been received.
int * LeftRev
reverse index numbers of caught stream elements
Dumb structure that holds all the configuration inputs required for paircalc instantiation, functioning and interaction.
void phantomDone()
Entry Method. To handle correct startup for symm PC w/phantoms.
void bwbarrier(CkReductionMsg *msg)
Entry Method. a debugging tool: a barrier at the end of the backward path before anything is sent ove...
void copyIntoTiles(double *source, double **dest, int sourceRows, int sourceCols, int *offsetsRow, int *offsetsCol, int *touched, int tileSize, int tilesPerRow)
Copy the results from outdata1 and outdata2 into the tiles.
bool notOnDiagonal
being on or off diagonal changes many things
void acceptLeftData(paircalcInputMsg *msg)
Entry Method. Method to send in the complete block of the left matrix.
int * RightRev
reverse index numbers of caught stream elements
void pup(PUP::er &)
PUP method.
~PairCalculator()
Destructor (nothing needs to be done?)
int * touchedTiles
tracker to detect when tiles are full
void initGRed(initGRedMsg *msg)
Entry Method. Initializes the section cookie and the reduction client. Called on startup as the chare...
int blkSize
number of points in gspace plane
void cleanupAfterBWPath()
Cleans up at end of an iteration (fw-bw computation loop); frees mem, resets counters etc...
double * allCaughtLeft
unordered rows of FW input
internalType * inResult2
used in gamma calc (non minimization)
void enqueueBWsend(bool unitcoef, int priority=1)
Schedules the entry methods that send out the results to GSpace with appropriate priority.
CkSectionInfo * otherResultCookies
extra array of bw path section cookies for sym off diag, or dynamics
internalType * inResult1
accumulate ortho or lambda
int * columnCount
count of processed rows in BW by column
int numExpectedX
number of messages expected x-axis
void contributeSubTiles(internalType *fullOutput)
Piece up a tile and send all the pieces as this PC's contribution to the Ortho chares.
int numOrtho
number of orthos in our grain = numOrthoCol*numOrthoRow
CkSectionInfo * orthoCookies
forward path reduction cookie
bool existsOut
outData allocated
pc::pcConfig cfg
A private copy of the input configurations.
int rck
count of received cookies
void acceptRightData(paircalcInputMsg *msg)
Entry Method. Method to send in the complete block of the right matrix.
void multiplyForward(bool flag_dp)
Forward path multiply driver. Prepares matrices, calls DGEMM, contributes results to Ortho subTiles a...