OpenAtom  Version1.5a
CLA_Matrix.ci
Go to the documentation of this file.
1 /** \file CLA_Matrix.ci
2  * Module: CLA_Matrix
3  * Purpose: dense matrix-matrix multiplication
4  *
5  * Usage:
6  * - To create the arrays, use the make_multiplier call detailed in
7  * CLA_Matrix.h.
8  * - To multiply once you have the CLA_Matrix_interface object o, do this:
9  * o.multiply(alpha, beta, data, fptr, usr_data, thisIndex.x, thisIndex.y)
10  *
11  * Note: When mentioning which parameters below are used, we refer only to
12  * the 2D algorithm. Notes for the 3D algorithm will be added later.
13  *
14  * Alpha and beta should be doubles. Only the alpha and beta passed to the C
15  * array will be used. Data should be a double* pointing to
16  * the data to be multiplied if we are at the chare array bound to the A
17  * or B arrays. Otherwise (we are at C), it should be the location where to
18  * store the result. We need to pass the index since we will use ckLocal
19  * calls. Once the multiplication has finished, the C array will call
20  * the function pointed to by fptr, passing it usr_data. These arguments will
21  * be ignored by A and B. fptr should take a void* as an argument and return
22  * void.
23  */
24 
25 module CLA_Matrix{
26 
27  /* Just to grab internalType definitions */
28  include "paircalc/pcFwdDeclarations.h";
29 
30  /* Message used to send matrix chunks between chares.
     * `data` is declared as a variable-length array field, so its length is
     * chosen when each message is allocated. The element type, internalType,
     * is not defined in this file; it comes from the header included in this
     * module.
     */
31  message CLA_Matrix_msg{
32  internalType data[];
33  };
34 
35  /* Auxiliary array, map, and message classes for the 3D algorithm.
     * CLA_MM3D_mult_init_msg is declared here without any fields; its layout
     * is defined in the C++ sources. It is the argument type of
     * CLA_MM3D_multiplier::initialize_reduction.
     */
36  message CLA_MM3D_mult_init_msg;
37  group CLA_MM3D_Map : CkArrayMap { /* group derived from CkArrayMap, i.e. an array-element placement map */
38  entry CLA_MM3D_Map(int M_chunks, int K_chunks, int N_chunks); /* constructor; presumably the number of chunks along the M, K and N dimensions -- TODO confirm in the C++ implementation */
39  };
40  array [3D] CLA_MM3D_multiplier{ /* 3D-indexed chare array; one of the auxiliary classes for the 3D algorithm */
41  entry void CLA_MM3D_multiplier(); /* no-argument constructor */
42  entry void CLA_MM3D_multiplier(int m, int k, int n); /* constructor; m, k, n are presumably the local chunk dimensions -- TODO confirm */
43  entry void initialize_reduction(CLA_MM3D_mult_init_msg *m); /* takes the init message declared in this module; NOTE(review): by its name this sets up a reduction -- verify against the implementation */
44  entry void receiveA(CLA_Matrix_msg *msg); /* receives a CLA_Matrix_msg; presumably a chunk of the A matrix */
45  entry void receiveB(CLA_Matrix_msg *msg); /* receives a CLA_Matrix_msg; presumably a chunk of the B matrix */
46  };
47  /* Main array used for all algorithms: a 2D-indexed chare array.
     * It declares a no-argument constructor, one constructor for the 2D
     * algorithm and one for the 3D algorithm, plus the entry methods that
     * receive matrix chunks and reduction messages.
     * NOTE(review): the meanings of the integer parameters are not visible in
     * this file. Presumably M, K, N are the global matrix dimensions, m, k, n
     * the per-chunk dimensions, and `part` selects whether the array is bound
     * to A, B or C -- confirm against make_multiplier in CLA_Matrix.h.
     */
48  array [2D] CLA_Matrix{
49  entry void CLA_Matrix(); /* no-argument constructor */
50 
51  /* for 2D algorithm: other1 and other2 are proxies to two other CLA_Matrix
     * arrays, and `ready` is a CkCallback (presumably invoked once setup has
     * completed -- TODO confirm). */
52  entry void CLA_Matrix(int M, int K, int N, int m, int k, int n,
53  int strideM, int strideK, int strideN, int part,
54  CProxy_CLA_Matrix other1, CProxy_CLA_Matrix other2,
55  CkCallback ready, int gemmSplit);
56  entry void receiveA(CLA_Matrix_msg *m); /* receives a CLA_Matrix_msg; presumably a chunk of A */
57  entry void receiveB(CLA_Matrix_msg *m); /* receives a CLA_Matrix_msg; presumably a chunk of B */
58 
59  /* for 3D algorithm: p is a proxy to the CLA_MM3D_multiplier array; cb is a
     * CkCallback and gid a CkGroupID whose target group is not identified
     * here. The three entries after the constructor each take a
     * CkReductionMsg, so they look like reduction targets -- verify against
     * the implementation. */
60  entry void CLA_Matrix(CProxy_CLA_MM3D_multiplier p, int M, int K, int N,
61  int m, int k, int n, int strideM, int strideK, int strideN, int part,
62  CkCallback cb, CkGroupID gid, int gemmSplit);
63  entry void ready(CkReductionMsg *m);
64  entry void readyC(CkReductionMsg *m);
65  entry void mult_done(CkReductionMsg *m);
66  };
67 
68 }