SCM

SCM Repository

[matrix] Annotation of /pkg/src/cscBlocked.c
ViewVC logotype

Annotation of /pkg/src/cscBlocked.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 339 - (view) (download) (as text)

1 : bates 298 #include "cscBlocked.h"
2 :    
3 :     SEXP cscBlocked_validate(SEXP x)
4 :     {
5 :     SEXP pp = GET_SLOT(x, Matrix_pSym),
6 :     ip = GET_SLOT(x, Matrix_iSym),
7 :     xp = GET_SLOT(x, Matrix_xSym),
8 :     dp = getAttrib(xp, R_DimSymbol);
9 :     int *pv = INTEGER(pp),
10 :     *dim = INTEGER(dp),
11 :     ncol = length(pp) - 1;
12 :     int nnz = pv[ncol];
13 :    
14 : bates 304 if (!(isReal(xp) && isArray(xp)))
15 : bates 298 return ScalarString(mkChar("slot x should be a real array"));
16 :     if (length(dp) != 3)
17 :     return ScalarString(mkChar("slot x should be a 3-dimensional array"));
18 :     if (length(ip) != nnz)
19 :     return ScalarString(mkChar("length of slot i does not matck last element of slot p"));
20 :     if (dim[2] != nnz)
21 :     return ScalarString(mkChar("third dimension of slot x does not match number of nonzeros"));
22 :     return ScalarLogical(1);
23 :     }
24 :    
25 : bates 300 /**
26 :     * Perform one of the matrix operations
27 :     * C := alpha*op(A)*B + beta*C
28 :     * or
29 :     * C := alpha*B*op(A) + beta*C
30 :     * where A is a compressed, sparse, blocked matrix and
31 :     * B and C are dense matrices.
32 :     *
33 :     * @param side 'L' or 'l' for left, otherwise right
34 :     * @param transa 'T' or 't' for transpose.
35 :     * @param m number of rows in C
36 :     * @param n number of columns in C
37 :     * @param k number of columns in B if side = 'L', otherwise
38 :     * number of rows in B.
39 :     * @param alpha
40 :     * @param nr number of rows per block of A
41 :     * @param nc number of columns per block of A
42 :     * @param ap vector of column pointers in A
43 :     * @param ai vector of row indices in A
44 :     * @param ax contents of non-zero blocks of A
45 :     * @param b matrix to be multiplied
46 :     * @param ldb leading dimension of b as declared in the calling
47 :     * routine
48 : bates 304 * @param beta scalar multiplier of c
49 : bates 300 * @param c product matrix to be modified
50 : bates 304 * @param ldc leading dimension of c as declared in the calling
51 : bates 300 * routine
52 :     */
53 :     void
54 :     cscBlocked_mm(char side, char transa, int m, int n, int k,
55 :     double alpha, int nr, int nc,
56 :     const int ap[], const int ai[],
57 :     const double ax[],
58 :     const double b[], int ldb,
59 :     double beta, double c[], int ldc)
60 :     {
61 : bates 304 int j, kk, lside = (side == 'L' || side == 'l');
62 :     int ncb, nrb, sz = nr * nc, tra = (transa == 'T' || transa == 't');
63 : bates 300
64 : bates 304 if (nr < 1 || nc < 1 || m < 0 || n < 0 || k < 0)
65 :     error("improper dims m=%d, n=%d, k=%d, nr=%d, nc=%d",
66 : bates 300 m, n, k, nr, nc);
67 : bates 304 if (ldc < n) error("incompatible dims n=%d, ldc=%d", n, ldc);
68 :     if (lside) {
69 :     if (ldb < k)
70 :     error("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d",
71 :     k, ldb, n, nr, nc);
72 :     if (tra) {
73 :     if (m % nc || k % nr)
74 :     error("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d",
75 :     m, k, nr, nc);
76 :     nrb = k/nr; ncb = m/nc;
77 :     } else {
78 :     if (m % nr || k % nc)
79 :     error("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d",
80 :     m, k, nr, nc);
81 :     nrb = m/nr; ncb = k/nc;
82 :     }
83 : bates 300 for (j = 0; j < ncb; j++) {
84 : bates 304 int j2 = ap[j + 1];
85 :     for (kk = ap[j]; kk < j2; kk++) {
86 :     int ii = ai[kk];
87 :     if (ii < 0 || ii >= nrb)
88 :     error("improper row index ii=%d, nrb=%d", ii, nrb);
89 :     if (tra) {
90 :     F77_CALL(dgemm)("T", "N", &nc, &n, &nr,
91 :     &alpha, ax + j*sz, &nr,
92 :     b + ii * nr, &ldb,
93 :     &beta, c + j * nc, &ldc);
94 :     } else {
95 :     F77_CALL(dgemm)("N", "N", &nr, &n, &nc,
96 :     &alpha, ax + j * sz, &nr,
97 :     b + j*nc, &ldb,
98 :     &beta, c + ii * nr, &ldc);
99 :     }
100 : bates 300 }
101 :     }
102 :     } else {
103 : bates 304 error("Call to cscBlocked_mm must have side == 'L'");
104 : bates 300 }
105 :     }
106 : bates 304
107 :     /**
108 :     * Invert a triangular sparse blocked matrix. This is not done in
109 :     * place because the number of non-zero blocks in A-inverse can be
110 :     * different than the number of non-zero blocks in A.
111 :     *
112 :     * @param upper 'U' indicates upper triangular, 'L' lower
113 :     * @param unit 'U' indicates unit diagonal, 'N' non-unit
114 :     * @param n number of columns of blocks in A and A-inverse
115 :     * @param nr number of rows per block in A and A-inverse
116 :     * @param nc number of columns per block in A and A-inverse
117 :     * @param ap vector of column pointers for A
118 :     * @param ai vector of row indices for A
119 :     * @param ax contents of the non-zero blocks of A
120 :     * @param aip vector of column pointers for A-inverse
121 :     * @param aii vector of row indices for A-inverse
122 :     * @param aix contents of the non-zero blocks of A-inverse
123 :     */
124 :     void
125 :     cscBlocked_tri(char upper, char unit, int n, int nr, int nc,
126 :     const int ap[], const int ai[], const double ax[],
127 :     int aip[], int aii[], double aix[])
128 :     {
129 :     int iup = (upper == 'U' || upper == 'u') ? 1 : 0,
130 :     iunit = (unit == 'U' || unit == 'u') ? 1 : 0;
131 :    
132 :     if (!iunit)
133 :     error("Code for non-unit triangular matrices not yet written");
134 :     if (ap[n] > 0)
135 :     error("Code for non-trivial unit inverse not yet written");
136 :     else {
137 :     if (aip[n] == 0) return;
138 :     error ("Structure of A and A-inverse does not agree");
139 :     }
140 :     }
141 : bates 339
142 :     /**
143 :     * Search for the element in a compressed sparse matrix at a given row and column
144 :     *
145 :     * @param row row index
146 :     * @param col column index
147 :     * @param cp column pointers
148 :     * @param ci row indices
149 :     *
150 :     * @return index of element in ci, if it exists, else -1
151 :     */
152 :     static R_INLINE
153 :     int Ind(int row, int col, const int cp[], const int ci[])
154 :     {
155 :     int i, i2 = cp[col + 1];
156 :     for (i = cp[col]; i < i2; i++)
157 :     if (ci[i] == row) return i;
158 :     return -1;
159 :     }
160 :    
161 :     /**
162 :     * Perform one of the matrix operations
163 :     * C := alpha*A*A' + beta*C,
164 :     * or
165 :     * C := alpha*A'*A + beta*C,
166 :     * where A is a compressed, sparse, blocked matrix and
167 :     * C is a compressed, sparse, symmetric blocked matrix.
168 :     *
169 :     * @param uplo 'U' or 'u' for upper triangular storage, else lower.
170 :     * @param trans 'T' or 't' for transpose.
171 :     * @param alpha scalar multiplier of outer product
172 :     * @param A compressed sparse blocked matrix
173 :     * @param beta scalar multiplier of c
174 :     * @param C compressed sparse blocked symmetric matrix to be updated
175 :     */
176 :     SEXP cscBlocked_syrk(SEXP uplo, SEXP trans, SEXP alpha, SEXP A, SEXP beta, SEXP C)
177 :     {
178 :     SEXP axp = GET_SLOT(A, Matrix_xSym),
179 :     app = GET_SLOT(A, Matrix_pSym),
180 :     cxp = GET_SLOT(C, Matrix_xSym),
181 :     cpp = GET_SLOT(C, Matrix_pSym);
182 :     int *adims = INTEGER(getAttrib(axp, R_DimSymbol)),
183 :     *ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
184 :     *ap = INTEGER(app),
185 :     *cdims = INTEGER(getAttrib(cxp, R_DimSymbol)),
186 :     *ci = INTEGER(GET_SLOT(A, Matrix_iSym)),
187 :     *cp = INTEGER(cpp),
188 :     j, k,
189 :     nca = length(app) - 1,
190 :     ncc = length(cpp) - 1,
191 :     npairs,
192 :     inplace;
193 :     char *ul, *tr;
194 :     double *ax = REAL(axp),
195 :     *cx = REAL(cxp),
196 :     bta = asReal(beta);
197 :    
198 :     if (length(uplo) != 1 || length(trans) != 1 || length(alpha) != 1 ||
199 :     length(beta) != 1 || !isReal(alpha) || !isReal(beta) ||
200 :     !isString(uplo) || !isString(trans))
201 :     error("uplo and trans must be character scalars, alpha and beta real scalars");
202 :     if (cdims[0] != cdims[1]) error("blocks in C must be square");
203 :     ul = CHAR(STRING_ELT(uplo, 0));
204 :     tr = CHAR(STRING_ELT(trans, 0));
205 :     if (toupper(tr[0]) == 'T')
206 :     error("Code for trans == 'T' not yet written");
207 :     /* FIXME: Write the other version */
208 :     if (adims[0] != cdims[0])
209 :     error("Dimension inconsistency in blocks: dim(A)=[%d,,], dim(C)=[%d,,]",
210 :     adims[0], cdims[0]);
211 :     /* check the row indices */
212 :     for (k = 0; k < adims[2]; k++) {
213 :     int aik = ai[k];
214 :     if (aik < 0 || aik >= ncc)
215 :     error("Row index %d = %d is out of range [0, %d]",
216 :     k, ai[k], ncc - 1);
217 :     }
218 :     /* check if C can be updated in place */
219 :     inplace = 1;
220 :     npairs = 0;
221 :     for (j = 0; j < nca; j++) {
222 :     int k, kk, k2 = ap[j+1];
223 :     int nnz = k2 - ap[j];
224 :     npairs += (nnz * (nnz - 1)) / 2;
225 :     for (k = ap[j]; k < k2; k++) {
226 :     /* FIXME: This check assumes uplo == 'L' */
227 :     for (kk = k; kk < k2; kk++) {
228 :     if (Ind(ai[k], ai[kk], cp, ci) < 0) {
229 :     inplace = 0;
230 :     break;
231 :     }
232 :     }
233 :     if (!inplace) break;
234 :     }
235 :     }
236 :     /* multiply C by beta */
237 :     for (j = 0; j < cdims[0]*cdims[1]*cdims[2]; j++) cx[j] *= bta;
238 :     if (inplace) {
239 :     int scalar = (adims[0] == 1 && adims[1] == 1);
240 :    
241 :     for (j = 0; j < nca; j++) {
242 :     int k, kk, k2 = ap[j+1];
243 :     for (k = ap[j]; k < k2; k++) {
244 :     int ii = ai[k];
245 :     for (kk = k; kk < k2; kk++) {
246 :     int jj = ai[kk], K = Ind(ii, jj, cp, ci);
247 :     if (scalar) cx[K] += ax[k] * ax[kk];
248 :     else {
249 :     }
250 :     }
251 :     }
252 :     }
253 :     }
254 :     }
255 :    

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge