SCM

SCM Repository

[matrix] Annotation of /pkg/src/cscBlocked.c
ViewVC logotype

Annotation of /pkg/src/cscBlocked.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 447 - (view) (download) (as text)

1 : bates 298 #include "cscBlocked.h"
2 : bates 415 /* TODO
3 :     * - code for trans = 'T' in cscb_syrk
4 :     * - code for non-trivial cscb_trmm and cscb_ldl
5 : bates 447 * - replace calls to Ind by calls to Tind (unless the error checking is different)
6 : bates 415 */
7 : bates 447
8 : bates 298 SEXP cscBlocked_validate(SEXP x)
9 :     {
10 :     SEXP pp = GET_SLOT(x, Matrix_pSym),
11 :     ip = GET_SLOT(x, Matrix_iSym),
12 :     xp = GET_SLOT(x, Matrix_xSym),
13 :     dp = getAttrib(xp, R_DimSymbol);
14 :     int *pv = INTEGER(pp),
15 :     *dim = INTEGER(dp),
16 :     ncol = length(pp) - 1;
17 :     int nnz = pv[ncol];
18 :    
19 : bates 304 if (!(isReal(xp) && isArray(xp)))
20 : bates 415 return mkString("slot x should be a real array");
21 : bates 298 if (length(dp) != 3)
22 : bates 415 return mkString("slot x should be a 3-dimensional array");
23 : bates 298 if (length(ip) != nnz)
24 : bates 415 return mkString("length of slot i does not matck last element of slot p");
25 : bates 298 if (dim[2] != nnz)
26 : bates 415 return
27 :     mkString("third dimension of slot x does not match number of nonzeros");
28 : bates 298 return ScalarLogical(1);
29 :     }
30 :    
31 : bates 434 static int*
32 :     expand_column_pointers(int ncol, const int mp[], int mj[])
33 :     {
34 :     int j;
35 :     for (j = 0; j < ncol; j++) {
36 :     int j2 = mp[j+1], jj;
37 :     for (jj = mp[j]; jj < j2; jj++) mj[jj] = j;
38 :     }
39 :     return mj;
40 :     }
41 :    
42 :     static R_INLINE
43 :     int Tind(const int rowind[], const int colptr[], int i, int j)
44 :     {
45 :     int k, k2 = colptr[j + 1];
46 :     for (k = colptr[j]; k < k2; k++)
47 :     if (rowind[k] == i) return k;
48 :     error("row %d and column %d not defined in rowind and colptr",
49 :     i, j);
50 :     return -1; /* -Wall */
51 :     }
52 :    
53 : bates 300 /**
54 :     * Perform one of the matrix operations
55 :     * C := alpha*op(A)*B + beta*C
56 :     * or
57 :     * C := alpha*B*op(A) + beta*C
58 :     * where A is a compressed, sparse, blocked matrix and
59 :     * B and C are dense matrices.
60 :     *
61 :     * @param side 'L' or 'l' for left, otherwise right
62 :     * @param transa 'T' or 't' for transpose.
63 :     * @param m number of rows in C
64 :     * @param n number of columns in C
65 : bates 415 * @param k number of rows in B if side = 'L', otherwise
66 :     * number of columns in B.
67 : bates 300 * @param alpha
68 :     * @param nr number of rows per block of A
69 :     * @param nc number of columns per block of A
70 :     * @param ap vector of column pointers in A
71 :     * @param ai vector of row indices in A
72 :     * @param ax contents of non-zero blocks of A
73 :     * @param b matrix to be multiplied
74 :     * @param ldb leading dimension of b as declared in the calling
75 :     * routine
76 : bates 304 * @param beta scalar multiplier of c
77 : bates 300 * @param c product matrix to be modified
78 : bates 304 * @param ldc leading dimension of c as declared in the calling
79 : bates 300 * routine
80 :     */
81 :     void
82 : bates 447 cscBlocked_mm(enum CBLAS_SIDE side, enum CBLAS_TRANSPOSE transa, int m, int n, int k,
83 : bates 300 double alpha, int nr, int nc,
84 :     const int ap[], const int ai[],
85 :     const double ax[],
86 :     const double b[], int ldb,
87 :     double beta, double c[], int ldc)
88 :     {
89 : bates 447 int j, kk;
90 :     int ncb, nrb, sz = nr * nc;
91 : bates 300
92 : bates 304 if (nr < 1 || nc < 1 || m < 0 || n < 0 || k < 0)
93 :     error("improper dims m=%d, n=%d, k=%d, nr=%d, nc=%d",
94 : bates 300 m, n, k, nr, nc);
95 : bates 304 if (ldc < n) error("incompatible dims n=%d, ldc=%d", n, ldc);
96 : bates 447 if (side == LFT) {
97 : bates 304 if (ldb < k)
98 :     error("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d",
99 :     k, ldb, n, nr, nc);
100 : bates 447 if (transa == TRN) {
101 : bates 304 if (m % nc || k % nr)
102 :     error("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d",
103 :     m, k, nr, nc);
104 :     nrb = k/nr; ncb = m/nc;
105 :     } else {
106 :     if (m % nr || k % nc)
107 :     error("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d",
108 :     m, k, nr, nc);
109 :     nrb = m/nr; ncb = k/nc;
110 :     }
111 : bates 300 for (j = 0; j < ncb; j++) {
112 : bates 304 int j2 = ap[j + 1];
113 :     for (kk = ap[j]; kk < j2; kk++) {
114 :     int ii = ai[kk];
115 :     if (ii < 0 || ii >= nrb)
116 :     error("improper row index ii=%d, nrb=%d", ii, nrb);
117 : bates 447 if (transa == TRN) {
118 : bates 304 F77_CALL(dgemm)("T", "N", &nc, &n, &nr,
119 : bates 415 &alpha, ax + kk * sz, &nr,
120 : bates 304 b + ii * nr, &ldb,
121 :     &beta, c + j * nc, &ldc);
122 :     } else {
123 :     F77_CALL(dgemm)("N", "N", &nr, &n, &nc,
124 : bates 415 &alpha, ax + kk * sz, &nr,
125 : bates 304 b + j*nc, &ldb,
126 :     &beta, c + ii * nr, &ldc);
127 :     }
128 : bates 300 }
129 :     }
130 :     } else {
131 : bates 304 error("Call to cscBlocked_mm must have side == 'L'");
132 : bates 300 }
133 :     }
134 : bates 304
135 :     /**
136 : bates 415 * Perform one of the matrix operations
137 :     * C := alpha*op(A)*B + beta*C
138 :     * or
139 :     * C := alpha*B*op(A) + beta*C
140 :     * where A is a compressed, sparse, blocked matrix and
141 :     * B and C are dense matrices.
142 :     *
143 :     * @param side 'L' or 'l' for left, otherwise right
144 :     * @param transa 'T' or 't' for transpose.
145 :     * @param m number of rows in C
146 :     * @param n number of columns in C
147 :     * @param k number of rows in B if side = 'L', otherwise
148 :     * number of columns in B.
149 :     * @param alpha
150 :     * @param A pointer to a cscBlocked object
151 :     * @param B matrix to be multiplied
152 :     * @param ldb leading dimension of b as declared in the calling
153 :     * routine
154 :     * @param beta scalar multiplier of c
155 :     * @param C product matrix to be modified
156 :     * @param ldc leading dimension of c as declared in the calling
157 :     * routine
158 :     */
159 :     void
160 : bates 447 cscb_mm(enum CBLAS_SIDE side, enum CBLAS_TRANSPOSE transa,
161 :     int m, int n, int k, double alpha, SEXP A,
162 :     const double B[], int ldb, double beta, double C[], int ldc)
163 : bates 415 {
164 : bates 447 int lside = (side == LFT);
165 : bates 415 SEXP AxP = GET_SLOT(A, Matrix_xSym),
166 :     ApP = GET_SLOT(A, Matrix_pSym);
167 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
168 :     *Ap = INTEGER(ApP),
169 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
170 :     ancb = length(ApP) - 1, /* number of column blocks */
171 :     anrb; /* number of row blocks */
172 :     int absz = adims[0] * adims[1]; /* block size */
173 :     int j;
174 :     double *Ax = REAL(AxP);
175 :    
176 : bates 422 if (ldc < m) error("incompatible dims m=%d, ldc=%d", m, ldc);
177 : bates 415 if (lside) {
178 :     /* B is of size k by n */
179 :     if (ldb < k)
180 :     error("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d",
181 :     k, ldb, n, adims[0], adims[1]);
182 : bates 447 if (transa == TRN) {
183 : bates 415 if (m % adims[1] || k % adims[0])
184 :     error("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d",
185 :     m, k, adims[0], adims[1]);
186 :     if (ancb != m/adims[1])
187 :     error("incompatible LT dims m=%d, ancb=%d, adims=[%d,%d,%d]",
188 : bates 447 m, ancb, adims[0], adims[1], adims[2]);
189 : bates 415 anrb = k/adims[0];
190 :     } else {
191 :     if (m % adims[0] || k % adims[1])
192 :     error("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d",
193 :     m, k, adims[0], adims[1]);
194 :     if (ancb != k/adims[1])
195 :     error("incompatible LN dims k=%d, ancb=%d, adims=[%d,%d,%d]",
196 : bates 447 k, ancb, adims[0], adims[1], adims[2]);
197 : bates 415 anrb = m/adims[0];
198 :     }
199 :     for (j = 0; j < ancb; j++) {
200 :     int kk, j2 = Ap[j + 1];
201 :     for (kk = Ap[j]; kk < j2; kk++) {
202 :     int ii = Ai[kk];
203 :     if (ii < 0 || ii >= anrb)
204 :     error("improper row index ii=%d, anrb=%d", ii, anrb);
205 : bates 447 if (transa == TRN) {
206 : bates 415 F77_CALL(dgemm)("T", "N", adims+1, &n, adims,
207 :     &alpha, Ax + kk * absz, adims,
208 :     B + ii * adims[0], &ldb,
209 :     &beta, C + j * adims[1], &ldc);
210 :     } else {
211 :     F77_CALL(dgemm)("N", "N", adims, &n, adims+1,
212 :     &alpha, Ax + kk * absz, adims,
213 : bates 447 B + j * adims[1], &ldb,
214 : bates 422 &beta, C + ii * adims[0], &ldc);
215 : bates 415 }
216 :     }
217 :     }
218 :     } else {
219 :     /* B is of size m by k */
220 :     error("Call to cscb_mm must have side == 'L'");
221 :     }
222 :     }
223 :    
224 :     /**
225 : bates 339 * Search for the element in a compressed sparse matrix at a given row and column
226 :     *
227 :     * @param row row index
228 :     * @param col column index
229 :     * @param cp column pointers
230 :     * @param ci row indices
231 :     *
232 :     * @return index of element in ci, if it exists, else -1
233 :     */
234 :     static R_INLINE
235 :     int Ind(int row, int col, const int cp[], const int ci[])
236 :     {
237 :     int i, i2 = cp[col + 1];
238 :     for (i = cp[col]; i < i2; i++)
239 :     if (ci[i] == row) return i;
240 :     return -1;
241 :     }
242 :    
243 :     /**
244 :     * Perform one of the matrix operations
245 :     * C := alpha*A*A' + beta*C,
246 :     * or
247 :     * C := alpha*A'*A + beta*C,
248 :     * where A is a compressed, sparse, blocked matrix and
249 :     * C is a compressed, sparse, symmetric blocked matrix.
250 :     *
251 :     * @param uplo 'U' or 'u' for upper triangular storage, else lower.
252 :     * @param trans 'T' or 't' for transpose.
253 :     * @param alpha scalar multiplier of outer product
254 :     * @param A compressed sparse blocked matrix
255 :     * @param beta scalar multiplier of c
256 :     * @param C compressed sparse blocked symmetric matrix to be updated
257 :     */
258 : bates 447 void
259 :     cscb_syrk(enum CBLAS_UPLO uplo, enum CBLAS_TRANSPOSE trans,
260 :     double alpha, SEXP A,
261 :     double beta, SEXP C)
262 : bates 339 {
263 : bates 415 SEXP AxP = GET_SLOT(A, Matrix_xSym),
264 :     ApP = GET_SLOT(A, Matrix_pSym),
265 :     CxP = GET_SLOT(C, Matrix_xSym),
266 :     CpP = GET_SLOT(C, Matrix_pSym);
267 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
268 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
269 :     *Ap = INTEGER(ApP),
270 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
271 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
272 :     *Cp = INTEGER(CpP),
273 :     iup = (uplo == 'U' || uplo == 'u'),
274 :     itr = (trans == 'T' || trans == 't'),
275 : bates 422 j, k;
276 : bates 415 double *Ax = REAL(AxP), *Cx = REAL(CxP), one = 1.;
277 :     int scalar = (adims[0] == 1 && adims[1] == 1),
278 : bates 434 anc = length(ApP) - 1,
279 : bates 415 asz = adims[0] * adims[1],
280 :     csz = cdims[0] * cdims[1];
281 : bates 339
282 : bates 415
283 : bates 339 if (cdims[0] != cdims[1]) error("blocks in C must be square");
284 : bates 415 if (itr) {
285 : bates 339 error("Code for trans == 'T' not yet written");
286 : bates 415 /* FIXME: Write this part */
287 :     } else {
288 : bates 422 if (adims[0] != cdims[0])
289 :     error("Inconsistent dimensions: A[%d,%d,%d], C[%d,%d,%d]",
290 :     adims[0], adims[1], adims[2],
291 :     cdims[0], cdims[1], cdims[2]);
292 : bates 339 /* check the row indices */
293 : bates 415 for (k = 0; k < adims[2]; k++) {
294 :     int aik = Ai[k];
295 : bates 422 if (aik < 0 || aik >= cdims[2])
296 : bates 415 error("Row index %d = %d is out of range [0, %d]",
297 : bates 422 k, aik, cdims[2] - 1);
298 : bates 415 }
299 :     /* multiply C by beta */
300 :     if (beta != 1.)
301 :     for (j = 0; j < csz * cdims[2]; j++) Cx[j] *= beta;
302 :     /* individual products */
303 : bates 434 for (j = 0; j < anc; j++) {
304 : bates 415 int k, kk, k2 = Ap[j+1];
305 :     for (k = Ap[j]; k < k2; k++) {
306 :     int ii = Ai[k], K = Ind(ii, ii, Cp, Ci);
307 :    
308 : bates 438 if (K < 0) error("cscb_syrk: C[%d,%d] not defined", ii, ii);
309 : bates 415 if (scalar) Cx[K] += alpha * Ax[k] * Ax[k];
310 : bates 447 else F77_CALL(dsyrk)((uplo == UPP)?"U":"L", "N", cdims, adims + 1,
311 : bates 415 &alpha, Ax + k * asz, adims,
312 :     &one, Cx + K * csz, cdims);
313 :     for (kk = k+1; kk < k2; kk++) {
314 :     int jj = Ai[kk];
315 : bates 438 K = (iup) ? Ind(ii, jj, Cp, Ci) : Ind(jj, ii, Cp, Ci);
316 :    
317 :     if (K < 0) error("cscb_syrk: C[%d,%d] not defined", ii, jj);
318 : bates 415 if (scalar) Cx[K] += alpha * Ax[k] * Ax[kk];
319 :     else F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
320 :     &alpha, Ax + ((iup)?kk:k) * asz, adims,
321 :     Ax + ((iup)?k:kk) * asz, adims,
322 :     &one, Cx + K * asz, cdims);
323 : bates 339 }
324 :     }
325 :     }
326 :     }
327 : bates 415 }
328 :    
329 :     /**
330 :     * Create the LDL' decomposition of the positive definite symmetric
331 :     * cscBlocked matrix A (upper triangle stored) in L and D.
332 :     *
333 :     * @param A pointer to a cscBlocked object containing the upper
334 :     * triangle of a positive definite symmetric matrix.
335 :     * @param Parent the parent array for A
336 :     * @param L pointer to a cscBlocked object to hold L
337 :     * @param D pointer to a 3D array to hold D
338 :     *
339 :     * @return n the number of column blocks in A for success. A value
340 :     * less than n indicates the first column block whose diagonal was not
341 :     * positive definite.
342 :     */
343 :     int
344 :     cscb_ldl(SEXP A, const int Parent[], SEXP L, SEXP D)
345 :     {
346 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
347 :     AxP = GET_SLOT(A, Matrix_xSym);
348 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
349 : bates 434 diag, info, j, k, n = length(ApP) - 1;
350 : bates 415 int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
351 :     *Ap = INTEGER(ApP),
352 :     *Li = INTEGER(GET_SLOT(L, Matrix_iSym)),
353 : bates 434 *Lp = INTEGER(GET_SLOT(L, Matrix_pSym)), nci = adims[0];
354 : bates 415 double *Lx = REAL(GET_SLOT(L, Matrix_xSym)),
355 : bates 434 *Ax = REAL(AxP), *Dx = REAL(D), minus1 = -1., one = 1.;
356 : bates 415
357 : bates 434 if (adims[1] != nci || nci < 1)
358 :     error("cscb_ldl: dim(A) is [%d, %d, %d]", adims[0], adims[1], adims[2]);
359 : bates 415 for (j = 0, diag = 1; j < n; j++) { /* check for trivial structure */
360 :     if (Parent[j] >= 0) {diag = 0; break;}
361 :     }
362 :     if (diag) {
363 : bates 434 int ncisqr = nci * nci;
364 :     Memcpy(Dx, Ax, ncisqr * n);
365 :     for (j = 0; j < n; j++) { /* form D_i^{1/2} */
366 :     F77_CALL(dpotrf)("U", &nci, Dx + j * ncisqr, &nci, &k);
367 :     if (k) error("D[ , , %d], level %d, is not positive definite",
368 :     j + 1);
369 :     }
370 : bates 415 return n;
371 :     }
372 : bates 434 if (nci == 1) {
373 :     k = R_ldl_numeric(n, Ap, Ai, Ax, Lp, Parent, Li, Lx, Dx,
374 :     (int *) NULL, (int *) NULL);
375 :     if (k < n) error("cscb_ldl: error code %k from R_ldl_numeric", k);
376 :     for (j = 0; j < n; j++) Dx[j] = sqrt(Dx[j]);
377 :     return n;
378 :     } else { /* Copy of ldl_numeric from the LDL package
379 :     * modified for blocked sparse matrices */
380 :     int i, k, p, p2, len, nci = adims[0], ncisqr = adims[0]*adims[0], top;
381 :     int *Lnz = Calloc(n, int),
382 :     *Pattern = Calloc(n, int),
383 :     *Flag = Calloc(n, int);
384 :     double *Y = Calloc(n * ncisqr, double), *Yi = Calloc(ncisqr, double);
385 : bates 415
386 : bates 434 for (k = 0; k < n; k++) {
387 :     /* compute nonzero Pattern of kth row of L, in topological order */
388 :     AZERO(Y + k*ncisqr, ncisqr); /* Y[,,0:k] is now all zero */
389 :     top = n; /* stack for pattern is empty */
390 :     Flag[k] = k; /* mark node k as visited */
391 :     Lnz[k] = 0; /* count of nonzeros in column k of L */
392 :     p2 = Ap[k+1];
393 :     for (p = Ap[k]; p < p2; p++) {
394 :     i = Ai[p]; /* get A[i,k] */
395 :     if (i <= k) { /* [i,k] in upper triangle? Should always be true */
396 :     /* copy A(i,k) into Y */
397 :     Memcpy(Y + i * ncisqr, Ax + p * ncisqr, ncisqr);
398 :     /* follow path from i to root of etree, stop at flagged node */
399 :     for (len = 0; Flag[i] != k; i = Parent[i]) {
400 :     Pattern[len++] = i; /* L[k,i] is nonzero */
401 :     Flag[i] = k; /* mark i as visited */
402 :     }
403 :     while (len > 0) { /* push path on top of stack */
404 :     Pattern[--top] = Pattern[--len];
405 :     }
406 :     }
407 :     }
408 :     /* Pattern [top ... n-1] now contains nonzero pattern of L[,k] */
409 :     /* compute numerical values in kth row of L (a sparse triangular solve) */
410 :     Memcpy(Dx + k * ncisqr, Y + k * ncisqr, ncisqr); /* get D[,,k] */
411 :     AZERO(Y + k*ncisqr, ncisqr); /* clear Y[,,k] */
412 :     for (; top < n; top++) {
413 :     i = Pattern[top];
414 :     Memcpy(Yi, Y + i*ncisqr, ncisqr); /* copy Y[,,i] */
415 :     AZERO(Y + i*ncisqr, ncisqr); /* clear Y[,,i] */
416 :     p2 = Lp[i] + Lnz[i];
417 :     for (p = Lp[i]; p < p2; p++) {
418 :     F77_CALL(dgemm)("N", "N", &nci, &nci, &nci, &minus1,
419 :     Lx + p*ncisqr, &nci, Yi, &nci,
420 :     &one, Y + Li[p]*ncisqr, &nci);
421 :     }
422 :     /* FIXME: Check that this is the correct order and transposition */
423 :     F77_CALL(dtrsm)("L", "U", "N", "N", &nci, &nci,
424 :     &one, Dx + i*ncisqr, &nci, Yi, &nci);
425 :     F77_CALL(dsyrk)("U", "N", &nci, &nci, &minus1, Yi, &nci,
426 :     &one, Dx + k*ncisqr, &nci);
427 :     F77_CALL(dtrsm)("L", "U", "T", "N", &nci, &nci,
428 :     &one, Dx + i*ncisqr, &nci, Yi, &nci);
429 :     Li[p] = k; /* store L[k,i] in column form of L */
430 :     Memcpy(Lx + p * ncisqr, Yi, ncisqr);
431 :     Lnz[i]++; /* increment count of nonzeros in col i */
432 :     }
433 :     F77_CALL(dpotrf)("U", &nci, Dx + k*ncisqr, &nci, &info);
434 :     if (info) {
435 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
436 :     return k; /* failure, D[,,k] is not positive definite */
437 :     }
438 :     }
439 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
440 :     return n; /* success, diagonal of D is all nonzero */
441 : bates 415 }
442 :     return -1; /* keep -Wall happy */
443 :     }
444 :    
445 :     /**
446 :     * Perform one of the cscBlocked-matrix operations B := alpha*op(A)*B
447 :     * or B := alpha*B*op(A)
448 :     *
449 :     * @param side 'L' or 'R' for left or right
450 :     * @param uplo 'U' or 'L' for upper or lower
451 :     * @param transa 'T' or 'N' for transpose or no transpose
452 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
453 :     * @param A pointer to a triangular cscBlocked object
454 :     * @param B pointer to the contents of the matrix B
455 :     * @param m number of rows in B
456 :     * @param n number of columns in B
457 :     * @param ldb leading dimension of B as declared in the calling function
458 :     */
459 : bates 447 void
460 :     cscb_trmm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo,
461 :     enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
462 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
463 : bates 415 {
464 : bates 447 int ileft = (side == LFT),
465 :     iup = (uplo == UPP),
466 :     itr = (transa == TRN),
467 :     iunit = (diag == UNT);
468 : bates 415 SEXP ApP = GET_SLOT(A, Matrix_pSym),
469 :     AxP = GET_SLOT(A, Matrix_xSym);
470 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
471 :     *Ap = INTEGER(ApP),
472 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
473 :     i, j, nb = length(ApP) - 1;
474 :    
475 :     if (xdims[0] != xdims[1])
476 :     error("Argument A to cscb_trmm is not triangular");
477 :     if (alpha != 1.0) {
478 :     for (j = 0; j < n; j++) { /* scale by alpha */
479 :     for (i = 0; i < m; i++)
480 :     B[i + j * ldb] *= alpha;
481 :     }
482 :     }
483 :     if (iunit && xdims[2] < 1) return; /* A is the identity */
484 :     error("Code for non-identity cases of cscb_trmm not yet written");
485 :     iup += 0; /* keep -Wall happy */
486 :     ileft += 0;
487 :     itr += 0;
488 :     }
489 :    
490 :     /**
491 : bates 434 * Solve a triangular system of the form op(A)*X = alpha*B where A
492 :     * is a cscBlocked triangular matrix and B is a dense matrix.
493 :     *
494 :     * @param uplo 'U' or 'L' for upper or lower
495 :     * @param trans 'T' or 'N' for transpose or no transpose
496 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
497 :     * @param A pointer to a triangular cscBlocked object
498 :     * @param B pointer to the contents of the matrix B
499 :     * @param m number of rows in B
500 :     * @param n number of columns in B
501 :     * @param ldb leading dimension of B as declared in the calling function
502 :     */
503 : bates 447 void
504 :     cscb_trsm(enum CBLAS_UPLO uplo, enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
505 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
506 : bates 434 {
507 : bates 447 int iup = (uplo == UPP),
508 :     itr = (transa == TRN),
509 :     iunit = (diag == UNT);
510 : bates 434 SEXP ApP = GET_SLOT(A, Matrix_pSym),
511 :     AxP = GET_SLOT(A, Matrix_xSym);
512 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
513 :     *Ap = INTEGER(ApP),
514 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
515 :     i, j, nb = length(ApP) - 1;
516 :     double *Ax = REAL(GET_SLOT(A, Matrix_xSym));
517 :    
518 :     if (xdims[0] != xdims[1])
519 :     error("Argument A to cscb_trsm is not triangular");
520 :     if (ldb < m || ldb <= 0 || n <= 0)
521 :     error("cscb_trsm: inconsistent dims m = %d, n = %d, ldb = %d",
522 :     m, n, ldb);
523 :     if (m != (nb * xdims[0]))
524 :     error("cscb_trsm: inconsistent dims m = %d, A[%d,%d,]x%d",
525 :     m, xdims[0], xdims[1], xdims[2]);
526 :     if (alpha != 1.0) {
527 :     for (j = 0; j < n; j++) { /* scale by alpha */
528 :     for (i = 0; i < m; i++)
529 :     B[i + j * ldb] *= alpha;
530 :     }
531 :     }
532 :     if (iunit) {
533 :     if (xdims[2] < 1) return; /* A is the identity */
534 :     if (xdims[0] == 1) {
535 :     if (iup) error("Code for upper triangle not yet written");
536 :     if (itr) {
537 :     for (j = 0; j < n; j++)
538 :     R_ldl_ltsolve(m, B + j * ldb, Ap, Ai, Ax);
539 :     } else {
540 :     for (j = 0; j < n; j++)
541 :     R_ldl_lsolve(m, B + j * ldb, Ap, Ai, Ax);
542 :     }
543 :     return;
544 :     }
545 :     error("Code for non-scalar cscBlocked objects not yet written");
546 :     }
547 :     error("Code for non-unit cases of cscb_trsm not yet written");
548 :     }
549 :    
550 :     /**
551 : bates 415 * Perform one of the operations B := alpha*op(A)*B or
552 :     * B := alpha*B*op(A) where A and B are both cscBlocked.
553 :     *
554 :     * @param side 'L' or 'R' for left or right
555 :     * @param uplo 'U' or 'L' for upper or lower
556 :     * @param transa 'T' or 'N' for transpose or no transpose
557 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
558 :     * @param alpha scalar multiplier
559 :     * @param A pointer to a triangular cscBlocked object
560 :     * @param B pointer to a general cscBlocked matrix
561 :     */
562 : bates 447 void
563 :     cscb_trcbm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo, enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
564 :     double alpha, SEXP A, SEXP B)
565 : bates 415 {
566 : bates 447 int ileft = (side == LFT),
567 :     iup = (uplo == UPP),
568 :     itr = (transa == TRN),
569 :     iunit = (diag == UNT);
570 : bates 415 SEXP ApP = GET_SLOT(A, Matrix_pSym),
571 :     AxP = GET_SLOT(A, Matrix_xSym),
572 :     BpP = GET_SLOT(B, Matrix_pSym),
573 :     BxP = GET_SLOT(B, Matrix_xSym);
574 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
575 :     *Ap = INTEGER(ApP),
576 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
577 :     *Bp = INTEGER(BpP),
578 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
579 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
580 :     ncbA = length(ApP) - 1,
581 :     ncbB = length(BpP) - 1;
582 :     int i, nbx = bxdims[0] * bxdims[1] * bxdims[2];
583 :    
584 :     if (axdims[0] != axdims[1])
585 :     error("Argument A to cscb_trcbm is not triangular");
586 :     if (alpha != 1.0) {
587 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
588 :     REAL(BxP)[i] *= alpha;
589 :     }
590 :     }
591 :     if (iunit && axdims[2] < 1) return; /* A is the identity */
592 :     error("Code for non-trivial cscb_trcbm not yet written");
593 :     }
594 :    
595 :     /**
596 : bates 434 * Expand a column of a compressed, sparse, column-oriented matrix.
597 :     *
598 :     * @param dest array to hold the result
599 :     * @param m number of rows in the matrix
600 :     * @param j index (0-based) of column to expand
601 :     * @param Ap array of column pointers
602 :     * @param Ai array of row indices
603 :     * @param Ax array of non-zero values
604 :     *
605 :     * @return dest
606 :     */
607 :     static
608 :     double *expand_column(double *dest, int m, int j,
609 :     const int Ap[], const int Ai[], const double Ax[])
610 :     {
611 :     int k, k2 = Ap[j + 1];
612 :    
613 :     for (k = 0; k < m; k++) dest[k] = 0.;
614 :     for (k = Ap[j]; k < k2; k++) dest[Ai[k]] = Ax[k];
615 :     return dest;
616 :     }
617 :    
618 :     /**
619 :     * Solve one of the systems op(A)*X = alpha*B or
620 :     * X*op(A) = alpha*B where A cscBlocked triangular and B is cscBlocked.
621 :     *
622 :     * @param side 'L' or 'R' for left or right
623 :     * @param uplo 'U' or 'L' for upper or lower
624 :     * @param transa 'T' or 'N' for transpose or no transpose
625 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
626 :     * @param alpha scalar multiplier
627 :     * @param A pointer to a triangular cscBlocked object
628 :     * @param B pointer to a general cscBlocked matrix
629 :     */
630 : bates 447 void
631 :     cscb_trcbsm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo, enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
632 :     double alpha, SEXP A, const int Parent[], SEXP B)
633 : bates 434 {
634 : bates 447 int ileft = (side == LFT),
635 :     iup = (uplo == UPP),
636 :     itr = (transa == TRN),
637 :     iunit = (diag == UNT);
638 : bates 434 SEXP ApP = GET_SLOT(A, Matrix_pSym),
639 :     AxP = GET_SLOT(A, Matrix_xSym),
640 :     BpP = GET_SLOT(B, Matrix_pSym),
641 :     BxP = GET_SLOT(B, Matrix_xSym);
642 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
643 :     *Ap = INTEGER(ApP),
644 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
645 :     *Bp = INTEGER(BpP),
646 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
647 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
648 :     ncbA = length(ApP) - 1,
649 :     ncbB = length(BpP) - 1;
650 :     int i, j, nbx = bxdims[0] * bxdims[1] * bxdims[2];
651 :     double *Ax = REAL(AxP), *Bx = REAL(BxP);
652 :    
653 :     if (axdims[0] != axdims[1])
654 :     error("Argument A to cscb_trcbm is not triangular");
655 :     if (alpha != 1.0) {
656 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
657 :     REAL(BxP)[i] *= alpha;
658 :     }
659 :     }
660 :     if (iunit && axdims[2] < 1) return; /* A is the identity */
661 :     if (iunit && axdims[0] == 1) { /* can use R_ldl code */
662 :     if (!ileft && itr) { /* case required for lmer */
663 :     int *BTp, nnz = bxdims[2], nrbB;
664 :     int *tmp = expand_column_pointers(ncbB, Bp, Calloc(nnz, int));
665 :     int *BTi = Calloc(nnz, int);
666 :     double *BTx = Calloc(nnz, double), *rhs;
667 :    
668 :     /* transpose B */
669 :     for (i = 0, nrbB = -1; i < nnz; i++) if (Bi[i] > nrbB) nrbB = Bi[i];
670 :     BTp = Calloc(nrbB, int);
671 :     triplet_to_col(ncbB, nrbB, nnz, tmp, Bi, Bx, BTp, BTi, BTx);
672 :     /* sanity check */
673 :     if (BTp[nrbB] != nnz) error("cscb_trcbsm: transpose operation failed");
674 :     Free(tmp);
675 :     /* Solve one column at a time */
676 :     rhs = Calloc(ncbB, double);
677 :     AZERO(Bx, nnz); /* zero the result */
678 :     for (i = 0; i < nrbB; i++) {
679 :     R_ldl_lsolve(ncbB, expand_column(rhs, ncbB, i, BTp, BTi, BTx),
680 :     Ap, Ai, Ax);
681 :     for (j = 0; j < ncbB; j++) { /* write non-zeros in sol'n into B */
682 :     if (BTx[j]) Bx[Tind(Bi, Bp, j, i)] = BTx[j];
683 :     }
684 :     Free(rhs); Free(BTp); Free(BTx); Free(BTi);
685 :     }
686 :     }
687 :     error("cscb_trcbsm: method not yet written");
688 :     }
689 :     error("cscb_trcbsm: method not yet written");
690 :     }
691 :    
692 :     /**
693 : bates 415 * Perform one of the matrix-matrix operations
694 :     * C := alpha*op(A)*op(B) + beta*C
695 :     * on compressed, sparse, blocked matrices.
696 :     *
697 :     * @param transa 'T' for transpose of A, else 'N'
698 :     * @param transb 'T' for transpose of B, else 'N'
699 :     * @param alpha scalar multiplier
700 :     * @param A pointer to a cscBlocked object
701 :     * @param B pointer to a cscBlocked object
702 :     * @param beta scalar multiplier
703 :     * @param C pointer to a cscBlocked object
704 :     */
705 : bates 447 void
706 :     cscb_cscbm(enum CBLAS_TRANSPOSE transa, enum CBLAS_TRANSPOSE transb,
707 :     double alpha, SEXP A, SEXP B, double beta, SEXP C)
708 : bates 415 {
709 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
710 :     AxP = GET_SLOT(A, Matrix_xSym),
711 :     BpP = GET_SLOT(B, Matrix_pSym),
712 :     BxP = GET_SLOT(B, Matrix_xSym),
713 :     CxP = GET_SLOT(C, Matrix_xSym);
714 :     int *Ap = INTEGER(ApP),
715 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
716 :     *Bp = INTEGER(BpP),
717 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
718 :     *Cp = INTEGER(GET_SLOT(C, Matrix_pSym)),
719 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
720 :     *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
721 :     *bdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
722 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
723 :     nca = length(ApP) - 1,
724 :     ncb = length(BpP) - 1;
725 :     int ablk = adims[0] * adims[1],
726 :     bblk = bdims[0] * bdims[1],
727 :     cblk = cdims[0] * cdims[1];
728 :     double *Ax = REAL(AxP),
729 :     *Bx = REAL(BxP),
730 :     *Cx = REAL(CxP),
731 :     one = 1.0;
732 :    
733 : bates 447 if ((transa == NTR) && transb == TRN) { /* transposed crossproduct */
734 : bates 415 int jj;
735 :    
736 :     if (adims[1] != bdims[1] ||
737 :     adims[0] != cdims[0] ||
738 :     bdims[0] != cdims[1])
739 :     error("C[%d,%d,%d] := A[%d,%d,%d] %*% t(B[%d,%d,%d])",
740 :     cdims[0], cdims[1], cdims[2],
741 :     adims[0], adims[1], adims[2],
742 :     bdims[0], bdims[1], bdims[2]);
743 :     if (nca != ncb)
744 :     error("C := A(ncblocks = %d) %*% t(B(ncblocks = %d)", nca, ncb);
745 :     if (beta != 1.) { /* scale C by beta */
746 :     int ctot = cdims[0] * cdims[1] * cdims[2];
747 :     for (jj = 0; jj < ctot; jj++) Cx[jj] *= beta;
748 :     }
749 :     for (jj = 0; jj < nca; jj++) {
750 :     int ia, ib, a2 = Ap[jj + 1], b2 = Bp[jj + 1];
751 :     for (ia = Ap[jj]; ia < a2; ia++) {
752 :     for (ib = Bp[jj]; ib < b2; ib++) {
753 :     int cind = Ind(Ai[ia], Bi[ib], Cp, Ci);
754 :     if (cind < 0)
755 :     error("Invalid index [%d,%d]", Ai[ia], Bi[ib]);
756 :     F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
757 :     &alpha, Ax + ia * ablk, adims,
758 :     Bx + ib * bblk, bdims, &one,
759 :     Cx + cind * cblk, cdims);
760 : bates 339 }
761 :     }
762 :     }
763 : bates 415 return;
764 : bates 339 }
765 : bates 415 error("Code not yet written");
766 : bates 339 }

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge