SCM

SCM Repository

[matrix] Annotation of /pkg/src/cscBlocked.c
ViewVC logotype

Annotation of /pkg/src/cscBlocked.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 438 - (view) (download) (as text)

1 : bates 298 #include "cscBlocked.h"
2 : bates 415 /* TODO
3 :     * - code for trans = 'T' in cscb_syrk
4 :     * - code for non-trivial cscb_trmm and cscb_ldl
5 :     */
6 : bates 298 SEXP cscBlocked_validate(SEXP x)
7 :     {
8 :     SEXP pp = GET_SLOT(x, Matrix_pSym),
9 :     ip = GET_SLOT(x, Matrix_iSym),
10 :     xp = GET_SLOT(x, Matrix_xSym),
11 :     dp = getAttrib(xp, R_DimSymbol);
12 :     int *pv = INTEGER(pp),
13 :     *dim = INTEGER(dp),
14 :     ncol = length(pp) - 1;
15 :     int nnz = pv[ncol];
16 :    
17 : bates 304 if (!(isReal(xp) && isArray(xp)))
18 : bates 415 return mkString("slot x should be a real array");
19 : bates 298 if (length(dp) != 3)
20 : bates 415 return mkString("slot x should be a 3-dimensional array");
21 : bates 298 if (length(ip) != nnz)
22 : bates 415 return mkString("length of slot i does not matck last element of slot p");
23 : bates 298 if (dim[2] != nnz)
24 : bates 415 return
25 :     mkString("third dimension of slot x does not match number of nonzeros");
26 : bates 298 return ScalarLogical(1);
27 :     }
28 :    
29 : bates 434 static int*
30 :     expand_column_pointers(int ncol, const int mp[], int mj[])
31 :     {
32 :     int j;
33 :     for (j = 0; j < ncol; j++) {
34 :     int j2 = mp[j+1], jj;
35 :     for (jj = mp[j]; jj < j2; jj++) mj[jj] = j;
36 :     }
37 :     return mj;
38 :     }
39 :    
40 :     static R_INLINE
41 :     int Tind(const int rowind[], const int colptr[], int i, int j)
42 :     {
43 :     int k, k2 = colptr[j + 1];
44 :     for (k = colptr[j]; k < k2; k++)
45 :     if (rowind[k] == i) return k;
46 :     error("row %d and column %d not defined in rowind and colptr",
47 :     i, j);
48 :     return -1; /* -Wall */
49 :     }
50 :    
51 : bates 300 /**
52 :     * Perform one of the matrix operations
53 :     * C := alpha*op(A)*B + beta*C
54 :     * or
55 :     * C := alpha*B*op(A) + beta*C
56 :     * where A is a compressed, sparse, blocked matrix and
57 :     * B and C are dense matrices.
58 :     *
59 :     * @param side 'L' or 'l' for left, otherwise right
60 :     * @param transa 'T' or 't' for transpose.
61 :     * @param m number of rows in C
62 :     * @param n number of columns in C
63 : bates 415 * @param k number of rows in B if side = 'L', otherwise
64 :     * number of columns in B.
65 : bates 300 * @param alpha
66 :     * @param nr number of rows per block of A
67 :     * @param nc number of columns per block of A
68 :     * @param ap vector of column pointers in A
69 :     * @param ai vector of row indices in A
70 :     * @param ax contents of non-zero blocks of A
71 :     * @param b matrix to be multiplied
72 :     * @param ldb leading dimension of b as declared in the calling
73 :     * routine
74 : bates 304 * @param beta scalar multiplier of c
75 : bates 300 * @param c product matrix to be modified
76 : bates 304 * @param ldc leading dimension of c as declared in the calling
77 : bates 300 * routine
78 :     */
79 :     void
80 :     cscBlocked_mm(char side, char transa, int m, int n, int k,
81 :     double alpha, int nr, int nc,
82 :     const int ap[], const int ai[],
83 :     const double ax[],
84 :     const double b[], int ldb,
85 :     double beta, double c[], int ldc)
86 :     {
87 : bates 304 int j, kk, lside = (side == 'L' || side == 'l');
88 :     int ncb, nrb, sz = nr * nc, tra = (transa == 'T' || transa == 't');
89 : bates 300
90 : bates 304 if (nr < 1 || nc < 1 || m < 0 || n < 0 || k < 0)
91 :     error("improper dims m=%d, n=%d, k=%d, nr=%d, nc=%d",
92 : bates 300 m, n, k, nr, nc);
93 : bates 304 if (ldc < n) error("incompatible dims n=%d, ldc=%d", n, ldc);
94 :     if (lside) {
95 :     if (ldb < k)
96 :     error("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d",
97 :     k, ldb, n, nr, nc);
98 :     if (tra) {
99 :     if (m % nc || k % nr)
100 :     error("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d",
101 :     m, k, nr, nc);
102 :     nrb = k/nr; ncb = m/nc;
103 :     } else {
104 :     if (m % nr || k % nc)
105 :     error("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d",
106 :     m, k, nr, nc);
107 :     nrb = m/nr; ncb = k/nc;
108 :     }
109 : bates 300 for (j = 0; j < ncb; j++) {
110 : bates 304 int j2 = ap[j + 1];
111 :     for (kk = ap[j]; kk < j2; kk++) {
112 :     int ii = ai[kk];
113 :     if (ii < 0 || ii >= nrb)
114 :     error("improper row index ii=%d, nrb=%d", ii, nrb);
115 :     if (tra) {
116 :     F77_CALL(dgemm)("T", "N", &nc, &n, &nr,
117 : bates 415 &alpha, ax + kk * sz, &nr,
118 : bates 304 b + ii * nr, &ldb,
119 :     &beta, c + j * nc, &ldc);
120 :     } else {
121 :     F77_CALL(dgemm)("N", "N", &nr, &n, &nc,
122 : bates 415 &alpha, ax + kk * sz, &nr,
123 : bates 304 b + j*nc, &ldb,
124 :     &beta, c + ii * nr, &ldc);
125 :     }
126 : bates 300 }
127 :     }
128 :     } else {
129 : bates 304 error("Call to cscBlocked_mm must have side == 'L'");
130 : bates 300 }
131 :     }
132 : bates 304
133 :     /**
134 : bates 415 * Perform one of the matrix operations
135 :     * C := alpha*op(A)*B + beta*C
136 :     * or
137 :     * C := alpha*B*op(A) + beta*C
138 :     * where A is a compressed, sparse, blocked matrix and
139 :     * B and C are dense matrices.
140 :     *
141 :     * @param side 'L' or 'l' for left, otherwise right
142 :     * @param transa 'T' or 't' for transpose.
143 :     * @param m number of rows in C
144 :     * @param n number of columns in C
145 :     * @param k number of rows in B if side = 'L', otherwise
146 :     * number of columns in B.
147 :     * @param alpha
148 :     * @param A pointer to a cscBlocked object
149 :     * @param B matrix to be multiplied
150 :     * @param ldb leading dimension of b as declared in the calling
151 :     * routine
152 :     * @param beta scalar multiplier of c
153 :     * @param C product matrix to be modified
154 :     * @param ldc leading dimension of c as declared in the calling
155 :     * routine
156 :     */
157 :     void
158 :     cscb_mm(char side, char transa, int m, int n, int k,
159 :     double alpha, SEXP A,
160 :     const double B[], int ldb,
161 :     double beta, double C[], int ldc)
162 :     {
163 :     int lside = (side == 'L' || side == 'l'),
164 :     tra = (transa == 'T' || transa == 't');
165 :     SEXP AxP = GET_SLOT(A, Matrix_xSym),
166 :     ApP = GET_SLOT(A, Matrix_pSym);
167 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
168 :     *Ap = INTEGER(ApP),
169 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
170 :     ancb = length(ApP) - 1, /* number of column blocks */
171 :     anrb; /* number of row blocks */
172 :     int absz = adims[0] * adims[1]; /* block size */
173 :     int j;
174 :     double *Ax = REAL(AxP);
175 :    
176 : bates 422 if (ldc < m) error("incompatible dims m=%d, ldc=%d", m, ldc);
177 : bates 415 if (lside) {
178 :     /* B is of size k by n */
179 :     if (ldb < k)
180 :     error("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d",
181 :     k, ldb, n, adims[0], adims[1]);
182 :     if (tra) {
183 :     if (m % adims[1] || k % adims[0])
184 :     error("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d",
185 :     m, k, adims[0], adims[1]);
186 :     if (ancb != m/adims[1])
187 :     error("incompatible LT dims m=%d, ancb=%d, adims=[%d,%d,%d]",
188 :     m, ancb, adims[0], adims[1], adims[3]);
189 :     anrb = k/adims[0];
190 :     } else {
191 :     if (m % adims[0] || k % adims[1])
192 :     error("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d",
193 :     m, k, adims[0], adims[1]);
194 :     if (ancb != k/adims[1])
195 :     error("incompatible LN dims k=%d, ancb=%d, adims=[%d,%d,%d]",
196 :     k, ancb, adims[0], adims[1], adims[3]);
197 :     anrb = m/adims[0];
198 :     }
199 :     for (j = 0; j < ancb; j++) {
200 :     int kk, j2 = Ap[j + 1];
201 :     for (kk = Ap[j]; kk < j2; kk++) {
202 :     int ii = Ai[kk];
203 :     if (ii < 0 || ii >= anrb)
204 :     error("improper row index ii=%d, anrb=%d", ii, anrb);
205 :     if (tra) {
206 :     F77_CALL(dgemm)("T", "N", adims+1, &n, adims,
207 :     &alpha, Ax + kk * absz, adims,
208 :     B + ii * adims[0], &ldb,
209 :     &beta, C + j * adims[1], &ldc);
210 :     } else {
211 :     F77_CALL(dgemm)("N", "N", adims, &n, adims+1,
212 :     &alpha, Ax + kk * absz, adims,
213 :     B + j*adims[1], &ldb,
214 : bates 422 &beta, C + ii * adims[0], &ldc);
215 : bates 415 }
216 :     }
217 :     }
218 :     } else {
219 :     /* B is of size m by k */
220 :     error("Call to cscb_mm must have side == 'L'");
221 :     }
222 :     }
223 :    
224 :     /**
225 : bates 304 * Invert a triangular sparse blocked matrix. This is not done in
226 :     * place because the number of non-zero blocks in A-inverse can be
227 :     * different than the number of non-zero blocks in A.
228 :     *
229 :     * @param upper 'U' indicates upper triangular, 'L' lower
230 :     * @param unit 'U' indicates unit diagonal, 'N' non-unit
231 : bates 415 * @param A Pointer to a triangular cscBlocked object
232 :     * @param Ai Pointer to a triangular cscBlocked object
233 : bates 304 */
234 :     void
235 : bates 415 cscb_tri(char upper, char unit, SEXP A, const int Parent[], SEXP AI)
236 : bates 304 {
237 : bates 415 SEXP ApP = GET_SLOT(A, Matrix_pSym),
238 :     AxP = GET_SLOT(A, Matrix_xSym),
239 :     AIpP = GET_SLOT(AI, Matrix_pSym),
240 :     AIxP = GET_SLOT(AI, Matrix_xSym);
241 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
242 :     *Ap = INTEGER(ApP),
243 :     *AIi = INTEGER(GET_SLOT(AI, Matrix_iSym)),
244 :     *AIp = INTEGER(AIpP),
245 :     *adim = INTEGER(getAttrib(AxP, R_DimSymbol)),
246 :     *aidim = INTEGER(getAttrib(AxP, R_DimSymbol)),
247 :     ancb = length(ApP) - 1,
248 :     aincb = length(AIpP) - 1,
249 :     iup = (upper == 'U' || upper == 'u'),
250 :     iunit = (unit == 'U' || unit == 'u');
251 :    
252 :     if (aidim[0] != adim[0] || aidim[1] != adim[1] || aidim[2] != adim[2] ||
253 :     adim[0] != adim[1] || ancb != aincb)
254 :     error("Incompatible dimensions A[%d,%d,%d]x%d; AI[%d,%d,%d]x%d",
255 :     adim[0], adim[1], adim[2], ancb,
256 :     aidim[0], aidim[1], aidim[2], aincb);
257 : bates 304 if (!iunit)
258 :     error("Code for non-unit triangular matrices not yet written");
259 : bates 415 if (adim[2] > 0)
260 : bates 304 error("Code for non-trivial unit inverse not yet written");
261 :     }
262 : bates 339
263 :     /**
264 :     * Search for the element in a compressed sparse matrix at a given row and column
265 :     *
266 :     * @param row row index
267 :     * @param col column index
268 :     * @param cp column pointers
269 :     * @param ci row indices
270 :     *
271 :     * @return index of element in ci, if it exists, else -1
272 :     */
273 :     static R_INLINE
274 :     int Ind(int row, int col, const int cp[], const int ci[])
275 :     {
276 :     int i, i2 = cp[col + 1];
277 :     for (i = cp[col]; i < i2; i++)
278 :     if (ci[i] == row) return i;
279 :     return -1;
280 :     }
281 :    
282 :     /**
283 :     * Perform one of the matrix operations
284 :     * C := alpha*A*A' + beta*C,
285 :     * or
286 :     * C := alpha*A'*A + beta*C,
287 :     * where A is a compressed, sparse, blocked matrix and
288 :     * C is a compressed, sparse, symmetric blocked matrix.
289 :     *
290 :     * @param uplo 'U' or 'u' for upper triangular storage, else lower.
291 :     * @param trans 'T' or 't' for transpose.
292 :     * @param alpha scalar multiplier of outer product
293 :     * @param A compressed sparse blocked matrix
294 :     * @param beta scalar multiplier of c
295 :     * @param C compressed sparse blocked symmetric matrix to be updated
296 :     */
297 : bates 415 void cscb_syrk(char uplo, char trans, double alpha, SEXP A, double beta, SEXP C)
298 : bates 339 {
299 : bates 415 SEXP AxP = GET_SLOT(A, Matrix_xSym),
300 :     ApP = GET_SLOT(A, Matrix_pSym),
301 :     CxP = GET_SLOT(C, Matrix_xSym),
302 :     CpP = GET_SLOT(C, Matrix_pSym);
303 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
304 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
305 :     *Ap = INTEGER(ApP),
306 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
307 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
308 :     *Cp = INTEGER(CpP),
309 :     iup = (uplo == 'U' || uplo == 'u'),
310 :     itr = (trans == 'T' || trans == 't'),
311 : bates 422 j, k;
312 : bates 415 double *Ax = REAL(AxP), *Cx = REAL(CxP), one = 1.;
313 :     int scalar = (adims[0] == 1 && adims[1] == 1),
314 : bates 434 anc = length(ApP) - 1,
315 : bates 415 asz = adims[0] * adims[1],
316 :     csz = cdims[0] * cdims[1];
317 : bates 339
318 : bates 415
319 : bates 339 if (cdims[0] != cdims[1]) error("blocks in C must be square");
320 : bates 415 if (itr) {
321 : bates 339 error("Code for trans == 'T' not yet written");
322 : bates 415 /* FIXME: Write this part */
323 :     } else {
324 : bates 422 if (adims[0] != cdims[0])
325 :     error("Inconsistent dimensions: A[%d,%d,%d], C[%d,%d,%d]",
326 :     adims[0], adims[1], adims[2],
327 :     cdims[0], cdims[1], cdims[2]);
328 : bates 339 /* check the row indices */
329 : bates 415 for (k = 0; k < adims[2]; k++) {
330 :     int aik = Ai[k];
331 : bates 422 if (aik < 0 || aik >= cdims[2])
332 : bates 415 error("Row index %d = %d is out of range [0, %d]",
333 : bates 422 k, aik, cdims[2] - 1);
334 : bates 415 }
335 :     /* multiply C by beta */
336 :     if (beta != 1.)
337 :     for (j = 0; j < csz * cdims[2]; j++) Cx[j] *= beta;
338 :     /* individual products */
339 : bates 434 for (j = 0; j < anc; j++) {
340 : bates 415 int k, kk, k2 = Ap[j+1];
341 :     for (k = Ap[j]; k < k2; k++) {
342 :     int ii = Ai[k], K = Ind(ii, ii, Cp, Ci);
343 :    
344 : bates 438 if (K < 0) error("cscb_syrk: C[%d,%d] not defined", ii, ii);
345 : bates 415 if (scalar) Cx[K] += alpha * Ax[k] * Ax[k];
346 :     else F77_CALL(dsyrk)(&uplo, "N", cdims, adims + 1,
347 :     &alpha, Ax + k * asz, adims,
348 :     &one, Cx + K * csz, cdims);
349 :    
350 :     for (kk = k+1; kk < k2; kk++) {
351 :     int jj = Ai[kk];
352 : bates 438 K = (iup) ? Ind(ii, jj, Cp, Ci) : Ind(jj, ii, Cp, Ci);
353 :    
354 :     if (K < 0) error("cscb_syrk: C[%d,%d] not defined", ii, jj);
355 : bates 415 if (scalar) Cx[K] += alpha * Ax[k] * Ax[kk];
356 :     else F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
357 :     &alpha, Ax + ((iup)?kk:k) * asz, adims,
358 :     Ax + ((iup)?k:kk) * asz, adims,
359 :     &one, Cx + K * asz, cdims);
360 : bates 339 }
361 :     }
362 :     }
363 :     }
364 : bates 415 }
365 :    
366 :     /**
367 :     * Create the LDL' decomposition of the positive definite symmetric
368 :     * cscBlocked matrix A (upper triangle stored) in L and D.
369 :     *
370 :     * @param A pointer to a cscBlocked object containing the upper
371 :     * triangle of a positive definite symmetric matrix.
372 :     * @param Parent the parent array for A
373 :     * @param L pointer to a cscBlocked object to hold L
374 :     * @param D pointer to a 3D array to hold D
375 :     *
376 :     * @return n the number of column blocks in A for success. A value
377 :     * less than n indicates the first column block whose diagonal was not
378 :     * positive definite.
379 :     */
380 :     int
381 :     cscb_ldl(SEXP A, const int Parent[], SEXP L, SEXP D)
382 :     {
383 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
384 :     AxP = GET_SLOT(A, Matrix_xSym);
385 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
386 : bates 434 diag, info, j, k, n = length(ApP) - 1;
387 : bates 415 int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
388 :     *Ap = INTEGER(ApP),
389 :     *Li = INTEGER(GET_SLOT(L, Matrix_iSym)),
390 : bates 434 *Lp = INTEGER(GET_SLOT(L, Matrix_pSym)), nci = adims[0];
391 : bates 415 double *Lx = REAL(GET_SLOT(L, Matrix_xSym)),
392 : bates 434 *Ax = REAL(AxP), *Dx = REAL(D), minus1 = -1., one = 1.;
393 : bates 415
394 : bates 434 if (adims[1] != nci || nci < 1)
395 :     error("cscb_ldl: dim(A) is [%d, %d, %d]", adims[0], adims[1], adims[2]);
396 : bates 415 for (j = 0, diag = 1; j < n; j++) { /* check for trivial structure */
397 :     if (Parent[j] >= 0) {diag = 0; break;}
398 :     }
399 :     if (diag) {
400 : bates 434 int ncisqr = nci * nci;
401 :     Memcpy(Dx, Ax, ncisqr * n);
402 :     for (j = 0; j < n; j++) { /* form D_i^{1/2} */
403 :     F77_CALL(dpotrf)("U", &nci, Dx + j * ncisqr, &nci, &k);
404 :     if (k) error("D[ , , %d], level %d, is not positive definite",
405 :     j + 1);
406 :     }
407 : bates 415 return n;
408 :     }
409 : bates 434 if (nci == 1) {
410 :     k = R_ldl_numeric(n, Ap, Ai, Ax, Lp, Parent, Li, Lx, Dx,
411 :     (int *) NULL, (int *) NULL);
412 :     if (k < n) error("cscb_ldl: error code %k from R_ldl_numeric", k);
413 :     for (j = 0; j < n; j++) Dx[j] = sqrt(Dx[j]);
414 :     return n;
415 :     } else { /* Copy of ldl_numeric from the LDL package
416 :     * modified for blocked sparse matrices */
417 :     int i, k, p, p2, len, nci = adims[0], ncisqr = adims[0]*adims[0], top;
418 :     int *Lnz = Calloc(n, int),
419 :     *Pattern = Calloc(n, int),
420 :     *Flag = Calloc(n, int);
421 :     double *Y = Calloc(n * ncisqr, double), *Yi = Calloc(ncisqr, double);
422 : bates 415
423 : bates 434 for (k = 0; k < n; k++) {
424 :     /* compute nonzero Pattern of kth row of L, in topological order */
425 :     AZERO(Y + k*ncisqr, ncisqr); /* Y[,,0:k] is now all zero */
426 :     top = n; /* stack for pattern is empty */
427 :     Flag[k] = k; /* mark node k as visited */
428 :     Lnz[k] = 0; /* count of nonzeros in column k of L */
429 :     p2 = Ap[k+1];
430 :     for (p = Ap[k]; p < p2; p++) {
431 :     i = Ai[p]; /* get A[i,k] */
432 :     if (i <= k) { /* [i,k] in upper triangle? Should always be true */
433 :     /* copy A(i,k) into Y */
434 :     Memcpy(Y + i * ncisqr, Ax + p * ncisqr, ncisqr);
435 :     /* follow path from i to root of etree, stop at flagged node */
436 :     for (len = 0; Flag[i] != k; i = Parent[i]) {
437 :     Pattern[len++] = i; /* L[k,i] is nonzero */
438 :     Flag[i] = k; /* mark i as visited */
439 :     }
440 :     while (len > 0) { /* push path on top of stack */
441 :     Pattern[--top] = Pattern[--len];
442 :     }
443 :     }
444 :     }
445 :     /* Pattern [top ... n-1] now contains nonzero pattern of L[,k] */
446 :     /* compute numerical values in kth row of L (a sparse triangular solve) */
447 :     Memcpy(Dx + k * ncisqr, Y + k * ncisqr, ncisqr); /* get D[,,k] */
448 :     AZERO(Y + k*ncisqr, ncisqr); /* clear Y[,,k] */
449 :     for (; top < n; top++) {
450 :     i = Pattern[top];
451 :     Memcpy(Yi, Y + i*ncisqr, ncisqr); /* copy Y[,,i] */
452 :     AZERO(Y + i*ncisqr, ncisqr); /* clear Y[,,i] */
453 :     p2 = Lp[i] + Lnz[i];
454 :     for (p = Lp[i]; p < p2; p++) {
455 :     F77_CALL(dgemm)("N", "N", &nci, &nci, &nci, &minus1,
456 :     Lx + p*ncisqr, &nci, Yi, &nci,
457 :     &one, Y + Li[p]*ncisqr, &nci);
458 :     }
459 :     /* FIXME: Check that this is the correct order and transposition */
460 :     F77_CALL(dtrsm)("L", "U", "N", "N", &nci, &nci,
461 :     &one, Dx + i*ncisqr, &nci, Yi, &nci);
462 :     F77_CALL(dsyrk)("U", "N", &nci, &nci, &minus1, Yi, &nci,
463 :     &one, Dx + k*ncisqr, &nci);
464 :     F77_CALL(dtrsm)("L", "U", "T", "N", &nci, &nci,
465 :     &one, Dx + i*ncisqr, &nci, Yi, &nci);
466 :     Li[p] = k; /* store L[k,i] in column form of L */
467 :     Memcpy(Lx + p * ncisqr, Yi, ncisqr);
468 :     Lnz[i]++; /* increment count of nonzeros in col i */
469 :     }
470 :     F77_CALL(dpotrf)("U", &nci, Dx + k*ncisqr, &nci, &info);
471 :     if (info) {
472 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
473 :     return k; /* failure, D[,,k] is not positive definite */
474 :     }
475 :     }
476 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
477 :     return n; /* success, diagonal of D is all nonzero */
478 : bates 415 }
479 :     return -1; /* keep -Wall happy */
480 :     }
481 :    
482 :     /**
483 :     * Perform one of the cscBlocked-matrix operations B := alpha*op(A)*B
484 :     * or B := alpha*B*op(A)
485 :     *
486 :     * @param side 'L' or 'R' for left or right
487 :     * @param uplo 'U' or 'L' for upper or lower
488 :     * @param transa 'T' or 'N' for transpose or no transpose
489 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
490 :     * @param A pointer to a triangular cscBlocked object
491 :     * @param B pointer to the contents of the matrix B
492 :     * @param m number of rows in B
493 :     * @param n number of columns in B
494 :     * @param ldb leading dimension of B as declared in the calling function
495 :     */
496 :     void cscb_trmm(char side, char uplo, char transa, char diag,
497 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
498 :     {
499 :     int ileft = (side == 'L' || side == 'l'),
500 :     iup = (uplo == 'U' || uplo == 'u'),
501 :     itr = (transa == 'T' || transa == 't'),
502 :     iunit = (diag == 'U' || diag == 'u');
503 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
504 :     AxP = GET_SLOT(A, Matrix_xSym);
505 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
506 :     *Ap = INTEGER(ApP),
507 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
508 :     i, j, nb = length(ApP) - 1;
509 :    
510 :     if (xdims[0] != xdims[1])
511 :     error("Argument A to cscb_trmm is not triangular");
512 :     if (alpha != 1.0) {
513 :     for (j = 0; j < n; j++) { /* scale by alpha */
514 :     for (i = 0; i < m; i++)
515 :     B[i + j * ldb] *= alpha;
516 :     }
517 :     }
518 :     if (iunit && xdims[2] < 1) return; /* A is the identity */
519 :     error("Code for non-identity cases of cscb_trmm not yet written");
520 :     iup += 0; /* keep -Wall happy */
521 :     ileft += 0;
522 :     itr += 0;
523 :     }
524 :    
525 :     /**
526 : bates 434 * Solve a triangular system of the form op(A)*X = alpha*B where A
527 :     * is a cscBlocked triangular matrix and B is a dense matrix.
528 :     *
529 :     * @param uplo 'U' or 'L' for upper or lower
530 :     * @param trans 'T' or 'N' for transpose or no transpose
531 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
532 :     * @param A pointer to a triangular cscBlocked object
533 :     * @param B pointer to the contents of the matrix B
534 :     * @param m number of rows in B
535 :     * @param n number of columns in B
536 :     * @param ldb leading dimension of B as declared in the calling function
537 :     */
538 :     void cscb_trsm(char uplo, char transa, char diag,
539 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
540 :     {
541 :     int iup = (uplo == 'U' || uplo == 'u'),
542 :     itr = (transa == 'T' || transa == 't'),
543 :     iunit = (diag == 'U' || diag == 'u');
544 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
545 :     AxP = GET_SLOT(A, Matrix_xSym);
546 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
547 :     *Ap = INTEGER(ApP),
548 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
549 :     i, j, nb = length(ApP) - 1;
550 :     double *Ax = REAL(GET_SLOT(A, Matrix_xSym));
551 :    
552 :     if (xdims[0] != xdims[1])
553 :     error("Argument A to cscb_trsm is not triangular");
554 :     if (ldb < m || ldb <= 0 || n <= 0)
555 :     error("cscb_trsm: inconsistent dims m = %d, n = %d, ldb = %d",
556 :     m, n, ldb);
557 :     if (m != (nb * xdims[0]))
558 :     error("cscb_trsm: inconsistent dims m = %d, A[%d,%d,]x%d",
559 :     m, xdims[0], xdims[1], xdims[2]);
560 :     if (alpha != 1.0) {
561 :     for (j = 0; j < n; j++) { /* scale by alpha */
562 :     for (i = 0; i < m; i++)
563 :     B[i + j * ldb] *= alpha;
564 :     }
565 :     }
566 :     if (iunit) {
567 :     if (xdims[2] < 1) return; /* A is the identity */
568 :     if (xdims[0] == 1) {
569 :     if (iup) error("Code for upper triangle not yet written");
570 :     if (itr) {
571 :     for (j = 0; j < n; j++)
572 :     R_ldl_ltsolve(m, B + j * ldb, Ap, Ai, Ax);
573 :     } else {
574 :     for (j = 0; j < n; j++)
575 :     R_ldl_lsolve(m, B + j * ldb, Ap, Ai, Ax);
576 :     }
577 :     return;
578 :     }
579 :     error("Code for non-scalar cscBlocked objects not yet written");
580 :     }
581 :     error("Code for non-unit cases of cscb_trsm not yet written");
582 :     }
583 :    
584 :     /**
585 : bates 415 * Perform one of the operations B := alpha*op(A)*B or
586 :     * B := alpha*B*op(A) where A and B are both cscBlocked.
587 :     *
588 :     * @param side 'L' or 'R' for left or right
589 :     * @param uplo 'U' or 'L' for upper or lower
590 :     * @param transa 'T' or 'N' for transpose or no transpose
591 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
592 :     * @param alpha scalar multiplier
593 :     * @param A pointer to a triangular cscBlocked object
594 :     * @param B pointer to a general cscBlocked matrix
595 :     */
596 :     void cscb_trcbm(char side, char uplo, char transa, char diag,
597 :     double alpha, SEXP A, SEXP B)
598 :     {
599 :     int ileft = (side == 'L' || side == 'l'),
600 :     iup = (uplo == 'U' || uplo == 'u'),
601 :     itr = (transa == 'T' || transa == 't'),
602 :     iunit = (diag == 'U' || diag == 'u');
603 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
604 :     AxP = GET_SLOT(A, Matrix_xSym),
605 :     BpP = GET_SLOT(B, Matrix_pSym),
606 :     BxP = GET_SLOT(B, Matrix_xSym);
607 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
608 :     *Ap = INTEGER(ApP),
609 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
610 :     *Bp = INTEGER(BpP),
611 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
612 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
613 :     ncbA = length(ApP) - 1,
614 :     ncbB = length(BpP) - 1;
615 :     int i, nbx = bxdims[0] * bxdims[1] * bxdims[2];
616 :    
617 :     if (axdims[0] != axdims[1])
618 :     error("Argument A to cscb_trcbm is not triangular");
619 :     if (alpha != 1.0) {
620 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
621 :     REAL(BxP)[i] *= alpha;
622 :     }
623 :     }
624 :     if (iunit && axdims[2] < 1) return; /* A is the identity */
625 :     error("Code for non-trivial cscb_trcbm not yet written");
626 :     }
627 :    
628 :     /**
629 : bates 434 * Expand a column of a compressed, sparse, column-oriented matrix.
630 :     *
631 :     * @param dest array to hold the result
632 :     * @param m number of rows in the matrix
633 :     * @param j index (0-based) of column to expand
634 :     * @param Ap array of column pointers
635 :     * @param Ai array of row indices
636 :     * @param Ax array of non-zero values
637 :     *
638 :     * @return dest
639 :     */
640 :     static
641 :     double *expand_column(double *dest, int m, int j,
642 :     const int Ap[], const int Ai[], const double Ax[])
643 :     {
644 :     int k, k2 = Ap[j + 1];
645 :    
646 :     for (k = 0; k < m; k++) dest[k] = 0.;
647 :     for (k = Ap[j]; k < k2; k++) dest[Ai[k]] = Ax[k];
648 :     return dest;
649 :     }
650 :    
651 :     /**
652 :     * Solve one of the systems op(A)*X = alpha*B or
653 :     * X*op(A) = alpha*B where A cscBlocked triangular and B is cscBlocked.
654 :     *
655 :     * @param side 'L' or 'R' for left or right
656 :     * @param uplo 'U' or 'L' for upper or lower
657 :     * @param transa 'T' or 'N' for transpose or no transpose
658 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
659 :     * @param alpha scalar multiplier
660 :     * @param A pointer to a triangular cscBlocked object
661 :     * @param B pointer to a general cscBlocked matrix
662 :     */
663 :     void cscb_trcbsm(char side, char uplo, char transa, char diag,
664 :     double alpha, SEXP A, const int Parent[], SEXP B)
665 :     {
666 :     int ileft = (side == 'L' || side == 'l'),
667 :     iup = (uplo == 'U' || uplo == 'u'),
668 :     itr = (transa == 'T' || transa == 't'),
669 :     iunit = (diag == 'U' || diag == 'u');
670 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
671 :     AxP = GET_SLOT(A, Matrix_xSym),
672 :     BpP = GET_SLOT(B, Matrix_pSym),
673 :     BxP = GET_SLOT(B, Matrix_xSym);
674 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
675 :     *Ap = INTEGER(ApP),
676 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
677 :     *Bp = INTEGER(BpP),
678 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
679 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
680 :     ncbA = length(ApP) - 1,
681 :     ncbB = length(BpP) - 1;
682 :     int i, j, nbx = bxdims[0] * bxdims[1] * bxdims[2];
683 :     double *Ax = REAL(AxP), *Bx = REAL(BxP);
684 :    
685 :     if (axdims[0] != axdims[1])
686 :     error("Argument A to cscb_trcbm is not triangular");
687 :     if (alpha != 1.0) {
688 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
689 :     REAL(BxP)[i] *= alpha;
690 :     }
691 :     }
692 :     if (iunit && axdims[2] < 1) return; /* A is the identity */
693 :     if (iunit && axdims[0] == 1) { /* can use R_ldl code */
694 :     if (!ileft && itr) { /* case required for lmer */
695 :     int *BTp, nnz = bxdims[2], nrbB;
696 :     int *tmp = expand_column_pointers(ncbB, Bp, Calloc(nnz, int));
697 :     int *BTi = Calloc(nnz, int);
698 :     double *BTx = Calloc(nnz, double), *rhs;
699 :    
700 :     /* transpose B */
701 :     for (i = 0, nrbB = -1; i < nnz; i++) if (Bi[i] > nrbB) nrbB = Bi[i];
702 :     BTp = Calloc(nrbB, int);
703 :     triplet_to_col(ncbB, nrbB, nnz, tmp, Bi, Bx, BTp, BTi, BTx);
704 :     /* sanity check */
705 :     if (BTp[nrbB] != nnz) error("cscb_trcbsm: transpose operation failed");
706 :     Free(tmp);
707 :     /* Solve one column at a time */
708 :     rhs = Calloc(ncbB, double);
709 :     AZERO(Bx, nnz); /* zero the result */
710 :     for (i = 0; i < nrbB; i++) {
711 :     R_ldl_lsolve(ncbB, expand_column(rhs, ncbB, i, BTp, BTi, BTx),
712 :     Ap, Ai, Ax);
713 :     for (j = 0; j < ncbB; j++) { /* write non-zeros in sol'n into B */
714 :     if (BTx[j]) Bx[Tind(Bi, Bp, j, i)] = BTx[j];
715 :     }
716 :     Free(rhs); Free(BTp); Free(BTx); Free(BTi);
717 :     }
718 :     }
719 :     error("cscb_trcbsm: method not yet written");
720 :     }
721 :     error("cscb_trcbsm: method not yet written");
722 :     }
723 :    
724 :     /**
725 : bates 415 * Perform one of the matrix-matrix operations
726 :     * C := alpha*op(A)*op(B) + beta*C
727 :     * on compressed, sparse, blocked matrices.
728 :     *
729 :     * @param transa 'T' for transpose of A, else 'N'
730 :     * @param transb 'T' for transpose of B, else 'N'
731 :     * @param alpha scalar multiplier
732 :     * @param A pointer to a cscBlocked object
733 :     * @param B pointer to a cscBlocked object
734 :     * @param beta scalar multiplier
735 :     * @param C pointer to a cscBlocked object
736 :     */
737 :     void cscb_cscbm(char transa, char transb, double alpha, SEXP A, SEXP B,
738 :     double beta, SEXP C)
739 :     {
740 :     int ta = (transa == 'T' || transa == 't') ? 1 : 0,
741 :     tb = (transb == 'T' || transb == 't') ? 1 : 0;
742 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
743 :     AxP = GET_SLOT(A, Matrix_xSym),
744 :     BpP = GET_SLOT(B, Matrix_pSym),
745 :     BxP = GET_SLOT(B, Matrix_xSym),
746 :     CxP = GET_SLOT(C, Matrix_xSym);
747 :     int *Ap = INTEGER(ApP),
748 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
749 :     *Bp = INTEGER(BpP),
750 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
751 :     *Cp = INTEGER(GET_SLOT(C, Matrix_pSym)),
752 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
753 :     *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
754 :     *bdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
755 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
756 :     nca = length(ApP) - 1,
757 :     ncb = length(BpP) - 1;
758 :     int ablk = adims[0] * adims[1],
759 :     bblk = bdims[0] * bdims[1],
760 :     cblk = cdims[0] * cdims[1];
761 :     double *Ax = REAL(AxP),
762 :     *Bx = REAL(BxP),
763 :     *Cx = REAL(CxP),
764 :     one = 1.0;
765 :    
766 :     if ((!ta) && tb) { /* transposed crossproduct */
767 :     int jj;
768 :    
769 :     if (adims[1] != bdims[1] ||
770 :     adims[0] != cdims[0] ||
771 :     bdims[0] != cdims[1])
772 :     error("C[%d,%d,%d] := A[%d,%d,%d] %*% t(B[%d,%d,%d])",
773 :     cdims[0], cdims[1], cdims[2],
774 :     adims[0], adims[1], adims[2],
775 :     bdims[0], bdims[1], bdims[2]);
776 :     if (nca != ncb)
777 :     error("C := A(ncblocks = %d) %*% t(B(ncblocks = %d)", nca, ncb);
778 :     if (beta != 1.) { /* scale C by beta */
779 :     int ctot = cdims[0] * cdims[1] * cdims[2];
780 :     for (jj = 0; jj < ctot; jj++) Cx[jj] *= beta;
781 :     }
782 :     for (jj = 0; jj < nca; jj++) {
783 :     int ia, ib, a2 = Ap[jj + 1], b2 = Bp[jj + 1];
784 :     for (ia = Ap[jj]; ia < a2; ia++) {
785 :     for (ib = Bp[jj]; ib < b2; ib++) {
786 :     int cind = Ind(Ai[ia], Bi[ib], Cp, Ci);
787 :     if (cind < 0)
788 :     error("Invalid index [%d,%d]", Ai[ia], Bi[ib]);
789 :     F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
790 :     &alpha, Ax + ia * ablk, adims,
791 :     Bx + ib * bblk, bdims, &one,
792 :     Cx + cind * cblk, cdims);
793 : bates 339 }
794 :     }
795 :     }
796 : bates 415 return;
797 : bates 339 }
798 : bates 415 error("Code not yet written");
799 : bates 339 }

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge