SCM

SCM Repository

[matrix] Annotation of /pkg/src/dgBCMatrix.c
ViewVC logotype

Annotation of /pkg/src/dgBCMatrix.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 434 - (view) (download) (as text)
Original Path: pkg/src/cscBlocked.c

1 : bates 298 #include "cscBlocked.h"
2 : bates 415 /* TODO
3 :     * - code for trans = 'T' in cscb_syrk
4 :     * - code for non-trivial cscb_trmm and cscb_ldl
5 :     */
6 : bates 298 SEXP cscBlocked_validate(SEXP x)
7 :     {
8 :     SEXP pp = GET_SLOT(x, Matrix_pSym),
9 :     ip = GET_SLOT(x, Matrix_iSym),
10 :     xp = GET_SLOT(x, Matrix_xSym),
11 :     dp = getAttrib(xp, R_DimSymbol);
12 :     int *pv = INTEGER(pp),
13 :     *dim = INTEGER(dp),
14 :     ncol = length(pp) - 1;
15 :     int nnz = pv[ncol];
16 :    
17 : bates 304 if (!(isReal(xp) && isArray(xp)))
18 : bates 415 return mkString("slot x should be a real array");
19 : bates 298 if (length(dp) != 3)
20 : bates 415 return mkString("slot x should be a 3-dimensional array");
21 : bates 298 if (length(ip) != nnz)
22 : bates 415 return mkString("length of slot i does not matck last element of slot p");
23 : bates 298 if (dim[2] != nnz)
24 : bates 415 return
25 :     mkString("third dimension of slot x does not match number of nonzeros");
26 : bates 298 return ScalarLogical(1);
27 :     }
28 :    
29 : bates 434 static int*
30 :     expand_column_pointers(int ncol, const int mp[], int mj[])
31 :     {
32 :     int j;
33 :     for (j = 0; j < ncol; j++) {
34 :     int j2 = mp[j+1], jj;
35 :     for (jj = mp[j]; jj < j2; jj++) mj[jj] = j;
36 :     }
37 :     return mj;
38 :     }
39 :    
40 :     static R_INLINE
41 :     int Tind(const int rowind[], const int colptr[], int i, int j)
42 :     {
43 :     int k, k2 = colptr[j + 1];
44 :     for (k = colptr[j]; k < k2; k++)
45 :     if (rowind[k] == i) return k;
46 :     error("row %d and column %d not defined in rowind and colptr",
47 :     i, j);
48 :     return -1; /* -Wall */
49 :     }
50 :    
51 : bates 300 /**
52 :     * Perform one of the matrix operations
53 :     * C := alpha*op(A)*B + beta*C
54 :     * or
55 :     * C := alpha*B*op(A) + beta*C
56 :     * where A is a compressed, sparse, blocked matrix and
57 :     * B and C are dense matrices.
58 :     *
59 :     * @param side 'L' or 'l' for left, otherwise right
60 :     * @param transa 'T' or 't' for transpose.
61 :     * @param m number of rows in C
62 :     * @param n number of columns in C
63 : bates 415 * @param k number of rows in B if side = 'L', otherwise
64 :     * number of columns in B.
65 : bates 300 * @param alpha
66 :     * @param nr number of rows per block of A
67 :     * @param nc number of columns per block of A
68 :     * @param ap vector of column pointers in A
69 :     * @param ai vector of row indices in A
70 :     * @param ax contents of non-zero blocks of A
71 :     * @param b matrix to be multiplied
72 :     * @param ldb leading dimension of b as declared in the calling
73 :     * routine
74 : bates 304 * @param beta scalar multiplier of c
75 : bates 300 * @param c product matrix to be modified
76 : bates 304 * @param ldc leading dimension of c as declared in the calling
77 : bates 300 * routine
78 :     */
79 :     void
80 :     cscBlocked_mm(char side, char transa, int m, int n, int k,
81 :     double alpha, int nr, int nc,
82 :     const int ap[], const int ai[],
83 :     const double ax[],
84 :     const double b[], int ldb,
85 :     double beta, double c[], int ldc)
86 :     {
87 : bates 304 int j, kk, lside = (side == 'L' || side == 'l');
88 :     int ncb, nrb, sz = nr * nc, tra = (transa == 'T' || transa == 't');
89 : bates 300
90 : bates 304 if (nr < 1 || nc < 1 || m < 0 || n < 0 || k < 0)
91 :     error("improper dims m=%d, n=%d, k=%d, nr=%d, nc=%d",
92 : bates 300 m, n, k, nr, nc);
93 : bates 304 if (ldc < n) error("incompatible dims n=%d, ldc=%d", n, ldc);
94 :     if (lside) {
95 :     if (ldb < k)
96 :     error("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d",
97 :     k, ldb, n, nr, nc);
98 :     if (tra) {
99 :     if (m % nc || k % nr)
100 :     error("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d",
101 :     m, k, nr, nc);
102 :     nrb = k/nr; ncb = m/nc;
103 :     } else {
104 :     if (m % nr || k % nc)
105 :     error("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d",
106 :     m, k, nr, nc);
107 :     nrb = m/nr; ncb = k/nc;
108 :     }
109 : bates 300 for (j = 0; j < ncb; j++) {
110 : bates 304 int j2 = ap[j + 1];
111 :     for (kk = ap[j]; kk < j2; kk++) {
112 :     int ii = ai[kk];
113 :     if (ii < 0 || ii >= nrb)
114 :     error("improper row index ii=%d, nrb=%d", ii, nrb);
115 :     if (tra) {
116 :     F77_CALL(dgemm)("T", "N", &nc, &n, &nr,
117 : bates 415 &alpha, ax + kk * sz, &nr,
118 : bates 304 b + ii * nr, &ldb,
119 :     &beta, c + j * nc, &ldc);
120 :     } else {
121 :     F77_CALL(dgemm)("N", "N", &nr, &n, &nc,
122 : bates 415 &alpha, ax + kk * sz, &nr,
123 : bates 304 b + j*nc, &ldb,
124 :     &beta, c + ii * nr, &ldc);
125 :     }
126 : bates 300 }
127 :     }
128 :     } else {
129 : bates 304 error("Call to cscBlocked_mm must have side == 'L'");
130 : bates 300 }
131 :     }
132 : bates 304
133 :     /**
134 : bates 415 * Perform one of the matrix operations
135 :     * C := alpha*op(A)*B + beta*C
136 :     * or
137 :     * C := alpha*B*op(A) + beta*C
138 :     * where A is a compressed, sparse, blocked matrix and
139 :     * B and C are dense matrices.
140 :     *
141 :     * @param side 'L' or 'l' for left, otherwise right
142 :     * @param transa 'T' or 't' for transpose.
143 :     * @param m number of rows in C
144 :     * @param n number of columns in C
145 :     * @param k number of rows in B if side = 'L', otherwise
146 :     * number of columns in B.
147 :     * @param alpha
148 :     * @param A pointer to a cscBlocked object
149 :     * @param B matrix to be multiplied
150 :     * @param ldb leading dimension of b as declared in the calling
151 :     * routine
152 :     * @param beta scalar multiplier of c
153 :     * @param C product matrix to be modified
154 :     * @param ldc leading dimension of c as declared in the calling
155 :     * routine
156 :     */
157 :     void
158 :     cscb_mm(char side, char transa, int m, int n, int k,
159 :     double alpha, SEXP A,
160 :     const double B[], int ldb,
161 :     double beta, double C[], int ldc)
162 :     {
163 :     int lside = (side == 'L' || side == 'l'),
164 :     tra = (transa == 'T' || transa == 't');
165 :     SEXP AxP = GET_SLOT(A, Matrix_xSym),
166 :     ApP = GET_SLOT(A, Matrix_pSym);
167 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
168 :     *Ap = INTEGER(ApP),
169 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
170 :     ancb = length(ApP) - 1, /* number of column blocks */
171 :     anrb; /* number of row blocks */
172 :     int absz = adims[0] * adims[1]; /* block size */
173 :     int j;
174 :     double *Ax = REAL(AxP);
175 :    
176 : bates 422 if (ldc < m) error("incompatible dims m=%d, ldc=%d", m, ldc);
177 : bates 415 if (lside) {
178 :     /* B is of size k by n */
179 :     if (ldb < k)
180 :     error("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d",
181 :     k, ldb, n, adims[0], adims[1]);
182 :     if (tra) {
183 :     if (m % adims[1] || k % adims[0])
184 :     error("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d",
185 :     m, k, adims[0], adims[1]);
186 :     if (ancb != m/adims[1])
187 :     error("incompatible LT dims m=%d, ancb=%d, adims=[%d,%d,%d]",
188 :     m, ancb, adims[0], adims[1], adims[3]);
189 :     anrb = k/adims[0];
190 :     } else {
191 :     if (m % adims[0] || k % adims[1])
192 :     error("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d",
193 :     m, k, adims[0], adims[1]);
194 :     if (ancb != k/adims[1])
195 :     error("incompatible LN dims k=%d, ancb=%d, adims=[%d,%d,%d]",
196 :     k, ancb, adims[0], adims[1], adims[3]);
197 :     anrb = m/adims[0];
198 :     }
199 :     for (j = 0; j < ancb; j++) {
200 :     int kk, j2 = Ap[j + 1];
201 :     for (kk = Ap[j]; kk < j2; kk++) {
202 :     int ii = Ai[kk];
203 :     if (ii < 0 || ii >= anrb)
204 :     error("improper row index ii=%d, anrb=%d", ii, anrb);
205 :     if (tra) {
206 :     F77_CALL(dgemm)("T", "N", adims+1, &n, adims,
207 :     &alpha, Ax + kk * absz, adims,
208 :     B + ii * adims[0], &ldb,
209 :     &beta, C + j * adims[1], &ldc);
210 :     } else {
211 :     F77_CALL(dgemm)("N", "N", adims, &n, adims+1,
212 :     &alpha, Ax + kk * absz, adims,
213 :     B + j*adims[1], &ldb,
214 : bates 422 &beta, C + ii * adims[0], &ldc);
215 : bates 415 }
216 :     }
217 :     }
218 :     } else {
219 :     /* B is of size m by k */
220 :     error("Call to cscb_mm must have side == 'L'");
221 :     }
222 :     }
223 :    
224 :     /**
225 : bates 304 * Invert a triangular sparse blocked matrix. This is not done in
226 :     * place because the number of non-zero blocks in A-inverse can be
227 :     * different than the number of non-zero blocks in A.
228 :     *
229 :     * @param upper 'U' indicates upper triangular, 'L' lower
230 :     * @param unit 'U' indicates unit diagonal, 'N' non-unit
231 : bates 415 * @param A Pointer to a triangular cscBlocked object
232 :     * @param Ai Pointer to a triangular cscBlocked object
233 : bates 304 */
234 :     void
235 : bates 415 cscb_tri(char upper, char unit, SEXP A, const int Parent[], SEXP AI)
236 : bates 304 {
237 : bates 415 SEXP ApP = GET_SLOT(A, Matrix_pSym),
238 :     AxP = GET_SLOT(A, Matrix_xSym),
239 :     AIpP = GET_SLOT(AI, Matrix_pSym),
240 :     AIxP = GET_SLOT(AI, Matrix_xSym);
241 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
242 :     *Ap = INTEGER(ApP),
243 :     *AIi = INTEGER(GET_SLOT(AI, Matrix_iSym)),
244 :     *AIp = INTEGER(AIpP),
245 :     *adim = INTEGER(getAttrib(AxP, R_DimSymbol)),
246 :     *aidim = INTEGER(getAttrib(AxP, R_DimSymbol)),
247 :     ancb = length(ApP) - 1,
248 :     aincb = length(AIpP) - 1,
249 :     iup = (upper == 'U' || upper == 'u'),
250 :     iunit = (unit == 'U' || unit == 'u');
251 :    
252 :     if (aidim[0] != adim[0] || aidim[1] != adim[1] || aidim[2] != adim[2] ||
253 :     adim[0] != adim[1] || ancb != aincb)
254 :     error("Incompatible dimensions A[%d,%d,%d]x%d; AI[%d,%d,%d]x%d",
255 :     adim[0], adim[1], adim[2], ancb,
256 :     aidim[0], aidim[1], aidim[2], aincb);
257 : bates 304 if (!iunit)
258 :     error("Code for non-unit triangular matrices not yet written");
259 : bates 415 if (adim[2] > 0)
260 : bates 304 error("Code for non-trivial unit inverse not yet written");
261 :     }
262 : bates 339
263 :     /**
264 :     * Search for the element in a compressed sparse matrix at a given row and column
265 :     *
266 :     * @param row row index
267 :     * @param col column index
268 :     * @param cp column pointers
269 :     * @param ci row indices
270 :     *
271 :     * @return index of element in ci, if it exists, else -1
272 :     */
273 :     static R_INLINE
274 :     int Ind(int row, int col, const int cp[], const int ci[])
275 :     {
276 :     int i, i2 = cp[col + 1];
277 :     for (i = cp[col]; i < i2; i++)
278 :     if (ci[i] == row) return i;
279 :     return -1;
280 :     }
281 :    
282 :     /**
283 :     * Perform one of the matrix operations
284 :     * C := alpha*A*A' + beta*C,
285 :     * or
286 :     * C := alpha*A'*A + beta*C,
287 :     * where A is a compressed, sparse, blocked matrix and
288 :     * C is a compressed, sparse, symmetric blocked matrix.
289 :     *
290 :     * @param uplo 'U' or 'u' for upper triangular storage, else lower.
291 :     * @param trans 'T' or 't' for transpose.
292 :     * @param alpha scalar multiplier of outer product
293 :     * @param A compressed sparse blocked matrix
294 :     * @param beta scalar multiplier of c
295 :     * @param C compressed sparse blocked symmetric matrix to be updated
296 :     */
297 : bates 415 void cscb_syrk(char uplo, char trans, double alpha, SEXP A, double beta, SEXP C)
298 : bates 339 {
299 : bates 415 SEXP AxP = GET_SLOT(A, Matrix_xSym),
300 :     ApP = GET_SLOT(A, Matrix_pSym),
301 :     CxP = GET_SLOT(C, Matrix_xSym),
302 :     CpP = GET_SLOT(C, Matrix_pSym);
303 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
304 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
305 :     *Ap = INTEGER(ApP),
306 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
307 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
308 :     *Cp = INTEGER(CpP),
309 :     iup = (uplo == 'U' || uplo == 'u'),
310 :     itr = (trans == 'T' || trans == 't'),
311 : bates 422 j, k;
312 : bates 415 double *Ax = REAL(AxP), *Cx = REAL(CxP), one = 1.;
313 :     int scalar = (adims[0] == 1 && adims[1] == 1),
314 : bates 434 anc = length(ApP) - 1,
315 : bates 415 asz = adims[0] * adims[1],
316 :     csz = cdims[0] * cdims[1];
317 : bates 339
318 : bates 415
319 : bates 339 if (cdims[0] != cdims[1]) error("blocks in C must be square");
320 : bates 415 if (itr) {
321 : bates 339 error("Code for trans == 'T' not yet written");
322 : bates 415 /* FIXME: Write this part */
323 :     } else {
324 : bates 422 if (adims[0] != cdims[0])
325 :     error("Inconsistent dimensions: A[%d,%d,%d], C[%d,%d,%d]",
326 :     adims[0], adims[1], adims[2],
327 :     cdims[0], cdims[1], cdims[2]);
328 : bates 339 /* check the row indices */
329 : bates 415 for (k = 0; k < adims[2]; k++) {
330 :     int aik = Ai[k];
331 : bates 422 if (aik < 0 || aik >= cdims[2])
332 : bates 415 error("Row index %d = %d is out of range [0, %d]",
333 : bates 422 k, aik, cdims[2] - 1);
334 : bates 415 }
335 :     /* multiply C by beta */
336 :     if (beta != 1.)
337 :     for (j = 0; j < csz * cdims[2]; j++) Cx[j] *= beta;
338 :     /* individual products */
339 : bates 434 for (j = 0; j < anc; j++) {
340 : bates 415 int k, kk, k2 = Ap[j+1];
341 :     for (k = Ap[j]; k < k2; k++) {
342 :     int ii = Ai[k], K = Ind(ii, ii, Cp, Ci);
343 :    
344 :     if (scalar) Cx[K] += alpha * Ax[k] * Ax[k];
345 :     else F77_CALL(dsyrk)(&uplo, "N", cdims, adims + 1,
346 :     &alpha, Ax + k * asz, adims,
347 :     &one, Cx + K * csz, cdims);
348 :    
349 :     for (kk = k+1; kk < k2; kk++) {
350 :     int jj = Ai[kk];
351 :     K = (iup) ? Ind(jj, ii, Cp, Ci) : Ind(ii, jj, Cp, Ci);
352 :     if (scalar) Cx[K] += alpha * Ax[k] * Ax[kk];
353 :     else F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
354 :     &alpha, Ax + ((iup)?kk:k) * asz, adims,
355 :     Ax + ((iup)?k:kk) * asz, adims,
356 :     &one, Cx + K * asz, cdims);
357 : bates 339 }
358 :     }
359 :     }
360 :     }
361 : bates 415 }
362 :    
363 :     /**
364 :     * Create the LDL' decomposition of the positive definite symmetric
365 :     * cscBlocked matrix A (upper triangle stored) in L and D.
366 :     *
367 :     * @param A pointer to a cscBlocked object containing the upper
368 :     * triangle of a positive definite symmetric matrix.
369 :     * @param Parent the parent array for A
370 :     * @param L pointer to a cscBlocked object to hold L
371 :     * @param D pointer to a 3D array to hold D
372 :     *
373 :     * @return n the number of column blocks in A for success. A value
374 :     * less than n indicates the first column block whose diagonal was not
375 :     * positive definite.
376 :     */
377 :     int
378 :     cscb_ldl(SEXP A, const int Parent[], SEXP L, SEXP D)
379 :     {
380 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
381 :     AxP = GET_SLOT(A, Matrix_xSym);
382 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
383 : bates 434 diag, info, j, k, n = length(ApP) - 1;
384 : bates 415 int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
385 :     *Ap = INTEGER(ApP),
386 :     *Li = INTEGER(GET_SLOT(L, Matrix_iSym)),
387 : bates 434 *Lp = INTEGER(GET_SLOT(L, Matrix_pSym)), nci = adims[0];
388 : bates 415 double *Lx = REAL(GET_SLOT(L, Matrix_xSym)),
389 : bates 434 *Ax = REAL(AxP), *Dx = REAL(D), minus1 = -1., one = 1.;
390 : bates 415
391 : bates 434 if (adims[1] != nci || nci < 1)
392 :     error("cscb_ldl: dim(A) is [%d, %d, %d]", adims[0], adims[1], adims[2]);
393 : bates 415 for (j = 0, diag = 1; j < n; j++) { /* check for trivial structure */
394 :     if (Parent[j] >= 0) {diag = 0; break;}
395 :     }
396 :     if (diag) {
397 : bates 434 int ncisqr = nci * nci;
398 :     Memcpy(Dx, Ax, ncisqr * n);
399 :     for (j = 0; j < n; j++) { /* form D_i^{1/2} */
400 :     F77_CALL(dpotrf)("U", &nci, Dx + j * ncisqr, &nci, &k);
401 :     if (k) error("D[ , , %d], level %d, is not positive definite",
402 :     j + 1);
403 :     }
404 : bates 415 return n;
405 :     }
406 : bates 434 if (nci == 1) {
407 :     k = R_ldl_numeric(n, Ap, Ai, Ax, Lp, Parent, Li, Lx, Dx,
408 :     (int *) NULL, (int *) NULL);
409 :     if (k < n) error("cscb_ldl: error code %k from R_ldl_numeric", k);
410 :     for (j = 0; j < n; j++) Dx[j] = sqrt(Dx[j]);
411 :     return n;
412 :     } else { /* Copy of ldl_numeric from the LDL package
413 :     * modified for blocked sparse matrices */
414 :     int i, k, p, p2, len, nci = adims[0], ncisqr = adims[0]*adims[0], top;
415 :     int *Lnz = Calloc(n, int),
416 :     *Pattern = Calloc(n, int),
417 :     *Flag = Calloc(n, int);
418 :     double *Y = Calloc(n * ncisqr, double), *Yi = Calloc(ncisqr, double);
419 : bates 415
420 : bates 434 for (k = 0; k < n; k++) {
421 :     /* compute nonzero Pattern of kth row of L, in topological order */
422 :     AZERO(Y + k*ncisqr, ncisqr); /* Y[,,0:k] is now all zero */
423 :     top = n; /* stack for pattern is empty */
424 :     Flag[k] = k; /* mark node k as visited */
425 :     Lnz[k] = 0; /* count of nonzeros in column k of L */
426 :     p2 = Ap[k+1];
427 :     for (p = Ap[k]; p < p2; p++) {
428 :     i = Ai[p]; /* get A[i,k] */
429 :     if (i <= k) { /* [i,k] in upper triangle? Should always be true */
430 :     /* copy A(i,k) into Y */
431 :     Memcpy(Y + i * ncisqr, Ax + p * ncisqr, ncisqr);
432 :     /* follow path from i to root of etree, stop at flagged node */
433 :     for (len = 0; Flag[i] != k; i = Parent[i]) {
434 :     Pattern[len++] = i; /* L[k,i] is nonzero */
435 :     Flag[i] = k; /* mark i as visited */
436 :     }
437 :     while (len > 0) { /* push path on top of stack */
438 :     Pattern[--top] = Pattern[--len];
439 :     }
440 :     }
441 :     }
442 :     /* Pattern [top ... n-1] now contains nonzero pattern of L[,k] */
443 :     /* compute numerical values in kth row of L (a sparse triangular solve) */
444 :     Memcpy(Dx + k * ncisqr, Y + k * ncisqr, ncisqr); /* get D[,,k] */
445 :     AZERO(Y + k*ncisqr, ncisqr); /* clear Y[,,k] */
446 :     for (; top < n; top++) {
447 :     i = Pattern[top];
448 :     Memcpy(Yi, Y + i*ncisqr, ncisqr); /* copy Y[,,i] */
449 :     AZERO(Y + i*ncisqr, ncisqr); /* clear Y[,,i] */
450 :     p2 = Lp[i] + Lnz[i];
451 :     for (p = Lp[i]; p < p2; p++) {
452 :     F77_CALL(dgemm)("N", "N", &nci, &nci, &nci, &minus1,
453 :     Lx + p*ncisqr, &nci, Yi, &nci,
454 :     &one, Y + Li[p]*ncisqr, &nci);
455 :     }
456 :     /* FIXME: Check that this is the correct order and transposition */
457 :     F77_CALL(dtrsm)("L", "U", "N", "N", &nci, &nci,
458 :     &one, Dx + i*ncisqr, &nci, Yi, &nci);
459 :     F77_CALL(dsyrk)("U", "N", &nci, &nci, &minus1, Yi, &nci,
460 :     &one, Dx + k*ncisqr, &nci);
461 :     F77_CALL(dtrsm)("L", "U", "T", "N", &nci, &nci,
462 :     &one, Dx + i*ncisqr, &nci, Yi, &nci);
463 :     Li[p] = k; /* store L[k,i] in column form of L */
464 :     Memcpy(Lx + p * ncisqr, Yi, ncisqr);
465 :     Lnz[i]++; /* increment count of nonzeros in col i */
466 :     }
467 :     F77_CALL(dpotrf)("U", &nci, Dx + k*ncisqr, &nci, &info);
468 :     if (info) {
469 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
470 :     return k; /* failure, D[,,k] is not positive definite */
471 :     }
472 :     }
473 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
474 :     return n; /* success, diagonal of D is all nonzero */
475 : bates 415 }
476 :     return -1; /* keep -Wall happy */
477 :     }
478 :    
479 :     /**
480 :     * Perform one of the cscBlocked-matrix operations B := alpha*op(A)*B
481 :     * or B := alpha*B*op(A)
482 :     *
483 :     * @param side 'L' or 'R' for left or right
484 :     * @param uplo 'U' or 'L' for upper or lower
485 :     * @param transa 'T' or 'N' for transpose or no transpose
486 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
487 :     * @param A pointer to a triangular cscBlocked object
488 :     * @param B pointer to the contents of the matrix B
489 :     * @param m number of rows in B
490 :     * @param n number of columns in B
491 :     * @param ldb leading dimension of B as declared in the calling function
492 :     */
493 :     void cscb_trmm(char side, char uplo, char transa, char diag,
494 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
495 :     {
496 :     int ileft = (side == 'L' || side == 'l'),
497 :     iup = (uplo == 'U' || uplo == 'u'),
498 :     itr = (transa == 'T' || transa == 't'),
499 :     iunit = (diag == 'U' || diag == 'u');
500 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
501 :     AxP = GET_SLOT(A, Matrix_xSym);
502 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
503 :     *Ap = INTEGER(ApP),
504 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
505 :     i, j, nb = length(ApP) - 1;
506 :    
507 :     if (xdims[0] != xdims[1])
508 :     error("Argument A to cscb_trmm is not triangular");
509 :     if (alpha != 1.0) {
510 :     for (j = 0; j < n; j++) { /* scale by alpha */
511 :     for (i = 0; i < m; i++)
512 :     B[i + j * ldb] *= alpha;
513 :     }
514 :     }
515 :     if (iunit && xdims[2] < 1) return; /* A is the identity */
516 :     error("Code for non-identity cases of cscb_trmm not yet written");
517 :     iup += 0; /* keep -Wall happy */
518 :     ileft += 0;
519 :     itr += 0;
520 :     }
521 :    
522 :     /**
523 : bates 434 * Solve a triangular system of the form op(A)*X = alpha*B where A
524 :     * is a cscBlocked triangular matrix and B is a dense matrix.
525 :     *
526 :     * @param uplo 'U' or 'L' for upper or lower
527 :     * @param trans 'T' or 'N' for transpose or no transpose
528 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
529 :     * @param A pointer to a triangular cscBlocked object
530 :     * @param B pointer to the contents of the matrix B
531 :     * @param m number of rows in B
532 :     * @param n number of columns in B
533 :     * @param ldb leading dimension of B as declared in the calling function
534 :     */
535 :     void cscb_trsm(char uplo, char transa, char diag,
536 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
537 :     {
538 :     int iup = (uplo == 'U' || uplo == 'u'),
539 :     itr = (transa == 'T' || transa == 't'),
540 :     iunit = (diag == 'U' || diag == 'u');
541 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
542 :     AxP = GET_SLOT(A, Matrix_xSym);
543 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
544 :     *Ap = INTEGER(ApP),
545 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
546 :     i, j, nb = length(ApP) - 1;
547 :     double *Ax = REAL(GET_SLOT(A, Matrix_xSym));
548 :    
549 :     if (xdims[0] != xdims[1])
550 :     error("Argument A to cscb_trsm is not triangular");
551 :     if (ldb < m || ldb <= 0 || n <= 0)
552 :     error("cscb_trsm: inconsistent dims m = %d, n = %d, ldb = %d",
553 :     m, n, ldb);
554 :     if (m != (nb * xdims[0]))
555 :     error("cscb_trsm: inconsistent dims m = %d, A[%d,%d,]x%d",
556 :     m, xdims[0], xdims[1], xdims[2]);
557 :     if (alpha != 1.0) {
558 :     for (j = 0; j < n; j++) { /* scale by alpha */
559 :     for (i = 0; i < m; i++)
560 :     B[i + j * ldb] *= alpha;
561 :     }
562 :     }
563 :     if (iunit) {
564 :     if (xdims[2] < 1) return; /* A is the identity */
565 :     if (xdims[0] == 1) {
566 :     if (iup) error("Code for upper triangle not yet written");
567 :     if (itr) {
568 :     for (j = 0; j < n; j++)
569 :     R_ldl_ltsolve(m, B + j * ldb, Ap, Ai, Ax);
570 :     } else {
571 :     for (j = 0; j < n; j++)
572 :     R_ldl_lsolve(m, B + j * ldb, Ap, Ai, Ax);
573 :     }
574 :     return;
575 :     }
576 :     error("Code for non-scalar cscBlocked objects not yet written");
577 :     }
578 :     error("Code for non-unit cases of cscb_trsm not yet written");
579 :     }
580 :    
581 :     /**
582 : bates 415 * Perform one of the operations B := alpha*op(A)*B or
583 :     * B := alpha*B*op(A) where A and B are both cscBlocked.
584 :     *
585 :     * @param side 'L' or 'R' for left or right
586 :     * @param uplo 'U' or 'L' for upper or lower
587 :     * @param transa 'T' or 'N' for transpose or no transpose
588 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
589 :     * @param alpha scalar multiplier
590 :     * @param A pointer to a triangular cscBlocked object
591 :     * @param B pointer to a general cscBlocked matrix
592 :     */
593 :     void cscb_trcbm(char side, char uplo, char transa, char diag,
594 :     double alpha, SEXP A, SEXP B)
595 :     {
596 :     int ileft = (side == 'L' || side == 'l'),
597 :     iup = (uplo == 'U' || uplo == 'u'),
598 :     itr = (transa == 'T' || transa == 't'),
599 :     iunit = (diag == 'U' || diag == 'u');
600 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
601 :     AxP = GET_SLOT(A, Matrix_xSym),
602 :     BpP = GET_SLOT(B, Matrix_pSym),
603 :     BxP = GET_SLOT(B, Matrix_xSym);
604 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
605 :     *Ap = INTEGER(ApP),
606 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
607 :     *Bp = INTEGER(BpP),
608 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
609 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
610 :     ncbA = length(ApP) - 1,
611 :     ncbB = length(BpP) - 1;
612 :     int i, nbx = bxdims[0] * bxdims[1] * bxdims[2];
613 :    
614 :     if (axdims[0] != axdims[1])
615 :     error("Argument A to cscb_trcbm is not triangular");
616 :     if (alpha != 1.0) {
617 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
618 :     REAL(BxP)[i] *= alpha;
619 :     }
620 :     }
621 :     if (iunit && axdims[2] < 1) return; /* A is the identity */
622 :     error("Code for non-trivial cscb_trcbm not yet written");
623 :     }
624 :    
625 :     /**
626 : bates 434 * Expand a column of a compressed, sparse, column-oriented matrix.
627 :     *
628 :     * @param dest array to hold the result
629 :     * @param m number of rows in the matrix
630 :     * @param j index (0-based) of column to expand
631 :     * @param Ap array of column pointers
632 :     * @param Ai array of row indices
633 :     * @param Ax array of non-zero values
634 :     *
635 :     * @return dest
636 :     */
637 :     static
638 :     double *expand_column(double *dest, int m, int j,
639 :     const int Ap[], const int Ai[], const double Ax[])
640 :     {
641 :     int k, k2 = Ap[j + 1];
642 :    
643 :     for (k = 0; k < m; k++) dest[k] = 0.;
644 :     for (k = Ap[j]; k < k2; k++) dest[Ai[k]] = Ax[k];
645 :     return dest;
646 :     }
647 :    
648 :     /**
649 :     * Solve one of the systems op(A)*X = alpha*B or
650 :     * X*op(A) = alpha*B where A cscBlocked triangular and B is cscBlocked.
651 :     *
652 :     * @param side 'L' or 'R' for left or right
653 :     * @param uplo 'U' or 'L' for upper or lower
654 :     * @param transa 'T' or 'N' for transpose or no transpose
655 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
656 :     * @param alpha scalar multiplier
657 :     * @param A pointer to a triangular cscBlocked object
658 :     * @param B pointer to a general cscBlocked matrix
659 :     */
660 :     void cscb_trcbsm(char side, char uplo, char transa, char diag,
661 :     double alpha, SEXP A, const int Parent[], SEXP B)
662 :     {
663 :     int ileft = (side == 'L' || side == 'l'),
664 :     iup = (uplo == 'U' || uplo == 'u'),
665 :     itr = (transa == 'T' || transa == 't'),
666 :     iunit = (diag == 'U' || diag == 'u');
667 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
668 :     AxP = GET_SLOT(A, Matrix_xSym),
669 :     BpP = GET_SLOT(B, Matrix_pSym),
670 :     BxP = GET_SLOT(B, Matrix_xSym);
671 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
672 :     *Ap = INTEGER(ApP),
673 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
674 :     *Bp = INTEGER(BpP),
675 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
676 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
677 :     ncbA = length(ApP) - 1,
678 :     ncbB = length(BpP) - 1;
679 :     int i, j, nbx = bxdims[0] * bxdims[1] * bxdims[2];
680 :     double *Ax = REAL(AxP), *Bx = REAL(BxP);
681 :    
682 :     if (axdims[0] != axdims[1])
683 :     error("Argument A to cscb_trcbm is not triangular");
684 :     if (alpha != 1.0) {
685 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
686 :     REAL(BxP)[i] *= alpha;
687 :     }
688 :     }
689 :     if (iunit && axdims[2] < 1) return; /* A is the identity */
690 :     if (iunit && axdims[0] == 1) { /* can use R_ldl code */
691 :     if (!ileft && itr) { /* case required for lmer */
692 :     int *BTp, nnz = bxdims[2], nrbB;
693 :     int *tmp = expand_column_pointers(ncbB, Bp, Calloc(nnz, int));
694 :     int *BTi = Calloc(nnz, int);
695 :     double *BTx = Calloc(nnz, double), *rhs;
696 :    
697 :     /* transpose B */
698 :     for (i = 0, nrbB = -1; i < nnz; i++) if (Bi[i] > nrbB) nrbB = Bi[i];
699 :     BTp = Calloc(nrbB, int);
700 :     triplet_to_col(ncbB, nrbB, nnz, tmp, Bi, Bx, BTp, BTi, BTx);
701 :     /* sanity check */
702 :     if (BTp[nrbB] != nnz) error("cscb_trcbsm: transpose operation failed");
703 :     Free(tmp);
704 :     /* Solve one column at a time */
705 :     rhs = Calloc(ncbB, double);
706 :     AZERO(Bx, nnz); /* zero the result */
707 :     for (i = 0; i < nrbB; i++) {
708 :     R_ldl_lsolve(ncbB, expand_column(rhs, ncbB, i, BTp, BTi, BTx),
709 :     Ap, Ai, Ax);
710 :     for (j = 0; j < ncbB; j++) { /* write non-zeros in sol'n into B */
711 :     if (BTx[j]) Bx[Tind(Bi, Bp, j, i)] = BTx[j];
712 :     }
713 :     Free(rhs); Free(BTp); Free(BTx); Free(BTi);
714 :     }
715 :     }
716 :     error("cscb_trcbsm: method not yet written");
717 :     }
718 :     error("cscb_trcbsm: method not yet written");
719 :     }
720 :    
721 :     /**
722 : bates 415 * Perform one of the matrix-matrix operations
723 :     * C := alpha*op(A)*op(B) + beta*C
724 :     * on compressed, sparse, blocked matrices.
725 :     *
726 :     * @param transa 'T' for transpose of A, else 'N'
727 :     * @param transb 'T' for transpose of B, else 'N'
728 :     * @param alpha scalar multiplier
729 :     * @param A pointer to a cscBlocked object
730 :     * @param B pointer to a cscBlocked object
731 :     * @param beta scalar multiplier
732 :     * @param C pointer to a cscBlocked object
733 :     */
734 :     void cscb_cscbm(char transa, char transb, double alpha, SEXP A, SEXP B,
735 :     double beta, SEXP C)
736 :     {
737 :     int ta = (transa == 'T' || transa == 't') ? 1 : 0,
738 :     tb = (transb == 'T' || transb == 't') ? 1 : 0;
739 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
740 :     AxP = GET_SLOT(A, Matrix_xSym),
741 :     BpP = GET_SLOT(B, Matrix_pSym),
742 :     BxP = GET_SLOT(B, Matrix_xSym),
743 :     CxP = GET_SLOT(C, Matrix_xSym);
744 :     int *Ap = INTEGER(ApP),
745 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
746 :     *Bp = INTEGER(BpP),
747 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
748 :     *Cp = INTEGER(GET_SLOT(C, Matrix_pSym)),
749 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
750 :     *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
751 :     *bdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
752 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
753 :     nca = length(ApP) - 1,
754 :     ncb = length(BpP) - 1;
755 :     int ablk = adims[0] * adims[1],
756 :     bblk = bdims[0] * bdims[1],
757 :     cblk = cdims[0] * cdims[1];
758 :     double *Ax = REAL(AxP),
759 :     *Bx = REAL(BxP),
760 :     *Cx = REAL(CxP),
761 :     one = 1.0;
762 :    
763 :     if ((!ta) && tb) { /* transposed crossproduct */
764 :     int jj;
765 :    
766 :     if (adims[1] != bdims[1] ||
767 :     adims[0] != cdims[0] ||
768 :     bdims[0] != cdims[1])
769 :     error("C[%d,%d,%d] := A[%d,%d,%d] %*% t(B[%d,%d,%d])",
770 :     cdims[0], cdims[1], cdims[2],
771 :     adims[0], adims[1], adims[2],
772 :     bdims[0], bdims[1], bdims[2]);
773 :     if (nca != ncb)
774 :     error("C := A(ncblocks = %d) %*% t(B(ncblocks = %d)", nca, ncb);
775 :     if (beta != 1.) { /* scale C by beta */
776 :     int ctot = cdims[0] * cdims[1] * cdims[2];
777 :     for (jj = 0; jj < ctot; jj++) Cx[jj] *= beta;
778 :     }
779 :     for (jj = 0; jj < nca; jj++) {
780 :     int ia, ib, a2 = Ap[jj + 1], b2 = Bp[jj + 1];
781 :     for (ia = Ap[jj]; ia < a2; ia++) {
782 :     for (ib = Bp[jj]; ib < b2; ib++) {
783 :     int cind = Ind(Ai[ia], Bi[ib], Cp, Ci);
784 :     if (cind < 0)
785 :     error("Invalid index [%d,%d]", Ai[ia], Bi[ib]);
786 :     F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
787 :     &alpha, Ax + ia * ablk, adims,
788 :     Bx + ib * bblk, bdims, &one,
789 :     Cx + cind * cblk, cdims);
790 : bates 339 }
791 :     }
792 :     }
793 : bates 415 return;
794 : bates 339 }
795 : bates 415 error("Code not yet written");
796 : bates 339 }

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge