SCM

SCM Repository

[matrix] Annotation of /pkg/src/dgBCMatrix.c
ViewVC logotype

Annotation of /pkg/src/dgBCMatrix.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 448 - (view) (download) (as text)
Original Path: pkg/src/cscBlocked.c

1 : bates 298 #include "cscBlocked.h"
2 : bates 415 /* TODO
3 :     * - code for trans = 'T' in cscb_syrk
4 :     * - code for non-trivial cscb_trmm and cscb_ldl
5 :     */
6 : bates 447
7 : bates 298 SEXP cscBlocked_validate(SEXP x)
8 :     {
9 :     SEXP pp = GET_SLOT(x, Matrix_pSym),
10 :     ip = GET_SLOT(x, Matrix_iSym),
11 :     xp = GET_SLOT(x, Matrix_xSym),
12 :     dp = getAttrib(xp, R_DimSymbol);
13 :     int *pv = INTEGER(pp),
14 :     *dim = INTEGER(dp),
15 :     ncol = length(pp) - 1;
16 :     int nnz = pv[ncol];
17 :    
18 : bates 304 if (!(isReal(xp) && isArray(xp)))
19 : bates 415 return mkString("slot x should be a real array");
20 : bates 298 if (length(dp) != 3)
21 : bates 415 return mkString("slot x should be a 3-dimensional array");
22 : bates 298 if (length(ip) != nnz)
23 : bates 415 return mkString("length of slot i does not matck last element of slot p");
24 : bates 298 if (dim[2] != nnz)
25 : bates 415 return
26 :     mkString("third dimension of slot x does not match number of nonzeros");
27 : bates 298 return ScalarLogical(1);
28 :     }
29 :    
30 : bates 434 static int*
31 :     expand_column_pointers(int ncol, const int mp[], int mj[])
32 :     {
33 :     int j;
34 :     for (j = 0; j < ncol; j++) {
35 :     int j2 = mp[j+1], jj;
36 :     for (jj = mp[j]; jj < j2; jj++) mj[jj] = j;
37 :     }
38 :     return mj;
39 :     }
40 :    
41 :     static R_INLINE
42 :     int Tind(const int rowind[], const int colptr[], int i, int j)
43 :     {
44 :     int k, k2 = colptr[j + 1];
45 :     for (k = colptr[j]; k < k2; k++)
46 :     if (rowind[k] == i) return k;
47 :     error("row %d and column %d not defined in rowind and colptr",
48 :     i, j);
49 :     return -1; /* -Wall */
50 :     }
51 :    
52 : bates 300 /**
53 :     * Perform one of the matrix operations
54 :     * C := alpha*op(A)*B + beta*C
55 :     * or
56 :     * C := alpha*B*op(A) + beta*C
57 :     * where A is a compressed, sparse, blocked matrix and
58 :     * B and C are dense matrices.
59 :     *
60 :     * @param side 'L' or 'l' for left, otherwise right
61 :     * @param transa 'T' or 't' for transpose.
62 :     * @param m number of rows in C
63 :     * @param n number of columns in C
64 : bates 415 * @param k number of rows in B if side = 'L', otherwise
65 :     * number of columns in B.
66 : bates 300 * @param alpha
67 : bates 415 * @param A pointer to a cscBlocked object
68 :     * @param B matrix to be multiplied
69 :     * @param ldb leading dimension of b as declared in the calling
70 :     * routine
71 :     * @param beta scalar multiplier of c
72 :     * @param C product matrix to be modified
73 :     * @param ldc leading dimension of c as declared in the calling
74 :     * routine
75 :     */
76 :     void
77 : bates 447 cscb_mm(enum CBLAS_SIDE side, enum CBLAS_TRANSPOSE transa,
78 :     int m, int n, int k, double alpha, SEXP A,
79 :     const double B[], int ldb, double beta, double C[], int ldc)
80 : bates 415 {
81 :     SEXP AxP = GET_SLOT(A, Matrix_xSym),
82 :     ApP = GET_SLOT(A, Matrix_pSym);
83 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
84 :     *Ap = INTEGER(ApP),
85 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
86 :     ancb = length(ApP) - 1, /* number of column blocks */
87 :     anrb; /* number of row blocks */
88 :     int absz = adims[0] * adims[1]; /* block size */
89 :     int j;
90 :     double *Ax = REAL(AxP);
91 :    
92 : bates 422 if (ldc < m) error("incompatible dims m=%d, ldc=%d", m, ldc);
93 : bates 448 if (side == LFT) {
94 : bates 415 /* B is of size k by n */
95 :     if (ldb < k)
96 :     error("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d",
97 :     k, ldb, n, adims[0], adims[1]);
98 : bates 447 if (transa == TRN) {
99 : bates 415 if (m % adims[1] || k % adims[0])
100 :     error("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d",
101 :     m, k, adims[0], adims[1]);
102 :     if (ancb != m/adims[1])
103 :     error("incompatible LT dims m=%d, ancb=%d, adims=[%d,%d,%d]",
104 : bates 447 m, ancb, adims[0], adims[1], adims[2]);
105 : bates 415 anrb = k/adims[0];
106 :     } else {
107 :     if (m % adims[0] || k % adims[1])
108 :     error("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d",
109 :     m, k, adims[0], adims[1]);
110 :     if (ancb != k/adims[1])
111 :     error("incompatible LN dims k=%d, ancb=%d, adims=[%d,%d,%d]",
112 : bates 447 k, ancb, adims[0], adims[1], adims[2]);
113 : bates 415 anrb = m/adims[0];
114 :     }
115 :     for (j = 0; j < ancb; j++) {
116 :     int kk, j2 = Ap[j + 1];
117 :     for (kk = Ap[j]; kk < j2; kk++) {
118 :     int ii = Ai[kk];
119 :     if (ii < 0 || ii >= anrb)
120 :     error("improper row index ii=%d, anrb=%d", ii, anrb);
121 : bates 447 if (transa == TRN) {
122 : bates 415 F77_CALL(dgemm)("T", "N", adims+1, &n, adims,
123 :     &alpha, Ax + kk * absz, adims,
124 :     B + ii * adims[0], &ldb,
125 :     &beta, C + j * adims[1], &ldc);
126 :     } else {
127 :     F77_CALL(dgemm)("N", "N", adims, &n, adims+1,
128 :     &alpha, Ax + kk * absz, adims,
129 : bates 447 B + j * adims[1], &ldb,
130 : bates 422 &beta, C + ii * adims[0], &ldc);
131 : bates 415 }
132 :     }
133 :     }
134 :     } else {
135 :     /* B is of size m by k */
136 :     error("Call to cscb_mm must have side == 'L'");
137 :     }
138 :     }
139 :    
140 :     /**
141 : bates 339 * Perform one of the matrix operations
142 :     * C := alpha*A*A' + beta*C,
143 :     * or
144 :     * C := alpha*A'*A + beta*C,
145 :     * where A is a compressed, sparse, blocked matrix and
146 :     * C is a compressed, sparse, symmetric blocked matrix.
147 :     *
148 :     * @param uplo 'U' or 'u' for upper triangular storage, else lower.
149 :     * @param trans 'T' or 't' for transpose.
150 :     * @param alpha scalar multiplier of outer product
151 :     * @param A compressed sparse blocked matrix
152 :     * @param beta scalar multiplier of c
153 :     * @param C compressed sparse blocked symmetric matrix to be updated
154 :     */
155 : bates 447 void
156 :     cscb_syrk(enum CBLAS_UPLO uplo, enum CBLAS_TRANSPOSE trans,
157 :     double alpha, SEXP A,
158 :     double beta, SEXP C)
159 : bates 339 {
160 : bates 415 SEXP AxP = GET_SLOT(A, Matrix_xSym),
161 :     ApP = GET_SLOT(A, Matrix_pSym),
162 :     CxP = GET_SLOT(C, Matrix_xSym),
163 :     CpP = GET_SLOT(C, Matrix_pSym);
164 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
165 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
166 :     *Ap = INTEGER(ApP),
167 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
168 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
169 :     *Cp = INTEGER(CpP),
170 : bates 422 j, k;
171 : bates 415 double *Ax = REAL(AxP), *Cx = REAL(CxP), one = 1.;
172 :     int scalar = (adims[0] == 1 && adims[1] == 1),
173 : bates 434 anc = length(ApP) - 1,
174 : bates 415 asz = adims[0] * adims[1],
175 :     csz = cdims[0] * cdims[1];
176 : bates 339
177 : bates 415
178 : bates 339 if (cdims[0] != cdims[1]) error("blocks in C must be square");
179 : bates 448 if (trans == TRN) {
180 : bates 339 error("Code for trans == 'T' not yet written");
181 : bates 415 /* FIXME: Write this part */
182 :     } else {
183 : bates 422 if (adims[0] != cdims[0])
184 :     error("Inconsistent dimensions: A[%d,%d,%d], C[%d,%d,%d]",
185 :     adims[0], adims[1], adims[2],
186 :     cdims[0], cdims[1], cdims[2]);
187 : bates 339 /* check the row indices */
188 : bates 415 for (k = 0; k < adims[2]; k++) {
189 :     int aik = Ai[k];
190 : bates 422 if (aik < 0 || aik >= cdims[2])
191 : bates 415 error("Row index %d = %d is out of range [0, %d]",
192 : bates 422 k, aik, cdims[2] - 1);
193 : bates 415 }
194 :     /* multiply C by beta */
195 :     if (beta != 1.)
196 :     for (j = 0; j < csz * cdims[2]; j++) Cx[j] *= beta;
197 :     /* individual products */
198 : bates 434 for (j = 0; j < anc; j++) {
199 : bates 415 int k, kk, k2 = Ap[j+1];
200 :     for (k = Ap[j]; k < k2; k++) {
201 : bates 448 int ii = Ai[k], K = Tind(Ci, Cp, ii, ii);
202 : bates 415
203 : bates 438 if (K < 0) error("cscb_syrk: C[%d,%d] not defined", ii, ii);
204 : bates 415 if (scalar) Cx[K] += alpha * Ax[k] * Ax[k];
205 : bates 447 else F77_CALL(dsyrk)((uplo == UPP)?"U":"L", "N", cdims, adims + 1,
206 : bates 415 &alpha, Ax + k * asz, adims,
207 :     &one, Cx + K * csz, cdims);
208 :     for (kk = k+1; kk < k2; kk++) {
209 :     int jj = Ai[kk];
210 : bates 448 K = (uplo == UPP) ? Tind(Ci, Cp, ii, jj) : Tind(Ci, Cp, jj, ii);
211 :    
212 : bates 415 if (scalar) Cx[K] += alpha * Ax[k] * Ax[kk];
213 :     else F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
214 : bates 448 &alpha, Ax + ((uplo==UPP)?kk:k) * asz, adims,
215 :     Ax + ((uplo==UPP)?k:kk) * asz, adims,
216 : bates 415 &one, Cx + K * asz, cdims);
217 : bates 339 }
218 :     }
219 :     }
220 :     }
221 : bates 415 }
222 :    
223 :     /**
224 :     * Create the LDL' decomposition of the positive definite symmetric
225 :     * cscBlocked matrix A (upper triangle stored) in L and D.
226 :     *
227 :     * @param A pointer to a cscBlocked object containing the upper
228 :     * triangle of a positive definite symmetric matrix.
229 :     * @param Parent the parent array for A
230 :     * @param L pointer to a cscBlocked object to hold L
231 :     * @param D pointer to a 3D array to hold D
232 :     *
233 :     * @return n the number of column blocks in A for success. A value
234 :     * less than n indicates the first column block whose diagonal was not
235 :     * positive definite.
236 :     */
237 :     int
238 :     cscb_ldl(SEXP A, const int Parent[], SEXP L, SEXP D)
239 :     {
240 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
241 :     AxP = GET_SLOT(A, Matrix_xSym);
242 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
243 : bates 434 diag, info, j, k, n = length(ApP) - 1;
244 : bates 415 int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
245 :     *Ap = INTEGER(ApP),
246 :     *Li = INTEGER(GET_SLOT(L, Matrix_iSym)),
247 : bates 434 *Lp = INTEGER(GET_SLOT(L, Matrix_pSym)), nci = adims[0];
248 : bates 415 double *Lx = REAL(GET_SLOT(L, Matrix_xSym)),
249 : bates 434 *Ax = REAL(AxP), *Dx = REAL(D), minus1 = -1., one = 1.;
250 : bates 415
251 : bates 434 if (adims[1] != nci || nci < 1)
252 :     error("cscb_ldl: dim(A) is [%d, %d, %d]", adims[0], adims[1], adims[2]);
253 : bates 415 for (j = 0, diag = 1; j < n; j++) { /* check for trivial structure */
254 :     if (Parent[j] >= 0) {diag = 0; break;}
255 :     }
256 :     if (diag) {
257 : bates 434 int ncisqr = nci * nci;
258 :     Memcpy(Dx, Ax, ncisqr * n);
259 :     for (j = 0; j < n; j++) { /* form D_i^{1/2} */
260 :     F77_CALL(dpotrf)("U", &nci, Dx + j * ncisqr, &nci, &k);
261 :     if (k) error("D[ , , %d], level %d, is not positive definite",
262 :     j + 1);
263 :     }
264 : bates 415 return n;
265 :     }
266 : bates 434 if (nci == 1) {
267 :     k = R_ldl_numeric(n, Ap, Ai, Ax, Lp, Parent, Li, Lx, Dx,
268 :     (int *) NULL, (int *) NULL);
269 :     if (k < n) error("cscb_ldl: error code %k from R_ldl_numeric", k);
270 :     for (j = 0; j < n; j++) Dx[j] = sqrt(Dx[j]);
271 :     return n;
272 :     } else { /* Copy of ldl_numeric from the LDL package
273 :     * modified for blocked sparse matrices */
274 :     int i, k, p, p2, len, nci = adims[0], ncisqr = adims[0]*adims[0], top;
275 :     int *Lnz = Calloc(n, int),
276 :     *Pattern = Calloc(n, int),
277 :     *Flag = Calloc(n, int);
278 :     double *Y = Calloc(n * ncisqr, double), *Yi = Calloc(ncisqr, double);
279 : bates 415
280 : bates 434 for (k = 0; k < n; k++) {
281 :     /* compute nonzero Pattern of kth row of L, in topological order */
282 :     AZERO(Y + k*ncisqr, ncisqr); /* Y[,,0:k] is now all zero */
283 :     top = n; /* stack for pattern is empty */
284 :     Flag[k] = k; /* mark node k as visited */
285 :     Lnz[k] = 0; /* count of nonzeros in column k of L */
286 :     p2 = Ap[k+1];
287 :     for (p = Ap[k]; p < p2; p++) {
288 :     i = Ai[p]; /* get A[i,k] */
289 :     if (i <= k) { /* [i,k] in upper triangle? Should always be true */
290 :     /* copy A(i,k) into Y */
291 :     Memcpy(Y + i * ncisqr, Ax + p * ncisqr, ncisqr);
292 :     /* follow path from i to root of etree, stop at flagged node */
293 :     for (len = 0; Flag[i] != k; i = Parent[i]) {
294 :     Pattern[len++] = i; /* L[k,i] is nonzero */
295 :     Flag[i] = k; /* mark i as visited */
296 :     }
297 :     while (len > 0) { /* push path on top of stack */
298 :     Pattern[--top] = Pattern[--len];
299 :     }
300 :     }
301 :     }
302 :     /* Pattern [top ... n-1] now contains nonzero pattern of L[,k] */
303 :     /* compute numerical values in kth row of L (a sparse triangular solve) */
304 :     Memcpy(Dx + k * ncisqr, Y + k * ncisqr, ncisqr); /* get D[,,k] */
305 :     AZERO(Y + k*ncisqr, ncisqr); /* clear Y[,,k] */
306 :     for (; top < n; top++) {
307 :     i = Pattern[top];
308 :     Memcpy(Yi, Y + i*ncisqr, ncisqr); /* copy Y[,,i] */
309 :     AZERO(Y + i*ncisqr, ncisqr); /* clear Y[,,i] */
310 :     p2 = Lp[i] + Lnz[i];
311 :     for (p = Lp[i]; p < p2; p++) {
312 :     F77_CALL(dgemm)("N", "N", &nci, &nci, &nci, &minus1,
313 :     Lx + p*ncisqr, &nci, Yi, &nci,
314 :     &one, Y + Li[p]*ncisqr, &nci);
315 :     }
316 :     /* FIXME: Check that this is the correct order and transposition */
317 :     F77_CALL(dtrsm)("L", "U", "N", "N", &nci, &nci,
318 :     &one, Dx + i*ncisqr, &nci, Yi, &nci);
319 :     F77_CALL(dsyrk)("U", "N", &nci, &nci, &minus1, Yi, &nci,
320 :     &one, Dx + k*ncisqr, &nci);
321 :     F77_CALL(dtrsm)("L", "U", "T", "N", &nci, &nci,
322 :     &one, Dx + i*ncisqr, &nci, Yi, &nci);
323 :     Li[p] = k; /* store L[k,i] in column form of L */
324 :     Memcpy(Lx + p * ncisqr, Yi, ncisqr);
325 :     Lnz[i]++; /* increment count of nonzeros in col i */
326 :     }
327 :     F77_CALL(dpotrf)("U", &nci, Dx + k*ncisqr, &nci, &info);
328 :     if (info) {
329 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
330 :     return k; /* failure, D[,,k] is not positive definite */
331 :     }
332 :     }
333 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
334 :     return n; /* success, diagonal of D is all nonzero */
335 : bates 415 }
336 :     return -1; /* keep -Wall happy */
337 :     }
338 :    
339 :     /**
340 :     * Perform one of the cscBlocked-matrix operations B := alpha*op(A)*B
341 :     * or B := alpha*B*op(A)
342 :     *
343 : bates 448 * @param side
344 :     * @param uplo
345 :     * @param transa
346 :     * @param diag
347 : bates 415 * @param A pointer to a triangular cscBlocked object
348 : bates 448 * @param B contents of the matrix B
349 : bates 415 * @param m number of rows in B
350 :     * @param n number of columns in B
351 :     * @param ldb leading dimension of B as declared in the calling function
352 :     */
353 : bates 447 void
354 :     cscb_trmm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo,
355 :     enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
356 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
357 : bates 415 {
358 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
359 :     AxP = GET_SLOT(A, Matrix_xSym);
360 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
361 :     *Ap = INTEGER(ApP),
362 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
363 :     i, j, nb = length(ApP) - 1;
364 :    
365 :     if (xdims[0] != xdims[1])
366 :     error("Argument A to cscb_trmm is not triangular");
367 :     if (alpha != 1.0) {
368 :     for (j = 0; j < n; j++) { /* scale by alpha */
369 :     for (i = 0; i < m; i++)
370 :     B[i + j * ldb] *= alpha;
371 :     }
372 :     }
373 : bates 448 if (diag == UNT && xdims[2] < 1) return; /* A is the identity */
374 : bates 415 error("Code for non-identity cases of cscb_trmm not yet written");
375 :     }
376 :    
377 :     /**
378 : bates 434 * Solve a triangular system of the form op(A)*X = alpha*B where A
379 :     * is a cscBlocked triangular matrix and B is a dense matrix.
380 :     *
381 :     * @param uplo 'U' or 'L' for upper or lower
382 :     * @param trans 'T' or 'N' for transpose or no transpose
383 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
384 :     * @param A pointer to a triangular cscBlocked object
385 :     * @param B pointer to the contents of the matrix B
386 :     * @param m number of rows in B
387 :     * @param n number of columns in B
388 :     * @param ldb leading dimension of B as declared in the calling function
389 :     */
390 : bates 447 void
391 :     cscb_trsm(enum CBLAS_UPLO uplo, enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
392 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
393 : bates 434 {
394 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
395 :     AxP = GET_SLOT(A, Matrix_xSym);
396 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
397 :     *Ap = INTEGER(ApP),
398 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
399 :     i, j, nb = length(ApP) - 1;
400 : bates 448 double *Ax = REAL(GET_SLOT(A, Matrix_xSym)), minus1 = -1., one = 1.;
401 : bates 434
402 :     if (xdims[0] != xdims[1])
403 :     error("Argument A to cscb_trsm is not triangular");
404 :     if (ldb < m || ldb <= 0 || n <= 0)
405 :     error("cscb_trsm: inconsistent dims m = %d, n = %d, ldb = %d",
406 :     m, n, ldb);
407 :     if (m != (nb * xdims[0]))
408 :     error("cscb_trsm: inconsistent dims m = %d, A[%d,%d,]x%d",
409 :     m, xdims[0], xdims[1], xdims[2]);
410 :     if (alpha != 1.0) {
411 :     for (j = 0; j < n; j++) { /* scale by alpha */
412 :     for (i = 0; i < m; i++)
413 :     B[i + j * ldb] *= alpha;
414 :     }
415 :     }
416 : bates 448 if (diag == UNT) {
417 : bates 434 if (xdims[2] < 1) return; /* A is the identity */
418 : bates 448 if (xdims[0] == 1) { /* scalar case */
419 :     if (uplo == UPP) error("Code for upper triangle not yet written");
420 :     if (transa == TRN) {
421 : bates 434 for (j = 0; j < n; j++)
422 :     R_ldl_ltsolve(m, B + j * ldb, Ap, Ai, Ax);
423 :     } else {
424 :     for (j = 0; j < n; j++)
425 :     R_ldl_lsolve(m, B + j * ldb, Ap, Ai, Ax);
426 :     }
427 :     return;
428 : bates 448 } else {
429 :     int p, p2, sza = xdims[0] * xdims[0], szb = xdims[0] * n;
430 :     double *tmp = Calloc(szb, double);
431 :     if (uplo == UPP) error("Code for upper triangle not yet written");
432 :     if (transa == TRN) {
433 :     for (j = nb - 1; j >= 0; j--) {
434 :     p2 = Ap[j+1];
435 :    
436 :     F77_CALL(dlacpy)("A", xdims, &n, B + j * xdims[0], &ldb,
437 :     tmp, xdims);
438 :     for (p = Ap[j]; p < p2; p++)
439 :     F77_CALL(dgemm)("T", "N", xdims, &n, xdims,
440 :     &minus1, Ax + p * sza, xdims,
441 :     B + Ai[p] * xdims[0], &ldb,
442 :     &one, tmp, xdims);
443 :     F77_CALL(dlacpy)("A", xdims, &n, tmp, xdims,
444 :     B + j * xdims[0], &ldb);
445 :     }
446 :     } else {
447 :     for (j = 0; j < nb; j++) {
448 :     p2 = Ap[j+1];
449 :    
450 :     F77_CALL(dlacpy)("A", xdims, &n, B + j * xdims[0], &ldb,
451 :     tmp, xdims);
452 :     for (p = Ap[j]; p < p2; p++)
453 :     F77_CALL(dgemm)("N", "N", xdims, &n, xdims,
454 :     &minus1, Ax + p * sza, xdims,
455 :     B + Ai[p] * xdims[0], &ldb,
456 :     &one, tmp, xdims);
457 :     F77_CALL(dlacpy)("A", xdims, &n, tmp, xdims,
458 :     B + j * xdims[0], &ldb);
459 :     }
460 :     }
461 : bates 434 }
462 : bates 448 } else {error("Code for non-unit cases of cscb_trsm not yet written");}
463 : bates 434 }
464 :    
465 :     /**
466 : bates 415 * Perform one of the operations B := alpha*op(A)*B or
467 :     * B := alpha*B*op(A) where A and B are both cscBlocked.
468 :     *
469 : bates 448 * @param side
470 :     * @param uplo
471 :     * @param transa
472 :     * @param diag
473 : bates 415 * @param alpha scalar multiplier
474 :     * @param A pointer to a triangular cscBlocked object
475 :     * @param B pointer to a general cscBlocked matrix
476 :     */
477 : bates 447 void
478 : bates 448 cscb_trcbm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo,
479 :     enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
480 : bates 447 double alpha, SEXP A, SEXP B)
481 : bates 415 {
482 : bates 448 int
483 : bates 447 iunit = (diag == UNT);
484 : bates 415 SEXP ApP = GET_SLOT(A, Matrix_pSym),
485 :     AxP = GET_SLOT(A, Matrix_xSym),
486 :     BpP = GET_SLOT(B, Matrix_pSym),
487 :     BxP = GET_SLOT(B, Matrix_xSym);
488 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
489 :     *Ap = INTEGER(ApP),
490 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
491 :     *Bp = INTEGER(BpP),
492 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
493 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
494 :     ncbA = length(ApP) - 1,
495 :     ncbB = length(BpP) - 1;
496 :     int i, nbx = bxdims[0] * bxdims[1] * bxdims[2];
497 :    
498 :     if (axdims[0] != axdims[1])
499 :     error("Argument A to cscb_trcbm is not triangular");
500 :     if (alpha != 1.0) {
501 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
502 :     REAL(BxP)[i] *= alpha;
503 :     }
504 :     }
505 : bates 448 if (diag == UNT && axdims[2] < 1) return; /* A is the identity */
506 : bates 415 error("Code for non-trivial cscb_trcbm not yet written");
507 :     }
508 :    
509 :     /**
510 : bates 434 * Expand a column of a compressed, sparse, column-oriented matrix.
511 :     *
512 :     * @param dest array to hold the result
513 :     * @param m number of rows in the matrix
514 :     * @param j index (0-based) of column to expand
515 :     * @param Ap array of column pointers
516 :     * @param Ai array of row indices
517 :     * @param Ax array of non-zero values
518 :     *
519 :     * @return dest
520 :     */
521 :     static
522 :     double *expand_column(double *dest, int m, int j,
523 :     const int Ap[], const int Ai[], const double Ax[])
524 :     {
525 :     int k, k2 = Ap[j + 1];
526 :    
527 :     for (k = 0; k < m; k++) dest[k] = 0.;
528 :     for (k = Ap[j]; k < k2; k++) dest[Ai[k]] = Ax[k];
529 :     return dest;
530 :     }
531 :    
532 :     /**
533 :     * Solve one of the systems op(A)*X = alpha*B or
534 :     * X*op(A) = alpha*B where A cscBlocked triangular and B is cscBlocked.
535 :     *
536 :     * @param side 'L' or 'R' for left or right
537 :     * @param uplo 'U' or 'L' for upper or lower
538 :     * @param transa 'T' or 'N' for transpose or no transpose
539 :     * @param diag 'U' or 'N' for unit diagonal or non-unit
540 :     * @param alpha scalar multiplier
541 :     * @param A pointer to a triangular cscBlocked object
542 :     * @param B pointer to a general cscBlocked matrix
543 :     */
544 : bates 447 void
545 : bates 448 cscb_trcbsm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo,
546 :     enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
547 : bates 447 double alpha, SEXP A, const int Parent[], SEXP B)
548 : bates 434 {
549 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
550 :     AxP = GET_SLOT(A, Matrix_xSym),
551 :     BpP = GET_SLOT(B, Matrix_pSym),
552 :     BxP = GET_SLOT(B, Matrix_xSym);
553 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
554 :     *Ap = INTEGER(ApP),
555 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
556 :     *Bp = INTEGER(BpP),
557 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
558 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
559 :     ncbA = length(ApP) - 1,
560 :     ncbB = length(BpP) - 1;
561 :     int i, j, nbx = bxdims[0] * bxdims[1] * bxdims[2];
562 :     double *Ax = REAL(AxP), *Bx = REAL(BxP);
563 :    
564 :     if (axdims[0] != axdims[1])
565 :     error("Argument A to cscb_trcbm is not triangular");
566 :     if (alpha != 1.0) {
567 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
568 :     REAL(BxP)[i] *= alpha;
569 :     }
570 :     }
571 : bates 448 if (diag == UNT && axdims[2] < 1) return; /* A is the identity */
572 :     if (diag == UNT && axdims[0] == 1) { /* can use R_ldl code */
573 :     if ((side != LFT) && transa == TRN) { /* case required for lmer */
574 : bates 434 int *BTp, nnz = bxdims[2], nrbB;
575 :     int *tmp = expand_column_pointers(ncbB, Bp, Calloc(nnz, int));
576 :     int *BTi = Calloc(nnz, int);
577 :     double *BTx = Calloc(nnz, double), *rhs;
578 :    
579 :     /* transpose B */
580 :     for (i = 0, nrbB = -1; i < nnz; i++) if (Bi[i] > nrbB) nrbB = Bi[i];
581 :     BTp = Calloc(nrbB, int);
582 :     triplet_to_col(ncbB, nrbB, nnz, tmp, Bi, Bx, BTp, BTi, BTx);
583 :     /* sanity check */
584 :     if (BTp[nrbB] != nnz) error("cscb_trcbsm: transpose operation failed");
585 :     Free(tmp);
586 :     /* Solve one column at a time */
587 :     rhs = Calloc(ncbB, double);
588 :     AZERO(Bx, nnz); /* zero the result */
589 :     for (i = 0; i < nrbB; i++) {
590 :     R_ldl_lsolve(ncbB, expand_column(rhs, ncbB, i, BTp, BTi, BTx),
591 :     Ap, Ai, Ax);
592 :     for (j = 0; j < ncbB; j++) { /* write non-zeros in sol'n into B */
593 :     if (BTx[j]) Bx[Tind(Bi, Bp, j, i)] = BTx[j];
594 :     }
595 :     Free(rhs); Free(BTp); Free(BTx); Free(BTi);
596 :     }
597 :     }
598 :     error("cscb_trcbsm: method not yet written");
599 :     }
600 :     error("cscb_trcbsm: method not yet written");
601 :     }
602 :    
603 :     /**
604 : bates 415 * Perform one of the matrix-matrix operations
605 :     * C := alpha*op(A)*op(B) + beta*C
606 :     * on compressed, sparse, blocked matrices.
607 :     *
608 :     * @param transa 'T' for transpose of A, else 'N'
609 :     * @param transb 'T' for transpose of B, else 'N'
610 :     * @param alpha scalar multiplier
611 :     * @param A pointer to a cscBlocked object
612 :     * @param B pointer to a cscBlocked object
613 :     * @param beta scalar multiplier
614 :     * @param C pointer to a cscBlocked object
615 :     */
616 : bates 447 void
617 :     cscb_cscbm(enum CBLAS_TRANSPOSE transa, enum CBLAS_TRANSPOSE transb,
618 :     double alpha, SEXP A, SEXP B, double beta, SEXP C)
619 : bates 415 {
620 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
621 :     AxP = GET_SLOT(A, Matrix_xSym),
622 :     BpP = GET_SLOT(B, Matrix_pSym),
623 :     BxP = GET_SLOT(B, Matrix_xSym),
624 :     CxP = GET_SLOT(C, Matrix_xSym);
625 :     int *Ap = INTEGER(ApP),
626 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
627 :     *Bp = INTEGER(BpP),
628 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
629 :     *Cp = INTEGER(GET_SLOT(C, Matrix_pSym)),
630 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
631 :     *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
632 :     *bdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
633 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
634 :     nca = length(ApP) - 1,
635 :     ncb = length(BpP) - 1;
636 :     int ablk = adims[0] * adims[1],
637 :     bblk = bdims[0] * bdims[1],
638 :     cblk = cdims[0] * cdims[1];
639 :     double *Ax = REAL(AxP),
640 :     *Bx = REAL(BxP),
641 :     *Cx = REAL(CxP),
642 :     one = 1.0;
643 :    
644 : bates 448 if ((transa == NTR) && transb == TRN) { /* transposed crossproduct */
645 : bates 415 int jj;
646 :    
647 :     if (adims[1] != bdims[1] ||
648 :     adims[0] != cdims[0] ||
649 :     bdims[0] != cdims[1])
650 :     error("C[%d,%d,%d] := A[%d,%d,%d] %*% t(B[%d,%d,%d])",
651 :     cdims[0], cdims[1], cdims[2],
652 :     adims[0], adims[1], adims[2],
653 :     bdims[0], bdims[1], bdims[2]);
654 :     if (nca != ncb)
655 :     error("C := A(ncblocks = %d) %*% t(B(ncblocks = %d)", nca, ncb);
656 :     if (beta != 1.) { /* scale C by beta */
657 :     int ctot = cdims[0] * cdims[1] * cdims[2];
658 :     for (jj = 0; jj < ctot; jj++) Cx[jj] *= beta;
659 :     }
660 :     for (jj = 0; jj < nca; jj++) {
661 :     int ia, ib, a2 = Ap[jj + 1], b2 = Bp[jj + 1];
662 :     for (ia = Ap[jj]; ia < a2; ia++) {
663 : bates 448 for (ib = Bp[jj]; ib < b2; ib++) {
664 :     F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
665 : bates 415 &alpha, Ax + ia * ablk, adims,
666 :     Bx + ib * bblk, bdims, &one,
667 : bates 448 Cx + Tind(Ci, Cp, Ai[ia], Bi[ib])*cblk, cdims);
668 : bates 339 }
669 :     }
670 :     }
671 : bates 415 return;
672 : bates 339 }
673 : bates 415 error("Code not yet written");
674 : bates 339 }

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge