SCM

SCM Repository

[matrix] Annotation of /pkg/src/dgBCMatrix.c
ViewVC logotype

Annotation of /pkg/src/dgBCMatrix.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 746 - (view) (download) (as text)

1 : bates 485 #include "dgBCMatrix.h"
2 : bates 415 /* TODO
3 :     * - code for trans = 'T' in cscb_syrk
4 : bates 556 * - code for non-trivial cscb_trmm
5 : bates 415 */
6 : bates 447
7 : bates 485 SEXP dgBCMatrix_validate(SEXP x)
8 : bates 298 {
9 :     SEXP pp = GET_SLOT(x, Matrix_pSym),
10 :     ip = GET_SLOT(x, Matrix_iSym),
11 :     xp = GET_SLOT(x, Matrix_xSym),
12 :     dp = getAttrib(xp, R_DimSymbol);
13 :     int *pv = INTEGER(pp),
14 :     *dim = INTEGER(dp),
15 :     ncol = length(pp) - 1;
16 :     int nnz = pv[ncol];
17 :    
18 : bates 304 if (!(isReal(xp) && isArray(xp)))
19 : bates 582 return mkString(_("slot x should be a real array"));
20 : bates 298 if (length(dp) != 3)
21 : bates 582 return mkString(_("slot x should be a 3-dimensional array"));
22 : bates 298 if (length(ip) != nnz)
23 : bates 582 return mkString(_("length of slot i does not match last element of slot p"));
24 : bates 298 if (dim[2] != nnz)
25 : bates 415 return
26 : bates 582 mkString(_("third dimension of slot x does not match number of nonzeros"));
27 : bates 298 return ScalarLogical(1);
28 :     }
29 :    
30 : bates 300 /**
31 :     * Perform one of the matrix operations
32 :     * C := alpha*op(A)*B + beta*C
33 :     * or
34 :     * C := alpha*B*op(A) + beta*C
35 :     * where A is a compressed, sparse, blocked matrix and
36 :     * B and C are dense matrices.
37 :     *
38 : bates 559 * @param side LFT or RGT
39 :     * @param transa TRN or NTR
40 : bates 300 * @param m number of rows in C
41 :     * @param n number of columns in C
42 : bates 559 * @param k number of rows in B if side == LFT, otherwise
43 : bates 415 * number of columns in B.
44 : bates 300 * @param alpha
45 : bates 485 * @param A pointer to a dgBCMatrix object
46 : bates 415 * @param B matrix to be multiplied
47 :     * @param ldb leading dimension of b as declared in the calling
48 :     * routine
49 :     * @param beta scalar multiplier of c
50 :     * @param C product matrix to be modified
51 :     * @param ldc leading dimension of c as declared in the calling
52 :     * routine
53 :     */
54 :     void
55 : bates 447 cscb_mm(enum CBLAS_SIDE side, enum CBLAS_TRANSPOSE transa,
56 :     int m, int n, int k, double alpha, SEXP A,
57 :     const double B[], int ldb, double beta, double C[], int ldc)
58 : bates 415 {
59 :     SEXP AxP = GET_SLOT(A, Matrix_xSym),
60 :     ApP = GET_SLOT(A, Matrix_pSym);
61 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
62 :     *Ap = INTEGER(ApP),
63 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
64 :     ancb = length(ApP) - 1, /* number of column blocks */
65 :     anrb; /* number of row blocks */
66 :     int absz = adims[0] * adims[1]; /* block size */
67 :     int j;
68 :     double *Ax = REAL(AxP);
69 :    
70 : bates 582 if (ldc < m) error(_("incompatible dims m=%d, ldc=%d"), m, ldc);
71 : bates 448 if (side == LFT) {
72 : bates 415 /* B is of size k by n */
73 :     if (ldb < k)
74 : bates 582 error(_("incompatible L dims k=%d, ldb=%d, n=%d, nr=%d, nc=%d"),
75 : bates 415 k, ldb, n, adims[0], adims[1]);
76 : bates 447 if (transa == TRN) {
77 : bates 415 if (m % adims[1] || k % adims[0])
78 : bates 582 error(_("incompatible LT dims m=%d, k = %d, nr=%d, nc=%d"),
79 : bates 415 m, k, adims[0], adims[1]);
80 :     if (ancb != m/adims[1])
81 : bates 582 error(_("incompatible LT dims m=%d, ancb=%d, adims=[%d,%d,%d]"),
82 : bates 447 m, ancb, adims[0], adims[1], adims[2]);
83 : bates 415 anrb = k/adims[0];
84 :     } else {
85 :     if (m % adims[0] || k % adims[1])
86 : bates 582 error(_("incompatible LN dims m=%d, k = %d, nr=%d, nc=%d"),
87 : bates 415 m, k, adims[0], adims[1]);
88 :     if (ancb != k/adims[1])
89 : bates 582 error(_("incompatible LN dims k=%d, ancb=%d, adims=[%d,%d,%d]"),
90 : bates 447 k, ancb, adims[0], adims[1], adims[2]);
91 : bates 415 anrb = m/adims[0];
92 :     }
93 :     for (j = 0; j < ancb; j++) {
94 :     int kk, j2 = Ap[j + 1];
95 :     for (kk = Ap[j]; kk < j2; kk++) {
96 :     int ii = Ai[kk];
97 :     if (ii < 0 || ii >= anrb)
98 : bates 582 error(_("improper row index ii=%d, anrb=%d"), ii, anrb);
99 : bates 447 if (transa == TRN) {
100 : bates 415 F77_CALL(dgemm)("T", "N", adims+1, &n, adims,
101 :     &alpha, Ax + kk * absz, adims,
102 :     B + ii * adims[0], &ldb,
103 :     &beta, C + j * adims[1], &ldc);
104 :     } else {
105 :     F77_CALL(dgemm)("N", "N", adims, &n, adims+1,
106 :     &alpha, Ax + kk * absz, adims,
107 : bates 447 B + j * adims[1], &ldb,
108 : bates 422 &beta, C + ii * adims[0], &ldc);
109 : bates 415 }
110 :     }
111 :     }
112 :     } else {
113 :     /* B is of size m by k */
114 : bates 582 error(_("Call to cscb_mm must have side == LFT"));
115 : bates 415 }
116 :     }
117 :    
118 :     /**
119 : bates 339 * Perform one of the matrix operations
120 :     * C := alpha*A*A' + beta*C,
121 :     * or
122 :     * C := alpha*A'*A + beta*C,
123 :     * where A is a compressed, sparse, blocked matrix and
124 :     * C is a compressed, sparse, symmetric blocked matrix.
125 :     *
126 : bates 565 * @param uplo UPP or LOW for upper or lower
127 :     * @param trans TRN or NTR for transpose or no transpose
128 : bates 339 * @param alpha scalar multiplier of outer product
129 :     * @param A compressed sparse blocked matrix
130 :     * @param beta scalar multiplier of c
131 :     * @param C compressed sparse blocked symmetric matrix to be updated
132 :     */
133 : bates 447 void
134 :     cscb_syrk(enum CBLAS_UPLO uplo, enum CBLAS_TRANSPOSE trans,
135 :     double alpha, SEXP A,
136 :     double beta, SEXP C)
137 : bates 339 {
138 : bates 415 SEXP AxP = GET_SLOT(A, Matrix_xSym),
139 :     ApP = GET_SLOT(A, Matrix_pSym),
140 :     CxP = GET_SLOT(C, Matrix_xSym),
141 :     CpP = GET_SLOT(C, Matrix_pSym);
142 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
143 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
144 :     *Ap = INTEGER(ApP),
145 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
146 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
147 :     *Cp = INTEGER(CpP),
148 : bates 422 j, k;
149 : bates 415 double *Ax = REAL(AxP), *Cx = REAL(CxP), one = 1.;
150 :     int scalar = (adims[0] == 1 && adims[1] == 1),
151 : bates 434 anc = length(ApP) - 1,
152 : bates 415 asz = adims[0] * adims[1],
153 :     csz = cdims[0] * cdims[1];
154 : bates 339
155 : bates 415
156 : bates 582 if (cdims[0] != cdims[1]) error(_("blocks in C must be square"));
157 : bates 448 if (trans == TRN) {
158 : bates 451 /* FIXME: Write this part */
159 : bates 582 error(_("Code for trans == TRN not yet written"));
160 : bates 415 } else {
161 : bates 422 if (adims[0] != cdims[0])
162 : bates 582 error(_("Inconsistent dimensions: A[%d,%d,%d], C[%d,%d,%d]"),
163 : bates 422 adims[0], adims[1], adims[2],
164 :     cdims[0], cdims[1], cdims[2]);
165 : bates 339 /* check the row indices */
166 : bates 415 for (k = 0; k < adims[2]; k++) {
167 :     int aik = Ai[k];
168 : bates 422 if (aik < 0 || aik >= cdims[2])
169 : bates 582 error(_("Row index %d = %d is out of range [0, %d]"),
170 : bates 422 k, aik, cdims[2] - 1);
171 : bates 415 }
172 :     /* multiply C by beta */
173 :     if (beta != 1.)
174 :     for (j = 0; j < csz * cdims[2]; j++) Cx[j] *= beta;
175 :     /* individual products */
176 : bates 434 for (j = 0; j < anc; j++) {
177 : bates 415 int k, kk, k2 = Ap[j+1];
178 :     for (k = Ap[j]; k < k2; k++) {
179 : bates 746 int ii = Ai[k];
180 :     int K = check_csc_index(Cp, Ci, ii, ii, 0);
181 : bates 415
182 :     if (scalar) Cx[K] += alpha * Ax[k] * Ax[k];
183 : bates 553 else F77_CALL(dsyrk)((uplo == UPP) ? "U" : "L", "N",
184 :     cdims, adims + 1,
185 : bates 415 &alpha, Ax + k * asz, adims,
186 :     &one, Cx + K * csz, cdims);
187 :     for (kk = k+1; kk < k2; kk++) {
188 :     int jj = Ai[kk];
189 : bates 536 K = (uplo == UPP) ? check_csc_index(Cp, Ci, ii, jj, 0) :
190 :     check_csc_index(Cp, Ci, jj, ii, 0);
191 : bates 448
192 : bates 415 if (scalar) Cx[K] += alpha * Ax[k] * Ax[kk];
193 : bates 553 else F77_CALL(dgemm)("N", "T", cdims, cdims + 1,
194 :     adims + 1, &alpha,
195 :     Ax+((uplo==UPP)?k:kk)*asz, adims,
196 :     Ax+((uplo==UPP)?kk:k)*asz, adims,
197 : bates 543 &one, Cx + K * csz, cdims);
198 : bates 339 }
199 :     }
200 :     }
201 :     }
202 : bates 415 }
203 :    
204 : bates 556 static void
205 :     copy_transpose(double dest[], const double src[], int n)
206 :     {
207 :     int i, j;
208 :     for (i = 0; i < n; i++) {
209 :     for (j = 0; j < n; j++) {
210 :     dest[i + j * n] = src[j + i * n];
211 :     }
212 :     }
213 :     }
214 :    
215 : bates 415 /**
216 : bates 451 * Create the LD^{T/2}D^{1/2}L' decomposition of the positive definite
217 : bates 536 * symmetric dgBCMatrix matrix A (upper triangle stored) in L and D^{1/2}.
218 :     * D^{1/2} denotes the upper Cholesky factor of the positive definite positive
219 :     * definite block diagonal matrix D. The diagonal blocks are of size nci.
220 : bates 415 *
221 : bates 485 * @param A pointer to a dgBCMatrix object containing the upper
222 : bates 415 * triangle of a positive definite symmetric matrix.
223 :     * @param Parent the parent array for A
224 : bates 485 * @param L pointer to a dgBCMatrix object to hold L
225 : bates 415 * @param D pointer to a 3D array to hold D
226 :     *
227 :     * @return n the number of column blocks in A for success. A value
228 :     * less than n indicates the first column block whose diagonal was not
229 :     * positive definite.
230 :     */
231 :     int
232 :     cscb_ldl(SEXP A, const int Parent[], SEXP L, SEXP D)
233 :     {
234 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
235 :     AxP = GET_SLOT(A, Matrix_xSym);
236 :     int *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
237 : bates 434 diag, info, j, k, n = length(ApP) - 1;
238 : bates 415 int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
239 :     *Ap = INTEGER(ApP),
240 :     *Li = INTEGER(GET_SLOT(L, Matrix_iSym)),
241 : bates 434 *Lp = INTEGER(GET_SLOT(L, Matrix_pSym)), nci = adims[0];
242 : bates 536 int ncisqr = nci * nci;
243 : bates 415 double *Lx = REAL(GET_SLOT(L, Matrix_xSym)),
244 : bates 434 *Ax = REAL(AxP), *Dx = REAL(D), minus1 = -1., one = 1.;
245 : bates 415
246 : bates 434 if (adims[1] != nci || nci < 1)
247 : bates 582 error(_("cscb_ldl: dim(A) is [%d, %d, %d]"), adims[0], adims[1], adims[2]);
248 : bates 415 for (j = 0, diag = 1; j < n; j++) { /* check for trivial structure */
249 :     if (Parent[j] >= 0) {diag = 0; break;}
250 :     }
251 :     if (diag) {
252 : bates 434 Memcpy(Dx, Ax, ncisqr * n);
253 :     for (j = 0; j < n; j++) { /* form D_i^{1/2} */
254 :     F77_CALL(dpotrf)("U", &nci, Dx + j * ncisqr, &nci, &k);
255 : bates 556 if (k) return j; /* return block number, not col no. */
256 : bates 434 }
257 : bates 415 return n;
258 :     }
259 : bates 434 if (nci == 1) {
260 :     k = R_ldl_numeric(n, Ap, Ai, Ax, Lp, Parent, Li, Lx, Dx,
261 :     (int *) NULL, (int *) NULL);
262 : bates 556 if (k < n) return k;
263 : bates 434 for (j = 0; j < n; j++) Dx[j] = sqrt(Dx[j]);
264 :     return n;
265 :     } else { /* Copy of ldl_numeric from the LDL package
266 :     * modified for blocked sparse matrices */
267 : bates 536 int i, k, p, p2, len, top;
268 : bates 434 int *Lnz = Calloc(n, int),
269 :     *Pattern = Calloc(n, int),
270 :     *Flag = Calloc(n, int);
271 :     double *Y = Calloc(n * ncisqr, double), *Yi = Calloc(ncisqr, double);
272 : bates 415
273 : bates 434 for (k = 0; k < n; k++) {
274 :     /* compute nonzero Pattern of kth row of L, in topological order */
275 :     AZERO(Y + k*ncisqr, ncisqr); /* Y[,,0:k] is now all zero */
276 :     top = n; /* stack for pattern is empty */
277 :     Flag[k] = k; /* mark node k as visited */
278 :     Lnz[k] = 0; /* count of nonzeros in column k of L */
279 :     p2 = Ap[k+1];
280 :     for (p = Ap[k]; p < p2; p++) {
281 :     i = Ai[p]; /* get A[i,k] */
282 : bates 582 if (i > k) error(_("cscb_ldl: A has nonzeros below diagonal"));
283 : bates 434 /* copy A(i,k) into Y */
284 : bates 556 Memcpy(Y + i * ncisqr, Ax + p * ncisqr, ncisqr);
285 :     /* follow path from i to root of etree,
286 :     * stop at flagged node */
287 :     for (len = 0; Flag[i] != k; i = Parent[i]) {
288 :     Pattern[len++] = i; /* L[k,i] is nonzero */
289 :     Flag[i] = k; /* mark i as visited */
290 : bates 434 }
291 : bates 556 while (len > 0) { /* push path on top of stack */
292 :     Pattern[--top] = Pattern[--len];
293 :     }
294 : bates 434 }
295 :     /* Pattern [top ... n-1] now contains nonzero pattern of L[,k] */
296 : bates 553 /* compute numerical values in kth row of L
297 :     * (a sparse triangular solve) */
298 : bates 434 Memcpy(Dx + k * ncisqr, Y + k * ncisqr, ncisqr); /* get D[,,k] */
299 :     AZERO(Y + k*ncisqr, ncisqr); /* clear Y[,,k] */
300 :     for (; top < n; top++) {
301 :     i = Pattern[top];
302 :     Memcpy(Yi, Y + i*ncisqr, ncisqr); /* copy Y[,,i] */
303 :     AZERO(Y + i*ncisqr, ncisqr); /* clear Y[,,i] */
304 :     p2 = Lp[i] + Lnz[i];
305 :     for (p = Lp[i]; p < p2; p++) {
306 :     F77_CALL(dgemm)("N", "N", &nci, &nci, &nci, &minus1,
307 :     Lx + p*ncisqr, &nci, Yi, &nci,
308 :     &one, Y + Li[p]*ncisqr, &nci);
309 :     }
310 : bates 553 /* FIXME: Is this the correct order and transposition? */
311 : bates 556 F77_CALL(dtrsm)("L", "U", "T", "N", &nci, &nci,
312 : bates 434 &one, Dx + i*ncisqr, &nci, Yi, &nci);
313 : bates 556 F77_CALL(dsyrk)("U", "T", &nci, &nci, &minus1, Yi, &nci,
314 : bates 434 &one, Dx + k*ncisqr, &nci);
315 : bates 556 F77_CALL(dtrsm)("L", "U", "N", "N", &nci, &nci,
316 : bates 434 &one, Dx + i*ncisqr, &nci, Yi, &nci);
317 :     Li[p] = k; /* store L[k,i] in column form of L */
318 : bates 556 /* Yi contains L[k,i]', not L[k,i] */
319 :     copy_transpose(Lx + p * ncisqr, Yi, nci);
320 :     Lnz[i]++; /* increment count of nonzeros in col i */
321 : bates 434 }
322 :     F77_CALL(dpotrf)("U", &nci, Dx + k*ncisqr, &nci, &info);
323 :     if (info) {
324 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
325 : bates 556 return k; /* failure, D[,,k] not positive definite */
326 : bates 434 }
327 :     }
328 :     Free(Y); Free(Yi); Free(Pattern); Free(Flag); Free(Lnz);
329 :     return n; /* success, diagonal of D is all nonzero */
330 : bates 415 }
331 : bates 536 return -1; /* -Wall */
332 : bates 415 }
333 :    
334 :     /**
335 : bates 485 * Perform one of the dgBCMatrix-matrix operations B := alpha*op(A)*B
336 : bates 415 * or B := alpha*B*op(A)
337 :     *
338 : bates 565 * @param side LFT or RGT for left or right
339 :     * @param uplo UPP or LOW for upper or lower
340 :     * @param transa TRN or NTR for transpose or no transpose
341 :     * @param diag UNT or NUN for unit or non-unit
342 :     * @param alpha scalar multiplier
343 : bates 485 * @param A pointer to a triangular dgBCMatrix object
344 : bates 448 * @param B contents of the matrix B
345 : bates 415 * @param m number of rows in B
346 :     * @param n number of columns in B
347 :     * @param ldb leading dimension of B as declared in the calling function
348 :     */
349 : bates 447 void
350 :     cscb_trmm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo,
351 :     enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
352 :     double alpha, SEXP A, double B[], int m, int n, int ldb)
353 : bates 415 {
354 : bates 451 SEXP /* ApP = GET_SLOT(A, Matrix_pSym), */
355 : bates 415 AxP = GET_SLOT(A, Matrix_xSym);
356 : bates 451 int /* *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)), */
357 :     /* *Ap = INTEGER(ApP), */
358 : bates 415 *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
359 : bates 451 i, j/* , nb = length(ApP) - 1 */;
360 : bates 415
361 :     if (xdims[0] != xdims[1])
362 : bates 582 error(_("Argument A to cscb_trmm is not triangular"));
363 : bates 415 if (alpha != 1.0) {
364 :     for (j = 0; j < n; j++) { /* scale by alpha */
365 :     for (i = 0; i < m; i++)
366 :     B[i + j * ldb] *= alpha;
367 :     }
368 :     }
369 : bates 448 if (diag == UNT && xdims[2] < 1) return; /* A is the identity */
370 : bates 582 error(_("Code for non-identity cases of cscb_trmm not yet written"));
371 : bates 415 }
372 :    
373 :     /**
374 : bates 434 * Solve a triangular system of the form op(A)*X = alpha*B where A
375 : bates 485 * is a dgBCMatrix triangular matrix and B is a dense matrix.
376 : bates 434 *
377 : bates 559 * @param uplo UPP or LOW
378 : bates 565 * @param transa TRN or NTR
379 : bates 559 * @param diag UNT or NUN
380 : bates 565 * @param alpha scalar multiplier
381 : bates 485 * @param A pointer to a triangular dgBCMatrix object
382 : bates 434 * @param m number of rows in B
383 :     * @param n number of columns in B
384 : bates 559 * @param B pointer to the contents of the matrix B
385 : bates 434 * @param ldb leading dimension of B as declared in the calling function
386 :     */
387 : bates 447 void
388 : bates 556 cscb_trsm(enum CBLAS_UPLO uplo, enum CBLAS_TRANSPOSE transa,
389 :     enum CBLAS_DIAG diag, double alpha, SEXP A,
390 : bates 559 int m, int n, double B[], int ldb)
391 : bates 434 {
392 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
393 :     AxP = GET_SLOT(A, Matrix_xSym);
394 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
395 :     *Ap = INTEGER(ApP),
396 :     *xdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
397 :     i, j, nb = length(ApP) - 1;
398 : bates 448 double *Ax = REAL(GET_SLOT(A, Matrix_xSym)), minus1 = -1., one = 1.;
399 : bates 434
400 :     if (xdims[0] != xdims[1])
401 : bates 582 error(_("Argument A to cscb_trsm is not triangular"));
402 : bates 434 if (ldb < m || ldb <= 0 || n <= 0)
403 : bates 582 error(_("cscb_trsm: inconsistent dims m = %d, n = %d, ldb = %d"),
404 : bates 434 m, n, ldb);
405 :     if (m != (nb * xdims[0]))
406 : bates 582 error(_("cscb_trsm: inconsistent dims m = %d, A[%d,%d,]x%d"),
407 : bates 434 m, xdims[0], xdims[1], xdims[2]);
408 :     if (alpha != 1.0) {
409 :     for (j = 0; j < n; j++) { /* scale by alpha */
410 :     for (i = 0; i < m; i++)
411 :     B[i + j * ldb] *= alpha;
412 :     }
413 :     }
414 : bates 448 if (diag == UNT) {
415 : bates 434 if (xdims[2] < 1) return; /* A is the identity */
416 : bates 448 if (xdims[0] == 1) { /* scalar case */
417 : bates 582 if (uplo == UPP) error(_("Code for upper triangle not yet written"));
418 : bates 448 if (transa == TRN) {
419 : bates 434 for (j = 0; j < n; j++)
420 :     R_ldl_ltsolve(m, B + j * ldb, Ap, Ai, Ax);
421 :     } else {
422 :     for (j = 0; j < n; j++)
423 :     R_ldl_lsolve(m, B + j * ldb, Ap, Ai, Ax);
424 :     }
425 :     return;
426 : bates 448 } else {
427 : bates 559 int p, p2, sza = xdims[0] * xdims[0];
428 :    
429 : bates 582 if (uplo == UPP) error(_("Code for upper triangle not yet written"));
430 : bates 448 if (transa == TRN) {
431 :     for (j = nb - 1; j >= 0; j--) {
432 :     p2 = Ap[j+1];
433 :     for (p = Ap[j]; p < p2; p++)
434 :     F77_CALL(dgemm)("T", "N", xdims, &n, xdims,
435 :     &minus1, Ax + p * sza, xdims,
436 :     B + Ai[p] * xdims[0], &ldb,
437 : bates 559 &one, B + j * xdims[0], &ldb);
438 : bates 448 }
439 :     } else {
440 :     for (j = 0; j < nb; j++) {
441 :     p2 = Ap[j+1];
442 :     for (p = Ap[j]; p < p2; p++)
443 :     F77_CALL(dgemm)("N", "N", xdims, &n, xdims,
444 :     &minus1, Ax + p * sza, xdims,
445 : bates 559 B + j * xdims[0], &ldb,
446 :     &one, B + Ai[p] * xdims[0], &ldb);
447 : bates 448 }
448 :     }
449 : bates 434 }
450 : bates 582 } else {error(_("Code for non-unit cases of cscb_trsm not yet written"));}
451 : bates 434 }
452 :    
453 :     /**
454 : bates 415 * Perform one of the operations B := alpha*op(A)*B or
455 : bates 485 * B := alpha*B*op(A) where A and B are both dgBCMatrix.
456 : bates 415 *
457 : bates 448 * @param side
458 :     * @param uplo
459 :     * @param transa
460 :     * @param diag
461 : bates 415 * @param alpha scalar multiplier
462 : bates 485 * @param A pointer to a triangular dgBCMatrix object
463 :     * @param B pointer to a general dgBCMatrix matrix
464 : bates 415 */
465 : bates 447 void
466 : bates 448 cscb_trcbm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo,
467 :     enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
468 : bates 447 double alpha, SEXP A, SEXP B)
469 : bates 415 {
470 : bates 451 SEXP
471 :     /* ApP = GET_SLOT(A, Matrix_pSym), */
472 :     AxP = GET_SLOT(A, Matrix_xSym),
473 :     /* , BpP = GET_SLOT(B, Matrix_pSym) */
474 :     BxP = GET_SLOT(B, Matrix_xSym);
475 : bates 448 int
476 : bates 451 /* *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)), */
477 :     /* *Ap = INTEGER(ApP), */
478 :     /* *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)), */
479 :     /* *Bp = INTEGER(BpP), */
480 : bates 415 *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
481 : bates 451 *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol))
482 :     /* , ncbA = length(ApP) - 1 */
483 :     /* , ncbB = length(BpP) - 1 */
484 :     ;
485 : bates 415 int i, nbx = bxdims[0] * bxdims[1] * bxdims[2];
486 :    
487 :     if (axdims[0] != axdims[1])
488 : bates 582 error(_("Argument A to cscb_trcbm is not triangular"));
489 : bates 415 if (alpha != 1.0) {
490 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
491 :     REAL(BxP)[i] *= alpha;
492 :     }
493 :     }
494 : bates 448 if (diag == UNT && axdims[2] < 1) return; /* A is the identity */
495 : bates 582 error(_("Code for non-trivial cscb_trcbm not yet written"));
496 : bates 415 }
497 :    
498 :     /**
499 : bates 434 * Solve one of the systems op(A)*X = alpha*B or
500 : bates 485 * X*op(A) = alpha*B where A dgBCMatrix triangular and B is dgBCMatrix.
501 : bates 434 *
502 : bates 565 * @param side LFT or RGT for left or right
503 :     * @param uplo UPP or LOW for upper or lower
504 :     * @param transa TRN or NTR for transpose or no transpose
505 :     * @param diag UNT or NON for unit or non-unit
506 : bates 434 * @param alpha scalar multiplier
507 : bates 485 * @param A pointer to a triangular dgBCMatrix object
508 : bates 565 * @param Parent parent array for the column blocks of A
509 : bates 485 * @param B pointer to a general dgBCMatrix matrix
510 : bates 434 */
511 : bates 447 void
512 : bates 448 cscb_trcbsm(enum CBLAS_SIDE side, enum CBLAS_UPLO uplo,
513 :     enum CBLAS_TRANSPOSE transa, enum CBLAS_DIAG diag,
514 : bates 447 double alpha, SEXP A, const int Parent[], SEXP B)
515 : bates 434 {
516 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
517 :     AxP = GET_SLOT(A, Matrix_xSym),
518 :     BpP = GET_SLOT(B, Matrix_pSym),
519 :     BxP = GET_SLOT(B, Matrix_xSym);
520 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
521 :     *Ap = INTEGER(ApP),
522 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
523 :     *Bp = INTEGER(BpP),
524 :     *axdims = INTEGER(getAttrib(AxP, R_DimSymbol)),
525 :     *bxdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
526 : bates 451 /* ncbA = length(ApP) - 1, */
527 : bates 434 ncbB = length(BpP) - 1;
528 :     int i, j, nbx = bxdims[0] * bxdims[1] * bxdims[2];
529 :     double *Ax = REAL(AxP), *Bx = REAL(BxP);
530 :    
531 :     if (axdims[0] != axdims[1])
532 : bates 582 error(_("Argument A to cscb_trcbm is not triangular"));
533 : bates 434 if (alpha != 1.0) {
534 :     for (i = 0; i < nbx; i++) { /* scale by alpha */
535 :     REAL(BxP)[i] *= alpha;
536 :     }
537 :     }
538 : bates 448 if (diag == UNT && axdims[2] < 1) return; /* A is the identity */
539 :     if (diag == UNT && axdims[0] == 1) { /* can use R_ldl code */
540 :     if ((side != LFT) && transa == TRN) { /* case required for lmer */
541 : bates 434 int *BTp, nnz = bxdims[2], nrbB;
542 : bates 679 int *tmp = expand_cmprPt(ncbB, Bp, Calloc(nnz, int));
543 : bates 434 int *BTi = Calloc(nnz, int);
544 :     double *BTx = Calloc(nnz, double), *rhs;
545 :    
546 :     /* transpose B */
547 : bates 556 for (i = 0, nrbB = -1; i < nnz; i++)
548 :     if (Bi[i] > nrbB) nrbB = Bi[i];
549 : bates 746 nrbB++; /* max 0-based index is 1 too small */
550 : bates 434 BTp = Calloc(nrbB, int);
551 : bates 488 triplet_to_col(ncbB, nrbB, nnz, tmp, Bi, Bx, BTp, BTi, BTx);
552 : bates 434 /* sanity check */
553 : bates 582 if (BTp[nrbB] != nnz) error(_("cscb_trcbsm: transpose operation failed"));
554 : bates 434 Free(tmp);
555 :     /* Solve one column at a time */
556 :     rhs = Calloc(ncbB, double);
557 :     AZERO(Bx, nnz); /* zero the result */
558 :     for (i = 0; i < nrbB; i++) {
559 : bates 556 R_ldl_lsolve(ncbB,
560 :     expand_csc_column(rhs, ncbB, i, BTp, BTi, BTx),
561 : bates 434 Ap, Ai, Ax);
562 : bates 556 /* write non-zeros in sol'n into B */
563 :     for (j = 0; j < ncbB; j++) {
564 : bates 746 if (rhs[j]) Bx[check_csc_index(Bp, Bi, i, j, 0)] = rhs[j];
565 : bates 434 }
566 :     }
567 : bates 746 Free(rhs); Free(BTp); Free(BTx); Free(BTi);
568 :     return;
569 : bates 434 }
570 : bates 582 error(_("cscb_trcbsm: method not yet written"));
571 : bates 434 }
572 : bates 582 error(_("cscb_trcbsm: method not yet written"));
573 : bates 434 }
574 :    
575 :     /**
576 : bates 415 * Perform one of the matrix-matrix operations
577 :     * C := alpha*op(A)*op(B) + beta*C
578 :     * on compressed, sparse, blocked matrices.
579 :     *
580 : bates 565 * @param transa TRN or NTR for transpose or no transpose of A
581 :     * @param transb TRN or NTR for transpose or no transpose of B
582 : bates 415 * @param alpha scalar multiplier
583 : bates 485 * @param A pointer to a dgBCMatrix object
584 :     * @param B pointer to a dgBCMatrix object
585 : bates 415 * @param beta scalar multiplier
586 : bates 485 * @param C pointer to a dgBCMatrix object
587 : bates 415 */
588 : bates 447 void
589 :     cscb_cscbm(enum CBLAS_TRANSPOSE transa, enum CBLAS_TRANSPOSE transb,
590 :     double alpha, SEXP A, SEXP B, double beta, SEXP C)
591 : bates 415 {
592 :     SEXP ApP = GET_SLOT(A, Matrix_pSym),
593 :     AxP = GET_SLOT(A, Matrix_xSym),
594 :     BpP = GET_SLOT(B, Matrix_pSym),
595 :     BxP = GET_SLOT(B, Matrix_xSym),
596 :     CxP = GET_SLOT(C, Matrix_xSym);
597 :     int *Ap = INTEGER(ApP),
598 :     *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)),
599 :     *Bp = INTEGER(BpP),
600 :     *Bi = INTEGER(GET_SLOT(B, Matrix_iSym)),
601 :     *Cp = INTEGER(GET_SLOT(C, Matrix_pSym)),
602 :     *Ci = INTEGER(GET_SLOT(C, Matrix_iSym)),
603 :     *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
604 :     *bdims = INTEGER(getAttrib(BxP, R_DimSymbol)),
605 :     *cdims = INTEGER(getAttrib(CxP, R_DimSymbol)),
606 :     nca = length(ApP) - 1,
607 :     ncb = length(BpP) - 1;
608 :     int ablk = adims[0] * adims[1],
609 :     bblk = bdims[0] * bdims[1],
610 :     cblk = cdims[0] * cdims[1];
611 :     double *Ax = REAL(AxP),
612 :     *Bx = REAL(BxP),
613 :     *Cx = REAL(CxP),
614 :     one = 1.0;
615 :    
616 : bates 448 if ((transa == NTR) && transb == TRN) { /* transposed crossproduct */
617 : bates 415 int jj;
618 :    
619 :     if (adims[1] != bdims[1] ||
620 :     adims[0] != cdims[0] ||
621 :     bdims[0] != cdims[1])
622 : bates 582 error(_("C[%d,%d,%d] := A[%d,%d,%d] %*% t(B[%d,%d,%d])"),
623 : bates 415 cdims[0], cdims[1], cdims[2],
624 :     adims[0], adims[1], adims[2],
625 :     bdims[0], bdims[1], bdims[2]);
626 :     if (nca != ncb)
627 : bates 582 error(_("C := A(ncblocks = %d) %*% t(B(ncblocks = %d)"), nca, ncb);
628 : bates 415 if (beta != 1.) { /* scale C by beta */
629 :     int ctot = cdims[0] * cdims[1] * cdims[2];
630 :     for (jj = 0; jj < ctot; jj++) Cx[jj] *= beta;
631 :     }
632 :     for (jj = 0; jj < nca; jj++) {
633 :     int ia, ib, a2 = Ap[jj + 1], b2 = Bp[jj + 1];
634 :     for (ia = Ap[jj]; ia < a2; ia++) {
635 : bates 448 for (ib = Bp[jj]; ib < b2; ib++) {
636 :     F77_CALL(dgemm)("N", "T", cdims, cdims + 1, adims + 1,
637 : bates 536 &alpha, Ax + ia * ablk, adims,
638 :     Bx + ib * bblk, bdims, &one,
639 : bates 556 Cx + check_csc_index(Cp,Ci,Ai[ia],Bi[ib],0)*cblk,
640 : bates 536 cdims);
641 : bates 339 }
642 :     }
643 :     }
644 : bates 415 return;
645 : bates 339 }
646 : bates 582 error(_("Code not yet written"));
647 : bates 339 }
648 : bates 451
649 : bates 565 /**
650 :     * Coerce a dgBCMatrix to a dgCMatrix
651 :     *
652 :     * @param A pointer to a dgBCMatrix object to coerce
653 :     *
654 :     * @return pointer to a dgCMatrix
655 :     */
656 : bates 485 SEXP dgBCMatrix_to_dgCMatrix(SEXP A)
657 : bates 451 {
658 : bates 478 SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dgCMatrix"))),
659 : bates 451 ApP = GET_SLOT(A, Matrix_pSym),
660 :     AiP = GET_SLOT(A, Matrix_iSym),
661 :     AxP = GET_SLOT(A, Matrix_xSym);
662 :     int *Ai = INTEGER(AiP), *Ap = INTEGER(ApP), *Bi, *Bp, *Dims,
663 :     *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
664 :     ii, j, ncb = length(ApP) - 1, nnz, nrb;
665 :     int nc = adims[1], nr = adims[0];
666 :     int sz = nc * nr;
667 :     double *Ax = REAL(AxP), *Bx;
668 :    
669 : bates 476 SET_SLOT(val, Matrix_factorSym, allocVector(VECSXP, 0));
670 : bates 451 SET_SLOT(val, Matrix_DimSym, allocVector(INTSXP, 2));
671 :     Dims = INTEGER(GET_SLOT(val, Matrix_DimSym));
672 :     Dims[1] = ncb * adims[1];
673 :     /* find number of row blocks */
674 :     for (j = 0, nrb = -1; j < adims[2]; j++) if (Ai[j] > nrb) nrb = Ai[j];
675 : bates 536 Dims[0] = (nrb + 1) * adims[0]; /* +1 because of 0-based indices */
676 : bates 451 nnz = length(AxP);
677 :    
678 :     if (nc == 1) { /* x slot is in the correct order */
679 :     SET_SLOT(val, Matrix_pSym, duplicate(ApP));
680 :     SET_SLOT(val, Matrix_iSym, allocVector(INTSXP, nnz));
681 :     SET_SLOT(val, Matrix_xSym, allocVector(REALSXP, nnz));
682 :     Memcpy(REAL(GET_SLOT(val, Matrix_xSym)), Ax, nnz);
683 :     if (nr == 1) {
684 :     Memcpy(INTEGER(GET_SLOT(val, Matrix_iSym)), Ai, nnz);
685 :     } else {
686 :     Bi = INTEGER(GET_SLOT(val, Matrix_iSym));
687 :     Bp = INTEGER(GET_SLOT(val, Matrix_pSym));
688 :     for (j = 0; j <= ncb; j++) Bp[j] *= nr;
689 :     for (j = 0; j < adims[2]; j++) {
690 :     for (ii = 0; ii < nr; ii++) {
691 :     Bi[j * nr + ii] = Ai[j] * nr + ii;
692 :     }
693 :     }
694 :     }
695 :     } else {
696 :     SET_SLOT(val, Matrix_pSym, allocVector(INTSXP, Dims[1] + 1));
697 :     Bp = INTEGER(GET_SLOT(val, Matrix_pSym));
698 :     SET_SLOT(val, Matrix_iSym, allocVector(INTSXP, nnz));
699 :     Bi = INTEGER(GET_SLOT(val, Matrix_iSym));
700 :     SET_SLOT(val, Matrix_xSym, allocVector(REALSXP, nnz));
701 :     Bx = REAL(GET_SLOT(val, Matrix_xSym));
702 :    
703 :     Bp[0] = 0;
704 :     for (j = 0; j < ncb; j++) { /* Column blocks of A */
705 :     int i, i1 = Ap[j], i2 = Ap[j + 1], jj;
706 :     int nzbc = (i2 - i1) * nr; /* No. of non-zeroes in B column */
707 :    
708 :     for (jj = 0; jj < nc; jj++) { /* column within blocks */
709 :     int jb = nc * j + jj; /* Column number in B */
710 :    
711 :     Bp[jb] = i1 * sz + jj * nzbc;
712 :     for (i = i1; i < i2; i++) { /* index in Ai and Ax */
713 :     for (ii = 0; ii < adims[0]; ii++) { /* row within blocks */
714 :     int ind = ii + (i - i1) * nr + Bp[jb];
715 :    
716 :     Bi[ind] = Ai[i] * sz + jj * nzbc + ii;
717 :     Bx[ind] = Ax[i * sz + jj * nc + ii];
718 :     }
719 :     }
720 :     }
721 :     }
722 :     }
723 :     UNPROTECT(1);
724 :     return val;
725 :     }
726 : bates 536
727 :     SEXP dgBCMatrix_to_dgTMatrix(SEXP A)
728 :     {
729 :     SEXP val = PROTECT(NEW_OBJECT(MAKE_CLASS("dgTMatrix"))),
730 :     ApP = GET_SLOT(A, Matrix_pSym),
731 :     AxP = GET_SLOT(A, Matrix_xSym);
732 :     int *Ai = INTEGER(GET_SLOT(A, Matrix_iSym)), *Ap = INTEGER(ApP),
733 :     *bdims = INTEGER(GET_SLOT(val, Matrix_DimSym)),
734 :     *adims = INTEGER(getAttrib(AxP, R_DimSymbol)),
735 :     i, j, k, kk, ncb = length(ApP) - 1, nnz = length(AxP), nrb;
736 : bates 679 int *Aj = expand_cmprPt(ncb, Ap, Calloc(nnz, int)),
737 : bates 536 *Bi = INTEGER(ALLOC_SLOT(val, Matrix_iSym, INTSXP, nnz)),
738 :     *Bj = INTEGER(ALLOC_SLOT(val, Matrix_jSym, INTSXP, nnz)),
739 :     nblk = adims[2], nc = adims[1], nr = adims[0];
740 :     int sz = nc * nr;
741 :     int *ai = Calloc(sz, int), *aj = Calloc(sz, int);
742 :     double *Ax = REAL(AxP),
743 :     *Bx = REAL(ALLOC_SLOT(val, Matrix_xSym, REALSXP, nnz));
744 :    
745 : bates 556 Memcpy(Bx, Ax, nnz); /* x slot stays as is but w/o dim attribute */
746 : bates 536
747 : bates 556 bdims[1] = ncb * adims[1];
748 :     /* find number of row blocks */
749 : bates 536 for (j = 0, nrb = -1; j < adims[2]; j++) if (Ai[j] > nrb) nrb = Ai[j];
750 :     bdims[0] = (nrb + 1) * adims[0]; /* +1 because of 0-based indices */
751 :    
752 :     for (j = 0; j < nc; j++) { /* arrays of inner indices */
753 :     for (i = 0; i < nr; i++) {
754 :     int ind = j * nc + i;
755 :     ai[ind] = i;
756 :     aj[ind] = j;
757 :     }
758 :     }
759 :     for (i = 0, k = 0; k < nblk; k++) {
760 :     for (kk = 0; kk < sz; kk++) {
761 :     Bi[k * sz + kk] = Ai[k] * nr + ai[kk];
762 :     Bj[k * sz + kk] = Aj[k] * nc + aj[kk];
763 :     }
764 :     }
765 :    
766 :     Free(Aj); Free(ai); Free(aj);
767 :     UNPROTECT(1);
768 :     return val;
769 :     }

root@r-forge.r-project.org
ViewVC Help
Powered by ViewVC 1.0.0  
Thanks to:
Vienna University of Economics and Business Powered By FusionForge