ergo
template_blas_trmm.h
Go to the documentation of this file.
1/* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_BLAS_TRMM_HEADER
38#define TEMPLATE_BLAS_TRMM_HEADER
39
41
42template<class Treal>
43int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag,
44 const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *
45 lda, Treal *b, const integer *ldb)
46{
47 /* System generated locals */
48 integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
49 /* Local variables */
50 integer info;
51 Treal temp;
52 integer i__, j, k;
53 logical lside;
54 integer nrowa;
55 logical upper;
56 logical nounit;
57#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
58#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
59/* Purpose
60 =======
61 DTRMM performs one of the matrix-matrix operations
62 B := alpha*op( A )*B, or B := alpha*B*op( A ),
63 where alpha is a scalar, B is an m by n matrix, A is a unit, or
64 non-unit, upper or lower triangular matrix and op( A ) is one of
65 op( A ) = A or op( A ) = A'.
66 Parameters
67 ==========
68 SIDE - CHARACTER*1.
69 On entry, SIDE specifies whether op( A ) multiplies B from
70 the left or right as follows:
71 SIDE = 'L' or 'l' B := alpha*op( A )*B.
72 SIDE = 'R' or 'r' B := alpha*B*op( A ).
73 Unchanged on exit.
74 UPLO - CHARACTER*1.
75 On entry, UPLO specifies whether the matrix A is an upper or
76 lower triangular matrix as follows:
77 UPLO = 'U' or 'u' A is an upper triangular matrix.
78 UPLO = 'L' or 'l' A is a lower triangular matrix.
79 Unchanged on exit.
80 TRANSA - CHARACTER*1.
81 On entry, TRANSA specifies the form of op( A ) to be used in
82 the matrix multiplication as follows:
83 TRANSA = 'N' or 'n' op( A ) = A.
84 TRANSA = 'T' or 't' op( A ) = A'.
85 TRANSA = 'C' or 'c' op( A ) = A'.
86 Unchanged on exit.
87 DIAG - CHARACTER*1.
88 On entry, DIAG specifies whether or not A is unit triangular
89 as follows:
90 DIAG = 'U' or 'u' A is assumed to be unit triangular.
91 DIAG = 'N' or 'n' A is not assumed to be unit
92 triangular.
93 Unchanged on exit.
94 M - INTEGER.
95 On entry, M specifies the number of rows of B. M must be at
96 least zero.
97 Unchanged on exit.
98 N - INTEGER.
99 On entry, N specifies the number of columns of B. N must be
100 at least zero.
101 Unchanged on exit.
102 ALPHA - DOUBLE PRECISION.
103 On entry, ALPHA specifies the scalar alpha. When alpha is
104 zero then A is not referenced and B need not be set before
105 entry.
106 Unchanged on exit.
107 A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
108 when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
109 Before entry with UPLO = 'U' or 'u', the leading k by k
110 upper triangular part of the array A must contain the upper
111 triangular matrix and the strictly lower triangular part of
112 A is not referenced.
113 Before entry with UPLO = 'L' or 'l', the leading k by k
114 lower triangular part of the array A must contain the lower
115 triangular matrix and the strictly upper triangular part of
116 A is not referenced.
117 Note that when DIAG = 'U' or 'u', the diagonal elements of
118 A are not referenced either, but are assumed to be unity.
119 Unchanged on exit.
120 LDA - INTEGER.
121 On entry, LDA specifies the first dimension of A as declared
122 in the calling (sub) program. When SIDE = 'L' or 'l' then
123 LDA must be at least max( 1, m ), when SIDE = 'R' or 'r'
124 then LDA must be at least max( 1, n ).
125 Unchanged on exit.
126 B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
127 Before entry, the leading m by n part of the array B must
128 contain the matrix B, and on exit is overwritten by the
129 transformed matrix.
130 LDB - INTEGER.
131 On entry, LDB specifies the first dimension of B as declared
132 in the calling (sub) program. LDB must be at least
133 max( 1, m ).
134 Unchanged on exit.
135 Level 3 Blas routine.
136 -- Written on 8-February-1989.
137 Jack Dongarra, Argonne National Laboratory.
138 Iain Duff, AERE Harwell.
139 Jeremy Du Croz, Numerical Algorithms Group Ltd.
140 Sven Hammarling, Numerical Algorithms Group Ltd.
141 Test the input parameters.
142 Parameter adjustments */
143 a_dim1 = *lda;
144 a_offset = 1 + a_dim1 * 1;
145 a -= a_offset;
146 b_dim1 = *ldb;
147 b_offset = 1 + b_dim1 * 1;
148 b -= b_offset;
149 /* Function Body */
150 lside = template_blas_lsame(side, "L");
151 if (lside) {
152 nrowa = *m;
153 } else {
154 nrowa = *n;
155 }
156 nounit = template_blas_lsame(diag, "N");
157 upper = template_blas_lsame(uplo, "U");
158 info = 0;
159 if (! lside && ! template_blas_lsame(side, "R")) {
160 info = 1;
161 } else if (! upper && ! template_blas_lsame(uplo, "L")) {
162 info = 2;
163 } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa,
164 "T") && ! template_blas_lsame(transa, "C")) {
165 info = 3;
166 } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag,
167 "N")) {
168 info = 4;
169 } else if (*m < 0) {
170 info = 5;
171 } else if (*n < 0) {
172 info = 6;
173 } else if (*lda < maxMACRO(1,nrowa)) {
174 info = 9;
175 } else if (*ldb < maxMACRO(1,*m)) {
176 info = 11;
177 }
178 if (info != 0) {
179 template_blas_erbla("TRMM ", &info);
180 return 0;
181 }
182/* Quick return if possible. */
183 if (*n == 0) {
184 return 0;
185 }
186/* And when alpha.eq.zero. */
187 if (*alpha == 0.) {
188 i__1 = *n;
189 for (j = 1; j <= i__1; ++j) {
190 i__2 = *m;
191 for (i__ = 1; i__ <= i__2; ++i__) {
192 b_ref(i__, j) = 0.;
193/* L10: */
194 }
195/* L20: */
196 }
197 return 0;
198 }
199/* Start the operations. */
200 if (lside) {
201 if (template_blas_lsame(transa, "N")) {
202/* Form B := alpha*A*B. */
203 if (upper) {
204 i__1 = *n;
205 for (j = 1; j <= i__1; ++j) {
206 i__2 = *m;
207 for (k = 1; k <= i__2; ++k) {
208 if (b_ref(k, j) != 0.) {
209 temp = *alpha * b_ref(k, j);
210 i__3 = k - 1;
211 for (i__ = 1; i__ <= i__3; ++i__) {
212 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
213 i__, k);
214/* L30: */
215 }
216 if (nounit) {
217 temp *= a_ref(k, k);
218 }
219 b_ref(k, j) = temp;
220 }
221/* L40: */
222 }
223/* L50: */
224 }
225 } else {
226 i__1 = *n;
227 for (j = 1; j <= i__1; ++j) {
228 for (k = *m; k >= 1; --k) {
229 if (b_ref(k, j) != 0.) {
230 temp = *alpha * b_ref(k, j);
231 b_ref(k, j) = temp;
232 if (nounit) {
233 b_ref(k, j) = b_ref(k, j) * a_ref(k, k);
234 }
235 i__2 = *m;
236 for (i__ = k + 1; i__ <= i__2; ++i__) {
237 b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
238 i__, k);
239/* L60: */
240 }
241 }
242/* L70: */
243 }
244/* L80: */
245 }
246 }
247 } else {
248/* Form B := alpha*A'*B. */
249 if (upper) {
250 i__1 = *n;
251 for (j = 1; j <= i__1; ++j) {
252 for (i__ = *m; i__ >= 1; --i__) {
253 temp = b_ref(i__, j);
254 if (nounit) {
255 temp *= a_ref(i__, i__);
256 }
257 i__2 = i__ - 1;
258 for (k = 1; k <= i__2; ++k) {
259 temp += a_ref(k, i__) * b_ref(k, j);
260/* L90: */
261 }
262 b_ref(i__, j) = *alpha * temp;
263/* L100: */
264 }
265/* L110: */
266 }
267 } else {
268 i__1 = *n;
269 for (j = 1; j <= i__1; ++j) {
270 i__2 = *m;
271 for (i__ = 1; i__ <= i__2; ++i__) {
272 temp = b_ref(i__, j);
273 if (nounit) {
274 temp *= a_ref(i__, i__);
275 }
276 i__3 = *m;
277 for (k = i__ + 1; k <= i__3; ++k) {
278 temp += a_ref(k, i__) * b_ref(k, j);
279/* L120: */
280 }
281 b_ref(i__, j) = *alpha * temp;
282/* L130: */
283 }
284/* L140: */
285 }
286 }
287 }
288 } else {
289 if (template_blas_lsame(transa, "N")) {
290/* Form B := alpha*B*A. */
291 if (upper) {
292 for (j = *n; j >= 1; --j) {
293 temp = *alpha;
294 if (nounit) {
295 temp *= a_ref(j, j);
296 }
297 i__1 = *m;
298 for (i__ = 1; i__ <= i__1; ++i__) {
299 b_ref(i__, j) = temp * b_ref(i__, j);
300/* L150: */
301 }
302 i__1 = j - 1;
303 for (k = 1; k <= i__1; ++k) {
304 if (a_ref(k, j) != 0.) {
305 temp = *alpha * a_ref(k, j);
306 i__2 = *m;
307 for (i__ = 1; i__ <= i__2; ++i__) {
308 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
309 i__, k);
310/* L160: */
311 }
312 }
313/* L170: */
314 }
315/* L180: */
316 }
317 } else {
318 i__1 = *n;
319 for (j = 1; j <= i__1; ++j) {
320 temp = *alpha;
321 if (nounit) {
322 temp *= a_ref(j, j);
323 }
324 i__2 = *m;
325 for (i__ = 1; i__ <= i__2; ++i__) {
326 b_ref(i__, j) = temp * b_ref(i__, j);
327/* L190: */
328 }
329 i__2 = *n;
330 for (k = j + 1; k <= i__2; ++k) {
331 if (a_ref(k, j) != 0.) {
332 temp = *alpha * a_ref(k, j);
333 i__3 = *m;
334 for (i__ = 1; i__ <= i__3; ++i__) {
335 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
336 i__, k);
337/* L200: */
338 }
339 }
340/* L210: */
341 }
342/* L220: */
343 }
344 }
345 } else {
346/* Form B := alpha*B*A'. */
347 if (upper) {
348 i__1 = *n;
349 for (k = 1; k <= i__1; ++k) {
350 i__2 = k - 1;
351 for (j = 1; j <= i__2; ++j) {
352 if (a_ref(j, k) != 0.) {
353 temp = *alpha * a_ref(j, k);
354 i__3 = *m;
355 for (i__ = 1; i__ <= i__3; ++i__) {
356 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
357 i__, k);
358/* L230: */
359 }
360 }
361/* L240: */
362 }
363 temp = *alpha;
364 if (nounit) {
365 temp *= a_ref(k, k);
366 }
367 if (temp != 1.) {
368 i__2 = *m;
369 for (i__ = 1; i__ <= i__2; ++i__) {
370 b_ref(i__, k) = temp * b_ref(i__, k);
371/* L250: */
372 }
373 }
374/* L260: */
375 }
376 } else {
377 for (k = *n; k >= 1; --k) {
378 i__1 = *n;
379 for (j = k + 1; j <= i__1; ++j) {
380 if (a_ref(j, k) != 0.) {
381 temp = *alpha * a_ref(j, k);
382 i__2 = *m;
383 for (i__ = 1; i__ <= i__2; ++i__) {
384 b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
385 i__, k);
386/* L270: */
387 }
388 }
389/* L280: */
390 }
391 temp = *alpha;
392 if (nounit) {
393 temp *= a_ref(k, k);
394 }
395 if (temp != 1.) {
396 i__1 = *m;
397 for (i__ = 1; i__ <= i__1; ++i__) {
398 b_ref(i__, k) = temp * b_ref(i__, k);
399/* L290: */
400 }
401 }
402/* L300: */
403 }
404 }
405 }
406 }
407 return 0;
408/* End of DTRMM . */
409} /* dtrmm_ */
410#undef b_ref
411#undef a_ref
412
413#endif
side
Definition: Matrix.h:75
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
int integer
Definition: template_blas_common.h:40
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
bool logical
Definition: template_blas_common.h:41
#define a_ref(a_1, a_2)
#define b_ref(a_1, a_2)
int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, Treal *b, const integer *ldb)
Definition: template_blas_trmm.h:43