Actual source code: ex69.c
1: static char help[] = "Tests MatCreateDenseCUDA(), MatDenseCUDAPlaceArray(), MatDenseCUDAReplaceArray(), MatDenseCUDAResetArray()\n";
3: #include <petscmat.h>
5: static PetscErrorCode MatMult_S(Mat S,Vec x,Vec y)
6: {
7: Mat A;
10: MatShellGetContext(S,&A);
11: MatMult(A,x,y);
12: return 0;
13: }
15: static PetscBool test_cusparse_transgen = PETSC_FALSE;
17: static PetscErrorCode MatMultTranspose_S(Mat S,Vec x,Vec y)
18: {
19: Mat A;
22: MatShellGetContext(S,&A);
23: MatMultTranspose(A,x,y);
25: /* alternate transgen true and false to test code logic */
26: MatSetOption(A,MAT_FORM_EXPLICIT_TRANSPOSE,test_cusparse_transgen);
27: test_cusparse_transgen = (PetscBool)!test_cusparse_transgen;
28: return 0;
29: }
31: int main(int argc,char **argv)
32: {
33: Mat A,B,C,S;
34: Vec t,v;
35: PetscScalar *vv,*aa;
36: PetscInt n=30,k=6,l=0,i,Istart,Iend,nloc,bs,test=1;
37: PetscBool flg,reset,use_shell = PETSC_FALSE;
38: VecType vtype;
40: PetscInitialize(&argc,&argv,(char*)0,help);
41: PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
42: PetscOptionsGetInt(NULL,NULL,"-k",&k,NULL);
43: PetscOptionsGetInt(NULL,NULL,"-l",&l,NULL);
44: PetscOptionsGetInt(NULL,NULL,"-test",&test,NULL);
45: PetscOptionsGetBool(NULL,NULL,"-use_shell",&use_shell,NULL);
50: /* sparse matrix */
51: MatCreate(PETSC_COMM_WORLD,&A);
52: MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
53: MatSetType(A,MATAIJCUSPARSE);
54: MatSetOptionsPrefix(A,"A_");
55: MatSetFromOptions(A);
56: MatSetUp(A);
58: /* test special case for SeqAIJCUSPARSE to generate explicit transpose (not default) */
59: MatSetOption(A,MAT_FORM_EXPLICIT_TRANSPOSE,PETSC_TRUE);
61: MatGetOwnershipRange(A,&Istart,&Iend);
62: for (i=Istart;i<Iend;i++) {
63: if (i>0) MatSetValue(A,i,i-1,-1.0,INSERT_VALUES);
64: if (i<n-1) MatSetValue(A,i,i+1,-1.0,INSERT_VALUES);
65: MatSetValue(A,i,i,2.0,INSERT_VALUES);
66: }
67: MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
68: MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
70: /* template vector */
71: MatCreateVecs(A,NULL,&t);
72: VecGetType(t,&vtype);
74: /* long vector, contains the stacked columns of an nxk dense matrix */
75: VecGetLocalSize(t,&nloc);
76: VecGetBlockSize(t,&bs);
77: VecCreate(PetscObjectComm((PetscObject)t),&v);
78: VecSetType(v,vtype);
79: VecSetSizes(v,k*nloc,k*n);
80: VecSetBlockSize(v,bs);
81: VecSetRandom(v,NULL);
83: /* dense matrix that contains the columns of v */
84: VecCUDAGetArray(v,&vv);
86: /* test few cases for MatDenseCUDA handling pointers */
87: switch (test) {
88: case 1:
89: MatCreateDenseCUDA(PetscObjectComm((PetscObject)v),nloc,PETSC_DECIDE,n,k-l,vv,&B); /* pass a pointer to avoid allocation of storage */
90: MatDenseCUDAReplaceArray(B,NULL); /* replace with a null pointer, the value after BVRestoreMat */
91: MatDenseCUDAPlaceArray(B,vv+l*nloc); /* set the actual pointer */
92: reset = PETSC_TRUE;
93: break;
94: case 2:
95: MatCreateDenseCUDA(PetscObjectComm((PetscObject)v),nloc,PETSC_DECIDE,n,k-l,NULL,&B);
96: MatDenseCUDAPlaceArray(B,vv+l*nloc); /* set the actual pointer */
97: reset = PETSC_TRUE;
98: break;
99: default:
100: MatCreateDenseCUDA(PetscObjectComm((PetscObject)v),nloc,PETSC_DECIDE,n,k-l,vv+l*nloc,&B);
101: reset = PETSC_FALSE;
102: break;
103: }
104: VecCUDARestoreArray(v,&vv);
106: /* Test MatMatMult */
107: if (use_shell) {
108: /* we could have called the general convertor below, but we explicit set the operations
109: ourselves to test MatProductSymbolic_X_Dense, MatProductNumeric_X_Dense code */
110: /* MatConvert(A,MATSHELL,MAT_INITIAL_MATRIX,&S); */
111: MatCreateShell(PetscObjectComm((PetscObject)v),nloc,nloc,n,n,A,&S);
112: MatShellSetOperation(S,MATOP_MULT,(void(*)(void))MatMult_S);
113: MatShellSetOperation(S,MATOP_MULT_TRANSPOSE,(void(*)(void))MatMultTranspose_S);
114: MatShellSetVecType(S,vtype);
115: } else {
116: PetscObjectReference((PetscObject)A);
117: S = A;
118: }
120: MatCreateDenseCUDA(PetscObjectComm((PetscObject)v),nloc,PETSC_DECIDE,n,k-l,NULL,&C);
121: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
122: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
123: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
124: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
126: /* test MatMatMult */
127: MatProductCreateWithMat(S,B,NULL,C);
128: MatProductSetType(C,MATPRODUCT_AB);
129: MatProductSetFromOptions(C);
130: MatProductSymbolic(C);
131: MatProductNumeric(C);
132: MatMatMultEqual(S,B,C,10,&flg);
133: if (!flg) PetscPrintf(PETSC_COMM_WORLD,"Error MatMatMult\n");
135: /* test MatTransposeMatMult */
136: MatProductCreateWithMat(S,B,NULL,C);
137: MatProductSetType(C,MATPRODUCT_AtB);
138: MatProductSetFromOptions(C);
139: MatProductSymbolic(C);
140: MatProductNumeric(C);
141: MatTransposeMatMultEqual(S,B,C,10,&flg);
142: if (!flg) PetscPrintf(PETSC_COMM_WORLD,"Error MatTransposeMatMult\n");
144: MatDestroy(&C);
145: MatDestroy(&S);
147: /* finished using B */
148: MatDenseCUDAGetArray(B,&aa);
150: MatDenseCUDARestoreArray(B,&aa);
151: if (reset) {
152: MatDenseCUDAResetArray(B);
153: }
154: VecCUDARestoreArray(v,&vv);
156: if (test == 1) {
157: MatDenseCUDAGetArray(B,&aa);
159: MatDenseCUDARestoreArray(B,&aa);
160: }
162: /* free work space */
163: MatDestroy(&B);
164: MatDestroy(&A);
165: VecDestroy(&t);
166: VecDestroy(&v);
167: PetscFinalize();
168: return 0;
169: }
171: /*TEST
173: build:
174: requires: cuda
176: test:
177: requires: cuda
178: suffix: 1
179: nsize: {{1 2}}
180: args: -A_mat_type {{aij aijcusparse}} -test {{0 1 2}} -k 6 -l {{0 5}} -use_shell {{0 1}}
182: TEST*/