subroutine rscf1 (array, right, prodx1, nrwblk, pivot, iflag, * minblk, remblk, nparts, blaws) c double precision array(nrwblk,2*nrwblk,1), * right(nrwblk,nrwblk,1), prodx1(nrwblk,nrwblk,1), * blaws(nrwblk,nrwblk,1) integer nrwblk, pivot(1), iflag, minblk, remblk, nparts c ***----------------------------------------------------------*** c * The following notation is used in the comments: * c * * c * V_k, W_k <=> array(1,1,k), array(1,nrwblk+1,k) * c * R_k <=> right(1,1,k) * c * S_k <=> prodx1(1,1,k) * c * * c * In addition, the affix ' designates that the matrix is * c * transformed at the first level of the factorization. * c ***----------------------------------------------------------*** integer nsquar, kpart, kblok, base, base1, top, info c nsquar = nrwblk**2 c ***----------------------------------------------------------*** c * Each loop 20 iteration is independent and could * c * execute concurrently with the others. * c ***----------------------------------------------------------*** C$DOACROSS SHARE (array, right, prodx1, nrwblk, pivot, iflag, C$& minblk, remblk, nparts, blaws, nsquar), C$& LOCAL (kpart, kblok, base, base1, top, info) do 20 kpart = 1, nparts c ***----------------------------------------------------------*** c * Rescaling starts at the second-last block-row * c * of each partition. * c ***----------------------------------------------------------*** call partx(minblk,remblk,kpart,base,top) base1 = base - 1 c ***----------------------------------------------------------*** c * W_base1' <- (W_base1 - V_base1) * c ***----------------------------------------------------------*** call DAXPY(nsquar,-1.d0,array(1,1,base1),1, * array(1,nrwblk+1,base1),1) c ***----------------------------------------------------------*** c * W_base1' <- LUfact(W_base1 - V_base1) * c ***----------------------------------------------------------*** call DGETRF(nrwblk,nrwblk,array(1,nrwblk+1,base1),nrwblk, * pivot(base1*nrwblk+1),iflag) if (iflag .ne. 0) return c ***----------------------------------------------------------*** c * V_base1' <- (W_base1 - V_base1)^-1 V_base1 * c ***----------------------------------------------------------*** call DGETRS('N',nrwblk,nrwblk,array(1,nrwblk+1,base1), * nrwblk,pivot(base1*nrwblk+1), * array(1,1,base1),nrwblk,info) c ***----------------------------------------------------------*** c * S_kpart' <- (V_base V_base1')^T * c * * c * [ Notes: 1. The transpose of the product is accumulated * c * since Lapack's DGEMM is faster multiplying in * c * this mode when both matrices are dense. * c * * c * 2. After the last call to DGEMM, the resulting * c * matrix must be transposed once. ] * c ***----------------------------------------------------------*** call DGEMM('T','T',nrwblk,nrwblk,nrwblk, * 1.d0,array(1,1,base1),nrwblk, * array(1,1,base),nrwblk, * 0.d0,prodx1(1,1,kpart),nrwblk) c ***----------------------------------------------------------*** c * Each partition is now processed sequentially * c * from the third-last block row to the top. * c ***----------------------------------------------------------*** do 10 kblok = base-2, top, -1 c ***----------------------------------------------------------*** c * R_kblok <- W_kblok * c * * c * [ Note: The right block must be saved in order to * c * transform subsequent right hand sides. ] * c ***----------------------------------------------------------*** call DCOPY(nsquar,array(1,nrwblk+1,kblok),1, * right(1,1,kblok),1) c ***----------------------------------------------------------*** c * W_kblok' <- W_kblok(I + V_kblok+1') - V_kblok * c ***----------------------------------------------------------*** call DCOPY(nsquar,array(1,1,kblok+1),1, * blaws(1,1,kpart),1) call maddi('+',nrwblk,blaws(1,1,kpart)) call DCOPY(nsquar,array(1,1,kblok),1, * array(1,nrwblk+1,kblok),1) call DGEMM('N','N',nrwblk,nrwblk,nrwblk, * 1.d0,right(1,1,kblok),nrwblk, * blaws(1,1,kpart),nrwblk, * -1.d0,array(1,nrwblk+1,kblok),nrwblk) c ***----------------------------------------------------------*** c * W_kblok' <- LUfact(W_kblok(I + V_kblok+1') - V_kblok) * c ***----------------------------------------------------------*** call DGETRF(nrwblk,nrwblk,array(1,nrwblk+1,kblok), * nrwblk,pivot(kblok*nrwblk+1),iflag) if (iflag .ne. 0) return c ***----------------------------------------------------------*** c * V_kblok' <- (W_kblok(I + V_kblok+1') * c * - V_kblok)^-1 V_kblok * c ***----------------------------------------------------------*** call DGETRS('N',nrwblk,nrwblk,array(1,nrwblk+1,kblok), * nrwblk,pivot(kblok*nrwblk+1), * array(1,1,kblok),nrwblk,info) c ***----------------------------------------------------------*** c * S_kpart' <- V_kblok'^T S_kpart' * c ***----------------------------------------------------------*** call DGEMM('T','N',nrwblk,nrwblk,nrwblk, * 1.d0,array(1,1,kblok),nrwblk, * prodx1(1,1,kpart),nrwblk, * 0.d0,blaws(1,1,kpart),nrwblk) call DCOPY(nsquar,blaws(1,1,kpart),1, * prodx1(1,1,kpart),1) 10 continue c ***----------------------------------------------------------*** c * The final product must be transposed. * c ***----------------------------------------------------------*** call mtran(nrwblk,prodx1(1,1,kpart)) c ***----------------------------------------------------------*** c * If there was an odd number of multiplications, * c * the final product also must be negated. * c ***----------------------------------------------------------*** if (mod(base-top,2) .ne. 0) then call mnegv(nrwblk,prodx1(1,1,kpart)) end if 20 continue c ***----------------------------------------------------------*** c * The second-level array right blocks are computed. * c * (This could be done concurrently.) * c ***----------------------------------------------------------*** do 30 kpart = 1, nparts - 1 call partx(minblk,remblk,kpart,base,top) c ***----------------------------------------------------------*** c * R_base <- W_base * c ***----------------------------------------------------------*** call DCOPY(nsquar,array(1,nrwblk+1,base),1, * right(1,1,base),1) c ***----------------------------------------------------------*** c * W_base' <- W_base(I + V_base+1') * c ***----------------------------------------------------------*** call DCOPY(nsquar,array(1,1,base+1),1, * blaws(1,1,kpart),1) call maddi('+',nrwblk,blaws(1,1,kpart)) call DGEMM('N','N',nrwblk,nrwblk,nrwblk, * 1.d0,right(1,1,base),nrwblk, * blaws(1,1,kpart),nrwblk, * 0.d0,array(1,nrwblk+1,base),nrwblk) c ***----------------------------------------------------------*** c * R_base-1 <- W_base(I + V_base+1') * c * * c * [ Notes: 1. R_base-1 is free at this point. It is the * c * only right block that need not be saved. * c * * c * 2. W_base(I + V_base+1') is stored in R_base-1 * c * for use during second-level processing. ] * c ***----------------------------------------------------------*** call DCOPY(nsquar,array(1,nrwblk+1,base),1, * right(1,1,base-1),1) 30 continue return end