diff --git a/examples/stokes/stok_mob_iter_example.f90 b/examples/stokes/stok_mob_iter_example.f90
new file mode 100644
index 00000000..96580a20
--- /dev/null
+++ b/examples/stokes/stok_mob_iter_example.f90
@@ -0,0 +1,222 @@
+      implicit real *8 (a-h,o-z) 
+      implicit integer *8 (i-n)
+      real *8, allocatable :: srcvals(:,:),srccoefs(:,:)
+      real *8, allocatable :: wts(:)
+      integer *8, allocatable :: norders(:),ixyzs(:),iptype(:)
+      
+      real *8, allocatable :: srcvals1(:,:),srccoefs1(:,:)
+      real *8, allocatable :: srccoefs2(:,:)
+      real *8, allocatable :: wts1(:)
+      integer *8, allocatable :: norders1(:),ixyzs1(:),iptype1(:)
+
+      real *8 ts(2), rres
+      real *8, allocatable :: errs(:)
+      character *100 fname
+
+      real *8 c0(3), abc(3)
+      real *8 nabc(3)
+      real *8, allocatable :: forces(:,:), torques(:,:)
+      real *8, allocatable :: trans_vels_ex(:,:), rot_vels_ex(:,:)
+      real *8, allocatable :: trans_vels(:,:), rot_vels(:,:)
+      real *8, allocatable :: centroids(:,:), shifts(:,:), rmois(:,:,:)
+      real *8, allocatable :: soln_mob(:,:), soln_res(:,:), rhs_res(:,:)
+      complex * 16 zpars
+      integer *8 int8_0,int8_3,int8_9
+      integer *8 ncomp
+      integer *8, allocatable :: icomps(:)
+      real *8 xdiff(3), rtmp(3), dpars(2) 
+
+      call prini(6,13)
+
+      int8_0 = 0
+      int8_3 = 3
+      int8_9 = 9
+      done = 1
+      pi = atan(done)*4
+
+      nlatx = 2
+      nlaty = 1
+      nlatz = 1
+
+      dx = 4.0d0
+      dy = 4.0d0
+      dz = 4.0d0
+
+      dh = 0.01d0*0
+
+      ncomp = nlatx*nlaty*nlatz
+
+      allocate(shifts(3,ncomp))
+      allocate(trans_vels_ex(3,ncomp), rot_vels_ex(3,ncomp))
+      do ilatz = 1,nlatz
+        do ilaty = 1,nlaty
+          do ilatx = 1,nlatx
+            icomp = (ilatz-1)*nlatx*nlaty + (ilaty-1)*nlatx + ilatx
+            shifts(1,icomp) = (ilatx-1)*dx + 2*dh*(hkrand(0)-0.5d0)
+            shifts(2,icomp) = (ilaty-1)*dy + 2*dh*(hkrand(0)-0.5d0)
+            shifts(3,icomp) = (ilatz-1)*dz + 2*dh*(hkrand(0)-0.5d0)
+
+            trans_vels_ex(1,icomp) = hkrand(0)-0.5d0
+            trans_vels_ex(2,icomp) = (hkrand(0)-0.5d0)
+            trans_vels_ex(3,icomp) = (hkrand(0)-0.5d0)
+
+            rot_vels_ex(1,icomp) = (hkrand(0)-0.5d0)
+            rot_vels_ex(2,icomp) = (hkrand(0)-0.5d0)
+            rot_vels_ex(3,icomp) = (hkrand(0)-0.5d0)
+          enddo
+        enddo
+      enddo
+
+      call prin2('shifts=*',shifts,3*ncomp)
+
+!
+!  extract shape of geometry to be extracted
+!
+      a = 1.0d0
+      na = 2
+      c0(1) = 0
+      c0(2) = 0
+      c0(3) = 0
+
+      iptype0 = 1
+
+      norder = 7
+      npols = (norder+1)*(norder+2)/2
+      
+      npatches1 = 0
+      npts1 = 0
+      call get_sphere_npat_mem(a, na, c0, norder, iptype0, npatches1, &
+        npts1)
+      call prinf('npatches=*',npatches1,1)
+      call prinf('npts=*',npts1,1)
+      
+      allocate(srcvals1(12,npts1), srccoefs1(9,npts1), &
+        ixyzs1(npatches1+1), iptype1(npatches1), norders1(npatches1))
+      call get_sphere_npat(a, na, c0, norder, iptype0, npatches1, &
+        npts1, norders1, ixyzs1, iptype1, srccoefs1, srcvals1)
+
+      allocate(icomps(ncomp+1))
+      icomps(1) = 1
+      icomps(2) = npatches+1
+      npatches = ncomp*npatches1
+      npts = ncomp*npts1
+!
+!
+!
+      do icomp=1,ncomp+1
+        icomps(icomp) = npatches1*(icomp-1)+1
+      enddo
+!
+!  Define ixyzs
+!
+      allocate(ixyzs(npatches+1), iptype(npatches), norders(npatches))
+      allocate(srcvals(12,npts),srccoefs(9,npts))
+      do icomp = 1,ncomp
+        ipstart = icomps(icomp)
+        do j=1,npatches1
+          ixyzs(ipstart+j-1) = ixyzs1(j) + (icomp-1)*npts1
+          norders(ipstart+j-1) = norders1(j)
+          iptype(ipstart+j-1) = iptype1(j)
+        enddo
+        istart = (icomp-1)*npts1 + 1
+        do j=1,npts1
+           srcvals(1:12,istart+j-1) = srcvals1(1:12,j) 
+           srcvals(1:3,istart+j-1) = srcvals(1:3,istart+j-1) + &
+                     shifts(1:3,icomp)
+        enddo
+      enddo
+
+
+      ixyzs(npatches+1) = npts+1
+      allocate(srccoefs2(9,npts))
+      int8_9 = 9
+      call surf_vals_to_coefs(int8_9, npatches, norders, ixyzs, iptype, &
+       npts, srcvals(1:9,1:npts), srccoefs)
+      allocate(wts(npts))
+      call get_qwts(npatches, norders, ixyzs, iptype, npts, srcvals, &
+        wts)
+
+
+      allocate(rhs_res(3,npts),soln_res(3,npts))
+      allocate(centroids(3,ncomp),rmois(3,3,ncomp))
+      do icomp=1,ncomp
+        ipstart = icomps(icomp)
+        ipend = icomps(icomp+1)-1
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend+1)-1
+        nptsloc = iend - istart + 1
+        call get_surf_moments(npatches1, norders(ipstart), ixyzs1, &
+          iptype(ipstart), npts1, srcvals(1,istart), wts(istart), &
+          area, centroids(1,icomp), rmois(1,1,icomp))
+        do i=istart,iend
+          xdiff(1:3) = srcvals(1:3,i) - centroids(1:3,icomp)
+          call cross_prod3d(xdiff, rot_vels_ex(1:3,icomp), rtmp)
+          rhs_res(1:3,i) = trans_vels_ex(1:3,icomp) + rtmp(1:3)
+        enddo
+      enddo
+      call prin2('centroids=*',centroids,3*ncomp)
+      call prin2('shifts=*',shifts,3*ncomp)
+      call prin2('rmois=*',rmois,9*ncomp)
+
+      numit = 100
+      allocate(errs(numit+1))
+      eps = 1.0d-8
+      eps_gmres = 1.0d-8
+      ifinout = 1
+      dpars(1) = 1.0d0
+      dpars(2) = 2.0d0
+
+      call stok_comb_vel_solver(npatches, norders, ixyzs, iptype, npts, &
+        srccoefs, srcvals, eps, dpars, numit, ifinout, rhs_res, &
+        eps_gmres, niter, errs, rres, soln_res)
+      call prin2('soln_res=*',soln_res,24)
+      allocate(forces(3,ncomp), torques(3,ncomp))
+      allocate(trans_vels(3,ncomp), rot_vels(3,ncomp))
+      
+      do icomp=1,ncomp
+        ipstart = icomps(icomp)
+        ipend = icomps(icomp+1)-1
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend+1)-1
+        nptsloc = iend - istart + 1
+        forces(1:3,icomp) = 0
+        torques(1:3,icomp) = 0
+        trans_vels(1:3,icomp) = 0
+        rot_vels(1:3,icomp) = 0
+        do i=istart,iend
+          forces(1:3,icomp) = forces(1:3,icomp) + &
+            soln_res(1:3,i)*wts(i)
+          xdiff(1:3) = srcvals(1:3,i) - centroids(1:3,icomp)
+          call cross_prod3d(xdiff, soln_res(1,i), rtmp)
+          torques(1:3,icomp) = torques(1:3,icomp) + rtmp(1:3)*wts(i)
+        enddo
+      enddo
+      call prin2('forces=*',forces,3*ncomp)
+      call prin2('torques=*',torques,3*ncomp)
+
+
+
+      allocate(soln_mob(3,npts))
+
+
+      call stok_s_mob_solver(npatches, norders, ixyzs, iptype, npts, &
+        srccoefs, srcvals, ncomp, icomps, eps, numit, forces, &
+        torques, eps, niter, errs, rres, soln_mob, trans_vels, rot_vels)
+      
+      print *, "============================="
+      call prin2('trans_vels1=*',trans_vels,3*ncomp)
+      call prin2('trans_vels_ex=*',trans_vels_ex,3*ncomp)
+      print *, " "
+      print *, "============================="
+      print *, " "
+      call prin2('rot_vels1=*',rot_vels,3*ncomp)
+      call prin2('rot_vels_ex=*',rot_vels_ex,3*ncomp)
+      print *, " "
+      print *, "============================="
+
+
+      
+      stop
+      end
+
+
diff --git a/makefile b/makefile
index b9c4fc64..64fc71d1 100644
--- a/makefile
+++ b/makefile
@@ -140,7 +140,7 @@ EMOBJS = $(EM)/em_mfie_pec.o $(EM)/em_aumfie_pec.o \
 
 # Stokes wrappers
 STOK = src/stok_wrappers
-STOKOBJS = $(STOK)/stok_comb_vel.o 
+STOKOBJS = $(STOK)/stok_comb_vel.o $(STOK)/stok_s_mob.o
 
 # Kernels
 KER = src/kernels
diff --git a/src/common/rigidbodies.f b/src/common/rigidbodies.f
index 8f6ecdaf..b3802239 100644
--- a/src/common/rigidbodies.f
+++ b/src/common/rigidbodies.f
@@ -197,3 +197,36 @@ subroutine dcrossvectomat(r,a)
 
      
       
+      subroutine get_linear_transf_eul(deul, dmat)
+c
+c  compute the 3x3 rotation matrix corresponding to euler angles
+c  in the zxz notation
+c 
+c  Input arguments:
+c    deul: real *(3)
+c      the three Euler angles
+c  Output arguments:
+c    dmat: real *8(3,3)
+c      the corresponding rotation matrix 
+c
+      implicit real *8 (a-h,o-z)
+      real *8 deul(3), dmat(3,3)
+
+      psi = deul(1)
+      thet = deul(2)
+      phi = deul(3)
+
+      dmat(1,1) = cos(phi)*cos(psi) - sin(phi)*cos(thet)*sin(psi)
+      dmat(1,2) = cos(phi)*sin(psi) + sin(phi)*cos(thet)*cos(psi)
+      dmat(1,3) = sin(thet)*sin(phi)
+  
+      dmat(2,1) = -sin(phi)*cos(psi)
+      dmat(2,2) = -sin(phi)*sin(psi) + cos(phi)*cos(thet)*cos(psi)
+      dmat(2,3) = cos(phi)*sin(thet)
+
+      dmat(3,1) = sin(thet)*sin(psi)
+      dmat(3,2) = -sin(thet)*cos(psi)
+      dmat(3,3) = cos(thet)
+
+      return
+      end
diff --git a/src/stok_wrappers/stok_comb_vel.f90 b/src/stok_wrappers/stok_comb_vel.f90
index 3b57093d..d4e17eac 100644
--- a/src/stok_wrappers/stok_comb_vel.f90
+++ b/src/stok_wrappers/stok_comb_vel.f90
@@ -1072,12 +1072,14 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
       real *8, allocatable :: sigmaover(:,:)
       real *8, allocatable :: stoklet(:,:), strslet(:,:)
       real *8, allocatable :: pottarg(:,:), strsvec(:,:)
+      real *8, allocatable :: rotlet(:,:), rotvec(:,:)
+      real *8, allocatable :: doublet(:,:), doubvec(:,:)
       integer *8 ns, nt
       real *8 alpha, beta
       integer *8 ifstoklet, ifstrslet
+      integer *8 ifrotlet, ifdoublet
       integer *8 ifppreg, ifppregtarg
       real *8 tmp(10), val
-      real *8 over4pi
 
       real *8 w11, w12, w13, w21, w22, w23, w31, w32, w33
       real *8 sig1, sig2, sig3
@@ -1107,7 +1109,6 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
       real *8 potv(3), pv, gradv(3,3)
 
       parameter (nd=1,ntarg0=1)
-      data over4pi/0.07957747154594767d0/
 
       ns = nptso
       done = 1
@@ -1146,8 +1147,8 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
 !      set relevant parameters for the fmm
 !
  
-      alpha = dpars(1)*over4pi
-      beta = dpars(2)*over4pi
+      alpha = dpars(1)
+      beta = dpars(2)
 
 
 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)      
@@ -1163,9 +1164,9 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
 !  Stokes double layer is negative of a stresslet with orienatation
 !  given by surface normal
 !
-        strslet(1,i) = -sigmaover(1,i)*whtsover(i)*beta 
-        strslet(2,i) = -sigmaover(2,i)*whtsover(i)*beta 
-        strslet(3,i) = -sigmaover(3,i)*whtsover(i)*beta
+        strslet(1,i) = sigmaover(1,i)*whtsover(i)*beta 
+        strslet(2,i) = sigmaover(2,i)*whtsover(i)*beta 
+        strslet(3,i) = sigmaover(3,i)*whtsover(i)*beta
 
         strsvec(1,i) = srcover(10,i)
         strsvec(2,i) = srcover(11,i)
@@ -1186,6 +1187,9 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
       ifstoklet = 1
       ifstrslet = 1
 
+      ifrotlet = 0
+      ifdoublet = 0
+
       if(alpha.eq.0) ifstoklet = 0
       if(beta.eq.0) ifstrslet = 0
 
@@ -1200,7 +1204,8 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
       call cpu_time(t1)
 !$      t1 = omp_get_wtime()      
       call stfmm3d(nd, eps, ns, sources, ifstoklet, stoklet, &
-        ifstrslet, strslet, strsvec, ifppreg, tmp, tmp, tmp, ntarg, &
+        ifstrslet, strslet, strsvec, ifrotlet, rotlet, rotvec, &
+        ifdoublet, doublet, doubvec, ifppreg, tmp, tmp, tmp, ntarg, &
         targvals, ifppregtarg, pottarg, tmp, tmp, ier)
       call cpu_time(t2)
 !$      t2 = omp_get_wtime()
diff --git a/src/stok_wrappers/stok_s_mob.f90 b/src/stok_wrappers/stok_s_mob.f90
new file mode 100644
index 00000000..4ef86183
--- /dev/null
+++ b/src/stok_wrappers/stok_s_mob.f90
@@ -0,0 +1,1578 @@
+!ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+!
+!  Single layer representation for the Stokes mobility problem 
+!
+!  PDE:
+!    \Delta u       = \nabla p
+!    \Delta \cdot u = 0
+!
+!  Boundary conditions:
+!    u = u_{i} + \omega_{i} \times (x - x_{c,i}) on component \Gamma_{i}
+!      where u_{i}, \omega_{i} are unknown
+!    \int_{\Gamma_{i}} f dS = F_{i}, f is the surface traction
+!    \int_{\Gamma_{i}} (x - x_{c,i}) \times f ds = T_{i}
+!      where F_{i}, T_{i} are the forces and torques on body i and
+!      are given
+!
+!
+!  Representation:
+!    u = S_{stok}[\sigma] + S_{stok}[\sigma_{0}] 
+!  
+!  where \sigma_{0} = F_{i}/|\Gamma_{i}| + 
+!     \tau_{i}^{-1} T_{i} \times (x - x_{c,i})
+!
+!  with \tau_{i} is the moment of inertia tensor. In addition, 
+!
+!  \sigma satisfies
+!    \int_{\Gamma_{i}} \sigma = 0, 
+!    \int_{\Gamma_{i}} (x-x_{c,i}) \times \sigma = 0
+!
+!  Integral equations obtained by imposing:
+!    f^{-} = 0 \,,
+!
+!  where f^{-} is the interior surface traction
+!
+!  and is given by:
+!    1/2 \sigma + S_{stok}'[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  along with the constraints above. 
+!
+!  Instead, we use the generalized 1's matrix trick to solve for
+!  sigma as
+!    1/2 \sigma + S_{stok}'[\sigma] + L[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  where L[\sigma] is given by
+!     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
+!       \tau_{i}^{-1}(\int_{{\Gamma_{i}}} (y-x_{c,i}) \times \sigma dS) \times (x-x_{c,i})
+!  on \Gamma_{i}
+!
+!
+!ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+!
+!  User callable routines:
+!    - stok_s_mob_solver: Given data F_{i}, T_{i}, 
+!       this routine returns the solution \sigma, and u_{i}, \omega_{i}
+!
+!    - stok_s_vel_eval: Given \sigma this routine
+!        evaluates the solution at a collection of targets
+!
+!ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+!
+!  Advanced interfaces:
+!    - getnearquad_stok_s_mob: compute the near quadrature correction
+!        for constructing the on-surface integral equation for the
+!        traction data corresponding to the single layer
+!        representation with user-provided near-information prescribed
+!        in row-sparse compressed format
+!
+!    - stok_s_mob_addsub: apply the principal value part
+!        of the integeral equation on surface. On input, user provides
+!        precomputed near quadrature in row-sparse compressed format,
+!        and oversampling information for handling the far part of the
+!        computation
+!
+!    - stok_s_mob_solver_guru: Guru solver routine, where user is
+!        responsible for providing precomputed near quadrature
+!        information in row-sparse compressed format and oversampling
+!        surface information for the far-part
+!
+!
+
+      subroutine getnearquad_stok_s_mob(npatches, norders, &
+        ixyzs, iptype, npts, srccoefs, srcvals, eps, &
+        iquadtype, nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear)
+!
+!  This subroutine generates the near field quadrature
+!  for the representation:
+!
+!  u = S_{stok}[\sigma] - (1) 
+!
+!  and returns quantities related to evaluating the surface 
+!  traction of u on surface at the surface discretization nodes.
+!
+!  On imposing the boundary condition, we get the following operator
+!
+!  u = \sigma/2 + S_{stok}'[\sigma] 
+!
+!  The quadrature is computed by the following strategy
+!  targets within a sphere of radius rfac0*rs
+!  of a patch centroid is handled using adaptive integration
+!  where rs is the radius of the bounding sphere
+!  for the patch
+!  
+!  All other targets in the near field are handled via
+!  oversampled quadrature
+!
+!  The recommended parameter for rfac0 is 1.25d0
+!  
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev 
+!                     nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - eps: real *8
+!        precision requested
+!    - iquadtype: integer *8
+!        quadrature type
+!          * iquadtype = 1, use ggq for self + adaptive integration
+!            for rest
+!    - nnz: integer *8
+!        number of source patch-> target interactions in the near field
+!    - row_ptr: integer *8(npts+1)
+!        row_ptr(i) is the pointer
+!        to col_ind array where list of relevant source patches
+!        for target i start
+!    - col_ind: integer *8 (nnz)
+!        list of source patches relevant for all targets, sorted
+!        by the target number
+!    - iquad: integer *8(nnz+1)
+!        location in wnear_ij array where quadrature for col_ind(i)
+!        starts for a single kernel. In this case the different kernels
+!        are matrix entries are located at (m-1)*nquad+iquad(i), where
+!        m is the kernel number
+!    - rfac0: real *8
+!        radius parameter for switching to predetermined quadarature
+!        rule        
+!    - nquad: integer *8
+!        number of near field entries corresponding to each source 
+!        target pair
+!
+!  Output arguments
+!    - wnear: real *8(6,nquad)
+!        The desired near field quadrature for
+!        \alpha S_{stok} + \beta D_{stok}. Since the Stokes 
+!        tensor is symmetric, quadratures are only
+!        generated for the lower half of the tensor
+!        * wnear(1,:) - stores the quadratures for (1,1) entry of the 
+!                       tensor 
+!        * wnear(2,:) - stores the quadratures for (1,2) entry of the 
+!                       tensor 
+!        * wnear(3,:) - stores the quadratures for (1,3) entry of the 
+!                       tensor 
+!        * wnear(4,:) - stores the quadratures for (2,2) entry of the 
+!                       tensor 
+!        * wnear(5,:) - stores the quadratures for (2,3) entry of the 
+!                       tensor 
+!        * wnear(6,:) - stores the quadratures for (3,3) entry of the 
+!                       tensor 
+!
+      implicit none 
+      integer *8, intent(in) :: npatches, norders(npatches), npts, nquad
+      integer *8, intent(in) :: ixyzs(npatches+1), iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts), eps
+      real *8, intent(in) :: rfac0
+      integer *8, intent(in) :: iquadtype
+      integer *8, intent(in) :: nnz
+      integer *8, intent(in) :: row_ptr(npts+1), col_ind(nnz), iquad(nnz+1)
+      real *8, intent(out) :: wnear(6,nquad)
+
+      integer *8 :: ndtarg, ntarg
+      integer *8, allocatable :: ipatch_id(:)
+      real *8, allocatable :: uvs_targ(:,:)
+
+      real *8 alpha, beta
+      real *8, allocatable :: wneartmp(:)
+      integer *8 ndd, ndi, ndz
+      real *8 dpars(1)
+      complex *16 zpars(1)
+      integer *8 ipars(2)
+
+      integer *8 i, ipv, j, ii, ijloc(2,6)
+
+      procedure (), pointer :: fker
+      external st3d_strac 
+      
+
+      ndtarg = 12
+      ntarg = npts
+      allocate(ipatch_id(npts), uvs_targ(2,npts))
+   
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
+      do i = 1,npts
+        ipatch_id(i) = -1
+        uvs_targ(1,i) = 0
+        uvs_targ(2,i) = 0
+      enddo
+!$OMP END PARALLEL DO
+
+      call get_patch_id_uvs(npatches, norders, ixyzs, iptype, npts, &
+        ipatch_id, uvs_targ)
+
+      ndd = 2
+      ndi = 2
+      ndz = 0
+      fker => st3d_strac
+      ipv = 1
+
+      ijloc(1,1) = 1
+      ijloc(2,1) = 1
+      ijloc(1,2) = 1
+      ijloc(2,2) = 2
+      ijloc(1,3) = 1
+      ijloc(2,3) = 3
+      ijloc(1,4) = 2
+      ijloc(2,4) = 2
+      ijloc(1,5) = 2
+      ijloc(2,5) = 3
+      ijloc(1,6) = 3
+      ijloc(2,6) = 3
+
+      allocate(wneartmp(nquad))
+
+
+      if(iquadtype.eq.1) then
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ii)          
+        do ii = 1,nquad
+          wneartmp(ii) = 0
+        enddo
+!$OMP END PARALLEL DO
+
+        do i = 1,6   
+          call dgetnearquad_ggq_guru(npatches, norders, ixyzs, &
+            iptype, npts, srccoefs, srcvals, ndtarg, npts, srcvals, &
+            ipatch_id, uvs_targ, eps, ipv, fker, ndd, dpars, ndz, &
+            zpars, ndi, ijloc(1,i), nnz, row_ptr, col_ind, iquad, &
+            rfac0, nquad, wneartmp)
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ii)          
+          do ii = 1,nquad
+            wnear(i,ii) = wneartmp(ii)
+          enddo
+!$OMP END PARALLEL DO
+        enddo
+      endif
+
+
+      return
+      end
+!
+!
+!
+!
+!
+      subroutine stok_s_vel_eval(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+        ipatch_id, uvs_targ, eps, sigma, pot)
+!
+!f2py intent(in) npatches,norders,ixyzs,iptype,npts,srccoefs,srcvals
+!f2py intent(in) ndtarg,ntarg,targs,ipatch_id,uvs_targ,eps
+!f2py intent(in) sigma
+!f2py intent(out) pot
+!
+!
+!------------------------------
+!  This subroutine evaluates the layer potential for the 
+!  Stokes single layer representation 
+!
+!  Representation:
+!    u = \mathcal{S}_{stok}[\sigma]
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - ndtarg: integer *8
+!        leading dimension of target information array
+!    - ntarg: integer *8
+!        number of targets
+!    - targs: real *8(ndtarg,ntarg)
+!        target information 
+!    - ipatch_id: integer *8(ntarg)
+!        id of patch of target i, id = -1, if target is off-surface
+!    - uvs_targ: double precision (2,ntarg)
+!        local uv coordinates on patch if on surface, otherwise
+!        set to 0 by default
+!    - eps: double precision
+!        precision requested
+!     - sigma: double precision(3,npts)
+!         density for layer potential
+!
+!  Output arguments
+!    - pot: double precision(3,ntarg)
+!        layer potential evaluated at the target points
+!
+!-----------------------------------
+!
+      implicit none
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: ndtarg, ntarg
+      integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+      integer *8, intent(in) :: iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts), eps
+      real *8, intent(in) :: targs(ndtarg,ntarg)
+      real *8, intent(in) :: sigma(3,npts)
+      integer *8, intent(in) :: ipatch_id(ntarg)
+      real *8, intent(in) :: uvs_targ(2,ntarg)
+
+      real *8, intent(out) :: pot(3,ntarg)
+
+      real *8 dpars(2)
+
+      dpars(1) = 1.0d0
+      dpars(2) = 0.0d0
+
+      call stok_comb_vel_eval(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+        ipatch_id, uvs_targ, eps, dpars, sigma, pot)
+
+      return
+      end
+!
+!
+!  
+!
+!
+      subroutine stok_s_mob_addsub(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, ndd, dpars, ndz, zpars, &
+        ndi, ipars, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, &
+        novers, nptso, ixyzso, srcover, whtsover, lwork, work, ndim, &
+        sigma, pot)
+!
+!  This subroutine evaluates the surface traction +
+!  the generalized ones matrix corresponding to
+!  the following integral representation:
+!  
+!  u = S_{stok}[\sigma]
+!
+!  pot = S_{stok}'[\sigma] + c L[\sigma]
+!
+!  where L[\sigma] is given by
+!     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
+!       tau_{i}^{-1}(\int_{{\Gamma_{i}}} (y-x_{c,i}) \times \sigma dS) \times (x-x_{c,i})
+!  on \Gamma_{i}
+!   
+!  We refer to L as the generlized ones matrix
+!
+!  The near field is precomputed and stored
+!  in the row sparse compressed format.
+!        
+!  The fmm is used to accelerate the far-field and 
+!  near-field interactions are handled via precomputed quadrature
+!
+!  Using add and subtract - no need to call tree and set fmm parameters
+!  can directly call existing fmm library
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - eps: real *8
+!        precision requested
+!    - ndd: integer *8
+!        number of real parameters defining the kernel/
+!        integral representation (must be 3)
+!    - dpars: real *8(ndd)
+!        real parameters defining the kernel/
+!        integral representation 
+!        * dpars(1) = c*(multiple of generalized 1s matrix) 
+!    - ndz: integer *8
+!        number of complex parameters defining the kernel/
+!        integral representation (unused in this routine)
+!    - zpars: complex *16(ndz)
+!        complex parameters defining the kernel/
+!        integral represnetation (unused in this routine)
+!    - ndi: integer *8
+!        must be ncomp + 1, where ncomp is the number of
+!        components in the geometry
+!    - ipars: integer *8(ndi)
+!        ipars(1) = ncomp
+!        ipars(i+1) = starting patch index for patches on component
+!          i (note that the patches must be ordered by component)
+!    - nnz: integer *8
+!        number of source patch-> target interactions in the near field
+!    - row_ptr: integer *8(npts+1)
+!        row_ptr(i) is the pointer
+!        to col_ind array where list of relevant source patches
+!        for target i start
+!    - col_ind: integer *8 (nnz)
+!        list of source patches relevant for all targets, sorted
+!        by the target number
+!    - iquad: integer *8(nnz+1)
+!        location in wnear_ij array where quadrature for col_ind(i)
+!        starts for a single kernel. In this case the different kernels
+!        are matrix entries are located at (m-1)*nquad+iquad(i), where
+!        m is the kernel number
+!    - nquad: integer *8
+!        number of near field entries corresponding to each source target
+!        pair
+!    - nker: integer *8
+!        number of kernels in quadrature correction, must be 6
+!    - wnear: real *8(nker, nquad)
+!        Precomputed near field quadrature for
+!        S_{stok}'. Since the Stokes 
+!        tensor is symmetric, quadratures are only
+!        generated for the lower half of the tensor
+!        * wnear(1,:) - stores the quadratures for (1,1) entry of the 
+!                       tensor 
+!        * wnear(2,:) - stores the quadratures for (1,2) entry of the 
+!                       tensor 
+!        * wnear(3,:) - stores the quadratures for (1,3) entry of the 
+!                       tensor 
+!        * wnear(4,:) - stores the quadratures for (2,2) entry of the 
+!                       tensor 
+!        * wnear(5,:) - stores the quadratures for (2,3) entry of the 
+!                       tensor 
+!        * wnear(6,:) - stores the quadratures for (3,3) entry of the 
+!                       tensor 
+!    - novers: integer *8(npatches)
+!        order of discretization for oversampled sources and
+!        density
+!    - ixyzso: integer *8(npatches+1)
+!        ixyzso(i) denotes the starting location in srcover,
+!        corresponding to patch i
+!    - nptso: integer *8
+!        total number of oversampled points
+!    - srcover: real *8 (12,nptso)
+!        oversampled set of source information
+!    - whtsover: real *8 (nptso)
+!        smooth quadrature weights at oversampled nodes
+!    - lwork: integer *8
+!        size of work array (must be npts)
+!    - work: real *8(lwork)
+!        work array
+!        * work(1:npts) stores the coordinates the quadrature
+!          weights for integrating smooth functions
+!    - ndim: integer *8
+!        number of densities per point on the surface,
+!        must be 3 for this routine
+!    - sigma: real *8(3,npts)
+!        The density sigma above                                        
+!
+!  Output arguments:
+!    - pot: real *8(3,npts)
+!        u corresponding to representation
+!
+
+  
+      implicit none
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: norders(npatches),ixyzs(npatches+1)
+      integer *8, intent(in) :: iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+
+      real *8, intent(in) :: eps
+      
+      integer *8, intent(in) :: ndd, ndz, ndi
+      real *8, intent(in) :: dpars(ndd)
+      complex *16, intent(in) :: zpars(ndz)
+      integer *8, intent(in) :: ipars(ndi)
+
+      integer *8, intent(in) :: nnz, nquad
+      integer *8, intent(in) :: row_ptr(npts+1), col_ind(nnz)
+      integer *8, intent(in) :: iquad(nnz+1)
+      
+      integer *8, intent(in) :: nker
+      real *8, intent(in) :: wnear(nker,nquad)
+
+      integer *8, intent(in) :: nptso
+      integer *8, intent(in) :: ixyzso(npatches+1), novers(npatches)
+      real *8, intent(in) :: srcover(12,nptso), whtsover(nptso)
+      
+      integer *8, intent(in) :: lwork
+      real *8, intent(in) :: work(lwork)
+
+      integer *8, intent(in) :: ndim
+    
+      real *8, intent(in) :: sigma(ndim,npts)
+    
+      real *8, intent(out) :: pot(ndim,npts)
+
+      integer *8 ndtarg, idensflag, ipotflag, ndim_p
+
+      integer *8 norder,npols,nover,npolso
+      real *8, allocatable :: potsort(:)
+
+      real *8, allocatable :: sources(:,:), targvals(:,:)
+      real *8, allocatable :: sigmaover(:,:)
+      real *8, allocatable :: stoklet(:,:)
+      real *8, allocatable :: strslet(:,:), strsvec(:,:)
+      real *8, allocatable :: rotlet(:,:), rotvec(:,:)
+      real *8, allocatable :: doublet(:,:), doubvec(:,:)
+      real *8, allocatable :: pottarg(:,:), gradtarg(:,:,:)
+      real *8, allocatable :: pretarg(:)
+      integer *8 ns, nt
+      real *8 alpha, beta
+      integer *8 ifstoklet, ifstrslet, ifrotlet, ifdoublet
+      integer *8 ifppreg, ifppregtarg
+      real *8 tmp(10), val
+      real *8 over4pi
+
+      real *8 w11, w12, w13, w21, w22, w23, w31, w32, w33
+      real *8 sig1, sig2, sig3
+
+      integer *8 istress
+
+      integer *8 i, j, jpatch, jquadstart, jstart
+
+      real *8 pottmp
+      real *8, allocatable :: sttmp2(:,:)
+      real *8 strstmp2(3), strsvec2(3)
+
+      integer *8 nmax
+      real *8 timeinfo(10), t1, t2, omp_get_wtime
+
+      real *8, allocatable :: srctmp2(:,:)
+      real *8 thresh, ra
+      real *8 rr, rmin
+      integer *8 nss, ii, l, npover
+
+      integer *8 nd, ntarg0, ntarg
+      integer *8 ier, iper
+
+      integer *8 ndtmp, ndsigma
+
+      real *8 ttot, done, pi
+
+      real *8 potv(3), pv, gradv(3,3)
+
+      real *8 rint(3), rint_torque(3), xyzc(3), xdiff(3), rtmp(3)
+      real *8 centroid(3), rmoi(3,3), rmoi_inv(3,3), area
+      integer *8 ncomp, icomp, iend, info
+      integer *8 ipstart, ipend, istart
+      integer *8 nploc, nptsloc
+      integer *8, allocatable :: ixyzsloc(:)
+      integer *8 int8_3
+
+      parameter (nd=1,ntarg0=1)
+      data over4pi/0.07957747154594767d0/
+
+      ns = nptso
+      done = 1
+      pi = atan(done)*4
+
+!
+!    estimate max number of sources in neear field of 
+!    any target
+!
+      
+      
+      nmax = 0
+      ntarg = npts
+      call get_near_corr_max(ntarg, row_ptr, nnz, col_ind, npatches, &
+        ixyzso, nmax)
+      allocate(srctmp2(3,nmax), sttmp2(3,nmax))
+           
+           
+      ifppreg = 0
+      ifppregtarg = 3
+      allocate(sources(3,ns), targvals(3,ntarg))
+      allocate(stoklet(3,ns))
+      allocate(strslet(3,ns), strsvec(3,ns))
+      allocate(rotlet(3,ns), rotvec(3,ns))
+      allocate(doublet(3,ns), doubvec(3,ns))
+      allocate(sigmaover(3,ns))
+      allocate(pottarg(3,ntarg), pretarg(ntarg))
+      allocate(gradtarg(3,3,ntarg))
+
+! 
+!       oversample density
+!
+
+      ndsigma = 3
+      call oversample_fun_surf(ndsigma, npatches, norders, ixyzs, &
+        iptype, npts, sigma, novers, ixyzso, ns, sigmaover)
+
+
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)      
+      do i=1,ns
+        sources(1,i) = srcover(1,i)
+        sources(2,i) = srcover(2,i)
+        sources(3,i) = srcover(3,i)
+
+        stoklet(1,i) = sigmaover(1,i)*whtsover(i)
+        stoklet(2,i) = sigmaover(2,i)*whtsover(i)
+        stoklet(3,i) = sigmaover(3,i)*whtsover(i)
+
+        strslet(1:3,i) = 0
+        strsvec(1:3,i) = 0
+        
+        rotlet(1:3,i) = 0
+        rotvec(1:3,i) = 0
+
+        doublet(1:3,i) = 0
+        doubvec(1:3,i) = 0
+      enddo
+!$OMP END PARALLEL DO      
+      
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
+      do i = 1,ntarg
+        targvals(1,i) = srcvals(1,i)
+        targvals(2,i) = srcvals(2,i)
+        targvals(3,i) = srcvals(3,i)
+      enddo
+!$OMP END PARALLEL DO      
+      
+
+      ifstoklet = 1
+      ifstrslet = 0
+      ifdoublet = 0
+      ifrotlet = 0
+
+      iper = 0
+      ier = 0
+
+!
+!       call the fmm
+!
+      call cpu_time(t1)
+!$      t1 = omp_get_wtime()      
+      call stfmm3d(nd, eps, ns, sources, ifstoklet, stoklet, &
+        ifstrslet, strslet, strsvec, ifrotlet, rotlet, rotvec, &
+        ifdoublet, doublet, doubvec, ifppreg, tmp, tmp, tmp, ntarg, &
+        targvals, ifppregtarg, pottarg, pretarg, gradtarg, ier)
+      call cpu_time(t2)
+!$      t2 = omp_get_wtime()
+      
+      timeinfo(1) = t2-t1
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
+      do i = 1,ntarg
+        pot(1,i) = srcvals(10,i)*(-pretarg(i) + 2*gradtarg(1,1,i)) + &
+             srcvals(11,i)*(gradtarg(1,2,i) + gradtarg(2,1,i)) + &
+             srcvals(12,i)*(gradtarg(1,3,i) + gradtarg(3,1,i))
+        pot(2,i) = srcvals(10,i)*(gradtarg(1,2,i) + gradtarg(2,1,i)) + &
+             srcvals(11,i)*(-pretarg(i) + 2*gradtarg(2,2,i)) + &
+             srcvals(12,i)*(gradtarg(2,3,i) + gradtarg(3,2,i))
+        pot(3,i) = srcvals(10,i)*(gradtarg(1,3,i) + gradtarg(3,1,i)) + &
+             srcvals(11,i)*(gradtarg(2,3,i) + gradtarg(3,2,i)) + &
+             srcvals(12,i)*(-pretarg(i) + 2*gradtarg(3,3,i))
+      enddo
+!$OMP END PARALLEL DO 
+
+!
+!        compute threshold for ignoring local computation
+!
+      ndtmp = 3 
+      call get_fmm_thresh(ndtmp, ns, sources, ndtmp, ntarg, targvals, &
+        thresh)
+      
+!
+!
+!       add in precomputed quadrature
+!
+
+      call cpu_time(t1)
+!$      t1 = omp_get_wtime()
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j, jpatch, jquadstart) &
+!$OMP PRIVATE(jstart, npols, l, sig1, sig2, sig3, w11, w12, w13, w21) &
+!$OMP PRIVATE(w22, w23, w31, w32, w33)
+      do i = 1,ntarg
+        do j = row_ptr(i),row_ptr(i+1)-1
+          jpatch = col_ind(j)
+          npols = ixyzs(jpatch+1) - ixyzs(jpatch)
+          jquadstart = iquad(j)
+          jstart = ixyzs(jpatch) 
+          do l=1,npols
+            sig1 = sigma(1,jstart+l-1)
+            sig2 = sigma(2,jstart+l-1)
+            sig3 = sigma(3,jstart+l-1)
+
+            w11 = wnear(1,jquadstart+l-1)
+            w12 = wnear(2,jquadstart+l-1)
+            w13 = wnear(3,jquadstart+l-1)
+
+            w21 = w12 
+            w22 = wnear(4,jquadstart+l-1)
+            w23 = wnear(5,jquadstart+l-1)
+
+            w31 = w13 
+            w32 = w23 
+            w33 = wnear(6,jquadstart+l-1)
+
+
+            pot(1,i) = pot(1,i) + w11*sig1 + w12*sig2 + w13*sig3 
+            pot(2,i) = pot(2,i) + w21*sig1 + w22*sig2 + w23*sig3 
+            pot(3,i) = pot(3,i) + w31*sig1 + w32*sig2 + w33*sig3 
+          enddo
+        enddo
+      enddo
+!$OMP END PARALLEL DO
+
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i, j, jpatch, srctmp2) &
+!$OMP PRIVATE(sttmp2, strstmp2, strsvec2, nss, l, jstart, ii, npover) &
+!$OMP PRIVATE(potv, pv, gradv, istress)
+      do i = 1,ntarg
+        nss = 0
+        do j = row_ptr(i), row_ptr(i+1)-1
+          jpatch = col_ind(j)
+          do l = ixyzso(jpatch),ixyzso(jpatch+1)-1
+            nss = nss + 1
+            srctmp2(1,nss) = srcover(1,l)
+            srctmp2(2,nss) = srcover(2,l)
+            srctmp2(3,nss) = srcover(3,l)
+
+            sttmp2(1,nss) = stoklet(1,l)
+            sttmp2(2,nss) = stoklet(2,l)
+            sttmp2(3,nss) = stoklet(3,l)
+          enddo
+        enddo
+
+        potv(1) = 0
+        potv(2) = 0
+        potv(3) = 0
+        pv = 0
+        gradv(1:3,1:3) = 0
+        
+        istress = 0
+        call st3ddirectstokstrsg(nd, srctmp2, sttmp2, istress, &
+          strstmp2, strsvec2, nss, targvals(1,i), ntarg0, potv, pv, &
+          gradv, thresh)
+        potv(1) = srcvals(10,i)*(-pv + 2*gradv(1,1)) + &
+             srcvals(11,i)*(gradv(1,2) + gradv(2,1)) + &
+             srcvals(12,i)*(gradv(1,3) + gradv(3,1))
+        potv(2) = srcvals(10,i)*(gradv(1,2) + gradv(2,1)) + &
+             srcvals(11,i)*(-pv + 2*gradv(2,2)) + &
+             srcvals(12,i)*(gradv(2,3) + gradv(3,2))
+        potv(3) = srcvals(10,i)*(gradv(1,3) + gradv(3,1)) + &
+             srcvals(11,i)*(gradv(2,3) + gradv(3,2)) + &
+             srcvals(12,i)*(-pv + 2*gradv(3,3))
+
+        pot(1,i) = pot(1,i) - potv(1)
+        pot(2,i) = pot(2,i) - potv(2)
+        pot(3,i) = pot(3,i) - potv(3)
+
+      enddo
+
+      
+      call cpu_time(t2)
+!$      t2 = omp_get_wtime()     
+
+      allocate(ixyzsloc(npatches+1))
+!
+!  Now add in contribution of c L[\sigma]
+!
+      
+      ncomp = ipars(1)
+      int8_3 = 3
+      do icomp = 1,ncomp
+        rint(1:3) = 0
+        rint_torque(1:3) = 0
+        ipstart = ipars(icomp+1)
+        ipend = ipars(icomp+2)-1
+
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend+1)-1
+        nploc = ipend - ipstart + 1
+        nptsloc = iend - istart + 1
+
+        do i=1,nploc+1
+          ixyzsloc(i) = ixyzs(i+ipstart-1) - ixyzs(ipstart)+1
+        enddo
+
+        rmoi(1:3,1:3) = 0
+        rmoi_inv(1:3,1:3) = 0
+        area = 0
+        centroid(1:3) = 0
+        call get_surf_moments(nploc, norders(ipstart), ixyzsloc, &
+          iptype(ipstart), nptsloc, srcvals(1,istart), work(istart), &
+          area, centroid, rmoi)
+        info = 0
+        call dinverse(int8_3, rmoi, info, rmoi_inv) 
+        
+        do i = istart,iend
+          rint(1:3) = rint(1:3) + sigma(1:3,i)*work(i)
+          rtmp(1:3) = 0
+          xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+          call cross_prod3d(xdiff, sigma(1:3,i), rtmp)
+          rint_torque(1:3) = rint_torque(1:3) + rtmp(1:3)*work(i)
+        enddo
+
+        do i = istart,iend
+          pot(1:3,i) = pot(1:3,i) + dpars(1)*rint(1:3)/area
+          xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+          rtmp(1:3) = 0
+          call cross_prod3d(rint_torque, xdiff, rtmp)
+          pot(1,i) = pot(1,i) + dpars(1)*(rmoi_inv(1,1)*rtmp(1) + &
+            rmoi_inv(1,2)*rtmp(2) + rmoi_inv(1,3)*rtmp(3))
+          pot(2,i) = pot(2,i) + dpars(1)*(rmoi_inv(2,1)*rtmp(1) + &
+            rmoi_inv(2,2)*rtmp(2) + rmoi_inv(2,3)*rtmp(3))
+          pot(3,i) = pot(3,i) + dpars(1)*(rmoi_inv(3,1)*rtmp(1) + &
+            rmoi_inv(3,2)*rtmp(2) + rmoi_inv(3,3)*rtmp(3))
+        enddo
+      enddo
+
+      return
+      end
+
+!
+!
+!
+!
+!
+!
+!
+!        
+      subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ncomp, icomps, eps, numit, &
+        forces, torques, eps_gmres, niter, errs, rres, soln, &
+        trans_vels, rot_vels)
+
+!
+!
+!  This subroutine solves the Stokes mobility problem
+!  where the potential is represented using a 
+!  single layer representation.
+!
+!  This subroutine is the simple interface as opposed to the
+!  _solver_guru routine which is called after initialization
+!  in this routine.
+!
+!
+!  Representation:
+!    u = S_{stok}[\sigma] + S_{stok}[\sigma_{0}] 
+!  
+!  where \sigma_{0} = F_{i}/|\Gamma_{i}| + 
+!     \tau_{i}^{-1} T_{i} \times (x - x_{c,i})
+!
+!  with \tau_{i} is the moment of inertia tensor. In addition, 
+!
+!  \sigma satisfies
+!    \int_{\Gamma_{i}} \sigma = 0, 
+!    \int_{\Gamma_{i}} (x-x_{c,i}) \times \sigma = 0
+!
+!  Integral equations obtained by imposing:
+!    f^{-} = 0 \,,
+!
+!  where f^{-} is the interior surface traction
+!
+!  and is given by:
+!    1/2 \sigma + S_{stok}'[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  along with the constraints above. 
+!
+!  Instead, we use the generalized 1's matrix trick to solve for
+!  sigma as
+!    1/2 \sigma + S_{stok}'[\sigma] + L[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  where L[\sigma] is given by
+!     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
+!
+!  The linear system is solved iteratively using GMRES
+!  until a relative residual of eps_gmres is reached
+!
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - ncomp: integer *8
+!        number of components
+!    - icomps: integer *8(ncomp+1)
+!        icomps(i) = starting patch index for patches on component
+!          i (note that the patches must be ordered by component)
+!    - eps: real *8
+!        precision requested for computing quadrature and fmm
+!        tolerance
+!    - numit: integer *8
+!        max number of gmres iterations
+!    - forces: real *8(3,ncomp)
+!        prescribed force on the components
+!    - torques: real *8(3,ncomp)
+!        prescribed torque on the components
+!    - eps_gmres: real *8
+!        gmres tolerance requested
+!      
+!
+!  Output arguments:
+!    - niter: integer *8
+!        number of gmres iterations required for relative residual
+!        to converge to eps_gmres
+!    - errs:  real *8 (numit+1)
+!        relative residual as a function of iteration
+!        number (only errs(1:niter) is populated))
+!    - rres: real *8
+!        relative residual for computed solution
+!    - soln: real *8(3,npts)
+!        density which solves the mobility problem \sigma
+!    - trans_vels: real *8(3,ncomp)
+!        translational velocity of the components
+!    - rot_vels: real *8(3,ncomp)
+!        rotational velocity of the components
+!
+      implicit none
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+      integer *8, intent(in) :: iptype(npatches)
+      integer *8, intent(in) :: ncomp, icomps(ncomp+1)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+      real *8, intent(in) :: eps, eps_gmres
+      real *8, intent(in) :: forces(3,ncomp), torques(3,ncomp) 
+      integer *8, intent(in) :: numit
+      real *8, intent(out) :: soln(3,npts)
+      real *8, intent(out) :: trans_vels(3,ncomp), rot_vels(3,ncomp)
+      real *8, intent(out) :: errs(numit+1)
+      real *8, intent(out) :: rres
+      integer *8, intent(out) :: niter
+
+      integer *8 norder, npols
+      real *8, allocatable :: targs(:,:)
+      integer *8, allocatable :: ipatch_id(:)
+      real *8, allocatable :: uvs_targ(:,:)
+      integer *8 ndtarg, ntarg
+
+
+
+      integer *8 nover, npolso, nptso
+      integer *8 nnz,nquad
+      integer *8, allocatable :: row_ptr(:), col_ind(:), iquad(:)
+      real *8, allocatable :: wnear(:,:), wnear_s(:,:)
+
+      real *8, allocatable :: srcover(:,:), wover(:)
+      integer *8, allocatable :: ixyzso(:), novers(:)
+
+      real *8, allocatable :: cms(:,:), rads(:), rad_near(:) 
+      real *8, allocatable :: rhs(:,:), sigma0(:,:), uvel(:,:)
+
+      integer *8 i, j, jpatch, jquadstart, jstart
+
+      integer *8, allocatable :: ipars(:)
+      complex *16 zpars
+      real *8 dpars(2)
+      real *8 timeinfo(10), t1, t2, omp_get_wtime
+
+      real *8 ttot, done, pi
+      real *8 rfac, rfac0
+      integer *8 iptype_avg, norder_avg
+      integer *8 ikerorder, iquadtype, npts_over
+
+      real *8 xdiff(3), rtmp(3), rtuse(3), ftuse(3), rtmp2(3)
+      real *8 rmoi(3,3), rmoi_inv(3,3), centroid(3), area
+      integer *8 icomp, idensflag, iend, info
+      integer *8 ipstart, ipend, ipotflag, istart, lwork
+      integer *8 ndd, ndi, ndz
+      integer *8 ndim_p, ndim_s
+      integer *8 nploc, nptsloc
+      real *8 work(1)
+      integer *8, allocatable :: ixyzsloc(:)
+      real *8, allocatable :: wts(:)
+      real *8 ra
+      integer *8 int8_3
+ 
+!
+!
+!       gmres variables
+!
+      real *8 did, dtmp
+      complex *16 ztmp
+      integer *8 nker
+
+
+      done = 1
+      pi = atan(done)*4
+!
+!
+!        setup targets as on surface discretization points
+! 
+      ndtarg = 3
+      ntarg = npts
+      allocate(targs(ndtarg,npts), uvs_targ(2,ntarg), ipatch_id(ntarg))
+
+!$OMP PARALLEL DO DEFAULT(SHARED)
+      do i=1,ntarg
+        targs(1,i) = srcvals(1,i)
+        targs(2,i) = srcvals(2,i)
+        targs(3,i) = srcvals(3,i)
+        ipatch_id(i) = -1
+        uvs_targ(1,i) = 0
+        uvs_targ(2,i) = 0
+      enddo
+!$OMP END PARALLEL DO   
+
+
+!
+!    initialize patch_id and uv_targ for on surface targets
+!
+      call get_patch_id_uvs(npatches, norders, ixyzs, iptype, npts, &
+        ipatch_id, uvs_targ)
+
+!
+!
+!        this might need fixing
+!
+      iptype_avg = floor(sum(iptype)/(npatches+0.0d0))
+      norder_avg = floor(sum(norders)/(npatches+0.0d0))
+
+      call get_rfacs(norder_avg, iptype_avg, rfac, rfac0)
+
+
+      allocate(cms(3,npatches), rads(npatches), rad_near(npatches))
+
+      call get_centroid_rads(npatches, norders, ixyzs, iptype, npts, &
+        srccoefs, cms, rads)
+
+!$OMP PARALLEL DO DEFAULT(SHARED) 
+      do i=1,npatches
+        rad_near(i) = rads(i)*rfac
+      enddo
+!$OMP END PARALLEL DO      
+
+!
+!    find near quadrature correction interactions
+!
+      print *, "entering find near mem"
+      call findnearmem(cms, npatches, rad_near, ndtarg, targs, npts, nnz)
+
+      allocate(row_ptr(npts+1), col_ind(nnz))
+      
+      call findnear(cms, npatches, rad_near, ndtarg, targs, npts, &
+        row_ptr, col_ind)
+
+      allocate(iquad(nnz+1)) 
+      call get_iquad_rsc(npatches, ixyzs, npts, nnz, row_ptr, col_ind, &
+        iquad)
+
+      ikerorder = -1
+      if(abs(dpars(2)).gt.1.0d-16) ikerorder = 0
+
+!
+!    estimate oversampling for far-field, and oversample geometry
+!
+
+      allocate(novers(npatches), ixyzso(npatches+1))
+
+      print *, "beginning far order estimation"
+
+      ztmp = 0
+
+      call get_far_order(eps, npatches, norders, ixyzs, iptype, cms, &
+        rads, npts, srccoefs, ndtarg, npts, targs, ikerorder, ztmp, &
+        nnz, row_ptr, col_ind, rfac, novers, ixyzso)
+
+      npts_over = ixyzso(npatches+1) - 1
+
+      allocate(srcover(12,npts_over), wover(npts_over))
+
+      call oversample_geom(npatches, norders, ixyzs, iptype, npts, &
+        srccoefs, srcvals, novers, ixyzso, npts_over, srcover)
+
+      call get_qwts(npatches, novers, ixyzso, iptype, npts_over, &
+        srcover, wover)
+
+!
+!   compute near quadrature correction
+!
+      nquad = iquad(nnz+1) - 1
+      nker = 6
+      allocate(wnear(nker,nquad), wnear_s(nker,nquad))
+      
+!$OMP PARALLEL DO DEFAULT(SHARED)      
+      do i=1,nquad
+        wnear(1,i) = 0
+        wnear(2,i) = 0
+        wnear(3,i) = 0
+        wnear(4,i) = 0
+        wnear(5,i) = 0
+        wnear(6,i) = 0
+
+        wnear_s(1,i) = 0
+        wnear_s(2,i) = 0
+        wnear_s(3,i) = 0
+        wnear_s(4,i) = 0
+        wnear_s(5,i) = 0
+        wnear_s(6,i) = 0
+      enddo
+!$OMP END PARALLEL DO    
+
+
+      iquadtype = 1
+
+      print *, "starting to generate near quadrature"
+      call cpu_time(t1)
+!$      t1 = omp_get_wtime()     
+
+!
+!  quadrature for S'
+!
+      call getnearquad_stok_s_mob(npatches, norders, &
+        ixyzs, iptype, npts, srccoefs, srcvals, eps, &
+        iquadtype, nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear)
+
+!
+!  quadrature for S
+!
+      dpars(1) = 1.0d0
+      dpars(2) = 0
+      call getnearquad_stok_comb_vel(npatches, norders, &
+        ixyzs, iptype, npts, srccoefs, srcvals, eps, dpars, iquadtype, &
+        nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear_s)
+      call cpu_time(t2)
+!$      t2 = omp_get_wtime()     
+
+!
+!  Compute S[\sigma_{0}]
+!
+      allocate(wts(npts))
+      call get_qwts(npatches, norders, ixyzs, iptype, npts, &
+        srcvals, wts)
+      allocate(sigma0(3,npts), rhs(3,npts), uvel(3,npts))
+      allocate(ixyzsloc(npatches+1))
+      ndd = 2
+      ndz = 0
+      ndi = 0
+      nker = 6
+      lwork = 0
+      ndim_s = 3
+      idensflag = 0
+      ipotflag = 0
+      ndim_p = 3
+
+      allocate(ipars(ncomp+2))
+      ipars(1) = ncomp
+      do icomp = 1,ncomp+1
+        ipars(icomp+1) = icomps(icomp)
+      enddo
+
+      int8_3 = 3
+
+      
+      do icomp = 1,ncomp
+        ipstart = icomps(icomp)
+        ipend = icomps(icomp+1)-1
+
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend+1) - 1
+        nploc = ipend - ipstart + 1
+        nptsloc = iend - istart + 1
+
+        do i=1,nploc+1
+          ixyzsloc(i) = ixyzs(i+ipstart-1) - ixyzs(ipstart) + 1
+        enddo
+        rmoi(1:3,1:3) = 0
+        rmoi_inv(1:3,1:3) = 0
+        area = 0
+        centroid(1:3) = 0
+
+        call get_surf_moments(nploc, norders(ipstart), ixyzsloc, &
+          iptype(ipstart), nptsloc, srcvals(1,istart), wts(istart), &
+          area, centroid, rmoi)
+        info = 0
+        call dinverse(int8_3, rmoi, info, rmoi_inv)
+        rtuse(1) = rmoi_inv(1,1)*torques(1,icomp) + &
+           rmoi_inv(1,2)*torques(2,icomp) + &
+           rmoi_inv(1,3)*torques(3,icomp)
+        rtuse(2) = rmoi_inv(2,1)*torques(1,icomp) + &
+           rmoi_inv(2,2)*torques(2,icomp) + &
+           rmoi_inv(2,3)*torques(3,icomp)
+        rtuse(3) = rmoi_inv(3,1)*torques(1,icomp) + &
+           rmoi_inv(3,2)*torques(2,icomp) + &
+           rmoi_inv(3,3)*torques(3,icomp)
+        ftuse(1:3) = forces(1:3,icomp)/area
+        do i = istart,iend
+          xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+          call cross_prod3d(rtuse, xdiff, rtmp)
+          sigma0(1:3,i) = ftuse(1:3) + rtmp(1:3)
+        enddo
+      enddo
+
+      dpars(1) = 0.0d0
+
+      call stok_s_mob_addsub(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, ndd, dpars, ndz, zpars, &
+        ndi, ipars, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, &
+        novers, npts_over, ixyzso, srcover, wover, npts, wts, ndim_s, &
+        sigma0, rhs)
+
+!$OMP PARALLEL DO PRIVATE(i)
+      do i=1,npts
+        rhs(1:3,i) = -(0.5d0*sigma0(1:3,i) + rhs(1:3,i))
+      enddo
+!$OMP END PARALLEL DO 
+      call prin2('rhs=*',rhs,12)
+      
+      print *, "done generating near quadrature, now starting gmres"
+      ndi = ncomp+2
+      
+      call stok_s_mob_solver_guru(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, ndi, ipars, numit, &
+        rhs, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
+        npts_over, ixyzso, srcover, wover, eps_gmres, niter, &
+        errs, rres, soln)
+      
+      do i=1,npts
+        soln(1:3,i) = soln(1:3,i) + sigma0(1:3,i)
+      enddo
+
+      ndd = 2
+      dpars(1) = 1.0d0
+      dpars(2) = 0.0d0
+
+      call stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+        eps, ndd, dpars, ndz, zpars, ndi, ipars, nnz, row_ptr, &
+        col_ind, iquad, nquad, nker, wnear_s, novers, npts_over, &
+        ixyzso, srcover, wover, lwork, work, idensflag, ndim_s, &
+        soln, ipotflag, ndim_p, uvel)
+!
+!  Compute translational and rotational velocities on each component
+!
+      do icomp = 1,ncomp
+        ipstart = icomps(icomp)
+        ipend = icomps(icomp+1)-1
+
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend+1) - 1
+        nploc = ipend - ipstart + 1
+        nptsloc = iend - istart + 1
+
+        trans_vels(1:3,icomp) = 0
+        rot_vels(1:3,icomp) = 0
+
+        do i=1,nploc+1
+          ixyzsloc(i) = ixyzs(i+ipstart-1) - ixyzs(ipstart)+1
+        enddo
+        rmoi(1:3,1:3) = 0
+        rmoi_inv(1:3,1:3) = 0
+        area = 0
+        centroid(1:3) = 0
+        call get_surf_moments(nploc, norders(ipstart), ixyzsloc, &
+          iptype(ipstart), nptsloc, srcvals(1,istart), wts(istart), &
+          area, centroid, rmoi)
+        info = 0
+        call dinverse(int8_3, rmoi, info, rmoi_inv) 
+        do i = istart,iend
+          trans_vels(1:3,icomp) = trans_vels(1:3,icomp) + uvel(1:3,i)*wts(i)
+        enddo
+        trans_vels(1:3,icomp) = trans_vels(1:3,icomp)/area
+        
+        rtmp(1:3) = 0
+        do i=istart,iend
+          xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+          rtuse(1:3) = uvel(1:3,i) - trans_vels(1:3,icomp)
+          rtmp2(1:3) = 0
+          call cross_prod3d(xdiff, rtuse, rtmp2)
+          rtmp(1:3) = rtmp(1:3) + rtmp2(1:3)*wts(i)
+        enddo
+        rot_vels(1:3,icomp) = -(rmoi_inv(1:3,1)*rtmp(1) + & 
+          rmoi_inv(1:3,2)*rtmp(2) + rmoi_inv(1:3,3)*rtmp(3))
+      enddo
+
+      
+      
+      return
+      end
+!
+!
+!
+!
+!
+
+      subroutine stok_s_mob_solver_guru(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, ndi, ipars, numit, &
+        rhs, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
+        nptso, ixyzso, srcover, whtsover, eps_gmres, niter, &
+        errs, rres, soln)
+!
+!
+!  This subroutine solves the Stokes mobility problem
+!  where the potential is represented using a 
+!  single layer representation.
+!
+!  This subroutine is the simple interface as opposed to the
+!  _solver_guru routine which is called after initialization
+!  in this routine.
+!
+!
+!  Representation:
+!    u = S_{stok}[\sigma] + S_{stok}[\sigma_{0}] 
+!  
+!  where \sigma_{0} = F_{i}/|\Gamma_{i}| + 
+!     \tau_{i}^{-1} T_{i} \times (x - x_{c,i})
+!
+!  with \tau_{i} is the moment of inertia tensor. In addition, 
+!
+!  \sigma satisfies
+!    \int_{\Gamma_{i}} \sigma = 0, 
+!    \int_{\Gamma_{i}} (x-x_{c,i}) \times \sigma = 0
+!
+!  Integral equations obtained by imposing:
+!    f^{-} = 0 \,,
+!
+!  where f^{-} is the interior surface traction
+!
+!  and is given by:
+!    1/2 \sigma + S_{stok}'[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  along with the constraints above. 
+!
+!  Instead, we use the generalized 1's matrix trick to solve for
+!  sigma as
+!    1/2 \sigma + S_{stok}'[\sigma] + L[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  where L[\sigma] is given by
+!     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
+!
+!  The linear system is solved iteratively using GMRES
+!  until a relative residual of eps_gmres is reached
+!
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - eps: real *8
+!        precision requested for computing quadrature and fmm
+!        tolerance
+!    - ndi: integer *8
+!        number of ipars, must be atleast ncomp+2, where ncomp
+!        is the number of components
+!    - ipars: integer *8(ndi)
+!        ipars(1) = ncomp
+!        ipars(i+1) = starting patch index for patches on component
+!          i (note that the patches must be ordered by component)
+!    - numit: integer *8
+!        max number of gmres iterations
+!    - ifinout: integer *8
+!        ifinout = 0, interior problem
+!        ifinout = 1, exterior problem
+!    - rhs: real *8(3,npts)
+!        velocity data
+!    - nnz: integer *8
+!        number of source patch-> target interactions in the near field
+!    - row_ptr: integer *8(npts+1)
+!        row_ptr(i) is the pointer
+!        to col_ind array where list of relevant source patches
+!        for target i start
+!    - col_ind: integer *8 (nnz)
+!        list of source patches relevant for all targets, sorted
+!        by the target number
+!    - iquad: integer *8(nnz+1)
+!        location in wnear_ij array where quadrature for col_ind(i)
+!        starts for a single kernel. In this case the different kernels
+!        are matrix entries are located at (m-1)*nquad+iquad(i), where
+!        m is the kernel number
+!    - nquad: integer *8
+!        number of near field entries corresponding to each source target
+!        pair
+!    - nker: integer *8
+!        number of kernels in quadrature correction, must be 6
+!    - wnear: real *8(nker, nquad)
+!        precomputed quadrature corrections 
+!    - novers: integer *8(npatches)
+!        order of discretization for oversampled sources and
+!        density
+!    - ixyzso: integer *8(npatches+1)
+!        ixyzso(i) denotes the starting location in srcover,
+!        corresponding to patch i
+!    - nptso: integer *8
+!        total number of oversampled points
+!    - srcover: real *8 (12,nptso)
+!        oversampled set of source information
+!    - whtsover: real *8 (nptso)
+!        smooth quadrature weights at oversampled nodes
+!    - eps_gmres: real *8
+!        gmres tolerance requested
+!
+!  output
+!    - niter: integer *8
+!        number of gmres iterations required for relative residual
+!        to converge to eps_gmres
+!    - errs:  real *8 (numit+1)
+!        relative residual as a function of iteration
+!        number (only errs(1:niter) is populated))
+!    - rres: real *8
+!        relative residual for computed solution
+!    - soln: real *8(3,npts)
+!        density which solves the neumann problem \sigma
+!				 
+
+      implicit none
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+      integer *8, intent(in) :: iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+      real *8, intent(in) :: eps, eps_gmres
+      integer *8, intent(in) :: ndi
+      integer *8, intent(in) :: ipars(ndi)
+      real *8, intent(in) :: rhs(3,npts)
+      integer *8, intent(in) :: numit
+
+      real *8, intent(out) :: errs(numit+1)
+      real *8, intent(out) :: rres
+      integer *8, intent(out) :: niter
+      
+      integer *8, intent(in) :: nnz, nquad
+      integer *8, intent(in) :: row_ptr(npts+1), col_ind(nnz)
+      integer *8, intent(in) :: iquad(nnz+1)
+
+      integer *8, intent(in) :: nker
+      real *8, intent(in) :: wnear(nker,nquad)
+
+      integer *8, intent(in) :: nptso
+      integer *8, intent(in) :: novers(npatches), ixyzso(npatches+1)
+      real *8, intent(in) :: srcover(12,nptso), whtsover(nptso)
+      
+  
+      real *8, intent(out) :: soln(3,npts)
+
+      real *8 did
+      real *8 dpars
+      integer *8 ndd_use
+
+      procedure (), pointer :: fker
+      external stok_s_mob_addsub
+
+      integer *8 ndd, ndz, lwork, ndim
+      integer *8 nkertmp
+      complex *16 zpars
+
+      integer *8 ndtarg
+      complex *16 ima
+      data ima/(0.0d0,1.0d0)/
+      real *8, allocatable :: wts(:)
+
+      integer *8 i
+
+      did = 0.5d0 
+      fker => stok_s_mob_addsub
+
+      ndd = 1
+      dpars = 1.0d0
+      ndz = 0
+
+      lwork = npts
+      ndim = 3
+      allocate(wts(npts))
+
+      call get_qwts(npatches, norders, ixyzs, iptype, npts, &
+        srcvals, wts)
+
+      call dgmres_guru(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, wts, &
+        eps, ndd, dpars, ndz, zpars, ndi, ipars, &
+        nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
+        nptso, ixyzso, srcover, whtsover, npts, wts, &
+        ndim, fker, did, rhs, numit, eps_gmres, niter, errs, &
+        rres, soln)
+
+      return
+      end
+
+!
diff --git a/src/surface_routs/analytic_geometry_routs.f90 b/src/surface_routs/analytic_geometry_routs.f90
index e26dfeeb..38181102 100644
--- a/src/surface_routs/analytic_geometry_routs.f90
+++ b/src/surface_routs/analytic_geometry_routs.f90
@@ -1407,7 +1407,7 @@ subroutine get_axissym_fcurve_npat(nch2d, tchse, fcurve, &
       pars_end(2) = tchse(nch2d)
 
       pars_end(3:(np+2)) = pars(1:np)
-
+      int8_6 = 6
 
 
        
diff --git a/src/surface_routs/surf_routs.f90 b/src/surface_routs/surf_routs.f90
index fb9fc117..8545a44a 100644
--- a/src/surface_routs/surf_routs.f90
+++ b/src/surface_routs/surf_routs.f90
@@ -943,7 +943,7 @@ subroutine refine_patches_list(npatches, npatmax, norders, ixyzs, &
 
          do l=1,npols
             ll = istart+l-1
-            do m=1,9
+do m=1,9
               srctmp(m) = srctmp(m) + srccoefs(m,ll)*pols(l)
             enddo
          enddo
@@ -993,8 +993,491 @@ subroutine refine_patches_list(npatches, npatmax, norders, ixyzs, &
 
   return
 end subroutine refine_patches_list
+!
+!
+!
+!
+subroutine get_surf_moments(npatches, norders, ixyzs, &
+    iptype, npts, srcvals, wts, area, centroid, rmoi)
+!
+!  This subroutine returns the 0th, 1st and 2nd moments
+!  of a surface. The 0th moment is just the surface area, 
+!  the first moment is the centroid, and the second moment
+!  is given by
+!
+!  II_{ij} = 
+!   \int_{\Gamma} (|x-c|^2  \delta_{ij} - (x_{i} - c_{i})(x_{j}-c_{j}))dS
+!  
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points on the surface
+!    - srcvals: double precision (12,npts)
+!        xyz, dxyz/du,dxyz/dv, normals at all nodes
+!    - wts: double precision(npts)
+!        quadrature weights for integrating smooth 
+!        functions
+!
+!  Output arguments:   
+!    - area: double precision
+!        the 0th moment of the surface, i.e. surface area
+!    - centroid: double precision(3)
+!        the first moment of the surface, i.e. the centroid
+!    - rmoi: double precision(3,3)
+!        the second moment of inertia of the surface, defined
+!        above
+!
+!----------------------------------------------
+!
+
+  implicit none
+  integer *8, intent(in) :: npatches, norders(npatches)
+  integer *8, intent(in) :: ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  integer *8, intent(in) :: npts
+
+  real *8, intent(in) :: srcvals(12,npts), wts(npts)
+
+  real *8, intent(out) :: area, centroid(3), rmoi(3,3)
+
+  integer *8 i
+  real *8 xdiff(3)
+
+  area = 0
+  centroid(1:3) = 0
+  rmoi(1:3,1:3) = 0
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) &
+!$OMP REDUCTION(+:area,centroid)
+  do i=1,npts
+    area = area + wts(i)
+    centroid(1:3) = centroid(1:3) + srcvals(1:3,i)*wts(i)
+  enddo
+!$OMP END PARALLEL DO
+  centroid(1:3) = centroid(1:3)/area
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,xdiff) REDUCTION(+:rmoi)
+  do i=1,npts
+    xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+    rmoi(1,1) = rmoi(1,1) + (xdiff(2)**2 + xdiff(3)**2)*wts(i)
+    rmoi(2,2) = rmoi(2,2) + (xdiff(1)**2 + xdiff(3)**2)*wts(i)
+    rmoi(3,3) = rmoi(3,3) + (xdiff(1)**2 + xdiff(2)**2)*wts(i)
+
+    rmoi(1,2) = rmoi(1,2) - xdiff(1)*xdiff(2)*wts(i)
+    rmoi(1,3) = rmoi(1,3) - xdiff(1)*xdiff(3)*wts(i)
+    rmoi(2,3) = rmoi(2,3) - xdiff(2)*xdiff(3)*wts(i)
+  enddo
+!$OMP END PARALLEL DO
+  
+  rmoi(2,1) = rmoi(1,2)
+  rmoi(3,1) = rmoi(1,3)
+  rmoi(3,2) = rmoi(2,3)
+
+  return
+  end
+!
+!
+!
+!
+!
+subroutine surf_rotate(npatches, norders, ixyzs, iptype, npts, &
+  srccoefs, srcvals, deul, srccoefs_out, srcvals_out)
+! 
+!  This subroutine rotates the surfer object using Euler angles. 
+!  We use the z-x-z convention
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info. 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes. 
+!    - deul: real *8(3)
+!        The three euler angles
+!
+!  Output arguments:
+!    - srccoefs_out: double precision (9,npts)
+!        srccoefs_out(1:9,1:npts) are the koornwinder expansion coefs of 
+!        the rotated geometry info. 
+!    - srcvals_out: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes on the rotated geometry. 
+!  
+!
+  implicit real *8 (a-h,o-z)
+  implicit integer *8 (i-n)
+  integer *8, intent(in) :: npatches, npts
+  integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  real *8, intent(in) :: deul(3)
+  real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+  real *8, intent(out) :: srccoefs_out(9,npts), srcvals_out(12,npts)
+
+  real *8 dmat(3,3), dshift(3)
+
+  dshift(1) = 0
+  dshift(2) = 0
+  dshift(3) = 0
+
+  call get_linear_transf_eul(deul, dmat)
+
+!
+!  construct the first rotation matrix
+!
+  call surf_affine_transf(npatches, norders, ixyzs, iptype, npts, &
+    srccoefs, srcvals, dmat, dshift, srccoefs_out, srcvals_out)
+
+
+  return
+  end
+!
+!
+!
+!
+!
+subroutine surf_translate(npatches, norders, ixyzs, iptype, npts, &
+  srccoefs, srcvals, dshift, srccoefs_out, srcvals_out)
+! 
+!  This subroutine translates the surfer object. 
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info. 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes. 
+!    - dshift: real *8(3)
+!        (x,y,z) coordinates of shift 
+!
+!  Output arguments: 
+!    - srccoefs_out: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        translated geometry info. 
+!    - srcvals_out: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes for the translated geometry. 
+!  
+!
+  implicit real *8 (a-h,o-z)
+  implicit integer *8 (i-n)
+  integer *8, intent(in) :: npatches, npts
+  integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  real *8, intent(in) :: dshift(3)
+  real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+  real *8, intent(out) :: srccoefs_out(9,npts), srcvals_out(12,npts)
+
+  real *8 dmat(3,3)
+
+  dmat(1:3,1:3) = 0
+  dmat(1,1) = 1
+  dmat(2,2) = 1
+  dmat(3,3) = 1
+
+  call surf_affine_transf(npatches, norders, ixyzs, iptype, npts, &
+    srccoefs, srcvals, dmat, dshift, srccoefs_out, srcvals_out)
+
+
+  return
+  end
+
+!
+!
+!
+!
+!
+subroutine surf_scale(npatches, norders, ixyzs, iptype, npts, &
+  srccoefs, srcvals, dscale, srccoefs_out, srcvals_out)
+! 
+!  This subroutine rescales the surfer object 
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info. 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes. 
+!    - dscale: real *8(3)
+!        rescaling parameter for each of the coordinates 
+!
+!  Output arguments: (All arguments are updated on output)
+!    - srccoefs_out: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        the scaled geometry info. 
+!    - srcvals_out: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes of the scaled geometry. 
+!  
+!
+  implicit real *8 (a-h,o-z)
+  implicit integer *8 (i-n)
+  integer *8, intent(in) :: npatches, npts
+  integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  real *8, intent(in) :: dscale(3)
+  real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+  real *8, intent(out) :: srccoefs_out(9,npts), srcvals_out(12,npts)
+
+  real *8 dmat(3,3), dshift(3)
+  
+  dshift(1:3) = 0
+
+  dmat(1:3,1:3) = 0
+  dmat(1,1) = 1  
+  dmat(2,2) = 1
+  dmat(3,3) = 1
+
+  call surf_affine_transf(npatches, norders, ixyzs, iptype, npts, &
+    srccoefs, srcvals, dmat, dshift, srccoefs_out, srcvals_out)
 
 
+  return
+  end
+!
+!
+!
+!
+!
+subroutine surf_affine_transf(npatches, norders, ixyzs, iptype, npts, &
+  srccoefs, srcvals, dmat, dshift, srccoefs_out, srcvals_out)
+! 
+!  This subroutine applies an affine transformation to the surface.  
+!  x \to Mx + v, where v=dshift, and M=dmat
+!
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info. 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes. 
+!    - dmat: real *8(3,3)
+!        transformation matrix
+!    - dshift: real *8(3)
+!        translation 
+!
+!  Output arguments: (All arguments are updated on output)
+!    - srccoefs_out: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        the scaled geometry info. 
+!    - srcvals_out: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes of the scaled geometry. 
+!  
+!
+  implicit real *8 (a-h,o-z)
+  implicit integer *8 (i-n)
+  integer *8, intent(in) :: npatches, npts
+  integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  real *8, intent(in) :: dmat(3,3), dshift(3)
+  real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+  real *8, intent(out) :: srccoefs_out(9,npts), srcvals_out(12,npts)
+
+  integer *8 nd9
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ds)
+  do i=1,npts
+    srcvals_out(1:3,i) = dmat(1:3,1)*srcvals(1,i) + &
+                         dmat(1:3,2)*srcvals(2,i) + &
+                         dmat(1:3,3)*srcvals(3,i) + dshift(1:3)
+    srcvals_out(4:6,i) = dmat(1:3,1)*srcvals(4,i) + &
+                         dmat(1:3,2)*srcvals(5,i) + &
+                         dmat(1:3,3)*srcvals(6,i) 
+    srcvals_out(7:9,i) = dmat(1:3,1)*srcvals(7,i) + &
+                         dmat(1:3,2)*srcvals(8,i) + &
+                         dmat(1:3,3)*srcvals(9,i) 
+      call cross_prod3d(srcvals_out(4,i), srcvals_out(7,i), &
+        srcvals_out(10,i))
+
+      ds = sqrt(srcvals_out(10,i)**2 + srcvals_out(11,i)**2 + &
+              srcvals_out(12,i)**2)
+      srcvals_out(10,i) = srcvals_out(10,i)/ds
+      srcvals_out(11,i) = srcvals_out(11,i)/ds
+      srcvals_out(12,i) = srcvals_out(12,i)/ds
+
+  enddo
+!$OMP END PARALLEL DO
+  nd9 = 9
+  call surf_vals_to_coefs(nd9, npatches, norders, ixyzs, iptype, npts, &
+    srcvals_out(1:9,1:npts), srccoefs_out)
+
+  return
+  end
+!
+!
+!
+!
+!
+subroutine surf_append_single_comp(npatches, norders, ixyzs, iptype, &
+  npts, srccoefs, srcvals, ncomp, icomps, npatches_out, norders_out, &
+  ixyzs_out, iptype_out, npts_out, srccoefs_out, srcvals_out)
+!
+!
+!  Given a multicomponent surfer description, append to it a new copoment
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface to be appended 
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface to be
+!        appended
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info of the the surface to be appended 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes of the surface to be appended
+!
+!  Input/Output arguments:
+!    - ncomp: integer *8
+!        On input, number of components in output surfer
+!        On output, ncomp = ncomp + 1
+!    - icomps: integer *8 (*)
+!        Must be atleast of size ncomp+2
+!        On input and output icomps(i) is the id of the patch where
+!        information for component i begins in the output surfer
+!    - npatches_out: integer *8
+!        On input, number of patches describing the discretized surface
+!        On output, npatches_out = npatches_out + npatches
+!    - norders_out: integer *8(*)
+!        Must be at least of size npatches_out + npatches on input
+!        Describes the order of discretization of each patch
+!    - ixyzs: integer *8(*)
+!        Must be at least of size npatches_out + npatches + 1 on input
+!        Describes starting location of points on patch i
+!    - iptype: integer *8(*)
+!        Must be at least of size npatches_out + npatches on input
+!        Describes the type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts_out: integer *8
+!        On input, total number of points describing the discretized surface
+!        On output, npts_out = npts_out + npts
+!    - srccoefs_out: double precision (9,*)
+!        Must be atleast of size (9,npts_out + npts)
+!        Describes the koornwinder expansion coefs of 
+!        geometry info of the the surface 
+!    - srcvals_out: double precision (12,*)
+!        Must be atleast of size (12,npts_out + npts)
+!        Describes the xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes of the surface 
+!
+      implicit real *8(a-h,o-z)
+      implicit integer *8(i-n)
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: norders(npatches), iptype(npatches)
+      integer *8, intent(in) :: ixyzs(npatches+1)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+
+      integer *8, intent(inout) :: npatches_out, npts_out, ncomp, icomps(*)
+      integer *8, intent(inout) :: norders_out(*), iptype_out(*)
+      integer *8, intent(inout) :: ixyzs_out(*)
+      real *8, intent(inout) :: srccoefs_out(9,*), srcvals_out(12,*)
+
+!$OMP PARALLEL DO DEFAULT(SHARED)
+      do i=1,npatches
+        ixyzs_out(npatches_out+i+1) = ixyzs_out(npatches+1) + ixyzs(i+1) - 1
+        norders_out(npatches_out+i) = norders(i)
+        iptype_out(npatches_out+i) = iptype(i)
+      enddo
+!$OMP END PARALLEL DO
+
+!$OMP PARALLEL DO DEFAULT(SHARED)
+      do i=1,npts
+        srccoefs_out(1:9,npts_out+i) = srccoefs(1:9,i)
+        srcvals_out(1:12,npts_out+i) = srcvals(1:12,i)
+      enddo
+!$OMP END PARALLEL DO
+      
+      npatches_out = npatches_out + npatches
+      npts_out = npts_out + npts
+      ncomp = ncomp + 1
+      icomps(ncomp+1) = npatches_out + 1
+
+      return
+      end
+!
+!
+!
+!
+!
+!
 subroutine surf_vals_to_coefs(nd,npatches,norders,ixyzs,iptype,npts, &
    u,ucoefs)