From fbf99461912ff1d6d21f5b85a2086381638a7ffd Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@iitb.ac.in>
Date: Tue, 20 Jan 2026 09:59:05 +0530
Subject: [PATCH 1/7] adding stokes mobility wrapper

---
 src/stok_wrappers/stok_s_mob.f90 | 1614 ++++++++++++++++++++++++++++++
 src/surface_routs/surf_routs.f90 |   60 ++
 2 files changed, 1674 insertions(+)
 create mode 100644 src/stok_wrappers/stok_s_mob.f90

diff --git a/src/stok_wrappers/stok_s_mob.f90 b/src/stok_wrappers/stok_s_mob.f90
new file mode 100644
index 00000000..2b83cca5
--- /dev/null
+++ b/src/stok_wrappers/stok_s_mob.f90
@@ -0,0 +1,1614 @@
+!ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+!
+!  Single layer representation for the Stokes mobility problem 
+!
+!  PDE:
+!    \Delta u       = \nabla p
+!    \Delta \cdot u = 0
+!
+!  Boundary conditions:
+!    u = u_{i} + \omega_{i} \times (x - x_{c,i}) on component \Gamma_{i}
+!      where u_{i}, \omega_{i} are unknown
+!    \int_{\Gamma_{i}} f dS = F_{i}, f is the surface traction
+!    \int_{\Gamma_{i}} (x - x_{c,i}) \times f ds = T_{i}
+!      where F_{i}, T_{i} are the forces and torques on body i and
+!      are given
+!
+!
+!  Representation:
+!    u = S_{stok}[\sigma] + S_{stok}[\sigma_{0}] 
+!  
+!  where \sigma_{0} = F_{i}/|\Gamma_{i}| + 
+!     \tau_{i}^{-1} T_{i} \times (x - x_{c,i})
+!
+!  with \tau_{i} is the moment of inertia tensor. In addition, 
+!
+!  \sigma satisfies
+!    \int_{\Gamma_{i}} \sigma = 0, 
+!    \int_{\Gamma_{i}} (x-x_{c,i}) \times \sigma = 0
+!
+!  Integral equations obtained by imposing:
+!    f^{-} = 0 \,,
+!
+!  where f^{-} is the interior surface traction
+!
+!  and is given by:
+!    1/2 \sigma + S_{stok}'[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  along with the constraints above. 
+!
+!  Instead, we use the generalized 1's matrix trick to solve for
+!  sigma as
+!    1/2 \sigma + S_{stok}'[\sigma] + L[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  where L[\sigma] is given by
+!     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
+!       \tau_{i}^{-1}(\int_{{\Gamma_{i}}} (y-x_{c,i}) \times \sigma dS) \times (x-x_{c,i})
+!  on \Gamma_{i}
+!
+!
+!ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+!
+!  User callable routines:
+!    - stok_s_mob_solver: Given data F_{i}, T_{i}, 
+!       this routine returns the solution \sigma, and u_{i}, \omega_{i}
+!
+!    - stok_s_vel_eval: Given \sigma this routine
+!        evaluates the solution at a collection of targets
+!
+!ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
+!
+!  Advanced interfaces:
+!    - getnearquad_stok_s_mob: compute the near quadrature correction
+!        for constructing the on-surface integral equation for the
+!        traction data corresponding to the single layer
+!        representation with user-provided near-information prescribed
+!        in row-sparse compressed format
+!
+!    - lpcomp_stok_s_mob_addsub: apply the principal value part
+!        of the integeral equation on surface. On input, user provides
+!        precomputed near quadrature in row-sparse compressed format,
+!        and oversampling information for handling the far part of the
+!        computation
+!
+!    - stok_s_mob_solver_guru: Guru solver routine, where user is
+!        responsible for providing precomputed near quadrature
+!        information in row-sparse compressed format and oversampling
+!        surface information for the far-part
+!
+!
+
+      subroutine getnearquad_stok_s_mob(npatches, norders, &
+        ixyzs, iptype, npts, srccoefs, srcvals, eps, &
+        iquadtype, nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear)
+!
+!  This subroutine generates the near field quadrature
+!  for the representation:
+!
+!  u = S_{stok}[\sigma] - (1) 
+!
+!  and returns quantities related to evaluating the surface 
+!  traction of u on surface at the surface discretization nodes.
+!
+!  On imposing the boundary condition, we get the following operator
+!
+!  u = \sigma/2 + S_{stok}'[\sigma] 
+!
+!  The quadrature is computed by the following strategy
+!  targets within a sphere of radius rfac0*rs
+!  of a patch centroid is handled using adaptive integration
+!  where rs is the radius of the bounding sphere
+!  for the patch
+!  
+!  All other targets in the near field are handled via
+!  oversampled quadrature
+!
+!  The recommended parameter for rfac0 is 1.25d0
+!  
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev 
+!                     nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - eps: real *8
+!        precision requested
+!    - iquadtype: integer *8
+!        quadrature type
+!          * iquadtype = 1, use ggq for self + adaptive integration
+!            for rest
+!    - nnz: integer *8
+!        number of source patch-> target interactions in the near field
+!    - row_ptr: integer *8(npts+1)
+!        row_ptr(i) is the pointer
+!        to col_ind array where list of relevant source patches
+!        for target i start
+!    - col_ind: integer *8 (nnz)
+!        list of source patches relevant for all targets, sorted
+!        by the target number
+!    - iquad: integer *8(nnz+1)
+!        location in wnear_ij array where quadrature for col_ind(i)
+!        starts for a single kernel. In this case the different kernels
+!        are matrix entries are located at (m-1)*nquad+iquad(i), where
+!        m is the kernel number
+!    - rfac0: real *8
+!        radius parameter for switching to predetermined quadarature
+!        rule        
+!    - nquad: integer *8
+!        number of near field entries corresponding to each source 
+!        target pair
+!
+!  Output arguments
+!    - wnear: real *8(6,nquad)
+!        The desired near field quadrature for
+!        \alpha S_{stok} + \beta D_{stok}. Since the Stokes 
+!        tensor is symmetric, quadratures are only
+!        generated for the lower half of the tensor
+!        * wnear(1,:) - stores the quadratures for (1,1) entry of the 
+!                       tensor 
+!        * wnear(2,:) - stores the quadratures for (1,2) entry of the 
+!                       tensor 
+!        * wnear(3,:) - stores the quadratures for (1,3) entry of the 
+!                       tensor 
+!        * wnear(4,:) - stores the quadratures for (2,2) entry of the 
+!                       tensor 
+!        * wnear(5,:) - stores the quadratures for (2,3) entry of the 
+!                       tensor 
+!        * wnear(6,:) - stores the quadratures for (3,3) entry of the 
+!                       tensor 
+!
+      implicit none 
+      integer *8, intent(in) :: npatches, norders(npatches), npts, nquad
+      integer *8, intent(in) :: ixyzs(npatches+1), iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts), eps
+      real *8, intent(in) :: rfac0
+      integer *8, intent(in) :: iquadtype
+      integer *8, intent(in) :: nnz
+      integer *8, intent(in) :: row_ptr(npts+1), col_ind(nnz), iquad(nnz+1)
+      real *8, intent(out) :: wnear(6,nquad)
+
+      integer *8 :: ndtarg, ntarg
+      integer *8, allocatable :: ipatch_id(:)
+      real *8, allocatable :: uvs_targ(:,:)
+
+      real *8 alpha, beta
+      real *8, allocatable :: wneartmp(:)
+      integer *8 ndd, ndi, ndz
+      complex *16 zpars(1)
+      integer *8 ipars(2)
+
+      integer *8 i, ipv, j, ii, ijloc(2,6)
+
+      procedure (), pointer :: fker
+      external st3d_strac 
+      
+
+      ndtarg = 12
+      ntarg = npts
+      allocate(ipatch_id(npts), uvs_targ(2,npts))
+   
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
+      do i = 1,npts
+        ipatch_id(i) = -1
+        uvs_targ(1,i) = 0
+        uvs_targ(2,i) = 0
+      enddo
+!$OMP END PARALLEL DO
+
+      call get_patch_id_uvs(npatches, norders, ixyzs, iptype, npts, &
+        ipatch_id, uvs_targ)
+
+      ndd = 2
+      ndi = 2
+      ndz = 0
+      fker => st3d_strac
+      ipv = 1
+
+      ijloc(1,1) = 1
+      ijloc(2,1) = 1
+      ijloc(1,2) = 1
+      ijloc(2,2) = 2
+      ijloc(1,3) = 1
+      ijloc(2,3) = 3
+      ijloc(1,4) = 2
+      ijloc(2,4) = 2
+      ijloc(1,5) = 2
+      ijloc(2,5) = 3
+      ijloc(1,6) = 3
+      ijloc(2,6) = 3
+
+      allocate(wneartmp(nquad))
+
+
+      if(iquadtype.eq.1) then
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ii)          
+        do ii = 1,nquad
+          wneartmp(ii) = 0
+        enddo
+!$OMP END PARALLEL DO
+
+        do i = 1,6   
+          call dgetnearquad_ggq_guru(npatches, norders, ixyzs, &
+            iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+            ipatch_id, uvs_targ, eps, ipv, fker, ndd, dpars, ndz, &
+            zpars, ndi, ijloc(1,i), nnz, row_ptr, col_ind, iquad, &
+            rfac0, nquad, wneartmp)
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ii)          
+          do ii = 1,nquad
+            wnear(i,ii) = wneartmp(ii)
+          enddo
+!$OMP END PARALLEL DO
+        enddo
+      endif
+
+
+      return
+      end
+!
+!
+!
+!
+!
+      subroutine stok_s_vel_eval(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+        ipatch_id, uvs_targ, eps, sigma, pot)
+!
+!f2py intent(in) npatches,norders,ixyzs,iptype,npts,srccoefs,srcvals
+!f2py intent(in) ndtarg,ntarg,targs,ipatch_id,uvs_targ,eps
+!f2py intent(in) sigma
+!f2py intent(out) pot
+!
+!
+!------------------------------
+!  This subroutine evaluates the layer potential for the 
+!  Stokes single layer representation 
+!
+!  Representation:
+!    u = \mathcal{S}_{stok}[\sigma]
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - ndtarg: integer *8
+!        leading dimension of target information array
+!    - ntarg: integer *8
+!        number of targets
+!    - targs: real *8(ndtarg,ntarg)
+!        target information 
+!    - ipatch_id: integer *8(ntarg)
+!        id of patch of target i, id = -1, if target is off-surface
+!    - uvs_targ: double precision (2,ntarg)
+!        local uv coordinates on patch if on surface, otherwise
+!        set to 0 by default
+!    - eps: double precision
+!        precision requested
+!     - sigma: double precision(3,npts)
+!         density for layer potential
+!
+!  Output arguments
+!    - pot: double precision(3,ntarg)
+!        layer potential evaluated at the target points
+!
+!-----------------------------------
+!
+      implicit none
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: ndtarg, ntarg
+      integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+      integer *8, intent(in) :: iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts), eps
+      real *8, intent(in) :: targs(ndtarg,ntarg)
+      real *8, intent(in) :: sigma(3,npts)
+      integer *8, intent(in) :: ipatch_id(ntarg)
+      real *8, intent(in) :: uvs_targ(2,ntarg)
+
+      real *8, intent(out) :: pot(3,ntarg)
+
+      real *8 dpars(2)
+
+      dpars(1) = 1.0d0
+      dpars(2) = 0.0d0
+
+      call stok_comb_vel_eval(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+        ipatch_id, uvs_targ, eps, dpars, sigma, pot)
+
+      return
+      end
+!
+!
+!  
+!
+!
+      subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, ndd, dpars, ndz, zpars, &
+        ndi, ipars, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, &
+        novers, nptso, ixyzso, srcover, whtsover, lwork, work, ndim, &
+        sigma, pot)
+!
+!  This subroutine evaluates the surface traction +
+!  the generalized ones matrix corresponding to
+!  the following integral representation:
+!  
+!  u = S_{stok}[\sigma]
+!
+!  pot = S_{stok}'[\sigma] + L[\sigma]
+!
+!  where L[\sigma] is given by
+!     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
+!       tau_{i}^{-1}(\int_{{\Gamma_{i}}} (y-x_{c,i}) \times \sigma dS) \times (x-x_{c,i})
+!  on \Gamma_{i}
+!
+!  The near field is precomputed and stored
+!  in the row sparse compressed format.
+!        
+!  The fmm is used to accelerate the far-field and 
+!  near-field interactions are handled via precomputed quadrature
+!
+!  Using add and subtract - no need to call tree and set fmm parameters
+!  can directly call existing fmm library
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - eps: real *8
+!        precision requested
+!    - ndd: integer *8
+!        number of real parameters defining the kernel/
+!        integral representation (must be 3)
+!    - dpars: real *8(ndd)
+!        real parameters defining the kernel/
+!        integral representation 
+!        * dpars(1) = alpha (single layer strength) 
+!        * dpars(2) = beta (double layer strength)
+!        * dpars(3) = strength of 1s matrix of 
+!            n \int \sigma \cdot n
+!    - ndz: integer *8
+!        number of complex parameters defining the kernel/
+!        integral representation (unused in this routine)
+!    - zpars: complex *16(ndz)
+!        complex parameters defining the kernel/
+!        integral represnetation (unused in this routine)
+!    - ndi: integer *8
+!        must be ncomp + 1, where ncomp is the number of
+!        components in the geometry
+!    - ipars: integer *8(ndi)
+!        ipars(1) = ncomp
+!        ipars(i+1) = starting patch index for patches on component
+!          i (note that the patches must be ordered by component)
+!    - nnz: integer *8
+!        number of source patch-> target interactions in the near field
+!    - row_ptr: integer *8(npts+1)
+!        row_ptr(i) is the pointer
+!        to col_ind array where list of relevant source patches
+!        for target i start
+!    - col_ind: integer *8 (nnz)
+!        list of source patches relevant for all targets, sorted
+!        by the target number
+!    - iquad: integer *8(nnz+1)
+!        location in wnear_ij array where quadrature for col_ind(i)
+!        starts for a single kernel. In this case the different kernels
+!        are matrix entries are located at (m-1)*nquad+iquad(i), where
+!        m is the kernel number
+!    - nquad: integer *8
+!        number of near field entries corresponding to each source target
+!        pair
+!    - nker: integer *8
+!        number of kernels in quadrature correction, must be 6
+!    - wnear: real *8(nker, nquad)
+!        Precomputed near field quadrature for
+!        S_{stok}'. Since the Stokes 
+!        tensor is symmetric, quadratures are only
+!        generated for the lower half of the tensor
+!        * wnear(1,:) - stores the quadratures for (1,1) entry of the 
+!                       tensor 
+!        * wnear(2,:) - stores the quadratures for (1,2) entry of the 
+!                       tensor 
+!        * wnear(3,:) - stores the quadratures for (1,3) entry of the 
+!                       tensor 
+!        * wnear(4,:) - stores the quadratures for (2,2) entry of the 
+!                       tensor 
+!        * wnear(5,:) - stores the quadratures for (2,3) entry of the 
+!                       tensor 
+!        * wnear(6,:) - stores the quadratures for (3,3) entry of the 
+!                       tensor 
+!    - novers: integer *8(npatches)
+!        order of discretization for oversampled sources and
+!        density
+!    - ixyzso: integer *8(npatches+1)
+!        ixyzso(i) denotes the starting location in srcover,
+!        corresponding to patch i
+!    - nptso: integer *8
+!        total number of oversampled points
+!    - srcover: real *8 (12,nptso)
+!        oversampled set of source information
+!    - whtsover: real *8 (nptso)
+!        smooth quadrature weights at oversampled nodes
+!    - lwork: integer *8
+!        size of work array (must be npts)
+!    - work: real *8(lwork)
+!        work array
+!        * work(1:npts) stores the coordinates the quadrature
+!          weights for integrating smooth functions
+!    - ndim: integer *8
+!        number of densities per point on the surface,
+!        must be 3 for this routine
+!    - sigma: real *8(3,npts)
+!        The density sigma above                                        
+!
+!  Output arguments:
+!    - pot: real *8(3,npts)
+!        u corresponding to representation
+!
+
+  
+      implicit none
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: norders(npatches),ixyzs(npatches+1)
+      integer *8, intent(in) :: iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+
+      real *8, intent(in) :: eps
+      
+      integer *8, intent(in) :: ndd, ndz, ndi
+      real *8, intent(in) :: dpars(ndd)
+      complex *16, intent(in) :: zpars(ndz)
+      integer *8, intent(in) :: ipars(ndi)
+
+      integer *8, intent(in) :: nnz, nquad
+      integer *8, intent(in) :: row_ptr(npts+1), col_ind(nnz)
+      integer *8, intent(in) :: iquad(nnz+1)
+      
+      integer *8, intent(in) :: nker
+      real *8, intent(in) :: wnear(nker,nquad)
+
+      integer *8, intent(in) :: nptso
+      integer *8, intent(in) :: ixyzso(npatches+1), novers(npatches)
+      real *8, intent(in) :: srcover(12,nptso), whtsover(nptso)
+      
+      integer *8, intent(in) :: lwork
+      real *8, intent(in) :: work(lwork)
+
+      integer *8, intent(in) :: ndim
+    
+      real *8, intent(in) :: sigma(ndim,npts)
+    
+      real *8, intent(out) :: pot(ndim,npts)
+
+      integer *8 ndtarg, idensflag, ipotflag, ndim_p, i
+      real *8 rint
+
+      integer *8 norder,npols,nover,npolso
+      real *8, allocatable :: potsort(:)
+
+      real *8, allocatable :: sources(:,:), targvals(:,:)
+      real *8, allocatable :: sigmaover(:,:)
+      real *8, allocatable :: stoklet(:,:)
+      real *8, allocatable :: pottarg(:,:), gradtarg(:,:,:)
+      real *8, allocatable :: pretarg(:)
+      integer *8 ns, nt
+      real *8 alpha, beta
+      integer *8 ifstoklet, ifstrslet
+      integer *8 ifppreg, ifppregtarg
+      real *8 tmp(10), val
+      real *8 over4pi
+
+      real *8 w11, w12, w13, w21, w22, w23, w31, w32, w33
+      real *8 sig1, sig2, sig3
+
+      integer *8 istress
+
+      integer *8 i, j, jpatch, jquadstart, jstart
+
+      real *8 pottmp
+      real *8, allocatable :: sttmp2(:,:)
+      real *8 strstmp2(3), strsvec2(3)
+
+      integer *8 nmax
+      real *8 timeinfo(10), t1, t2, omp_get_wtime
+
+      real *8, allocatable :: srctmp2(:,:)
+      real *8 thresh, ra
+      real *8 rr, rmin
+      integer *8 nss, ii, l, npover
+
+      integer *8 nd, ntarg0, ntarg
+      integer *8 ier, iper
+
+      integer *8 ndtmp, ndsigma
+
+      real *8 ttot, done, pi
+
+      real *8 potv(3), pv, gradv(3,3)
+      real *8 strslet(3), strvec(3)
+
+      real *8 rint(3), rint_torque(3), xyzc(3), xdiff(3), rtmp(3)
+      integer *8 ncomp, icomp
+
+      parameter (nd=1,ntarg0=1)
+      data over4pi/0.07957747154594767d0/
+
+      ns = nptso
+      done = 1
+      pi = atan(done)*4
+
+!
+!    estimate max number of sources in neear field of 
+!    any target
+!
+      
+      
+      nmax = 0
+      ntarg = npts
+      call get_near_corr_max(ntarg, row_ptr, nnz, col_ind, npatches, &
+        ixyzso, nmax)
+      allocate(srctmp2(3,nmax), sttmp2(3,nmax))
+           
+           
+      ifppreg = 0
+      ifppregtarg = 3
+      allocate(sources(3,ns), targvals(3,ntarg))
+      allocate(stoklet(3,ns), strslet(3,ns), strsvec(3,ns))
+      allocate(sigmaover(3,ns))
+      allocate(pottarg(3,ntarg), pretarg(ntarg))
+      allocate(gradtarg(3,3,ntarg))
+
+! 
+!       oversample density
+!
+
+      ndsigma = 3
+      call oversample_fun_surf(ndsigma, npatches, norders, ixyzs, &
+        iptype, npts, sigma, novers, ixyzso, ns, sigmaover)
+
+
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)      
+      do i=1,ns
+        sources(1,i) = srcover(1,i)
+        sources(2,i) = srcover(2,i)
+        sources(3,i) = srcover(3,i)
+
+        stoklet(1,i) = sigmaover(1,i)*whtsover(i)*over4pi
+        stoklet(2,i) = sigmaover(2,i)*whtsover(i)*over4pi
+        stoklet(3,i) = sigmaover(3,i)*whtsover(i)*over4pi
+      enddo
+!$OMP END PARALLEL DO      
+      
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
+      do i = 1,ntarg
+        targvals(1,i) = srcvals(1,i)
+        targvals(2,i) = srcvals(2,i)
+        targvals(3,i) = srcvals(3,i)
+      enddo
+!$OMP END PARALLEL DO      
+      
+
+      ifstoklet = 1
+      ifstrslet = 0
+
+      iper = 0
+      ier = 0
+!
+!       call the fmm
+!
+      call cpu_time(t1)
+!$      t1 = omp_get_wtime()      
+      call stfmm3d(nd, eps, ns, sources, ifstoklet, stoklet, &
+        ifstrslet, strslet, strsvec, ifppreg, tmp, tmp, tmp, ntarg, &
+        targvals, ifppregtarg, pottarg, pretarg, gradtarg, ier)
+      call cpu_time(t2)
+!$      t2 = omp_get_wtime()
+
+      timeinfo(1) = t2-t1
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
+      do i = 1,ntarg
+        pot(1,i) = srcvals(10,i)*(-pretarg(i) + 2*gradtarg(1,1,i)) + &
+             srcvals(11,i)*(gradtarg(1,2,i) + gradtarg(2,1,i)) + &
+             srcvals(12,i)*(gradtarg(1,3,i) + gradtarg(3,1,i))
+        pot(2,i) = srcvals(10,i)*(gradtarg(1,2,i) + gradtarg(2,1,i)) + &
+             srcvals(11,i)*(-pretarg(i) + 2*gradtarg(2,2,i)) + &
+             srcvals(12,i)*(gradtarg(2,3,i) + gradtarg(3,2,i))
+        pot(3,i) = srcvals(10,i)*(gradtarg(1,3,i) + gradtarg(3,1,i)) + &
+             srcvals(11,i)*(gradtarg(2,3,i) + gradtarg(3,2,i)) + &
+             srcvals(12,i)*(-pretarg(i) + 2*gradtarg(3,3,i))
+      enddo
+!$OMP END PARALLEL DO 
+
+!
+!        compute threshold for ignoring local computation
+!
+      ndtmp = 3 
+      call get_fmm_thresh(ndtmp, ns, sources, ndtmp, ntarg, targvals, &
+        thresh)
+      
+!
+!
+!       add in precomputed quadrature
+!
+
+      call cpu_time(t1)
+!$      t1 = omp_get_wtime()
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,j, jpatch, jquadstart) &
+!$OMP PRIVATE(jstart, npols, l, sig1, sig2, sig3, w11, w12, w13, w21) &
+!$OMP PRIVATE(w22, w23, w31, w32, w33)
+      do i = 1,ntarg
+        do j = row_ptr(i),row_ptr(i+1)-1
+          jpatch = col_ind(j)
+          npols = ixyzs(jpatch+1) - ixyzs(jpatch)
+          jquadstart = iquad(j)
+          jstart = ixyzs(jpatch) 
+          do l=1,npols
+            sig1 = sigma(1,jstart+l-1)
+            sig2 = sigma(2,jstart+l-1)
+            sig3 = sigma(3,jstart+l-1)
+
+            w11 = wnear(1,jquadstart+l-1)
+            w12 = wnear(2,jquadstart+l-1)
+            w13 = wnear(3,jquadstart+l-1)
+
+            w21 = w12 
+            w22 = wnear(4,jquadstart+l-1)
+            w23 = wnear(5,jquadstart+l-1)
+
+            w31 = w13 
+            w32 = w23 
+            w33 = wnear(6,jquadstart+l-1)
+
+
+            pot(1,i) = pot(1,i) + w11*sig1 + w12*sig2 + w13*sig3 
+            pot(2,i) = pot(2,i) + w21*sig1 + w22*sig2 + w23*sig3 
+            pot(3,i) = pot(3,i) + w31*sig1 + w32*sig2 + w33*sig3 
+          enddo
+        enddo
+      enddo
+!$OMP END PARALLEL DO
+
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i, j, jpatch, srctmp2) &
+!$OMP PRIVATE(sttmp2, strstmp2, strsvec2, nss, l, jstart, ii, npover) &
+!$OMP PRIVATE(potv, pv, gradv, istress)
+      do i = 1,ntarg
+        nss = 0
+        do j = row_ptr(i), row_ptr(i+1)-1
+          jpatch = col_ind(j)
+          do l = ixyzso(jpatch),ixyzso(jpatch+1)-1
+            nss = nss + 1
+            srctmp2(1,nss) = srcover(1,l)
+            srctmp2(2,nss) = srcover(2,l)
+            srctmp2(3,nss) = srcover(3,l)
+
+            sttmp2(1,nss) = stoklet(1,l)
+            sttmp2(2,nss) = stoklet(2,l)
+            sttmp2(3,nss) = stoklet(3,l)
+          enddo
+        enddo
+
+        potv(1) = 0
+        potv(2) = 0
+        potv(3) = 0
+        pv = 0
+        gradv(1:3,1:3) = 0
+        
+        istress = 0
+        call st3ddirectstokstrsg(nd, srctmp2, sttmp2, istress, &
+          strstmp2, strsvec2, nss, targvals(1,i), ntarg0, potv, pv, &
+          gradv, thresh)
+        potv(1) = srcvals(10,i)*(-pv + 2*gradv(1,1)) + &
+             srcvals(11,i)*(gradv(1,2) + gradv(2,1)) + &
+             srcvals(12,i)*(gradv(1,3) + gradv(3,1))
+        potv(2) = srcvals(10,i)*(gradv(1,2) + gradv(2,1)) + &
+             srcvals(11,i)*(-pv + 2*gradv(2,2)) + &
+             srcvals(12,i)*(gradv(2,3) + gradv(3,2))
+        potv(3) = srcvals(10,i)*(gradv(1,3) + gradv(3,1)) + &
+             srcvals(11,i)*(gradv(2,3) + gradv(3,2)) + &
+             srcvals(12,i)*(-pv + 2*gradv(3,3))
+
+        pot(1,i) = pot(1,i) - potv(1)
+        pot(2,i) = pot(2,i) - potv(2)
+        pot(3,i) = pot(3,i) - potv(3)
+
+      enddo
+      
+      call cpu_time(t2)
+!$      t2 = omp_get_wtime()     
+
+
+!
+!  Now add in contribution of L[\sigma]
+!
+      ncomp = ipars(1)
+      do icomp = 1,ncomp
+        rint(1:3) = 0
+        rint_torque(1:3) = 0
+        ipstart = ipars(icomp+1)
+        ipend = npatches
+        if (icomp.ne.ncomp) ipend = ipars(icomp+2)-1
+
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend)
+        nploc = ipend - ipstart + 1
+        nptsloc = iend - istart + 1
+
+        do i=1,nploc+1
+          ixyzsloc(i) = ixyzs(i+istart-1) - ixyzs(istart)+1
+        enddo
+        rmoi(1:3,1:3) = 0
+        rmoi_inv(1:3,1:3) = 0
+        area = 0
+        centroid(1:3) = 0
+        call get_surf_moments(nploc, norders(ipstart), ixyzsloc, &
+          iptype(ipstart), nptsloc, srcvals(1,istart), work(istart), &
+          area, centroid, rmoi)
+        info = 0
+        call dinverse(3, rmoi, info, rmoi_inv) 
+        
+        do i = istart,iend
+          rint(1:3) = rint(1:3) + sigma(1:3,i)*work(i)
+          rtmp(1:3) = 0
+          xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+          call cross_prod3d(xdiff, sigma(1:3,i), rtmp)
+          rint_torque(1:3) = rint_torque(1:3) + rtmp(1:3)*work(i)
+        enddo
+
+        do i = istart,iend
+          pot(1:3,i) = pot(1:3,i) + rint(1:3)/area
+          xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+          rtmp(1:3) = 0
+          call cross_prod3d(rint_torque, xdiff, rtmp)
+          pot(1,i) = pot(1,i) + rmoi_inv(1,1)*rtmp(1) + &
+            rmoi_inv(1,2)*rtmp(2) + rmoi_inv(1,3)*rtmp(3)
+          pot(2,i) = pot(2,i) + rmoi_inv(2,1)*rtmp(1) + &
+            rmoi_inv(2,2)*rtmp(2) + rmoi_inv(2,3)*rtmp(3)
+          pot(3,i) = pot(3,i) + rmoi_inv(3,1)*rtmp(1) + &
+            rmoi_inv(3,2)*rtmp(2) + rmoi_inv(3,3)*rtmp(3)
+        enddo
+      enddo
+
+      return
+      end
+
+!
+!
+!
+!
+!
+!
+!
+!        
+      subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, dpars, numit, ifinout, &
+        rhs, eps_gmres, niter, errs, rres, soln)
+
+!
+!
+!  This subroutine solves the Stokes velocity problem
+!  on the exterior/interior of an object where the potential
+!  is represented using a combined field integral representation.
+!
+!  This subroutine is the simple interface as opposed to the
+!  _solver_guru routine which is called after initialization
+!  in this routine.
+!
+!
+!  Representation:
+!    u = \alpha S_{stok}[\sigma]+\beta*D_{stok}[\sigma]
+!
+!  Boundary condition:
+!    u = f
+!
+!  The linear system is solved iteratively using GMRES
+!  until a relative residual of eps_gmres is reached
+!
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - eps: real *8
+!        precision requested for computing quadrature and fmm
+!        tolerance
+!    - dpars: real *8(2)
+!        kernel parameters (Referring to formula (1))
+!          * dpars(1) = \alpha 
+!          * dpars(2) = \beta
+!    - numit: integer *8
+!        max number of gmres iterations
+!    - ifinout: integer *8
+!        ifinout = 0, interior problem
+!        ifinout = 1, exterior problem
+!    - rhs: real *8(3,npts)
+!        velocity data
+!    - eps_gmres: real *8
+!        gmres tolerance requested
+!      
+!
+!  Output arguments:
+!    - niter: integer *8
+!        number of gmres iterations required for relative residual
+!        to converge to eps_gmres
+!    - errs:  real *8 (numit+1)
+!        relative residual as a function of iteration
+!        number (only errs(1:niter) is populated))
+!    - rres: real *8
+!        relative residual for computed solution
+!    - soln: real *8(3,npts)
+!        density which solves the velocity problem \sigma
+!				 
+      implicit none
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: ifinout
+      integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+      integer *8, intent(in) :: iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+      real *8, intent(in) :: eps, eps_gmres
+      real *8, intent(in) :: dpars(2)
+      real *8, intent(in) :: rhs(3,npts)
+      integer *8, intent(in) :: numit
+      real *8, intent(out) :: soln(3,npts)
+      real *8, intent(out) :: errs(numit+1)
+      real *8, intent(out) :: rres
+      integer *8, intent(out) :: niter
+
+      integer *8 norder, npols
+      real *8, allocatable :: targs(:,:)
+      integer *8, allocatable :: ipatch_id(:)
+      real *8, allocatable :: uvs_targ(:,:)
+      integer *8 ndtarg, ntarg
+
+
+
+      integer *8 nover, npolso, nptso
+      integer *8 nnz,nquad
+      integer *8, allocatable :: row_ptr(:), col_ind(:), iquad(:)
+      real *8, allocatable :: wnear(:,:)
+
+      real *8, allocatable :: srcover(:,:), wover(:)
+      integer *8, allocatable :: ixyzso(:), novers(:)
+
+      real *8, allocatable :: cms(:,:), rads(:), rad_near(:) 
+
+      integer *8 i, j, jpatch, jquadstart, jstart
+
+      integer *8 ipars
+      complex *16 zpars
+      real *8 timeinfo(10), t1, t2, omp_get_wtime
+
+
+      real *8 ttot, done, pi
+      real *8 rfac, rfac0
+      integer *8 iptype_avg, norder_avg
+      integer *8 ikerorder, iquadtype, npts_over
+
+!
+!
+!       gmres variables
+!
+      real *8 did, dtmp
+      complex *16 ztmp
+      integer *8 nker
+
+
+      done = 1
+      pi = atan(done)*4
+!
+!
+!        setup targets as on surface discretization points
+! 
+      ndtarg = 3
+      ntarg = npts
+      allocate(targs(ndtarg,npts), uvs_targ(2,ntarg), ipatch_id(ntarg))
+
+!$OMP PARALLEL DO DEFAULT(SHARED)
+      do i=1,ntarg
+        targs(1,i) = srcvals(1,i)
+        targs(2,i) = srcvals(2,i)
+        targs(3,i) = srcvals(3,i)
+        ipatch_id(i) = -1
+        uvs_targ(1,i) = 0
+        uvs_targ(2,i) = 0
+      enddo
+!$OMP END PARALLEL DO   
+
+
+!
+!    initialize patch_id and uv_targ for on surface targets
+!
+      call get_patch_id_uvs(npatches, norders, ixyzs, iptype, npts, &
+        ipatch_id, uvs_targ)
+
+!
+!
+!        this might need fixing
+!
+      iptype_avg = floor(sum(iptype)/(npatches+0.0d0))
+      norder_avg = floor(sum(norders)/(npatches+0.0d0))
+
+      call get_rfacs(norder_avg, iptype_avg, rfac, rfac0)
+
+
+      allocate(cms(3,npatches), rads(npatches), rad_near(npatches))
+
+      call get_centroid_rads(npatches, norders, ixyzs, iptype, npts, &
+        srccoefs, cms, rads)
+
+!$OMP PARALLEL DO DEFAULT(SHARED) 
+      do i=1,npatches
+        rad_near(i) = rads(i)*rfac
+      enddo
+!$OMP END PARALLEL DO      
+
+!
+!    find near quadrature correction interactions
+!
+      print *, "entering find near mem"
+      call findnearmem(cms, npatches, rad_near, ndtarg, targs, npts, nnz)
+
+      allocate(row_ptr(npts+1), col_ind(nnz))
+      
+      call findnear(cms, npatches, rad_near, ndtarg, targs, npts, &
+        row_ptr, col_ind)
+
+      allocate(iquad(nnz+1)) 
+      call get_iquad_rsc(npatches, ixyzs, npts, nnz, row_ptr, col_ind, &
+        iquad)
+
+      ikerorder = -1
+      if(abs(dpars(2)).gt.1.0d-16) ikerorder = 0
+
+!
+!    estimate oversampling for far-field, and oversample geometry
+!
+
+      allocate(novers(npatches), ixyzso(npatches+1))
+
+      print *, "beginning far order estimation"
+
+      ztmp = 0
+
+      call get_far_order(eps, npatches, norders, ixyzs, iptype, cms, &
+        rads, npts, srccoefs, ndtarg, npts, targs, ikerorder, ztmp, &
+        nnz, row_ptr, col_ind, rfac, novers, ixyzso)
+
+      npts_over = ixyzso(npatches+1) - 1
+
+      allocate(srcover(12,npts_over), wover(npts_over))
+
+      call oversample_geom(npatches, norders, ixyzs, iptype, npts, &
+        srccoefs, srcvals, novers, ixyzso, npts_over, srcover)
+
+      call get_qwts(npatches, novers, ixyzso, iptype, npts_over, &
+        srcover, wover)
+
+!
+!   compute near quadrature correction
+!
+      nquad = iquad(nnz+1) - 1
+      nker = 6
+      allocate(wnear(nker,nquad))
+      
+!$OMP PARALLEL DO DEFAULT(SHARED)      
+      do i=1,nquad
+        wnear(1,i) = 0
+        wnear(2,i) = 0
+        wnear(3,i) = 0
+        wnear(4,i) = 0
+        wnear(5,i) = 0
+        wnear(6,i) = 0
+      enddo
+!$OMP END PARALLEL DO    
+
+
+      iquadtype = 1
+
+      print *, "starting to generate near quadrature"
+      call cpu_time(t1)
+!$      t1 = omp_get_wtime()      
+
+      call getnearquad_stok_comb_vel(npatches, norders, &
+        ixyzs, iptype, npts, srccoefs, srcvals, eps, dpars, iquadtype, &
+        nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear)
+      call cpu_time(t2)
+!$      t2 = omp_get_wtime()     
+
+      print *, "done generating near quadrature, now starting gmres"
+      
+      call stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, dpars, numit, ifinout, &
+        rhs, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
+        npts_over, ixyzso, srcover, wover, eps_gmres, niter, &
+        errs, rres, soln)
+      
+      return
+      end
+!
+!
+!
+!
+!
+
+      subroutine stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, dpars, numit, ifinout, &
+        rhs, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
+        nptso, ixyzso, srcover, whtsover, eps_gmres, niter, &
+        errs, rres, soln)
+!
+!
+!  This subroutine solves the Stokes velocity problem
+!  on the exterior of an object where the potential
+!  is represented using the combined field integral representation.
+!
+!
+!  Representation:
+!    u = \alpha S_{stok}[\sigma] + \beta*D_{stok}[\sigma]
+!
+!  Boundary condition:
+!    u = f
+!
+!  The linear system is solved iteratively using GMRES
+!  until a relative residual of eps_gmres is reached
+!
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        order of discretization on each patch 
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(i) denotes the starting location in srccoefs,
+!        and srcvals array corresponding to patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!        iptype = 1, triangular patch discretized using RV nodes
+!        iptype = 11, quadrangular patch discretized with GL nodes
+!        iptype = 12, quadrangular patch discretized with Chebyshev nodes
+!    - npts: integer *8
+!        total number of discretization points on the boundary
+!    - srccoefs: real *8 (9,npts)
+!        basis expansion coefficients of xyz, dxyz/du,
+!        and dxyz/dv on each patch. 
+!        For each point 
+!          * srccoefs(1:3,i) is xyz info
+!          * srccoefs(4:6,i) is dxyz/du info
+!          * srccoefs(7:9,i) is dxyz/dv info
+!    - srcvals: real *8 (12,npts)
+!        xyz(u,v) and derivative info sampled at the 
+!        discretization nodes on the surface
+!          * srcvals(1:3,i) - xyz info
+!          * srcvals(4:6,i) - dxyz/du info
+!          * srcvals(7:9,i) - dxyz/dv info
+!          * srcvals(10:12,i) - normals info
+!    - eps: real *8
+!        precision requested for computing quadrature and fmm
+!        tolerance
+!    - dpars: real *8(2)
+!        kernel parameters (Referring to formula (1))
+!          * dpars(1) = \alpha 
+!          * dpars(2) = \beta 
+!    - numit: integer *8
+!        max number of gmres iterations
+!    - ifinout: integer *8
+!        ifinout = 0, interior problem
+!        ifinout = 1, exterior problem
+!    - rhs: real *8(3,npts)
+!        velocity data
+!    - nnz: integer *8
+!        number of source patch-> target interactions in the near field
+!    - row_ptr: integer *8(npts+1)
+!        row_ptr(i) is the pointer
+!        to col_ind array where list of relevant source patches
+!        for target i start
+!    - col_ind: integer *8 (nnz)
+!        list of source patches relevant for all targets, sorted
+!        by the target number
+!    - iquad: integer *8(nnz+1)
+!        location in wnear_ij array where quadrature for col_ind(i)
+!        starts for a single kernel. In this case the different kernels
+!        are matrix entries are located at (m-1)*nquad+iquad(i), where
+!        m is the kernel number
+!    - nquad: integer *8
+!        number of near field entries corresponding to each source target
+!        pair
+!    - nker: integer *8
+!        number of kernels in quadrature correction, must be 6
+!    - wnear: real *8(nker, nquad)
+!        precomputed quadrature corrections 
+!    - novers: integer *8(npatches)
+!        order of discretization for oversampled sources and
+!        density
+!    - ixyzso: integer *8(npatches+1)
+!        ixyzso(i) denotes the starting location in srcover,
+!        corresponding to patch i
+!    - nptso: integer *8
+!        total number of oversampled points
+!    - srcover: real *8 (12,nptso)
+!        oversampled set of source information
+!    - whtsover: real *8 (nptso)
+!        smooth quadrature weights at oversampled nodes
+!    - eps_gmres: real *8
+!        gmres tolerance requested
+!
+!  output
+!    - niter: integer *8
+!        number of gmres iterations required for relative residual
+!        to converge to eps_gmres
+!    - errs:  real *8 (numit+1)
+!        relative residual as a function of iteration
+!        number (only errs(1:niter) is populated))
+!    - rres: real *8
+!        relative residual for computed solution
+!    - soln: real *8(3,npts)
+!        density which solves the neumann problem \sigma
+!				 
+
+      implicit none
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+      integer *8, intent(in) :: iptype(npatches)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+      real *8, intent(in) :: eps, eps_gmres
+      integer *8, intent(in) :: ifinout
+      real *8, intent(in) :: dpars(2)
+      real *8, intent(in) :: rhs(3,npts)
+      integer *8, intent(in) :: numit
+
+      real *8, intent(out) :: errs(numit+1)
+      real *8, intent(out) :: rres
+      integer *8, intent(out) :: niter
+      
+      integer *8, intent(in) :: nnz, nquad
+      integer *8, intent(in) :: row_ptr(npts+1), col_ind(nnz)
+      integer *8, intent(in) :: iquad(nnz+1)
+
+      integer *8, intent(in) :: nker
+      real *8, intent(in) :: wnear(nker,nquad)
+
+      integer *8, intent(in) :: nptso
+      integer *8, intent(in) :: novers(npatches), ixyzso(npatches+1)
+      real *8, intent(in) :: srcover(12,nptso), whtsover(nptso)
+      
+  
+      real *8, intent(out) :: soln(3,npts)
+
+      real *8 did
+      real *8 dpars_use(3), pi, done, rsurf
+      integer *8 ndd_use
+
+      procedure (), pointer :: fker
+      external lpcomp_stok_comb_vel_addsub
+
+      integer *8 ndd, ndi, ndz, lwork, ndim
+      real *8 work
+      integer *8 ipars, nkertmp
+      complex *16 zpars
+
+      integer *8 ndtarg
+      complex *16 ima
+      data ima/(0.0d0,1.0d0)/
+      real *8, allocatable :: wts(:)
+
+      complex *16 zpars_use(3)
+      integer *8 i
+
+      did = -(-1)**(ifinout)*dpars(2)/2
+      fker => lpcomp_stok_comb_vel_addsub
+
+      ndd_use = 3
+      dpars_use(1) = dpars(1)
+      dpars_use(2) = dpars(2)
+      dpars_use(3) = 0
+      
+      ndi = 0
+      ndz = 0
+
+      lwork = 0
+      ndim = 3
+      allocate(wts(npts))
+
+      call get_qwts(npatches, norders, ixyzs, iptype, npts, &
+        srcvals, wts)
+      rsurf = 0
+!$OMP PARALLEL DO DEFAULT(SHARED) REDUCTION(+:rsurf)      
+      do i=1,npts
+        rsurf = rsurf + wts(i)
+      enddo
+!$OMP END PARALLEL DO      
+      done = 1.0d0
+      pi = atan(done)*4.0d0
+      if(ifinout.eq.0) dpars_use(3) = -2*pi/rsurf
+! 
+
+      call dgmres_guru(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, wts, &
+        eps, ndd_use, dpars_use, ndz, zpars, ndi, ipars, &
+        nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
+        nptso, ixyzso, srcover, whtsover, npts, wts, &
+        ndim, fker, did, rhs, numit, eps_gmres, niter, errs, &
+        rres, soln)
+
+      return
+      end
+
+!
+!
+!
+!        
+      subroutine stok_comb_vel_matgen(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, dpars, ifinout, &
+        xmat)
+!
+!f2py  intent(in) npatches,norders,ixyzs,iptype
+!f2py  intent(in) npts,srccoefs,srcvals,eps,dpars
+!f2py  intent(in) ifinout
+!f2py  intent(out) xmat
+!
+!     this subroutine returns the discretization matrix
+!     for the on-surface discretization of the stokes combined
+!     field layer potential. The unknowns are ordered as:
+!
+!     \sigma_{1}(x_{1}), \sigma_{2}(x_{1}), \sigma_{3}(x_{1})...
+!      \sigma_{1}(x_{2}), \sigma_{2}(x_{2})l \sigma_{3}(x_{2})..
+!                           .
+!                           .
+!                           .
+!                           .
+!      \sigma_{1}(x_{n}), \sigma_{2}(x_{n}), \sigma_{3}(x_{n})
+!
+!     And the same ordering for the velocity components as well
+!
+!
+!     Representation:
+!        u = alpha S \sigma + beta D \sigma
+!     
+!
+!       input:
+!         npatches - integer *8
+!            number of patches
+!
+!         norders- integer *8(npatches)
+!            order of discretization on each patch 
+!
+!         ixyzs - integer *8(npatches+1)
+!            ixyzs(i) denotes the starting location in srccoefs,
+!               and srcvals array corresponding to patch i
+!   
+!         iptype - integer *8(npatches)
+!            type of patch
+!             iptype = 1, triangular patch discretized using RV nodes
+!
+!         npts - integer *8
+!            total number of discretization points on the boundary
+! 
+!         srccoefs - real *8 (9,npts)
+!            koornwinder expansion coefficients of xyz, dxyz/du,
+!            and dxyz/dv on each patch. 
+!            For each point srccoefs(1:3,i) is xyz info
+!                           srccoefs(4:6,i) is dxyz/du info
+!                           srccoefs(7:9,i) is dxyz/dv info
+!
+!         srcvals - real *8 (12,npts)
+!             xyz(u,v) and derivative info sampled at the 
+!             discretization nodes on the surface
+!             srcvals(1:3,i) - xyz info
+!             srcvals(4:6,i) - dxyz/du info
+!             srcvals(7:9,i) - dxyz/dv info
+! 
+!          eps - real *8
+!             precision requested for computing quadrature and fmm
+!             tolerance
+!
+!          dpars - real *8 (2)
+!             alpha = dpars(1), beta = dpars(2) in the layer potential
+!             representation
+!      
+!          ifinout - integer *8
+!              flag for interior or exterior problems (normals assumed to 
+!                be pointing in exterior of region)
+!              ifinout = 0, interior problem
+!              ifinout = 1, exterior problem
+!
+!         output
+!           xmat - real *8(3*npts,3*npts)
+!              discretization matrix for the stokes boundary value
+!              problem
+!
+!
+      implicit none
+      integer *8 npatches,norder,npols,npts
+      integer *8 ifinout
+      integer *8 norders(npatches),ixyzs(npatches+1)
+      integer *8 iptype(npatches)
+      real *8 srccoefs(9,npts), srcvals(12,npts), eps, eps_gmres
+      real *8 dpars(2)
+      real *8 xmat(3*npts,3*npts)
+
+      real *8 uint
+
+      real *8, allocatable :: targs(:,:)
+      integer *8, allocatable :: ipatch_id(:)
+      real *8, allocatable :: uvs_targ(:,:)
+      integer *8 ndtarg,ntarg
+
+      real *8 rres,eps2
+      integer *8 niter
+
+
+      integer *8 nover,npolso,nptso
+      integer *8 nnz,nquad
+      integer *8, allocatable :: row_ptr(:), col_ind(:), iquad(:)
+      real *8, allocatable :: wnear(:,:), wts(:)
+
+      real *8, allocatable :: srcover(:,:), wover(:)
+      integer *8, allocatable :: ixyzso(:), novers(:)
+
+      real *8, allocatable :: cms(:,:), rads(:), rad_near(:) 
+
+      integer *8 i, j, jpatch, jquadstart, jstart
+
+      integer *8 ipars
+      complex *16 zpars
+      real *8 timeinfo(10), t1, t2, omp_get_wtime
+
+
+      real *8 ttot, done, pi, rsurf
+      real *8 rfac, rfac0, alpha, beta
+      integer *8 iptype_avg, norder_avg
+      integer *8 ikerorder, iquadtype, npts_over
+
+      real *8 did, ra
+      integer *8 jj, l, nmat
+      real *8 w11, w12, w13, w21, w22, w23, w31, w32, w33
+      
+
+      alpha = dpars(1)
+      beta = dpars(2)
+      
+      nmat = 3*npts
+      done = 1
+      pi = atan(done)*4
+
+
+!
+!
+!        this might need fixing
+!
+      iptype_avg = floor(sum(iptype)/(npatches+0.0d0))
+      norder_avg = floor(sum(norders)/(npatches+0.0d0))
+
+      call get_rfacs(norder_avg, iptype_avg, rfac, rfac0)
+
+      nnz = npts*npatches
+      allocate(row_ptr(npts+1), col_ind(nnz))
+
+      do i=1,npts + 1
+        row_ptr(i) = (i-1)*npatches + 1
+      enddo
+
+
+      do i=1,npts
+        do j=1,npatches
+          col_ind((i-1)*npatches + j) = j
+        enddo
+      enddo
+
+      allocate(iquad(nnz+1)) 
+      call get_iquad_rsc(npatches, ixyzs, npts, nnz, row_ptr, col_ind, &
+        iquad)
+
+      ikerorder = -1
+      if(abs(dpars(2)).gt.1.0d-16) ikerorder = 0
+      
+!
+!   compute near quadrature correction
+!
+      nquad = iquad(nnz+1)-1
+      allocate(wnear(6,nquad))
+      
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j)
+      do i=1,nquad
+        do j = 1,6
+          wnear(j,i) = 0
+        enddo
+      enddo
+!$OMP END PARALLEL DO    
+
+
+      iquadtype = 1
+
+      print *, "starting to generate near quadrature"
+      call cpu_time(t1)
+!$      t1 = omp_get_wtime()      
+
+      call getnearquad_stok_comb_vel(npatches, norders, &
+        ixyzs, iptype, npts, srccoefs, srcvals, &
+        eps, dpars, iquadtype, nnz, row_ptr, col_ind, &
+        iquad, rfac0, nquad, wnear)
+      call cpu_time(t2)
+!$      t2 = omp_get_wtime()     
+
+      call prin2('quadrature generation time=*',t2-t1,1)
+
+      allocate(wts(npts))
+      call get_qwts(npatches, norders, ixyzs, iptype, npts, &
+        srcvals, wts)
+
+
+!
+!  compute scaling for one's matrix correction for interior problem
+!
+
+      rsurf = 0
+      do i=1,npts
+        rsurf = rsurf + wts(i)
+      enddo
+      
+      ra = 0
+      if(ifinout.eq.0) ra = -2*pi/rsurf
+      
+
+      call cpu_time(t1)
+!$      t1 = omp_get_wtime()
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i, j, jpatch, jquadstart) &
+!$OMP PRIVATE(jstart, npols, l, jj, w11, w12, w13) &
+!$OMP PRIVATE(w21, w22, w23, w31, w32, w33)
+      do i = 1,npts
+        do j = row_ptr(i), row_ptr(i+1)-1
+          jpatch = col_ind(j)
+          npols = ixyzs(jpatch+1)-ixyzs(jpatch)
+          jquadstart = iquad(j)
+          jstart = ixyzs(jpatch) 
+          do l=1,npols
+             jj = jstart + l-1
+             w11 = wnear(1,jquadstart+l-1)
+             w12 = wnear(2,jquadstart+l-1)
+             w13 = wnear(3,jquadstart+l-1)
+             w21 = w12
+             w22 = wnear(4,jquadstart+l-1)
+             w23 = wnear(5,jquadstart+l-1)
+             w31 = w13
+             w32 = w23
+             w33 = wnear(6,jquadstart+l-1)
+             xmat(3*(i-1)+1,3*(jj-1)+1) = w11 + & 
+               srcvals(10,i)*srcvals(10,jj)*ra*wts(jj)
+             xmat(3*(i-1)+1,3*(jj-1)+2) = w12 + &
+                srcvals(10,i)*srcvals(11,jj)*ra*wts(jj)
+             xmat(3*(i-1)+1,3*(jj-1)+3) = w13 + &
+                srcvals(10,i)*srcvals(12,jj)*ra*wts(jj)
+
+             xmat(3*(i-1)+2,3*(jj-1)+1) = w21 + &
+               srcvals(11,i)*srcvals(10,jj)*ra*wts(jj)
+             xmat(3*(i-1)+2,3*(jj-1)+2) = w22 + &
+               srcvals(11,i)*srcvals(11,jj)*ra*wts(jj)
+             xmat(3*(i-1)+2,3*(jj-1)+3) = w23 + &
+               srcvals(11,i)*srcvals(12,jj)*ra*wts(jj)
+
+             xmat(3*(i-1)+3,3*(jj-1)+1) = w31 + &
+               srcvals(12,i)*srcvals(10,jj)*ra*wts(jj)
+             xmat(3*(i-1)+3,3*(jj-1)+2) = w32 + & 
+               srcvals(12,i)*srcvals(11,jj)*ra*wts(jj)
+             xmat(3*(i-1)+3,3*(jj-1)+3) = w33 + &
+               srcvals(12,i)*srcvals(12,jj)*ra*wts(jj)
+
+          enddo
+        enddo
+      enddo
+!$OMP END PARALLEL DO
+     
+
+      did = -(-1)**(ifinout)*beta/2
+      do i=1,3*npts
+         xmat(i,i) = xmat(i,i) + did
+      enddo
+
+     
+!     
+      return
+      end
+!     
+!
+!
diff --git a/src/surface_routs/surf_routs.f90 b/src/surface_routs/surf_routs.f90
index fb9fc117..5d07fb01 100644
--- a/src/surface_routs/surf_routs.f90
+++ b/src/surface_routs/surf_routs.f90
@@ -993,7 +993,67 @@ subroutine refine_patches_list(npatches, npatmax, norders, ixyzs, &
 
   return
 end subroutine refine_patches_list
+!
+!
+!
+!
+subroutine get_surf_moments(npatches, norders, ixyzs, &
+    iptype, npts, srcvals, wts, area, centroid, rmoi)
+!
+!  This subroutine returns the 0th, 1st and 2nd moments
+!  of a surface. The 0th moment is just the surface area, 
+!  the first moment is the centroid, and the second moment
+!  is given by
+!
+!  II_{ij} = 
+!   \int_{\Gamma} (|x-c|^2  \delta_{ij} - (x_{i} - c_{i})(x_{j}-c_{j}))dS
+!  
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches
+!    - norders: integer *8(npatches)
+!        discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points on the surface
+!    - srcvals: double precision (12,npts)
+!        xyz, dxyz/du,dxyz/dv, normals at all nodes
+!    - wts: double precision(npts)
+!        quadrature weights for integrating smooth 
+!        functions
+!
+!  Output arguments:   
+!    - area: double precision
+!        the 0th moment of the surface, i.e. surface area
+!    - centroid: double precision(3)
+!        the first moment of the surface, i.e. the centroid
+!    - rmoi: double precision(3,3)
+!        the second moment of inertia of the surface, defined
+!        above
+!
+!----------------------------------------------
+!
+
+  implicit none
+  integer *8, intent(in) :: npatches, norders(npatches)
+  integer *8, intent(in) :: ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+!
+! CONTINUE FROM HERE
+!
 
+  integer *8, intent(in) :: nfars(npatches)
+  integer *8, intent(in) :: npts,nptso
+  real *8, intent(in) :: srccoefs(9,npts),srcvals(12,npts)
+  real *8, intent(out) :: srcover(12,nptso)
+  integer *8 nfar,norder
+  real *8 tmp(3),rr
 
 subroutine surf_vals_to_coefs(nd,npatches,norders,ixyzs,iptype,npts, &
    u,ucoefs)

From c7a68264169b99c546d1951825b4dc5672bccf85 Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@iitb.ac.in>
Date: Tue, 27 Jan 2026 20:05:37 +0530
Subject: [PATCH 2/7] small changes

---
 src/stok_wrappers/stok_s_mob.f90 | 608 ++++++++++++++-----------------
 src/surface_routs/surf_routs.f90 |  48 ++-
 2 files changed, 304 insertions(+), 352 deletions(-)

diff --git a/src/stok_wrappers/stok_s_mob.f90 b/src/stok_wrappers/stok_s_mob.f90
index 2b83cca5..0e813590 100644
--- a/src/stok_wrappers/stok_s_mob.f90
+++ b/src/stok_wrappers/stok_s_mob.f90
@@ -855,15 +855,16 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
 !
 !
 !        
-      subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
-        iptype, npts, srccoefs, srcvals, eps, dpars, numit, ifinout, &
-        rhs, eps_gmres, niter, errs, rres, soln)
+      subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ncomp, icomps, eps, numit, &
+        forces, torques, eps_gmres, niter, errs, rres, soln, &
+        trans_vels, rot_vels)
 
 !
 !
-!  This subroutine solves the Stokes velocity problem
-!  on the exterior/interior of an object where the potential
-!  is represented using a combined field integral representation.
+!  This subroutine solves the Stokes mobility problem
+!  where the potential is represented using a 
+!  single layer representation.
 !
 !  This subroutine is the simple interface as opposed to the
 !  _solver_guru routine which is called after initialization
@@ -871,10 +872,33 @@ subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
 !
 !
 !  Representation:
-!    u = \alpha S_{stok}[\sigma]+\beta*D_{stok}[\sigma]
+!    u = S_{stok}[\sigma] + S_{stok}[\sigma_{0}] 
+!  
+!  where \sigma_{0} = F_{i}/|\Gamma_{i}| + 
+!     \tau_{i}^{-1} T_{i} \times (x - x_{c,i})
+!
+!  with \tau_{i} is the moment of inertia tensor. In addition, 
+!
+!  \sigma satisfies
+!    \int_{\Gamma_{i}} \sigma = 0, 
+!    \int_{\Gamma_{i}} (x-x_{c,i}) \times \sigma = 0
 !
-!  Boundary condition:
-!    u = f
+!  Integral equations obtained by imposing:
+!    f^{-} = 0 \,,
+!
+!  where f^{-} is the interior surface traction
+!
+!  and is given by:
+!    1/2 \sigma + S_{stok}'[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  along with the constraints above. 
+!
+!  Instead, we use the generalized 1's matrix trick to solve for
+!  sigma as
+!    1/2 \sigma + S_{stok}'[\sigma] + L[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  where L[\sigma] is given by
+!     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
 !
 !  The linear system is solved iteratively using GMRES
 !  until a relative residual of eps_gmres is reached
@@ -909,20 +933,23 @@ subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
 !          * srcvals(4:6,i) - dxyz/du info
 !          * srcvals(7:9,i) - dxyz/dv info
 !          * srcvals(10:12,i) - normals info
+!    - ncomp: integer *8
+!        number of components
+!    - icomps: integer *8(ncomp)
+!        icomps(i) = starting patch index for patches on component
+!          i (note that the patches must be ordered by component)
 !    - eps: real *8
 !        precision requested for computing quadrature and fmm
 !        tolerance
-!    - dpars: real *8(2)
-!        kernel parameters (Referring to formula (1))
-!          * dpars(1) = \alpha 
-!          * dpars(2) = \beta
 !    - numit: integer *8
 !        max number of gmres iterations
 !    - ifinout: integer *8
 !        ifinout = 0, interior problem
 !        ifinout = 1, exterior problem
-!    - rhs: real *8(3,npts)
-!        velocity data
+!    - forces: real *8(3,ncomp)
+!        prescribed force on the components
+!    - torques: real *8(3,ncomp)
+!        prescribed torque on the components
 !    - eps_gmres: real *8
 !        gmres tolerance requested
 !      
@@ -937,19 +964,24 @@ subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
 !    - rres: real *8
 !        relative residual for computed solution
 !    - soln: real *8(3,npts)
-!        density which solves the velocity problem \sigma
-!				 
+!        density which solves the mobility problem \sigma
+!    - trans_vels: real *8(3,ncomp)
+!        translational velocity of the components
+!    - rot_vels: real *8(3,ncomp)
+!        rotational velocity of the components
+!
       implicit none
       integer *8, intent(in) :: npatches, npts
       integer *8, intent(in) :: ifinout
       integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
       integer *8, intent(in) :: iptype(npatches)
+      integer *8, intent(in) :: ncomp, icomps(ncomp)
       real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
       real *8, intent(in) :: eps, eps_gmres
-      real *8, intent(in) :: dpars(2)
-      real *8, intent(in) :: rhs(3,npts)
+      real *8, intent(in) :: forces(3,ncomp), torques(3,ncomp) 
       integer *8, intent(in) :: numit
       real *8, intent(out) :: soln(3,npts)
+      real *8, intent(out) :: trans_vels(3,ncomp), rot_vels(3,ncmp)
       real *8, intent(out) :: errs(numit+1)
       real *8, intent(out) :: rres
       integer *8, intent(out) :: niter
@@ -965,25 +997,31 @@ subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
       integer *8 nover, npolso, nptso
       integer *8 nnz,nquad
       integer *8, allocatable :: row_ptr(:), col_ind(:), iquad(:)
-      real *8, allocatable :: wnear(:,:)
+      real *8, allocatable :: wnear(:,:), wnear_s(:,:)
 
       real *8, allocatable :: srcover(:,:), wover(:)
       integer *8, allocatable :: ixyzso(:), novers(:)
 
       real *8, allocatable :: cms(:,:), rads(:), rad_near(:) 
+      real *8, allocatable :: rhs(:,:), sigma0(:,:), uvel(:,:)
 
       integer *8 i, j, jpatch, jquadstart, jstart
 
-      integer *8 ipars
+      integer *8, allocatable :: ipars(:)
       complex *16 zpars
+      real *8 dpars(2)
       real *8 timeinfo(10), t1, t2, omp_get_wtime
 
-
       real *8 ttot, done, pi
       real *8 rfac, rfac0
       integer *8 iptype_avg, norder_avg
       integer *8 ikerorder, iquadtype, npts_over
 
+      real *8 xdiff(3), rtmp(3), rtuse(3), ftuse(3), rtmp2(3)
+      real *8 rmoi(3,3), rmoi_inv(3,3), centroid(3)
+      integer *8, allocatable :: ixyzsloc(:)
+      real *8, allocatable :: wts(:)
+ 
 !
 !
 !       gmres variables
@@ -1089,7 +1127,7 @@ subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
 !
       nquad = iquad(nnz+1) - 1
       nker = 6
-      allocate(wnear(nker,nquad))
+      allocate(wnear(nker,nquad), wnear_s(nker,nquad))
       
 !$OMP PARALLEL DO DEFAULT(SHARED)      
       do i=1,nquad
@@ -1099,6 +1137,13 @@ subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
         wnear(4,i) = 0
         wnear(5,i) = 0
         wnear(6,i) = 0
+
+        wnear_s(1,i) = 0
+        wnear_s(2,i) = 0
+        wnear_s(3,i) = 0
+        wnear_s(4,i) = 0
+        wnear_s(5,i) = 0
+        wnear_s(6,i) = 0
       enddo
 !$OMP END PARALLEL DO    
 
@@ -1107,21 +1152,163 @@ subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
 
       print *, "starting to generate near quadrature"
       call cpu_time(t1)
-!$      t1 = omp_get_wtime()      
+!$      t1 = omp_get_wtime()     
 
+!
+!  quadrature for S'
+!
+      call getnearquad_stok_s_mob(npatches, norders, &
+        ixyzs, iptype, npts, srccoefs, srcvals, eps, &
+        iquadtype, nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear)
+!
+!  quadrature for S
+!
+      dpars(1) = 1.0d0
+      dpars(2) = 0
       call getnearquad_stok_comb_vel(npatches, norders, &
         ixyzs, iptype, npts, srccoefs, srcvals, eps, dpars, iquadtype, &
-        nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear)
+        nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear_s)
       call cpu_time(t2)
 !$      t2 = omp_get_wtime()     
 
-      print *, "done generating near quadrature, now starting gmres"
+
+!
+!  Compute S[\sigma_{0}]
+!
+      allocate(wts(npts))
+      call get_qwts(npatches, novers, ixyzs, iptype, npts, &
+        srcvals, wts)
+      allocate(sigma0(3,npts), rhs(3,npts), uvel(3,npts))
+      allocate(ixyzsloc(npatches+1))
+      ndd = 2
+      ndz = 0
+      ndi = 0
+      nker = 6
+      lwork = 0
+      ndim_s = 3
+      idensflag = 0
+      ipotflag = 0
+      ndim_p = 0
+
+      allocate(ipars(ncomp+1))
+      do icomp = 1,ncomp
+        ipstart = ipars(icomp+1)
+        ipend = npatches
+        if (icomp.ne.ncomp) ipend = ipars(icomp+2)-1
+
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend)
+        nploc = ipend - ipstart + 1
+        nptsloc = iend - istart + 1
+
+        do i=1,nploc+1
+          ixyzsloc(i) = ixyzs(i+istart-1) - ixyzs(istart)+1
+        enddo
+        rmoi(1:3,1:3) = 0
+        rmoi_inv(1:3,1:3) = 0
+        area = 0
+        centroid(1:3) = 0
+        call get_surf_moments(nploc, norders(ipstart), ixyzsloc, &
+          iptype(ipstart), nptsloc, srcvals(1,istart), wts(istart), &
+          area, centroid, rmoi)
+        info = 0
+        call dinverse(3, rmoi, info, rmoi_inv) 
+        rtuse(1:3) = rtuse(1:3)
+        rtuse(1) = rtuse(1) = rmoi_inv(1,1)*torques(1,icomp) + &
+           rmoi_inv(1,2)*torques(2,icomp) + &
+           rmoi_inv(1,3)*torques(3,icomp)
+        rtuse(2) = rtuse(2) = rmoi_inv(2,1)*torques(1,icomp) + &
+           rmoi_inv(2,2)*torques(2,icomp) + &
+           rmoi_inv(2,3)*torques(3,icomp)
+        rtuse(3) = rtuse(3) = rmoi_inv(3,1)*torques(1,icomp) + &
+           rmoi_inv(3,2)*torques(2,icomp) + &
+           rmoi_inv(3,3)*torques(3,icomp)
+        ftuse(1:3) = forces(1:3,icomp)/area
+        do i = istart,iend
+          xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+          call cross_prod3d(rtuse, xdiff, rtmp)
+          sigma0(1:3,i) = ftuse(1:3) + rtmp(1:3)
+        enddo
+      enddo
+
+      call stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+        eps, ndd, dpars, ndz, zpars, ndi, ipars, nnz, row_ptr, &
+        col_ind, iquad, nquad, nker, wnear_s, novers, npts_over, &
+        ixyzso, srcover, wover, lwork, work, idensflag, ndim_s, &
+        sigma0, ipotflag, ndim_p, rhs)
+!$OMP PARALLEL DO PRIVATE(i)
+      do i=1,npts
+        rhs(1:3,i) = -rhs(1:3,i)
+      enddo
+!$OMP END PARALLEL DO 
+
       
-      call stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
-        iptype, npts, srccoefs, srcvals, eps, dpars, numit, ifinout, &
+      print *, "done generating near quadrature, now starting gmres"
+      ndi = ncomp+1
+      call stok_s_mob_solver_guru(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, ndi, ipars, numit, &
         rhs, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
         npts_over, ixyzso, srcover, wover, eps_gmres, niter, &
         errs, rres, soln)
+
+      call stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+        eps, ndd, dpars, ndz, zpars, ndi, ipars, nnz, row_ptr, &
+        col_ind, iquad, nquad, nker, wnear_s, novers, npts_over, &
+        ixyzso, srcover, wover, lwork, work, idensflag, ndim_s, &
+        soln, ipotflag, ndim_p, uvel)
+!
+!  Compute translational and rotational velocities on each component
+!
+!  Note the sign flip in rhs as it was -S[\sigma_{0}]
+      do i=1,npts
+        uvel(1:3,i) = uvel(1:3,i) - rhs(1:3,i)
+      enddo
+
+      do icomp = 1,ncomp
+        trans_vels(1:3,icomp) = 0
+        rot_vels(1:3,icomp) = 0
+
+        ipstart = ipars(icomp+1)
+        ipend = npatches
+        if (icomp.ne.ncomp) ipend = ipars(icomp+2)-1
+
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend)
+        nploc = ipend - ipstart + 1
+        nptsloc = iend - istart + 1
+
+        do i=1,nploc+1
+          ixyzsloc(i) = ixyzs(i+istart-1) - ixyzs(istart)+1
+        enddo
+        rmoi(1:3,1:3) = 0
+        rmoi_inv(1:3,1:3) = 0
+        area = 0
+        centroid(1:3) = 0
+        call get_surf_moments(nploc, norders(ipstart), ixyzsloc, &
+          iptype(ipstart), nptsloc, srcvals(1,istart), wts(istart), &
+          area, centroid, rmoi)
+        info = 0
+        call dinverse(3, rmoi, info, rmoi_inv) 
+        do i = istart,iend
+          trans_vels(1:3,icomp) = trans_vels(1:3,icomp) + uvel(1:3,i)*wts(i)
+        enddo
+        trans_vels(1:3,icomp) = trans_vels(1:3,icomp)/area
+        
+        rtmp(1:3) = 0
+        do i=1,istart,iend
+          xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+          rtuse(1:3) = uvel(1:3,i) - trans_vels(1:3,icomp)
+          rtmp2(1:3) = 0
+          call cross_prod3d(xdiff, rtuse, rtmp2)
+          rtmp(1:3) = rtmp(1:3) + rtmp2(1:3)*wts(i)
+        enddo
+        rot_vels(1:3,icomp) = rmoi_inv(1:3,1)*rtmp(1) + 
+          rmoi_inv(1:3,2)*rtmp(2) + rmoi_inv(1:3,3)*rtmp(3)
+      enddo
+
+      
       
       return
       end
@@ -1131,23 +1318,50 @@ subroutine stok_comb_vel_solver(npatches, norders, ixyzs, &
 !
 !
 
-      subroutine stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
-        iptype, npts, srccoefs, srcvals, eps, dpars, numit, ifinout, &
+      subroutine stok_s_mob_solver_guru(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, ndi, ipars, numit, &
         rhs, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
         nptso, ixyzso, srcover, whtsover, eps_gmres, niter, &
         errs, rres, soln)
 !
 !
-!  This subroutine solves the Stokes velocity problem
-!  on the exterior of an object where the potential
-!  is represented using the combined field integral representation.
+!  This subroutine solves the Stokes mobility problem
+!  where the potential is represented using a 
+!  single layer representation.
+!
+!  This subroutine is the simple interface as opposed to the
+!  _solver_guru routine which is called after initialization
+!  in this routine.
 !
 !
 !  Representation:
-!    u = \alpha S_{stok}[\sigma] + \beta*D_{stok}[\sigma]
+!    u = S_{stok}[\sigma] + S_{stok}[\sigma_{0}] 
+!  
+!  where \sigma_{0} = F_{i}/|\Gamma_{i}| + 
+!     \tau_{i}^{-1} T_{i} \times (x - x_{c,i})
+!
+!  with \tau_{i} is the moment of inertia tensor. In addition, 
+!
+!  \sigma satisfies
+!    \int_{\Gamma_{i}} \sigma = 0, 
+!    \int_{\Gamma_{i}} (x-x_{c,i}) \times \sigma = 0
+!
+!  Integral equations obtained by imposing:
+!    f^{-} = 0 \,,
+!
+!  where f^{-} is the interior surface traction
+!
+!  and is given by:
+!    1/2 \sigma + S_{stok}'[\sigma] = -S_{stok}'[\sigma_{0}]
 !
-!  Boundary condition:
-!    u = f
+!  along with the constraints above. 
+!
+!  Instead, we use the generalized 1's matrix trick to solve for
+!  sigma as
+!    1/2 \sigma + S_{stok}'[\sigma] + L[\sigma] = -S_{stok}'[\sigma_{0}]
+!
+!  where L[\sigma] is given by
+!     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
 !
 !  The linear system is solved iteratively using GMRES
 !  until a relative residual of eps_gmres is reached
@@ -1185,10 +1399,13 @@ subroutine stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
 !    - eps: real *8
 !        precision requested for computing quadrature and fmm
 !        tolerance
-!    - dpars: real *8(2)
-!        kernel parameters (Referring to formula (1))
-!          * dpars(1) = \alpha 
-!          * dpars(2) = \beta 
+!    - ndi: integer *8
+!        number of ipars, must be atleast ncomp+1, where ncomp
+!        is the number of components
+!    - ipars: integer *8(ndi)
+!        ipars(1) = ncomp
+!        ipars(i+1) = starting patch index for patches on component
+!          i (note that the patches must be ordered by component)
 !    - numit: integer *8
 !        max number of gmres iterations
 !    - ifinout: integer *8
@@ -1251,8 +1468,8 @@ subroutine stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
       integer *8, intent(in) :: iptype(npatches)
       real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
       real *8, intent(in) :: eps, eps_gmres
-      integer *8, intent(in) :: ifinout
-      real *8, intent(in) :: dpars(2)
+      integer *8, intent(in) :: ndi
+      integer *8, intent(in) :: ipars(ndi)
       real *8, intent(in) :: rhs(3,npts)
       integer *8, intent(in) :: numit
 
@@ -1275,15 +1492,14 @@ subroutine stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
       real *8, intent(out) :: soln(3,npts)
 
       real *8 did
-      real *8 dpars_use(3), pi, done, rsurf
+      real *8 dpars
       integer *8 ndd_use
 
       procedure (), pointer :: fker
       external lpcomp_stok_comb_vel_addsub
 
       integer *8 ndd, ndi, ndz, lwork, ndim
-      real *8 work
-      integer *8 ipars, nkertmp
+      integer *8 nkertmp
       complex *16 zpars
 
       integer *8 ndtarg
@@ -1291,40 +1507,24 @@ subroutine stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
       data ima/(0.0d0,1.0d0)/
       real *8, allocatable :: wts(:)
 
-      complex *16 zpars_use(3)
       integer *8 i
 
-      did = -(-1)**(ifinout)*dpars(2)/2
+      did = 0.5d0 
       fker => lpcomp_stok_comb_vel_addsub
 
-      ndd_use = 3
-      dpars_use(1) = dpars(1)
-      dpars_use(2) = dpars(2)
-      dpars_use(3) = 0
-      
-      ndi = 0
+      ndd = 0
       ndz = 0
 
-      lwork = 0
+      lwork = npts
       ndim = 3
       allocate(wts(npts))
 
       call get_qwts(npatches, norders, ixyzs, iptype, npts, &
         srcvals, wts)
-      rsurf = 0
-!$OMP PARALLEL DO DEFAULT(SHARED) REDUCTION(+:rsurf)      
-      do i=1,npts
-        rsurf = rsurf + wts(i)
-      enddo
-!$OMP END PARALLEL DO      
-      done = 1.0d0
-      pi = atan(done)*4.0d0
-      if(ifinout.eq.0) dpars_use(3) = -2*pi/rsurf
-! 
 
       call dgmres_guru(npatches, norders, ixyzs, &
         iptype, npts, srccoefs, srcvals, wts, &
-        eps, ndd_use, dpars_use, ndz, zpars, ndi, ipars, &
+        eps, ndd, dpars, ndz, zpars, ndi, ipars, &
         nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
         nptso, ixyzso, srcover, whtsover, npts, wts, &
         ndim, fker, did, rhs, numit, eps_gmres, niter, errs, &
@@ -1334,281 +1534,3 @@ subroutine stok_comb_vel_solver_guru(npatches, norders, ixyzs, &
       end
 
 !
-!
-!
-!        
-      subroutine stok_comb_vel_matgen(npatches, norders, ixyzs, &
-        iptype, npts, srccoefs, srcvals, eps, dpars, ifinout, &
-        xmat)
-!
-!f2py  intent(in) npatches,norders,ixyzs,iptype
-!f2py  intent(in) npts,srccoefs,srcvals,eps,dpars
-!f2py  intent(in) ifinout
-!f2py  intent(out) xmat
-!
-!     this subroutine returns the discretization matrix
-!     for the on-surface discretization of the stokes combined
-!     field layer potential. The unknowns are ordered as:
-!
-!     \sigma_{1}(x_{1}), \sigma_{2}(x_{1}), \sigma_{3}(x_{1})...
-!      \sigma_{1}(x_{2}), \sigma_{2}(x_{2})l \sigma_{3}(x_{2})..
-!                           .
-!                           .
-!                           .
-!                           .
-!      \sigma_{1}(x_{n}), \sigma_{2}(x_{n}), \sigma_{3}(x_{n})
-!
-!     And the same ordering for the velocity components as well
-!
-!
-!     Representation:
-!        u = alpha S \sigma + beta D \sigma
-!     
-!
-!       input:
-!         npatches - integer *8
-!            number of patches
-!
-!         norders- integer *8(npatches)
-!            order of discretization on each patch 
-!
-!         ixyzs - integer *8(npatches+1)
-!            ixyzs(i) denotes the starting location in srccoefs,
-!               and srcvals array corresponding to patch i
-!   
-!         iptype - integer *8(npatches)
-!            type of patch
-!             iptype = 1, triangular patch discretized using RV nodes
-!
-!         npts - integer *8
-!            total number of discretization points on the boundary
-! 
-!         srccoefs - real *8 (9,npts)
-!            koornwinder expansion coefficients of xyz, dxyz/du,
-!            and dxyz/dv on each patch. 
-!            For each point srccoefs(1:3,i) is xyz info
-!                           srccoefs(4:6,i) is dxyz/du info
-!                           srccoefs(7:9,i) is dxyz/dv info
-!
-!         srcvals - real *8 (12,npts)
-!             xyz(u,v) and derivative info sampled at the 
-!             discretization nodes on the surface
-!             srcvals(1:3,i) - xyz info
-!             srcvals(4:6,i) - dxyz/du info
-!             srcvals(7:9,i) - dxyz/dv info
-! 
-!          eps - real *8
-!             precision requested for computing quadrature and fmm
-!             tolerance
-!
-!          dpars - real *8 (2)
-!             alpha = dpars(1), beta = dpars(2) in the layer potential
-!             representation
-!      
-!          ifinout - integer *8
-!              flag for interior or exterior problems (normals assumed to 
-!                be pointing in exterior of region)
-!              ifinout = 0, interior problem
-!              ifinout = 1, exterior problem
-!
-!         output
-!           xmat - real *8(3*npts,3*npts)
-!              discretization matrix for the stokes boundary value
-!              problem
-!
-!
-      implicit none
-      integer *8 npatches,norder,npols,npts
-      integer *8 ifinout
-      integer *8 norders(npatches),ixyzs(npatches+1)
-      integer *8 iptype(npatches)
-      real *8 srccoefs(9,npts), srcvals(12,npts), eps, eps_gmres
-      real *8 dpars(2)
-      real *8 xmat(3*npts,3*npts)
-
-      real *8 uint
-
-      real *8, allocatable :: targs(:,:)
-      integer *8, allocatable :: ipatch_id(:)
-      real *8, allocatable :: uvs_targ(:,:)
-      integer *8 ndtarg,ntarg
-
-      real *8 rres,eps2
-      integer *8 niter
-
-
-      integer *8 nover,npolso,nptso
-      integer *8 nnz,nquad
-      integer *8, allocatable :: row_ptr(:), col_ind(:), iquad(:)
-      real *8, allocatable :: wnear(:,:), wts(:)
-
-      real *8, allocatable :: srcover(:,:), wover(:)
-      integer *8, allocatable :: ixyzso(:), novers(:)
-
-      real *8, allocatable :: cms(:,:), rads(:), rad_near(:) 
-
-      integer *8 i, j, jpatch, jquadstart, jstart
-
-      integer *8 ipars
-      complex *16 zpars
-      real *8 timeinfo(10), t1, t2, omp_get_wtime
-
-
-      real *8 ttot, done, pi, rsurf
-      real *8 rfac, rfac0, alpha, beta
-      integer *8 iptype_avg, norder_avg
-      integer *8 ikerorder, iquadtype, npts_over
-
-      real *8 did, ra
-      integer *8 jj, l, nmat
-      real *8 w11, w12, w13, w21, w22, w23, w31, w32, w33
-      
-
-      alpha = dpars(1)
-      beta = dpars(2)
-      
-      nmat = 3*npts
-      done = 1
-      pi = atan(done)*4
-
-
-!
-!
-!        this might need fixing
-!
-      iptype_avg = floor(sum(iptype)/(npatches+0.0d0))
-      norder_avg = floor(sum(norders)/(npatches+0.0d0))
-
-      call get_rfacs(norder_avg, iptype_avg, rfac, rfac0)
-
-      nnz = npts*npatches
-      allocate(row_ptr(npts+1), col_ind(nnz))
-
-      do i=1,npts + 1
-        row_ptr(i) = (i-1)*npatches + 1
-      enddo
-
-
-      do i=1,npts
-        do j=1,npatches
-          col_ind((i-1)*npatches + j) = j
-        enddo
-      enddo
-
-      allocate(iquad(nnz+1)) 
-      call get_iquad_rsc(npatches, ixyzs, npts, nnz, row_ptr, col_ind, &
-        iquad)
-
-      ikerorder = -1
-      if(abs(dpars(2)).gt.1.0d-16) ikerorder = 0
-      
-!
-!   compute near quadrature correction
-!
-      nquad = iquad(nnz+1)-1
-      allocate(wnear(6,nquad))
-      
-!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(j)
-      do i=1,nquad
-        do j = 1,6
-          wnear(j,i) = 0
-        enddo
-      enddo
-!$OMP END PARALLEL DO    
-
-
-      iquadtype = 1
-
-      print *, "starting to generate near quadrature"
-      call cpu_time(t1)
-!$      t1 = omp_get_wtime()      
-
-      call getnearquad_stok_comb_vel(npatches, norders, &
-        ixyzs, iptype, npts, srccoefs, srcvals, &
-        eps, dpars, iquadtype, nnz, row_ptr, col_ind, &
-        iquad, rfac0, nquad, wnear)
-      call cpu_time(t2)
-!$      t2 = omp_get_wtime()     
-
-      call prin2('quadrature generation time=*',t2-t1,1)
-
-      allocate(wts(npts))
-      call get_qwts(npatches, norders, ixyzs, iptype, npts, &
-        srcvals, wts)
-
-
-!
-!  compute scaling for one's matrix correction for interior problem
-!
-
-      rsurf = 0
-      do i=1,npts
-        rsurf = rsurf + wts(i)
-      enddo
-      
-      ra = 0
-      if(ifinout.eq.0) ra = -2*pi/rsurf
-      
-
-      call cpu_time(t1)
-!$      t1 = omp_get_wtime()
-
-!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i, j, jpatch, jquadstart) &
-!$OMP PRIVATE(jstart, npols, l, jj, w11, w12, w13) &
-!$OMP PRIVATE(w21, w22, w23, w31, w32, w33)
-      do i = 1,npts
-        do j = row_ptr(i), row_ptr(i+1)-1
-          jpatch = col_ind(j)
-          npols = ixyzs(jpatch+1)-ixyzs(jpatch)
-          jquadstart = iquad(j)
-          jstart = ixyzs(jpatch) 
-          do l=1,npols
-             jj = jstart + l-1
-             w11 = wnear(1,jquadstart+l-1)
-             w12 = wnear(2,jquadstart+l-1)
-             w13 = wnear(3,jquadstart+l-1)
-             w21 = w12
-             w22 = wnear(4,jquadstart+l-1)
-             w23 = wnear(5,jquadstart+l-1)
-             w31 = w13
-             w32 = w23
-             w33 = wnear(6,jquadstart+l-1)
-             xmat(3*(i-1)+1,3*(jj-1)+1) = w11 + & 
-               srcvals(10,i)*srcvals(10,jj)*ra*wts(jj)
-             xmat(3*(i-1)+1,3*(jj-1)+2) = w12 + &
-                srcvals(10,i)*srcvals(11,jj)*ra*wts(jj)
-             xmat(3*(i-1)+1,3*(jj-1)+3) = w13 + &
-                srcvals(10,i)*srcvals(12,jj)*ra*wts(jj)
-
-             xmat(3*(i-1)+2,3*(jj-1)+1) = w21 + &
-               srcvals(11,i)*srcvals(10,jj)*ra*wts(jj)
-             xmat(3*(i-1)+2,3*(jj-1)+2) = w22 + &
-               srcvals(11,i)*srcvals(11,jj)*ra*wts(jj)
-             xmat(3*(i-1)+2,3*(jj-1)+3) = w23 + &
-               srcvals(11,i)*srcvals(12,jj)*ra*wts(jj)
-
-             xmat(3*(i-1)+3,3*(jj-1)+1) = w31 + &
-               srcvals(12,i)*srcvals(10,jj)*ra*wts(jj)
-             xmat(3*(i-1)+3,3*(jj-1)+2) = w32 + & 
-               srcvals(12,i)*srcvals(11,jj)*ra*wts(jj)
-             xmat(3*(i-1)+3,3*(jj-1)+3) = w33 + &
-               srcvals(12,i)*srcvals(12,jj)*ra*wts(jj)
-
-          enddo
-        enddo
-      enddo
-!$OMP END PARALLEL DO
-     
-
-      did = -(-1)**(ifinout)*beta/2
-      do i=1,3*npts
-         xmat(i,i) = xmat(i,i) + did
-      enddo
-
-     
-!     
-      return
-      end
-!     
-!
-!
diff --git a/src/surface_routs/surf_routs.f90 b/src/surface_routs/surf_routs.f90
index 5d07fb01..fe66d3f5 100644
--- a/src/surface_routs/surf_routs.f90
+++ b/src/surface_routs/surf_routs.f90
@@ -1044,16 +1044,46 @@ subroutine get_surf_moments(npatches, norders, ixyzs, &
   integer *8, intent(in) :: npatches, norders(npatches)
   integer *8, intent(in) :: ixyzs(npatches+1)
   integer *8, intent(in) :: iptype(npatches)
-!
-! CONTINUE FROM HERE
-!
+  integer *8, intent(in) :: npts
 
-  integer *8, intent(in) :: nfars(npatches)
-  integer *8, intent(in) :: npts,nptso
-  real *8, intent(in) :: srccoefs(9,npts),srcvals(12,npts)
-  real *8, intent(out) :: srcover(12,nptso)
-  integer *8 nfar,norder
-  real *8 tmp(3),rr
+  real *8, intent(in) :: srcvals(12,npts), wts(npts)
+
+  real *8, intent(out) :: area, centroid(3), rmoi(3,3)
+
+  integer *8 i
+  real *8 xdiff(3)
+
+  area = 0
+  centroid(1:3) = 0
+  rmoi(1:3,1:3) = 0
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i) &
+!$OMP REDUCTION(+:area,centroid)
+  do i=1,npts
+    area = area + wts(i)
+    centroid(1:3) = centroid(1:3) + srcvals(1:3,i)*wts(i)
+  enddo
+!$OMP END PARALLEL DO
+  centroid(1:3) = centroid(1:3)/area
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i,xdiff) REDUCTION(+:rmoi)
+  do i=1,npts
+    xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
+    rmoi(1,1) = rmoi(1,1) + (xdiff(2)**2 + xdiff(3)**2)*wts(i)
+    rmoi(2,2) = rmoi(2,2) + (xdiff(1)**2 + xdiff(3)**2)*wts(i)
+    rmoi(3,3) = rmoi(3,3) + (xdiff(1)**2 + xdiff(2)**2)*wts(i)
+
+    rmoi(1,2) = rmoi(1,2) - xdiff(1)*xdiff(2)*wts(i)
+    rmoi(1,3) = rmoi(1,3) - xdiff(1)*xdiff(3)*wts(i)
+    rmoi(2,3) = rmoi(2,3) - xdiff(2)*xdiff(3)*wts(i)
+  enddo
+!$OMP END PARALLEL DO
+  
+  rmoi(2,1) = rmoi(1,2)
+  rmoi(3,1) = rmoi(1,3)
+  rmoi(3,2) = rmoi(2,3)
+
+  return
+  end
 
 subroutine surf_vals_to_coefs(nd,npatches,norders,ixyzs,iptype,npts, &
    u,ucoefs)

From 09eb40dafd24bb8e1df26350e1a1ba966e2ee064 Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@iitb.ac.in>
Date: Tue, 24 Feb 2026 11:39:31 +0530
Subject: [PATCH 3/7] adding stokes mobility example for sphere

---
 examples/stokes/stok_mob_iter_example.f90 | 79 +++++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 examples/stokes/stok_mob_iter_example.f90

diff --git a/examples/stokes/stok_mob_iter_example.f90 b/examples/stokes/stok_mob_iter_example.f90
new file mode 100644
index 00000000..df55337a
--- /dev/null
+++ b/examples/stokes/stok_mob_iter_example.f90
@@ -0,0 +1,79 @@
+      implicit real *8 (a-h,o-z) 
+      implicit integer *8 (i-n)
+      real *8, allocatable :: srcvals(:,:),srccoefs(:,:),targs(:,:)
+      real *8, allocatable :: wts(:)
+      
+      real *8 ts(2), rres
+      real *8, allocatable :: rfacs(:,:), errs(:)
+      character *100 fname
+      integer *8 ipars(2)
+      integer *8, allocatable :: row_ptr(:),col_ind(:)
+      integer *8, allocatable :: iquad(:)
+      real *8, allocatable :: srcover(:,:),wover(:)
+
+      integer *8, allocatable :: norders(:),ixyzs(:),iptype(:)
+      integer *8, allocatable :: ixyzso(:),nfars(:)
+
+      integer *8, allocatable :: ipatch_id(:),inode_id(:)
+      real *8, allocatable :: uvs_targ(:,:)
+      real *8 xyz_out(3),xyz_in(3),stracmat(3,3),smat(3,3), dmat(3,3)
+      real *8 xyz_src(3),xyz_targ(3)
+      real *8 velgrad(3,3), vel(3), pre, tractemp(3)
+      real *8 sigout(3), uin(3), uintest(3), dpars(2), st1(3), du1(3)
+      real *8 udir(3), uneu(3,10), uavecomp(3), uavetest(3)
+      real *8 st2(3), du2(3), uconst(3)
+      real *8 v(3), omega(3), r0(3), udiff(3,10), udiff2(3,10)     
+      real *8 c0(3)
+      real *8 area, centroid(3), rmoi(3,3)
+      real *8, allocatable :: uval(:,:), tracval(:,:), soln(:,:)
+      complex * 16 zpars
+      integer *8 int8_0,int8_3,int8_9
+
+      call prini(6,13)
+
+      int8_0 = 0
+      int8_3 = 3
+      int8_9 = 9
+      done = 1
+      pi = atan(done)*4
+
+      a = 1.0d0
+      na = 4
+      c0(1) = 1.1d0
+      c0(2) = 0.3d0
+      c0(3) = 0.2d0
+
+      iptype0 = 1
+
+      norder = 4
+      npols = (norder+1)*(norder+2)/2
+
+      call get_sphere_npat_mem(a, na, c0, norder, iptype0, npatches, &
+        npts)
+      call prinf('npatches=*',npatches,1)
+      call prinf('npts=*',npts,1)
+      
+      allocate(srcvals(12,npts), srccoefs(9,npts), ixyzs(npatches+1), &
+        iptype(npatches), norders(npatches))
+      allocate(wts(npts))
+      call get_sphere_npat(a, na, c0, norder, iptype0, npatches, &
+        npts, norders, ixyzs, iptype, srccoefs, srcvals)
+
+      call get_qwts(npatches, norders, ixyzs, iptype, npts, srcvals, &
+        wts)
+
+      call get_surf_moments(npatches, norders, ixyzs, iptype, npts, &
+        srcvals, wts, area, centroid, rmoi)
+      
+      call prin2('area=*',area,1)
+      call prin2('centroid=*',centroid,3)
+      call prin2('rmoi=*',rmoi,9)
+
+      err_moi = rmoi(1,1) - 4.0d0/3.0d0*2*pi*(a**4)
+      call prin2('err_moi=*', err_moi,1)
+
+      
+      stop
+      end
+
+

From 88ee64cff419c373c0ced39c490b5b65c89183ee Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@iitb.ac.in>
Date: Tue, 24 Feb 2026 12:27:16 +0530
Subject: [PATCH 4/7] updating stokes mobility example, still beta

---
 examples/stokes/stok_mob_iter_example.f90 | 26 ++++++++++++++------
 src/stok_wrappers/stok_s_mob.f90          | 30 +++++++++++++----------
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/examples/stokes/stok_mob_iter_example.f90 b/examples/stokes/stok_mob_iter_example.f90
index df55337a..bcc31b04 100644
--- a/examples/stokes/stok_mob_iter_example.f90
+++ b/examples/stokes/stok_mob_iter_example.f90
@@ -12,19 +12,13 @@
       real *8, allocatable :: srcover(:,:),wover(:)
 
       integer *8, allocatable :: norders(:),ixyzs(:),iptype(:)
-      integer *8, allocatable :: ixyzso(:),nfars(:)
 
       integer *8, allocatable :: ipatch_id(:),inode_id(:)
       real *8, allocatable :: uvs_targ(:,:)
-      real *8 xyz_out(3),xyz_in(3),stracmat(3,3),smat(3,3), dmat(3,3)
-      real *8 xyz_src(3),xyz_targ(3)
-      real *8 velgrad(3,3), vel(3), pre, tractemp(3)
-      real *8 sigout(3), uin(3), uintest(3), dpars(2), st1(3), du1(3)
-      real *8 udir(3), uneu(3,10), uavecomp(3), uavetest(3)
-      real *8 st2(3), du2(3), uconst(3)
-      real *8 v(3), omega(3), r0(3), udiff(3,10), udiff2(3,10)     
       real *8 c0(3)
       real *8 area, centroid(3), rmoi(3,3)
+      real *8 forces1(3), torques1(3)
+      real *8 trans_vels1(3), rot_vels1(3)
       real *8, allocatable :: uval(:,:), tracval(:,:), soln(:,:)
       complex * 16 zpars
       integer *8 int8_0,int8_3,int8_9
@@ -72,6 +66,22 @@
       err_moi = rmoi(1,1) - 4.0d0/3.0d0*2*pi*(a**4)
       call prin2('err_moi=*', err_moi,1)
 
+      forces1(1) = 1.0d0
+      forces1(2) = 0.0d0
+      forces1(3) = 0.0d0
+      
+      torques1(1) = 0.0d0
+      torques1(2) = 0.0d0
+      torques1(3) = 0.0d0
+
+      numit = 100
+      allocate(errs(numit+1))
+      allocate(soln(3,npts))
+      
+      
+      
+
+
       
       stop
       end
diff --git a/src/stok_wrappers/stok_s_mob.f90 b/src/stok_wrappers/stok_s_mob.f90
index 0e813590..6b54f45d 100644
--- a/src/stok_wrappers/stok_s_mob.f90
+++ b/src/stok_wrappers/stok_s_mob.f90
@@ -647,9 +647,9 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
         sources(2,i) = srcover(2,i)
         sources(3,i) = srcover(3,i)
 
-        stoklet(1,i) = sigmaover(1,i)*whtsover(i)*over4pi
-        stoklet(2,i) = sigmaover(2,i)*whtsover(i)*over4pi
-        stoklet(3,i) = sigmaover(3,i)*whtsover(i)*over4pi
+        stoklet(1,i) = sigmaover(1,i)*whtsover(i)
+        stoklet(2,i) = sigmaover(2,i)*whtsover(i)
+        stoklet(3,i) = sigmaover(3,i)*whtsover(i)
       enddo
 !$OMP END PARALLEL DO      
       
@@ -801,8 +801,7 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
         rint(1:3) = 0
         rint_torque(1:3) = 0
         ipstart = ipars(icomp+1)
-        ipend = npatches
-        if (icomp.ne.ncomp) ipend = ipars(icomp+2)-1
+        ipend = ipars(icomp+2)-1
 
         istart = ixyzs(ipstart)
         iend = ixyzs(ipend)
@@ -935,7 +934,7 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
 !          * srcvals(10:12,i) - normals info
 !    - ncomp: integer *8
 !        number of components
-!    - icomps: integer *8(ncomp)
+!    - icomps: integer *8(ncomp+1)
 !        icomps(i) = starting patch index for patches on component
 !          i (note that the patches must be ordered by component)
 !    - eps: real *8
@@ -975,7 +974,7 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
       integer *8, intent(in) :: ifinout
       integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
       integer *8, intent(in) :: iptype(npatches)
-      integer *8, intent(in) :: ncomp, icomps(ncomp)
+      integer *8, intent(in) :: ncomp, icomps(ncomp+1)
       real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
       real *8, intent(in) :: eps, eps_gmres
       real *8, intent(in) :: forces(3,ncomp), torques(3,ncomp) 
@@ -1190,11 +1189,15 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
       ipotflag = 0
       ndim_p = 0
 
-      allocate(ipars(ncomp+1))
+      allocate(ipars(ncomp+2))
+      ipars(1) = ncomp
+      do icomp = 1,ncomp+1
+        ipars(icomp+1) = icomps(icomp)
+      enddo
+      
       do icomp = 1,ncomp
-        ipstart = ipars(icomp+1)
-        ipend = npatches
-        if (icomp.ne.ncomp) ipend = ipars(icomp+2)-1
+        ipstart = icomps(icomp)
+        ipend = icomps(icomp+1)-1
 
         istart = ixyzs(ipstart)
         iend = ixyzs(ipend)
@@ -1245,7 +1248,8 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
 
       
       print *, "done generating near quadrature, now starting gmres"
-      ndi = ncomp+1
+      ndi = ncomp+2
+      
       call stok_s_mob_solver_guru(npatches, norders, ixyzs, &
         iptype, npts, srccoefs, srcvals, eps, ndi, ipars, numit, &
         rhs, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
@@ -1400,7 +1404,7 @@ subroutine stok_s_mob_solver_guru(npatches, norders, ixyzs, &
 !        precision requested for computing quadrature and fmm
 !        tolerance
 !    - ndi: integer *8
-!        number of ipars, must be atleast ncomp+1, where ncomp
+!        number of ipars, must be atleast ncomp+2, where ncomp
 !        is the number of components
 !    - ipars: integer *8(ndi)
 !        ipars(1) = ncomp

From 4345cb4c0145dc26b83937d79c7dcc778ccedb4d Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@iitb.ac.in>
Date: Mon, 23 Mar 2026 09:27:14 +0530
Subject: [PATCH 5/7] beta mobility solver

---
 examples/stokes/stok_mob_iter_example.f90 | 19 ++++++++++
 src/stok_wrappers/stok_s_mob.f90          | 42 +++++++++++++----------
 2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/examples/stokes/stok_mob_iter_example.f90 b/examples/stokes/stok_mob_iter_example.f90
index bcc31b04..5048141a 100644
--- a/examples/stokes/stok_mob_iter_example.f90
+++ b/examples/stokes/stok_mob_iter_example.f90
@@ -22,6 +22,8 @@
       real *8, allocatable :: uval(:,:), tracval(:,:), soln(:,:)
       complex * 16 zpars
       integer *8 int8_0,int8_3,int8_9
+      integer *8 ncomp
+      integer *8, allocatable :: icomps(:)
 
       call prini(6,13)
 
@@ -77,6 +79,23 @@
       numit = 100
       allocate(errs(numit+1))
       allocate(soln(3,npts))
+      ncomp = 1
+      allocate(icomps(ncomp+1))
+      icomps(1) = 1
+      icomps(2) = npatches+1
+
+      eps = 1.0d-8
+
+
+      call stok_s_mob_solver(npatches, norders, ixyzs, iptype, npts, &
+        srccoefs, srcvals, ncomp, icomps, eps, numit, forces1, &
+        torques1, eps, niter, errs, rres, soln, trans_vels1, rot_vels1)
+      
+      call prin2('trans_vels1=*',trans_vels1,3)
+      call prin2('rot_vels1=*',rot_vels1,3)
+
+      erra = abs(trans_vels1(1) - 1.0d0/6/pi)
+      call prin2('error in mobility matrix=*',erra,1)
       
       
       
diff --git a/src/stok_wrappers/stok_s_mob.f90 b/src/stok_wrappers/stok_s_mob.f90
index 6b54f45d..73e1dc53 100644
--- a/src/stok_wrappers/stok_s_mob.f90
+++ b/src/stok_wrappers/stok_s_mob.f90
@@ -198,6 +198,7 @@ subroutine getnearquad_stok_s_mob(npatches, norders, &
       real *8 alpha, beta
       real *8, allocatable :: wneartmp(:)
       integer *8 ndd, ndi, ndz
+      real *8 dpars(1)
       complex *16 zpars(1)
       integer *8 ipars(2)
 
@@ -254,7 +255,7 @@ subroutine getnearquad_stok_s_mob(npatches, norders, &
 
         do i = 1,6   
           call dgetnearquad_ggq_guru(npatches, norders, ixyzs, &
-            iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
+            iptype, npts, srccoefs, srcvals, ndtarg, npts, srcvals, &
             ipatch_id, uvs_targ, eps, ipv, fker, ndd, dpars, ndz, &
             zpars, ndi, ijloc(1,i), nnz, row_ptr, col_ind, iquad, &
             rfac0, nquad, wneartmp)
@@ -553,8 +554,7 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
     
       real *8, intent(out) :: pot(ndim,npts)
 
-      integer *8 ndtarg, idensflag, ipotflag, ndim_p, i
-      real *8 rint
+      integer *8 ndtarg, idensflag, ipotflag, ndim_p
 
       integer *8 norder,npols,nover,npolso
       real *8, allocatable :: potsort(:)
@@ -598,10 +598,14 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
       real *8 ttot, done, pi
 
       real *8 potv(3), pv, gradv(3,3)
-      real *8 strslet(3), strvec(3)
+      real *8 strslet(3), strsvec(3)
 
       real *8 rint(3), rint_torque(3), xyzc(3), xdiff(3), rtmp(3)
-      integer *8 ncomp, icomp
+      real *8 centroid(3), rmoi(3,3), rmoi_inv(3,3), area
+      integer *8 ncomp, icomp, iend, info
+      integer *8 ipstart, ipend, istart
+      integer *8 nploc, nptsloc
+      integer *8, allocatable :: ixyzsloc(:)
 
       parameter (nd=1,ntarg0=1)
       data over4pi/0.07957747154594767d0/
@@ -626,7 +630,7 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
       ifppreg = 0
       ifppregtarg = 3
       allocate(sources(3,ns), targvals(3,ntarg))
-      allocate(stoklet(3,ns), strslet(3,ns), strsvec(3,ns))
+      allocate(stoklet(3,ns))
       allocate(sigmaover(3,ns))
       allocate(pottarg(3,ntarg), pretarg(ntarg))
       allocate(gradtarg(3,3,ntarg))
@@ -792,7 +796,7 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
       call cpu_time(t2)
 !$      t2 = omp_get_wtime()     
 
-
+      allocate(ixyzsloc(npatches+1))
 !
 !  Now add in contribution of L[\sigma]
 !
@@ -942,9 +946,6 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
 !        tolerance
 !    - numit: integer *8
 !        max number of gmres iterations
-!    - ifinout: integer *8
-!        ifinout = 0, interior problem
-!        ifinout = 1, exterior problem
 !    - forces: real *8(3,ncomp)
 !        prescribed force on the components
 !    - torques: real *8(3,ncomp)
@@ -971,7 +972,6 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
 !
       implicit none
       integer *8, intent(in) :: npatches, npts
-      integer *8, intent(in) :: ifinout
       integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
       integer *8, intent(in) :: iptype(npatches)
       integer *8, intent(in) :: ncomp, icomps(ncomp+1)
@@ -980,7 +980,7 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
       real *8, intent(in) :: forces(3,ncomp), torques(3,ncomp) 
       integer *8, intent(in) :: numit
       real *8, intent(out) :: soln(3,npts)
-      real *8, intent(out) :: trans_vels(3,ncomp), rot_vels(3,ncmp)
+      real *8, intent(out) :: trans_vels(3,ncomp), rot_vels(3,ncomp)
       real *8, intent(out) :: errs(numit+1)
       real *8, intent(out) :: rres
       integer *8, intent(out) :: niter
@@ -1017,7 +1017,13 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
       integer *8 ikerorder, iquadtype, npts_over
 
       real *8 xdiff(3), rtmp(3), rtuse(3), ftuse(3), rtmp2(3)
-      real *8 rmoi(3,3), rmoi_inv(3,3), centroid(3)
+      real *8 rmoi(3,3), rmoi_inv(3,3), centroid(3), area
+      integer *8 icomp, idensflag, iend, info
+      integer *8 ipstart, ipend, ipotflag, istart, lwork
+      integer *8 ndd, ndi, ndz
+      integer *8 ndim_p, ndim_s
+      integer *8 nploc, nptsloc
+      real *8 work(1)
       integer *8, allocatable :: ixyzsloc(:)
       real *8, allocatable :: wts(:)
  
@@ -1217,13 +1223,13 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
         info = 0
         call dinverse(3, rmoi, info, rmoi_inv) 
         rtuse(1:3) = rtuse(1:3)
-        rtuse(1) = rtuse(1) = rmoi_inv(1,1)*torques(1,icomp) + &
+        rtuse(1) = rtuse(1) + rmoi_inv(1,1)*torques(1,icomp) + &
            rmoi_inv(1,2)*torques(2,icomp) + &
            rmoi_inv(1,3)*torques(3,icomp)
-        rtuse(2) = rtuse(2) = rmoi_inv(2,1)*torques(1,icomp) + &
+        rtuse(2) = rtuse(2) + rmoi_inv(2,1)*torques(1,icomp) + &
            rmoi_inv(2,2)*torques(2,icomp) + &
            rmoi_inv(2,3)*torques(3,icomp)
-        rtuse(3) = rtuse(3) = rmoi_inv(3,1)*torques(1,icomp) + &
+        rtuse(3) = rtuse(3) + rmoi_inv(3,1)*torques(1,icomp) + &
            rmoi_inv(3,2)*torques(2,icomp) + &
            rmoi_inv(3,3)*torques(3,icomp)
         ftuse(1:3) = forces(1:3,icomp)/area
@@ -1308,7 +1314,7 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
           call cross_prod3d(xdiff, rtuse, rtmp2)
           rtmp(1:3) = rtmp(1:3) + rtmp2(1:3)*wts(i)
         enddo
-        rot_vels(1:3,icomp) = rmoi_inv(1:3,1)*rtmp(1) + 
+        rot_vels(1:3,icomp) = rmoi_inv(1:3,1)*rtmp(1) + & 
           rmoi_inv(1:3,2)*rtmp(2) + rmoi_inv(1:3,3)*rtmp(3)
       enddo
 
@@ -1502,7 +1508,7 @@ subroutine stok_s_mob_solver_guru(npatches, norders, ixyzs, &
       procedure (), pointer :: fker
       external lpcomp_stok_comb_vel_addsub
 
-      integer *8 ndd, ndi, ndz, lwork, ndim
+      integer *8 ndd, ndz, lwork, ndim
       integer *8 nkertmp
       complex *16 zpars
 

From f82e610ad2de4013a5eec563546b95924070879c Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@iitb.ac.in>
Date: Mon, 23 Mar 2026 15:21:50 +0530
Subject: [PATCH 6/7] fixed stokes mobility solver

---
 examples/stokes/stok_mob_iter_example.f90 | 238 ++++++++++++++++------
 src/stok_wrappers/stok_comb_vel.f90       |  21 +-
 src/stok_wrappers/stok_s_mob.f90          | 152 ++++++++------
 3 files changed, 281 insertions(+), 130 deletions(-)

diff --git a/examples/stokes/stok_mob_iter_example.f90 b/examples/stokes/stok_mob_iter_example.f90
index 5048141a..96580a20 100644
--- a/examples/stokes/stok_mob_iter_example.f90
+++ b/examples/stokes/stok_mob_iter_example.f90
@@ -1,29 +1,30 @@
       implicit real *8 (a-h,o-z) 
       implicit integer *8 (i-n)
-      real *8, allocatable :: srcvals(:,:),srccoefs(:,:),targs(:,:)
+      real *8, allocatable :: srcvals(:,:),srccoefs(:,:)
       real *8, allocatable :: wts(:)
+      integer *8, allocatable :: norders(:),ixyzs(:),iptype(:)
       
+      real *8, allocatable :: srcvals1(:,:),srccoefs1(:,:)
+      real *8, allocatable :: srccoefs2(:,:)
+      real *8, allocatable :: wts1(:)
+      integer *8, allocatable :: norders1(:),ixyzs1(:),iptype1(:)
+
       real *8 ts(2), rres
-      real *8, allocatable :: rfacs(:,:), errs(:)
+      real *8, allocatable :: errs(:)
       character *100 fname
-      integer *8 ipars(2)
-      integer *8, allocatable :: row_ptr(:),col_ind(:)
-      integer *8, allocatable :: iquad(:)
-      real *8, allocatable :: srcover(:,:),wover(:)
-
-      integer *8, allocatable :: norders(:),ixyzs(:),iptype(:)
 
-      integer *8, allocatable :: ipatch_id(:),inode_id(:)
-      real *8, allocatable :: uvs_targ(:,:)
-      real *8 c0(3)
-      real *8 area, centroid(3), rmoi(3,3)
-      real *8 forces1(3), torques1(3)
-      real *8 trans_vels1(3), rot_vels1(3)
-      real *8, allocatable :: uval(:,:), tracval(:,:), soln(:,:)
+      real *8 c0(3), abc(3)
+      real *8 nabc(3)
+      real *8, allocatable :: forces(:,:), torques(:,:)
+      real *8, allocatable :: trans_vels_ex(:,:), rot_vels_ex(:,:)
+      real *8, allocatable :: trans_vels(:,:), rot_vels(:,:)
+      real *8, allocatable :: centroids(:,:), shifts(:,:), rmois(:,:,:)
+      real *8, allocatable :: soln_mob(:,:), soln_res(:,:), rhs_res(:,:)
       complex * 16 zpars
       integer *8 int8_0,int8_3,int8_9
       integer *8 ncomp
       integer *8, allocatable :: icomps(:)
+      real *8 xdiff(3), rtmp(3), dpars(2) 
 
       call prini(6,13)
 
@@ -33,72 +34,185 @@
       done = 1
       pi = atan(done)*4
 
+      nlatx = 2
+      nlaty = 1
+      nlatz = 1
+
+      dx = 4.0d0
+      dy = 4.0d0
+      dz = 4.0d0
+
+      dh = 0.01d0*0
+
+      ncomp = nlatx*nlaty*nlatz
+
+      allocate(shifts(3,ncomp))
+      allocate(trans_vels_ex(3,ncomp), rot_vels_ex(3,ncomp))
+      do ilatz = 1,nlatz
+        do ilaty = 1,nlaty
+          do ilatx = 1,nlatx
+            icomp = (ilatz-1)*nlatx*nlaty + (ilaty-1)*nlatx + ilatx
+            shifts(1,icomp) = (ilatx-1)*dx + 2*dh*(hkrand(0)-0.5d0)
+            shifts(2,icomp) = (ilaty-1)*dy + 2*dh*(hkrand(0)-0.5d0)
+            shifts(3,icomp) = (ilatz-1)*dz + 2*dh*(hkrand(0)-0.5d0)
+
+            trans_vels_ex(1,icomp) = hkrand(0)-0.5d0
+            trans_vels_ex(2,icomp) = (hkrand(0)-0.5d0)
+            trans_vels_ex(3,icomp) = (hkrand(0)-0.5d0)
+
+            rot_vels_ex(1,icomp) = (hkrand(0)-0.5d0)
+            rot_vels_ex(2,icomp) = (hkrand(0)-0.5d0)
+            rot_vels_ex(3,icomp) = (hkrand(0)-0.5d0)
+          enddo
+        enddo
+      enddo
+
+      call prin2('shifts=*',shifts,3*ncomp)
+
+!
+!  extract shape of geometry to be extracted
+!
       a = 1.0d0
-      na = 4
-      c0(1) = 1.1d0
-      c0(2) = 0.3d0
-      c0(3) = 0.2d0
+      na = 2
+      c0(1) = 0
+      c0(2) = 0
+      c0(3) = 0
 
       iptype0 = 1
 
-      norder = 4
+      norder = 7
       npols = (norder+1)*(norder+2)/2
-
-      call get_sphere_npat_mem(a, na, c0, norder, iptype0, npatches, &
-        npts)
-      call prinf('npatches=*',npatches,1)
-      call prinf('npts=*',npts,1)
       
-      allocate(srcvals(12,npts), srccoefs(9,npts), ixyzs(npatches+1), &
-        iptype(npatches), norders(npatches))
-      allocate(wts(npts))
-      call get_sphere_npat(a, na, c0, norder, iptype0, npatches, &
-        npts, norders, ixyzs, iptype, srccoefs, srcvals)
+      npatches1 = 0
+      npts1 = 0
+      call get_sphere_npat_mem(a, na, c0, norder, iptype0, npatches1, &
+        npts1)
+      call prinf('npatches=*',npatches1,1)
+      call prinf('npts=*',npts1,1)
+      
+      allocate(srcvals1(12,npts1), srccoefs1(9,npts1), &
+        ixyzs1(npatches1+1), iptype1(npatches1), norders1(npatches1))
+      call get_sphere_npat(a, na, c0, norder, iptype0, npatches1, &
+        npts1, norders1, ixyzs1, iptype1, srccoefs1, srcvals1)
 
+      allocate(icomps(ncomp+1))
+      icomps(1) = 1
+      icomps(2) = npatches+1
+      npatches = ncomp*npatches1
+      npts = ncomp*npts1
+!
+!
+!
+      do icomp=1,ncomp+1
+        icomps(icomp) = npatches1*(icomp-1)+1
+      enddo
+!
+!  Define ixyzs
+!
+      allocate(ixyzs(npatches+1), iptype(npatches), norders(npatches))
+      allocate(srcvals(12,npts),srccoefs(9,npts))
+      do icomp = 1,ncomp
+        ipstart = icomps(icomp)
+        do j=1,npatches1
+          ixyzs(ipstart+j-1) = ixyzs1(j) + (icomp-1)*npts1
+          norders(ipstart+j-1) = norders1(j)
+          iptype(ipstart+j-1) = iptype1(j)
+        enddo
+        istart = (icomp-1)*npts1 + 1
+        do j=1,npts1
+           srcvals(1:12,istart+j-1) = srcvals1(1:12,j) 
+           srcvals(1:3,istart+j-1) = srcvals(1:3,istart+j-1) + &
+                     shifts(1:3,icomp)
+        enddo
+      enddo
+
+
+      ixyzs(npatches+1) = npts+1
+      allocate(srccoefs2(9,npts))
+      int8_9 = 9
+      call surf_vals_to_coefs(int8_9, npatches, norders, ixyzs, iptype, &
+       npts, srcvals(1:9,1:npts), srccoefs)
+      allocate(wts(npts))
       call get_qwts(npatches, norders, ixyzs, iptype, npts, srcvals, &
         wts)
 
-      call get_surf_moments(npatches, norders, ixyzs, iptype, npts, &
-        srcvals, wts, area, centroid, rmoi)
-      
-      call prin2('area=*',area,1)
-      call prin2('centroid=*',centroid,3)
-      call prin2('rmoi=*',rmoi,9)
-
-      err_moi = rmoi(1,1) - 4.0d0/3.0d0*2*pi*(a**4)
-      call prin2('err_moi=*', err_moi,1)
 
-      forces1(1) = 1.0d0
-      forces1(2) = 0.0d0
-      forces1(3) = 0.0d0
-      
-      torques1(1) = 0.0d0
-      torques1(2) = 0.0d0
-      torques1(3) = 0.0d0
+      allocate(rhs_res(3,npts),soln_res(3,npts))
+      allocate(centroids(3,ncomp),rmois(3,3,ncomp))
+      do icomp=1,ncomp
+        ipstart = icomps(icomp)
+        ipend = icomps(icomp+1)-1
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend+1)-1
+        nptsloc = iend - istart + 1
+        call get_surf_moments(npatches1, norders(ipstart), ixyzs1, &
+          iptype(ipstart), npts1, srcvals(1,istart), wts(istart), &
+          area, centroids(1,icomp), rmois(1,1,icomp))
+        do i=istart,iend
+          xdiff(1:3) = srcvals(1:3,i) - centroids(1:3,icomp)
+          call cross_prod3d(xdiff, rot_vels_ex(1:3,icomp), rtmp)
+          rhs_res(1:3,i) = trans_vels_ex(1:3,icomp) + rtmp(1:3)
+        enddo
+      enddo
+      call prin2('centroids=*',centroids,3*ncomp)
+      call prin2('shifts=*',shifts,3*ncomp)
+      call prin2('rmois=*',rmois,9*ncomp)
 
       numit = 100
       allocate(errs(numit+1))
-      allocate(soln(3,npts))
-      ncomp = 1
-      allocate(icomps(ncomp+1))
-      icomps(1) = 1
-      icomps(2) = npatches+1
-
       eps = 1.0d-8
+      eps_gmres = 1.0d-8
+      ifinout = 1
+      dpars(1) = 1.0d0
+      dpars(2) = 2.0d0
+
+      call stok_comb_vel_solver(npatches, norders, ixyzs, iptype, npts, &
+        srccoefs, srcvals, eps, dpars, numit, ifinout, rhs_res, &
+        eps_gmres, niter, errs, rres, soln_res)
+      call prin2('soln_res=*',soln_res,24)
+      allocate(forces(3,ncomp), torques(3,ncomp))
+      allocate(trans_vels(3,ncomp), rot_vels(3,ncomp))
+      
+      do icomp=1,ncomp
+        ipstart = icomps(icomp)
+        ipend = icomps(icomp+1)-1
+        istart = ixyzs(ipstart)
+        iend = ixyzs(ipend+1)-1
+        nptsloc = iend - istart + 1
+        forces(1:3,icomp) = 0
+        torques(1:3,icomp) = 0
+        trans_vels(1:3,icomp) = 0
+        rot_vels(1:3,icomp) = 0
+        do i=istart,iend
+          forces(1:3,icomp) = forces(1:3,icomp) + &
+            soln_res(1:3,i)*wts(i)
+          xdiff(1:3) = srcvals(1:3,i) - centroids(1:3,icomp)
+          call cross_prod3d(xdiff, soln_res(1,i), rtmp)
+          torques(1:3,icomp) = torques(1:3,icomp) + rtmp(1:3)*wts(i)
+        enddo
+      enddo
+      call prin2('forces=*',forces,3*ncomp)
+      call prin2('torques=*',torques,3*ncomp)
+
+
+
+      allocate(soln_mob(3,npts))
 
 
       call stok_s_mob_solver(npatches, norders, ixyzs, iptype, npts, &
-        srccoefs, srcvals, ncomp, icomps, eps, numit, forces1, &
-        torques1, eps, niter, errs, rres, soln, trans_vels1, rot_vels1)
-      
-      call prin2('trans_vels1=*',trans_vels1,3)
-      call prin2('rot_vels1=*',rot_vels1,3)
-
-      erra = abs(trans_vels1(1) - 1.0d0/6/pi)
-      call prin2('error in mobility matrix=*',erra,1)
-      
-      
+        srccoefs, srcvals, ncomp, icomps, eps, numit, forces, &
+        torques, eps, niter, errs, rres, soln_mob, trans_vels, rot_vels)
       
+      print *, "============================="
+      call prin2('trans_vels1=*',trans_vels,3*ncomp)
+      call prin2('trans_vels_ex=*',trans_vels_ex,3*ncomp)
+      print *, " "
+      print *, "============================="
+      print *, " "
+      call prin2('rot_vels1=*',rot_vels,3*ncomp)
+      call prin2('rot_vels_ex=*',rot_vels_ex,3*ncomp)
+      print *, " "
+      print *, "============================="
 
 
       
diff --git a/src/stok_wrappers/stok_comb_vel.f90 b/src/stok_wrappers/stok_comb_vel.f90
index 3b57093d..d4e17eac 100644
--- a/src/stok_wrappers/stok_comb_vel.f90
+++ b/src/stok_wrappers/stok_comb_vel.f90
@@ -1072,12 +1072,14 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
       real *8, allocatable :: sigmaover(:,:)
       real *8, allocatable :: stoklet(:,:), strslet(:,:)
       real *8, allocatable :: pottarg(:,:), strsvec(:,:)
+      real *8, allocatable :: rotlet(:,:), rotvec(:,:)
+      real *8, allocatable :: doublet(:,:), doubvec(:,:)
       integer *8 ns, nt
       real *8 alpha, beta
       integer *8 ifstoklet, ifstrslet
+      integer *8 ifrotlet, ifdoublet
       integer *8 ifppreg, ifppregtarg
       real *8 tmp(10), val
-      real *8 over4pi
 
       real *8 w11, w12, w13, w21, w22, w23, w31, w32, w33
       real *8 sig1, sig2, sig3
@@ -1107,7 +1109,6 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
       real *8 potv(3), pv, gradv(3,3)
 
       parameter (nd=1,ntarg0=1)
-      data over4pi/0.07957747154594767d0/
 
       ns = nptso
       done = 1
@@ -1146,8 +1147,8 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
 !      set relevant parameters for the fmm
 !
  
-      alpha = dpars(1)*over4pi
-      beta = dpars(2)*over4pi
+      alpha = dpars(1)
+      beta = dpars(2)
 
 
 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)      
@@ -1163,9 +1164,9 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
 !  Stokes double layer is negative of a stresslet with orienatation
 !  given by surface normal
 !
-        strslet(1,i) = -sigmaover(1,i)*whtsover(i)*beta 
-        strslet(2,i) = -sigmaover(2,i)*whtsover(i)*beta 
-        strslet(3,i) = -sigmaover(3,i)*whtsover(i)*beta
+        strslet(1,i) = sigmaover(1,i)*whtsover(i)*beta 
+        strslet(2,i) = sigmaover(2,i)*whtsover(i)*beta 
+        strslet(3,i) = sigmaover(3,i)*whtsover(i)*beta
 
         strsvec(1,i) = srcover(10,i)
         strsvec(2,i) = srcover(11,i)
@@ -1186,6 +1187,9 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
       ifstoklet = 1
       ifstrslet = 1
 
+      ifrotlet = 0
+      ifdoublet = 0
+
       if(alpha.eq.0) ifstoklet = 0
       if(beta.eq.0) ifstrslet = 0
 
@@ -1200,7 +1204,8 @@ subroutine stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
       call cpu_time(t1)
 !$      t1 = omp_get_wtime()      
       call stfmm3d(nd, eps, ns, sources, ifstoklet, stoklet, &
-        ifstrslet, strslet, strsvec, ifppreg, tmp, tmp, tmp, ntarg, &
+        ifstrslet, strslet, strsvec, ifrotlet, rotlet, rotvec, &
+        ifdoublet, doublet, doubvec, ifppreg, tmp, tmp, tmp, ntarg, &
         targvals, ifppregtarg, pottarg, tmp, tmp, ier)
       call cpu_time(t2)
 !$      t2 = omp_get_wtime()
diff --git a/src/stok_wrappers/stok_s_mob.f90 b/src/stok_wrappers/stok_s_mob.f90
index 73e1dc53..4ef86183 100644
--- a/src/stok_wrappers/stok_s_mob.f90
+++ b/src/stok_wrappers/stok_s_mob.f90
@@ -65,7 +65,7 @@
 !        representation with user-provided near-information prescribed
 !        in row-sparse compressed format
 !
-!    - lpcomp_stok_s_mob_addsub: apply the principal value part
+!    - stok_s_mob_addsub: apply the principal value part
 !        of the integeral equation on surface. On input, user provides
 !        precomputed near quadrature in row-sparse compressed format,
 !        and oversampling information for handling the far part of the
@@ -373,7 +373,7 @@ subroutine stok_s_vel_eval(npatches, norders, ixyzs, &
 !  
 !
 !
-      subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
+      subroutine stok_s_mob_addsub(npatches, norders, ixyzs, &
         iptype, npts, srccoefs, srcvals, eps, ndd, dpars, ndz, zpars, &
         ndi, ipars, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, &
         novers, nptso, ixyzso, srcover, whtsover, lwork, work, ndim, &
@@ -385,12 +385,14 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
 !  
 !  u = S_{stok}[\sigma]
 !
-!  pot = S_{stok}'[\sigma] + L[\sigma]
+!  pot = S_{stok}'[\sigma] + c L[\sigma]
 !
 !  where L[\sigma] is given by
 !     = 1/|\Gamma_{i}|\int_{\Gamma_{i}} \sigma dS + 
 !       tau_{i}^{-1}(\int_{{\Gamma_{i}}} (y-x_{c,i}) \times \sigma dS) \times (x-x_{c,i})
 !  on \Gamma_{i}
+!   
+!  We refer to L as the generlized ones matrix
 !
 !  The near field is precomputed and stored
 !  in the row sparse compressed format.
@@ -438,10 +440,7 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
 !    - dpars: real *8(ndd)
 !        real parameters defining the kernel/
 !        integral representation 
-!        * dpars(1) = alpha (single layer strength) 
-!        * dpars(2) = beta (double layer strength)
-!        * dpars(3) = strength of 1s matrix of 
-!            n \int \sigma \cdot n
+!        * dpars(1) = c*(multiple of generalized 1s matrix) 
 !    - ndz: integer *8
 !        number of complex parameters defining the kernel/
 !        integral representation (unused in this routine)
@@ -562,11 +561,14 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
       real *8, allocatable :: sources(:,:), targvals(:,:)
       real *8, allocatable :: sigmaover(:,:)
       real *8, allocatable :: stoklet(:,:)
+      real *8, allocatable :: strslet(:,:), strsvec(:,:)
+      real *8, allocatable :: rotlet(:,:), rotvec(:,:)
+      real *8, allocatable :: doublet(:,:), doubvec(:,:)
       real *8, allocatable :: pottarg(:,:), gradtarg(:,:,:)
       real *8, allocatable :: pretarg(:)
       integer *8 ns, nt
       real *8 alpha, beta
-      integer *8 ifstoklet, ifstrslet
+      integer *8 ifstoklet, ifstrslet, ifrotlet, ifdoublet
       integer *8 ifppreg, ifppregtarg
       real *8 tmp(10), val
       real *8 over4pi
@@ -598,7 +600,6 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
       real *8 ttot, done, pi
 
       real *8 potv(3), pv, gradv(3,3)
-      real *8 strslet(3), strsvec(3)
 
       real *8 rint(3), rint_torque(3), xyzc(3), xdiff(3), rtmp(3)
       real *8 centroid(3), rmoi(3,3), rmoi_inv(3,3), area
@@ -606,6 +607,7 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
       integer *8 ipstart, ipend, istart
       integer *8 nploc, nptsloc
       integer *8, allocatable :: ixyzsloc(:)
+      integer *8 int8_3
 
       parameter (nd=1,ntarg0=1)
       data over4pi/0.07957747154594767d0/
@@ -631,6 +633,9 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
       ifppregtarg = 3
       allocate(sources(3,ns), targvals(3,ntarg))
       allocate(stoklet(3,ns))
+      allocate(strslet(3,ns), strsvec(3,ns))
+      allocate(rotlet(3,ns), rotvec(3,ns))
+      allocate(doublet(3,ns), doubvec(3,ns))
       allocate(sigmaover(3,ns))
       allocate(pottarg(3,ntarg), pretarg(ntarg))
       allocate(gradtarg(3,3,ntarg))
@@ -654,6 +659,15 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
         stoklet(1,i) = sigmaover(1,i)*whtsover(i)
         stoklet(2,i) = sigmaover(2,i)*whtsover(i)
         stoklet(3,i) = sigmaover(3,i)*whtsover(i)
+
+        strslet(1:3,i) = 0
+        strsvec(1:3,i) = 0
+        
+        rotlet(1:3,i) = 0
+        rotvec(1:3,i) = 0
+
+        doublet(1:3,i) = 0
+        doubvec(1:3,i) = 0
       enddo
 !$OMP END PARALLEL DO      
       
@@ -668,20 +682,24 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
 
       ifstoklet = 1
       ifstrslet = 0
+      ifdoublet = 0
+      ifrotlet = 0
 
       iper = 0
       ier = 0
+
 !
 !       call the fmm
 !
       call cpu_time(t1)
 !$      t1 = omp_get_wtime()      
       call stfmm3d(nd, eps, ns, sources, ifstoklet, stoklet, &
-        ifstrslet, strslet, strsvec, ifppreg, tmp, tmp, tmp, ntarg, &
+        ifstrslet, strslet, strsvec, ifrotlet, rotlet, rotvec, &
+        ifdoublet, doublet, doubvec, ifppreg, tmp, tmp, tmp, ntarg, &
         targvals, ifppregtarg, pottarg, pretarg, gradtarg, ier)
       call cpu_time(t2)
 !$      t2 = omp_get_wtime()
-
+      
       timeinfo(1) = t2-t1
 !$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(i)
       do i = 1,ntarg
@@ -792,15 +810,18 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
         pot(3,i) = pot(3,i) - potv(3)
 
       enddo
+
       
       call cpu_time(t2)
 !$      t2 = omp_get_wtime()     
 
       allocate(ixyzsloc(npatches+1))
 !
-!  Now add in contribution of L[\sigma]
+!  Now add in contribution of c L[\sigma]
 !
+      
       ncomp = ipars(1)
+      int8_3 = 3
       do icomp = 1,ncomp
         rint(1:3) = 0
         rint_torque(1:3) = 0
@@ -808,13 +829,14 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
         ipend = ipars(icomp+2)-1
 
         istart = ixyzs(ipstart)
-        iend = ixyzs(ipend)
+        iend = ixyzs(ipend+1)-1
         nploc = ipend - ipstart + 1
         nptsloc = iend - istart + 1
 
         do i=1,nploc+1
-          ixyzsloc(i) = ixyzs(i+istart-1) - ixyzs(istart)+1
+          ixyzsloc(i) = ixyzs(i+ipstart-1) - ixyzs(ipstart)+1
         enddo
+
         rmoi(1:3,1:3) = 0
         rmoi_inv(1:3,1:3) = 0
         area = 0
@@ -823,7 +845,7 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
           iptype(ipstart), nptsloc, srcvals(1,istart), work(istart), &
           area, centroid, rmoi)
         info = 0
-        call dinverse(3, rmoi, info, rmoi_inv) 
+        call dinverse(int8_3, rmoi, info, rmoi_inv) 
         
         do i = istart,iend
           rint(1:3) = rint(1:3) + sigma(1:3,i)*work(i)
@@ -834,16 +856,16 @@ subroutine lpcomp_stok_s_mob_addsub(npatches, norders, ixyzs, &
         enddo
 
         do i = istart,iend
-          pot(1:3,i) = pot(1:3,i) + rint(1:3)/area
+          pot(1:3,i) = pot(1:3,i) + dpars(1)*rint(1:3)/area
           xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
           rtmp(1:3) = 0
           call cross_prod3d(rint_torque, xdiff, rtmp)
-          pot(1,i) = pot(1,i) + rmoi_inv(1,1)*rtmp(1) + &
-            rmoi_inv(1,2)*rtmp(2) + rmoi_inv(1,3)*rtmp(3)
-          pot(2,i) = pot(2,i) + rmoi_inv(2,1)*rtmp(1) + &
-            rmoi_inv(2,2)*rtmp(2) + rmoi_inv(2,3)*rtmp(3)
-          pot(3,i) = pot(3,i) + rmoi_inv(3,1)*rtmp(1) + &
-            rmoi_inv(3,2)*rtmp(2) + rmoi_inv(3,3)*rtmp(3)
+          pot(1,i) = pot(1,i) + dpars(1)*(rmoi_inv(1,1)*rtmp(1) + &
+            rmoi_inv(1,2)*rtmp(2) + rmoi_inv(1,3)*rtmp(3))
+          pot(2,i) = pot(2,i) + dpars(1)*(rmoi_inv(2,1)*rtmp(1) + &
+            rmoi_inv(2,2)*rtmp(2) + rmoi_inv(2,3)*rtmp(3))
+          pot(3,i) = pot(3,i) + dpars(1)*(rmoi_inv(3,1)*rtmp(1) + &
+            rmoi_inv(3,2)*rtmp(2) + rmoi_inv(3,3)*rtmp(3))
         enddo
       enddo
 
@@ -1026,6 +1048,8 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
       real *8 work(1)
       integer *8, allocatable :: ixyzsloc(:)
       real *8, allocatable :: wts(:)
+      real *8 ra
+      integer *8 int8_3
  
 !
 !
@@ -1165,6 +1189,7 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
       call getnearquad_stok_s_mob(npatches, norders, &
         ixyzs, iptype, npts, srccoefs, srcvals, eps, &
         iquadtype, nnz, row_ptr, col_ind, iquad, rfac0, nquad, wnear)
+
 !
 !  quadrature for S
 !
@@ -1176,12 +1201,11 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
       call cpu_time(t2)
 !$      t2 = omp_get_wtime()     
 
-
 !
 !  Compute S[\sigma_{0}]
 !
       allocate(wts(npts))
-      call get_qwts(npatches, novers, ixyzs, iptype, npts, &
+      call get_qwts(npatches, norders, ixyzs, iptype, npts, &
         srcvals, wts)
       allocate(sigma0(3,npts), rhs(3,npts), uvel(3,npts))
       allocate(ixyzsloc(npatches+1))
@@ -1193,43 +1217,46 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
       ndim_s = 3
       idensflag = 0
       ipotflag = 0
-      ndim_p = 0
+      ndim_p = 3
 
       allocate(ipars(ncomp+2))
       ipars(1) = ncomp
       do icomp = 1,ncomp+1
         ipars(icomp+1) = icomps(icomp)
       enddo
+
+      int8_3 = 3
+
       
       do icomp = 1,ncomp
         ipstart = icomps(icomp)
         ipend = icomps(icomp+1)-1
 
         istart = ixyzs(ipstart)
-        iend = ixyzs(ipend)
+        iend = ixyzs(ipend+1) - 1
         nploc = ipend - ipstart + 1
         nptsloc = iend - istart + 1
 
         do i=1,nploc+1
-          ixyzsloc(i) = ixyzs(i+istart-1) - ixyzs(istart)+1
+          ixyzsloc(i) = ixyzs(i+ipstart-1) - ixyzs(ipstart) + 1
         enddo
         rmoi(1:3,1:3) = 0
         rmoi_inv(1:3,1:3) = 0
         area = 0
         centroid(1:3) = 0
+
         call get_surf_moments(nploc, norders(ipstart), ixyzsloc, &
           iptype(ipstart), nptsloc, srcvals(1,istart), wts(istart), &
           area, centroid, rmoi)
         info = 0
-        call dinverse(3, rmoi, info, rmoi_inv) 
-        rtuse(1:3) = rtuse(1:3)
-        rtuse(1) = rtuse(1) + rmoi_inv(1,1)*torques(1,icomp) + &
+        call dinverse(int8_3, rmoi, info, rmoi_inv)
+        rtuse(1) = rmoi_inv(1,1)*torques(1,icomp) + &
            rmoi_inv(1,2)*torques(2,icomp) + &
            rmoi_inv(1,3)*torques(3,icomp)
-        rtuse(2) = rtuse(2) + rmoi_inv(2,1)*torques(1,icomp) + &
+        rtuse(2) = rmoi_inv(2,1)*torques(1,icomp) + &
            rmoi_inv(2,2)*torques(2,icomp) + &
            rmoi_inv(2,3)*torques(3,icomp)
-        rtuse(3) = rtuse(3) + rmoi_inv(3,1)*torques(1,icomp) + &
+        rtuse(3) = rmoi_inv(3,1)*torques(1,icomp) + &
            rmoi_inv(3,2)*torques(2,icomp) + &
            rmoi_inv(3,3)*torques(3,icomp)
         ftuse(1:3) = forces(1:3,icomp)/area
@@ -1240,18 +1267,20 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
         enddo
       enddo
 
-      call stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
-        iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
-        eps, ndd, dpars, ndz, zpars, ndi, ipars, nnz, row_ptr, &
-        col_ind, iquad, nquad, nker, wnear_s, novers, npts_over, &
-        ixyzso, srcover, wover, lwork, work, idensflag, ndim_s, &
-        sigma0, ipotflag, ndim_p, rhs)
+      dpars(1) = 0.0d0
+
+      call stok_s_mob_addsub(npatches, norders, ixyzs, &
+        iptype, npts, srccoefs, srcvals, eps, ndd, dpars, ndz, zpars, &
+        ndi, ipars, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, &
+        novers, npts_over, ixyzso, srcover, wover, npts, wts, ndim_s, &
+        sigma0, rhs)
+
 !$OMP PARALLEL DO PRIVATE(i)
       do i=1,npts
-        rhs(1:3,i) = -rhs(1:3,i)
+        rhs(1:3,i) = -(0.5d0*sigma0(1:3,i) + rhs(1:3,i))
       enddo
 !$OMP END PARALLEL DO 
-
+      call prin2('rhs=*',rhs,12)
       
       print *, "done generating near quadrature, now starting gmres"
       ndi = ncomp+2
@@ -1261,6 +1290,14 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
         rhs, nnz, row_ptr, col_ind, iquad, nquad, nker, wnear, novers, &
         npts_over, ixyzso, srcover, wover, eps_gmres, niter, &
         errs, rres, soln)
+      
+      do i=1,npts
+        soln(1:3,i) = soln(1:3,i) + sigma0(1:3,i)
+      enddo
+
+      ndd = 2
+      dpars(1) = 1.0d0
+      dpars(2) = 0.0d0
 
       call stok_comb_vel_eval_addsub(npatches, norders, ixyzs, &
         iptype, npts, srccoefs, srcvals, ndtarg, ntarg, targs, &
@@ -1271,26 +1308,20 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
 !
 !  Compute translational and rotational velocities on each component
 !
-!  Note the sign flip in rhs as it was -S[\sigma_{0}]
-      do i=1,npts
-        uvel(1:3,i) = uvel(1:3,i) - rhs(1:3,i)
-      enddo
-
       do icomp = 1,ncomp
-        trans_vels(1:3,icomp) = 0
-        rot_vels(1:3,icomp) = 0
-
-        ipstart = ipars(icomp+1)
-        ipend = npatches
-        if (icomp.ne.ncomp) ipend = ipars(icomp+2)-1
+        ipstart = icomps(icomp)
+        ipend = icomps(icomp+1)-1
 
         istart = ixyzs(ipstart)
-        iend = ixyzs(ipend)
+        iend = ixyzs(ipend+1) - 1
         nploc = ipend - ipstart + 1
         nptsloc = iend - istart + 1
 
+        trans_vels(1:3,icomp) = 0
+        rot_vels(1:3,icomp) = 0
+
         do i=1,nploc+1
-          ixyzsloc(i) = ixyzs(i+istart-1) - ixyzs(istart)+1
+          ixyzsloc(i) = ixyzs(i+ipstart-1) - ixyzs(ipstart)+1
         enddo
         rmoi(1:3,1:3) = 0
         rmoi_inv(1:3,1:3) = 0
@@ -1300,22 +1331,22 @@ subroutine stok_s_mob_solver(npatches, norders, ixyzs, &
           iptype(ipstart), nptsloc, srcvals(1,istart), wts(istart), &
           area, centroid, rmoi)
         info = 0
-        call dinverse(3, rmoi, info, rmoi_inv) 
+        call dinverse(int8_3, rmoi, info, rmoi_inv) 
         do i = istart,iend
           trans_vels(1:3,icomp) = trans_vels(1:3,icomp) + uvel(1:3,i)*wts(i)
         enddo
         trans_vels(1:3,icomp) = trans_vels(1:3,icomp)/area
         
         rtmp(1:3) = 0
-        do i=1,istart,iend
+        do i=istart,iend
           xdiff(1:3) = srcvals(1:3,i) - centroid(1:3)
           rtuse(1:3) = uvel(1:3,i) - trans_vels(1:3,icomp)
           rtmp2(1:3) = 0
           call cross_prod3d(xdiff, rtuse, rtmp2)
           rtmp(1:3) = rtmp(1:3) + rtmp2(1:3)*wts(i)
         enddo
-        rot_vels(1:3,icomp) = rmoi_inv(1:3,1)*rtmp(1) + & 
-          rmoi_inv(1:3,2)*rtmp(2) + rmoi_inv(1:3,3)*rtmp(3)
+        rot_vels(1:3,icomp) = -(rmoi_inv(1:3,1)*rtmp(1) + & 
+          rmoi_inv(1:3,2)*rtmp(2) + rmoi_inv(1:3,3)*rtmp(3))
       enddo
 
       
@@ -1506,7 +1537,7 @@ subroutine stok_s_mob_solver_guru(npatches, norders, ixyzs, &
       integer *8 ndd_use
 
       procedure (), pointer :: fker
-      external lpcomp_stok_comb_vel_addsub
+      external stok_s_mob_addsub
 
       integer *8 ndd, ndz, lwork, ndim
       integer *8 nkertmp
@@ -1520,9 +1551,10 @@ subroutine stok_s_mob_solver_guru(npatches, norders, ixyzs, &
       integer *8 i
 
       did = 0.5d0 
-      fker => lpcomp_stok_comb_vel_addsub
+      fker => stok_s_mob_addsub
 
-      ndd = 0
+      ndd = 1
+      dpars = 1.0d0
       ndz = 0
 
       lwork = npts

From de04da4dfb8ee8fc11a6fcc3c4bc2664f78b29a5 Mon Sep 17 00:00:00 2001
From: Manas Rachh <mrachh@iitb.ac.in>
Date: Thu, 2 Apr 2026 17:25:04 +0530
Subject: [PATCH 7/7] adding more surface routines

---
 makefile                                      |   2 +-
 src/common/rigidbodies.f                      |  33 ++
 src/surface_routs/analytic_geometry_routs.f90 |   2 +-
 src/surface_routs/surf_routs.f90              | 395 +++++++++++++++++-
 4 files changed, 429 insertions(+), 3 deletions(-)

diff --git a/makefile b/makefile
index b9c4fc64..64fc71d1 100644
--- a/makefile
+++ b/makefile
@@ -140,7 +140,7 @@ EMOBJS = $(EM)/em_mfie_pec.o $(EM)/em_aumfie_pec.o \
 
 # Stokes wrappers
 STOK = src/stok_wrappers
-STOKOBJS = $(STOK)/stok_comb_vel.o 
+STOKOBJS = $(STOK)/stok_comb_vel.o $(STOK)/stok_s_mob.o
 
 # Kernels
 KER = src/kernels
diff --git a/src/common/rigidbodies.f b/src/common/rigidbodies.f
index 8f6ecdaf..b3802239 100644
--- a/src/common/rigidbodies.f
+++ b/src/common/rigidbodies.f
@@ -197,3 +197,36 @@ subroutine dcrossvectomat(r,a)
 
      
       
+      subroutine get_linear_transf_eul(deul, dmat)
+c
+c  compute the 3x3 rotation matrix corresponding to euler angles
+c  in the zxz notation
+c 
+c  Input arguments:
+c    deul: real *(3)
+c      the three Euler angles
+c  Output arguments:
+c    dmat: real *8(3,3)
+c      the corresponding rotation matrix 
+c
+      implicit real *8 (a-h,o-z)
+      real *8 deul(3), dmat(3,3)
+
+      psi = deul(1)
+      thet = deul(2)
+      phi = deul(3)
+
+      dmat(1,1) = cos(phi)*cos(psi) - sin(phi)*cos(thet)*sin(psi)
+      dmat(1,2) = cos(phi)*sin(psi) + sin(phi)*cos(thet)*cos(psi)
+      dmat(1,3) = sin(thet)*sin(phi)
+  
+      dmat(2,1) = -sin(phi)*cos(psi)
+      dmat(2,2) = -sin(phi)*sin(psi) + cos(phi)*cos(thet)*cos(psi)
+      dmat(2,3) = cos(phi)*sin(thet)
+
+      dmat(3,1) = sin(thet)*sin(psi)
+      dmat(3,2) = -sin(thet)*cos(psi)
+      dmat(3,3) = cos(thet)
+
+      return
+      end
diff --git a/src/surface_routs/analytic_geometry_routs.f90 b/src/surface_routs/analytic_geometry_routs.f90
index e26dfeeb..38181102 100644
--- a/src/surface_routs/analytic_geometry_routs.f90
+++ b/src/surface_routs/analytic_geometry_routs.f90
@@ -1407,7 +1407,7 @@ subroutine get_axissym_fcurve_npat(nch2d, tchse, fcurve, &
       pars_end(2) = tchse(nch2d)
 
       pars_end(3:(np+2)) = pars(1:np)
-
+      int8_6 = 6
 
 
        
diff --git a/src/surface_routs/surf_routs.f90 b/src/surface_routs/surf_routs.f90
index fe66d3f5..8545a44a 100644
--- a/src/surface_routs/surf_routs.f90
+++ b/src/surface_routs/surf_routs.f90
@@ -943,7 +943,7 @@ subroutine refine_patches_list(npatches, npatmax, norders, ixyzs, &
 
          do l=1,npols
             ll = istart+l-1
-            do m=1,9
+do m=1,9
               srctmp(m) = srctmp(m) + srccoefs(m,ll)*pols(l)
             enddo
          enddo
@@ -1084,7 +1084,400 @@ subroutine get_surf_moments(npatches, norders, ixyzs, &
 
   return
   end
+!
+!
+!
+!
+!
+subroutine surf_rotate(npatches, norders, ixyzs, iptype, npts, &
+  srccoefs, srcvals, deul, srccoefs_out, srcvals_out)
+! 
+!  This subroutine rotates the surfer object using Euler angles. 
+!  We use the z-x-z convention
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info. 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes. 
+!    - deul: real *8(3)
+!        The three euler angles
+!
+!  Output arguments:
+!    - srccoefs_out: double precision (9,npts)
+!        srccoefs_out(1:9,1:npts) are the koornwinder expansion coefs of 
+!        the rotated geometry info. 
+!    - srcvals_out: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes on the rotated geometry. 
+!  
+!
+  implicit real *8 (a-h,o-z)
+  implicit integer *8 (i-n)
+  integer *8, intent(in) :: npatches, npts
+  integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  real *8, intent(in) :: deul(3)
+  real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+  real *8, intent(out) :: srccoefs_out(9,npts), srcvals_out(12,npts)
+
+  real *8 dmat(3,3), dshift(3)
+
+  dshift(1) = 0
+  dshift(2) = 0
+  dshift(3) = 0
+
+  call get_linear_transf_eul(deul, dmat)
+
+!
+!  construct the first rotation matrix
+!
+  call surf_affine_transf(npatches, norders, ixyzs, iptype, npts, &
+    srccoefs, srcvals, dmat, dshift, srccoefs_out, srcvals_out)
+
+
+  return
+  end
+!
+!
+!
+!
+!
+subroutine surf_translate(npatches, norders, ixyzs, iptype, npts, &
+  srccoefs, srcvals, dshift, srccoefs_out, srcvals_out)
+! 
+!  This subroutine translates the surfer object. 
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info. 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes. 
+!    - dshift: real *8(3)
+!        (x,y,z) coordinates of shift 
+!
+!  Output arguments: 
+!    - srccoefs_out: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        translated geometry info. 
+!    - srcvals_out: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes for the translated geometry. 
+!  
+!
+  implicit real *8 (a-h,o-z)
+  implicit integer *8 (i-n)
+  integer *8, intent(in) :: npatches, npts
+  integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  real *8, intent(in) :: dshift(3)
+  real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+  real *8, intent(out) :: srccoefs_out(9,npts), srcvals_out(12,npts)
+
+  real *8 dmat(3,3)
+
+  dmat(1:3,1:3) = 0
+  dmat(1,1) = 1
+  dmat(2,2) = 1
+  dmat(3,3) = 1
+
+  call surf_affine_transf(npatches, norders, ixyzs, iptype, npts, &
+    srccoefs, srcvals, dmat, dshift, srccoefs_out, srcvals_out)
+
 
+  return
+  end
+
+!
+!
+!
+!
+!
+subroutine surf_scale(npatches, norders, ixyzs, iptype, npts, &
+  srccoefs, srcvals, dscale, srccoefs_out, srcvals_out)
+! 
+!  This subroutine rescales the surfer object 
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info. 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes. 
+!    - dscale: real *8(3)
+!        rescaling parameter for each of the coordinates 
+!
+!  Output arguments: (All arguments are updated on output)
+!    - srccoefs_out: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        the scaled geometry info. 
+!    - srcvals_out: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes of the scaled geometry. 
+!  
+!
+  implicit real *8 (a-h,o-z)
+  implicit integer *8 (i-n)
+  integer *8, intent(in) :: npatches, npts
+  integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  real *8, intent(in) :: dscale(3)
+  real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+  real *8, intent(out) :: srccoefs_out(9,npts), srcvals_out(12,npts)
+
+  real *8 dmat(3,3), dshift(3)
+  
+  dshift(1:3) = 0
+
+  dmat(1:3,1:3) = 0
+  dmat(1,1) = 1  
+  dmat(2,2) = 1
+  dmat(3,3) = 1
+
+  call surf_affine_transf(npatches, norders, ixyzs, iptype, npts, &
+    srccoefs, srcvals, dmat, dshift, srccoefs_out, srcvals_out)
+
+
+  return
+  end
+!
+!
+!
+!
+!
+subroutine surf_affine_transf(npatches, norders, ixyzs, iptype, npts, &
+  srccoefs, srcvals, dmat, dshift, srccoefs_out, srcvals_out)
+! 
+!  This subroutine applies an affine transformation to the surface.  
+!  x \to Mx + v, where v=dshift, and M=dmat
+!
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info. 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes. 
+!    - dmat: real *8(3,3)
+!        transformation matrix
+!    - dshift: real *8(3)
+!        translation 
+!
+!  Output arguments: (All arguments are updated on output)
+!    - srccoefs_out: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        the scaled geometry info. 
+!    - srcvals_out: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes of the scaled geometry. 
+!  
+!
+  implicit real *8 (a-h,o-z)
+  implicit integer *8 (i-n)
+  integer *8, intent(in) :: npatches, npts
+  integer *8, intent(in) :: norders(npatches), ixyzs(npatches+1)
+  integer *8, intent(in) :: iptype(npatches)
+  real *8, intent(in) :: dmat(3,3), dshift(3)
+  real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+  real *8, intent(out) :: srccoefs_out(9,npts), srcvals_out(12,npts)
+
+  integer *8 nd9
+
+!$OMP PARALLEL DO DEFAULT(SHARED) PRIVATE(ds)
+  do i=1,npts
+    srcvals_out(1:3,i) = dmat(1:3,1)*srcvals(1,i) + &
+                         dmat(1:3,2)*srcvals(2,i) + &
+                         dmat(1:3,3)*srcvals(3,i) + dshift(1:3)
+    srcvals_out(4:6,i) = dmat(1:3,1)*srcvals(4,i) + &
+                         dmat(1:3,2)*srcvals(5,i) + &
+                         dmat(1:3,3)*srcvals(6,i) 
+    srcvals_out(7:9,i) = dmat(1:3,1)*srcvals(7,i) + &
+                         dmat(1:3,2)*srcvals(8,i) + &
+                         dmat(1:3,3)*srcvals(9,i) 
+      call cross_prod3d(srcvals_out(4,i), srcvals_out(7,i), &
+        srcvals_out(10,i))
+
+      ds = sqrt(srcvals_out(10,i)**2 + srcvals_out(11,i)**2 + &
+              srcvals_out(12,i)**2)
+      srcvals_out(10,i) = srcvals_out(10,i)/ds
+      srcvals_out(11,i) = srcvals_out(11,i)/ds
+      srcvals_out(12,i) = srcvals_out(12,i)/ds
+
+  enddo
+!$OMP END PARALLEL DO
+  nd9 = 9
+  call surf_vals_to_coefs(nd9, npatches, norders, ixyzs, iptype, npts, &
+    srcvals_out(1:9,1:npts), srccoefs_out)
+
+  return
+  end
+!
+!
+!
+!
+!
+subroutine surf_append_single_comp(npatches, norders, ixyzs, iptype, &
+  npts, srccoefs, srcvals, ncomp, icomps, npatches_out, norders_out, &
+  ixyzs_out, iptype_out, npts_out, srccoefs_out, srcvals_out)
+!
+!
+!  Given a multicomponent surfer description, append to it a new copoment
+!
+!  Input arguments:
+!    - npatches: integer *8
+!        number of patches describing the discretized surface to be appended 
+!    - norders: integer *8(npatches)
+!        norders(1:npatches) is the discretization order of patches
+!    - ixyzs: integer *8(npatches+1)
+!        ixyzs(1:npatches+1) is starting location of points on patch i
+!    - iptype: integer *8(npatches)
+!        iptype(1:npatches) type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts: integer *8
+!        total number of points describing the discretized surface to be
+!        appended
+!    - srccoefs: double precision (9,npts)
+!        srccoefs(1:9,1:npts) are the koornwinder expansion coefs of 
+!        geometry info of the the surface to be appended 
+!    - srcvals: double precision (12,npts)
+!        srcvals(1:12,1:npts) are xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes of the surface to be appended
+!
+!  Input/Output arguments:
+!    - ncomp: integer *8
+!        On input, number of components in output surfer
+!        On output, ncomp = ncomp + 1
+!    - icomps: integer *8 (*)
+!        Must be atleast of size ncomp+2
+!        On input and output icomps(i) is the id of the patch where
+!        information for component i begins in the output surfer
+!    - npatches_out: integer *8
+!        On input, number of patches describing the discretized surface
+!        On output, npatches_out = npatches_out + npatches
+!    - norders_out: integer *8(*)
+!        Must be at least of size npatches_out + npatches on input
+!        Describes the order of discretization of each patch
+!    - ixyzs: integer *8(*)
+!        Must be at least of size npatches_out + npatches + 1 on input
+!        Describes starting location of points on patch i
+!    - iptype: integer *8(*)
+!        Must be at least of size npatches_out + npatches on input
+!        Describes the type of patch
+!       * iptype = 1, triangular patch with RV nodes
+!       * iptype = 11, quad patch with GL nodes
+!       * iptype = 12, quad patch with Cheb nodes
+!    - npts_out: integer *8
+!        On input, total number of points describing the discretized surface
+!        On output, npts_out = npts_out + npts
+!    - srccoefs_out: double precision (9,*)
+!        Must be atleast of size (9,npts_out + npts)
+!        Describes the koornwinder expansion coefs of 
+!        geometry info of the the surface 
+!    - srcvals_out: double precision (12,*)
+!        Must be atleast of size (12,npts_out + npts)
+!        Describes the xyz, dxyz/du,dxyz/dv, normals at 
+!        all nodes of the surface 
+!
+      implicit real *8(a-h,o-z)
+      implicit integer *8(i-n)
+      integer *8, intent(in) :: npatches, npts
+      integer *8, intent(in) :: norders(npatches), iptype(npatches)
+      integer *8, intent(in) :: ixyzs(npatches+1)
+      real *8, intent(in) :: srccoefs(9,npts), srcvals(12,npts)
+
+      integer *8, intent(inout) :: npatches_out, npts_out, ncomp, icomps(*)
+      integer *8, intent(inout) :: norders_out(*), iptype_out(*)
+      integer *8, intent(inout) :: ixyzs_out(*)
+      real *8, intent(inout) :: srccoefs_out(9,*), srcvals_out(12,*)
+
+!$OMP PARALLEL DO DEFAULT(SHARED)
+      do i=1,npatches
+        ixyzs_out(npatches_out+i+1) = ixyzs_out(npatches+1) + ixyzs(i+1) - 1
+        norders_out(npatches_out+i) = norders(i)
+        iptype_out(npatches_out+i) = iptype(i)
+      enddo
+!$OMP END PARALLEL DO
+
+!$OMP PARALLEL DO DEFAULT(SHARED)
+      do i=1,npts
+        srccoefs_out(1:9,npts_out+i) = srccoefs(1:9,i)
+        srcvals_out(1:12,npts_out+i) = srcvals(1:12,i)
+      enddo
+!$OMP END PARALLEL DO
+      
+      npatches_out = npatches_out + npatches
+      npts_out = npts_out + npts
+      ncomp = ncomp + 1
+      icomps(ncomp+1) = npatches_out + 1
+
+      return
+      end
+!
+!
+!
+!
+!
+!
 subroutine surf_vals_to_coefs(nd,npatches,norders,ixyzs,iptype,npts, &
    u,ucoefs)