diff --git a/dock64_optimized b/dock64_optimized
new file mode 100755
index 0000000..c18e5bc
Binary files /dev/null and b/dock64_optimized differ
diff --git a/dock64 b/dock64_original
similarity index 100%
rename from dock64
rename to dock64_original
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 0000000..c594f55
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,10 @@
+The scripts in this folder are used to convert DB2 files to db2bin format.
+The `db2_to_binary_recursive.py` has been tested to convert DB2.tgz files in the ZINC22 database to db2bin.tar.zst format (it uses zstandard compression)
+
+The performance gain is about ~17x faster than text parsing and the file size is about 20% smaller than the original DB2.tgz files.
+
+To use the binary format in DOCK3, you need to specify in the INDOCK:
+
+```
+INDOCK: use_binary_db2: yes
+```
\ No newline at end of file
diff --git a/scripts/db2_to_binary_recursive.py b/scripts/db2_to_binary_recursive.py
new file mode 100755
index 0000000..1d3f1b9
--- /dev/null
+++ b/scripts/db2_to_binary_recursive.py
@@ -0,0 +1,458 @@
+#!/usr/bin/env python3
+"""
+db2_to_binary_recursive.py - Recursively convert DB2 files and archives to db2bin format
+
+This script is a standalone wrapper that:
+1. Recursively finds `.db2`, `.db2.gz`, and `.db2.tgz` (or `.tar.gz`) files in an input directory.
+2. Converts them to `.db2bin`, `.db2bin.gz`, and `.db2bin.tgz` respectively.
+3. For archives, it processes the internals entirely in-memory using `tarfile` and `io.BytesIO`,
+   avoiding any intermediate extraction to disk.
+4. Replicates the relative directory structure if an output directory is provided.
+
+Usage:
+    python3 db2_to_binary_recursive.py /path/to/input/dir /path/to/output/dir
+
+Author: Phong Lam, Uppsala University (Extended by AI)
+Date: January 2026 (Updated March 2026)
+"""
+
+import struct
+import gzip
+import argparse
+import tarfile
+import io
+try:   
+    import zstandard
+except ImportError:
+    print("zstandard not installed, please install it using 'pip install zstandard'")
+    exit(1)
+from pathlib import Path
+
+# Binary format constants
+DB2BIN_MAGIC = b'DB2BIN'
+DB2BIN_VERSION = 1
+
+
+def parse_db2_stream(f_iter):
+    """
+    Parse an iterable of text DB2 lines (or decoded file stream) into structured data.
+    Yields a dict for each molecule to keep memory footprint low.
+    """
+    data = {
+        'refcod': '', 'protcod': '', 'smiles': '', 'molname': '', 'arbitrary': [],
+        'total_atoms': 0, 'total_bonds': 0, 'total_coords': 0, 'total_confs': 0,
+        'total_sets': 0, 'rigid_heavy_count': 0, 'total_clusters': 0, 'mlines': 0,
+        'total_charge': 0.0, 'total_polar_solv': 0.0, 'total_apolar_solv': 0.0,
+        'total_solv': 0.0, 'total_surfarea': 0.0,
+        'atom_num': [], 'atom_name': [], 'atom_type': [], 'atom_vdwtype': [],
+        'atom_color': [], 'atom_charge': [], 'atom_polsolv': [], 'atom_apolsolv': [],
+        'atom_totalsolv': [], 'atom_surfarea': [], 'bond_num': [], 'bond_start': [],
+        'bond_end': [], 'bond_type': [], 'coord_index': [], 'coords': [],
+        'rigid_colors': [], 'rigid_coords': [], 'conf_coord': [], 'sets': [],
+        'clusters': [],
+    }
+
+    line_idx = 0
+    m_lines_remaining = 0
+    atoms_seen = 0
+    bonds_seen = 0
+    coords_seen = 0
+    rigid_seen = 0
+    confs_seen = 0
+    current_set = None
+    set_lines_remaining = 0
+    current_cluster = None
+    cluster_match_remaining = 0
+    for raw_line in f_iter:
+        line = raw_line.strip()
+        if isinstance(line, bytes):
+            line = line.decode('utf-8', errors='replace')
+            
+        if not line:
+            continue
+
+        section = line[0] if line else ''
+
+        if section == 'M':
+            parts = line.split()
+            if m_lines_remaining == 0:
+                data['refcod'] = parts[1] if len(parts) > 1 else ''
+                data['protcod'] = parts[2] if len(parts) > 2 else ''
+                if len(parts) >= 11:
+                    data['total_atoms'] = int(parts[3])
+                    data['total_bonds'] = int(parts[4])
+                    data['total_coords'] = int(parts[5])
+                    data['total_confs'] = int(parts[6])
+                    data['total_sets'] = int(parts[7])
+                    data['rigid_heavy_count'] = int(parts[8])
+                    data['mlines'] = int(parts[9])
+                    data['total_clusters'] = int(parts[10])
+                m_lines_remaining = data['mlines'] - 1
+            elif m_lines_remaining == data['mlines'] - 1:
+                if len(parts) >= 6:
+                    data['total_charge'] = float(parts[1])
+                    data['total_polar_solv'] = float(parts[2])
+                    data['total_apolar_solv'] = float(parts[3])
+                    data['total_solv'] = float(parts[4])
+                    data['total_surfarea'] = float(parts[5])
+                m_lines_remaining -= 1
+            elif m_lines_remaining == data['mlines'] - 3:
+                 data['molname'] = line[2:].strip() if len(line) > 2 else ''
+                 m_lines_remaining -= 1
+            elif m_lines_remaining == data['mlines'] - 2:
+                data['smiles'] = line[2:].strip() if len(line) > 2 else ''
+                m_lines_remaining -= 1
+            else:
+                arb_line = line[2:].strip() if len(line) > 2 else ''
+                data['arbitrary'].append(arb_line)
+                m_lines_remaining -= 1
+        elif section == 'A':
+            parts = line.split()
+            if len(parts) >= 11:
+                data['atom_num'].append(int(parts[1]))
+                data['atom_name'].append(parts[2][:4])
+                data['atom_type'].append(parts[3][:5])
+                data['atom_vdwtype'].append(int(parts[4]))
+                data['atom_color'].append(int(parts[5]))
+                data['atom_charge'].append(float(parts[6]))
+                data['atom_polsolv'].append(float(parts[7]))
+                data['atom_apolsolv'].append(float(parts[8]))
+                data['atom_totalsolv'].append(float(parts[9]))
+                data['atom_surfarea'].append(float(parts[10]))
+            atoms_seen += 1
+        elif section == 'B':
+            parts = line.split()
+            if len(parts) >= 5:
+                data['bond_num'].append(int(parts[1]))
+                data['bond_start'].append(int(parts[2]))
+                data['bond_end'].append(int(parts[3]))
+                data['bond_type'].append(parts[4][:2])
+            bonds_seen += 1
+        elif section == 'X':
+            parts = line.split()
+            if len(parts) >= 6:
+                data['coord_index'].append([int(parts[1]), int(parts[2]), int(parts[3])])
+                data['coords'].append([float(parts[4]), float(parts[5]), float(parts[6])])
+            coords_seen += 1
+        elif section == 'R':
+            parts = line.split()
+            if len(parts) >= 5:
+                data['rigid_colors'].append(int(parts[2]))
+                data['rigid_coords'].append([
+                    float(parts[3]), float(parts[4]), float(parts[5]) if len(parts) > 5 else float(parts[4])
+                ])
+            rigid_seen += 1
+        elif section == 'C':
+            parts = line.split()
+            if len(parts) >= 3:
+                data['conf_coord'].append([int(parts[2]), int(parts[3]) if len(parts) > 3 else int(parts[2])])
+            confs_seen += 1
+        elif section == 'S':
+            parts = line.split()
+            if len(parts) >= 5:
+                if set_lines_remaining == 0:
+                    set_num, total_lines, conf_count = int(parts[1]), int(parts[2]), int(parts[3])
+                    current_set = {
+                        'set_num': set_num, 'total_lines': total_lines, 'conf_count': conf_count,
+                        'broken': int(parts[4]) if len(parts) > 4 else 0,
+                        'energy': float(parts[6]) if len(parts) > 6 else 0.0,
+                        'max_strain': float(parts[7]) if len(parts) > 7 else 0.0,
+                        'confs': []
+                    }
+                    set_lines_remaining = total_lines
+                    if set_lines_remaining <= 0:
+                        data['sets'].append(current_set)
+                        current_set = None
+                else:
+                    confs_on_line = int(parts[3])
+                    if current_set is not None:
+                        for i in range(confs_on_line):
+                            if len(parts) > 4 + i:
+                                current_set['confs'].append(int(parts[4 + i]))
+                        set_lines_remaining -= 1
+                        if set_lines_remaining <= 0:
+                            data['sets'].append(current_set)
+                            current_set = None
+        elif section == 'D':
+            parts = line.split()
+            if cluster_match_remaining == 0 and len(parts) >= 6:
+                current_cluster = {
+                    'set_start': int(parts[2]), 'set_end': int(parts[3]),
+                    'match_start': int(parts[5]), 'match_end': int(parts[6]) if len(parts) > 6 else int(parts[5]),
+                    'add_match_colors': [], 'add_match_coords': []
+                }
+                cluster_match_remaining = int(parts[4])
+                if cluster_match_remaining == 0:
+                    data['clusters'].append(current_cluster)
+                    current_cluster = None
+            elif cluster_match_remaining > 0 and len(parts) >= 4:
+                if current_cluster:
+                    current_cluster['add_match_colors'].append(int(parts[2]))
+                    current_cluster['add_match_coords'].append([
+                        float(parts[3]), float(parts[4]) if len(parts) > 4 else 0.0,
+                        float(parts[5]) if len(parts) > 5 else 0.0
+                    ])
+                cluster_match_remaining -= 1
+                if cluster_match_remaining == 0 and current_cluster:
+                    data['clusters'].append(current_cluster)
+                    current_cluster = None
+        elif section == 'E':
+            yield data
+            data = {
+                'refcod': '', 'protcod': '', 'smiles': '', 'molname': '', 'arbitrary': [],
+                'total_atoms': 0, 'total_bonds': 0, 'total_coords': 0, 'total_confs': 0,
+                'total_sets': 0, 'rigid_heavy_count': 0, 'total_clusters': 0, 'mlines': 0,
+                'total_charge': 0.0, 'total_polar_solv': 0.0, 'total_apolar_solv': 0.0,
+                'total_solv': 0.0, 'total_surfarea': 0.0,
+                'atom_num': [], 'atom_name': [], 'atom_type': [], 'atom_vdwtype': [],
+                'atom_color': [], 'atom_charge': [], 'atom_polsolv': [], 'atom_apolsolv': [],
+                'atom_totalsolv': [], 'atom_surfarea': [], 'bond_num': [], 'bond_start': [],
+                'bond_end': [], 'bond_type': [], 'coord_index': [], 'coords': [],
+                'rigid_colors': [], 'rigid_coords': [], 'conf_coord': [], 'sets': [],
+                'clusters': [],
+            }
+            m_lines_remaining = 0
+            atoms_seen = 0
+            bonds_seen = 0
+            coords_seen = 0
+            rigid_seen = 0
+            confs_seen = 0
+            current_set = None
+            current_cluster = None
+            cluster_match_remaining = 0
+
+
+def write_single_molecule(f, data):
+    """Write a single molecule to an already-open binary file-like object."""
+    f.write(DB2BIN_MAGIC)
+    f.write(struct.pack('i', DB2BIN_VERSION))
+    f.write(struct.pack('i', data['total_atoms']))
+    f.write(struct.pack('i', data['total_bonds']))
+    f.write(struct.pack('i', data['total_coords']))
+    f.write(struct.pack('i', data['total_confs']))
+    f.write(struct.pack('i', len(data['sets'])))
+    f.write(struct.pack('i', data['rigid_heavy_count']))
+    f.write(struct.pack('i', len(data['clusters'])))
+    f.write(struct.pack('i', data['mlines']))
+    
+    f.write(struct.pack('f', data['total_charge']))
+    f.write(struct.pack('f', data['total_polar_solv']))
+    f.write(struct.pack('f', data['total_apolar_solv']))
+    f.write(struct.pack('f', data['total_solv']))
+    f.write(struct.pack('f', data['total_surfarea']))
+    
+    refcod = data['refcod'][:16].ljust(16, '\x00')
+    f.write(refcod.encode('ascii'))
+    protcod = data['protcod'][:9].ljust(9, ' ')
+    f.write(protcod.encode('ascii'))
+    smiles = data['smiles'][:78].ljust(78, ' ')
+    f.write(smiles.encode('ascii', errors='replace'))
+    molname = data['molname'][:78].ljust(78, ' ')
+    f.write(molname.encode('ascii', errors='replace'))
+    
+    arb_count = len(data['arbitrary'])
+    f.write(struct.pack('i', arb_count))
+    for arb_line in data['arbitrary']:
+        arb_bytes = arb_line[:78].ljust(78, ' ')
+        f.write(arb_bytes.encode('ascii', errors='replace'))
+    
+    for val in data['atom_num']: f.write(struct.pack('i', val))
+    for name in data['atom_name']: f.write(name[:4].ljust(4, ' ').encode('ascii'))
+    for atype in data['atom_type']: f.write(atype[:5].ljust(5, ' ').encode('ascii'))
+    for arr in ['atom_vdwtype', 'atom_color']:
+        for val in data[arr]: f.write(struct.pack('i', val))
+    for arr in ['atom_charge', 'atom_polsolv', 'atom_apolsolv', 'atom_totalsolv', 'atom_surfarea']:
+        for val in data[arr]: f.write(struct.pack('f', val))
+    
+    for val in data['bond_num']: f.write(struct.pack('i', val))
+    for val in data['bond_start']: f.write(struct.pack('i', val))
+    for val in data['bond_end']: f.write(struct.pack('i', val))
+    for btype in data['bond_type']: f.write(btype[:2].ljust(2, ' ').encode('ascii'))
+    
+    for i, (idx, coord) in enumerate(zip(data['coord_index'], data['coords'])):
+        for val in idx: f.write(struct.pack('i', val))
+        for val in coord: f.write(struct.pack('f', val))
+    
+    for i, (color, coord) in enumerate(zip(data['rigid_colors'], data['rigid_coords'])):
+        f.write(struct.pack('i', color))
+        for val in coord: f.write(struct.pack('f', val))
+    
+    for conf in data['conf_coord']:
+        f.write(struct.pack('i', conf[0]))
+        f.write(struct.pack('i', conf[1] if len(conf) > 1 else conf[0]))
+    
+    for s in data['sets']:
+        f.write(struct.pack('i', len(s['confs'])))
+        f.write(struct.pack('i', s['broken']))
+        f.write(struct.pack('f', s['energy']))
+        f.write(struct.pack('f', s['max_strain']))
+        for conf in s['confs']: f.write(struct.pack('i', conf))
+    
+    for c in data['clusters']:
+        f.write(struct.pack('i', c['set_start']))
+        f.write(struct.pack('i', c['set_end']))
+        f.write(struct.pack('i', c['match_start']))
+        f.write(struct.pack('i', c['match_end']))
+    for c in data['clusters']:
+        for i, color in enumerate(c['add_match_colors']):
+            f.write(struct.pack('i', color))
+            for val in c['add_match_coords'][i]: f.write(struct.pack('f', val))
+
+
+def convert_stream_to_binary_bytes(f_iter, compress=False):
+    """Parse iterable stream, write to binary in-memory buffer, and return the bytes."""
+    buf = io.BytesIO()
+    
+    if compress:
+        with gzip.GzipFile(fileobj=buf, mode='wb') as gz:
+            for data in parse_db2_stream(f_iter):
+                write_single_molecule(gz, data)
+    else:
+        for data in parse_db2_stream(f_iter):
+            write_single_molecule(buf, data)
+            
+    return buf.getvalue()
+
+
+def process_tarball(input_tar_path, output_tar_path):
+    """
+    Open a tarball, iterate members, convert .db2 internally,
+    and write to new tarball entirely in-memory.
+    """
+    print(f"Processing Archive: {input_tar_path} -> {output_tar_path}")
+    
+    # Create ZstdCompressor
+    cctx = zstandard.ZstdCompressor(level=10)
+    
+    with tarfile.open(input_tar_path, 'r|gz') as in_tar, \
+        open(output_tar_path, 'wb') as out_f, \
+        cctx.stream_writer(out_f) as zstd_stream, \
+        tarfile.open(fileobj=zstd_stream, mode='w|') as out_tar:
+         
+        for member in in_tar:
+            if member.isfile() and (member.name.endswith('.db2') or member.name.endswith('.db2.gz')):
+                # Skip if it is already a binary file (e.g. .db2bin.gz inside a .db2.tgz? edge case)
+                if member.name.endswith(('.db2bin', '.db2bin.gz')):
+                    continue
+                f = in_tar.extractfile(member)
+                if f is None:
+                    continue
+                
+                # Read contents
+                is_gz = member.name.endswith('.gz')
+                try:
+                    if is_gz:
+                        with gzip.open(f, 'rt') as gz_f:
+                            bin_data = convert_stream_to_binary_bytes(gz_f, compress=True)
+                        new_name = member.name.replace('.db2.gz', '.db2bin.gz')
+                    else:
+                        bin_data = convert_stream_to_binary_bytes(f, compress=False)
+                        new_name = member.name.replace('.db2', '.db2bin')
+                    
+                    # Add to new tar
+                    new_member = tarfile.TarInfo(name=new_name)
+                    new_member.size = len(bin_data)
+                    new_member.mtime = member.mtime
+                    
+                    out_tar.addfile(new_member, io.BytesIO(bin_data))
+                    # print(f"    - Converted in-memory: {member.name} -> {new_name}")
+                    
+                except Exception as e:
+                    print(f"    - [ERROR] Failed converting inner file {member.name}: {e}")
+
+
+def process_file(input_path, output_dir, relative_to=None):
+    """
+    Process a single file (db2, db2.gz, or db2.tgz), routing to proper handler.
+    If `relative_to` is provided, re-create the directory structure inside `output_dir`.
+    """
+    input_path = Path(input_path)
+    
+    # Determine the output path preserving directory structure
+    if relative_to:
+        rel_path = input_path.relative_to(relative_to)
+        out_path = Path(output_dir) / rel_path
+    else:
+        out_path = Path(output_dir) / input_path.name
+        
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    
+    filename = input_path.name.lower()
+    
+    try:
+        if filename.endswith(('.tgz', '.tar.gz')):
+            if filename.endswith('.tar.gz'):
+                out_name = input_path.name[:-7] + ".db2bin.tar.zst" # .tar.gz -> .db2bin.tar.zst
+            else:
+                out_name = input_path.name[:-4] + ".db2bin.tar.zst" # .db2.tgz -> .db2bin.tar.zst
+            final_out_path = out_path.with_name(out_name)
+            process_tarball(input_path, final_out_path)
+            
+        elif filename.endswith('.db2.gz'):
+            out_name = input_path.name.replace('.db2.gz', '.db2bin.gz')
+            final_out_path = out_path.with_name(out_name)
+            
+            with gzip.open(input_path, 'rt') as f:
+                bin_data = convert_stream_to_binary_bytes(f, compress=True)
+            with open(final_out_path, 'wb') as out_f:
+                out_f.write(bin_data)
+                
+            print(f"  Converted {input_path} -> {final_out_path}")
+                
+        elif filename.endswith('.db2'):
+            out_name = input_path.name.replace('.db2', '.db2bin')
+            final_out_path = out_path.with_name(out_name)
+            
+            with open(input_path, 'rt') as f:
+                bin_data = convert_stream_to_binary_bytes(f, compress=False)
+            with open(final_out_path, 'wb') as out_f:
+                out_f.write(bin_data)
+                
+            print(f"  Converted {input_path} -> {final_out_path}")
+    except BaseException as e:
+        print(f"  [ERROR] Failed to process {input_path}: {e}")
+
+
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Recursively convert text DB2 files / archives to binary db2bin format without intermediate extraction.'
+    )
+    parser.add_argument('input_dir', help='Input directory to search')
+    parser.add_argument('output_dir', help='Output directory where results will be saved')
+    
+    args = parser.parse_args()
+    
+    input_dir = Path(args.input_dir).resolve()
+    output_dir = Path(args.output_dir).resolve()
+    
+    if not input_dir.is_dir():
+        print(f"Error: {input_dir} is not a directory.")
+        return
+        
+    output_dir.mkdir(parents=True, exist_ok=True)
+    
+    print(f"Scanning {input_dir} recursively for target files...")
+    
+    # Identify files
+    targets = []
+    targets.extend(input_dir.rglob("*.db2"))
+    targets.extend(input_dir.rglob("*.db2.gz"))
+    targets.extend(input_dir.rglob("*.db2.tgz"))
+    targets.extend(input_dir.rglob("*.tar.gz"))
+    
+    # Filter out `.db2bin` so we don't process binary files by mistake
+    targets = [t for t in targets if '.db2bin' not in t.name]
+    
+    if not targets:
+        print("No .db2, .db2.gz, .db2.tgz, or .tar.gz files found.")
+        return
+        
+    print(f"Found {len(targets)} files to process.")
+    
+    for t in targets:
+        process_file(t, output_dir, relative_to=input_dir)
+
+if __name__ == '__main__':
+    main()
diff --git a/src/chemscore.f b/src/chemscore.f
index 65137e4..c23128a 100644
--- a/src/chemscore.f
+++ b/src/chemscore.f
@@ -16,7 +16,7 @@ subroutine calc_chemscore(atomstart, atomend,
      &    sra, srb, atom_vdwtype,
      &    abval_precomp, ngrd, 
      &    maxatm, maxatmpos, maxtyv, 
-     &    vdwasum, vdwbsum, numscored)
+     &    bmpvdw, vdwasum, vdwbsum, numscored, bumped)
 
       use vdwconsts
 
@@ -37,90 +37,113 @@ subroutine calc_chemscore(atomstart, atomend,
       real, intent(in) :: abval_precomp(2, 8, ngrd) !actual grid values of vdw scores,
           ! precomputed a la the Carchia optimizations
       integer, intent(in) :: ngrd !how many grid points there are, dynamically changes each run
+      real, intent(in) :: bmpvdw !bump threshold for early exit
 c return values
       real, intent(inout) :: vdwasum, vdwbsum !where to put scores
       integer, intent(inout) :: numscored !keeps track of how many atom positions are evaluated
+      logical, intent(out) :: bumped !true if early exit due to bump
 
-c temporary variables
-      integer atomindex !actual location of atom
-      integer count, array_index
-      real a1, a2, a3, a4, a5, a6, a7, a8
-      integer nx, ny, nz
-      real xn(3), xgr, ygr, zgr
-c       xn - grid coordinates of ligand atom
-c       xgr, ygr, zgr - fractional cube coordinates
-      real apt, bpt
-c       apt, bpt: interpolated grid values
       real avdw, bvdw
 c       avdw: vdw repulsion energy of a ligand atom
 c       bvdw: vdw dispersion energy of a ligand atom
-c     abval_precomp declared locally so that dimensions of array can be 
-c     known.  This is a dynamically allocated array and thus the common 
-c     block does not know its size. except it does. so it isn't redeclared.
-c      real abval_precomp(2,8,ngrd)
+
+      integer :: i, n_in_block, block_idx, atomindex, count, array_index
+      real :: x_tmp(8), y_tmp(8), z_tmp(8)
+      real :: xgr_tmp(8), ygr_tmp(8), zgr_tmp(8)
+      integer :: nx_tmp(8), ny_tmp(8), nz_tmp(8)
+      integer :: array_idx_tmp(8)
+      integer :: atom_idx_tmp(8)
+      real :: a_val(8, 8), b_val(8, 8) ! [coeff_idx, atom_in_block]
+      real :: apt_tmp(8), bpt_tmp(8)
 
       vdwasum = 0.0
       vdwbsum = 0.0
-      do count = atomstart, atomend !for every atom
-        atomindex = coord_index(2, count) !gets actual atom non-consecutive #
-c       scale atoms to grid space
-c       optimization {MC}: use multiplication instead of division
-        xn(1) = (transfm_coords(1, atomindex) - offset(1)) * invgrid
-        xn(2) = (transfm_coords(2, atomindex) - offset(2)) * invgrid
-        xn(3) = (transfm_coords(3, atomindex) - offset(3)) * invgrid
-
-c       find lower left bottom grid point
-        nx = int(xn(1))
-        ny = int(xn(2))
-        nz = int(xn(3))
-         
-c       calculate fractional cube coordinates of point
-        xgr = xn(1) - float(nx)
-        ygr = xn(2) - float(ny)
-        zgr = xn(3) - float(nz)
-
-c       get index into precomputed interpolation array
-        array_index = grdpts(1)*grdpts(2)*nz + 
-     &         grdpts(1)*ny + nx + 1
-
-c       calculate vdw a term - speed optimized by {MC}
-c       pull coefficients of trilinear function from precomp
-        a1 = abval_precomp(AVAL_INDEX, 1, array_index)
-        a2 = abval_precomp(AVAL_INDEX, 2, array_index)
-        a3 = abval_precomp(AVAL_INDEX, 3, array_index)
-        a4 = abval_precomp(AVAL_INDEX, 4, array_index)
-        a5 = abval_precomp(AVAL_INDEX, 5, array_index)
-        a6 = abval_precomp(AVAL_INDEX, 6, array_index)
-        a7 = abval_precomp(AVAL_INDEX, 7, array_index)
-        a8 = abval_precomp(AVAL_INDEX, 8, array_index)
-
-c       determine interpolated VDW repulsion factor
-        apt = a1*xgr*ygr*zgr + a2*xgr*ygr +
-     &         a3*xgr*zgr + a4*ygr*zgr + a5*xgr +
-     &         a6*ygr + a7*zgr + a8
-
-c       calculate vdw b term  - speed optimized by {MC}
-        a1 = abval_precomp(BVAL_INDEX, 1, array_index)
-        a2 = abval_precomp(BVAL_INDEX, 2, array_index)
-        a3 = abval_precomp(BVAL_INDEX, 3, array_index)
-        a4 = abval_precomp(BVAL_INDEX, 4, array_index)
-        a5 = abval_precomp(BVAL_INDEX, 5, array_index)
-        a6 = abval_precomp(BVAL_INDEX, 6, array_index)
-        a7 = abval_precomp(BVAL_INDEX, 7, array_index)
-        a8 = abval_precomp(BVAL_INDEX, 8, array_index)
-
-c       determine interpolated VDW dispersion factor
-        bpt = a1*xgr*ygr*zgr + a2*xgr*ygr +
-     &         a3*xgr*zgr + a4*ygr*zgr + a5*xgr +
-     &         a6*ygr + a7*zgr + a8
-
-c       compute final vdw terms
-        avdw = sra(atom_vdwtype(atomindex)) * apt 
-        bvdw = srb(atom_vdwtype(atomindex)) * bpt
-
-c       sum vdw terms
-        vdwasum = vdwasum + avdw
-        vdwbsum = vdwbsum + bvdw
+      bumped = .false.
+
+      do block_idx = atomstart, atomend, 8
+        n_in_block = min(8, atomend - block_idx + 1)
+
+        ! 1. Gather coordinates and compute grid indices
+        do i = 1, n_in_block
+          count = block_idx + i - 1
+          atomindex = coord_index(2, count)
+          atom_idx_tmp(i) = atomindex
+          
+          x_tmp(i) = (transfm_coords(1, atomindex) - offset(1)) 
+     &               * invgrid
+          y_tmp(i) = (transfm_coords(2, atomindex) - offset(2)) 
+     &               * invgrid
+          z_tmp(i) = (transfm_coords(3, atomindex) - offset(3)) 
+     &               * invgrid
+
+          nx_tmp(i) = int(x_tmp(i))
+          ny_tmp(i) = int(y_tmp(i))
+          nz_tmp(i) = int(z_tmp(i))
+
+          xgr_tmp(i) = x_tmp(i) - float(nx_tmp(i))
+          ygr_tmp(i) = y_tmp(i) - float(ny_tmp(i))
+          zgr_tmp(i) = z_tmp(i) - float(nz_tmp(i))
+
+          array_idx_tmp(i) = grdpts(1)*grdpts(2)*nz_tmp(i) + 
+     &                       grdpts(1)*ny_tmp(i) + nx_tmp(i) + 1
+        enddo
+
+        ! 2. Gather interpolation coefficients
+        do i = 1, n_in_block
+          array_index = array_idx_tmp(i)
+          ! AVAL coefficients
+          a_val(1, i) = abval_precomp(AVAL_INDEX, 1, array_index)
+          a_val(2, i) = abval_precomp(AVAL_INDEX, 2, array_index)
+          a_val(3, i) = abval_precomp(AVAL_INDEX, 3, array_index)
+          a_val(4, i) = abval_precomp(AVAL_INDEX, 4, array_index)
+          a_val(5, i) = abval_precomp(AVAL_INDEX, 5, array_index)
+          a_val(6, i) = abval_precomp(AVAL_INDEX, 6, array_index)
+          a_val(7, i) = abval_precomp(AVAL_INDEX, 7, array_index)
+          a_val(8, i) = abval_precomp(AVAL_INDEX, 8, array_index)
+          
+          ! BVAL coefficients
+          b_val(1, i) = abval_precomp(BVAL_INDEX, 1, array_index)
+          b_val(2, i) = abval_precomp(BVAL_INDEX, 2, array_index)
+          b_val(3, i) = abval_precomp(BVAL_INDEX, 3, array_index)
+          b_val(4, i) = abval_precomp(BVAL_INDEX, 4, array_index)
+          b_val(5, i) = abval_precomp(BVAL_INDEX, 5, array_index)
+          b_val(6, i) = abval_precomp(BVAL_INDEX, 6, array_index)
+          b_val(7, i) = abval_precomp(BVAL_INDEX, 7, array_index)
+          b_val(8, i) = abval_precomp(BVAL_INDEX, 8, array_index)
+        enddo
+
+        ! 3. Compute interpolated values using FMA Factorization
+        do i = 1, n_in_block
+          apt_tmp(i) = zgr_tmp(i) * (ygr_tmp(i) * 
+     &                 (xgr_tmp(i) * a_val(1,i) + a_val(4,i)) + 
+     &                 (xgr_tmp(i) * a_val(3,i) + a_val(7,i))) + 
+     &                 (ygr_tmp(i) * (xgr_tmp(i) * a_val(2,i) + 
+     &                 a_val(6,i)) + (xgr_tmp(i) * a_val(5,i) + 
+     &                 a_val(8,i)))
+
+          bpt_tmp(i) = zgr_tmp(i) * (ygr_tmp(i) * 
+     &                 (xgr_tmp(i) * b_val(1,i) + b_val(4,i)) + 
+     &                 (xgr_tmp(i) * b_val(3,i) + b_val(7,i))) + 
+     &                 (ygr_tmp(i) * (xgr_tmp(i) * b_val(2,i) + 
+     &                 b_val(6,i)) + (xgr_tmp(i) * b_val(5,i) + 
+     &                 b_val(8,i)))
+        enddo
+
+        ! 4. Sum up the results
+        do i = 1, n_in_block
+          atomindex = atom_idx_tmp(i)
+          avdw = sra(atom_vdwtype(atomindex)) * apt_tmp(i) 
+          bvdw = srb(atom_vdwtype(atomindex)) * bpt_tmp(i)
+          vdwasum = vdwasum + avdw
+          vdwbsum = vdwbsum + bvdw
+        enddo
+
+        ! EARLY EXIT: Check if bumped after each block
+        if ((vdwasum - vdwbsum) .gt. bmpvdw) then
+          bumped = .true.
+          numscored = numscored + 1
+          return
+        endif
       enddo
 
       numscored = numscored + 1
@@ -128,3 +151,4 @@ subroutine calc_chemscore(atomstart, atomend,
       end subroutine calc_chemscore
 
       end module chemscore
+
diff --git a/src/cov_bond.f b/src/cov_bond.f
index 7ad9317..7365ea8 100644
--- a/src/cov_bond.f
+++ b/src/cov_bond.f
@@ -115,7 +115,7 @@ subroutine cov_bond(p1,p2,p3,a,fi,d,out)
 c     around the bond angle v-u
 c     notice the dihedral angle is set to 0 is there are only 3 atoms 
 c     this is left in (and not zeroed) for clarity
-      call dh_mat(ang_vu,0,v_len,T0)
+      call dh_mat(ang_vu,0.0,v_len,T0)
 
 c     calculate the second (T1) Denavit-Hartenberg matrix to roatate
 c     around the bond angle v-covalent bond
diff --git a/src/db2binary.f b/src/db2binary.f
new file mode 100644
index 0000000..8d5bff1
--- /dev/null
+++ b/src/db2binary.f
@@ -0,0 +1,404 @@
+c----------------------------------------------------------------------
+c
+c  Binary DB2 I/O module for high-performance ligand reading
+c  Provides gzip-compatible binary format (db2bin.gz) 
+c  ~5-20x faster than text parsing
+c  
+c  Phong Lam, Uppsala University, January 2026
+c
+c----------------------------------------------------------------------
+      module db2binary
+
+      use db2type
+      use optionstype
+      use filenums
+      use spheres
+
+      implicit none
+
+c Binary format magic and version
+      character(6), parameter :: DB2BIN_MAGIC = 'DB2BIN'
+      integer, parameter :: DB2BIN_VERSION = 1
+
+      contains
+
+c----------------------------------------------------------------------
+c Helper: read integer from gzipped binary file
+c----------------------------------------------------------------------
+      subroutine read_int_gz(fdlig, val, istat)
+      integer(kind=8), intent(in) :: fdlig
+      integer, intent(out) :: val
+      integer, intent(out) :: istat
+      character(4) :: buf
+      
+      call gzbread(fdlig, buf, 4, istat)
+      val = transfer(buf, val)
+      end subroutine read_int_gz
+
+c----------------------------------------------------------------------
+c Helper: read real from gzipped binary file
+c----------------------------------------------------------------------
+      subroutine read_real_gz(fdlig, val, istat)
+      integer(kind=8), intent(in) :: fdlig
+      real, intent(out) :: val
+      integer, intent(out) :: istat
+      character(4) :: buf
+      
+      call gzbread(fdlig, buf, 4, istat)
+      val = transfer(buf, val)
+      end subroutine read_real_gz
+
+c----------------------------------------------------------------------
+c Helper: read integer array from gzipped binary file  
+c----------------------------------------------------------------------
+      subroutine read_int_array_gz(fdlig, arr, n, istat)
+      integer(kind=8), intent(in) :: fdlig
+      integer, intent(in) :: n
+      integer, intent(out) :: arr(n)
+      integer, intent(out) :: istat
+      character, allocatable :: buf(:)
+      integer :: nbytes
+      
+      nbytes = n * 4
+      allocate(buf(nbytes))
+      call gzbread(fdlig, buf, nbytes, istat)
+      arr = transfer(buf, arr)
+      deallocate(buf)
+      end subroutine read_int_array_gz
+
+c----------------------------------------------------------------------
+c Helper: read real array from gzipped binary file
+c----------------------------------------------------------------------
+      subroutine read_real_array_gz(fdlig, arr, n, istat)
+      integer(kind=8), intent(in) :: fdlig
+      integer, intent(in) :: n
+      real, intent(out) :: arr(n)
+      integer, intent(out) :: istat
+      character, allocatable :: buf(:)
+      integer :: nbytes
+      
+      nbytes = n * 4
+      allocate(buf(nbytes))
+      call gzbread(fdlig, buf, nbytes, istat)
+      arr = transfer(buf, arr)
+      deallocate(buf)
+      end subroutine read_real_array_gz
+
+c----------------------------------------------------------------------
+c read_db2_binary: Fast binary reader for db2bin.gz files
+c Replaces ligread2 when use_binary_db2 = .true.
+c----------------------------------------------------------------------
+      subroutine read_db2_binary(db2lig, options0, 
+     &    inputend, rigid_heavy_coords, 
+     &    rigid_colors, rigid_heavy_count, fdlig, ligc, db2c)
+
+      type(db2), intent(inout) :: db2lig
+      type(options), intent(inout) :: options0
+      logical, intent(inout) :: inputend
+      real, intent(inout), dimension(3, MAXPTS) :: rigid_heavy_coords
+      integer, intent(inout), dimension(MAXPTS) :: rigid_colors
+      integer, intent(inout) :: rigid_heavy_count
+      integer(kind=8), intent(inout) :: fdlig
+      integer, intent(inout) :: ligc
+      integer, intent(inout) :: db2c
+
+c temporary variables
+      character(6) :: magic
+      integer :: version, inputstatus
+      integer :: total_atoms, total_bonds, total_coords
+      integer :: total_confs, total_sets, total_clusters
+      integer :: mlines, i, j, curset, curconf
+      integer :: arb_count
+      real :: total_charge, total_polar_solv, total_apolar_solv
+      real :: total_solv, total_surfarea
+      real :: set_energy_val, set_max_strain_val
+      integer :: conf_len, broken_val, conf_val
+      integer :: set_start, set_end, match_start, match_end
+      integer :: match_color
+      real :: match_x, match_y, match_z
+      integer, allocatable :: temp_int_arr(:)
+      real, allocatable :: temp_real_arr(:)
+
+c read magic number (6 bytes)
+      call gzbread(fdlig, magic, 6, inputstatus)
+      if (inputstatus .le. 0) then
+        inputend = .true.
+        return
+      endif
+      
+      if (magic .ne. DB2BIN_MAGIC) then
+        write(OUTDOCK, *) 'ERROR: Invalid binary DB2 magic: ', magic
+        write(OUTDOCK, *) 'Expected: ', DB2BIN_MAGIC
+        inputend = .true.
+        return
+      endif
+
+c read version
+      call read_int_gz(fdlig, version, inputstatus)
+      if (version .ne. DB2BIN_VERSION) then
+        write(OUTDOCK, *) 'WARNING: Binary DB2 version mismatch'
+        write(OUTDOCK, *) 'File version: ', version
+        write(OUTDOCK, *) 'Expected: ', DB2BIN_VERSION
+      endif
+
+c read header integers
+      call read_int_gz(fdlig, total_atoms, inputstatus)
+      call read_int_gz(fdlig, total_bonds, inputstatus)
+      call read_int_gz(fdlig, total_coords, inputstatus)
+      call read_int_gz(fdlig, total_confs, inputstatus)
+      call read_int_gz(fdlig, total_sets, inputstatus)
+      call read_int_gz(fdlig, rigid_heavy_count, inputstatus)
+      call read_int_gz(fdlig, total_clusters, inputstatus)
+      call read_int_gz(fdlig, mlines, inputstatus)
+
+c read header reals
+      call read_real_gz(fdlig, total_charge, inputstatus)
+      call read_real_gz(fdlig, total_polar_solv, inputstatus)
+      call read_real_gz(fdlig, total_apolar_solv, inputstatus)
+      call read_real_gz(fdlig, total_solv, inputstatus)
+      call read_real_gz(fdlig, total_surfarea, inputstatus)
+
+c read refcod (16 bytes, padded) and trim NUL characters
+      call gzbread(fdlig, db2lig%refcod, 16, inputstatus)
+c replace NUL chars in refcod with spaces
+      do i = 1, 16
+        if (db2lig%refcod(i:i) .eq. char(0)) then
+          db2lig%refcod(i:i) = ' '
+        endif
+      enddo
+
+c read protcod (9 bytes)
+      call gzbread(fdlig, db2lig%protcod, 9, inputstatus)
+
+c read smiles (78 bytes)
+      call gzbread(fdlig, db2lig%smiles, 78, inputstatus)
+
+c read molname (78 bytes)
+      call gzbread(fdlig, db2lig%molname, 78, inputstatus)
+
+c read arbitrary lines count and data
+      call read_int_gz(fdlig, arb_count, inputstatus)
+      
+c store header values (update mlines to include arbitrary)
+      db2lig%total_atoms = total_atoms
+      db2lig%total_bonds = total_bonds
+      db2lig%total_coords = total_coords
+      db2lig%total_confs = total_confs
+      db2lig%total_sets = total_sets
+      db2lig%total_clusters = total_clusters
+      db2lig%mlines = mlines
+      db2lig%total_charge = total_charge
+      db2lig%total_polar_solv = total_polar_solv
+      db2lig%total_apolar_solv = total_apolar_solv
+      db2lig%total_solv = total_solv
+      db2lig%total_surfarea = total_surfarea
+
+c allocate arrays
+      call allocate_db2(db2lig, OUTDOCK, total_sets,
+     &    total_coords, total_atoms, total_bonds,
+     &    total_confs, mlines, total_clusters,
+     &    options0%dockovalent, options0%nsav)
+
+c initialize set_conf_len to zero
+      db2lig%set_conf_len(:) = 0
+      
+c initialize arbitrary array to spaces
+      if (allocated(db2lig%arbitrary)) then
+          db2lig%arbitrary = ' '
+      endif
+
+c read arbitrary lines (after allocate since it allocates the array)
+      do i = 1, arb_count
+        call gzbread(fdlig, db2lig%arbitrary(i), 78, inputstatus)
+      enddo
+
+c----------------------------------------------------------------------
+c read atom arrays
+c----------------------------------------------------------------------
+      allocate(temp_int_arr(total_atoms))
+      allocate(temp_real_arr(total_atoms))
+      
+c read atom_num (int32)
+      call read_int_array_gz(fdlig, temp_int_arr, total_atoms, 
+     &                       inputstatus)
+      db2lig%atom_num(1:total_atoms) = temp_int_arr
+      
+c read atom_name (4 bytes each)
+      do i = 1, total_atoms
+        call gzbread(fdlig, db2lig%atom_name(i), 4, inputstatus)
+      enddo
+      
+c read atom_type (5 bytes each)
+      do i = 1, total_atoms
+        call gzbread(fdlig, db2lig%atom_type(i), 5, inputstatus)
+      enddo
+      
+c read atom_vdwtype
+      call read_int_array_gz(fdlig, temp_int_arr, total_atoms, 
+     &                       inputstatus)
+      db2lig%atom_vdwtype(1:total_atoms) = temp_int_arr
+      
+c read atom_color
+      call read_int_array_gz(fdlig, temp_int_arr, total_atoms,
+     &                       inputstatus)
+      db2lig%atom_color(1:total_atoms) = temp_int_arr
+      
+      call read_real_array_gz(fdlig, temp_real_arr, total_atoms,
+     &                        inputstatus)
+      db2lig%atom_charge(1:total_atoms) = temp_real_arr
+      
+      call read_real_array_gz(fdlig, temp_real_arr, total_atoms,
+     &                        inputstatus)
+      db2lig%atom_polsolv(1:total_atoms) = temp_real_arr
+      
+      call read_real_array_gz(fdlig, temp_real_arr, total_atoms,
+     &                        inputstatus)
+      db2lig%atom_apolsolv(1:total_atoms) = temp_real_arr
+      
+      call read_real_array_gz(fdlig, temp_real_arr, total_atoms,
+     &                        inputstatus)
+      db2lig%atom_totalsolv(1:total_atoms) = temp_real_arr
+      
+      call read_real_array_gz(fdlig, temp_real_arr, total_atoms,
+     &                        inputstatus)
+      db2lig%atom_surfarea(1:total_atoms) = temp_real_arr
+      
+      deallocate(temp_int_arr)
+      deallocate(temp_real_arr)
+
+c----------------------------------------------------------------------
+c read bond data
+c----------------------------------------------------------------------
+      allocate(temp_int_arr(total_bonds))
+      
+c read bond_num (int32)
+      call read_int_array_gz(fdlig, temp_int_arr, total_bonds,
+     &                       inputstatus)
+      db2lig%bond_num(1:total_bonds) = temp_int_arr
+      
+c read bond_start (int32)
+      call read_int_array_gz(fdlig, temp_int_arr, total_bonds,
+     &                       inputstatus)
+      db2lig%bond_start(1:total_bonds) = temp_int_arr
+      
+c read bond_end (int32)
+      call read_int_array_gz(fdlig, temp_int_arr, total_bonds,
+     &                       inputstatus)
+      db2lig%bond_end(1:total_bonds) = temp_int_arr
+      
+c read bond_type (2 bytes each)
+      do i = 1, total_bonds
+        call gzbread(fdlig, db2lig%bond_type(i), 2, inputstatus)
+      enddo
+      
+      deallocate(temp_int_arr)
+
+c----------------------------------------------------------------------
+c read coordinate data
+c----------------------------------------------------------------------
+      do i = 1, total_coords
+        call read_int_gz(fdlig, db2lig%coord_index(1,i), inputstatus)
+        call read_int_gz(fdlig, db2lig%coord_index(2,i), inputstatus)
+        call read_int_gz(fdlig, db2lig%coord_index(3,i), inputstatus)
+        call read_real_gz(fdlig, db2lig%coords(1,i), inputstatus)
+        call read_real_gz(fdlig, db2lig%coords(2,i), inputstatus)
+        call read_real_gz(fdlig, db2lig%coords(3,i), inputstatus)
+      enddo
+
+c----------------------------------------------------------------------
+c read rigid matching spheres
+c----------------------------------------------------------------------
+      do i = 1, rigid_heavy_count
+        call read_int_gz(fdlig, rigid_colors(i), inputstatus)
+        call read_real_gz(fdlig, rigid_heavy_coords(1,i), inputstatus)
+        call read_real_gz(fdlig, rigid_heavy_coords(2,i), inputstatus)
+        call read_real_gz(fdlig, rigid_heavy_coords(3,i), inputstatus)
+      enddo
+
+c----------------------------------------------------------------------
+c read conformation coord indices
+c----------------------------------------------------------------------
+      do i = 1, total_confs
+        call read_int_gz(fdlig, db2lig%conf_coord(1,i), inputstatus)
+        call read_int_gz(fdlig, db2lig%conf_coord(2,i), inputstatus)
+      enddo
+
+c----------------------------------------------------------------------
+c read set data
+c----------------------------------------------------------------------
+      do curset = 1, total_sets
+        call read_int_gz(fdlig, conf_len, inputstatus)
+        call read_int_gz(fdlig, broken_val, inputstatus)
+        call read_real_gz(fdlig, set_energy_val, inputstatus)
+        call read_real_gz(fdlig, set_max_strain_val, inputstatus)
+        
+        db2lig%set_conf_len(curset) = conf_len
+        db2lig%set_broken(curset) = broken_val
+        db2lig%set_energy(curset) = set_energy_val * options0%iscale
+        db2lig%set_total_strain(curset) = set_energy_val
+        db2lig%set_max_strain(curset) = set_max_strain_val
+        
+        if ((set_energy_val .ge. options0%total_strain) .or.
+     &      (set_max_strain_val .ge. options0%max_strain)) then
+          db2lig%set_above_strain(curset) = 1
+        else
+          db2lig%set_above_strain(curset) = 0
+        endif
+        
+        do curconf = 1, conf_len
+          call read_int_gz(fdlig, conf_val, inputstatus)
+          db2lig%set_conf(curset, curconf) = conf_val
+        enddo
+      enddo
+
+c----------------------------------------------------------------------
+c read cluster data
+c----------------------------------------------------------------------
+      do i = 1, total_clusters
+        call read_int_gz(fdlig, db2lig%cluster_set_start(i),inputstatus)
+        call read_int_gz(fdlig, db2lig%cluster_set_end(i), inputstatus)
+        call read_int_gz(fdlig, db2lig%cluster_match_start(i),
+     &                   inputstatus)
+        call read_int_gz(fdlig, db2lig%cluster_match_end(i),inputstatus)
+      enddo
+
+c read additional matching spheres for each cluster
+      do i = 1, total_clusters
+        do j = db2lig%cluster_match_start(i), 
+     &         db2lig%cluster_match_end(i)
+          call read_int_gz(fdlig, db2lig%addmatch_color(j), inputstatus)
+          call read_real_gz(fdlig, db2lig%addmatch_coord(1,j),
+     &                      inputstatus)
+          call read_real_gz(fdlig, db2lig%addmatch_coord(2,j),
+     &                      inputstatus)
+          call read_real_gz(fdlig, db2lig%addmatch_coord(3,j),
+     &                      inputstatus)
+        enddo
+      enddo
+
+c----------------------------------------------------------------------
+c post-processing: compute derived values like ligread2 does
+c----------------------------------------------------------------------
+c compute total heavy atom count (atoms where vdwtype != 6 and != 7)
+      db2lig%total_heavy_atoms = 0
+      db2lig%ligcharge = 0.0
+      do i = 1, total_atoms
+        if ((db2lig%atom_vdwtype(i) .ne. 6) .and. 
+     &      (db2lig%atom_vdwtype(i) .ne. 7)) then
+          db2lig%total_heavy_atoms = db2lig%total_heavy_atoms + 1
+        endif
+        db2lig%ligcharge = db2lig%ligcharge + db2lig%atom_charge(i)
+      enddo
+
+c initialize atom_heavy_valence to 0
+      do i = 1, total_atoms
+        db2lig%atom_heavy_valence(i) = 0
+      enddo
+
+      ligc = ligc + 1
+      inputend = .false.
+
+      return
+      end subroutine read_db2_binary
+
+      end module db2binary
diff --git a/src/degeneracycheck.f b/src/degeneracycheck.f
index a72e32a..8eb7a50 100644
--- a/src/degeneracycheck.f
+++ b/src/degeneracycheck.f
@@ -1,6 +1,7 @@
 c makes a hash of the lig spheres and rec spheres. checks this against
-c previous hashes, if new (not seen before) add to list andreturn true. 
+c previous hashes, if new (not seen before) add to list and return true. 
 c if seen before, return false.
+c OPTIMIZED: Uses hash buckets for O(1) average lookup instead of O(n) linear scan
 
       logical function degeneracycheck(nodetotal, sphpairs, 
      &          hash, hashcount,
@@ -20,25 +21,53 @@ logical function degeneracycheck(nodetotal, sphpairs,
       integer hash(maxor, maxctr) !hash of sph numbers
       integer hashcount !how many hashes have been stored
 
+c hash bucket parameters - using prime number for bucket count
+      integer, parameter :: NBUCKETS = 65537
+      integer, save :: bucket_heads(NBUCKETS)
+      integer, save :: bucket_next(100000)
+      logical, save :: buckets_initialized = .false.
+      integer, save :: last_hashcount = 0
+
 c temporary variables
       integer nodeindex, hashindex, centerindex, tmpindex !temporary counters
       integer tmphash(maxctr) !the temporarily computed hash
       logical match_found, all_matched
+      integer bucket_idx, hash_val, idx
+
+c initialize or reset buckets if hashcount was reset (new ligand)
+      if (.not. buckets_initialized .or. hashcount .lt. last_hashcount) 
+     &    then
+        do idx = 1, NBUCKETS
+          bucket_heads(idx) = 0
+        enddo
+        buckets_initialized = .true.
+        last_hashcount = 0
+      endif
 
       degeneracycheck = .true. !default is true
+      
+c compute hash for current match
+      hash_val = 0
       do nodeindex = 1, nodetotal !for every node
         tmphash(nodeindex) = sphpairs(nodeindex, 1) + 
      &                       sphpairs(nodeindex, 2) * maxpts !makes uniq number
+        hash_val = hash_val + tmphash(nodeindex) * (7919 ** nodeindex)
       enddo
-      do hashindex = 1, hashcount !check all previously found matches
-        !assume order doesn't matter, check to see if all of tmphash matches
+      
+c compute bucket index (1 to NBUCKETS)
+      bucket_idx = mod(abs(hash_val), NBUCKETS) + 1
+      
+c search only within this bucket instead of all entries
+      idx = bucket_heads(bucket_idx)
+      do while (idx .gt. 0)
+        !check if this entry matches - assume order doesn't matter
         all_matched = .true.
         do tmpindex = 1, maxctr
           match_found = .false.
           do centerindex = 1, maxctr
-            if (tmphash(tmpindex) .eq. 
-     &                   hash(hashindex, centerindex)) then
+            if (tmphash(tmpindex) .eq. hash(idx, centerindex)) then
               match_found = .true.
+              exit
             endif
           enddo
           if (.not. match_found) then
@@ -47,19 +76,21 @@ logical function degeneracycheck(nodetotal, sphpairs,
           endif       
         enddo       
         if (all_matched) then !whole tmphash matched
-c          write (6,*) tmphash, "tmp"
-c          write (6,*) hash(hashindex, 1), hash(hashindex, 2),
-c     &                hash(hashindex, 3), hash(hashindex, 4), "hash"
           degeneracycheck = .false.
           return !quit and return, match found, nothing to do
         endif
+        idx = bucket_next(idx)
       enddo
-      if (degeneracycheck) then !no match found, so add the temps 
-        hashcount = hashcount + 1
-        do centerindex = 1, maxctr !copy them all, even 0s
-          hash(hashcount, centerindex) = tmphash(centerindex)
-        enddo
-      endif
+      
+c not found - add new entry
+      hashcount = hashcount + 1
+      last_hashcount = hashcount
+      do centerindex = 1, maxctr !copy them all, even 0s
+        hash(hashcount, centerindex) = tmphash(centerindex)
+      enddo
+c insert at head of bucket's linked list
+      bucket_next(hashcount) = bucket_heads(bucket_idx)
+      bucket_heads(bucket_idx) = hashcount
 
       return !degeneracycheck as the return value, is still true if here
       end
diff --git a/src/dokw.f b/src/dokw.f
index a79d514..055c6b9 100644
--- a/src/dokw.f
+++ b/src/dokw.f
@@ -297,6 +297,17 @@ subroutine dokw(keywrd, args, OUTDOCK_h, phimap0, recdes0,
           options0%use_gist = .false.
         endif
 
+      else if (keywrd .eq. 'use_binary_db2') then
+        call tolow(args)
+        if (nf .lt. 1) then
+          options0%use_binary_db2 = .false.
+        else if (args(1:1) .eq. 'y') then
+          options0%use_binary_db2 = .true.
+          write(OUTDOCK_h,*) 'Binary DB2 format enabled (db2bin.gz)'
+        else
+          options0%use_binary_db2 = .false.
+        endif
+
       else if (keywrd .eq. 'output_file_prefix') then
         if (nf .lt. 1) goto 940
         read(args,'(a255)') options0%outfil
diff --git a/src/fullmatch.f b/src/fullmatch.f
index 4460eb2..74f3916 100644
--- a/src/fullmatch.f
+++ b/src/fullmatch.f
@@ -3,6 +3,7 @@
 c like all matching functions, this is called once per pair of initial spheres
 c results are added to nmatch, coml, comr, rot
 c 09/2011 RGC
+c OPTIMIZED 12/2024: Uses canonical ordering to avoid duplicate matches
 c 
       subroutine fullmatch(ligsphcount, ligsphcolor, ligcount, reccount, 
      &   recsphcolor, colortable, colormatch, disttol, disl, disr,
@@ -60,7 +61,8 @@ subroutine fullmatch(ligsphcount, ligsphcolor, ligcount, reccount,
           return  !color check failed, just quit and return
         endif
       endif
-      call sphsfromdists(ligcount, reccount, disl, disr, 
+c     Use sphsfromdists with canonical ordering (ligand indices > seed only)
+      call sphsfromdists(ligcount, reccount, disl, disr,
      &           maxlig, maxrec, disttol,
      &           colormatch, ligsphcolor, recsphcolor, 
      &           colortable, maxpts, maxcol, sphmatches)
diff --git a/src/i386/Makefile.pg b/src/i386/Makefile.pg
index 82e741a..9d71ce3 100755
--- a/src/i386/Makefile.pg
+++ b/src/i386/Makefile.pg
@@ -5,9 +5,9 @@ ifeq ($(COMPILER), pgf)
 # Path needs to be to 64bit compiler to compile 64b targets 
 
 # According to PGF Documentation, -fastsse with pgf95 yields fastest results. (vs pgf90, 77, etc)
-F77 = pgf95
-CC = pgcc
-PROF = pgprof
+F77 = nvfortran
+CC = nvc
+PROF = nvprof
 GPROF = gprof
 
 
diff --git a/src/libfgz/Makefile b/src/libfgz/Makefile
index 9db360d..4343a96 100644
--- a/src/libfgz/Makefile
+++ b/src/libfgz/Makefile
@@ -55,11 +55,11 @@ ifeq ($(COMPILER),pgf)
 #capable of 64b targets so that either 32b or 64b targets can be compiled.
 #Thus, use the compiler in .../linux86-x64/... and not /linux86/
 #PGPATH = /raid3/software/pgi/9.0.4/linux86-64/9.0-4/bin
-PGPATH = /nfs/soft/pgi/current/linux86-64/12.10/bin
+#PGPATH = /software/sse2/tetralith_el9/manual/nvhpc/23.7/Linux_x86_64/23.7/compilers/bin
 #/nfs/soft/pgi/v12_10/linux86-64/12.10/bin/pgf95
 #PGPATH = /mnt/nasapps/production/PGI/19.5/linux86-64-llvm/19.5/bin
-FC = $(PGPATH)/pgf95 
-CC = $(PGPATH)/pgcc 
+FC = nvfortran 
+CC = nvc 
 
 #Set these to the lowest common 64b and 32b architectures to optimize for.
 #ARCH64 = -tp penryn-64
diff --git a/src/ligread2.f b/src/ligread2.f
index 91a6cdb..846417b 100644
--- a/src/ligread2.f
+++ b/src/ligread2.f
@@ -10,6 +10,7 @@ subroutine ligread2(db2lig, options0,
       use db2formats
       use filenums
       use spheres
+      use db2binary
 
       implicit none
 
@@ -127,6 +128,37 @@ end function tokenize
         return
       endif
 
+c----------------------------------------------------------------------
+c Binary DB2 dispatch: use fast binary reader if enabled
+c----------------------------------------------------------------------
+      if (options0%use_binary_db2) then
+10      continue
+        call read_db2_binary(db2lig, options0, inputend,
+     &      rigid_heavy_coords, rigid_colors, rigid_heavy_count,
+     &      fdlig, ligc, db2c)
+        if (inputend) then
+          call gzclose(fdlig, inputstatus)
+          write(OUTDOCK, *) 'close the file: ', TRIM(options0%ligfil)
+          if (options0%sdi) then
+            inputend = .false.
+            db2c = db2c + 1
+            ligc = 0
+            read(SDIFILE, '(a255)', err=15, end=15) options0%ligfil
+            call gzopen(fdlig, 'r', options0%ligfil, inputstatus)
+            write(OUTDOCK, *) 'open the file: ', TRIM(options0%ligfil)
+            if (inputstatus .eq. 0) then
+              goto 10 ! Loop back to read first molecule of next file
+            endif
+15          continue
+            inputend = .true.
+            write(OUTDOCK, *) ' we reached the end of the 
+     &                split_database_index file'
+            write(OUTDOCK, *) "close SDIFILE"
+            close(SDIFILE)
+          endif
+        endif
+        return
+      endif
 
       do while (read_okay)
 c       write(OUTDOCK,*) "read_okay"
diff --git a/src/optionstype.f b/src/optionstype.f
index b6acaed..817bf82 100644
--- a/src/optionstype.f
+++ b/src/optionstype.f
@@ -61,6 +61,7 @@ module optionstype
                             ! gist expects the confs to be in a certain
                             ! order
         character (len=255) :: use_gist_val ! string the should be yes or no.  
+        logical :: use_binary_db2 ! true to read binary DB2 format (db2bin)
         logical :: per_atom_scores !true if you want extra output that includes per atom scores for each ligand
         logical :: receptor_desolv !true if you want to read in and score using receptor desolvation grids
         logical :: rec_des_r,hrec_des_r ! true if you want to read in REC_DESOLV 
diff --git a/src/phiscore.f b/src/phiscore.f
index b7832bc..ac4a087 100644
--- a/src/phiscore.f
+++ b/src/phiscore.f
@@ -31,49 +31,66 @@ subroutine phipdb(atomstart, atomend,
 c return variable
       real, intent(inout) :: potent(maxatm) !electrostatic potential at each atom
 
-      integer atomindex !actual position used
-      integer count !atom position counter
-      integer nx, ny, nz
-      real xn(3)
-      real a1, a2, a3, a4, a5, a6, a7, a8
-      real xgr, ygr, zgr
-
-c     loop over all atoms
-      do count = atomstart, atomend
-        atomindex = coord_index(2, count) !gets actual atom non-consecutive #
-c       scale atoms to grid space
-        xn(1) = (transfm_coords(1, atomindex) - 
-     &       oldmid(1)) * phiscale + goff
-        xn(2) = (transfm_coords(2, atomindex) -
-     &       oldmid(2)) * phiscale + goff
-        xn(3) = (transfm_coords(3, atomindex) - 
-     &       oldmid(3)) * phiscale + goff
-
-c       find lower left bottom grid point
-        nx = int(xn(1))
-        ny = int(xn(2))
-        nz = int(xn(3))
-c       $mem prefetch   phimap_precomp(1,nx,ny,nz)
-
-c       calculate fractional cube coordinates of point
-        xgr = xn(1) - float(nx)
-        ygr = xn(2) - float(ny)
-        zgr = xn(3) - float(nz)
-
-c       calculate potential - speed optimized by {MC}
-c       pull coefficients of trilinear function from precomp
-        a8 = phimap_precomp(8, nx, ny, nz)
-        a7 = phimap_precomp(7, nx, ny, nz)
-        a6 = phimap_precomp(6, nx, ny, nz)
-        a5 = phimap_precomp(5, nx, ny, nz)
-        a4 = phimap_precomp(4, nx, ny, nz)
-        a3 = phimap_precomp(3, nx, ny, nz)
-        a2 = phimap_precomp(2, nx, ny, nz)
-        a1 = phimap_precomp(1, nx, ny, nz)
-
-c       determine value of potential
-        potent(atomindex) = a1*xgr*ygr*zgr + a2*xgr*ygr + a3*xgr*zgr
-     &       + a4*ygr*zgr + a5*xgr + a6*ygr + a7*zgr + a8
+
+      integer :: i, n_in_block, block_idx, atomindex, count, nx, ny, nz
+      real :: x_tmp(8), y_tmp(8), z_tmp(8)
+      real :: xgr_tmp(8), ygr_tmp(8), zgr_tmp(8)
+      integer :: nx_tmp(8), ny_tmp(8), nz_tmp(8)
+      integer :: atom_idx_tmp(8)
+      real :: a_val(8, 8) ! [coeff_idx, atom_in_block]
+
+c     loop over all atoms in blocks of 8
+      do block_idx = atomstart, atomend, 8
+        n_in_block = min(8, atomend - block_idx + 1)
+
+        ! 1. Gather coordinates and compute grid indices
+        do i = 1, n_in_block
+          count = block_idx + i - 1
+          atomindex = coord_index(2, count)
+          atom_idx_tmp(i) = atomindex
+          
+          x_tmp(i) = (transfm_coords(1, atomindex) - oldmid(1)) 
+     &               * phiscale + goff
+          y_tmp(i) = (transfm_coords(2, atomindex) - oldmid(2)) 
+     &               * phiscale + goff
+          z_tmp(i) = (transfm_coords(3, atomindex) - oldmid(3)) 
+     &               * phiscale + goff
+
+          nx_tmp(i) = int(x_tmp(i))
+          ny_tmp(i) = int(y_tmp(i))
+          nz_tmp(i) = int(z_tmp(i))
+
+          xgr_tmp(i) = x_tmp(i) - float(nx_tmp(i))
+          ygr_tmp(i) = y_tmp(i) - float(ny_tmp(i))
+          zgr_tmp(i) = z_tmp(i) - float(nz_tmp(i))
+        enddo
+
+        ! 2. Gather interpolation coefficients
+        do i = 1, n_in_block
+          nx = nx_tmp(i)
+          ny = ny_tmp(i)
+          nz = nz_tmp(i)
+          ! These 8 values are contiguous in phimap_precomp
+          a_val(1, i) = phimap_precomp(1, nx, ny, nz)
+          a_val(2, i) = phimap_precomp(2, nx, ny, nz)
+          a_val(3, i) = phimap_precomp(3, nx, ny, nz)
+          a_val(4, i) = phimap_precomp(4, nx, ny, nz)
+          a_val(5, i) = phimap_precomp(5, nx, ny, nz)
+          a_val(6, i) = phimap_precomp(6, nx, ny, nz)
+          a_val(7, i) = phimap_precomp(7, nx, ny, nz)
+          a_val(8, i) = phimap_precomp(8, nx, ny, nz)
+        enddo
+
+        ! 3. Compute potential using FMA Factorization
+        do i = 1, n_in_block
+          atomindex = atom_idx_tmp(i)
+          potent(atomindex) = zgr_tmp(i) * (ygr_tmp(i) * 
+     &                        (xgr_tmp(i) * a_val(1,i) + a_val(4,i)) + 
+     &                        (xgr_tmp(i) * a_val(3,i) + a_val(7,i))) + 
+     &                        (ygr_tmp(i) * (xgr_tmp(i) * a_val(2,i) + 
+     &                        a_val(6,i)) + (xgr_tmp(i) * a_val(5,i) + 
+     &                        a_val(8,i)))
+        enddo
       enddo
 
       return
diff --git a/src/rescore_byatom.f b/src/rescore_byatom.f
index 4da0cef..e0a56b3 100644
--- a/src/rescore_byatom.f
+++ b/src/rescore_byatom.f
@@ -52,6 +52,7 @@ subroutine recalc_score_byatom(options0, db2lig, grids0,
       integer :: numscored !keeps track of how many atom positions are evaluated
       integer :: which_grid, count_grid !which grids to use
       integer :: conf, confcount !which conf currently being examined
+      logical :: vdw_bumped !unused, for chemscore interface compatibility
 
       do confcount = 1, db2lig%set_conf_len(setcount)
         conf = db2lig%set_conf(setcount, confcount)
@@ -83,7 +84,7 @@ subroutine recalc_score_byatom(options0, db2lig, grids0,
      &          sra, srb, db2lig%atom_vdwtype,
      &          grids0%griddata(which_grid)%abval_precomp, vdwmap0%ngrd, 
      &          db2lig%total_atoms, db2lig%total_coords, maxtyv,
-     &          asum, bsum, numscored)
+     &          1.0e30, asum, bsum, numscored, vdw_bumped)
             vdwasum = vdwasum + asum
             vdwbsum = vdwbsum + bsum
             if (options0%ldesolv .le. 2) then !full, none or GB partial
diff --git a/src/score_conf.f b/src/score_conf.f
index 17e1ced..4f3ef7d 100644
--- a/src/score_conf.f
+++ b/src/score_conf.f
@@ -60,6 +60,7 @@ subroutine calc_score_conf(conf, options0, db2lig, grids0,
       real recdes_energy(db2lig%total_atoms) !the receptor desolvation energy
             !of each atom, in kT (needs converted still)
       real ligdes_energy(db2lig%total_atoms) !ligand desolvation energy, kT
+      logical :: vdw_bumped !early exit flag from chemscore
 
 
       atomstart = db2lig%conf_coord(1, conf) !atom index of start
@@ -70,10 +71,11 @@ subroutine calc_score_conf(conf, options0, db2lig, grids0,
      &    sra, srb, db2lig%atom_vdwtype,
      &    grids0%griddata(which_grid)%abval_precomp, vdwmap0%ngrd, 
      &    db2lig%total_atoms, db2lig%total_coords, maxtyv,
-     &    vdwasum, vdwbsum, numscored)
+     &    options0%bmpvdw, vdwasum, vdwbsum, numscored, vdw_bumped)
       !write(6,*) "calc_score:I AM HERE(1)" 
 c     sets vdwasum and vdwbsum (vdw scoring), increments numscored, reads others
-      if ((vdwasum - vdwbsum) .le. options0%bmpvdw) then !otherwise bump this group
+c     Now use the bumped flag from early exit instead of post-check
+      if (.not. vdw_bumped) then !otherwise bump this group
         if (options0%ldesolv .le. 2) then !none, full or old GB partial
           call calc_solvation_score(atomstart, atomend,
      &        db2lig%atom_vdwtype, db2lig%atom_polsolv, 
diff --git a/src/score_min.f b/src/score_min.f
index bda98ca..486498f 100644
--- a/src/score_min.f
+++ b/src/score_min.f
@@ -62,7 +62,7 @@ subroutine recalc_score_min(conf,
       integer, intent(in) :: which_grid ! the grid number 
       integer :: count_grid !which grids to use
       integer :: confcount, atomcount, coordcount !which conf currently being examined
-
+      logical :: vdw_bumped !unused, for chemscore interface compatibility
 
       vdwasum = 0.0
       vdwbsum = 0.0
@@ -77,13 +77,14 @@ subroutine recalc_score_min(conf,
       atomend = db2lig%conf_coord(2, conf) !same but end (2)
       !print *, 'atomstart=', atomstart
       !print *, 'atomend=', atomend
+c     For minimizer, use huge bmpvdw to disable early exit (need full scoring)
       call calc_chemscore(atomstart, atomend,
      &    db2lig%coord_index, db2lig%transfm_coords, vdwmap0%offset,
      &    vdwmap0%invgrid, vdwmap0%grdpts,
      &    sra, srb, db2lig%atom_vdwtype,
      &    grids0%griddata(which_grid)%abval_precomp, vdwmap0%ngrd,
      &    db2lig%total_atoms, db2lig%total_coords, maxtyv,
-     &    vdwasum, vdwbsum, numscored)
+     &    1.0e30, vdwasum, vdwbsum, numscored, vdw_bumped)
 
 
 c      if ((vdwasum - vdwbsum) .le. options0%bmpvdw) then !otherwise bump this group
diff --git a/src/score_sets.f b/src/score_sets.f
index d7db394..fc2894e 100644
--- a/src/score_sets.f
+++ b/src/score_sets.f
@@ -67,18 +67,22 @@ subroutine calc_score_sets(current_match, dock_status,
       real, dimension(maxsets, grids0%total_combinations) :: 
      &     wholevs, wholees, wholegi, wholeas, wholeps, 
      &     wholeds,wholerd, wholehs, wholers, wholescore !score for all the grids (or just 1 if only 1) 
-      real, dimension(maxsets, options0%total_receptors) :: 
-     &    setvs, setes, setgi, setas, 
-     &    setps, setds, setrd, seths, setscore !scores per set (whole position)
-      real, dimension(maxconfs, options0%total_receptors) :: 
-     &    confvs, confes,confgi,confas,  !one for each scoring function except
-     &    confps, confds, confrd, confhs !internal energy and receptor
-      !energy do not have a per-conformation score
+      real, dimension(maxsets, options0%total_receptors) ::
+     &    setscore !scores per set (whole position)
+c     Packed array for better locality (Array of Structures)
+c     1:vdw, 2:es, 3:gi, 4:as, 5:ps, 6:ds, 7:rd, 8:hs
+      real, dimension(8, maxconfs, options0%total_receptors) :: 
+     &    conf_scores 
       integer, dimension(maxconfs, options0%total_receptors) :: 
      &    conf_status !track status of each conf
+      real, dimension(maxconfs, options0%total_receptors) ::
+     &    conf_weighted_total !pre-calculated weighted total score
+      real, dimension(maxsets, options0%total_receptors) ::
+     &    set_weighted_total !accumulated weighted total per set
       character (len=255) :: reccode
       integer, dimension(grids0%group_len) :: temp_combo !list of grids
       integer :: temp_combo_num !temporary counter for combinations
+      integer :: temp_combo_num_grid !temporary variable for grid index in combo
       integer :: tempcoord,tempatom,count
 c     ! this will store the list grid point already asigned to the molecule 
 c     integer,dimension(options0%total_receptors,10000) :: gistgridindex
@@ -95,6 +99,14 @@ subroutine calc_score_sets(current_match, dock_status,
       integer atomcoord!used to find the right place to put things
 
       logical logged_tconf, logged_oconf
+c     2D bump cache for O(1) lookup: is_bumped(conf, grid)
+      logical, dimension(maxconfs, options0%total_receptors) ::
+     &    is_bumped
+      logical :: skip_set
+
+c     Initialize bump cache for all grids
+      is_bumped(:,:) = .false.
+      conf_weighted_total(:,:) = 0.0
 
       maingrid = grids0%group_part(1, 1) !do the invariant or only grid first
 
@@ -120,20 +132,17 @@ subroutine calc_score_sets(current_match, dock_status,
      &    db2lig, options0, grids0, maingrid,
      &    vdwmap0, phimap0, recdes0, ligdes0, gist0, solvmap0,
      &    rec_des_r0, match,
-     &    confvs(tempconf, maingrid), 
-     &    confes(tempconf, maingrid), 
-     &    confgi(tempconf, maingrid), 
-     &    confas(tempconf, maingrid), 
-     &    confps(tempconf, maingrid), 
-     &    confds(tempconf, maingrid),
-     &    confrd(tempconf, maingrid),
-     &    confhs(tempconf, maingrid),
+     &    conf_scores(1, tempconf, maingrid), 
+     &    conf_scores(2, tempconf, maingrid), 
+     &    conf_scores(3, tempconf, maingrid), 
+     &    conf_scores(4, tempconf, maingrid), 
+     &    conf_scores(5, tempconf, maingrid), 
+     &    conf_scores(6, tempconf, maingrid),
+     &    conf_scores(7, tempconf, maingrid),
+     &    conf_scores(8, tempconf, maingrid),
      &    allminlimits, allmaxlimits,
      &    sra, srb, 
      &    maxor, maxconfs, maxatm, maxatmpos, maxtyv, 
-c    &    ligscore%numscored, gistgridindex_cur_rec,
-c    &    ligscore%numscored, gistgridindex(:,tempconf,maingrid),
-c    &    lengistgi(tempconf,maingrid))
      &    ligscore%numscored,
      &    gistgridindex(:,tempconf,
      &        maingrid),
@@ -151,140 +160,55 @@ subroutine calc_score_sets(current_match, dock_status,
 c      write(6,*) "conf_status after is", conf_status(tempconf, maingrid)
 c      just for debugging turn off the rigidbump check
       if ((conf_status(tempconf, maingrid) .ne. ALLOKAY) .or. 
-     &    (confvs(tempconf, maingrid) .gt. 
+     &    (conf_scores(1, tempconf, maingrid) .gt. 
      &    options0%bmpvdwrigid)) then
         dock_status = BUMPED !the new version of bumping, rely on the vdw score
+c       Mark rigid conf as bumped in cache
+        is_bumped(tempconf, maingrid) = .true.
       else !the rigid component isn't terrible, go ahead and score
-        if ((setstart .eq. 1) .and. 
-     &      (setend .eq. db2lig%total_sets)) then
-          !not doing clouds, just do all confs at once, we'll need them all
-          do loopconf = 2, db2lig%total_confs !do all other confs
-            !for tempconf, rotate atoms in that conf in place. score them.
-            !save scores to confes, ..vs, ..ps, ..as
-            !gistgridindex_cur_rec = gistgridindex(:,maingrid)
-            tempconf = loopconf
-            oriconf = tempconf
-c           oriconf = db2lig%conf_conection(tempconf) 
-c           write(*,*) loopconf, "->", tempconf,
-c    &                 "->", oriconf
-            call doflush(OUTDOCK)
-            if (tempconf.eq.0) then
-               if (.not. logged_tconf) then ! really annoying when OUTDOCK is cluttered with things like this
-                    write(*,*) "total_confs =", db2lig%total_confs
-                    write(*,*) "Warning. tempconf = 0"
-                    write(*,*) loopconf, "->", tempconf,
-     &                 "->", oriconf
-                    call doflush(OUTDOCK)
-                    logged_tconf = .true.
-               endif
-               
-               tempconf = loopconf
-            else if (oriconf.eq.0) then
-               if (.not. logged_oconf) then
-                    write(*,*) "Warning. oriconf=0"
-                    write(*,*) loopconf, "->", tempconf,
-     &                 "->", oriconf
-                    call doflush(OUTDOCK)
-                    logged_oconf = .true.
-               endif
-               oriconf = tempconf
-            endif
-            
-            conf_status(tempconf, maingrid) = run_conf(
-     &          tempconf, current_match,
-     &          db2lig, options0, grids0, maingrid,
-     &          vdwmap0, phimap0, recdes0, ligdes0, gist0, solvmap0, 
-     &          rec_des_r0, match,
-     &          confvs(tempconf, maingrid), 
-     &          confes(tempconf, maingrid), 
-     &          confgi(tempconf, maingrid), 
-     &          confas(tempconf, maingrid), 
-     &          confps(tempconf, maingrid), 
-     &          confds(tempconf, maingrid),
-     &          confrd(tempconf, maingrid),
-     &          confhs(tempconf, maingrid),
-     &          allminlimits, allmaxlimits,
-     &          sra, srb, 
-     &          maxor, maxconfs, maxatm, maxatmpos, maxtyv, 
-c    &          ligscore%numscored,gistgridindex_cur_rec,
-c    &          ligscore%numscored,gistgridindex(:,tempconf,maingrid),
-c    &          lengistgi(tempconf,maingrid))
-     &          ligscore%numscored,
-     &          gistgridindex(:,tempconf,
-     &              maingrid),
-     &          lengistgi(tempconf,maingrid),
-     &          gistgridindex(:,
-c    &              db2lig%conf_conection(tempconf),
-     &              oriconf,
-     &              maingrid),
-c    &          lengistgi(db2lig%conf_conection(tempconf),
-     &          lengistgi(oriconf,
-     &              maingrid))
-
+c       LAZY EVALUATION: Only score confs on-demand during set iteration
+c       Mark all non-rigid confs as NOTCHECKED - they will be scored
+c       when first encountered in the set loop below
+        do loopconf = 2, db2lig%total_confs
+          do flexgrid = 1, options0%total_receptors
+            conf_status(loopconf, flexgrid) = NOTCHECKED
           enddo
-          if (options0%flexible_receptor) then !may want to score more
-            do loopconf = 1, db2lig%total_confs !do all other confs
-              tempconf = loopconf
-c              write(*,*) "tempconf (sorted) = ",tempconf
-c     &                  ,"loopconf = ", loopconf      
-              if (conf_status(tempconf, maingrid) .eq. ALLOKAY) then
-                do flexgrid = 2, options0%total_receptors !all other recs
-                  !gistgridindex_cur_rec = gistgridindex(:,flexgrid)
-                  conf_status(tempconf, flexgrid) = run_conf(
-     &                tempconf, current_match,
-     &                db2lig, options0, grids0, flexgrid,
-     &                vdwmap0, phimap0, recdes0, ligdes0, gist0,
-     &                solvmap0, rec_des_r0, match,
-     &                confvs(tempconf, flexgrid), 
-     &                confes(tempconf, flexgrid), 
-     &                confgi(tempconf, flexgrid), 
-     &                confas(tempconf, flexgrid), 
-     &                confps(tempconf, flexgrid), 
-     &                confds(tempconf, flexgrid),
-     &                confrd(tempconf, flexgrid),
-     &                confhs(tempconf, flexgrid),
-     &                allminlimits, allmaxlimits,
-     &                sra, srb, 
-     &                maxor, maxconfs, maxatm, maxatmpos, maxtyv, 
-c    &                ligscore%numscored,gistgridindex_cur_rec,
-     &                ligscore%numscored,gistgridindex(:,tempconf,
-     &                    flexgrid),
-     &                lengistgi(tempconf,flexgrid),
-     &                gistgridindex(:,
-c     &                      db2lig%conf_conection(tempconf),
-     &                      tempconf,
-     &                    flexgrid),
-     &                lengistgi(
-c     &                      db2lig%conf_conection(tempconf),
-     &                      tempconf,
-     &                    flexgrid))
-                enddo
-              endif
-            enddo
-          endif
-          !all conformations have been scored if possible
-        else !we only need some of them.. don't do unnecessary work
-          !only do for necessary confs, so mark all as undone for now
-          do loopconf = 2, db2lig%total_confs !do all other confs
-            tempconf = loopconf
-            do flexgrid = 1, options0%total_receptors !for all of them
-              conf_status(tempconf, flexgrid) = NOTCHECKED !set to this, check later
-            enddo
-          enddo
-        endif
+        enddo
         do flexgrid = 1, options0%total_receptors !go through every grid & 
                             !set, assemble scores for each combination
-          set_status = NOTCHECKED !temporary for this set
+c         Note: is_bumped is now initialized at start, not reset per grid
+c         Skip entire set loop if rigid conf (conf 1) bumped on this grid
+          if (is_bumped(1, flexgrid)) then
+            dock_status = BUMPED
+c           Mark all sets as not good for this grid
+            do tempset = setstart, setend
+              goodset(tempset, flexgrid) = .false.
+            enddo
+            cycle
+          endif
+            set_status = NOTCHECKED !temporary for this set
           do tempset = setstart, setend !going through every set as well
+            skip_set = .false.
+
+c           Pre-scan: Check if any conformation in this set is already known to be bumped
+            do tempconfnum = 2, db2lig%set_conf_len(tempset)
+              tempconf = db2lig%set_conf(tempset, tempconfnum)
+              if (is_bumped(tempconf, flexgrid)) then
+                 skip_set = .true.
+                 exit
+              endif
+            enddo
+
+            if (skip_set) then
+              set_status = BUMPED
+              dock_status = BUMPED
+              goodset(tempset, flexgrid) = .false.
+              cycle ! Skip to next set
+            endif
+
             !add up scores from each conf to find the best one.
-            setvs(tempset, flexgrid) = 0. !set to 0
-            setes(tempset, flexgrid) = 0.
-            setgi(tempset, flexgrid) = 0.
-            setps(tempset, flexgrid) = 0.
-            setas(tempset, flexgrid) = 0.
-            setds(tempset, flexgrid) = 0.
-            setrd(tempset, flexgrid) = 0.
-            seths(tempset, flexgrid) = 0.
+c           Init weighted total
+            set_weighted_total(tempset, flexgrid) = 0.
             !broken/clashed check on next line, if necessary
             if ((db2lig%set_broken(tempset) .eq. 0) .or. 
      &            (options0%check_clash .eqv. .false.)) then 
@@ -293,23 +217,27 @@ subroutine calc_score_sets(current_match, dock_status,
                 set_status = ALLOKAY
                 do tempconfnum = 1, db2lig%set_conf_len(tempset)
                   tempconf = db2lig%set_conf(tempset, tempconfnum)
+c                 Check bump cache first (O(1) lookup)
+                  if (is_bumped(tempconf, flexgrid)) then
+                    set_status = BUMPED
+                    exit
+                  endif
                   !build on the fly if necessary
                   if (conf_status(tempconf, flexgrid) .eq. 
      &                NOTCHECKED) then !if it hasn't been checked already
-                    !gistgridindex_cur_rec = gistgridindex(:,flexgrid)
                     conf_status(tempconf, flexgrid) = run_conf(
      &                  tempconf, current_match,
      &                  db2lig, options0, grids0, flexgrid,
      &                  vdwmap0, phimap0, recdes0, ligdes0, gist0,
      &                  solvmap0, rec_des_r0, match,
-     &                  confvs(tempconf, flexgrid), 
-     &                  confes(tempconf, flexgrid), 
-     &                  confgi(tempconf, flexgrid), 
-     &                  confas(tempconf, flexgrid), 
-     &                  confps(tempconf, flexgrid), 
-     &                  confds(tempconf, flexgrid),
-     &                  confrd(tempconf, flexgrid),
-     &                  confhs(tempconf, flexgrid),
+     &                  conf_scores(1, tempconf, flexgrid), 
+     &                  conf_scores(2, tempconf, flexgrid), 
+     &                  conf_scores(3, tempconf, flexgrid), 
+     &                  conf_scores(4, tempconf, flexgrid), 
+     &                  conf_scores(5, tempconf, flexgrid), 
+     &                  conf_scores(6, tempconf, flexgrid),
+     &                  conf_scores(7, tempconf, flexgrid),
+     &                  conf_scores(8, tempconf, flexgrid),
      &                  allminlimits, allmaxlimits,
      &                  sra, srb, 
      &                  maxor, maxconfs, maxatm, maxatmpos, maxtyv, 
@@ -326,27 +254,41 @@ subroutine calc_score_sets(current_match, dock_status,
      &                      tempconf,
      &                      flexgrid))
                   endif             
-
-                  if (conf_status(tempconf, flexgrid) .ne. ALLOKAY) then
+                  
+                  if (conf_status(tempconf, flexgrid) .eq. ALLOKAY) then
+c                   Algorithm 6: Pre-calculate weighted total
+c                   Split lines to respect 72-col limit
+                    conf_weighted_total(tempconf, flexgrid) = 
+     &               conf_scores(1, tempconf, flexgrid)*options0%vscale+
+     &               conf_scores(2, tempconf, flexgrid)*options0%escale+
+     &               conf_scores(3, tempconf, flexgrid)*
+     &                                          options0%gistscale+
+     &               conf_scores(5, tempconf, flexgrid)*
+     &                                          options0%solvscale+
+     &               conf_scores(4, tempconf, flexgrid)*
+     &                                          options0%solvscale
+                    conf_weighted_total(tempconf, flexgrid) = 
+     &                 conf_weighted_total(tempconf, flexgrid) +
+     &                 conf_scores(6, tempconf, flexgrid)*
+     &                                          options0%rdscale +
+     &                 conf_scores(7, tempconf, flexgrid)* 
+     &                                          options0%rec_des_scale +
+     &                 conf_scores(8, tempconf, flexgrid)*
+     &                                          options0%rhscale
+                  
+                  else if (conf_status(tempconf, flexgrid) .ne. 
+     &                 ALLOKAY) then
                     set_status = conf_status(tempconf, flexgrid)
+c                   Mark conf as bumped for O(1) cache lookup
+                    if (conf_status(tempconf,flexgrid).eq.BUMPED) then
+                      is_bumped(tempconf, flexgrid) = .true.
+                    endif
                     exit !break out of loop for this set
                   endif
-                  setvs(tempset, flexgrid) = setvs(tempset, flexgrid) + 
-     &                confvs(tempconf, flexgrid)
-                  setes(tempset, flexgrid) = setes(tempset, flexgrid) + 
-     &                confes(tempconf, flexgrid)
-                  setgi(tempset, flexgrid) = setgi(tempset, flexgrid) + 
-     &                confgi(tempconf, flexgrid)
-                  setps(tempset, flexgrid) = setps(tempset, flexgrid) + 
-     &                confps(tempconf, flexgrid)
-                  setas(tempset, flexgrid) = setas(tempset, flexgrid) +
-     &                confas(tempconf, flexgrid)
-                  setds(tempset, flexgrid) = setds(tempset, flexgrid) + 
-     &                confds(tempconf, flexgrid)
-                  setrd(tempset, flexgrid) = setrd(tempset, flexgrid) + 
-     &                confrd(tempconf, flexgrid)
-                  seths(tempset, flexgrid) = seths(tempset, flexgrid) + 
-     &                confhs(tempconf, flexgrid)
+c                 Optimized inner loop: accumulate only the weighted total
+                  set_weighted_total(tempset, flexgrid) = 
+     &                 set_weighted_total(tempset, flexgrid) + 
+     &                 conf_weighted_total(tempconf, flexgrid)
                 enddo
               else !above strain
                 set_status = STRAIN
@@ -359,33 +301,10 @@ subroutine calc_score_sets(current_match, dock_status,
             goodset(tempset, flexgrid) = .false.
             if (set_status .eq. ALLOKAY) then !each conf scored
               goodset(tempset, flexgrid) = .true.
-              !moved scaling to here for grids stuff, internal energy is 
-              !done when it is read in
-              setvs(tempset, flexgrid) = options0%vscale * 
-     &            setvs(tempset, flexgrid)
-              setes(tempset, flexgrid) = options0%escale * 
-     &            setes(tempset, flexgrid)
-              setgi(tempset, flexgrid) = options0%gistscale * 
-     &            setgi(tempset, flexgrid)
-              setas(tempset, flexgrid) = options0%solvscale * 
-     &            setas(tempset, flexgrid)
-              setps(tempset, flexgrid) = options0%solvscale * 
-     &            setps(tempset, flexgrid)
-              setds(tempset, flexgrid) = options0%rdscale * 
-     &            setds(tempset, flexgrid)
-              setrd(tempset, flexgrid) = options0%rec_des_scale * 
-     &            setrd(tempset, flexgrid)
-              seths(tempset, flexgrid) = options0%rhscale * 
-     &            seths(tempset, flexgrid)
-              setscore(tempset, flexgrid) = setvs(tempset, flexgrid) 
-     &            + setes(tempset, flexgrid)
-     &            + setgi(tempset, flexgrid)
-     &            + setps(tempset, flexgrid) + setas(tempset, flexgrid) 
-     &            + db2lig%set_energy(tempset) 
-     &            + setds(tempset, flexgrid)
-     &            + setrd(tempset, flexgrid)
-     &            + seths(tempset, flexgrid)
-     &            + options0%rec_energy(flexgrid)
+              setscore(tempset, flexgrid) = 
+     &            set_weighted_total(tempset, flexgrid) + 
+     &            db2lig%set_energy(tempset) + 
+     &            options0%rec_energy(flexgrid)
             endif
           enddo      
         enddo
@@ -406,41 +325,93 @@ subroutine calc_score_sets(current_match, dock_status,
             wholescore(tempset, temp_combo_num) = 0.
             do flexgrid = 1, grids0%group_len
               if (goodset(tempset, temp_combo(flexgrid))) then
-                wholevs(tempset, temp_combo_num) = 
-     &              wholevs(tempset, temp_combo_num) + 
-     &              setvs(tempset, temp_combo(flexgrid))
-                wholees(tempset, temp_combo_num) = 
-     &              wholees(tempset, temp_combo_num) + 
-     &              setes(tempset, temp_combo(flexgrid))
-               wholegi(tempset, temp_combo_num) =
-     &              wholegi(tempset, temp_combo_num) +
-     &              setgi(tempset, temp_combo(flexgrid))
-                wholeas(tempset, temp_combo_num) = 
-     &              wholeas(tempset, temp_combo_num) + 
-     &              setas(tempset, temp_combo(flexgrid))
-                wholeps(tempset, temp_combo_num) = 
-     &              wholeps(tempset, temp_combo_num) + 
-     &              setps(tempset, temp_combo(flexgrid))
-                wholeds(tempset, temp_combo_num) = 
-     &              wholeds(tempset, temp_combo_num) + 
-     &              setds(tempset, temp_combo(flexgrid))
-                wholerd(tempset, temp_combo_num) = 
-     &              wholerd(tempset, temp_combo_num) + 
-     &              setrd(tempset, temp_combo(flexgrid))
-                wholehs(tempset, temp_combo_num) = 
-     &              wholehs(tempset, temp_combo_num) + 
-     &              seths(tempset, temp_combo(flexgrid))
-                wholers(tempset, temp_combo_num) = 
-     &              wholers(tempset, temp_combo_num) + 
-     &              options0%rec_energy(temp_combo(flexgrid)) !note difference!!
+c               Use weighted total for quick check
+c               Note: We removed setvs, setes etc intermediate arrays.
+c               Only accumulate wholescore here for checking.
+c               Components will be reconstructed later if good.
                 wholescore(tempset, temp_combo_num) = 
      &              wholescore(tempset, temp_combo_num) + 
-     &              setscore(tempset, temp_combo(flexgrid))
+     &              set_weighted_total(tempset, temp_combo(flexgrid)) +
+     &              options0%rec_energy(temp_combo(flexgrid))
               else
                 goodwhole = .false.
                 exit !break out, no need to do more
               endif
             enddo
+c           Add set_energy to complete the wholescore
+            if (goodwhole) then
+                 wholescore(tempset, temp_combo_num) = 
+     &              wholescore(tempset, temp_combo_num) +
+     &              db2lig%set_energy(tempset)
+
+c             Lazy check against cutoff
+c             Note: ligscore%pose_score is sorted best (lowest) to worst.
+c             ligscore%savedcount is the number of saved poses.
+c             We want to save if buffer is not full OR if score is better than worst.
+              if ((ligscore%savedcount .lt. options0%nsav) .or. 
+     &            (wholescore(tempset, temp_combo_num) .lt. 
+     &             ligscore%pose_score(ligscore%savedcount))) then
+
+c               It's a top hit! Reconstruct component scores now.
+                do flexgrid = 1, grids0%group_len
+                   temp_combo_num_grid = temp_combo(flexgrid)
+
+                   do tempconfnum=1, db2lig%set_conf_len(tempset)
+                      tempconf = db2lig%set_conf(tempset, tempconfnum)
+                      
+                      wholevs(tempset, temp_combo_num) = 
+     &                   wholevs(tempset, temp_combo_num) + 
+     &                   conf_scores(1, tempconf, temp_combo_num_grid) * 
+     &                   options0%vscale
+
+                      wholees(tempset, temp_combo_num) = 
+     &                   wholees(tempset, temp_combo_num) + 
+     &                   conf_scores(2, tempconf, temp_combo_num_grid) * 
+     &                   options0%escale
+
+                      wholegi(tempset, temp_combo_num) =
+     &                   wholegi(tempset, temp_combo_num) +
+     &                   conf_scores(3, tempconf, temp_combo_num_grid) * 
+     &                   options0%gistscale
+
+                      wholeas(tempset, temp_combo_num) = 
+     &                   wholeas(tempset, temp_combo_num) +
+     &                   conf_scores(4, tempconf, temp_combo_num_grid) * 
+     &                   options0%solvscale
+
+                      wholeps(tempset, temp_combo_num) = 
+     &                   wholeps(tempset, temp_combo_num) + 
+     &                   conf_scores(5, tempconf, temp_combo_num_grid) * 
+     &                   options0%solvscale
+
+                      wholeds(tempset, temp_combo_num) = 
+     &                   wholeds(tempset, temp_combo_num) + 
+     &                   conf_scores(6, tempconf, temp_combo_num_grid) * 
+     &                   options0%rdscale
+
+                      wholerd(tempset, temp_combo_num) = 
+     &                   wholerd(tempset, temp_combo_num) + 
+     &                   conf_scores(7, tempconf, temp_combo_num_grid) * 
+     &                   options0%rec_des_scale
+
+                      wholehs(tempset, temp_combo_num) = 
+     &                   wholehs(tempset, temp_combo_num) + 
+     &                   conf_scores(8, tempconf, temp_combo_num_grid) * 
+     &                   options0%rhscale
+                   enddo
+                   
+                   ! Add receptor energy for this grid
+                   wholers(tempset, temp_combo_num) = 
+     &                wholers(tempset, temp_combo_num) + 
+     &                options0%rec_energy(temp_combo_num_grid)
+
+                enddo 
+                
+              else
+                 goodwhole = .false. ! Mark as bad to skip saving block below
+              endif
+            endif ! end lazy check block
+
             if (goodwhole) then !don't save if not good
               onegood = .true. ! there is at least one that is good. 
 
@@ -461,56 +432,6 @@ subroutine calc_score_sets(current_match, dock_status,
                      enddo
                   enddo
                endif
-c              write(6,*) current_match, " ", tempset, " ",
-c     &            wholescore(tempset, temp_combo_num)," ",
-c     &            wholevs(tempset, temp_combo_num)
-c              debug, jklyu, 2019.03.28
-c              open(unit=OUTPDB, 
-c     &        file='test.pdb', action='write')
-c             for debugging, transfm the whole set based on the match
-c              do confcount = 1, db2lig%set_conf_len(tempset) !sets of complete conformations
-c                 conf = db2lig%set_conf(tempset, confcount)
-c                 !each conf needs to transform the coordinates to the right place
-c                 call run_transfm_conf(conf, current_match, db2lig,
-c     &                match, maxor)  !match-picked transformation
-c              enddo
-c              write(6,"(A18,I6,A10,I4,F6.2,F6.2)") 
-c     &            "REMARK match_num: ",
-c     &            current_match,
-c     &            " set_num: ", tempset,
-c     &            wholescore(tempset, temp_combo_num),
-c     &            wholevs(tempset, temp_combo_num)
-c             the codes below are trying to write out the whole molecule
-c              do atomcount = 1, db2lig%total_atoms
-c                write(6, PDBFORM) "ATOM  ",
-c     &           db2lig%atom_num(atomcount),
-c     &           db2lig%atom_name(atomcount), "","LIG", "A", 1,
-c     &           "", db2lig%transfm_coords(1, atomcount),
-c     &           db2lig%transfm_coords(2, atomcount),
-c     &           db2lig%transfm_coords(3, atomcount),
-c     &           0.00, 0.00,
-c     &           db2lig%atom_name(atomcount), ""
-c              enddo
-c              write(6,*) "ENDMDL"
-c             the codes below are trying to write out the rigid
-c             fragment for each molecule
-c             frist conf is the rigid fragment
-c              do atomcoord = db2lig%conf_coord(1, 1), 
-c     &             db2lig%conf_coord(2, 1)
-c                atomcount = db2lig%coord_index(2, atomcoord)
-c                write(6, PDBFORM) "ATOM  ",
-c     &           db2lig%atom_num(atomcount),
-c     &           db2lig%atom_name(atomcount), "","LIG", "A", 1,
-c     &           "", db2lig%transfm_coords(1, atomcount),
-c     &           db2lig%transfm_coords(2, atomcount),
-c     &           db2lig%transfm_coords(3, atomcount),
-c     &           0.00, 0.00,
-c     &           db2lig%atom_name(atomcount), ""
-c              enddo
-c              write(6,"(A6)") "ENDMDL"
-
-c             write(6,*) tempset, db2lig%set_max_strain(tempset),
-c    &                   db2lig%set_total_strain(tempset) 
               call save_set_score(ligscore, current_match, tempset, 
      &            wholevs(tempset, temp_combo_num), 
      &            wholees(tempset, temp_combo_num), 
diff --git a/src/search.f b/src/search.f
index 0f2db28..c2272e2 100644
--- a/src/search.f
+++ b/src/search.f
@@ -134,6 +134,13 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
       integer loc
 
       integer signal ! need for portland group
+      
+      real :: val_db2_start, val_db2_end
+      real :: val_match_start, val_match_end
+      real :: val_score_start, val_score_end
+      real :: acc_db2, acc_match, acc_score
+      real :: total_timed
+      real :: perc_db2, perc_match, perc_score
 
 c   --- initialize restart variables (Ben) ---
 
@@ -143,6 +150,10 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
       ligc = 0
       db2c = 0
       iend = .false.
+      
+      acc_db2 = 0.0
+      acc_match = 0.0
+      acc_score = 0.0
 
 c   --- move our file handles to the spot indicated by the restart file
 
@@ -176,7 +187,7 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
 999   signalflag = 0
       SIGUSR1 = 10
       !ret = signal(SIGUSR1, signalhandler) ! for gfortran
-      ret = signal(SIGUSR1, signalhandler, -1) ! for portland group
+      ret = signal(SIGUSR1, signalhandler) ! for portland group
 
 c     --- initialize variables ---
       call allocate_ligscore(ligscore, options0%nsav, OUTDOCK)
@@ -253,9 +264,12 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
 
         total_iter = 0
 c       --- read a ligand structure from the dbase ---
-        call ligread2(db2lig, options0, 
+        call get_time(val_db2_start)
+         call ligread2(db2lig, options0, 
      &      iend, tcorr, tcolor, tcount, fdlig, 
      &      ligc, db2c, .false.) !read one hierarchy 
+         call get_time(val_db2_end)
+         acc_db2 = acc_db2 + (val_db2_end - val_db2_start) 
 
         ! add the current number of conformations in the hierarchy to
         ! calaculate the total nuber docked.  
@@ -322,6 +336,7 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
           more_matches = .true. !always do at least once
           curdislim = options0%dislim !start at beginning no matter what
 
+          call get_time(val_match_start)
           do while (more_matches)
             do centrl = 1, tcount !use each ligand & receptor sphere as start
               do centrr = 1, spheres0%nsphr !seed point for generating spheres
@@ -385,7 +400,9 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
                 endif
               endif
             endif
-          enddo
+           enddo
+           call get_time(val_match_end)
+           acc_match = acc_match + (val_match_end - val_match_start)
           
           !calculating the total number of match or orientations obtained during docking run
           tot_match =  tot_match + match%nmatch
@@ -411,6 +428,7 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
           !next line actually calls scoring function now that matches are
           !all generated. no global variables used anymore.
 c          write(6,*), "dockstatus before is",dock_status
+          call get_time(val_score_start)
           call calc_score_mol(dock_status, setstart, setend, 
      &        db2lig, options0, ligscore, ligscoreeach, grids0,
      &        vdwmap0, phimap0, recdes0, ligdes0, gist0, solvmap0, 
@@ -419,6 +437,8 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
      &        MAXOR, db2lig%total_confs, db2lig%total_sets, 
      &        db2lig%total_atoms, db2lig%total_coords,
      &        MAXTYV) 
+           call get_time(val_score_end)
+           acc_score = acc_score + (val_score_end - val_score_start) 
           call get_time(time2)
           telaps = time2 - time1
 c          write(6,*), "dockstatus is",dock_status
@@ -778,9 +798,14 @@ subroutine run_search(grids0, phimap0, recdes0, ligdes0, gist0,
      &       tot_sets 
         write (OUTDOCK, *) "total number of nodes (confs): ",
      &       tot_nodes 
-        write (OUTDOCK, *) "total number of complexes: ",
+        write(OUTDOCK, *) "total number of complexes: ",
      &       tot_complex 
-        write(OUTDOCK, *) "end of file encountered"
+
+        write(OUTDOCK, '(A, F8.3, A, F8.3, A, F8.3, A)') 
+     &       " DB2 parsing time (s):", acc_db2,
+     &       "; Matching time (s): ", acc_match,
+     &       "; Scoring time (s): ", acc_score,
+     &       "; end of file encountered"
       endif
       call gzclose(fdmol, istat) !close ligand output file
       if ((options0%minimize .eq. 1) .and.
diff --git a/src/search_multi_cluster.f b/src/search_multi_cluster.f
index 9eff9ec..8290474 100644
--- a/src/search_multi_cluster.f
+++ b/src/search_multi_cluster.f
@@ -184,7 +184,7 @@ subroutine run_search_multi_cluster(grids0, phimap0,
 999   signalflag = 0
       SIGUSR1 = 10
       !ret = signal(SIGUSR1, signalhandler) ! for gfortran
-      ret = signal(SIGUSR1, signalhandler, -1) ! for portland group
+      ret = signal(SIGUSR1, signalhandler) ! for portland group
 
 c     --- initialize variables ---
       call allocate_ligscore(ligscore, options0%nsav, OUTDOCK)
diff --git a/src/setdef.f b/src/setdef.f
index 8797947..21c6c43 100644
--- a/src/setdef.f
+++ b/src/setdef.f
@@ -34,6 +34,7 @@ subroutine setdef(phimap0, options0, spheres0, recdes0, ligdes0)
       
       options0%use_gist = .false.
       options0%use_gist_val = ''
+      options0%use_binary_db2 = .false.  ! Binary DB2 format off by default
       options0%per_atom_scores = .true.
       options0%receptor_desolv = .false.
       options0%rec_des_r  = .false.
diff --git a/src/solvation_score.f b/src/solvation_score.f
index df8b9e6..65a4a13 100644
--- a/src/solvation_score.f
+++ b/src/solvation_score.f
@@ -36,13 +36,19 @@ subroutine calc_solvation_score(atomstart, atomend,
 c return values
       real, intent(inout) :: solvp, solva !polar and apolar desolvation
 
-      integer atomindex !actual atom index store here
-      integer count, lx, ly, lz
-      integer nx, ny, nz
-      real    slx, sly, slz, xgr, ygr, zgr
-      real    a1, a2, a3, a4, a5, a6, a7, a8
       real    volocl ! % desolvation based on sol. volume occluded by receptor.
 
+c     use precomputed values, removed hierarchy knowledge
+c     REMOVED OUTSIDE GRID CHECKS
+
+      integer :: i, n_in_block, block_idx, atomindex, count, nx, ny, nz
+      real :: slx_tmp(8), sly_tmp(8), slz_tmp(8)
+      real :: xgr_tmp(8), ygr_tmp(8), zgr_tmp(8)
+      integer :: nx_tmp(8), ny_tmp(8), nz_tmp(8)
+      integer :: atom_idx_tmp(8)
+      real :: a_val(8, 8) ! [coeff_idx, atom_in_block]
+      real :: volocl_tmp(8)
+
 c     use precomputed values, removed hierarchy knowledge
 c     REMOVED OUTSIDE GRID CHECKS
 
@@ -54,54 +60,91 @@ subroutine calc_solvation_score(atomstart, atomend,
       endif !otherwise ldesolv = 2, so use partial
       solvp = 0.0
       solva = 0.0
-      do count = atomstart, atomend !for every atom
-        atomindex = coord_index(2, count) !gets actual atom non-consecutive #
-        if (ldesolv .eq. 2) then !only if doing partial ligand desolvation
-          slx = smax(1) - transfm_coords(1, atomindex) * sperang
-          sly = smax(2) - transfm_coords(2, atomindex) * sperang
-          slz = smax(3) - transfm_coords(3, atomindex) * sperang
-          nx = int(slx)
-          ny = int(sly)
-          nz = int(slz)
-c         calculate partial cube coordinates of point
-          xgr = slx - float(nx)
-          ygr = sly - float(ny)
-          zgr = slz - float(nz)
-c         calculate coefficients of trilinear function
-          if (hsolflag .and. ((atom_vdwtype(atomindex) .eq. 6) .or.
-     &        (atom_vdwtype(atomindex) .eq. 7))) then !hydrogen atom 
-            a8 = hsolgrd_precomp(8, nx, ny, nz)
-            a7 = hsolgrd_precomp(7, nx, ny, nz)
-            a6 = hsolgrd_precomp(6, nx, ny, nz)
-            a5 = hsolgrd_precomp(5, nx, ny, nz)
-            a4 = hsolgrd_precomp(4, nx, ny, nz)
-            a3 = hsolgrd_precomp(3, nx, ny, nz)
-            a2 = hsolgrd_precomp(2, nx, ny, nz)
-            a1 = hsolgrd_precomp(1, nx, ny, nz)
-          else !heavy atom
-            a8 = solgrd_precomp(8, nx, ny, nz)
-            a7 = solgrd_precomp(7, nx, ny, nz)
-            a6 = solgrd_precomp(6, nx, ny, nz)
-            a5 = solgrd_precomp(5, nx, ny, nz)
-            a4 = solgrd_precomp(4, nx, ny, nz)
-            a3 = solgrd_precomp(3, nx, ny, nz)
-            a2 = solgrd_precomp(2, nx, ny, nz)
-            a1 = solgrd_precomp(1, nx, ny, nz)
-          endif
-c         interpolate values read from solvent grid
-          volocl = a1*xgr*ygr*zgr + a2*xgr*ygr + a3*xgr*zgr
-     &                   + a4*ygr*zgr + a5*xgr + a6*ygr + a7*zgr + a8
-          if (volocl .ge. 1.0 ) then !max is 1, means completely desolvated
-            write(*,*) "Warning: volocl (desolvation grid) > 1.0",volocl
-            volocl = 1.0
-          elseif ( volocl .lt. 0.0) then !max is 1, means completely desolvated
-            write(*,*) "Warning: volocl (desolvation grid) < 0.0",volocl
-            volocl = 0.0
-          endif
+
+      do block_idx = atomstart, atomend, 8
+        n_in_block = min(8, atomend - block_idx + 1)
+
+        ! 1. Gather coordinates and compute grid indices (only for ldesolv=2)
+        if (ldesolv .eq. 2) then
+          do i = 1, n_in_block
+            count = block_idx + i - 1
+            atomindex = coord_index(2, count)
+            atom_idx_tmp(i) = atomindex
+            
+            slx_tmp(i) = smax(1) - 
+     &                   transfm_coords(1, atomindex) * float(sperang)
+            sly_tmp(i) = smax(2) - 
+     &                   transfm_coords(2, atomindex) * float(sperang)
+            slz_tmp(i) = smax(3) - 
+     &                   transfm_coords(3, atomindex) * float(sperang)
+            
+            nx_tmp(i) = int(slx_tmp(i))
+            ny_tmp(i) = int(sly_tmp(i))
+            nz_tmp(i) = int(slz_tmp(i))
+
+            xgr_tmp(i) = slx_tmp(i) - float(nx_tmp(i))
+            ygr_tmp(i) = sly_tmp(i) - float(ny_tmp(i))
+            zgr_tmp(i) = slz_tmp(i) - float(nz_tmp(i))
+          enddo
+
+          ! 2. Gather interpolation coefficients
+          do i = 1, n_in_block
+            atomindex = atom_idx_tmp(i)
+            nx = nx_tmp(i)
+            ny = ny_tmp(i)
+            nz = nz_tmp(i)
+            
+            if (hsolflag .and. ((atom_vdwtype(atomindex) .eq. 6) .or.
+     &          (atom_vdwtype(atomindex) .eq. 7))) then !hydrogen atom 
+              a_val(1, i) = hsolgrd_precomp(1, nx, ny, nz)
+              a_val(2, i) = hsolgrd_precomp(2, nx, ny, nz)
+              a_val(3, i) = hsolgrd_precomp(3, nx, ny, nz)
+              a_val(4, i) = hsolgrd_precomp(4, nx, ny, nz)
+              a_val(5, i) = hsolgrd_precomp(5, nx, ny, nz)
+              a_val(6, i) = hsolgrd_precomp(6, nx, ny, nz)
+              a_val(7, i) = hsolgrd_precomp(7, nx, ny, nz)
+              a_val(8, i) = hsolgrd_precomp(8, nx, ny, nz)
+            else !heavy atom
+              a_val(1, i) = solgrd_precomp(1, nx, ny, nz)
+              a_val(2, i) = solgrd_precomp(2, nx, ny, nz)
+              a_val(3, i) = solgrd_precomp(3, nx, ny, nz)
+              a_val(4, i) = solgrd_precomp(4, nx, ny, nz)
+              a_val(5, i) = solgrd_precomp(5, nx, ny, nz)
+              a_val(6, i) = solgrd_precomp(6, nx, ny, nz)
+              a_val(7, i) = solgrd_precomp(7, nx, ny, nz)
+              a_val(8, i) = solgrd_precomp(8, nx, ny, nz)
+            endif
+          enddo
+
+          ! 3. Compute volocl using FMA Factorization
+          do i = 1, n_in_block
+            volocl_tmp(i) = zgr_tmp(i) * (ygr_tmp(i) * 
+     &                      (xgr_tmp(i) * a_val(1,i) + a_val(4,i)) + 
+     &                      (xgr_tmp(i) * a_val(3,i) + a_val(7,i))) + 
+     &                      (ygr_tmp(i) * (xgr_tmp(i) * a_val(2,i) + 
+     &                      a_val(6,i)) + (xgr_tmp(i) * a_val(5,i) + 
+     &                      a_val(8,i)))
+            
+            if (volocl_tmp(i) .ge. 1.0 ) then
+              volocl_tmp(i) = 1.0
+            elseif ( volocl_tmp(i) .lt. 0.0) then
+              volocl_tmp(i) = 0.0
+            endif
+          enddo
+        else ! ldesolv is 0 or 1
+          do i = 1, n_in_block
+            count = block_idx + i - 1
+            atom_idx_tmp(i) = coord_index(2, count)
+            volocl_tmp(i) = volocl
+          enddo
         endif
-        !write(*,*) "volocl (desolvation grid) = ", volocl
-        solvp = solvp - (atom_polsolv(atomindex) * volocl)
-        solva = solva - (atom_apolsolv(atomindex) * volocl)
+
+        ! 4. Sum up results
+        do i = 1, n_in_block
+          atomindex = atom_idx_tmp(i)
+          solvp = solvp-(atom_polsolv(atomindex)*volocl_tmp(i))
+          solva = solva-(atom_apolsolv(atomindex)*volocl_tmp(i))
+        enddo
       enddo
 
       return
diff --git a/src/sphsfromdists.f b/src/sphsfromdists.f
index e531418..7498c26 100644
--- a/src/sphsfromdists.f
+++ b/src/sphsfromdists.f
@@ -45,8 +45,9 @@ subroutine sphsfromdists(ligstart, recstart, disl,
       matchcount = matchcount + 1 !going to add one
       sphmatches(matchcount, 1) = ligstart !always add this pair
       sphmatches(matchcount, 2) = recstart !always add this pair
-      do curligpt = 1, maxlig !for all ligand spheres
-        if (curligpt .ne. ligstart) then !except the starting point
+c canonical ordering: only consider ligand indices > seed
+c this ensures each match is found exactly once (when min ligand index is seed)
+      do curligpt = ligstart + 1, maxlig !only indices > seed
           ligdist = disl(ligstart, curligpt) !dist from start to current, lig
           do currecpt = 1, maxrec !for all receptor spheres
             if (currecpt .ne. recstart) then !except the starting point
@@ -67,7 +68,6 @@ subroutine sphsfromdists(ligstart, recstart, disl,
               endif
             endif
           enddo
-        endif
       enddo
       !matchcount is now equal to how many added, sphmatches contains
       !pairs and then 0s once all pairs exhausted
diff --git a/src/version.f b/src/version.f
new file mode 100644
index 0000000..ea4d403
--- /dev/null
+++ b/src/version.f
@@ -0,0 +1,26 @@
+!compiler options, changed by Makefile
+      module version
+
+      implicit none
+
+      character (len=10), parameter :: SVNVERSION=''
+c adjusted by the makefile to the current svn version number.
+
+      character (len=10), parameter :: COMPDATE='20260320'
+c     COMPDATE: Adjusted by the makefile to the date DOCK is compiled
+
+      character (len=10), parameter :: ARCHSIZE='64,'
+c     ARCHSIZE: Adjusted by makefile to reflect if this was 32b\64b
+c     compilation.
+
+      character (len=1000), parameter :: FFLAGS = 'foo'
+c     FFLAGS:  Adjusted by makefile as a record of compilation options.
+c not actually used at the moment
+      end module version
+
+c----------------------------------------------------------------------
+c
+c       Copyright (C) 2008 Michael Mysinger and Brian K. Shoichet
+c               University of California at San Francisco
+c                         All Rights Reserved.
+c----------------------------------------------------------------------
diff --git a/src/version.f.template b/src/version.f.template
index 246cf42..f22881d 100644
--- a/src/version.f.template
+++ b/src/version.f.template
@@ -6,7 +6,7 @@
       character (len=10), parameter :: SVNVERSION=''
 c adjusted by the makefile to the current svn version number.
 
-      character (len=10), parameter :: COMPDATE='20220107'
+      character (len=10), parameter :: COMPDATE='20260107'
 c     COMPDATE: Adjusted by the makefile to the date DOCK is compiled
 
       character (len=10), parameter :: ARCHSIZE='64,'