Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
d478435
Initial plan for issue
Copilot May 22, 2025
931fcaa
Create template matching reproducibility test script
Copilot May 22, 2025
a8ca406
Improve error handling and documentation in template matching test
Copilot May 22, 2025
1a3b456
Optimize template matching reproducibility test
Copilot May 22, 2025
6548b4a
Add --fast_fft flag to template matching tests
Copilot May 22, 2025
9ac2680
Implement temp directory management and update FastFFT help text
Copilot May 22, 2025
b3c7f60
Add threshold extraction and relative error reporting to template mat…
Copilot May 22, 2025
dde8ba1
Fix threshold extraction regex pattern in template matching test
Copilot May 22, 2025
327a37a
Address PR feedback: improve threshold printing, temp dir management,…
Copilot May 22, 2025
0556a95
Implement PR review comments for template matching test
Copilot May 23, 2025
3250509
Implement template matching test improvements
Copilot May 23, 2025
dcc9c5e
Remove binning CLI flag and add as default config parameter
Copilot May 23, 2025
51450c6
Improved Python module imports to replace annoying_hack.py
Copilot May 23, 2025
e57c12b
Modify template matching test to run with multiple binning values
Copilot May 23, 2025
eb0b6a0
Create reusable image replicate analysis tools
Copilot May 24, 2025
9004eb5
Modify test_template_reproducibility.py to add fast development mode
Copilot May 24, 2025
baf1bce
Update template matching test to provide bash commands for analysis
Copilot May 24, 2025
a5a12c0
Add --max-threads CLI arg and fix threshold capitalization
Copilot May 24, 2025
2ebdcd4
Implement CLI vs default config comparison and remove problematic file
Copilot May 24, 2025
59ae785
Simplify CLI argument comparison in args.py
Copilot May 24, 2025
0c03161
Remove Python package related files and instructions
Copilot May 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .cistem_temp_dirs.log
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
21 changes: 21 additions & 0 deletions scripts/testing/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# cisTEM Testing Tools

This directory contains test scripts and utilities for testing cisTEM functionality.

## Running Tests

Each test can be run individually from its own directory. For example:

```bash
# Run template matching reproducibility test
python /path/to/cisTEM/scripts/testing/programs/match_template/test_template_reproducibility.py --binary-path /path/to/binaries
```

## Temporary Directory Management

Test scripts that create temporary files use a centralized tracking system to make cleanup easier.
You can list and clean up temporary directories using the following options:

- `--list-temp-dirs`: List all tracked temporary directories
- `--rm-temp-dir INDEX`: Remove a specific temporary directory by index
- `--rm-all-temp-dirs`: Remove all tracked temporary directories
8 changes: 8 additions & 0 deletions scripts/testing/programs/cistem_test_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# cisTEM testing utilities package
# Import commonly used modules to make them easily accessible
from . import args
from . import make_tmp_runfile
from . import run_job
from . import temp_dir_manager
from . import threshold_utils
from . import image_replicate_analysis
99 changes: 71 additions & 28 deletions scripts/testing/programs/cistem_test_utils/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import argparse
import toml
# If we are in the container
from cistem_test_utils.temp_dir_manager import TempDirManager

default_data_dir = '/cisTEMdev/cistem_reference_images/TM_tests'

Expand Down Expand Up @@ -50,6 +51,7 @@ def get_config(args, data_dir: str, ref_number: int, img_number: int):
config['padding_factor'] = 1.0
config['mask_radius'] = 0
config['max_threads'] = 2
config['binning'] = 1.0

# some default search args that may be overwritten in a given test make_template_results
config['results_mip_to_use'] = 'mip_scaled.mrc'
Expand All @@ -60,7 +62,19 @@ def get_config(args, data_dir: str, ref_number: int, img_number: int):
config['result_ignore_n_pixels_from_edge'] = -1

for arg_val in args.args_to_check:
config[arg_val] = getattr(args, arg_val)
# Store the default value for comparison
default_val = config.get(arg_val)
# Get the value from args
arg_val_value = getattr(args, arg_val)

# Compare with default value if it exists in config and the arg value is not None
if arg_val in config and arg_val_value != default_val:

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While you are handling string vs float in the following lines, you are already doing a comparison here. So either this will break, or the subsequent comparison is overkill.

Let's just get rid of the extra checks under the comment "Handle string vs numeric comparison". Instead, do keep the check on None and not None.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've simplified the comparison logic in 59ae785 by removing the redundant type checking code and keeping only the check for None values, as you suggested.

# Only print if both values are not None
if default_val is not None and arg_val_value is not None:
print(f"User has set {arg_val} to value {arg_val_value}. Changing from default {default_val}")

# Update the config with the new value
config[arg_val] = arg_val_value

config['output_file_prefix'] = os.path.abspath(os.path.join(args.output_file_prefix, config.get('data')[img_number]['img_name']))
os.makedirs(config['output_file_prefix'], exist_ok=True)
Expand All @@ -77,28 +91,33 @@ def parse_TM_args(wanted_binary_name):
# To help check, we'll keep this dict to use as a check.
args_to_check = []
parser = argparse.ArgumentParser(description='Test the k3 rotation binary')
# Required argument

# Add temp directory management arguments using the TempDirManager class
temp_manager = TempDirManager()
temp_manager.add_arguments(parser)

# Binary path argument (required for running tests, optional for temp dir management)
parser.add_argument(
'--binary_path', help='Path to the directory with the binary to be tested (Required)', required=True)
'--binary-path', dest='binary_path', help='Path to the directory with the binary to be tested (Required for running tests)', required=False)
args_to_check.append('binary_path')

parser.add_argument('--test_data_path',
parser.add_argument('--test-data-path', dest='test_data_path',
help='Path to the test data directory (Optional - defaults to /cisTEMdev/cistem_reference_images/TM_tests, then pwd)')
args_to_check.append('test_data_path')

# Argument for the output file path and prefix default to /tmp
parser.add_argument('--output_file_prefix',
parser.add_argument('--output-file-prefix', dest='output_file_prefix',
help='Path and prefix for the output files (Optional - defaults to /tmp)', default='/tmp')
args_to_check.append('output_file_prefix')

parser.add_argument('--gpu_idx', default=0,
parser.add_argument('--gpu-idx', dest='gpu_idx', default=0,
help='GPU index to use (default: 0)')
args_to_check.append('gpu_idx')

# add another optional flag to specify that we are using an older version of cisTEM
# TODO: for now, just trying to catch the case where we use match_template not match_template_gpu, however,
# there could be other cases where we need to be more specific if the input options change more over time.
parser.add_argument('--old_cistem', action='store_true',
parser.add_argument('--old-cistem', dest='old_cistem', action='store_true',
help='Use this flag if you are using an older version of cisTEM')
args_to_check.append('old_cistem')

Expand All @@ -107,8 +126,23 @@ def parse_TM_args(wanted_binary_name):
help='Use this flag if you are using the cpu version of cisTEM')
args_to_check.append('cpu')

parser.add_argument('--fast-fft', dest='fast_fft', action='store_true', default=True,
help='Use FastFFT implementation (default: True)')
args_to_check.append('fast_fft')

parser.add_argument('--max-threads', dest='max_threads', type=int, default=2,
help='Maximum number of threads to use (default: 2)')
args_to_check.append('max_threads')

args = parser.parse_args()

# Check if any temp directory management options are being used
using_temp_management = args.list_temp_dirs or args.rm_temp_dir is not None or args.rm_all_temp_dirs

# If not using temp management, binary_path is required
if not using_temp_management and not args.binary_path:
parser.error("--binary-path is required when not using temporary directory management options")

args.binary_name = wanted_binary_name
args_to_check.append('binary_name')

Expand All @@ -120,31 +154,40 @@ def parse_TM_args(wanted_binary_name):
if not (args.old_cistem or args.cpu):
args.binary_name += '_gpu'

# Check if the binary exists
if not os.path.isfile(os.path.join(args.binary_path, args.binary_name)):
print('The binary ' + os.path.join(args.binary_path,
args.binary_name) + ' does not exist')
sys.exit(1)
# Check if any temp directory management options are being used
using_temp_management = args.list_temp_dirs or args.rm_temp_dir is not None or args.rm_all_temp_dirs

# Check if make_template_result binary exists
if not os.path.isfile(os.path.join(args.binary_path, args.results_binary_name)):
print('The binary ' + os.path.join(args.binary_path,
args.results_binary_name) + ' does not exist')
sys.exit(1)
# If not using temp management, binary_path is required and binaries should exist
if not using_temp_management and not args.binary_path:
parser.error("--binary-path is required when not using temporary directory management options")

# if the optional data path is not given, use the default
if args.test_data_path is None:
args.test_data_path = default_data_dir
# Only check for binaries if we're not just managing temp directories
if not using_temp_management:
# Check if the binary exists
if not os.path.isfile(os.path.join(args.binary_path, args.binary_name)):
print('The binary ' + os.path.join(args.binary_path,
args.binary_name) + ' does not exist')
sys.exit(1)

# Check if the test data directory exists
if not os.path.isdir(args.test_data_path):
print('The test data directory [' +
args.test_data_path + '] does not exist')
print('Please provide a valid path to the test data directory as a second argument')
sys.exit(1)
# Check if make_template_result binary exists
if not os.path.isfile(os.path.join(args.binary_path, args.results_binary_name)):
print('The binary ' + os.path.join(args.binary_path,
args.results_binary_name) + ' does not exist')
sys.exit(1)

# if the optional data path is not given, use the default
if args.test_data_path is None:
args.test_data_path = default_data_dir

# Check if the test data directory exists
if not os.path.isdir(args.test_data_path):
print('The test data directory [' +
args.test_data_path + '] does not exist')
print('Please provide a valid path to the test data directory as a second argument')
sys.exit(1)

# Check that the wanted output path exists and if not try to make it, if not error
os.makedirs(args.output_file_prefix, exist_ok=True)
# Check that the wanted output path exists and if not try to make it, if not error
os.makedirs(args.output_file_prefix, exist_ok=True)

args.args_to_check = args_to_check

Expand Down
182 changes: 182 additions & 0 deletions scripts/testing/programs/cistem_test_utils/image_replicate_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
"""
Image Replicate Analysis utilities for cisTEM tests.

This module provides a class for comparing multiple replicate MRC images and calculating
similarity metrics between them.
"""

import numpy as np
import mrcfile
import os
from typing import List, Dict, Tuple, Optional, Union


class ImageReplicateAnalysis:
    """Compare replicate MRC images and report pairwise similarity metrics.

    For every unordered pair of loaded images the mean absolute pixel
    difference is computed. When a threshold value was supplied, that
    difference is additionally expressed relative to the threshold in parts
    per million (``mean_abs_diff / threshold_value * 1e6``).
    """

    def __init__(self, image_filenames: List[str], threshold_value: Optional[float] = None):
        """
        Store the image filenames and the optional threshold value.

        No files are read here; images are loaded by load_images(), which
        analyze_replicates() calls on demand.

        Args:
            image_filenames: List of MRC image files to analyze (at least 2)
            threshold_value: Optional threshold used as the reference for relative
                error calculations. When given it must satisfy 0 < value <= 100.

        Raises:
            ValueError: If fewer than 2 image filenames are provided
            ValueError: If threshold_value is given but is not a number in (0, 100]
        """
        if len(image_filenames) < 2:
            raise ValueError("At least 2 image filenames are required for comparison")

        self.image_filenames = image_filenames

        # Validate threshold value if provided
        if threshold_value is not None:
            if not isinstance(threshold_value, (int, float)) or threshold_value <= 0 or threshold_value > 100:
                raise ValueError("Threshold value must be a positive number between 0 and 100")

        self.threshold_value = threshold_value
        self.image_data = []
        self.image_shapes = []
        self.image_dtypes = []

    def load_images(self) -> bool:
        """
        Load all image files and verify they have the same dimensions.

        Loading is all-or-nothing: the instance lists are cleared first and only
        repopulated once every file has been read and the shapes agree, so a
        failed call never leaves partially loaded data behind.

        Returns:
            bool: True if all images were loaded successfully with matching dimensions

        Raises:
            FileNotFoundError: If any image file cannot be found
            IOError: If reading a file fails (the original exception is chained)
            ValueError: If image dimensions do not match
        """
        self.image_data = []
        self.image_shapes = []
        self.image_dtypes = []

        loaded_data = []
        loaded_shapes = []
        loaded_dtypes = []

        # Load all images into local lists first
        for filename in self.image_filenames:
            if not os.path.exists(filename):
                raise FileNotFoundError(f"Image file not found: {filename}")

            try:
                with mrcfile.open(filename) as mrc:
                    data = mrc.data
                    loaded_data.append(data)
                    loaded_shapes.append(data.shape)
                    loaded_dtypes.append(data.dtype)
            except Exception as e:
                # Chain the cause so the underlying failure stays visible in tracebacks
                raise IOError(f"Error loading {filename}: {str(e)}") from e

        # Check that all images have the same dimensions (shapes are hashable tuples)
        if len(set(loaded_shapes)) > 1:
            raise ValueError(f"Image dimensions do not match: {loaded_shapes}")

        self.image_data = loaded_data
        self.image_shapes = loaded_shapes
        self.image_dtypes = loaded_dtypes

        return True

    def analyze_replicates(self) -> Dict:
        """
        Analyze all replicate images and calculate similarity metrics.

        Images are loaded first if none are loaded yet. A pair whose comparison
        raises is reported on stdout and left out of the results.

        Returns:
            Dict: Dictionary with the keys "num_replicates", "threshold_value",
                "pairwise_comparisons" (one entry per image pair, replicates
                numbered from 1) and "overall" (avg/min/max of the pairwise mean
                absolute differences, plus the matching relative errors in ppm
                when a positive threshold value is set)
        """
        if not self.image_data:
            self.load_images()

        num_replicates = len(self.image_data)
        has_threshold = self.threshold_value is not None and self.threshold_value > 0

        results = {
            "num_replicates": num_replicates,
            "threshold_value": self.threshold_value,
            "pairwise_comparisons": [],
            "overall": {}
        }

        all_mean_abs_diffs = []

        # Calculate metrics for each unordered pair of images
        for i in range(num_replicates):
            for j in range(i + 1, num_replicates):
                try:
                    first = np.asarray(self.image_data[i])
                    second = np.asarray(self.image_data[j])

                    # Subtract in at least float64: unsigned/narrow integer data would
                    # otherwise wrap around (e.g. uint8 0 - 1 == 255) and float32 data
                    # would be averaged with reduced precision. Complex stays complex.
                    work_dtype = np.result_type(first.dtype, second.dtype, np.float64)
                    difference = np.subtract(first, second, dtype=work_dtype)
                    mean_abs_diff = float(np.mean(np.abs(difference)))
                except Exception as e:
                    # Best effort: report the failing pair and continue with the others
                    print(f"Error comparing replicates {i+1} and {j+1}: {str(e)}")
                    continue

                all_mean_abs_diffs.append(mean_abs_diff)

                # Calculate relative error if threshold value is available
                if has_threshold:
                    relative_error_ppm = (mean_abs_diff / self.threshold_value) * 1e6  # Parts per million
                else:
                    relative_error_ppm = None

                results["pairwise_comparisons"].append({
                    "replicate_1": i + 1,
                    "replicate_2": j + 1,
                    "mean_abs_diff": mean_abs_diff,
                    "relative_error_ppm": relative_error_ppm
                })

        # Calculate overall metrics across all comparisons
        if all_mean_abs_diffs:
            overall = results["overall"]
            diff_avg = float(np.mean(all_mean_abs_diffs))
            diff_min = float(np.min(all_mean_abs_diffs))
            diff_max = float(np.max(all_mean_abs_diffs))

            overall["mean_abs_diff_avg"] = diff_avg
            overall["mean_abs_diff_min"] = diff_min
            overall["mean_abs_diff_max"] = diff_max

            # Calculate relative errors if threshold is available
            if has_threshold:
                overall["relative_error_ppm_avg"] = (diff_avg / self.threshold_value) * 1e6
                overall["relative_error_ppm_min"] = (diff_min / self.threshold_value) * 1e6
                overall["relative_error_ppm_max"] = (diff_max / self.threshold_value) * 1e6

        return results

    def print_analysis(self, results: Optional[Dict] = None) -> None:
        """
        Print the replicate analysis results in a formatted way.

        Args:
            results: Optional results dictionary from analyze_replicates().
                If None, will run analyze_replicates() internally.
        """
        if results is None:
            results = self.analyze_replicates()

        num_replicates = results["num_replicates"]
        threshold_value = results["threshold_value"]

        print("\nReproducibility Analysis:")
        print("========================")
        print(f"Number of replicates analyzed: {num_replicates}")

        if threshold_value is not None:
            print(f"Threshold value: {threshold_value:.3f}")

        # Print pairwise comparisons
        for comparison in results["pairwise_comparisons"]:
            i = comparison["replicate_1"]
            j = comparison["replicate_2"]
            mean_abs_diff = comparison["mean_abs_diff"]
            relative_error_ppm = comparison["relative_error_ppm"]

            print(f"\nComparing replicate {i} vs {j}:")
            print(f"  Mean absolute difference: {mean_abs_diff:.6f}")

            if relative_error_ppm is not None:
                print(f"  Relative error: {relative_error_ppm:.2f} ppm (relative to threshold value: {threshold_value:.3f})")

        # Print overall metrics
        overall = results.get("overall")
        if overall:
            print("\nOverall reproducibility:")
            print(f"  Mean absolute diff (avg): {overall['mean_abs_diff_avg']:.6f}")

            # The relative figure only exists when a positive threshold was used, so
            # look it up defensively instead of assuming the key is present.
            relative_avg = overall.get("relative_error_ppm_avg")
            if threshold_value is not None and relative_avg is not None:
                print(f"  Relative error (avg): {relative_avg:.2f} ppm "
                      f"(relative to threshold value: {threshold_value:.3f})")
Loading