diff --git a/.cistem_temp_dirs.log b/.cistem_temp_dirs.log new file mode 100644 index 000000000..0637a088a --- /dev/null +++ b/.cistem_temp_dirs.log @@ -0,0 +1 @@ +[] \ No newline at end of file diff --git a/scripts/testing/README.md b/scripts/testing/README.md new file mode 100644 index 000000000..258c202f0 --- /dev/null +++ b/scripts/testing/README.md @@ -0,0 +1,21 @@ +# cisTEM Testing Tools + +This directory contains test scripts and utilities for testing cisTEM functionality. + +## Running Tests + +Each test can be run individually from their respective directories. For example: + +```bash +# Run template matching reproducibility test +python /path/to/cisTEM/scripts/testing/programs/match_template/test_template_reproducibility.py --binary-path /path/to/binaries +``` + +## Temporary Directory Management + +Test scripts that create temporary files use a centralized tracking system to make cleanup easier. +You can list and clean up temporary directories using the following options: + +- `--list-temp-dirs`: List all tracked temporary directories +- `--rm-temp-dir INDEX`: Remove a specific temporary directory by index +- `--rm-all-temp-dirs`: Remove all tracked temporary directories \ No newline at end of file diff --git a/scripts/testing/programs/cistem_test_utils/__init__.py b/scripts/testing/programs/cistem_test_utils/__init__.py index e69de29bb..9d35520ec 100644 --- a/scripts/testing/programs/cistem_test_utils/__init__.py +++ b/scripts/testing/programs/cistem_test_utils/__init__.py @@ -0,0 +1,8 @@ +# cisTEM testing utilities package +# Import commonly used modules to make them easily accessible +from . import args +from . import make_tmp_runfile +from . import run_job +from . import temp_dir_manager +from . import threshold_utils +from . import image_replicate_analysis diff --git a/scripts/testing/programs/cistem_test_utils/args.py b/scripts/testing/programs/cistem_test_utils/args.py index 573c2c8b1..8b4c62ca5 100644 --- a/scripts/testing/programs/cistem_test_utils/args.py +++ b/scripts/testing/programs/cistem_test_utils/args.py @@ -3,6 +3,7 @@ import argparse import toml # If we are in the container +from cistem_test_utils.temp_dir_manager import TempDirManager default_data_dir = '/cisTEMdev/cistem_reference_images/TM_tests' @@ -50,6 +51,7 @@ def get_config(args, data_dir: str, ref_number: int, img_number: int): config['padding_factor'] = 1.0 config['mask_radius'] = 0 config['max_threads'] = 2 + config['binning'] = 1.0 # some default search args that may be overwritten in a given test make_template_results config['results_mip_to_use'] = 'mip_scaled.mrc' @@ -60,7 +62,19 @@ def get_config(args, data_dir: str, ref_number: int, img_number: int): config['result_ignore_n_pixels_from_edge'] = -1 for arg_val in args.args_to_check: - config[arg_val] = getattr(args, arg_val) + # Store the default value for comparison + default_val = config.get(arg_val) + # Get the value from args + arg_val_value = getattr(args, arg_val) + + # Compare with default value if it exists in config and the arg value is not None + if arg_val in config and arg_val_value != default_val: + # Only print if both values are not None + if default_val is not None and arg_val_value is not None: + print(f"User has set {arg_val} to value {arg_val_value}. Changing from default {default_val}") + + # Update the config with the new value + config[arg_val] = arg_val_value config['output_file_prefix'] = os.path.abspath(os.path.join(args.output_file_prefix, config.get('data')[img_number]['img_name'])) os.makedirs(config['output_file_prefix'], exist_ok=True) @@ -77,28 +91,33 @@ def parse_TM_args(wanted_binary_name): # To help check, we'll keep this dict to use as a check. args_to_check = [] parser = argparse.ArgumentParser(description='Test the k3 rotation binary') - # Required argument + + # Add temp directory management arguments using the TempDirManager class + temp_manager = TempDirManager() + temp_manager.add_arguments(parser) + + # Binary path argument (required for running tests, optional for temp dir management) parser.add_argument( - '--binary_path', help='Path to the directory with the binary to be tested (Required)', required=True) + '--binary-path', dest='binary_path', help='Path to the directory with the binary to be tested (Required for running tests)', required=False) args_to_check.append('binary_path') - parser.add_argument('--test_data_path', + parser.add_argument('--test-data-path', dest='test_data_path', help='Path to the test data directory (Optional - defaults to /cisTEMdev/cistem_reference_images/TM_tests, then pwd)') args_to_check.append('test_data_path') # Argument for the output file path and prefix default to /tmp - parser.add_argument('--output_file_prefix', + parser.add_argument('--output-file-prefix', dest='output_file_prefix', help='Path and prefix for the output files (Optional - defaults to /tmp)', default='/tmp') args_to_check.append('output_file_prefix') - parser.add_argument('--gpu_idx', default=0, + parser.add_argument('--gpu-idx', dest='gpu_idx', default=0, help='GPU index to use (default: 0)') args_to_check.append('gpu_idx') # add another optional flag to specify that we are using an older version of cisTEM # TODO: for now, just trying to catch the case where we use match_template not match_template_gpu, however, # there could be other cases where we need to be more specific if the input options change more over time. - parser.add_argument('--old_cistem', action='store_true', + parser.add_argument('--old-cistem', dest='old_cistem', action='store_true', help='Use this flag if you are using an older version of cisTEM') args_to_check.append('old_cistem') @@ -107,8 +126,23 @@ def parse_TM_args(wanted_binary_name): help='Use this flag if you are using the cpu version of cisTEM') args_to_check.append('cpu') + parser.add_argument('--fast-fft', dest='fast_fft', action='store_true', default=True, + help='Use FastFFT implementation (default: True)') + args_to_check.append('fast_fft') + + parser.add_argument('--max-threads', dest='max_threads', type=int, default=2, + help='Maximum number of threads to use (default: 2)') + args_to_check.append('max_threads') + args = parser.parse_args() + # Check if any temp directory management options are being used + using_temp_management = args.list_temp_dirs or args.rm_temp_dir is not None or args.rm_all_temp_dirs + + # If not using temp management, binary_path is required + if not using_temp_management and not args.binary_path: + parser.error("--binary-path is required when not using temporary directory management options") + args.binary_name = wanted_binary_name args_to_check.append('binary_name') @@ -120,31 +154,40 @@ def parse_TM_args(wanted_binary_name): if not (args.old_cistem or args.cpu): args.binary_name += '_gpu' - # Check if the binary exists - if not os.path.isfile(os.path.join(args.binary_path, args.binary_name)): - print('The binary ' + os.path.join(args.binary_path, - args.binary_name) + ' does not exist') - sys.exit(1) + # Check if any temp directory management options are being used + using_temp_management = args.list_temp_dirs or args.rm_temp_dir is not None or args.rm_all_temp_dirs - # Check if make_template_result binary exists - if not os.path.isfile(os.path.join(args.binary_path, args.results_binary_name)): - print('The binary ' + os.path.join(args.binary_path, - args.results_binary_name) + ' does not exist') - sys.exit(1) + # If not using temp management, binary_path is required and binaries should exist + if not using_temp_management and not args.binary_path: + parser.error("--binary-path is required when not using temporary directory management options") - # if the optional data path is not given, use the default - if args.test_data_path is None: - args.test_data_path = default_data_dir + # Only check for binaries if we're not just managing temp directories + if not using_temp_management: + # Check if the binary exists + if not os.path.isfile(os.path.join(args.binary_path, args.binary_name)): + print('The binary ' + os.path.join(args.binary_path, + args.binary_name) + ' does not exist') + sys.exit(1) - # Check if the test data directory exists - if not os.path.isdir(args.test_data_path): - print('The test data directory [' + - args.test_data_path + '] does not exist') - print('Please provide a valid path to the test data directory as a second argument') - sys.exit(1) + # Check if make_template_result binary exists + if not os.path.isfile(os.path.join(args.binary_path, args.results_binary_name)): + print('The binary ' + os.path.join(args.binary_path, + args.results_binary_name) + ' does not exist') + sys.exit(1) + + # if the optional data path is not given, use the default + if args.test_data_path is None: + args.test_data_path = default_data_dir + + # Check if the test data directory exists + if not os.path.isdir(args.test_data_path): + print('The test data directory [' + + args.test_data_path + '] does not exist') + print('Please provide a valid path to the test data directory as a second argument') + sys.exit(1) - # Check that the wanted output path exists and if not try to make it, if not error - os.makedirs(args.output_file_prefix, exist_ok=True) + # Check that the wanted output path exists and if not try to make it, if not error + os.makedirs(args.output_file_prefix, exist_ok=True) args.args_to_check = args_to_check diff --git a/scripts/testing/programs/cistem_test_utils/image_replicate_analysis.py b/scripts/testing/programs/cistem_test_utils/image_replicate_analysis.py new file mode 100644 index 000000000..433ead096 --- /dev/null +++ b/scripts/testing/programs/cistem_test_utils/image_replicate_analysis.py @@ -0,0 +1,182 @@ +""" +Image Replicate Analysis utilities for cisTEM tests. + +This module provides a class for comparing multiple replicate MRC images and calculating +similarity metrics between them. +""" + +import numpy as np +import mrcfile +import os +from typing import List, Dict, Tuple, Optional, Union + + +class ImageReplicateAnalysis: + """Class for analyzing replicate MRC images and calculating similarity metrics.""" + + def __init__(self, image_filenames: List[str], threshold_value: float = None): + """ + Initialize the ImageReplicateAnalysis with a list of image filenames and threshold value. + + Args: + image_filenames: List of MRC image files to analyze + threshold_value: Threshold value for relative error calculations (must be between 0 and 100) + + Raises: + ValueError: If threshold_value is not between 0 and 100 + ValueError: If fewer than 2 image filenames are provided + """ + if len(image_filenames) < 2: + raise ValueError("At least 2 image filenames are required for comparison") + + self.image_filenames = image_filenames + + # Validate threshold value if provided + if threshold_value is not None: + if not isinstance(threshold_value, (int, float)) or threshold_value <= 0 or threshold_value > 100: + raise ValueError("Threshold value must be a positive number between 0 and 100") + + self.threshold_value = threshold_value + self.image_data = [] + self.image_shapes = [] + self.image_dtypes = [] + + def load_images(self) -> bool: + """ + Load all image files and verify they have the same dimensions. + + Returns: + bool: True if all images were loaded successfully with matching dimensions + + Raises: + FileNotFoundError: If any image file cannot be found + ValueError: If image dimensions do not match + """ + self.image_data = [] + self.image_shapes = [] + self.image_dtypes = [] + + # Load all images + for filename in self.image_filenames: + if not os.path.exists(filename): + raise FileNotFoundError(f"Image file not found: {filename}") + + try: + with mrcfile.open(filename) as mrc: + self.image_data.append(mrc.data) + self.image_shapes.append(mrc.data.shape) + self.image_dtypes.append(mrc.data.dtype) + except Exception as e: + raise IOError(f"Error loading {filename}: {str(e)}") + + # Check that all images have the same dimensions + if len(set(str(shape) for shape in self.image_shapes)) > 1: + raise ValueError(f"Image dimensions do not match: {self.image_shapes}") + + return True + + def analyze_replicates(self) -> Dict: + """ + Analyze all replicate images and calculate similarity metrics. + + Returns: + Dict: Dictionary containing pairwise and overall similarity metrics + """ + if not self.image_data: + self.load_images() + + num_replicates = len(self.image_data) + + # Generate all pairwise comparisons + pairs = [(i, j) for i in range(num_replicates) for j in range(i+1, num_replicates)] + + results = { + "num_replicates": num_replicates, + "threshold_value": self.threshold_value, + "pairwise_comparisons": [], + "overall": {} + } + + all_mean_abs_diffs = [] + + # Calculate metrics for each pair of images + for i, j in pairs: + try: + # Calculate mean absolute difference + mean_abs_diff = np.mean(np.abs(self.image_data[i] - self.image_data[j])) + all_mean_abs_diffs.append(mean_abs_diff) + + # Calculate relative error if threshold value is available + if self.threshold_value and self.threshold_value > 0: + relative_error_ppm = (mean_abs_diff / self.threshold_value) * 1e6 # Parts per million + else: + relative_error_ppm = None + + comparison_result = { + "replicate_1": i + 1, + "replicate_2": j + 1, + "mean_abs_diff": mean_abs_diff, + "relative_error_ppm": relative_error_ppm + } + + results["pairwise_comparisons"].append(comparison_result) + + except Exception as e: + print(f"Error comparing replicates {i+1} and {j+1}: {str(e)}") + + # Calculate overall metrics across all comparisons + if all_mean_abs_diffs: + results["overall"]["mean_abs_diff_avg"] = np.mean(all_mean_abs_diffs) + results["overall"]["mean_abs_diff_min"] = np.min(all_mean_abs_diffs) + results["overall"]["mean_abs_diff_max"] = np.max(all_mean_abs_diffs) + + # Calculate average relative error if threshold is available + if self.threshold_value and self.threshold_value > 0: + results["overall"]["relative_error_ppm_avg"] = (np.mean(all_mean_abs_diffs) / self.threshold_value) * 1e6 + results["overall"]["relative_error_ppm_min"] = (np.min(all_mean_abs_diffs) / self.threshold_value) * 1e6 + results["overall"]["relative_error_ppm_max"] = (np.max(all_mean_abs_diffs) / self.threshold_value) * 1e6 + + return results + + def print_analysis(self, results: Optional[Dict] = None) -> None: + """ + Print the replicate analysis results in a formatted way. + + Args: + results: Optional results dictionary from analyze_replicates(). + If None, will run analyze_replicates() internally. + """ + if results is None: + results = self.analyze_replicates() + + num_replicates = results["num_replicates"] + threshold_value = results["threshold_value"] + + print("\nReproducibility Analysis:") + print("========================") + print(f"Number of replicates analyzed: {num_replicates}") + + if threshold_value is not None: + print(f"Threshold value: {threshold_value:.3f}") + + # Print pairwise comparisons + for comparison in results["pairwise_comparisons"]: + i = comparison["replicate_1"] + j = comparison["replicate_2"] + mean_abs_diff = comparison["mean_abs_diff"] + relative_error_ppm = comparison["relative_error_ppm"] + + print(f"\nComparing replicate {i} vs {j}:") + print(f" Mean absolute difference: {mean_abs_diff:.6f}") + + if relative_error_ppm is not None: + print(f" Relative error: {relative_error_ppm:.2f} ppm (relative to threshold value: {threshold_value:.3f})") + + # Print overall metrics + if "overall" in results and results["overall"]: + print("\nOverall reproducibility:") + print(f" Mean absolute diff (avg): {results['overall']['mean_abs_diff_avg']:.6f}") + + if threshold_value is not None: + print(f" Relative error (avg): {results['overall']['relative_error_ppm_avg']:.2f} ppm " + f"(relative to threshold value: {threshold_value:.3f})") \ No newline at end of file diff --git a/scripts/testing/programs/cistem_test_utils/make_tmp_runfile.py b/scripts/testing/programs/cistem_test_utils/make_tmp_runfile.py index 687d07869..541ff9554 100644 --- a/scripts/testing/programs/cistem_test_utils/make_tmp_runfile.py +++ b/scripts/testing/programs/cistem_test_utils/make_tmp_runfile.py @@ -9,8 +9,22 @@ def match_template(config): use_gpu = "no" else: use_gpu = "yes" + + # Check if fast_fft is requested but GPU is not enabled + if config.get('fast_fft') and use_gpu == "no": + print("Error: --fast_fft option requires GPU to be enabled. Cannot use fast FFT with CPU.") + exit(1) + + # Set the use_fast_fft parameter based on the fast_fft flag + if config.get('fast_fft'): + use_fast_fft = "yes" + else: + use_fast_fft = "no" - high_res_limit = 2*config.get('data')[config.get('img_number')].get('pixel_size') + # Calculate high resolution limit with binning factor + high_res_limit = 2 * config.get('data')[config.get('img_number')].get('pixel_size') * config.get('binning') + # Ensure high_res_limit is formatted with at least 4 decimal places + high_res_limit_str = f"{high_res_limit:.4f}" # make a string that will then be used to create a temporary file for feading to stdin # TODO: optional output dir @@ -35,7 +49,7 @@ def match_template(config): str(config.get('data')[config.get('img_number')].get('ctf').get('defocus_2')), str(config.get('data')[config.get('img_number')].get('ctf').get('defocus_angle')), str(config.get('data')[config.get('img_number')].get('ctf').get('extra_phase_shift')), - str(high_res_limit), + high_res_limit_str, str(config.get('out_of_plane_angle')), str(config.get('in_plane_angle')), str(config.get('defocus_range')), @@ -46,6 +60,7 @@ def match_template(config): str(config.get('mask_radius')), str(config.get('model')[config.get('ref_number')].get('symmetry')), use_gpu, + use_fast_fft, str(config.get('max_threads'))] pre_process_cmd = " " diff --git a/scripts/testing/programs/cistem_test_utils/temp_dir_manager.py b/scripts/testing/programs/cistem_test_utils/temp_dir_manager.py new file mode 100644 index 000000000..379ab01cd --- /dev/null +++ b/scripts/testing/programs/cistem_test_utils/temp_dir_manager.py @@ -0,0 +1,217 @@ +""" +Temporary Directory Manager for cisTEM tests. + +This module provides functionality to track, list, and clean up temporary directories +created during cisTEM test runs. It maintains a log file in the current working directory +that records information about each temporary directory including its path, creation time, +and a unique identifier. +""" + +import os +import tempfile +import datetime +import json +import shutil +import argparse +from typing import List, Dict, Optional, Tuple, Union + + +class TempDirManager: + """Class for managing temporary directories used in cisTEM tests.""" + + def __init__(self): + """Initialize the TempDirManager with a log file in the current working directory.""" + # The log file will be a hidden file in the current working directory + self.log_file = os.path.join(os.getcwd(), '.cistem_temp_dirs.log') + + # Add temporary directory management arguments to an ArgumentParser + self.temp_management_group = None + + def add_arguments(self, parser): + """ + Add temporary directory management arguments to an ArgumentParser. + + Args: + parser: argparse.ArgumentParser instance + """ + self.temp_management_group = parser.add_argument_group('Temporary Directory Management') + self.temp_management_group.add_argument('--list-temp-dirs', action='store_true', + help='List all tracked temporary directories') + self.temp_management_group.add_argument('--rm-temp-dir', type=int, metavar='INDEX', + help='Remove a specific temporary directory by index') + self.temp_management_group.add_argument('--rm-all-temp-dirs', action='store_true', + help='Remove all tracked temporary directories') + + def create_temp_dir(self, prefix: str = "cistem_test_", dir: str = "/tmp") -> str: + """ + Create a temporary directory and log its information. + + Args: + prefix: Prefix for the temporary directory name + dir: Parent directory where the temp directory will be created + + Returns: + Path to the created temporary directory + """ + # Create the temporary directory + temp_dir = tempfile.mkdtemp(dir=dir, prefix=prefix) + + # Log the created directory + log_entry = { + 'path': temp_dir, + 'created_at': datetime.datetime.now().isoformat(), + 'key': os.path.basename(temp_dir) # Use the basename as the unique key + } + + # Read existing log if it exists + log_entries = [] + if os.path.exists(self.log_file): + try: + with open(self.log_file, 'r') as f: + log_entries = json.load(f) + except json.JSONDecodeError: + # If the file is corrupted, start with an empty list + log_entries = [] + + # Add the new entry and write back to the log file + log_entries.append(log_entry) + with open(self.log_file, 'w') as f: + json.dump(log_entries, f, indent=2) + + return temp_dir + + def list_temp_dirs(self) -> List[Dict]: + """ + List all logged temporary directories. + + Returns: + List of dictionaries containing information about each temp directory + """ + if not os.path.exists(self.log_file): + return [] + + try: + with open(self.log_file, 'r') as f: + log_entries = json.load(f) + + # Filter out directories that no longer exist + valid_entries = [] + for entry in log_entries: + if os.path.exists(entry['path']): + valid_entries.append(entry) + + # Update the log file if some directories were already removed + if len(valid_entries) != len(log_entries): + with open(self.log_file, 'w') as f: + json.dump(valid_entries, f, indent=2) + + return valid_entries + except json.JSONDecodeError: + # If the file is corrupted, return an empty list + return [] + + def remove_temp_dir(self, index: int) -> Tuple[bool, str]: + """ + Remove a temporary directory by its index in the log. + + Args: + index: The index of the directory to remove (0-based) + + Returns: + Tuple of (success, message) + """ + log_entries = self.list_temp_dirs() + + if not log_entries: + return False, "No temporary directories found in the log." + + if index < 0 or index >= len(log_entries): + return False, f"Invalid index: {index}. Valid range is 0-{len(log_entries)-1}." + + entry = log_entries[index] + path = entry['path'] + + # Check if the directory exists + if not os.path.exists(path): + # Remove from log and return + log_entries.pop(index) + with open(self.log_file, 'w') as f: + json.dump(log_entries, f, indent=2) + return False, f"Directory {path} no longer exists. Removed from log." + + # Remove the directory + try: + shutil.rmtree(path) + # Update the log + log_entries.pop(index) + with open(self.log_file, 'w') as f: + json.dump(log_entries, f, indent=2) + return True, f"Successfully removed {path}" + except Exception as e: + return False, f"Failed to remove {path}: {str(e)}" + + def remove_all_temp_dirs(self) -> Tuple[int, int]: + """ + Remove all temporary directories in the log. + + Returns: + Tuple of (success_count, failure_count) + """ + log_entries = self.list_temp_dirs() + success_count = 0 + failure_count = 0 + + for entry in log_entries: + path = entry['path'] + if os.path.exists(path): + try: + shutil.rmtree(path) + success_count += 1 + except: + failure_count += 1 + + # Clear the log file + with open(self.log_file, 'w') as f: + json.dump([], f) + + return success_count, failure_count + + def print_temp_dirs(self) -> None: + """ + Print information about all logged temporary directories. + """ + log_entries = self.list_temp_dirs() + + if not log_entries: + print("No temporary directories found.") + return + + print(f"Found {len(log_entries)} temporary directories:") + print("-" * 80) + for i, entry in enumerate(log_entries): + created_at = datetime.datetime.fromisoformat(entry['created_at']) + formatted_time = created_at.strftime("%Y-%m-%d %H:%M:%S") + print(f"[{i}] {entry['path']}") + print(f" Created: {formatted_time}") + print(f" Key: {entry['key']}") + print("-" * 80) + + +# Create a singleton instance for backwards compatibility with existing code +_manager = TempDirManager() + +# Provide functions that delegate to the instance methods for backwards compatibility +def create_temp_dir(prefix: str = "cistem_test_", dir: str = "/tmp") -> str: + return _manager.create_temp_dir(prefix, dir) + +def list_temp_dirs() -> List[Dict]: + return _manager.list_temp_dirs() + +def remove_temp_dir(index: int) -> Tuple[bool, str]: + return _manager.remove_temp_dir(index) + +def remove_all_temp_dirs() -> Tuple[int, int]: + return _manager.remove_all_temp_dirs() + +def print_temp_dirs() -> None: + return _manager.print_temp_dirs() \ No newline at end of file diff --git a/scripts/testing/programs/cistem_test_utils/threshold_utils.py b/scripts/testing/programs/cistem_test_utils/threshold_utils.py new file mode 100644 index 000000000..a9980757a --- /dev/null +++ b/scripts/testing/programs/cistem_test_utils/threshold_utils.py @@ -0,0 +1,41 @@ +""" +Utilities for handling threshold values in cisTEM outputs. + +This module provides functions for extracting threshold values from cisTEM output files. +""" + +import os +import re + + +def extract_threshold_value(hist_file_path): + """ + Extract the threshold value from the histogram text file. + + The threshold value is in the first line of the file, which starts with + "# Expected threshold = ". This function extracts the numerical value following this marker. + + Args: + hist_file_path (str): Path to the histogram text file + + Returns: + float: The extracted threshold value + + Raises: + FileNotFoundError: If the file doesn't exist + ValueError: If the threshold value cannot be found or parsed + """ + if not os.path.exists(hist_file_path): + raise FileNotFoundError(f"Histogram file not found at {hist_file_path}") + + with open(hist_file_path, 'r') as f: + first_line = f.readline().strip() + + # Use regex to extract the threshold value from the line + # Looking for a pattern like "# Expected threshold = 6.90" + match = re.search(r'# Expected threshold = ([\d.e+-]+)', first_line) + if not match: + raise ValueError(f"Could not find threshold value in {hist_file_path}") + + threshold_value = float(match.group(1)) + return threshold_value \ No newline at end of file diff --git a/scripts/testing/programs/image_replicate_analysis.py b/scripts/testing/programs/image_replicate_analysis.py new file mode 100755 index 000000000..ba5a6728c --- /dev/null +++ b/scripts/testing/programs/image_replicate_analysis.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 +""" +Image Replicate Analysis Tool + +This script analyzes multiple replicate MRC images and calculates similarity metrics +between them. It accepts a file containing a list of image filenames and a threshold +value for calculating relative error metrics. + +Example usage: + python image_replicate_analysis.py --image-list /path/to/image_list.txt --threshold 6.90 +""" + +import sys +import os +import argparse + +# Handle Python module import paths +script_dir = os.path.dirname(os.path.abspath(__file__)) +sys.path.append(os.path.dirname(script_dir)) + +try: + from cistem_test_utils.image_replicate_analysis import ImageReplicateAnalysis +except ImportError: + # Fall back to annoying_hack.py approach for compatibility + sys.path.insert(0, os.path.join(script_dir)) + import annoying_hack + from cistem_test_utils.image_replicate_analysis import ImageReplicateAnalysis + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Analyze replicate MRC images and calculate similarity metrics" + ) + + parser.add_argument( + "--image-list", + required=True, + help="Path to a file containing a list of MRC image filenames (one per line)" + ) + parser.add_argument( + "--threshold", + type=float, + required=True, + help="Threshold value for relative error calculations (must be between 0 and 100)" + ) + + return parser.parse_args() + + +def read_image_list(file_path): + """ + Read image filenames from a text file. + + Args: + file_path: Path to the text file containing image filenames + + Returns: + List of image filenames + + Raises: + FileNotFoundError: If the file doesn't exist + ValueError: If the file is empty or contains invalid entries + """ + if not os.path.exists(file_path): + raise FileNotFoundError(f"Image list file not found: {file_path}") + + with open(file_path, 'r') as f: + image_files = [line.strip() for line in f if line.strip()] + + if not image_files: + raise ValueError(f"No image filenames found in {file_path}") + + # Check that all image files exist + missing_files = [f for f in image_files if not os.path.exists(f)] + if missing_files: + raise FileNotFoundError(f"The following image files were not found: {', '.join(missing_files)}") + + return image_files + + +def main(): + """Main entry point for the script.""" + try: + args = parse_args() + + # Read image filenames from the list file + image_files = read_image_list(args.image_list) + print(f"Found {len(image_files)} image files in {args.image_list}") + + # Create image replicate analyzer instance + analyzer = ImageReplicateAnalysis(image_files, args.threshold) + + # Load images and verify dimensions + print("Loading images and verifying dimensions...") + analyzer.load_images() + + # Run the analysis + print("Analyzing replicate images...") + results = analyzer.analyze_replicates() + + # Print the results + analyzer.print_analysis(results) + + return 0 + + except KeyboardInterrupt: + print("\nAnalysis interrupted by user.") + return 130 + except Exception as e: + print(f"Error: {str(e)}") + import traceback + traceback.print_exc() + return 1 + + +if __name__ == "__main__": + sys.exit(main()) \ No newline at end of file diff --git a/scripts/testing/programs/match_template/test_template_reproducibility.py b/scripts/testing/programs/match_template/test_template_reproducibility.py new file mode 100755 index 000000000..3cf0781b7 --- /dev/null +++ b/scripts/testing/programs/match_template/test_template_reproducibility.py @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +""" +Template Matching Reproducibility Test + +This script runs the cisTEM template matching GPU binary multiple times using the same input data +and parameters, then analyzes the reproducibility by comparing the resulting MIP images. + +It uses the Apoferritin dataset for testing, and saves the MIP (Maximum Intensity Projection) +files to a temporary directory with unique filenames for each replicate. + +The script loads the MIP images using the mrcfile package and compares them using numpy to +measure pixel similarity between replicates with various metrics. + +The script has two operational modes controlled by the FAST_DEVELOPMENT_CONDITIONS flag: +1. When True: Uses only binning=1.2 with 5.0 degree angular sampling (faster development mode) +2. When False: Tests all binning values (1.0, 1.2, 1.6) with 3.0 degree angular sampling + +Instead of performing analysis directly, this script outputs commands for the user to run +the image_replicate_analysis.py tool separately. +""" + +import annoying_hack + +from os.path import join, exists +from os import makedirs +import cistem_test_utils.args as tmArgs +import cistem_test_utils.make_tmp_runfile as mktmp +import cistem_test_utils.run_job as runner +from cistem_test_utils.temp_dir_manager import TempDirManager +from cistem_test_utils.threshold_utils import extract_threshold_value +from cistem_test_utils.image_replicate_analysis import ImageReplicateAnalysis +import mrcfile +import numpy as np +import tempfile +import os +import shutil +import sys +import re + +# By default the "_gpu" suffix will be added unless the --old-cistem flag is used +# or the --cpu flag is used +wanted_binary_name = 'match_template' + +# Define the number of replicates to run +NUM_REPLICATES = 5 + +# Define whether to use fast development conditions +FAST_DEVELOPMENT_CONDITIONS = True + +# Define the binning values to test - modified based on development conditions +if FAST_DEVELOPMENT_CONDITIONS: + BINNING_VALUES = [1.2] # Only use 1.2 binning for faster development +else: + BINNING_VALUES = [1.0, 1.2, 1.6] + + +def main(): + try: + # Create a temp_dir_manager instance + temp_manager = TempDirManager() + + # Parse command-line arguments + args = tmArgs.parse_TM_args(wanted_binary_name) + + # Handle temp directory management options + if args.list_temp_dirs: + temp_manager.print_temp_dirs() + return 0 + + if args.rm_temp_dir is not None: + success, message = temp_manager.remove_temp_dir(args.rm_temp_dir) + print(message) + return 0 if success else 1 + + if args.rm_all_temp_dirs: + success_count, failure_count = temp_manager.remove_all_temp_dirs() + print(f"Successfully removed {success_count} temporary directories.") + if failure_count > 0: + print(f"Failed to remove {failure_count} temporary directories.") + return 0 if failure_count == 0 else 1 + + # Create a temporary directory to store our replicate MIPs and track it + temp_dir = temp_manager.create_temp_dir(prefix="template_match_reproducibility_") + print(f"Temporary directory created at: {temp_dir}") + + # Test each binning value + for binning_value in BINNING_VALUES: + print(f"\n{'-'*80}") + print(f"Testing with binning value: {binning_value}") + print(f"{'-'*80}") + + # Create a subdirectory for this binning value + binning_dir = join(temp_dir, f"binning_{binning_value}") + os.makedirs(binning_dir, exist_ok=True) + + # We'll run template matching for the defined number of replicates + elapsed_time = [0] * NUM_REPLICATES + mip_filenames = [] + hist_filenames = [] + threshold_values = [] + + # Run the template matching for each replicate + for replicate in range(NUM_REPLICATES): + try: + # Use Apoferritin dataset with image 0 + config = tmArgs.get_config(args, 'Apoferritin', 0, 0) + + # Set the angular sampling step based on development conditions + if FAST_DEVELOPMENT_CONDITIONS: + config['out_of_plane_angle'] = 5.0 + config['in_plane_angle'] = 5.0 + else: + config['out_of_plane_angle'] = 3.0 + config['in_plane_angle'] = 3.0 + + # Set the binning value + config['binning'] = binning_value + + # Create a unique output file prefix for each replicate + original_prefix = config['output_file_prefix'] + config['output_file_prefix'] = join(binning_dir, f"replicate_{replicate+1}") + makedirs(config['output_file_prefix'], exist_ok=True) + + # Run the template matching + tmp_filename_match_template, tmp_filename_make_template_results = mktmp.make_tmp_runfile(config) + elapsed_time[replicate] = runner.run_job(tmp_filename_match_template) + runner.run_job(tmp_filename_make_template_results) + + # The MIP file is already in our temp directory with the unique replicate prefix + mip_file = join(config['output_file_prefix'], 'mip.mrc') + + # Check if MIP file exists + if not exists(mip_file): + raise FileNotFoundError(f"MIP file not found at {mip_file}") + + # Get the histogram file path and extract the threshold value + hist_file = join(config['output_file_prefix'], 'hist.txt') + threshold_value = extract_threshold_value(hist_file) + + mip_filenames.append(mip_file) + hist_filenames.append(hist_file) + threshold_values.append(threshold_value) + + # Print threshold value only for the first replicate + if replicate == 0: + print(f"Threshold value: {threshold_value:.3f}") + + print(f"Completed replicate {replicate+1}/{NUM_REPLICATES}, time: {elapsed_time[replicate]:.2f}s") + + except Exception as e: + print(f"Error during replicate {replicate+1}: {str(e)}") + continue + + # Check if we have at least 2 replicates + if len(mip_filenames) < 2: + print(f"Error: Only {len(mip_filenames)} replicates were successfully processed") + print("At least 2 replicates are required for comparison. Exiting.") + return 1 + + # Verify all threshold values are the same + if threshold_values: + if not all(abs(v - threshold_values[0]) < 1e-10 for v in threshold_values): + print("Warning: Threshold values differ between replicates:") + for i, val in enumerate(threshold_values): + print(f" Replicate {i+1}: {val:.6e}") + + # Use the first threshold value for calculations + threshold_value = threshold_values[0] + else: + print("Error: No threshold values were extracted.") + return 1 + + print(f"Binning value: {binning_value}") + + # Instead of running analysis directly, print commands for the user to run + print(f"\n{'-'*80}") + print(f"Replicate generation complete for binning {binning_value}") + print(f"{'-'*80}") + + # Suggest a list file location but don't create it - let user do it with wildcards + mip_list_file = join(binning_dir, "mip_files.txt") + + print("\nTo create a list of MIP files for analysis:") + print(f"find {binning_dir} -name 'mip.mrc' > {mip_list_file}") + + print("\nOr to analyze other image types, like theta maps:") + print(f"find {binning_dir} -name 'theta.mrc' > {binning_dir}/theta_files.txt") + + print("\nTo analyze the replicate results with the saved MIP list:") + print(f"python3 scripts/testing/programs/image_replicate_analysis.py --image-list {mip_list_file} --threshold {threshold_value:.3f}") + + print("\nOr to extract the threshold value directly from histogram files:") + print(f"threshold=$(awk '/threshold/{{print $NF; exit}}' {hist_filenames[0]})") + print(f"python3 scripts/testing/programs/image_replicate_analysis.py --image-list {mip_list_file} --threshold $threshold") + + # Print the directory where files are saved + print(f"\nMIP files saved in: {temp_dir}") + print("To list temp directories: --list-temp-dirs") + print("To remove this directory: --rm-temp-dir INDEX") + print("To remove all temp directories: --rm-all-temp-dirs") + + return 0 + + except KeyboardInterrupt: + print("\nTest interrupted by user.") + return 130 + except Exception as e: + print(f"Error in template matching reproducibility test: {str(e)}") + import traceback + traceback.print_exc() + return 1 + + +# Check if main function and run +if __name__ == '__main__': + sys.exit(main()) \ No newline at end of file