From f4609f42348663f894722421238b5ece1339db4b Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Mon, 10 Mar 2025 09:47:09 +0000 Subject: [PATCH 01/33] Possible subtomogram extraction service --- pyproject.toml | 1 + .../services/extract_subtomo.py | 347 ++++++++++++++++++ 2 files changed, 348 insertions(+) create mode 100644 src/cryoemservices/services/extract_subtomo.py diff --git a/pyproject.toml b/pyproject.toml index f67f8591..976f94ec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -90,6 +90,7 @@ dev = [ EMISPyB = "cryoemservices.services.ispyb_connector:EMISPyB" Extract = "cryoemservices.services.extract:Extract" ExtractClass = "cryoemservices.services.extract_class:ExtractClass" + ExtractSubTomo = "cryoemservices.services.extract_subtomo:ExtractSubTomo" IceBreaker = "cryoemservices.services.icebreaker:IceBreaker" Images = "cryoemservices.services.images:Images" MembrainSeg = "cryoemservices.services.membrain_seg:MembrainSeg" diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py new file mode 100644 index 00000000..8bd309fc --- /dev/null +++ b/src/cryoemservices/services/extract_subtomo.py @@ -0,0 +1,347 @@ +from __future__ import annotations + +from pathlib import Path + +import mrcfile +import numpy as np +import workflows.recipe +from gemmi import cif +from pydantic import BaseModel, Field, ValidationError +from workflows.services.common_service import CommonService + +from cryoemservices.util.models import MockRW +from cryoemservices.util.relion_service_options import ( + RelionServiceOptions, + update_relion_options, +) +from cryoemservices.util.tomo_output_files import _get_tilt_name_v5_12 + + +class ExtractSubTomoParameters(BaseModel): + tomogram: str = Field(..., min_length=1) + coord_list_file: str = Field(..., min_length=1) + tilt_alignment_file: str = Field(..., min_length=1) + newstack_file: str = Field(..., min_length=1) + output_file: str = Field(..., min_length=1) + pixel_size: float + refined_tilt_axis: float + particle_diameter: float = 0 + boxsize: int = 256 + small_boxsize: int = 64 + binning: int = 8 + min_frames: int = 1 + maximum_dose: int = -1 + tomogram_binning: int = 4 + invert_contrast: bool = True + bg_radius: int = -1 + relion_options: RelionServiceOptions + + +class ExtractSubTomo(CommonService): + """ + A service for extracting particles from cryolo autopicking for tomograms + """ + + # Human readable service name + _service_name = "ExtractSubTomo" + + # Logger name + _logger_name = "cryoemservices.services.extract_subtomo" + + # Job name + job_type = "relion.pseudosubtomo" + + def initializing(self): + """Subscribe to a queue. Received messages must be acknowledged.""" + self.log.info("Extract service starting") + workflows.recipe.wrap_subscribe( + self._transport, + self._environment["queue"] or "extract_subtomo", + self.extract_subtomo, + acknowledgement=True, + log_extender=self.extend_log, + allow_non_recipe_messages=True, + ) + + def extract_subtomo(self, rw, header: dict, message: dict): + """Main function which interprets and processes received messages""" + if not rw: + self.log.info("Received a simple message") + if not isinstance(message, dict): + self.log.error("Rejected invalid simple message") + self._transport.nack(header) + return + + # Create a wrapper-like object that can be passed to functions + # as if a recipe wrapper was present. + rw = MockRW(self._transport) + rw.recipe_step = {"parameters": message} + + try: + if isinstance(message, dict): + extract_subtomo_params = ExtractSubTomoParameters( + **{**rw.recipe_step.get("parameters", {}), **message} + ) + else: + extract_subtomo_params = ExtractSubTomoParameters( + **{**rw.recipe_step.get("parameters", {})} + ) + except (ValidationError, TypeError) as e: + self.log.warning( + f"Extraction parameter validation failed for message: {message} " + f"and recipe parameters: {rw.recipe_step.get('parameters', {})} " + f"with exception: {e}" + ) + rw.transport.nack(header) + return + + self.log.info( + f"Inputs: {extract_subtomo_params.tomogram}, " + f"{extract_subtomo_params.tilt_alignment_file}, " + f"{extract_subtomo_params.coord_list_file} " + f"Output: {extract_subtomo_params.output_file}" + ) + + # Update the relion options and get box sizes + extract_subtomo_params.relion_options = update_relion_options( + extract_subtomo_params.relion_options, dict(extract_subtomo_params) + ) + + # Make sure the output directory exists + if not Path(extract_subtomo_params.output_file).parent.exists(): + Path(extract_subtomo_params.output_file).parent.mkdir(parents=True) + + # If no background radius set diameter as 75% of box + if extract_subtomo_params.bg_radius == -1: + extract_subtomo_params.bg_radius = round( + 0.375 * extract_subtomo_params.relion_options.small_boxsize + ) + + # Find the locations of the particles + coords_file = cif.read(extract_subtomo_params.coord_list_file) + coords_block = coords_file.find_block("cryolo") + particles_x = np.array(coords_block.find_loop("_CoordinateX")) + particles_y = np.array(coords_block.find_loop("_CoordinateY")) + particles_z = np.array(coords_block.find_loop("_CoordinateZ")) + + # Get the shifts between tilts + shift_data = np.genfromtxt(extract_subtomo_params.tilt_alignment_file) + # tilt_ids = shift_data[:, 0].astype(int) + x_shifts = shift_data[:, 3].astype(float) + y_shifts = shift_data[:, 4].astype(float) + tilt_count = len(x_shifts) + + # Extraction + with mrcfile.open(extract_subtomo_params.tomogram) as input_tomogram: + input_tomogram_image = np.array(input_tomogram.data, dtype=np.float32) + image_size = np.shape(input_tomogram_image) + + centre_x = 0 + centre_y = image_size[1] / 2 + tilt_axis_radians = extract_subtomo_params.refined_tilt_axis * np.pi / 180 + + x_coords_in_tilts = centre_x + ( + (particles_x - centre_x) * np.cos(tilt_axis_radians) + - (particles_y - centre_y) * np.sin(tilt_axis_radians) + ) + y_coords_in_tilts = centre_y + ( + (particles_x - centre_x) * np.sin(tilt_axis_radians) + + (particles_y - centre_y) * np.cos(tilt_axis_radians) + ) + + # Downscaling dimensions + extract_subtomo_params.relion_options.pixel_size_downscaled = ( + extract_subtomo_params.pixel_size + * extract_subtomo_params.relion_options.boxsize + / extract_subtomo_params.relion_options.small_boxsize + ) + extract_width = round(extract_subtomo_params.relion_options.boxsize / 2) + scaled_extract_width = round( + extract_subtomo_params.relion_options.small_boxsize / 2 + ) + box_len = extract_subtomo_params.relion_options.small_boxsize + pixel_size = extract_subtomo_params.relion_options.pixel_size_downscaled + + # Read in tilt images + tilt_images = [] + with open(extract_subtomo_params.newstack_file) as ns_file: + while True: + line = ns_file.readline() + if not line: + break + elif line.startswith("/"): + tilt_images.append(line) + + for particle in range(len(particles_x)): + output_mrc_stack = np.array([]) + for tilt in range(tilt_count): + # Extract the particle image and pad the edges if it is not square + x_left_pad = 0 + x_right_pad = 0 + y_top_pad = 0 + y_bot_pad = 0 + + x_left = ( + x_coords_in_tilts[particle] - extract_width - x_shifts[particle] + ) + if x_left < 0: + x_left_pad = -x_left + x_left = 0 + x_right = ( + x_coords_in_tilts[particle] + extract_width - x_shifts[particle] + ) + if x_right >= image_size[1]: + x_right_pad = x_right - image_size[1] + x_right = image_size[1] + y_top = y_coords_in_tilts[particle] - extract_width - y_shifts[particle] + if y_top < 0: + y_top_pad = -y_top + y_top = 0 + y_bot = y_coords_in_tilts[particle] + extract_width - y_shifts[particle] + if y_bot >= image_size[0]: + y_bot_pad = y_bot - image_size[0] + y_bot = image_size[0] + + with mrcfile.open(tilt_images[tilt]) as mrc: + input_micrograph_image = mrc.data + + particle_subimage = input_micrograph_image[y_top:y_bot, x_left:x_right] + particle_subimage = np.pad( + particle_subimage, + ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), + mode="edge", + ) + + # Flip all the values on inversion + if extract_subtomo_params.invert_contrast: + particle_subimage = -1 * particle_subimage + + # Downscale the image size + subimage_ft = np.fft.fftshift(np.fft.fft2(particle_subimage)) + deltax = ( + subimage_ft.shape[0] + - extract_subtomo_params.relion_options.small_boxsize + ) + deltay = ( + subimage_ft.shape[1] + - extract_subtomo_params.relion_options.small_boxsize + ) + particle_subimage = np.real( + np.fft.ifft2( + np.fft.ifftshift( + subimage_ft[ + deltax // 2 : subimage_ft.shape[0] - deltax // 2, + deltay // 2 : subimage_ft.shape[1] - deltay // 2, + ] + ) + ) + ) + + # Distance of each pixel from the centre, compared to background radius + grid_indexes = np.meshgrid( + np.arange(2 * scaled_extract_width), + np.arange(2 * scaled_extract_width), + ) + distance_from_centre = np.sqrt( + (grid_indexes[0] - scaled_extract_width + 0.5) ** 2 + + (grid_indexes[1] - scaled_extract_width + 0.5) ** 2 + ) + bg_region = ( + distance_from_centre + > np.ones(np.shape(particle_subimage)) + * extract_subtomo_params.bg_radius + ) + + # Background normalisation + bg_mean = np.mean(particle_subimage[bg_region]) + bg_std = np.std(particle_subimage[bg_region]) + particle_subimage = (particle_subimage - bg_mean) / bg_std + + # Add to output stack + if len(output_mrc_stack): + output_mrc_stack = np.append( + output_mrc_stack, [particle_subimage], axis=0 + ) + else: + output_mrc_stack = np.array([particle_subimage], dtype=np.float32) + + # Produce the mrc file of the extracted particles + output_mrc_file = ( + Path(extract_subtomo_params.output_file).parent + / f"{particle}_stack2d.mrcs" + ) + self.log.info(f"Extracted particle {particle}") + with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: + mrc.set_data(output_mrc_stack.astype(np.float32)) + mrc.header.mx = box_len + mrc.header.my = box_len + mrc.header.mz = 1 + mrc.header.cella.x = pixel_size * box_len + mrc.header.cella.y = pixel_size * box_len + mrc.header.cella.z = 1 + + # Construct the output star file + extracted_parts_doc = cif.Document() + extracted_parts_block = extracted_parts_doc.add_new_block("particles") + extracted_parts_loop = extracted_parts_block.init_loop( + "_rln", + [ + "CenteredCoordinateXAngst", + "CenteredCoordinateYAngst", + "CenteredCoordinateZAngst", + "OpticsGroup", + "TomoParticleName", + "TomoVisibleFrames", + "ImageName", + "OriginXAngst", + "OriginYAngst", + "OriginZAngst", + ], + ) + frames = "?????" + for particle in range(len(particles_x)): + extracted_parts_loop.add_row( + [ + str( + float(particles_x[particle]) + - image_size[2] / 2 * extract_subtomo_params.tomogram_binning + ), + str( + float(particles_y[particle]) + - image_size[1] / 2 * extract_subtomo_params.tomogram_binning + ), + str( + float(particles_z[particle]) + - image_size[0] / 2 * extract_subtomo_params.tomogram_binning + ), + "1", + f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tomogram))}/{particle}", + f"[{frames}]" + f"{Path(extract_subtomo_params.output_file).parent}/{particle}_stack2d.mrcs", + "0.0", + "0.0", + "0.0", + ] + ) + extracted_parts_doc.write_file( + extract_subtomo_params.output_file, style=cif.Style.Simple + ) + + # Register the extract job with the node creator + self.log.info(f"Sending {self.job_type} to node creator") + node_creator_parameters = { + "job_type": self.job_type, + "input_file": extract_subtomo_params.coord_list_file, + "output_file": extract_subtomo_params.output_file, + "relion_options": dict(extract_subtomo_params.relion_options), + "command": "", + "stdout": "", + "stderr": "", + "results": {"box_size": box_len}, + } + rw.send_to("node_creator", node_creator_parameters) + + self.log.info( + f"Done {self.job_type} for {extract_subtomo_params.coord_list_file}." + ) + rw.transport.ack(header) From 9dd3b4e33073d8a3575edff4c2ed2e54040e9844 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Mon, 10 Mar 2025 11:41:38 +0000 Subject: [PATCH 02/33] Various parameter updated --- .../services/extract_subtomo.py | 71 ++++++++++++------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 8bd309fc..f8bc7c1d 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -6,7 +6,7 @@ import numpy as np import workflows.recipe from gemmi import cif -from pydantic import BaseModel, Field, ValidationError +from pydantic import BaseModel, Field, ValidationError, field_validator from workflows.services.common_service import CommonService from cryoemservices.util.models import MockRW @@ -18,11 +18,11 @@ class ExtractSubTomoParameters(BaseModel): - tomogram: str = Field(..., min_length=1) - coord_list_file: str = Field(..., min_length=1) + cbox_3d_file: str = Field(..., min_length=1) tilt_alignment_file: str = Field(..., min_length=1) newstack_file: str = Field(..., min_length=1) - output_file: str = Field(..., min_length=1) + output_star: str = Field(..., min_length=1) + scaled_tomogram_shape: list[int] pixel_size: float refined_tilt_axis: float particle_diameter: float = 0 @@ -36,6 +36,13 @@ class ExtractSubTomoParameters(BaseModel): bg_radius: int = -1 relion_options: RelionServiceOptions + @field_validator("scaled_tomogram_shape") + @classmethod + def check_shape_is_3d(cls, v): + if len(v) != 3: + raise ValueError("Tomogram shape must be 3D") + return v + class ExtractSubTomo(CommonService): """ @@ -96,9 +103,8 @@ def extract_subtomo(self, rw, header: dict, message: dict): return self.log.info( - f"Inputs: {extract_subtomo_params.tomogram}, " - f"{extract_subtomo_params.tilt_alignment_file}, " - f"{extract_subtomo_params.coord_list_file} " + f"Inputs: {extract_subtomo_params.tilt_alignment_file}, " + f"{extract_subtomo_params.cbox_3d_file} " f"Output: {extract_subtomo_params.output_file}" ) @@ -118,11 +124,18 @@ def extract_subtomo(self, rw, header: dict, message: dict): ) # Find the locations of the particles - coords_file = cif.read(extract_subtomo_params.coord_list_file) + coords_file = cif.read(extract_subtomo_params.cbox_3d_file) coords_block = coords_file.find_block("cryolo") - particles_x = np.array(coords_block.find_loop("_CoordinateX")) - particles_y = np.array(coords_block.find_loop("_CoordinateY")) - particles_z = np.array(coords_block.find_loop("_CoordinateZ")) + pick_radius = float(coords_block.find_loop("_Width")[0]) / 2 + particles_x = ( + np.array(coords_block.find_loop("_CoordinateX"), dtype=float) + pick_radius + ) + particles_y = ( + np.array(coords_block.find_loop("_CoordinateY"), dtype=float) + pick_radius + ) + particles_z = ( + np.array(coords_block.find_loop("_CoordinateZ"), dtype=float) + pick_radius + ) # Get the shifts between tilts shift_data = np.genfromtxt(extract_subtomo_params.tilt_alignment_file) @@ -132,12 +145,8 @@ def extract_subtomo(self, rw, header: dict, message: dict): tilt_count = len(x_shifts) # Extraction - with mrcfile.open(extract_subtomo_params.tomogram) as input_tomogram: - input_tomogram_image = np.array(input_tomogram.data, dtype=np.float32) - image_size = np.shape(input_tomogram_image) - centre_x = 0 - centre_y = image_size[1] / 2 + centre_y = extract_subtomo_params.scaled_tomogram_shape[1] / 2 tilt_axis_radians = extract_subtomo_params.refined_tilt_axis * np.pi / 180 x_coords_in_tilts = centre_x + ( @@ -190,17 +199,19 @@ def extract_subtomo(self, rw, header: dict, message: dict): x_right = ( x_coords_in_tilts[particle] + extract_width - x_shifts[particle] ) - if x_right >= image_size[1]: - x_right_pad = x_right - image_size[1] - x_right = image_size[1] + if x_right >= extract_subtomo_params.scaled_tomogram_shape[1]: + x_right_pad = ( + x_right - extract_subtomo_params.scaled_tomogram_shape[1] + ) + x_right = extract_subtomo_params.scaled_tomogram_shape[1] y_top = y_coords_in_tilts[particle] - extract_width - y_shifts[particle] if y_top < 0: y_top_pad = -y_top y_top = 0 y_bot = y_coords_in_tilts[particle] + extract_width - y_shifts[particle] - if y_bot >= image_size[0]: - y_bot_pad = y_bot - image_size[0] - y_bot = image_size[0] + if y_bot >= extract_subtomo_params.scaled_tomogram_shape[0]: + y_bot_pad = y_bot - extract_subtomo_params.scaled_tomogram_shape[0] + y_bot = extract_subtomo_params.scaled_tomogram_shape[0] with mrcfile.open(tilt_images[tilt]) as mrc: input_micrograph_image = mrc.data @@ -304,15 +315,21 @@ def extract_subtomo(self, rw, header: dict, message: dict): [ str( float(particles_x[particle]) - - image_size[2] / 2 * extract_subtomo_params.tomogram_binning + - extract_subtomo_params.scaled_tomogram_shape[2] + / 2 + * extract_subtomo_params.tomogram_binning ), str( float(particles_y[particle]) - - image_size[1] / 2 * extract_subtomo_params.tomogram_binning + - extract_subtomo_params.scaled_tomogram_shape[1] + / 2 + * extract_subtomo_params.tomogram_binning ), str( float(particles_z[particle]) - - image_size[0] / 2 * extract_subtomo_params.tomogram_binning + - extract_subtomo_params.scaled_tomogram_shape[0] + / 2 + * extract_subtomo_params.tomogram_binning ), "1", f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tomogram))}/{particle}", @@ -331,7 +348,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): self.log.info(f"Sending {self.job_type} to node creator") node_creator_parameters = { "job_type": self.job_type, - "input_file": extract_subtomo_params.coord_list_file, + "input_file": extract_subtomo_params.cbox_3d_file, "output_file": extract_subtomo_params.output_file, "relion_options": dict(extract_subtomo_params.relion_options), "command": "", @@ -342,6 +359,6 @@ def extract_subtomo(self, rw, header: dict, message: dict): rw.send_to("node_creator", node_creator_parameters) self.log.info( - f"Done {self.job_type} for {extract_subtomo_params.coord_list_file}." + f"Done {self.job_type} for {extract_subtomo_params.cbox_3d_file}." ) rw.transport.ack(header) From aefbe02e3af6b9bd23bd9a44bf31271d142a9bae Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Mon, 10 Mar 2025 16:58:53 +0000 Subject: [PATCH 03/33] Enough to make it run --- .../services/extract_subtomo.py | 130 +++++++++++------- 1 file changed, 83 insertions(+), 47 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index f8bc7c1d..16aff48b 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -1,5 +1,6 @@ from __future__ import annotations +import ast from pathlib import Path import mrcfile @@ -22,9 +23,8 @@ class ExtractSubTomoParameters(BaseModel): tilt_alignment_file: str = Field(..., min_length=1) newstack_file: str = Field(..., min_length=1) output_star: str = Field(..., min_length=1) - scaled_tomogram_shape: list[int] + scaled_tomogram_shape: list[int] | str pixel_size: float - refined_tilt_axis: float particle_diameter: float = 0 boxsize: int = 256 small_boxsize: int = 64 @@ -39,9 +39,15 @@ class ExtractSubTomoParameters(BaseModel): @field_validator("scaled_tomogram_shape") @classmethod def check_shape_is_3d(cls, v): - if len(v) != 3: + if not len(v): + raise ValueError("Tomogram shape not given") + if type(v) is str: + shape_list = ast.literal_eval(v) + else: + shape_list = v + if len(shape_list) != 3: raise ValueError("Tomogram shape must be 3D") - return v + return shape_list class ExtractSubTomo(CommonService): @@ -105,7 +111,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): self.log.info( f"Inputs: {extract_subtomo_params.tilt_alignment_file}, " f"{extract_subtomo_params.cbox_3d_file} " - f"Output: {extract_subtomo_params.output_file}" + f"Output: {extract_subtomo_params.output_star}" ) # Update the relion options and get box sizes @@ -114,8 +120,8 @@ def extract_subtomo(self, rw, header: dict, message: dict): ) # Make sure the output directory exists - if not Path(extract_subtomo_params.output_file).parent.exists(): - Path(extract_subtomo_params.output_file).parent.mkdir(parents=True) + if not Path(extract_subtomo_params.output_star).parent.exists(): + Path(extract_subtomo_params.output_star).parent.mkdir(parents=True) # If no background radius set diameter as 75% of box if extract_subtomo_params.bg_radius == -1: @@ -140,14 +146,16 @@ def extract_subtomo(self, rw, header: dict, message: dict): # Get the shifts between tilts shift_data = np.genfromtxt(extract_subtomo_params.tilt_alignment_file) # tilt_ids = shift_data[:, 0].astype(int) + refined_tilt_axis = float(shift_data[0, 1]) x_shifts = shift_data[:, 3].astype(float) y_shifts = shift_data[:, 4].astype(float) tilt_count = len(x_shifts) - # Extraction + # Rotation around the tilt axis is about (0, height/2) + # Or possibly not, sometimes seems to be (width/2, height/2), needs exploration centre_x = 0 - centre_y = extract_subtomo_params.scaled_tomogram_shape[1] / 2 - tilt_axis_radians = extract_subtomo_params.refined_tilt_axis * np.pi / 180 + centre_y = float(extract_subtomo_params.scaled_tomogram_shape[1]) / 2 + tilt_axis_radians = (90 - refined_tilt_axis) * np.pi / 180 x_coords_in_tilts = centre_x + ( (particles_x - centre_x) * np.cos(tilt_axis_radians) @@ -157,6 +165,8 @@ def extract_subtomo(self, rw, header: dict, message: dict): (particles_x - centre_x) * np.sin(tilt_axis_radians) + (particles_y - centre_y) * np.cos(tilt_axis_radians) ) + x_coords_in_tilts *= extract_subtomo_params.tomogram_binning + y_coords_in_tilts *= extract_subtomo_params.tomogram_binning # Downscaling dimensions extract_subtomo_params.relion_options.pixel_size_downscaled = ( @@ -171,7 +181,18 @@ def extract_subtomo(self, rw, header: dict, message: dict): box_len = extract_subtomo_params.relion_options.small_boxsize pixel_size = extract_subtomo_params.relion_options.pixel_size_downscaled + # Distance of each pixel from the centre for background normalization + grid_indexes = np.meshgrid( + np.arange(2 * scaled_extract_width), + np.arange(2 * scaled_extract_width), + ) + distance_from_centre = np.sqrt( + (grid_indexes[0] - scaled_extract_width + 0.5) ** 2 + + (grid_indexes[1] - scaled_extract_width + 0.5) ** 2 + ) + # Read in tilt images + self.log.info("Reading tilt images") tilt_images = [] with open(extract_subtomo_params.newstack_file) as ns_file: while True: @@ -179,7 +200,9 @@ def extract_subtomo(self, rw, header: dict, message: dict): if not line: break elif line.startswith("/"): - tilt_images.append(line) + tilt_name = line.strip() + with mrcfile.open(tilt_name) as mrc: + tilt_images.append(mrc.data) for particle in range(len(particles_x)): output_mrc_stack = np.array([]) @@ -190,33 +213,52 @@ def extract_subtomo(self, rw, header: dict, message: dict): y_top_pad = 0 y_bot_pad = 0 - x_left = ( - x_coords_in_tilts[particle] - extract_width - x_shifts[particle] + x_left = round( + x_coords_in_tilts[particle] - extract_width - x_shifts[tilt] ) if x_left < 0: x_left_pad = -x_left x_left = 0 - x_right = ( - x_coords_in_tilts[particle] + extract_width - x_shifts[particle] + x_right = round( + x_coords_in_tilts[particle] + extract_width - x_shifts[tilt] ) - if x_right >= extract_subtomo_params.scaled_tomogram_shape[1]: + if ( + x_right + >= extract_subtomo_params.scaled_tomogram_shape[0] + * extract_subtomo_params.tomogram_binning + ): x_right_pad = ( - x_right - extract_subtomo_params.scaled_tomogram_shape[1] + x_right - extract_subtomo_params.scaled_tomogram_shape[0] + ) + x_right = ( + extract_subtomo_params.scaled_tomogram_shape[0] + * extract_subtomo_params.tomogram_binning ) - x_right = extract_subtomo_params.scaled_tomogram_shape[1] - y_top = y_coords_in_tilts[particle] - extract_width - y_shifts[particle] + y_top = round( + y_coords_in_tilts[particle] - extract_width - y_shifts[tilt] + ) if y_top < 0: y_top_pad = -y_top y_top = 0 - y_bot = y_coords_in_tilts[particle] + extract_width - y_shifts[particle] - if y_bot >= extract_subtomo_params.scaled_tomogram_shape[0]: - y_bot_pad = y_bot - extract_subtomo_params.scaled_tomogram_shape[0] - y_bot = extract_subtomo_params.scaled_tomogram_shape[0] + y_bot = round( + y_coords_in_tilts[particle] + extract_width - y_shifts[tilt] + ) + if ( + y_bot + >= extract_subtomo_params.scaled_tomogram_shape[1] + * extract_subtomo_params.tomogram_binning + ): + y_bot_pad = y_bot - extract_subtomo_params.scaled_tomogram_shape[1] + y_bot = ( + extract_subtomo_params.scaled_tomogram_shape[1] + * extract_subtomo_params.tomogram_binning + ) - with mrcfile.open(tilt_images[tilt]) as mrc: - input_micrograph_image = mrc.data + if y_bot <= y_top or x_left >= x_right: + self.log.warning(f"Invalid {tilt} for particle {particle}") + continue - particle_subimage = input_micrograph_image[y_top:y_bot, x_left:x_right] + particle_subimage = tilt_images[tilt][y_top:y_bot, x_left:x_right] particle_subimage = np.pad( particle_subimage, ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), @@ -248,22 +290,12 @@ def extract_subtomo(self, rw, header: dict, message: dict): ) ) - # Distance of each pixel from the centre, compared to background radius - grid_indexes = np.meshgrid( - np.arange(2 * scaled_extract_width), - np.arange(2 * scaled_extract_width), - ) - distance_from_centre = np.sqrt( - (grid_indexes[0] - scaled_extract_width + 0.5) ** 2 - + (grid_indexes[1] - scaled_extract_width + 0.5) ** 2 - ) + # Background normalisation bg_region = ( distance_from_centre > np.ones(np.shape(particle_subimage)) * extract_subtomo_params.bg_radius ) - - # Background normalisation bg_mean = np.mean(particle_subimage[bg_region]) bg_std = np.std(particle_subimage[bg_region]) particle_subimage = (particle_subimage - bg_mean) / bg_std @@ -276,12 +308,16 @@ def extract_subtomo(self, rw, header: dict, message: dict): else: output_mrc_stack = np.array([particle_subimage], dtype=np.float32) + if not len(output_mrc_stack): + self.log.warning(f"Could not extract particle {particle}") + continue + # Produce the mrc file of the extracted particles output_mrc_file = ( - Path(extract_subtomo_params.output_file).parent + Path(extract_subtomo_params.output_star).parent / f"{particle}_stack2d.mrcs" ) - self.log.info(f"Extracted particle {particle}") + self.log.info(f"Extracted particle {particle} of {len(particles_x)}") with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: mrc.set_data(output_mrc_stack.astype(np.float32)) mrc.header.mx = box_len @@ -315,33 +351,33 @@ def extract_subtomo(self, rw, header: dict, message: dict): [ str( float(particles_x[particle]) - - extract_subtomo_params.scaled_tomogram_shape[2] + - float(extract_subtomo_params.scaled_tomogram_shape[2]) / 2 * extract_subtomo_params.tomogram_binning ), str( float(particles_y[particle]) - - extract_subtomo_params.scaled_tomogram_shape[1] + - float(extract_subtomo_params.scaled_tomogram_shape[1]) / 2 * extract_subtomo_params.tomogram_binning ), str( float(particles_z[particle]) - - extract_subtomo_params.scaled_tomogram_shape[0] + - float(extract_subtomo_params.scaled_tomogram_shape[0]) / 2 * extract_subtomo_params.tomogram_binning ), "1", - f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tomogram))}/{particle}", - f"[{frames}]" - f"{Path(extract_subtomo_params.output_file).parent}/{particle}_stack2d.mrcs", + f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tilt_alignment_file))}/{particle}", + f"[{frames}]", + f"{Path(extract_subtomo_params.output_star).parent}/{particle}_stack2d.mrcs", "0.0", "0.0", "0.0", ] ) extracted_parts_doc.write_file( - extract_subtomo_params.output_file, style=cif.Style.Simple + extract_subtomo_params.output_star, style=cif.Style.Simple ) # Register the extract job with the node creator @@ -349,7 +385,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): node_creator_parameters = { "job_type": self.job_type, "input_file": extract_subtomo_params.cbox_3d_file, - "output_file": extract_subtomo_params.output_file, + "output_file": extract_subtomo_params.output_star, "relion_options": dict(extract_subtomo_params.relion_options), "command": "", "stdout": "", From 67384784d417547eff91ca6225b945bd9e689bbc Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Fri, 14 Mar 2025 15:39:58 +0000 Subject: [PATCH 04/33] Set maximum dose requirement --- .../services/extract_subtomo.py | 41 +++++++++++++------ 1 file changed, 29 insertions(+), 12 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 16aff48b..eb8f09d4 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -15,7 +15,10 @@ RelionServiceOptions, update_relion_options, ) -from cryoemservices.util.tomo_output_files import _get_tilt_name_v5_12 +from cryoemservices.util.tomo_output_files import ( + _get_tilt_name_v5_12, + _get_tilt_number_v5_12, +) class ExtractSubTomoParameters(BaseModel): @@ -25,6 +28,7 @@ class ExtractSubTomoParameters(BaseModel): output_star: str = Field(..., min_length=1) scaled_tomogram_shape: list[int] | str pixel_size: float + dose_per_tilt: float particle_diameter: float = 0 boxsize: int = 256 small_boxsize: int = 64 @@ -194,6 +198,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): # Read in tilt images self.log.info("Reading tilt images") tilt_images = [] + tilt_numbers = [] with open(extract_subtomo_params.newstack_file) as ns_file: while True: line = ns_file.readline() @@ -201,12 +206,20 @@ def extract_subtomo(self, rw, header: dict, message: dict): break elif line.startswith("/"): tilt_name = line.strip() + tilt_numbers.append(_get_tilt_number_v5_12(Path(tilt_name))) with mrcfile.open(tilt_name) as mrc: tilt_images.append(mrc.data) + frames = np.zeros((len(particles_x), tilt_count), dtype=int) for particle in range(len(particles_x)): output_mrc_stack = np.array([]) for tilt in range(tilt_count): + if ( + extract_subtomo_params.dose_per_tilt * tilt_numbers[tilt] + < extract_subtomo_params.maximum_dose + ): + continue + # Extract the particle image and pad the edges if it is not square x_left_pad = 0 x_right_pad = 0 @@ -307,6 +320,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): ) else: output_mrc_stack = np.array([particle_subimage], dtype=np.float32) + frames[particle, tilt] = 1 if not len(output_mrc_stack): self.log.warning(f"Could not extract particle {particle}") @@ -333,9 +347,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): extracted_parts_loop = extracted_parts_block.init_loop( "_rln", [ - "CenteredCoordinateXAngst", - "CenteredCoordinateYAngst", - "CenteredCoordinateZAngst", + "TomoName", "OpticsGroup", "TomoParticleName", "TomoVisibleFrames", @@ -343,12 +355,24 @@ def extract_subtomo(self, rw, header: dict, message: dict): "OriginXAngst", "OriginYAngst", "OriginZAngst", + "CenteredCoordinateXAngst", + "CenteredCoordinateYAngst", + "CenteredCoordinateZAngst", ], ) - frames = "?????" for particle in range(len(particles_x)): extracted_parts_loop.add_row( [ + _get_tilt_name_v5_12( + Path(extract_subtomo_params.tilt_alignment_file) + ), + "1", + f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tilt_alignment_file))}/{particle}", + f"[{frames[particle]}]", + f"{Path(extract_subtomo_params.output_star).parent}/{particle}_stack2d.mrcs", + "0.0", + "0.0", + "0.0", str( float(particles_x[particle]) - float(extract_subtomo_params.scaled_tomogram_shape[2]) @@ -367,13 +391,6 @@ def extract_subtomo(self, rw, header: dict, message: dict): / 2 * extract_subtomo_params.tomogram_binning ), - "1", - f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tilt_alignment_file))}/{particle}", - f"[{frames}]", - f"{Path(extract_subtomo_params.output_star).parent}/{particle}_stack2d.mrcs", - "0.0", - "0.0", - "0.0", ] ) extracted_parts_doc.write_file( From 31539ab8f7166f0cd5f8e5b76d8afd90baa17f04 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Fri, 14 Mar 2025 15:49:37 +0000 Subject: [PATCH 05/33] Change flags edited by rebase --- src/cryoemservices/services/tomo_align.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/cryoemservices/services/tomo_align.py b/src/cryoemservices/services/tomo_align.py index 0c1d128a..3d45dca6 100644 --- a/src/cryoemservices/services/tomo_align.py +++ b/src/cryoemservices/services/tomo_align.py @@ -725,33 +725,34 @@ def assemble_aretomo_command( ) ) # highest tilt - if tomo_parameters.manual_tilt_offset is None: + if tomo_parameters.manual_tilt_offset: command.extend( ( "-TiltCor", str(tomo_parameters.tilt_cor), + str(tomo_parameters.manual_tilt_offset), "-VolZ", str(tomo_parameters.vol_z), ) ) - else: + elif tomo_parameters.tilt_cor: command.extend( ( "-TiltCor", str(tomo_parameters.tilt_cor), - str(tomo_parameters.manual_tilt_offset), "-VolZ", str(tomo_parameters.vol_z), ) ) - command.extend( - ( - "-TiltAxis", - str(tomo_parameters.tilt_axis), - str(tomo_parameters.refine_flag), + if tomo_parameters.tilt_axis: + command.extend( + ( + "-TiltAxis", + str(tomo_parameters.tilt_axis), + str(tomo_parameters.refine_flag), + ) ) - ) if tomo_parameters.frame_count and tomo_parameters.dose_per_frame: command.extend( From a0ebbc973b0816cc489010e42ff3e031007120c6 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Mon, 17 Mar 2025 09:59:26 +0000 Subject: [PATCH 06/33] Fix the order of dose checks --- src/cryoemservices/services/extract_subtomo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index eb8f09d4..ed7ba820 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -216,7 +216,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): for tilt in range(tilt_count): if ( extract_subtomo_params.dose_per_tilt * tilt_numbers[tilt] - < extract_subtomo_params.maximum_dose + > extract_subtomo_params.maximum_dose ): continue From dd1b4b6a854e20c5c42b5c828144fd658d15c5aa Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 20 Mar 2025 10:10:47 +0000 Subject: [PATCH 07/33] Attempt to fix frame issues --- .../services/extract_subtomo.py | 30 ++++++++++++------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index ed7ba820..ee17addb 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -159,7 +159,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): # Or possibly not, sometimes seems to be (width/2, height/2), needs exploration centre_x = 0 centre_y = float(extract_subtomo_params.scaled_tomogram_shape[1]) / 2 - tilt_axis_radians = (90 - refined_tilt_axis) * np.pi / 180 + tilt_axis_radians = (refined_tilt_axis - 90) * np.pi / 180 x_coords_in_tilts = centre_x + ( (particles_x - centre_x) * np.cos(tilt_axis_radians) @@ -172,6 +172,14 @@ def extract_subtomo(self, rw, header: dict, message: dict): x_coords_in_tilts *= extract_subtomo_params.tomogram_binning y_coords_in_tilts *= extract_subtomo_params.tomogram_binning + with open( + Path(extract_subtomo_params.output_star).parent / "coords_in_tilts.txt", "w" + ) as f: + for particle in range(len(x_coords_in_tilts)): + f.write( + f"{x_coords_in_tilts[particle]} {y_coords_in_tilts[particle]}\n" + ) + # Downscaling dimensions extract_subtomo_params.relion_options.pixel_size_downscaled = ( extract_subtomo_params.pixel_size @@ -269,14 +277,16 @@ def extract_subtomo(self, rw, header: dict, message: dict): if y_bot <= y_top or x_left >= x_right: self.log.warning(f"Invalid {tilt} for particle {particle}") - continue - - particle_subimage = tilt_images[tilt][y_top:y_bot, x_left:x_right] - particle_subimage = np.pad( - particle_subimage, - ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), - mode="edge", - ) + particle_subimage = np.random.random((box_len, box_len)) * np.max( + tilt_images[tilt] + ) + else: + particle_subimage = tilt_images[tilt][y_top:y_bot, x_left:x_right] + particle_subimage = np.pad( + particle_subimage, + ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), + mode="edge", + ) # Flip all the values on inversion if extract_subtomo_params.invert_contrast: @@ -368,7 +378,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): ), "1", f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tilt_alignment_file))}/{particle}", - f"[{frames[particle]}]", + f"[{','.join([str(frm) for frm in frames[particle]])}]", f"{Path(extract_subtomo_params.output_star).parent}/{particle}_stack2d.mrcs", "0.0", "0.0", From bfacee0e728f435894e51042b877408823078327 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Tue, 25 Mar 2025 16:07:34 +0000 Subject: [PATCH 08/33] Refactor the extraction code --- .../services/extract_subtomo.py | 229 +++++++++--------- 1 file changed, 110 insertions(+), 119 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index ee17addb..6f0f8fcd 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -36,8 +36,6 @@ class ExtractSubTomoParameters(BaseModel): min_frames: int = 1 maximum_dose: int = -1 tomogram_binning: int = 4 - invert_contrast: bool = True - bg_radius: int = -1 relion_options: RelionServiceOptions @field_validator("scaled_tomogram_shape") @@ -127,12 +125,6 @@ def extract_subtomo(self, rw, header: dict, message: dict): if not Path(extract_subtomo_params.output_star).parent.exists(): Path(extract_subtomo_params.output_star).parent.mkdir(parents=True) - # If no background radius set diameter as 75% of box - if extract_subtomo_params.bg_radius == -1: - extract_subtomo_params.bg_radius = round( - 0.375 * extract_subtomo_params.relion_options.small_boxsize - ) - # Find the locations of the particles coords_file = cif.read(extract_subtomo_params.cbox_3d_file) coords_block = coords_file.find_block("cryolo") @@ -187,21 +179,8 @@ def extract_subtomo(self, rw, header: dict, message: dict): / extract_subtomo_params.relion_options.small_boxsize ) extract_width = round(extract_subtomo_params.relion_options.boxsize / 2) - scaled_extract_width = round( - extract_subtomo_params.relion_options.small_boxsize / 2 - ) - box_len = extract_subtomo_params.relion_options.small_boxsize - pixel_size = extract_subtomo_params.relion_options.pixel_size_downscaled - # Distance of each pixel from the centre for background normalization - grid_indexes = np.meshgrid( - np.arange(2 * scaled_extract_width), - np.arange(2 * scaled_extract_width), - ) - distance_from_centre = np.sqrt( - (grid_indexes[0] - scaled_extract_width + 0.5) ** 2 - + (grid_indexes[1] - scaled_extract_width + 0.5) ** 2 - ) + pixel_size = extract_subtomo_params.relion_options.pixel_size_downscaled # Read in tilt images self.log.info("Reading tilt images") @@ -228,100 +207,21 @@ def extract_subtomo(self, rw, header: dict, message: dict): ): continue - # Extract the particle image and pad the edges if it is not square - x_left_pad = 0 - x_right_pad = 0 - y_top_pad = 0 - y_bot_pad = 0 - - x_left = round( - x_coords_in_tilts[particle] - extract_width - x_shifts[tilt] - ) - if x_left < 0: - x_left_pad = -x_left - x_left = 0 - x_right = round( - x_coords_in_tilts[particle] + extract_width - x_shifts[tilt] + particle_subimage, invalid_tilt = extract_particle_in_tilt( + tilt_image=tilt_images[tilt], + x_coord=x_coords_in_tilts[particle], + y_coord=y_coords_in_tilts[particle], + x_shift=x_shifts[tilt], + y_shift=y_shifts[tilt], + extract_width=extract_width, + scaled_tomogram_shape=[ + int(i) for i in extract_subtomo_params.scaled_tomogram_shape + ], + tomogram_binning=extract_subtomo_params.tomogram_binning, + small_boxsize=extract_subtomo_params.small_boxsize, ) - if ( - x_right - >= extract_subtomo_params.scaled_tomogram_shape[0] - * extract_subtomo_params.tomogram_binning - ): - x_right_pad = ( - x_right - extract_subtomo_params.scaled_tomogram_shape[0] - ) - x_right = ( - extract_subtomo_params.scaled_tomogram_shape[0] - * extract_subtomo_params.tomogram_binning - ) - y_top = round( - y_coords_in_tilts[particle] - extract_width - y_shifts[tilt] - ) - if y_top < 0: - y_top_pad = -y_top - y_top = 0 - y_bot = round( - y_coords_in_tilts[particle] + extract_width - y_shifts[tilt] - ) - if ( - y_bot - >= extract_subtomo_params.scaled_tomogram_shape[1] - * extract_subtomo_params.tomogram_binning - ): - y_bot_pad = y_bot - extract_subtomo_params.scaled_tomogram_shape[1] - y_bot = ( - extract_subtomo_params.scaled_tomogram_shape[1] - * extract_subtomo_params.tomogram_binning - ) - - if y_bot <= y_top or x_left >= x_right: + if invalid_tilt: self.log.warning(f"Invalid {tilt} for particle {particle}") - particle_subimage = np.random.random((box_len, box_len)) * np.max( - tilt_images[tilt] - ) - else: - particle_subimage = tilt_images[tilt][y_top:y_bot, x_left:x_right] - particle_subimage = np.pad( - particle_subimage, - ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), - mode="edge", - ) - - # Flip all the values on inversion - if extract_subtomo_params.invert_contrast: - particle_subimage = -1 * particle_subimage - - # Downscale the image size - subimage_ft = np.fft.fftshift(np.fft.fft2(particle_subimage)) - deltax = ( - subimage_ft.shape[0] - - extract_subtomo_params.relion_options.small_boxsize - ) - deltay = ( - subimage_ft.shape[1] - - extract_subtomo_params.relion_options.small_boxsize - ) - particle_subimage = np.real( - np.fft.ifft2( - np.fft.ifftshift( - subimage_ft[ - deltax // 2 : subimage_ft.shape[0] - deltax // 2, - deltay // 2 : subimage_ft.shape[1] - deltay // 2, - ] - ) - ) - ) - - # Background normalisation - bg_region = ( - distance_from_centre - > np.ones(np.shape(particle_subimage)) - * extract_subtomo_params.bg_radius - ) - bg_mean = np.mean(particle_subimage[bg_region]) - bg_std = np.std(particle_subimage[bg_region]) - particle_subimage = (particle_subimage - bg_mean) / bg_std # Add to output stack if len(output_mrc_stack): @@ -344,11 +244,15 @@ def extract_subtomo(self, rw, header: dict, message: dict): self.log.info(f"Extracted particle {particle} of {len(particles_x)}") with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: mrc.set_data(output_mrc_stack.astype(np.float32)) - mrc.header.mx = box_len - mrc.header.my = box_len + mrc.header.mx = extract_subtomo_params.relion_options.small_boxsize + mrc.header.my = extract_subtomo_params.relion_options.small_boxsize mrc.header.mz = 1 - mrc.header.cella.x = pixel_size * box_len - mrc.header.cella.y = pixel_size * box_len + mrc.header.cella.x = ( + pixel_size * extract_subtomo_params.relion_options.small_boxsize + ) + mrc.header.cella.y = ( + pixel_size * extract_subtomo_params.relion_options.small_boxsize + ) mrc.header.cella.z = 1 # Construct the output star file @@ -417,7 +321,9 @@ def extract_subtomo(self, rw, header: dict, message: dict): "command": "", "stdout": "", "stderr": "", - "results": {"box_size": box_len}, + "results": { + "box_size": extract_subtomo_params.relion_options.small_boxsize + }, } rw.send_to("node_creator", node_creator_parameters) @@ -425,3 +331,88 @@ def extract_subtomo(self, rw, header: dict, message: dict): f"Done {self.job_type} for {extract_subtomo_params.cbox_3d_file}." ) rw.transport.ack(header) + + +def extract_particle_in_tilt( + tilt_image: np.ndarray, + x_coord: float, + y_coord: float, + x_shift: float, + y_shift: float, + extract_width: float, + scaled_tomogram_shape: list[int], + tomogram_binning: int, + small_boxsize: int, +): + # Extract the particle image and pad the edges if it is not square + x_left_pad = 0 + x_right_pad = 0 + y_top_pad = 0 + y_bot_pad = 0 + + x_left = round(x_coord - extract_width - x_shift) + if x_left < 0: + x_left_pad = -x_left + x_left = 0 + x_right = round(x_coord + extract_width - x_shift) + if x_right >= scaled_tomogram_shape[0] * tomogram_binning: + x_right_pad = x_right - scaled_tomogram_shape[0] + x_right = scaled_tomogram_shape[0] * tomogram_binning + y_top = round(y_coord - extract_width - y_shift) + if y_top < 0: + y_top_pad = -y_top + y_top = 0 + y_bot = round(y_coord + extract_width - y_shift) + if y_bot >= scaled_tomogram_shape[1] * tomogram_binning: + y_bot_pad = y_bot - scaled_tomogram_shape[1] + y_bot = scaled_tomogram_shape[1] * tomogram_binning + + if y_bot <= y_top or x_left >= x_right: + invalid = True + particle_subimage = np.random.random( + (extract_width * 2, extract_width * 2) + ) * np.max(tilt_image) + else: + invalid = False + particle_subimage = tilt_image[y_top:y_bot, x_left:x_right] + particle_subimage = np.pad( + particle_subimage, + ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), + mode="edge", + ) + + # Flip all the values on inversion + particle_subimage = -1 * particle_subimage + + # Downscale the image size + subimage_ft = np.fft.fftshift(np.fft.fft2(particle_subimage)) + deltax = subimage_ft.shape[0] - small_boxsize + deltay = subimage_ft.shape[1] - small_boxsize + particle_subimage = np.real( + np.fft.ifft2( + np.fft.ifftshift( + subimage_ft[ + deltax // 2 : subimage_ft.shape[0] - deltax // 2, + deltay // 2 : subimage_ft.shape[1] - deltay // 2, + ] + ) + ) + ) + + # Distance of each pixel from the centre for background normalization + grid_indexes = np.meshgrid( + np.arange(small_boxsize), + np.arange(small_boxsize), + ) + distance_from_centre = np.sqrt( + (grid_indexes[0] - round(small_boxsize / 2) + 0.5) ** 2 + + (grid_indexes[1] - round(small_boxsize / 2) + 0.5) ** 2 + ) + + # Background normalisation + bg_radius = round(0.375 * small_boxsize) + bg_region = distance_from_centre > np.ones(np.shape(particle_subimage)) * bg_radius + bg_mean = np.mean(particle_subimage[bg_region]) + bg_std = np.std(particle_subimage[bg_region]) + particle_subimage = (particle_subimage - bg_mean) / bg_std + return particle_subimage, invalid From 75a0a5f8bfac2be6dc75f4966f55cb861488b086 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Mon, 31 Mar 2025 15:54:14 +0100 Subject: [PATCH 09/33] Add sample aretomo remap command --- src/cryoemservices/services/extract_subtomo.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 6f0f8fcd..74cfc58b 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -416,3 +416,14 @@ def extract_particle_in_tilt( bg_std = np.std(particle_subimage[bg_region]) particle_subimage = (particle_subimage - bg_mean) / bg_std return particle_subimage, invalid + + +""" +python /dls_sw/apps/EM/aretomo3/2.1.0/AreTomo3/tools/Remap3D_0.3_07dec24/remap3D.py +-ovs 1022 1440 300 -nvs 1022 1440 300 -ops 1.34 -nps 1.34 +-os AutoPick/job009/CBOX_3D/Position_5_5_stack.star +-ns AutoPick/Position_5_5_remap.star +-oa Tomograms/job006/tomograms/ +-na AlignTiltSeries/job080/external/Position_5_5/ +-oap Position_ +""" From 84d8da52db58724b7b5a6280e13251ae7b506890 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Tue, 1 Apr 2025 15:21:50 +0100 Subject: [PATCH 10/33] Use relion transformation matrix for locations --- .../services/extract_subtomo.py | 101 +++++++++++------- 1 file changed, 62 insertions(+), 39 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 74cfc58b..d29ae2b7 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -16,6 +16,7 @@ update_relion_options, ) from cryoemservices.util.tomo_output_files import ( + _get_tilt_angle_v5_12, _get_tilt_name_v5_12, _get_tilt_number_v5_12, ) @@ -149,29 +150,11 @@ def extract_subtomo(self, rw, header: dict, message: dict): # Rotation around the tilt axis is about (0, height/2) # Or possibly not, sometimes seems to be (width/2, height/2), needs exploration - centre_x = 0 + centre_x = float(extract_subtomo_params.scaled_tomogram_shape[0]) / 2 centre_y = float(extract_subtomo_params.scaled_tomogram_shape[1]) / 2 + centre_z = float(extract_subtomo_params.scaled_tomogram_shape[2]) / 2 tilt_axis_radians = (refined_tilt_axis - 90) * np.pi / 180 - x_coords_in_tilts = centre_x + ( - (particles_x - centre_x) * np.cos(tilt_axis_radians) - - (particles_y - centre_y) * np.sin(tilt_axis_radians) - ) - y_coords_in_tilts = centre_y + ( - (particles_x - centre_x) * np.sin(tilt_axis_radians) - + (particles_y - centre_y) * np.cos(tilt_axis_radians) - ) - x_coords_in_tilts *= extract_subtomo_params.tomogram_binning - y_coords_in_tilts *= extract_subtomo_params.tomogram_binning - - with open( - Path(extract_subtomo_params.output_star).parent / "coords_in_tilts.txt", "w" - ) as f: - for particle in range(len(x_coords_in_tilts)): - f.write( - f"{x_coords_in_tilts[particle]} {y_coords_in_tilts[particle]}\n" - ) - # Downscaling dimensions extract_subtomo_params.relion_options.pixel_size_downscaled = ( extract_subtomo_params.pixel_size @@ -186,6 +169,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): self.log.info("Reading tilt images") tilt_images = [] tilt_numbers = [] + tilt_angles = [] with open(extract_subtomo_params.newstack_file) as ns_file: while True: line = ns_file.readline() @@ -194,6 +178,8 @@ def extract_subtomo(self, rw, header: dict, message: dict): elif line.startswith("/"): tilt_name = line.strip() tilt_numbers.append(_get_tilt_number_v5_12(Path(tilt_name))) + # TODO: want exact tilt angles + tilt_angles.append(float(_get_tilt_angle_v5_12(Path(tilt_name)))) with mrcfile.open(tilt_name) as mrc: tilt_images.append(mrc.data) @@ -207,12 +193,32 @@ def extract_subtomo(self, rw, header: dict, message: dict): ): continue + x_in_tilt, y_in_tilt = get_coord_in_tilt( + x=particles_x[particle], + y=particles_y[particle], + z=particles_z[particle], + cen_x=centre_x, + cen_y=centre_y, + cen_z=centre_z, + theta_y=tilt_angles[tilt], + theta_z=tilt_axis_radians, + delta_x=x_shifts[tilt], + delta_y=y_shifts[tilt], + ) + if tilt_angles[tilt] == 0: + with open( + Path(extract_subtomo_params.output_star).parent + / "coords_in_tilts.txt", + "a", + ) as f: + f.write( + f"{particles_x[particle]} {particles_y[particle]} {x_in_tilt} {y_in_tilt}\n" + ) + particle_subimage, invalid_tilt = extract_particle_in_tilt( tilt_image=tilt_images[tilt], - x_coord=x_coords_in_tilts[particle], - y_coord=y_coords_in_tilts[particle], - x_shift=x_shifts[tilt], - y_shift=y_shifts[tilt], + x_coord=x_in_tilt, + y_coord=y_in_tilt, extract_width=extract_width, scaled_tomogram_shape=[ int(i) for i in extract_subtomo_params.scaled_tomogram_shape @@ -337,8 +343,6 @@ def extract_particle_in_tilt( tilt_image: np.ndarray, x_coord: float, y_coord: float, - x_shift: float, - y_shift: float, extract_width: float, scaled_tomogram_shape: list[int], tomogram_binning: int, @@ -350,19 +354,19 @@ def extract_particle_in_tilt( y_top_pad = 0 y_bot_pad = 0 - x_left = round(x_coord - extract_width - x_shift) + x_left = round(x_coord - extract_width) if x_left < 0: x_left_pad = -x_left x_left = 0 - x_right = round(x_coord + extract_width - x_shift) + x_right = round(x_coord + extract_width) if x_right >= scaled_tomogram_shape[0] * tomogram_binning: x_right_pad = x_right - scaled_tomogram_shape[0] x_right = scaled_tomogram_shape[0] * tomogram_binning - y_top = round(y_coord - extract_width - y_shift) + y_top = round(y_coord - extract_width) if y_top < 0: y_top_pad = -y_top y_top = 0 - y_bot = round(y_coord + extract_width - y_shift) + y_bot = round(y_coord + extract_width) if y_bot >= scaled_tomogram_shape[1] * tomogram_binning: y_bot_pad = y_bot - scaled_tomogram_shape[1] y_bot = scaled_tomogram_shape[1] * tomogram_binning @@ -418,12 +422,31 @@ def extract_particle_in_tilt( return particle_subimage, invalid -""" -python /dls_sw/apps/EM/aretomo3/2.1.0/AreTomo3/tools/Remap3D_0.3_07dec24/remap3D.py --ovs 1022 1440 300 -nvs 1022 1440 300 -ops 1.34 -nps 1.34 --os AutoPick/job009/CBOX_3D/Position_5_5_stack.star --ns AutoPick/Position_5_5_remap.star --oa Tomograms/job006/tomograms/ --na AlignTiltSeries/job080/external/Position_5_5/ --oap Position_ -""" +def get_coord_in_tilt( + x: float, + y: float, + z: float, + cen_x: float, + cen_y: float, + cen_z: float, + theta_y: float, + theta_z: float, + delta_x: float, + delta_y: float, +): + x_centred = x - cen_x + y_centred = y - cen_y + cen_x * np.tan(theta_z) # TODO: last factor depends on rot + z_centred = z - cen_z + x_2d = ( + x_centred * np.cos(theta_z) * np.cos(theta_y) + - y_centred * np.sin(theta_z) + + z_centred * np.cos(theta_z) * np.sin(theta_y) + + delta_x + ) + y_2d = ( + x_centred * np.sin(theta_z) * np.cos(theta_y) + + y_centred * np.cos(theta_z) + + z_centred * np.sin(theta_z) * np.sin(theta_y) + + delta_y + ) + return cen_x + x_2d, cen_y + y_2d From 510ef4b1d9486fddb5c3a472c62c0ff8afde4c03 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 2 Apr 2025 10:55:03 +0100 Subject: [PATCH 11/33] Function which can extract in spa or tomo --- src/cryoemservices/services/extract.py | 288 ++++++++++++++----------- 1 file changed, 161 insertions(+), 127 deletions(-) diff --git a/src/cryoemservices/services/extract.py b/src/cryoemservices/services/extract.py index 00ab828b..e882d42a 100644 --- a/src/cryoemservices/services/extract.py +++ b/src/cryoemservices/services/extract.py @@ -134,6 +134,13 @@ def extract(self, rw, header: dict, message: dict): ) ) + if extract_params.downscale: + extract_params.relion_options.pixel_size_downscaled = ( + extract_params.pixel_size + * extract_params.relion_options.boxsize + / extract_params.relion_options.small_boxsize + ) + # Find the locations of the particles cbox_name = Path( extract_params.coord_list_file.replace("STAR", "CBOX") @@ -223,139 +230,28 @@ def extract(self, rw, header: dict, message: dict): # Pixel locations are from bottom left, need to flip the image later pixel_location_x = round(float(particles_x[particle])) pixel_location_y = round(float(particles_y[particle])) - - # Extract the particle image and pad the edges if it is not square - x_left_pad = 0 - x_right_pad = 0 - y_top_pad = 0 - y_bot_pad = 0 - extract_width = round(extract_params.relion_options.boxsize / 2) - x_left = pixel_location_x - extract_width - if x_left < 0: - x_left_pad = -x_left - x_left = 0 - x_right = pixel_location_x + extract_width - if x_right >= image_size[1]: - x_right_pad = x_right - image_size[1] - x_right = image_size[1] - y_top = pixel_location_y - extract_width - if y_top < 0: - y_top_pad = -y_top - y_top = 0 - y_bot = pixel_location_y + extract_width - if y_bot >= image_size[0]: - y_bot_pad = y_bot - image_size[0] - y_bot = image_size[0] - - particle_subimage = input_micrograph_image[y_top:y_bot, x_left:x_right] - particle_subimage = np.pad( - particle_subimage, - ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), - mode="edge", - ) - - # Flip all the values on inversion - if extract_params.invert_contrast: - particle_subimage = -1 * particle_subimage - - # Downscale the image size - if extract_params.downscale: - extract_params.relion_options.pixel_size_downscaled = ( - extract_params.pixel_size - * extract_params.relion_options.boxsize - / extract_params.relion_options.small_boxsize - ) - subimage_ft = np.fft.fftshift(np.fft.fft2(particle_subimage)) - deltax = ( - subimage_ft.shape[0] - extract_params.relion_options.small_boxsize - ) - deltay = ( - subimage_ft.shape[1] - extract_params.relion_options.small_boxsize - ) - particle_subimage = np.real( - np.fft.ifft2( - np.fft.ifftshift( - subimage_ft[ - deltax // 2 : subimage_ft.shape[0] - deltax // 2, - deltay // 2 : subimage_ft.shape[1] - deltay // 2, - ] - ) - ) - ) - extract_width = round(extract_params.relion_options.small_boxsize / 2) - - # Distance of each pixel from the centre, compared to background radius - grid_indexes = np.meshgrid( - np.arange(2 * extract_width), - np.arange(2 * extract_width), + particle_subimage, failure_reason = extract_single_particle( + input_image=input_micrograph_image, + x_coord=pixel_location_x, + y_coord=pixel_location_y, + extract_width=extract_width, + shape=[image_size[1], image_size[0]], + small_boxsize=extract_params.relion_options.small_boxsize, + bg_radius=extract_params.bg_radius, + invert_contrast=extract_params.invert_contrast, + downscale=extract_params.downscale, + norm=extract_params.norm, + plane_fit=True, ) - distance_from_centre = np.sqrt( - (grid_indexes[0] - extract_width + 0.5) ** 2 - + (grid_indexes[1] - extract_width + 0.5) ** 2 - ) - bg_region = ( - distance_from_centre - > np.ones(np.shape(particle_subimage)) * extract_params.bg_radius - ) - - # Fit background to a plane and subtract the plane from the image - positions = [grid_indexes[0][bg_region], grid_indexes[1][bg_region]] - # needs to create a matrix of the correct shape for a*x + b*y + c plane fit - if not len(positions[0]) == len(positions[1]): + if failure_reason: self.log.warning( - f"Particle image {particle} in {extract_params.micrographs_file} is not square" + f"Extraction failed for {particle} " + f"in {extract_params.micrographs_file}. " + f"Reason was {failure_reason}." ) continue - data_size = len(positions[0]) - positions_matrix = np.hstack( - ( - np.reshape(positions[0], (data_size, 1)), - np.reshape(positions[1], (data_size, 1)), - ) - ) - # this ones for c - positions_matrix = np.hstack((np.ones((data_size, 1)), positions_matrix)) - values = particle_subimage[bg_region] - # normal equation - try: - theta = np.dot( - np.dot( - np.linalg.inv( - np.dot(positions_matrix.transpose(), positions_matrix) - ), - positions_matrix.transpose(), - ), - values, - ) - except np.linalg.LinAlgError: - self.log.warning( - f"Could not fit image plane for particle {particle} in {extract_params.micrographs_file}" - ) - continue - # now we need the full grid across the image - positions_matrix = np.hstack( - ( - np.reshape(grid_indexes[0], (4 * extract_width**2, 1)), - np.reshape(grid_indexes[1], (4 * extract_width**2, 1)), - ) - ) - positions_matrix = np.hstack( - (np.ones((4 * extract_width**2, 1)), positions_matrix) - ) - plane = np.reshape( - np.dot(positions_matrix, theta), (2 * extract_width, 2 * extract_width) - ) - - particle_subimage -= plane - - # Background normalisation - if extract_params.norm: - # Standardise the values using the background - bg_mean = np.mean(particle_subimage[bg_region]) - bg_std = np.std(particle_subimage[bg_region]) - particle_subimage = (particle_subimage - bg_mean) / bg_std # Add to output stack if len(output_mrc_stack): @@ -413,3 +309,141 @@ def extract(self, rw, header: dict, message: dict): self.log.info(f"Done {self.job_type} for {extract_params.coord_list_file}.") rw.transport.ack(header) + + +def extract_single_particle( + input_image: np.ndarray, + x_coord: float, + y_coord: float, + extract_width: float, + shape: list[int], + small_boxsize: int, + bg_radius: float, + invert_contrast: bool = True, + downscale: bool = True, + norm: bool = True, + plane_fit: bool = True, +) -> tuple[np.ndarray, str]: + """ + A function which can extract a single particle in a micrograph + or a single particle from a tomogram tilt + """ + # Extract the particle image and pad the edges if it is not square + x_left_pad = 0 + x_right_pad = 0 + y_top_pad = 0 + y_bot_pad = 0 + + x_left = round(x_coord - extract_width) + if x_left < 0: + x_left_pad = -x_left + x_left = 0 + x_right = round(x_coord + extract_width) + if x_right >= shape[0]: + x_right_pad = x_right - shape[0] + x_right = shape[0] + y_top = round(y_coord - extract_width) + if y_top < 0: + y_top_pad = -y_top + y_top = 0 + y_bot = round(y_coord + extract_width) + if y_bot >= shape[1]: + y_bot_pad = y_bot - shape[1] + y_bot = shape[1] + + if y_bot <= y_top or x_left >= x_right: + return [], "Particle is outside image" + else: + particle_subimage = input_image[y_top:y_bot, x_left:x_right] + particle_subimage = np.pad( + particle_subimage, + ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), + mode="edge", + ) + + # Flip all the values on inversion + if invert_contrast: + particle_subimage = -1 * particle_subimage + + if downscale: + # Downscale the image size + subimage_ft = np.fft.fftshift(np.fft.fft2(particle_subimage)) + deltax = subimage_ft.shape[0] - small_boxsize + deltay = subimage_ft.shape[1] - small_boxsize + particle_subimage = np.real( + np.fft.ifft2( + np.fft.ifftshift( + subimage_ft[ + deltax // 2 : subimage_ft.shape[0] - deltax // 2, + deltay // 2 : subimage_ft.shape[1] - deltay // 2, + ] + ) + ) + ) + extract_width = round(small_boxsize / 2) + + # Distance of each pixel from the centre for background normalization + grid_indexes = np.meshgrid( + np.arange(2 * extract_width), + np.arange(2 * extract_width), + ) + distance_from_centre = np.sqrt( + (grid_indexes[0] - extract_width + 0.5) ** 2 + + (grid_indexes[1] - extract_width + 0.5) ** 2 + ) + bg_region = distance_from_centre > np.ones(np.shape(particle_subimage)) * bg_radius + + # Fitting of plane to the ice + if plane_fit: + # Fit background to a plane and subtract the plane from the image + positions = [grid_indexes[0][bg_region], grid_indexes[1][bg_region]] + # needs to create a matrix of the correct shape for a*x + b*y + c plane fit + if not len(positions[0]) == len(positions[1]): + return [], "Particle image is not square" + data_size = len(positions[0]) + positions_matrix = np.hstack( + ( + np.reshape(positions[0], (data_size, 1)), + np.reshape(positions[1], (data_size, 1)), + ) + ) + # this ones for c + positions_matrix = np.hstack((np.ones((data_size, 1)), positions_matrix)) + values = particle_subimage[bg_region] + # normal equation + try: + theta = np.dot( + np.dot( + np.linalg.inv( + np.dot(positions_matrix.transpose(), positions_matrix) + ), + positions_matrix.transpose(), + ), + values, + ) + except np.linalg.LinAlgError: + return [], "Could not fit image plane" + # now we need the full grid across the image + positions_matrix = np.hstack( + ( + np.reshape(grid_indexes[0], (4 * extract_width**2, 1)), + np.reshape(grid_indexes[1], (4 * extract_width**2, 1)), + ) + ) + positions_matrix = np.hstack( + (np.ones((4 * extract_width**2, 1)), positions_matrix) + ) + plane = np.reshape( + np.dot(positions_matrix, theta), (2 * extract_width, 2 * extract_width) + ) + + particle_subimage -= plane + + # Background normalisation + if norm: + # Standardise the values using the background + bg_mean = np.mean(particle_subimage[bg_region]) + bg_std = np.std(particle_subimage[bg_region]) + particle_subimage = (particle_subimage - bg_mean) / bg_std + + return particle_subimage, "" From de0597ca7dcb4f008e6c63eab3fc3ca853a47893 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 2 Apr 2025 10:58:07 +0100 Subject: [PATCH 12/33] Use new function, and convert angle to radians --- .../services/extract_subtomo.py | 40 ++++++++++++++----- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index d29ae2b7..a230fc3c 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -10,6 +10,7 @@ from pydantic import BaseModel, Field, ValidationError, field_validator from workflows.services.common_service import CommonService +from cryoemservices.services.extract import extract_single_particle from cryoemservices.util.models import MockRW from cryoemservices.util.relion_service_options import ( RelionServiceOptions, @@ -169,7 +170,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): self.log.info("Reading tilt images") tilt_images = [] tilt_numbers = [] - tilt_angles = [] + tilt_angles_radians = [] with open(extract_subtomo_params.newstack_file) as ns_file: while True: line = ns_file.readline() @@ -179,7 +180,9 @@ def extract_subtomo(self, rw, header: dict, message: dict): tilt_name = line.strip() tilt_numbers.append(_get_tilt_number_v5_12(Path(tilt_name))) # TODO: want exact tilt angles - tilt_angles.append(float(_get_tilt_angle_v5_12(Path(tilt_name)))) + tilt_angles_radians.append( + float(_get_tilt_angle_v5_12(Path(tilt_name))) * np.pi / 180 + ) with mrcfile.open(tilt_name) as mrc: tilt_images.append(mrc.data) @@ -200,12 +203,12 @@ def extract_subtomo(self, rw, header: dict, message: dict): cen_x=centre_x, cen_y=centre_y, cen_z=centre_z, - theta_y=tilt_angles[tilt], + theta_y=tilt_angles_radians[tilt], theta_z=tilt_axis_radians, delta_x=x_shifts[tilt], delta_y=y_shifts[tilt], ) - if tilt_angles[tilt] == 0: + if tilt_angles_radians[tilt] == 0: with open( Path(extract_subtomo_params.output_star).parent / "coords_in_tilts.txt", @@ -215,19 +218,34 @@ def extract_subtomo(self, rw, header: dict, message: dict): f"{particles_x[particle]} {particles_y[particle]} {x_in_tilt} {y_in_tilt}\n" ) - particle_subimage, invalid_tilt = extract_particle_in_tilt( - tilt_image=tilt_images[tilt], + particle_subimage, failure_reason = extract_single_particle( + input_image=tilt_images[tilt], x_coord=x_in_tilt, y_coord=y_in_tilt, extract_width=extract_width, - scaled_tomogram_shape=[ - int(i) for i in extract_subtomo_params.scaled_tomogram_shape + shape=[ + int(i * extract_subtomo_params.tomogram_binning) + for i in extract_subtomo_params.scaled_tomogram_shape ], - tomogram_binning=extract_subtomo_params.tomogram_binning, small_boxsize=extract_subtomo_params.small_boxsize, + bg_radius=round(0.375 * extract_subtomo_params.small_boxsize), + invert_contrast=True, + downscale=True, + norm=True, + plane_fit=True, ) - if invalid_tilt: - self.log.warning(f"Invalid {tilt} for particle {particle}") + + if failure_reason: + self.log.warning( + f"Extraction failed for {particle} in {tilt}. " + f"Reason was {failure_reason}." + ) + particle_subimage = np.zeros( + ( + extract_subtomo_params.small_boxsize, + extract_subtomo_params.small_boxsize, + ) + ) # Add to output stack if len(output_mrc_stack): From ea58a0c911575bdf19fda9126c93769e49f53299 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 2 Apr 2025 13:28:54 +0100 Subject: [PATCH 13/33] Fix centre position and array types --- src/cryoemservices/services/extract.py | 6 ++--- .../services/extract_subtomo.py | 22 ++++++++----------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/cryoemservices/services/extract.py b/src/cryoemservices/services/extract.py index e882d42a..e77ec21c 100644 --- a/src/cryoemservices/services/extract.py +++ b/src/cryoemservices/services/extract.py @@ -352,7 +352,7 @@ def extract_single_particle( y_bot = shape[1] if y_bot <= y_top or x_left >= x_right: - return [], "Particle is outside image" + return np.array([]), "Particle is outside image" else: particle_subimage = input_image[y_top:y_bot, x_left:x_right] particle_subimage = np.pad( @@ -399,7 +399,7 @@ def extract_single_particle( positions = [grid_indexes[0][bg_region], grid_indexes[1][bg_region]] # needs to create a matrix of the correct shape for a*x + b*y + c plane fit if not len(positions[0]) == len(positions[1]): - return [], "Particle image is not square" + return np.array([]), "Particle image is not square" data_size = len(positions[0]) positions_matrix = np.hstack( ( @@ -422,7 +422,7 @@ def extract_single_particle( values, ) except np.linalg.LinAlgError: - return [], "Could not fit image plane" + return np.array([]), "Could not fit image plane" # now we need the full grid across the image positions_matrix = np.hstack( ( diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index a230fc3c..3161eccb 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -34,7 +34,6 @@ class ExtractSubTomoParameters(BaseModel): particle_diameter: float = 0 boxsize: int = 256 small_boxsize: int = 64 - binning: int = 8 min_frames: int = 1 maximum_dose: int = -1 tomogram_binning: int = 4 @@ -162,6 +161,9 @@ def extract_subtomo(self, rw, header: dict, message: dict): * extract_subtomo_params.relion_options.boxsize / extract_subtomo_params.relion_options.small_boxsize ) + self.log.info( + f"Downscaling to {extract_subtomo_params.relion_options.pixel_size_downscaled}" + ) extract_width = round(extract_subtomo_params.relion_options.boxsize / 2) pixel_size = extract_subtomo_params.relion_options.pixel_size_downscaled @@ -308,26 +310,20 @@ def extract_subtomo(self, rw, header: dict, message: dict): f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tilt_alignment_file))}/{particle}", f"[{','.join([str(frm) for frm in frames[particle]])}]", f"{Path(extract_subtomo_params.output_star).parent}/{particle}_stack2d.mrcs", - "0.0", - "0.0", - "0.0", + str(centre_x), + str(centre_y), + str(centre_z), str( float(particles_x[particle]) - - float(extract_subtomo_params.scaled_tomogram_shape[2]) - / 2 - * extract_subtomo_params.tomogram_binning + - centre_x * extract_subtomo_params.tomogram_binning ), str( float(particles_y[particle]) - - float(extract_subtomo_params.scaled_tomogram_shape[1]) - / 2 - * extract_subtomo_params.tomogram_binning + - centre_y * extract_subtomo_params.tomogram_binning ), str( float(particles_z[particle]) - - float(extract_subtomo_params.scaled_tomogram_shape[0]) - / 2 - * extract_subtomo_params.tomogram_binning + - centre_z * extract_subtomo_params.tomogram_binning ), ] ) From 69ebead3c0ffa138b6018955c0b62e844753b8ff Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 2 Apr 2025 13:43:40 +0100 Subject: [PATCH 14/33] Allow for tilt offset --- src/cryoemservices/services/extract_subtomo.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 3161eccb..07760f6d 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -31,6 +31,7 @@ class ExtractSubTomoParameters(BaseModel): scaled_tomogram_shape: list[int] | str pixel_size: float dose_per_tilt: float + tilt_offset: float particle_diameter: float = 0 boxsize: int = 256 small_boxsize: int = 64 @@ -181,9 +182,14 @@ def extract_subtomo(self, rw, header: dict, message: dict): elif line.startswith("/"): tilt_name = line.strip() tilt_numbers.append(_get_tilt_number_v5_12(Path(tilt_name))) - # TODO: want exact tilt angles + tilt_axis_from_file = float(_get_tilt_angle_v5_12(Path(tilt_name))) tilt_angles_radians.append( - float(_get_tilt_angle_v5_12(Path(tilt_name))) * np.pi / 180 + ( + tilt_axis_from_file + + extract_subtomo_params.refined_tilt_offset + ) + * np.pi + / 180 ) with mrcfile.open(tilt_name) as mrc: tilt_images.append(mrc.data) @@ -310,9 +316,9 @@ def extract_subtomo(self, rw, header: dict, message: dict): f"{_get_tilt_name_v5_12(Path(extract_subtomo_params.tilt_alignment_file))}/{particle}", f"[{','.join([str(frm) for frm in frames[particle]])}]", f"{Path(extract_subtomo_params.output_star).parent}/{particle}_stack2d.mrcs", - str(centre_x), - str(centre_y), - str(centre_z), + str(centre_x * extract_subtomo_params.tomogram_binning), + str(centre_y * extract_subtomo_params.tomogram_binning), + str(centre_z * extract_subtomo_params.tomogram_binning), str( float(particles_x[particle]) - centre_x * extract_subtomo_params.tomogram_binning From 6faa7faa1a9d135fbc3f37101382949b2a2de638 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 2 Apr 2025 14:48:12 +0100 Subject: [PATCH 15/33] Remove redundant function --- .../services/extract_subtomo.py | 83 ------------------- 1 file changed, 83 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 07760f6d..1f4a267b 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -359,89 +359,6 @@ def extract_subtomo(self, rw, header: dict, message: dict): rw.transport.ack(header) -def extract_particle_in_tilt( - tilt_image: np.ndarray, - x_coord: float, - y_coord: float, - extract_width: float, - scaled_tomogram_shape: list[int], - tomogram_binning: int, - small_boxsize: int, -): - # Extract the particle image and pad the edges if it is not square - x_left_pad = 0 - x_right_pad = 0 - y_top_pad = 0 - y_bot_pad = 0 - - x_left = round(x_coord - extract_width) - if x_left < 0: - x_left_pad = -x_left - x_left = 0 - x_right = round(x_coord + extract_width) - if x_right >= scaled_tomogram_shape[0] * tomogram_binning: - x_right_pad = x_right - scaled_tomogram_shape[0] - x_right = scaled_tomogram_shape[0] * tomogram_binning - y_top = round(y_coord - extract_width) - if y_top < 0: - y_top_pad = -y_top - y_top = 0 - y_bot = round(y_coord + extract_width) - if y_bot >= scaled_tomogram_shape[1] * tomogram_binning: - y_bot_pad = y_bot - scaled_tomogram_shape[1] - y_bot = scaled_tomogram_shape[1] * tomogram_binning - - if y_bot <= y_top or x_left >= x_right: - invalid = True - particle_subimage = np.random.random( - (extract_width * 2, extract_width * 2) - ) * np.max(tilt_image) - else: - invalid = False - particle_subimage = tilt_image[y_top:y_bot, x_left:x_right] - particle_subimage = np.pad( - particle_subimage, - ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), - mode="edge", - ) - - # Flip all the values on inversion - particle_subimage = -1 * particle_subimage - - # Downscale the image size - subimage_ft = np.fft.fftshift(np.fft.fft2(particle_subimage)) - deltax = subimage_ft.shape[0] - small_boxsize - deltay = subimage_ft.shape[1] - small_boxsize - particle_subimage = np.real( - np.fft.ifft2( - np.fft.ifftshift( - subimage_ft[ - deltax // 2 : subimage_ft.shape[0] - deltax // 2, - deltay // 2 : subimage_ft.shape[1] - deltay // 2, - ] - ) - ) - ) - - # Distance of each pixel from the centre for background normalization - grid_indexes = np.meshgrid( - np.arange(small_boxsize), - np.arange(small_boxsize), - ) - distance_from_centre = np.sqrt( - (grid_indexes[0] - round(small_boxsize / 2) + 0.5) ** 2 - + (grid_indexes[1] - round(small_boxsize / 2) + 0.5) ** 2 - ) - - # Background normalisation - bg_radius = round(0.375 * small_boxsize) - bg_region = distance_from_centre > np.ones(np.shape(particle_subimage)) * bg_radius - bg_mean = np.mean(particle_subimage[bg_region]) - bg_std = np.std(particle_subimage[bg_region]) - particle_subimage = (particle_subimage - bg_mean) / bg_std - return particle_subimage, invalid - - def get_coord_in_tilt( x: float, y: float, From dd29fb7537c1eb84aa7c212b82023b32d241b79e Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 3 Apr 2025 16:14:44 +0100 Subject: [PATCH 16/33] Fix binning --- src/cryoemservices/services/extract_subtomo.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 1f4a267b..0a762ed2 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -184,10 +184,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): tilt_numbers.append(_get_tilt_number_v5_12(Path(tilt_name))) tilt_axis_from_file = float(_get_tilt_angle_v5_12(Path(tilt_name))) tilt_angles_radians.append( - ( - tilt_axis_from_file - + extract_subtomo_params.refined_tilt_offset - ) + (tilt_axis_from_file + extract_subtomo_params.tilt_offset) * np.pi / 180 ) @@ -215,6 +212,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): theta_z=tilt_axis_radians, delta_x=x_shifts[tilt], delta_y=y_shifts[tilt], + binning=extract_subtomo_params.tomogram_binning, ) if tilt_angles_radians[tilt] == 0: with open( @@ -370,7 +368,9 @@ def get_coord_in_tilt( theta_z: float, delta_x: float, delta_y: float, + binning: int, ): + # In binned coordinates here x_centred = x - cen_x y_centred = y - cen_y + cen_x * np.tan(theta_z) # TODO: last factor depends on rot z_centred = z - cen_z @@ -378,12 +378,13 @@ def get_coord_in_tilt( x_centred * np.cos(theta_z) * np.cos(theta_y) - y_centred * np.sin(theta_z) + z_centred * np.cos(theta_z) * np.sin(theta_y) - + delta_x ) y_2d = ( x_centred * np.sin(theta_z) * np.cos(theta_y) + y_centred * np.cos(theta_z) + z_centred * np.sin(theta_z) * np.sin(theta_y) - + delta_y ) - return cen_x + x_2d, cen_y + y_2d + # Un-bin and apply shifts + x_tilt = (cen_x + x_2d) * binning - delta_x + y_tilt = (cen_y + y_2d) * binning - delta_y + return x_tilt, y_tilt From 57d56b15b58bf3ee55216f1aa097f655825e1c23 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 9 Jul 2025 10:15:56 +0100 Subject: [PATCH 17/33] Bring subtomo extract service up to date with main --- src/cryoemservices/services/extract_subtomo.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 0a762ed2..f7581f9a 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -5,11 +5,11 @@ import mrcfile import numpy as np -import workflows.recipe from gemmi import cif from pydantic import BaseModel, Field, ValidationError, field_validator -from workflows.services.common_service import CommonService +from workflows.recipe import wrap_subscribe +from cryoemservices.services.common_service import CommonService from cryoemservices.services.extract import extract_single_particle from cryoemservices.util.models import MockRW from cryoemservices.util.relion_service_options import ( @@ -71,12 +71,11 @@ class ExtractSubTomo(CommonService): def initializing(self): """Subscribe to a queue. Received messages must be acknowledged.""" self.log.info("Extract service starting") - workflows.recipe.wrap_subscribe( + wrap_subscribe( self._transport, self._environment["queue"] or "extract_subtomo", self.extract_subtomo, acknowledgement=True, - log_extender=self.extend_log, allow_non_recipe_messages=True, ) From ed3eede89e2016e41f6d468d737f8820896d10ad Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 9 Jul 2025 16:21:41 +0100 Subject: [PATCH 18/33] Make some illustrative images --- .../services/extract_subtomo.py | 28 +++++++++++++++---- 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index f7581f9a..759eda2c 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -3,6 +3,7 @@ import ast from pathlib import Path +import matplotlib.pyplot as plt import mrcfile import numpy as np from gemmi import cif @@ -70,7 +71,7 @@ class ExtractSubTomo(CommonService): def initializing(self): """Subscribe to a queue. Received messages must be acknowledged.""" - self.log.info("Extract service starting") + self.log.info("Sub-tomogram extraction service starting") wrap_subscribe( self._transport, self._environment["queue"] or "extract_subtomo", @@ -170,6 +171,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): # Read in tilt images self.log.info("Reading tilt images") + tilt_png_names = [] tilt_images = [] tilt_numbers = [] tilt_angles_radians = [] @@ -180,6 +182,8 @@ def extract_subtomo(self, rw, header: dict, message: dict): break elif line.startswith("/"): tilt_name = line.strip() + tilt_png = Path(tilt_name).with_suffix(".png") + tilt_png_names.append(tilt_png) tilt_numbers.append(_get_tilt_number_v5_12(Path(tilt_name))) tilt_axis_from_file = float(_get_tilt_angle_v5_12(Path(tilt_name))) tilt_angles_radians.append( @@ -190,14 +194,20 @@ def extract_subtomo(self, rw, header: dict, message: dict): with mrcfile.open(tilt_name) as mrc: tilt_images.append(mrc.data) + plt.imshow(tilt_images[-1]) + plt.savefig(tilt_png) + plt.close() + frames = np.zeros((len(particles_x), tilt_count), dtype=int) + tilt_coords: list = [[] for tilt in range(tilt_count)] for particle in range(len(particles_x)): output_mrc_stack = np.array([]) for tilt in range(tilt_count): - if ( + if extract_subtomo_params.maximum_dose > 0 and ( extract_subtomo_params.dose_per_tilt * tilt_numbers[tilt] > extract_subtomo_params.maximum_dose ): + self.log.info(f"Skipping {tilt} due to dose limit") continue x_in_tilt, y_in_tilt = get_coord_in_tilt( @@ -222,6 +232,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): f.write( f"{particles_x[particle]} {particles_y[particle]} {x_in_tilt} {y_in_tilt}\n" ) + tilt_coords[tilt].append([x_in_tilt, y_in_tilt]) particle_subimage, failure_reason = extract_single_particle( input_image=tilt_images[tilt], @@ -270,7 +281,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): Path(extract_subtomo_params.output_star).parent / f"{particle}_stack2d.mrcs" ) - self.log.info(f"Extracted particle {particle} of {len(particles_x)}") + self.log.info(f"Extracted particle {particle+1} of {len(particles_x)}") with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: mrc.set_data(output_mrc_stack.astype(np.float32)) mrc.header.mx = extract_subtomo_params.relion_options.small_boxsize @@ -284,6 +295,13 @@ def extract_subtomo(self, rw, header: dict, message: dict): ) mrc.header.cella.z = 1 + for tilt in range(tilt_count): + plt.imshow(tilt_images[tilt]) + for loc in tilt_coords[tilt]: + plt.scatter(loc[0], loc[1], s=2, color="red") + plt.savefig(tilt_png_names[tilt]) + plt.close() + # Construct the output star file extracted_parts_doc = cif.Document() extracted_parts_block = extracted_parts_doc.add_new_block("particles") @@ -350,9 +368,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): } rw.send_to("node_creator", node_creator_parameters) - self.log.info( - f"Done {self.job_type} for {extract_subtomo_params.cbox_3d_file}." - ) + self.log.info(f"Done {self.job_type} for {extract_subtomo_params.cbox_3d_file}") rw.transport.ack(header) From 8b258f64975da4582875dd4b37cd629ea9e08e41 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 17 Sep 2025 13:55:59 +0100 Subject: [PATCH 19/33] Improved shifts and extraction logic --- .../services/extract_subtomo.py | 100 +++++++++--------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 759eda2c..c9b85df1 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -8,6 +8,7 @@ import numpy as np from gemmi import cif from pydantic import BaseModel, Field, ValidationError, field_validator +from tqdm import tqdm from workflows.recipe import wrap_subscribe from cryoemservices.services.common_service import CommonService @@ -18,11 +19,33 @@ update_relion_options, ) from cryoemservices.util.tomo_output_files import ( - _get_tilt_angle_v5_12, _get_tilt_name_v5_12, _get_tilt_number_v5_12, ) +inptus = { + "cbox_3d_file": "/scratch/yxd92326/data/tomo-extract/2_2_Ribosome_Pos_1_stack_aretomo.denoised.cbox", + "tilt_alignment_file": "/scratch/yxd92326/data/tomo-extract/2_2_Ribosome_Pos_1_stack.aln", + "newstack_file": "/scratch/yxd92326/data/tomo-extract/2_2_Ribosome_Pos_1_stack_newstack.txt", + "output_star": "/scratch/yxd92326/data/tomo-extract/extracted/extract.star", + "pixel_size": 1.34, + "dose_per_tilt": 4, + "tilt_offset": 0, + "scaled_tomogram_shape": [1440, 1023, 400], + "relion_options": {}, +} +in2 = { + "cbox_3d_file": "/scratch/yxd92326/data/tomo-extract/2_1_ApoF_Pos_13_9_stack_aretomo.denoised.cbox", + "tilt_alignment_file": "/scratch/yxd92326/data/tomo-extract/2_1_ApoF_Pos_13_9_stack_aretomo.aln", + "newstack_file": "/scratch/yxd92326/data/tomo-extract/2_1_ApoF_Pos_13_9_stack_newstack.txt", + "output_star": "/scratch/yxd92326/data/tomo-extract/extracted_apof/extract.star", + "pixel_size": 1.34, + "dose_per_tilt": 4, + "tilt_offset": 0, + "scaled_tomogram_shape": [1440, 1023, 400], + "relion_options": {}, +} + class ExtractSubTomoParameters(BaseModel): cbox_3d_file: str = Field(..., min_length=1) @@ -137,9 +160,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): particles_y = ( np.array(coords_block.find_loop("_CoordinateY"), dtype=float) + pick_radius ) - particles_z = ( - np.array(coords_block.find_loop("_CoordinateZ"), dtype=float) + pick_radius - ) + particles_z = np.array(coords_block.find_loop("_CoordinateZ"), dtype=float) # Get the shifts between tilts shift_data = np.genfromtxt(extract_subtomo_params.tilt_alignment_file) @@ -147,6 +168,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): refined_tilt_axis = float(shift_data[0, 1]) x_shifts = shift_data[:, 3].astype(float) y_shifts = shift_data[:, 4].astype(float) + tilt_angles = shift_data[:, 9].astype(float) * np.pi / 180 tilt_count = len(x_shifts) # Rotation around the tilt axis is about (0, height/2) @@ -174,33 +196,29 @@ def extract_subtomo(self, rw, header: dict, message: dict): tilt_png_names = [] tilt_images = [] tilt_numbers = [] - tilt_angles_radians = [] + tid = 0 with open(extract_subtomo_params.newstack_file) as ns_file: while True: + tid += 1 line = ns_file.readline() if not line: break elif line.startswith("/"): tilt_name = line.strip() - tilt_png = Path(tilt_name).with_suffix(".png") + tilt_png = Path("/home/yxd92326/Pictures/picking/tomo/") / ( + f"{tid}T_" + Path(tilt_name).with_suffix(".png").name + ) tilt_png_names.append(tilt_png) + tilt_png.unlink(missing_ok=True) + tilt_numbers.append(_get_tilt_number_v5_12(Path(tilt_name))) - tilt_axis_from_file = float(_get_tilt_angle_v5_12(Path(tilt_name))) - tilt_angles_radians.append( - (tilt_axis_from_file + extract_subtomo_params.tilt_offset) - * np.pi - / 180 - ) with mrcfile.open(tilt_name) as mrc: tilt_images.append(mrc.data) - plt.imshow(tilt_images[-1]) - plt.savefig(tilt_png) - plt.close() - frames = np.zeros((len(particles_x), tilt_count), dtype=int) tilt_coords: list = [[] for tilt in range(tilt_count)] - for particle in range(len(particles_x)): + for particle in tqdm(range(len(particles_x))): # [385]:# + # print(particles_x[particle], particles_y[particle], particles_z[particle]) output_mrc_stack = np.array([]) for tilt in range(tilt_count): if extract_subtomo_params.maximum_dose > 0 and ( @@ -213,25 +231,15 @@ def extract_subtomo(self, rw, header: dict, message: dict): x_in_tilt, y_in_tilt = get_coord_in_tilt( x=particles_x[particle], y=particles_y[particle], - z=particles_z[particle], cen_x=centre_x, cen_y=centre_y, - cen_z=centre_z, - theta_y=tilt_angles_radians[tilt], + theta_y=tilt_angles[tilt], theta_z=tilt_axis_radians, delta_x=x_shifts[tilt], delta_y=y_shifts[tilt], binning=extract_subtomo_params.tomogram_binning, ) - if tilt_angles_radians[tilt] == 0: - with open( - Path(extract_subtomo_params.output_star).parent - / "coords_in_tilts.txt", - "a", - ) as f: - f.write( - f"{particles_x[particle]} {particles_y[particle]} {x_in_tilt} {y_in_tilt}\n" - ) + # print(tilt, tilt_angles_radians[tilt], tilt_axis_radians, x_in_tilt, y_in_tilt) tilt_coords[tilt].append([x_in_tilt, y_in_tilt]) particle_subimage, failure_reason = extract_single_particle( @@ -281,7 +289,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): Path(extract_subtomo_params.output_star).parent / f"{particle}_stack2d.mrcs" ) - self.log.info(f"Extracted particle {particle+1} of {len(particles_x)}") + # self.log.info(f"Extracted particle {particle+1} of {len(particles_x)}") with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: mrc.set_data(output_mrc_stack.astype(np.float32)) mrc.header.mx = extract_subtomo_params.relion_options.small_boxsize @@ -295,8 +303,8 @@ def extract_subtomo(self, rw, header: dict, message: dict): ) mrc.header.cella.z = 1 - for tilt in range(tilt_count): - plt.imshow(tilt_images[tilt]) + for tilt in tqdm(range(tilt_count)): + plt.imshow(tilt_images[tilt], vmin=0.5, vmax=1.7) for loc in tilt_coords[tilt]: plt.scatter(loc[0], loc[1], s=2, color="red") plt.savefig(tilt_png_names[tilt]) @@ -375,31 +383,25 @@ def extract_subtomo(self, rw, header: dict, message: dict): def get_coord_in_tilt( x: float, y: float, - z: float, cen_x: float, cen_y: float, - cen_z: float, theta_y: float, theta_z: float, delta_x: float, delta_y: float, binning: int, ): + # Translation raw to aligned tilt is subtract shift then rotate around centre # In binned coordinates here - x_centred = x - cen_x - y_centred = y - cen_y + cen_x * np.tan(theta_z) # TODO: last factor depends on rot - z_centred = z - cen_z - x_2d = ( - x_centred * np.cos(theta_z) * np.cos(theta_y) - - y_centred * np.sin(theta_z) - + z_centred * np.cos(theta_z) * np.sin(theta_y) - ) - y_2d = ( - x_centred * np.sin(theta_z) * np.cos(theta_y) - + y_centred * np.cos(theta_z) - + z_centred * np.sin(theta_z) * np.sin(theta_y) - ) + x_centred = x - cen_x # - delta_x / binning + y_centred = ( + y - cen_y + ) # - delta_y / binning#+ cen_x * np.tan(theta_z) # TODO: last factor depends on rot + x_2d = x_centred * np.cos(theta_z) - y_centred * np.sin(theta_z) + y_2d = x_centred * np.sin(theta_z) + y_centred * np.cos(theta_z) # Un-bin and apply shifts - x_tilt = (cen_x + x_2d) * binning - delta_x - y_tilt = (cen_y + y_2d) * binning - delta_y + x_tilt = (cen_x + x_2d) * binning + delta_x + y_flat = (cen_y + y_2d) * binning + delta_y + y_tilt = (y_flat - cen_y * binning) * np.cos(theta_y) + cen_y * binning + # print(x_centred, y_centred, cen_x, x_2d, delta_x, cen_y, y_2d, delta_y) return x_tilt, y_tilt From 9e6c20c03ce35cf1661694dcc5222a6e31a611b1 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Fri, 26 Sep 2025 09:52:09 +0100 Subject: [PATCH 20/33] Various experiments on subtomo extraction --- .../services/extract_subtomo.py | 90 ++++++++++++++++--- 1 file changed, 78 insertions(+), 12 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index c9b85df1..70346f82 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -35,7 +35,7 @@ "relion_options": {}, } in2 = { - "cbox_3d_file": "/scratch/yxd92326/data/tomo-extract/2_1_ApoF_Pos_13_9_stack_aretomo.denoised.cbox", + "cbox_3d_file": "/scratch/yxd92326/data/tomo-extract/2_1_ApoF_Pos_13_9_test.cbox", "tilt_alignment_file": "/scratch/yxd92326/data/tomo-extract/2_1_ApoF_Pos_13_9_stack_aretomo.aln", "newstack_file": "/scratch/yxd92326/data/tomo-extract/2_1_ApoF_Pos_13_9_stack_newstack.txt", "output_star": "/scratch/yxd92326/data/tomo-extract/extracted_apof/extract.star", @@ -44,6 +44,7 @@ "tilt_offset": 0, "scaled_tomogram_shape": [1440, 1023, 400], "relion_options": {}, + "particle_diameter": 500, } @@ -164,7 +165,6 @@ def extract_subtomo(self, rw, header: dict, message: dict): # Get the shifts between tilts shift_data = np.genfromtxt(extract_subtomo_params.tilt_alignment_file) - # tilt_ids = shift_data[:, 0].astype(int) refined_tilt_axis = float(shift_data[0, 1]) x_shifts = shift_data[:, 3].astype(float) y_shifts = shift_data[:, 4].astype(float) @@ -215,32 +215,39 @@ def extract_subtomo(self, rw, header: dict, message: dict): with mrcfile.open(tilt_name) as mrc: tilt_images.append(mrc.data) + for tilt in range(tilt_count): + if extract_subtomo_params.maximum_dose > 0 and ( + extract_subtomo_params.dose_per_tilt * tilt_numbers[tilt] + > extract_subtomo_params.maximum_dose + ): + self.log.info(f"Skipping tilt {tilt} due to dose limit") + frames = np.zeros((len(particles_x), tilt_count), dtype=int) tilt_coords: list = [[] for tilt in range(tilt_count)] - for particle in tqdm(range(len(particles_x))): # [385]:# - # print(particles_x[particle], particles_y[particle], particles_z[particle]) + for particle in tqdm(range(len(particles_x))): output_mrc_stack = np.array([]) for tilt in range(tilt_count): if extract_subtomo_params.maximum_dose > 0 and ( extract_subtomo_params.dose_per_tilt * tilt_numbers[tilt] > extract_subtomo_params.maximum_dose ): - self.log.info(f"Skipping {tilt} due to dose limit") continue x_in_tilt, y_in_tilt = get_coord_in_tilt( x=particles_x[particle], y=particles_y[particle], + z=particles_z[particle], cen_x=centre_x, cen_y=centre_y, + cen_z=centre_z, theta_y=tilt_angles[tilt], theta_z=tilt_axis_radians, delta_x=x_shifts[tilt], delta_y=y_shifts[tilt], binning=extract_subtomo_params.tomogram_binning, ) - # print(tilt, tilt_angles_radians[tilt], tilt_axis_radians, x_in_tilt, y_in_tilt) tilt_coords[tilt].append([x_in_tilt, y_in_tilt]) + # print(x_in_tilt, y_in_tilt, tilt_angles[tilt]) particle_subimage, failure_reason = extract_single_particle( input_image=tilt_images[tilt], @@ -289,7 +296,6 @@ def extract_subtomo(self, rw, header: dict, message: dict): Path(extract_subtomo_params.output_star).parent / f"{particle}_stack2d.mrcs" ) - # self.log.info(f"Extracted particle {particle+1} of {len(particles_x)}") with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: mrc.set_data(output_mrc_stack.astype(np.float32)) mrc.header.mx = extract_subtomo_params.relion_options.small_boxsize @@ -383,8 +389,10 @@ def extract_subtomo(self, rw, header: dict, message: dict): def get_coord_in_tilt( x: float, y: float, + z: float, cen_x: float, cen_y: float, + cen_z: float, theta_y: float, theta_z: float, delta_x: float, @@ -393,15 +401,73 @@ def get_coord_in_tilt( ): # Translation raw to aligned tilt is subtract shift then rotate around centre # In binned coordinates here - x_centred = x - cen_x # - delta_x / binning - y_centred = ( - y - cen_y - ) # - delta_y / binning#+ cen_x * np.tan(theta_z) # TODO: last factor depends on rot + x_centred = x - cen_x + y_centred = y - cen_y # + cen_x * np.tan(theta_z) TODO: last factor depends on rot x_2d = x_centred * np.cos(theta_z) - y_centred * np.sin(theta_z) y_2d = x_centred * np.sin(theta_z) + y_centred * np.cos(theta_z) # Un-bin and apply shifts x_tilt = (cen_x + x_2d) * binning + delta_x y_flat = (cen_y + y_2d) * binning + delta_y y_tilt = (y_flat - cen_y * binning) * np.cos(theta_y) + cen_y * binning - # print(x_centred, y_centred, cen_x, x_2d, delta_x, cen_y, y_2d, delta_y) + # print(cen_x, x, x_2d, delta_x, cen_y, y, y_2d, delta_y) + + z_centred = z - cen_z + y_tilt += z_centred * np.sin(theta_y) * binning + # print(z_centred * np.sin(theta_y), x_tilt, y_tilt) return x_tilt, y_tilt + + +""" +with open("Extract_maxdose/particles.star", "a") as partstar: + for tomo in Path("Extract_maxdose").glob("2_1*"): + with open(tomo / "extract.star") as tomostar: + while True: + line=tomostar.readline() + if not line: + break + if line.startswith("2_1"): + partstar.write(line) +""" + + +def cbox_to_star(name, max_subsample): + import pandas as pd + import starfile + + # make data_particles + cbox = starfile.read(f"AutoPick/job009/CBOX_3D/{name}_stack_aretomo.denoised.cbox") + all_particles = cbox["cryolo"] + new_particles = pd.DataFrame() + new_particles["rlnTomoName"] = [f"{name}" for i in range(len(all_particles))] + new_particles["rlnCenteredCoordinateXAngst"] = ( + all_particles["CoordinateY"] * 4 + 80 - 4092 / 2 + ) * 1.34 + new_particles["rlnCenteredCoordinateYAngst"] = ( + 5760 - all_particles["CoordinateX"] * 4 - 80 - 5760 / 2 + ) * 1.34 + new_particles["rlnCenteredCoordinateZAngst"] = ( + all_particles["CoordinateZ"] * 4 - 1600 / 2 + ) * 1.34 + for subsamp in range(2, max_subsample + 1): + if Path( + f"AutoPick/job009/CBOX_3D/{name}_{subsamp}_stack_aretomo.denoised.cbox" + ).is_file(): + cbox = starfile.read( + f"AutoPick/job009/CBOX_3D/{name}_{subsamp}_stack_aretomo.denoised.cbox" + ) + particles = cbox["cryolo"] + add_particles = pd.DataFrame() + add_particles["rlnTomoName"] = [ + f"{name}_{subsamp}" for i in range(len(particles)) + ] + add_particles["rlnCenteredCoordinateXAngst"] = ( + particles["CoordinateY"] * 4 + 80 - 4092 / 2 + ) * 1.34 + add_particles["rlnCenteredCoordinateYAngst"] = ( + 5760 - particles["CoordinateX"] * 4 - 80 - 5760 / 2 + ) * 1.34 + add_particles["rlnCenteredCoordinateZAngst"] = ( + particles["CoordinateZ"] * 4 - 1600 / 2 + ) * 1.34 + new_particles = pd.concat((new_particles, add_particles)) + starfile.write(new_particles, f"AutoPick/job009/{name}_all_particles_centered.star") From 503ae6f5b6372de9285976ed55f95eb21b0458a8 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 1 Oct 2025 10:43:11 +0100 Subject: [PATCH 21/33] Another function for experimenting with data --- .../services/extract_subtomo.py | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 70346f82..405e92a6 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -471,3 +471,32 @@ def cbox_to_star(name, max_subsample): ) * 1.34 new_particles = pd.concat((new_particles, add_particles)) starfile.write(new_particles, f"AutoPick/job009/{name}_all_particles_centered.star") + + +def cbox_to_star_whole_dir(): + import pandas as pd + import starfile + + # make data_particles + new_particles = None + + for cbox in Path("AutoPick/job009/CBOX_3D").glob("*.cbox"): + all_particles = starfile.read(cbox)["cryolo"] + add_particles = pd.DataFrame() + add_particles["rlnTomoName"] = [ + cbox.name.split("_stack_")[0] for i in range(len(all_particles)) + ] + add_particles["rlnCenteredCoordinateXAngst"] = ( + all_particles["CoordinateY"] * 4 + 80 - 4092 / 2 + ) * 1.63 + add_particles["rlnCenteredCoordinateYAngst"] = ( + 5760 / 2 - all_particles["CoordinateX"] * 4 - 80 + ) * 1.63 + add_particles["rlnCenteredCoordinateZAngst"] = ( + all_particles["CoordinateZ"] * 4 - 1600 / 2 + ) * 1.63 + if new_particles is None: + new_particles = add_particles + else: + new_particles = pd.concat((new_particles, add_particles)) + starfile.write(new_particles, "AutoPick/job009/all_particles_centered.star") From 343e75cf8343190cda8a060b01631bb5faaed977 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 2 Oct 2025 16:29:21 +0100 Subject: [PATCH 22/33] More functions for exploring extraction --- .../services/extract_subtomo.py | 46 +++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 405e92a6..aebece3c 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -482,10 +482,18 @@ def cbox_to_star_whole_dir(): for cbox in Path("AutoPick/job009/CBOX_3D").glob("*.cbox"): all_particles = starfile.read(cbox)["cryolo"] + + particles_to_drop = [] + for pindex, particle in all_particles.iterrows(): + if ( + particle["CoordinateZ"] < particle["Depth"] / 2 + or 400 - particle["CoordinateZ"] < particle["Depth"] / 2 + ): + particles_to_drop.append(pindex) + print(cbox, particles_to_drop) + all_particles.drop(labels=particles_to_drop, axis=0, inplace=True) + add_particles = pd.DataFrame() - add_particles["rlnTomoName"] = [ - cbox.name.split("_stack_")[0] for i in range(len(all_particles)) - ] add_particles["rlnCenteredCoordinateXAngst"] = ( all_particles["CoordinateY"] * 4 + 80 - 4092 / 2 ) * 1.63 @@ -495,8 +503,40 @@ def cbox_to_star_whole_dir(): add_particles["rlnCenteredCoordinateZAngst"] = ( all_particles["CoordinateZ"] * 4 - 1600 / 2 ) * 1.63 + add_particles["rlnTomoName"] = [ + cbox.name.split("_stack_")[0] for i in range(len(all_particles)) + ] if new_particles is None: new_particles = add_particles else: new_particles = pd.concat((new_particles, add_particles)) starfile.write(new_particles, "AutoPick/job009/all_particles_centered.star") + + +def cbox_to_star_whole_dir_noflip(): + import pandas as pd + import starfile + + # make data_particles + new_particles = None + + for cbox in Path("AutoPick/job009/CBOX_3D").glob("*.cbox"): + all_particles = starfile.read(cbox)["cryolo"] + add_particles = pd.DataFrame() + add_particles["rlnCenteredCoordinateXAngst"] = ( + all_particles["CoordinateX"] * 4 + 80 - 5760 / 2 + ) * 1.63 + add_particles["rlnCenteredCoordinateYAngst"] = ( + all_particles["CoordinateY"] * 4 + 80 - 4092 / 2 + ) * 1.63 + add_particles["rlnCenteredCoordinateZAngst"] = ( + all_particles["CoordinateZ"] * 4 - 1600 / 2 + ) * 1.63 + add_particles["rlnTomoName"] = [ + cbox.name.split("_stack_")[0] for i in range(len(all_particles)) + ] + if new_particles is None: + new_particles = add_particles + else: + new_particles = pd.concat((new_particles, add_particles)) + starfile.write(new_particles, "AutoPick/job009/all_particles_centered_noflip.star") From fe95d89b376523fd9d7e78de110c3c9d60dc1caf Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Fri, 3 Oct 2025 11:03:41 +0100 Subject: [PATCH 23/33] Generalisation --- src/cryoemservices/services/extract_subtomo.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index aebece3c..5723c3ae 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -473,7 +473,7 @@ def cbox_to_star(name, max_subsample): starfile.write(new_particles, f"AutoPick/job009/{name}_all_particles_centered.star") -def cbox_to_star_whole_dir(): +def cbox_to_star_whole_dir(pixel_size=1.63, xdim=5760, ydim=4092, zdim=1600): import pandas as pd import starfile @@ -487,7 +487,7 @@ def cbox_to_star_whole_dir(): for pindex, particle in all_particles.iterrows(): if ( particle["CoordinateZ"] < particle["Depth"] / 2 - or 400 - particle["CoordinateZ"] < particle["Depth"] / 2 + or zdim / 4 - particle["CoordinateZ"] < particle["Depth"] / 2 ): particles_to_drop.append(pindex) print(cbox, particles_to_drop) @@ -495,14 +495,14 @@ def cbox_to_star_whole_dir(): add_particles = pd.DataFrame() add_particles["rlnCenteredCoordinateXAngst"] = ( - all_particles["CoordinateY"] * 4 + 80 - 4092 / 2 - ) * 1.63 + all_particles["CoordinateY"] * 4 + 80 - ydim / 2 + ) * pixel_size add_particles["rlnCenteredCoordinateYAngst"] = ( - 5760 / 2 - all_particles["CoordinateX"] * 4 - 80 - ) * 1.63 + xdim / 2 - all_particles["CoordinateX"] * 4 - 80 + ) * pixel_size add_particles["rlnCenteredCoordinateZAngst"] = ( - all_particles["CoordinateZ"] * 4 - 1600 / 2 - ) * 1.63 + all_particles["CoordinateZ"] * 4 - zdim / 2 + ) * pixel_size add_particles["rlnTomoName"] = [ cbox.name.split("_stack_")[0] for i in range(len(all_particles)) ] From bae527c9c3c7ee106af79a3121dea7fe0c2d43e2 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 8 Oct 2025 09:54:33 +0100 Subject: [PATCH 24/33] Send picks from cryolo tomo to murfey --- src/cryoemservices/services/cryolo.py | 30 ++++++++++++++++++++++---- src/cryoemservices/services/denoise.py | 2 ++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/src/cryoemservices/services/cryolo.py b/src/cryoemservices/services/cryolo.py index b98a2e23..040bb975 100644 --- a/src/cryoemservices/services/cryolo.py +++ b/src/cryoemservices/services/cryolo.py @@ -22,7 +22,7 @@ class CryoloParameters(BaseModel): input_path: str = Field(..., min_length=1) output_path: str = Field(..., min_length=1) experiment_type: str - pixel_size: Optional[float] = None + pixel_size: float cryolo_box_size: int = 160 cryolo_model_weights: str = "gmodel_phosnet_202005_N63_c17.h5" cryolo_threshold: float = 0.3 @@ -39,6 +39,7 @@ class CryoloParameters(BaseModel): mc_uuid: Optional[int] = None app_id: Optional[int] = None picker_uuid: Optional[int] = None + raw_tomogram: Optional[str] = None relion_options: RelionServiceOptions ctf_values: dict = {} @@ -52,13 +53,12 @@ def is_spa_or_tomo(cls, experiment): @model_validator(mode="after") def check_spa_has_uuids_and_pixel_size(self): if self.experiment_type == "spa" and ( - self.mc_uuid is None or self.picker_uuid is None or not self.pixel_size + self.mc_uuid is None or self.picker_uuid is None ): raise ValueError( "In SPA mode the following must be provided: " f"mc_uuid (given {self.mc_uuid}), " - f"picker_uuid (given {self.picker_uuid}), " - f"pixel_size (given {self.pixel_size})" + f"picker_uuid (given {self.picker_uuid})" ) return self @@ -327,6 +327,28 @@ def cryolo(self, rw, header: dict, message: dict): } rw.send_to("ispyb_connector", ispyb_parameters_tomo) + # Get the diameters of the particles in Angstroms for Murfey + try: + cbox_file = cif.read_file(cryolo_params.output_path) + cbox_block = cbox_file.find_block("cryolo") + cbox_sizes = np.append( + np.array(cbox_block.find_loop("_EstWidth"), dtype=float), + np.array(cbox_block.find_loop("_EstHeight"), dtype=float), + ) + cryolo_particle_sizes = cbox_sizes * cryolo_params.pixel_size + except (FileNotFoundError, OSError, AttributeError): + cryolo_particle_sizes = [] + + # Send to murfey for extraction + extraction_parameters = { + "tomogram": cryolo_params.raw_tomogram or cryolo_params.input_path, + "cbox_3d": cryolo_params.output_path, + "pixel_size": cryolo_params.pixel_size, + "particle_diameters": list(cryolo_particle_sizes), + "particle_count": len(cryolo_particle_sizes), + } + rw.send_to("murfey_feedback", extraction_parameters) + self.log.info( f"Done {self.job_type} {cryolo_params.experiment_type} " f"for {cryolo_params.input_path}." diff --git a/src/cryoemservices/services/denoise.py b/src/cryoemservices/services/denoise.py index fa1f43dc..8d34b7b7 100644 --- a/src/cryoemservices/services/denoise.py +++ b/src/cryoemservices/services/denoise.py @@ -292,10 +292,12 @@ def denoise(self, rw, header: dict, message: dict): "relion_options": dict(denoise_params.relion_options), } cryolo_parameters = { + "raw_tomogram": denoise_params.volume, "input_path": str(denoised_full_path), "output_path": str(cryolo_dir / f"CBOX_3D/{denoised_full_path.stem}.cbox"), "experiment_type": "tomography", "cryolo_box_size": 40, + "pixel_size": str(denoise_params.relion_options.pixel_size_downscaled), "relion_options": dict(denoise_params.relion_options), } rw.send_to("segmentation", segmentation_parameters) From 36623d576f4735880434c3945b6110b30476879c Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Wed, 8 Oct 2025 09:57:38 +0100 Subject: [PATCH 25/33] Include service that can do 2d tomo extraction --- pyproject.toml | 2 +- src/cryoemservices/services/extract.py | 36 ++- .../services/extract_subtomo.py | 285 +++++++++++++++++- 3 files changed, 305 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e9843dd7..8cd4c8a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ torch = [ EMISPyB = "cryoemservices.services.ispyb_connector:EMISPyB" Extract = "cryoemservices.services.extract:Extract" ExtractClass = "cryoemservices.services.extract_class:ExtractClass" - ExtractSubTomo = "cryoemservices.services.extract_subtomo:ExtractSubTomo" + ExtractSubTomo = "cryoemservices.services.extract_subtomo:ExtractSubTomoFor2D" IceBreaker = "cryoemservices.services.icebreaker:IceBreaker" Images = "cryoemservices.services.images:Images" MembrainSeg = "cryoemservices.services.membrain_seg:MembrainSeg" diff --git a/src/cryoemservices/services/extract.py b/src/cryoemservices/services/extract.py index e77ec21c..ceb1621e 100644 --- a/src/cryoemservices/services/extract.py +++ b/src/cryoemservices/services/extract.py @@ -232,12 +232,23 @@ def extract(self, rw, header: dict, message: dict): pixel_location_y = round(float(particles_y[particle])) extract_width = round(extract_params.relion_options.boxsize / 2) - particle_subimage, failure_reason = extract_single_particle( + full_particle_subimage, failure_reason = extract_single_particle( input_image=input_micrograph_image, x_coord=pixel_location_x, y_coord=pixel_location_y, extract_width=extract_width, shape=[image_size[1], image_size[0]], + ) + if failure_reason: + self.log.warning( + f"Extraction failed for {particle} " + f"in {extract_params.micrographs_file}. " + f"Reason was {failure_reason}." + ) + continue + particle_subimage, failure_reason = enhance_single_particle( + particle_subimage=full_particle_subimage, + extract_width=extract_width, small_boxsize=extract_params.relion_options.small_boxsize, bg_radius=extract_params.bg_radius, invert_contrast=extract_params.invert_contrast, @@ -317,16 +328,9 @@ def extract_single_particle( y_coord: float, extract_width: float, shape: list[int], - small_boxsize: int, - bg_radius: float, - invert_contrast: bool = True, - downscale: bool = True, - norm: bool = True, - plane_fit: bool = True, ) -> tuple[np.ndarray, str]: """ A function which can extract a single particle in a micrograph - or a single particle from a tomogram tilt """ # Extract the particle image and pad the edges if it is not square x_left_pad = 0 @@ -360,7 +364,23 @@ def extract_single_particle( ((y_bot_pad, y_top_pad), (x_left_pad, x_right_pad)), mode="edge", ) + return particle_subimage, "" + +def enhance_single_particle( + particle_subimage: np.ndarray, + extract_width: float, + small_boxsize: int, + bg_radius: float, + invert_contrast: bool = True, + downscale: bool = True, + norm: bool = True, + plane_fit: bool = True, +): + """ + A function which runs enhancement on an extracted particle in a micrograph + or a flattened particle from a tomogram volume + """ # Flip all the values on inversion if invert_contrast: particle_subimage = -1 * particle_subimage diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 5723c3ae..eca3b6e7 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -12,7 +12,10 @@ from workflows.recipe import wrap_subscribe from cryoemservices.services.common_service import CommonService -from cryoemservices.services.extract import extract_single_particle +from cryoemservices.services.extract import ( + enhance_single_particle, + extract_single_particle, +) from cryoemservices.util.models import MockRW from cryoemservices.util.relion_service_options import ( RelionServiceOptions, @@ -48,7 +51,17 @@ } -class ExtractSubTomoParameters(BaseModel): +class ExtractSubTomoParameters2D(BaseModel): + cbox_3d_file: str = Field(..., min_length=1) + tomogram: str = Field(..., min_length=1) + output_star: str = Field(..., min_length=1) + pixel_size: float + particle_diameter: float = 0 + boxsize: int = 256 + relion_options: RelionServiceOptions + + +class ExtractSubTomoParameters3D(BaseModel): cbox_3d_file: str = Field(..., min_length=1) tilt_alignment_file: str = Field(..., min_length=1) newstack_file: str = Field(..., min_length=1) @@ -59,7 +72,6 @@ class ExtractSubTomoParameters(BaseModel): tilt_offset: float particle_diameter: float = 0 boxsize: int = 256 - small_boxsize: int = 64 min_frames: int = 1 maximum_dose: int = -1 tomogram_binning: int = 4 @@ -79,7 +91,258 @@ def check_shape_is_3d(cls, v): return shape_list -class ExtractSubTomo(CommonService): +class ExtractSubTomoFor2D(CommonService): + """ + A service for extracting particles from cryolo autopicking for tomograms + """ + + # Human readable service name + _service_name = "ExtractSubTomo" + + # Logger name + _logger_name = "cryoemservices.services.extract_subtomo" + + # Job name + job_type = "relion.pseudosubtomo" + + def initializing(self): + """Subscribe to a queue. Received messages must be acknowledged.""" + self.log.info("Sub-tomogram extraction service starting") + wrap_subscribe( + self._transport, + self._environment["queue"] or "extract_subtomo", + self.extract_subtomo_for_2d, + acknowledgement=True, + allow_non_recipe_messages=True, + ) + + def extract_subtomo_for_2d(self, rw, header: dict, message: dict): + """Main function which interprets and processes received messages""" + if not rw: + self.log.info("Received a simple message") + if not isinstance(message, dict): + self.log.error("Rejected invalid simple message") + self._transport.nack(header) + return + + # Create a wrapper-like object that can be passed to functions + # as if a recipe wrapper was present. + rw = MockRW(self._transport) + rw.recipe_step = {"parameters": message} + + try: + if isinstance(message, dict): + extract_subtomo_params = ExtractSubTomoParameters2D( + **{**rw.recipe_step.get("parameters", {}), **message} + ) + else: + extract_subtomo_params = ExtractSubTomoParameters2D( + **{**rw.recipe_step.get("parameters", {})} + ) + except (ValidationError, TypeError) as e: + self.log.warning( + f"Extraction parameter validation failed for message: {message} " + f"and recipe parameters: {rw.recipe_step.get('parameters', {})} " + f"with exception: {e}" + ) + rw.transport.nack(header) + return + + self.log.info( + f"Inputs: {extract_subtomo_params.tomogram}, " + f"{extract_subtomo_params.cbox_3d_file} " + f"Output: {extract_subtomo_params.output_star}" + ) + + # Update the relion options and get box sizes + extract_subtomo_params.relion_options = update_relion_options( + extract_subtomo_params.relion_options, dict(extract_subtomo_params) + ) + + # Make sure the output directory exists + if not Path(extract_subtomo_params.output_star).parent.exists(): + Path(extract_subtomo_params.output_star).parent.mkdir(parents=True) + + # Find the locations of the particles + coords_file = cif.read(extract_subtomo_params.cbox_3d_file) + coords_block = coords_file.find_block("cryolo") + pick_radius = float(coords_block.find_loop("_Width")[0]) / 2 + particles_x = ( + np.array(coords_block.find_loop("_CoordinateX"), dtype=float) + pick_radius + ) + particles_y = ( + np.array(coords_block.find_loop("_CoordinateY"), dtype=float) + pick_radius + ) + particles_z = np.array(coords_block.find_loop("_CoordinateZ"), dtype=float) + + # Read tomogram + with mrcfile.open(extract_subtomo_params.tomogram) as mrc: + tomogram_data = mrc.data + + # Extract at the same downscaling as during tomogram reconstruction + extract_width = round(extract_subtomo_params.relion_options.boxsize / 2) + + output_mrc_stack = np.array([]) + for particle in tqdm(range(len(particles_x))): + if ( + particles_z[particle] - extract_width < 0 + or particles_z[particle] + extract_width > tomogram_data.shape[0] + or particles_y[particle] - extract_width < 0 + or particles_y[particle] + extract_width > tomogram_data.shape[1] + or particles_x[particle] - extract_width < 0 + or particles_x[particle] + extract_width > tomogram_data.shape[2] + ): + self.log.info( + f"Skipping particle {particle} runs over the edge of the volume" + ) + particle_subimage = np.zeros( + (extract_subtomo_params.boxsize, extract_subtomo_params.boxsize) + ) + # Add to output stack + if len(output_mrc_stack): + output_mrc_stack = np.append( + output_mrc_stack, [particle_subimage], axis=0 + ) + else: + output_mrc_stack = np.array([particle_subimage], dtype=np.float32) + continue + extract_vol = tomogram_data[ + round(float(particles_z[particle] - extract_width)) : round( + float(particles_z[particle] + extract_width) + ), + round(float(particles_y[particle] - extract_width)) : round( + float(particles_y[particle] + extract_width) + ), + round(float(particles_x[particle] - extract_width)) : round( + float(particles_x[particle] + extract_width) + ), + ] + flat_particle = extract_vol.mean(axis=0) + + particle_subimage, failure_reason = enhance_single_particle( + particle_subimage=flat_particle, + extract_width=extract_width, + small_boxsize=extract_subtomo_params.boxsize, + bg_radius=round(0.375 * extract_subtomo_params.boxsize), + invert_contrast=True, + downscale=False, + norm=True, + plane_fit=True, + ) + if failure_reason: + self.log.warning( + f"Extraction failed for {particle}. Reason was {failure_reason}." + ) + continue + + # Add to output stack + if len(output_mrc_stack): + output_mrc_stack = np.append( + output_mrc_stack, [particle_subimage], axis=0 + ) + else: + output_mrc_stack = np.array([particle_subimage], dtype=np.float32) + + # Produce the mrc file of the extracted particles + output_mrc_file = ( + Path(extract_subtomo_params.output_star).parent + / Path(extract_subtomo_params.tomogram).with_suffix(".mrcs").name + ) + particle_count = np.shape(output_mrc_stack)[0] + self.log.info(f"Extracted {particle_count} particles") + with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: + mrc.set_data(output_mrc_stack.astype(np.float32)) + mrc.header.mx = extract_subtomo_params.relion_options.boxsize + mrc.header.my = extract_subtomo_params.relion_options.boxsize + mrc.header.mz = 1 + mrc.header.cella.x = ( + extract_subtomo_params.pixel_size + * extract_subtomo_params.relion_options.boxsize + ) + mrc.header.cella.y = ( + extract_subtomo_params.pixel_size + * extract_subtomo_params.relion_options.boxsize + ) + mrc.header.cella.z = 1 + + # Construct the output star file + if not Path(extract_subtomo_params.output_star).is_file(): + extracted_parts_doc = cif.Document() + optics_block = extracted_parts_doc.add_new_block("optics") + optics_loop = optics_block.init_loop( + "_rln", + [ + "Voltage", + "SphericalAberration", + "AmplitudeContrast", + "OpticsGroup", + "OpticsGroupName", + "CtfDataAreCtfPremultiplied", + "ImageDimensionality", + "ImagePixelSize", + "ImageSize", + ], + ) + optics_loop.add_row( + [ + "300.00", + "2.70", + "0.10", + "1", + "optics1", + "1", + "2", + str(extract_subtomo_params.pixel_size), + str(extract_subtomo_params.boxsize), + ] + ) + extracted_parts_block = extracted_parts_doc.add_new_block("particles") + extracted_parts_loop = extracted_parts_block.init_loop( + "_rln", + [ + "TomoName", + "OpticsGroup", + "TomoParticleName", + "ImageName", + ], + ) + else: + extracted_parts_doc = cif.read_file(extract_subtomo_params.output_star) + extracted_parts_block = extracted_parts_doc.find_block("particles") + extracted_parts_loop = extracted_parts_block.find_loop( + "_rlnTomoName" + ).get_loop() + for particle in range(len(particles_x)): + extracted_parts_loop.add_row( + [ + _get_tilt_name_v5_12(Path(extract_subtomo_params.tomogram)), + "1", + f"{particle}@{output_mrc_file}", + f"{particle}@{output_mrc_file}", + ] + ) + extracted_parts_doc.write_file( + extract_subtomo_params.output_star, style=cif.Style.Simple + ) + + # Register the extract job with the node creator + self.log.info(f"Sending {self.job_type} to node creator") + node_creator_parameters = { + "job_type": self.job_type, + "input_file": extract_subtomo_params.cbox_3d_file, + "output_file": extract_subtomo_params.output_star, + "relion_options": dict(extract_subtomo_params.relion_options), + "command": "", + "stdout": "", + "stderr": "", + } + rw.send_to("node_creator", node_creator_parameters) + + self.log.info(f"Done {self.job_type} for {extract_subtomo_params.cbox_3d_file}") + rw.transport.ack(header) + + +class ExtractSubTomoFor3D(CommonService): """ A service for extracting particles from cryolo autopicking for tomograms """ @@ -120,11 +383,11 @@ def extract_subtomo(self, rw, header: dict, message: dict): try: if isinstance(message, dict): - extract_subtomo_params = ExtractSubTomoParameters( + extract_subtomo_params = ExtractSubTomoParameters3D( **{**rw.recipe_step.get("parameters", {}), **message} ) else: - extract_subtomo_params = ExtractSubTomoParameters( + extract_subtomo_params = ExtractSubTomoParameters3D( **{**rw.recipe_step.get("parameters", {})} ) except (ValidationError, TypeError) as e: @@ -249,7 +512,7 @@ def extract_subtomo(self, rw, header: dict, message: dict): tilt_coords[tilt].append([x_in_tilt, y_in_tilt]) # print(x_in_tilt, y_in_tilt, tilt_angles[tilt]) - particle_subimage, failure_reason = extract_single_particle( + particle_subimage, failure_reason1 = extract_single_particle( input_image=tilt_images[tilt], x_coord=x_in_tilt, y_coord=y_in_tilt, @@ -258,6 +521,10 @@ def extract_subtomo(self, rw, header: dict, message: dict): int(i * extract_subtomo_params.tomogram_binning) for i in extract_subtomo_params.scaled_tomogram_shape ], + ) + particle_subimage, failure_reason2 = enhance_single_particle( + particle_subimage=particle_subimage, + extract_width=extract_width, small_boxsize=extract_subtomo_params.small_boxsize, bg_radius=round(0.375 * extract_subtomo_params.small_boxsize), invert_contrast=True, @@ -266,10 +533,10 @@ def extract_subtomo(self, rw, header: dict, message: dict): plane_fit=True, ) - if failure_reason: + if failure_reason1 or failure_reason2: self.log.warning( f"Extraction failed for {particle} in {tilt}. " - f"Reason was {failure_reason}." + f"Reason was {failure_reason1} {failure_reason2}." ) particle_subimage = np.zeros( ( From 0beed716404b9d2e63f0410d62fd14e466384e4e Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 9 Oct 2025 11:48:49 +0100 Subject: [PATCH 26/33] Give murfey a register name --- src/cryoemservices/services/cryolo.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cryoemservices/services/cryolo.py b/src/cryoemservices/services/cryolo.py index 040bb975..6e5ba1d6 100644 --- a/src/cryoemservices/services/cryolo.py +++ b/src/cryoemservices/services/cryolo.py @@ -341,6 +341,7 @@ def cryolo(self, rw, header: dict, message: dict): # Send to murfey for extraction extraction_parameters = { + "register": "picked_tomogram", "tomogram": cryolo_params.raw_tomogram or cryolo_params.input_path, "cbox_3d": cryolo_params.output_path, "pixel_size": cryolo_params.pixel_size, From 1d7345174fab4788ddf12317e66498aaae442959 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 9 Oct 2025 11:49:21 +0100 Subject: [PATCH 27/33] Don't append particles off xy frame --- .../services/extract_subtomo.py | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index eca3b6e7..7e3d4b52 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -185,9 +185,7 @@ def extract_subtomo_for_2d(self, rw, header: dict, message: dict): output_mrc_stack = np.array([]) for particle in tqdm(range(len(particles_x))): if ( - particles_z[particle] - extract_width < 0 - or particles_z[particle] + extract_width > tomogram_data.shape[0] - or particles_y[particle] - extract_width < 0 + particles_y[particle] - extract_width < 0 or particles_y[particle] + extract_width > tomogram_data.shape[1] or particles_x[particle] - extract_width < 0 or particles_x[particle] + extract_width > tomogram_data.shape[2] @@ -195,21 +193,16 @@ def extract_subtomo_for_2d(self, rw, header: dict, message: dict): self.log.info( f"Skipping particle {particle} runs over the edge of the volume" ) - particle_subimage = np.zeros( - (extract_subtomo_params.boxsize, extract_subtomo_params.boxsize) - ) - # Add to output stack - if len(output_mrc_stack): - output_mrc_stack = np.append( - output_mrc_stack, [particle_subimage], axis=0 - ) - else: - output_mrc_stack = np.array([particle_subimage], dtype=np.float32) continue + + min_z = particles_z[particle] - extract_width + max_z = particles_z[particle] + extract_width + if min_z < 0: + min_z = 0 + if max_z >= tomogram_data.shape[0]: + max_z = tomogram_data.shape[0] - 1 extract_vol = tomogram_data[ - round(float(particles_z[particle] - extract_width)) : round( - float(particles_z[particle] + extract_width) - ), + round(float(min_z)) : round(float(max_z)), round(float(particles_y[particle] - extract_width)) : round( float(particles_y[particle] + extract_width) ), @@ -217,8 +210,9 @@ def extract_subtomo_for_2d(self, rw, header: dict, message: dict): float(particles_x[particle] + extract_width) ), ] - flat_particle = extract_vol.mean(axis=0) + # Run projection along x axis + flat_particle = extract_vol.mean(axis=0) particle_subimage, failure_reason = enhance_single_particle( particle_subimage=flat_particle, extract_width=extract_width, @@ -312,7 +306,7 @@ def extract_subtomo_for_2d(self, rw, header: dict, message: dict): extracted_parts_loop = extracted_parts_block.find_loop( "_rlnTomoName" ).get_loop() - for particle in range(len(particles_x)): + for particle in range(particle_count): extracted_parts_loop.add_row( [ _get_tilt_name_v5_12(Path(extract_subtomo_params.tomogram)), From 3a0020f044a2c338b3c11f99cd9691f5adb683fd Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Tue, 14 Oct 2025 12:22:19 +0100 Subject: [PATCH 28/33] Fix the length of the picked particles arrays --- src/cryoemservices/services/cryolo.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/cryoemservices/services/cryolo.py b/src/cryoemservices/services/cryolo.py index 6e5ba1d6..81c34556 100644 --- a/src/cryoemservices/services/cryolo.py +++ b/src/cryoemservices/services/cryolo.py @@ -331,10 +331,10 @@ def cryolo(self, rw, header: dict, message: dict): try: cbox_file = cif.read_file(cryolo_params.output_path) cbox_block = cbox_file.find_block("cryolo") - cbox_sizes = np.append( - np.array(cbox_block.find_loop("_EstWidth"), dtype=float), - np.array(cbox_block.find_loop("_EstHeight"), dtype=float), - ) + cbox_sizes = ( + np.array(cbox_block.find_loop("_EstWidth"), dtype=float) + + np.array(cbox_block.find_loop("_EstHeight"), dtype=float) + ) / 2 cryolo_particle_sizes = cbox_sizes * cryolo_params.pixel_size except (FileNotFoundError, OSError, AttributeError): cryolo_particle_sizes = [] @@ -366,14 +366,11 @@ def cryolo(self, rw, header: dict, message: dict): ) ) cbox_block = cbox_file.find_block("cryolo") - cbox_sizes = np.append( - np.array(cbox_block.find_loop("_EstWidth"), dtype=float), - np.array(cbox_block.find_loop("_EstHeight"), dtype=float), - ) - cbox_confidence = np.append( - np.array(cbox_block.find_loop("_Confidence"), dtype=float), - np.array(cbox_block.find_loop("_Confidence"), dtype=float), - ) + cbox_sizes = ( + np.array(cbox_block.find_loop("_EstWidth"), dtype=float) + + np.array(cbox_block.find_loop("_EstHeight"), dtype=float) + ) / 2 + cbox_confidence = np.array(cbox_block.find_loop("_Confidence"), dtype=float) # Select only a fraction of particles based on confidence if requested if ( From 5e3cf95b536d71b303007baac92144fefda14495 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 16 Oct 2025 09:25:08 +0100 Subject: [PATCH 29/33] Update test for murfey feedback from cryolo --- tests/services/test_cryolo_service.py | 52 ++++++++++++++++++++------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/tests/services/test_cryolo_service.py b/tests/services/test_cryolo_service.py index 3a015888..8a786db2 100644 --- a/tests/services/test_cryolo_service.py +++ b/tests/services/test_cryolo_service.py @@ -175,8 +175,8 @@ def write_cbox_file(command, cwd, capture_output): "register": "picked_particles", "motion_correction_id": cryolo_test_message["mc_uuid"], "micrograph": cryolo_test_message["input_path"], - "particle_diameters": [10.0, 20.0], - "particle_count": 2, + "particle_diameters": [15.0], + "particle_count": 1, "resolution": ctf_test_values["CtfMaxResolution"], "astigmatism": ctf_test_values["DefocusU"] - ctf_test_values["DefocusV"], "defocus": (ctf_test_values["DefocusU"] + ctf_test_values["DefocusV"]) / 2, @@ -213,20 +213,18 @@ def test_cryolo_service_tomography(mock_subprocess, offline_transport, tmp_path) This should call the mock subprocess then send messages on to the node_creator, murfey_feedback, ispyb_connector and images services """ - mock_subprocess().returncode = 0 - mock_subprocess().stdout = "stdout".encode("ascii") - mock_subprocess().stderr = "stderr".encode("ascii") - header = { "message-id": mock.sentinel, "subscription": mock.sentinel, } - output_path = tmp_path / "AutoPick/job007/STAR/sample.star" + output_path = tmp_path / "AutoPick/job007/CBOX_3D/sample.cbox" cryolo_test_message = { - "input_path": "MotionCorr/job002/sample.mrc", + "input_path": "Denoise/job007/tomograms/sample_denoised.mrc", "output_path": str(output_path), + "raw_tomogram": f"{tmp_path}/Tomogram/job006/tomograms/sample.mrc", "experiment_type": "tomography", + "pixel_size": 5.3, "cryolo_box_size": 40, "cryolo_model_weights": "sample_weights", "cryolo_threshold": 0.15, @@ -246,17 +244,34 @@ def test_cryolo_service_tomography(mock_subprocess, offline_transport, tmp_path) ) output_relion_options["cryolo_box_size"] = 40 + def write_cbox_file(command, cwd, capture_output): + # Write star co-ordinate file in the format cryolo will output + (cwd / "CBOX").mkdir() + with open(cwd / "CBOX_3D/sample.cbox", "w") as f: + f.write( + "data_cryolo\n\nloop_\n\n_EstWidth\n_EstHeight\n_Confidence\n" + "_CoordinateX\n_CoordinateY\n_Width\n_Height\n" + "100 200 0.6 0.1 0.2 2 4\n150 250 0.5 0.3 0.4 6 8" + ) + return CompletedProcess( + "", + returncode=0, + stdout="stdout".encode("ascii"), + stderr="stderr".encode("ascii"), + ) + + mock_subprocess.side_effect = write_cbox_file + # Set up the mock service and send the message to it service = cryolo.CrYOLO(environment={"queue": ""}, transport=offline_transport) service.initializing() service.cryolo(None, header=header, message=cryolo_test_message) - assert mock_subprocess.call_count == 4 - mock_subprocess.assert_called_with( + mock_subprocess.assert_called_once_with( [ "cryolo_predict.py", "-i", - "MotionCorr/job002/sample.mrc", + "Denoise/job007/tomograms/sample_denoised.mrc", "--conf", str(tmp_path / "AutoPick/job007/cryolo_config.json"), "-o", @@ -299,7 +314,7 @@ def test_cryolo_service_tomography(mock_subprocess, offline_transport, tmp_path) assert config_values["other"] == {"log_path": "logs/"} # Check that the correct messages were sent - assert offline_transport.send.call_count == 4 + assert offline_transport.send.call_count == 5 offline_transport.send.assert_any_call( "ispyb_connector", { @@ -334,7 +349,7 @@ def test_cryolo_service_tomography(mock_subprocess, offline_transport, tmp_path) "output_file": str(output_path), "relion_options": output_relion_options, "command": ( - "cryolo_predict.py -i MotionCorr/job002/sample.mrc " + "cryolo_predict.py -i Denoise/job007/tomograms/sample_denoised.mrc " f"--conf {tmp_path}/AutoPick/job007/cryolo_config.json " f"-o {tmp_path}/AutoPick/job007 " f"--tomogram -tsr -1 -tmem 0 -tmin 5 --gpus 0 " @@ -347,6 +362,17 @@ def test_cryolo_service_tomography(mock_subprocess, offline_transport, tmp_path) "success": True, }, ) + offline_transport.send.assert_any_call( + "murfey_feedback", + { + "cbox_3d": f"{tmp_path}/AutoPick/job007/CBOX_3D/sample.cbox", + "particle_count": 2, + "particle_diameters": [150 * 5.3, 200 * 5.3], + "pixel_size": 5.3, + "register": "picked_tomogram", + "tomogram": f"{tmp_path}/Tomogram/job006/tomograms/sample.mrc", + }, + ) @pytest.mark.skipif(sys.platform == "win32", reason="does not run on windows") From 32bb7701941f19d78d6d255f267153ef06068948 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Fri, 28 Nov 2025 16:47:50 +0000 Subject: [PATCH 30/33] Update star combination to work for tomo --- .../pipeliner_plugins/combine_star_files.py | 12 +++++++++--- src/cryoemservices/services/extract_subtomo.py | 15 ++++++++++++++- src/cryoemservices/util/tomo_output_files.py | 8 ++++---- 3 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/cryoemservices/pipeliner_plugins/combine_star_files.py b/src/cryoemservices/pipeliner_plugins/combine_star_files.py index 0b62d4c9..c9593b83 100644 --- a/src/cryoemservices/pipeliner_plugins/combine_star_files.py +++ b/src/cryoemservices/pipeliner_plugins/combine_star_files.py @@ -44,7 +44,7 @@ def combine_star_files(files_to_process: List[Path], output_dir: Path): ): for line_counter in range(50): line = full_starfile.readline() - if line.startswith("opticsGroup"): + if "opticsGroup" in line: reference_optics = line.split() if not line: break @@ -71,7 +71,7 @@ def combine_star_files(files_to_process: List[Path], output_dir: Path): optics_line = added_starfile.readline() if not optics_line: raise IndexError(f"Cannot find optics group in {split_file}") - if optics_line.startswith("opticsGroup"): + if "opticsGroup" in optics_line: new_optics = optics_line.split() break @@ -104,8 +104,14 @@ def combine_star_files(files_to_process: List[Path], output_dir: Path): particle_line = added_starfile.readline() if not particle_line: break + if "opticsGroup" in particle_line: + # Skip the optics group + continue + if particle_line.startswith(("_", "loop_", "data_", "#")): + # Skip all block and loop header lines + continue particle_split_line = particle_line.split() - if len(particle_split_line) > 0 and particle_split_line[0][0].isdigit(): + if len(particle_split_line) > 0: file_particles_count += 1 total_particles += 1 particles_file.write(particle_line) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index 7e3d4b52..ed10ced3 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -50,6 +50,15 @@ "particle_diameter": 500, } +in3 = { + "cbox_3d_file": "/dls/m06/data/2025/bi37708-55/tmp/extract-test/AutoPick/job009/CBOX_3D/Tomo_position3_stack_Vol.denoised.cbox", + "tomogram": "/dls/m06/data/2025/bi37708-55/tmp/extract-test/Tomograms/job006/tomograms/Tomo_position3_stack_Vol.mrc", + "output_star": "/dls/m06/data/2025/bi37708-55/tmp/extract-test/Extract/class2d/Tomo_position3_stack_Vol.star", + "pixel_size": 7.76, + "particle_diameter": 225, + "relion_options": {}, +} + class ExtractSubTomoParameters2D(BaseModel): cbox_3d_file: str = Field(..., min_length=1) @@ -158,6 +167,10 @@ def extract_subtomo_for_2d(self, rw, header: dict, message: dict): extract_subtomo_params.relion_options = update_relion_options( extract_subtomo_params.relion_options, dict(extract_subtomo_params) ) + if extract_subtomo_params.particle_diameter: + extract_subtomo_params.boxsize = ( + extract_subtomo_params.relion_options.boxsize + ) # Make sure the output directory exists if not Path(extract_subtomo_params.output_star).parent.exists(): @@ -283,7 +296,7 @@ def extract_subtomo_for_2d(self, rw, header: dict, message: dict): "2.70", "0.10", "1", - "optics1", + "opticsGroup1", "1", "2", str(extract_subtomo_params.pixel_size), diff --git a/src/cryoemservices/util/tomo_output_files.py b/src/cryoemservices/util/tomo_output_files.py index 8f60a425..d9b892e8 100644 --- a/src/cryoemservices/util/tomo_output_files.py +++ b/src/cryoemservices/util/tomo_output_files.py @@ -68,7 +68,7 @@ def _global_tilt_series_file( str(relion_options.ampl_contrast), str(relion_options.pixel_size), str(relion_options.invert_hand), - "optics1", + "opticsGroup1", str(relion_options.pixel_size), ] @@ -561,7 +561,7 @@ def _tomogram_output_files( str(relion_options.ampl_contrast), str(relion_options.pixel_size), str(relion_options.invert_hand), - "optics1", + "opticsGroup1", str(relion_options.pixel_size), f"AlignTiltSeries/job005/tilt_series/{tilt_series_name}.star", str(relion_options.pixel_size_downscaled / relion_options.pixel_size), @@ -622,7 +622,7 @@ def _denoising_output_files( str(relion_options.ampl_contrast), str(relion_options.pixel_size), str(relion_options.invert_hand), - "optics1", + "opticsGroup1", str(relion_options.pixel_size), f"AlignTiltSeries/job005/tilt_series/{tilt_series_name}.star", str(relion_options.pixel_size_downscaled / relion_options.pixel_size), @@ -685,7 +685,7 @@ def _membrain_output_files( str(relion_options.ampl_contrast), str(relion_options.pixel_size), str(relion_options.invert_hand), - "optics1", + "opticsGroup1", str(relion_options.pixel_size), f"AlignTiltSeries/job005/tilt_series/{tilt_series_name}.star", str(relion_options.pixel_size_downscaled / relion_options.pixel_size), From 3ddfaf8cc16f3ee44916de31566aa9dcd9af435d Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 18 Dec 2025 16:35:59 +0000 Subject: [PATCH 31/33] Functions to run ctf correction on tilts --- src/cryoemservices/services/ctffind.py | 59 ++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/src/cryoemservices/services/ctffind.py b/src/cryoemservices/services/ctffind.py index 224e77f8..5bbfd7a9 100644 --- a/src/cryoemservices/services/ctffind.py +++ b/src/cryoemservices/services/ctffind.py @@ -5,6 +5,8 @@ from pathlib import Path from typing import Any, Optional +import mrcfile +import numpy as np from pydantic import BaseModel, Field, ValidationError, field_validator from workflows.recipe import wrap_subscribe @@ -352,3 +354,60 @@ def ctf_find(self, rw, header: dict, message: dict): self.log.info(f"Done {self.job_type} for {ctf_params.input_image}.") rw.transport.ack(header) + + +def ctf( + num_pixels_x: int, num_pixels_y: int, pixel_size: float, defocus: float +) -> np.array: + C_s: float = 2.7e7 + wavelength: float = 0.0196 + grid = np.meshgrid( + np.fft.fftfreq(num_pixels_x, pixel_size), + np.fft.fftfreq(num_pixels_y, pixel_size), + ) + grid = np.fft.fftshift(grid) + ksq = grid[0] ** 2 + grid[1] ** 2 + w = -defocus * wavelength * ksq / 2 + C_s * wavelength**3 + ksq**2 / 4 + return np.sin(2 * np.pi * w) + + +def ctf_micrograph(mic_file, output_file, defocus_u, defocus_v): + with mrcfile.open(mic_file) as mrc: + im = mrc.data + num_pixels_x = int(mrc.header.mx) + num_pixels_y = int(mrc.header.my) + pixel_size = mrc.header.cella.x / mrc.header.mx + dtype = im.dtype + + fft = np.fft.fft2(im) + fft = np.fft.fftshift(fft) + + ctf_mask = ctf( + num_pixels_x, num_pixels_y, pixel_size, np.sqrt(defocus_u**2 + defocus_v**2) + ) + ctf_mask[ctf_mask < 0] = -1 + ctf_mask[ctf_mask >= 0] = 1 + + fft = fft * ctf_mask + + fft_shifted = np.fft.ifftshift(fft) + corrected_im = np.real(np.fft.ifft2(fft_shifted)) + corrected_im = corrected_im.astype(dtype) + mrcfile.write(output_file, corrected_im, overwrite=True) + + +def ctf_of_tomo_star(root_dir, tomo_star, output_dir): + from gemmi import cif + + data = cif.read_file(str(Path(root_dir) / tomo_star)) + mics = list(data.sole_block().find_loop("_rlnMicrographName")) + defocus_u = list(data.sole_block().find_loop("_rlnDefocusU")) + defocus_v = list(data.sole_block().find_loop("_rlnDefocusV")) + + for i, mic in enumerate(mics): + ctf_micrograph( + Path(root_dir) / mic, + Path(root_dir) / output_dir / Path(mic).name, + float(defocus_u[i]), + float(defocus_v[i]), + ) From 0610ca2b04b792c11c5f7f505dbeb5c0994250d5 Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 18 Dec 2025 16:36:40 +0000 Subject: [PATCH 32/33] Migrate 2d extraction to new file --- pyproject.toml | 2 +- .../services/extract_2d_subtomo.py | 289 ++++++++++++++++++ 2 files changed, 290 insertions(+), 1 deletion(-) create mode 100644 src/cryoemservices/services/extract_2d_subtomo.py diff --git a/pyproject.toml b/pyproject.toml index 3bb2796e..00448bfb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -96,7 +96,7 @@ torch = [ EMISPyB = "cryoemservices.services.ispyb_connector:EMISPyB" Extract = "cryoemservices.services.extract:Extract" ExtractClass = "cryoemservices.services.extract_class:ExtractClass" - ExtractSubTomo = "cryoemservices.services.extract_subtomo:ExtractSubTomoFor2D" + ExtractSubTomo = "cryoemservices.services.extract_2d_subtomo:ExtractSubTomoFor2D" IceBreaker = "cryoemservices.services.icebreaker:IceBreaker" Images = "cryoemservices.services.images:Images" MembrainSeg = "cryoemservices.services.membrain_seg:MembrainSeg" diff --git a/src/cryoemservices/services/extract_2d_subtomo.py b/src/cryoemservices/services/extract_2d_subtomo.py new file mode 100644 index 00000000..7328b67b --- /dev/null +++ b/src/cryoemservices/services/extract_2d_subtomo.py @@ -0,0 +1,289 @@ +from pathlib import Path + +import mrcfile +import numpy as np +from gemmi import cif +from pydantic import BaseModel, Field, ValidationError +from tqdm import tqdm +from workflows.recipe import wrap_subscribe + +from cryoemservices.services.common_service import CommonService +from cryoemservices.services.extract import enhance_single_particle +from cryoemservices.util.models import MockRW +from cryoemservices.util.relion_service_options import ( + RelionServiceOptions, + update_relion_options, +) +from cryoemservices.util.tomo_output_files import _get_tilt_name_v5_12 + + +class ExtractSubTomoParameters2D(BaseModel): + cbox_3d_file: str = Field(..., min_length=1) + tomogram: str = Field(..., min_length=1) + output_star: str = Field(..., min_length=1) + pixel_size: float + particle_diameter: float = 0 + boxsize: int = 256 + batch_size: int = 5000 + relion_options: RelionServiceOptions + + +class ExtractSubTomoFor2D(CommonService): + """ + A service for extracting 2D particles from cryolo autopicking for tomograms + This extracts the particle in 3D, projects it to 2D + and then processes it ready for SPA-like 2D classification + """ + + # Human readable service name + _service_name = "ExtractSubTomo" + + # Logger name + _logger_name = "cryoemservices.services.extract_subtomo" + + # Job name + job_type = "relion.pseudosubtomo" + + def initializing(self): + """Subscribe to a queue. Received messages must be acknowledged.""" + self.log.info("Sub-tomogram extraction service starting") + wrap_subscribe( + self._transport, + self._environment["queue"] or "extract_subtomo", + self.extract_subtomo_for_2d, + acknowledgement=True, + allow_non_recipe_messages=True, + ) + + def extract_subtomo_for_2d(self, rw, header: dict, message: dict): + """Main function which interprets and processes received messages""" + if not rw: + self.log.info("Received a simple message") + if not isinstance(message, dict): + self.log.error("Rejected invalid simple message") + self._transport.nack(header) + return + + # Create a wrapper-like object that can be passed to functions + # as if a recipe wrapper was present. + rw = MockRW(self._transport) + rw.recipe_step = {"parameters": message} + + try: + if isinstance(message, dict): + extract_subtomo_params = ExtractSubTomoParameters2D( + **{**rw.recipe_step.get("parameters", {}), **message} + ) + else: + extract_subtomo_params = ExtractSubTomoParameters2D( + **{**rw.recipe_step.get("parameters", {})} + ) + except (ValidationError, TypeError) as e: + self.log.warning( + f"Extraction parameter validation failed for message: {message} " + f"and recipe parameters: {rw.recipe_step.get('parameters', {})} " + f"with exception: {e}" + ) + rw.transport.nack(header) + return + + self.log.info( + f"Inputs: {extract_subtomo_params.tomogram}, " + f"{extract_subtomo_params.cbox_3d_file} " + f"Output: {extract_subtomo_params.output_star}" + ) + + # Update the relion options and get box sizes + extract_subtomo_params.relion_options = update_relion_options( + extract_subtomo_params.relion_options, dict(extract_subtomo_params) + ) + if extract_subtomo_params.particle_diameter: + extract_subtomo_params.boxsize = ( + extract_subtomo_params.relion_options.boxsize + ) + + # Make sure the output directory exists + if not Path(extract_subtomo_params.output_star).parent.exists(): + Path(extract_subtomo_params.output_star).parent.mkdir(parents=True) + + # Find the locations of the particles + coords_file = cif.read(extract_subtomo_params.cbox_3d_file) + coords_block = coords_file.find_block("cryolo") + pick_radius = float(coords_block.find_loop("_Width")[0]) / 2 + particles_x = ( + np.array(coords_block.find_loop("_CoordinateX"), dtype=float) + pick_radius + ) + particles_y = ( + np.array(coords_block.find_loop("_CoordinateY"), dtype=float) + pick_radius + ) + particles_z = np.array(coords_block.find_loop("_CoordinateZ"), dtype=float) + + # Read tomogram + with mrcfile.open(extract_subtomo_params.tomogram) as mrc: + tomogram_data = mrc.data + + # Extract at the same downscaling as during tomogram reconstruction + extract_width = round(extract_subtomo_params.relion_options.boxsize / 2) + + output_mrc_stack = np.array([]) + for particle in tqdm(range(len(particles_x))): + if ( + particles_y[particle] - extract_width < 0 + or particles_y[particle] + extract_width > tomogram_data.shape[1] + or particles_x[particle] - extract_width < 0 + or particles_x[particle] + extract_width > tomogram_data.shape[2] + ): + self.log.info( + f"Skipping particle {particle} runs over the edge of the volume" + ) + continue + + min_z = particles_z[particle] - extract_width + max_z = particles_z[particle] + extract_width + if min_z < 0: + min_z = 0 + if max_z >= tomogram_data.shape[0]: + max_z = tomogram_data.shape[0] - 1 + extract_vol = tomogram_data[ + round(float(min_z)) : round(float(max_z)), + round(float(particles_y[particle] - extract_width)) : round( + float(particles_y[particle] + extract_width) + ), + round(float(particles_x[particle] - extract_width)) : round( + float(particles_x[particle] + extract_width) + ), + ] + + # Run projection along x axis + flat_particle = extract_vol.mean(axis=0) + particle_subimage, failure_reason = enhance_single_particle( + particle_subimage=flat_particle, + extract_width=extract_width, + small_boxsize=extract_subtomo_params.boxsize, + bg_radius=round(0.375 * extract_subtomo_params.boxsize), + invert_contrast=True, + downscale=False, + norm=True, + plane_fit=True, + ) + if failure_reason: + self.log.warning( + f"Extraction failed for {particle}. Reason was {failure_reason}." + ) + continue + + # Add to output stack + if len(output_mrc_stack): + output_mrc_stack = np.append( + output_mrc_stack, [particle_subimage], axis=0 + ) + else: + output_mrc_stack = np.array([particle_subimage], dtype=np.float32) + + # Produce the mrc file of the extracted particles + output_mrc_file = ( + Path(extract_subtomo_params.output_star).parent + / Path(extract_subtomo_params.tomogram).with_suffix(".mrcs").name + ) + particle_count = np.shape(output_mrc_stack)[0] + self.log.info(f"Extracted {particle_count} particles") + with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: + mrc.set_data(output_mrc_stack.astype(np.float32)) + mrc.header.mx = extract_subtomo_params.relion_options.boxsize + mrc.header.my = extract_subtomo_params.relion_options.boxsize + mrc.header.mz = 1 + mrc.header.cella.x = ( + extract_subtomo_params.pixel_size + * extract_subtomo_params.relion_options.boxsize + ) + mrc.header.cella.y = ( + extract_subtomo_params.pixel_size + * extract_subtomo_params.relion_options.boxsize + ) + mrc.header.cella.z = 1 + + # Construct the output star file + if not Path(extract_subtomo_params.output_star).is_file(): + extracted_parts_doc = cif.Document() + optics_block = extracted_parts_doc.add_new_block("optics") + optics_loop = optics_block.init_loop( + "_rln", + [ + "Voltage", + "SphericalAberration", + "AmplitudeContrast", + "OpticsGroup", + "OpticsGroupName", + "CtfDataAreCtfPremultiplied", + "ImageDimensionality", + "ImagePixelSize", + "ImageSize", + ], + ) + optics_loop.add_row( + [ + "300.00", + "2.70", + "0.10", + "1", + "opticsGroup1", + "1", + "2", + str(extract_subtomo_params.pixel_size), + str(extract_subtomo_params.boxsize), + ] + ) + extracted_parts_block = extracted_parts_doc.add_new_block("particles") + extracted_parts_loop = extracted_parts_block.init_loop( + "_rln", + [ + "TomoName", + "OpticsGroup", + "TomoParticleName", + "ImageName", + ], + ) + else: + extracted_parts_doc = cif.read_file(extract_subtomo_params.output_star) + extracted_parts_block = extracted_parts_doc.find_block("particles") + extracted_parts_loop = extracted_parts_block.find_loop( + "_rlnTomoName" + ).get_loop() + for particle in range(particle_count): + extracted_parts_loop.add_row( + [ + _get_tilt_name_v5_12(Path(extract_subtomo_params.tomogram)), + "1", + f"{particle}@{output_mrc_file}", + f"{particle}@{output_mrc_file}", + ] + ) + extracted_parts_doc.write_file( + extract_subtomo_params.output_star, style=cif.Style.Simple + ) + + # Register the extract job with the node creator + self.log.info(f"Sending {self.job_type} to node creator") + node_creator_parameters = { + "job_type": self.job_type, + "input_file": extract_subtomo_params.cbox_3d_file, + "output_file": extract_subtomo_params.output_star, + "relion_options": dict(extract_subtomo_params.relion_options), + "command": "", + "stdout": "", + "stderr": "", + } + rw.send_to("node_creator", node_creator_parameters) + + # Register the files needed for selection and batching + self.log.info("Sending to particle selection") + select_params = { + "input_file": extract_subtomo_params.output_star, + "batch_size": extract_subtomo_params.batch_size, + "image_size": extract_subtomo_params.boxsize, + "relion_options": dict(extract_subtomo_params.relion_options), + } + rw.send_to("select_particles", select_params) + + self.log.info(f"Done {self.job_type} for {extract_subtomo_params.cbox_3d_file}") + rw.transport.ack(header) From 89077225cb8bb9fc034378e46b9e2b16fb18385d Mon Sep 17 00:00:00 2001 From: yxd92326 Date: Thu, 18 Dec 2025 17:21:02 +0000 Subject: [PATCH 33/33] Thoughts on selecting from tomo picks --- .../services/extract_2d_subtomo.py | 1 + .../services/extract_subtomo.py | 290 ++---------------- .../services/select_particles.py | 53 ++-- 3 files changed, 67 insertions(+), 277 deletions(-) diff --git a/src/cryoemservices/services/extract_2d_subtomo.py b/src/cryoemservices/services/extract_2d_subtomo.py index 7328b67b..dc823ff2 100644 --- a/src/cryoemservices/services/extract_2d_subtomo.py +++ b/src/cryoemservices/services/extract_2d_subtomo.py @@ -281,6 +281,7 @@ def extract_subtomo_for_2d(self, rw, header: dict, message: dict): "input_file": extract_subtomo_params.output_star, "batch_size": extract_subtomo_params.batch_size, "image_size": extract_subtomo_params.boxsize, + "tomo": True, "relion_options": dict(extract_subtomo_params.relion_options), } rw.send_to("select_particles", select_params) diff --git a/src/cryoemservices/services/extract_subtomo.py b/src/cryoemservices/services/extract_subtomo.py index ed10ced3..d6d289f7 100644 --- a/src/cryoemservices/services/extract_subtomo.py +++ b/src/cryoemservices/services/extract_subtomo.py @@ -6,6 +6,7 @@ import matplotlib.pyplot as plt import mrcfile import numpy as np +import workflows.transport.pika_transport as pt from gemmi import cif from pydantic import BaseModel, Field, ValidationError, field_validator from tqdm import tqdm @@ -26,6 +27,26 @@ _get_tilt_number_v5_12, ) +transport = pt.PikaTransport() +transport.load_configuration_file( + "/dls_sw/apps/murfey/config/rmq-connection-creds-pollux.yml" +) +transport.connect() + +root_dir = Path("/dls/m07/data/2025/cm40593-13/spool/ctfcorrect") +for tomo in root_dir.glob("Tomograms/job006/tomograms/*_aretomo.mrc"): + transport.send( + "segmentation", + { + "cbox_3d_file": f"{root_dir}/AutoPick/job009/CBOX_3D/{tomo.stem}.denoised.cbox", + "tomogram": str(tomo), + "output_star": f"{root_dir}/Extract/job010/{tomo.stem}.star", + "pixel_size": 5.36, + "particle_diameter": 250, + "relion_options": {}, + }, + ) + inptus = { "cbox_3d_file": "/scratch/yxd92326/data/tomo-extract/2_2_Ribosome_Pos_1_stack_aretomo.denoised.cbox", "tilt_alignment_file": "/scratch/yxd92326/data/tomo-extract/2_2_Ribosome_Pos_1_stack.aln", @@ -59,15 +80,17 @@ "relion_options": {}, } - -class ExtractSubTomoParameters2D(BaseModel): - cbox_3d_file: str = Field(..., min_length=1) - tomogram: str = Field(..., min_length=1) - output_star: str = Field(..., min_length=1) - pixel_size: float - particle_diameter: float = 0 - boxsize: int = 256 - relion_options: RelionServiceOptions +transport.send( + "segmentation", + { + "cbox_3d_file": "/dls/m06/data/2025/bi38637-22/spool/extract-test/AutoPick/job009/CBOX_3D/Position_002_stack_Vol.denoised.cbox", + "tomogram": "/dls/m06/data/2025/bi38637-22/spool/extract-test/CtfFind/corrected/aretomo2_compare/Position_002_stack_aretomo2_raw.mrc", + "output_star": "/dls/m06/data/2025/bi38637-22/spool/extract-test/Extract/ctftest/Position_002_stack_raw.star", + "pixel_size": 7.76, + "particle_diameter": 250, + "relion_options": {}, + }, +) class ExtractSubTomoParameters3D(BaseModel): @@ -100,255 +123,6 @@ def check_shape_is_3d(cls, v): return shape_list -class ExtractSubTomoFor2D(CommonService): - """ - A service for extracting particles from cryolo autopicking for tomograms - """ - - # Human readable service name - _service_name = "ExtractSubTomo" - - # Logger name - _logger_name = "cryoemservices.services.extract_subtomo" - - # Job name - job_type = "relion.pseudosubtomo" - - def initializing(self): - """Subscribe to a queue. Received messages must be acknowledged.""" - self.log.info("Sub-tomogram extraction service starting") - wrap_subscribe( - self._transport, - self._environment["queue"] or "extract_subtomo", - self.extract_subtomo_for_2d, - acknowledgement=True, - allow_non_recipe_messages=True, - ) - - def extract_subtomo_for_2d(self, rw, header: dict, message: dict): - """Main function which interprets and processes received messages""" - if not rw: - self.log.info("Received a simple message") - if not isinstance(message, dict): - self.log.error("Rejected invalid simple message") - self._transport.nack(header) - return - - # Create a wrapper-like object that can be passed to functions - # as if a recipe wrapper was present. - rw = MockRW(self._transport) - rw.recipe_step = {"parameters": message} - - try: - if isinstance(message, dict): - extract_subtomo_params = ExtractSubTomoParameters2D( - **{**rw.recipe_step.get("parameters", {}), **message} - ) - else: - extract_subtomo_params = ExtractSubTomoParameters2D( - **{**rw.recipe_step.get("parameters", {})} - ) - except (ValidationError, TypeError) as e: - self.log.warning( - f"Extraction parameter validation failed for message: {message} " - f"and recipe parameters: {rw.recipe_step.get('parameters', {})} " - f"with exception: {e}" - ) - rw.transport.nack(header) - return - - self.log.info( - f"Inputs: {extract_subtomo_params.tomogram}, " - f"{extract_subtomo_params.cbox_3d_file} " - f"Output: {extract_subtomo_params.output_star}" - ) - - # Update the relion options and get box sizes - extract_subtomo_params.relion_options = update_relion_options( - extract_subtomo_params.relion_options, dict(extract_subtomo_params) - ) - if extract_subtomo_params.particle_diameter: - extract_subtomo_params.boxsize = ( - extract_subtomo_params.relion_options.boxsize - ) - - # Make sure the output directory exists - if not Path(extract_subtomo_params.output_star).parent.exists(): - Path(extract_subtomo_params.output_star).parent.mkdir(parents=True) - - # Find the locations of the particles - coords_file = cif.read(extract_subtomo_params.cbox_3d_file) - coords_block = coords_file.find_block("cryolo") - pick_radius = float(coords_block.find_loop("_Width")[0]) / 2 - particles_x = ( - np.array(coords_block.find_loop("_CoordinateX"), dtype=float) + pick_radius - ) - particles_y = ( - np.array(coords_block.find_loop("_CoordinateY"), dtype=float) + pick_radius - ) - particles_z = np.array(coords_block.find_loop("_CoordinateZ"), dtype=float) - - # Read tomogram - with mrcfile.open(extract_subtomo_params.tomogram) as mrc: - tomogram_data = mrc.data - - # Extract at the same downscaling as during tomogram reconstruction - extract_width = round(extract_subtomo_params.relion_options.boxsize / 2) - - output_mrc_stack = np.array([]) - for particle in tqdm(range(len(particles_x))): - if ( - particles_y[particle] - extract_width < 0 - or particles_y[particle] + extract_width > tomogram_data.shape[1] - or particles_x[particle] - extract_width < 0 - or particles_x[particle] + extract_width > tomogram_data.shape[2] - ): - self.log.info( - f"Skipping particle {particle} runs over the edge of the volume" - ) - continue - - min_z = particles_z[particle] - extract_width - max_z = particles_z[particle] + extract_width - if min_z < 0: - min_z = 0 - if max_z >= tomogram_data.shape[0]: - max_z = tomogram_data.shape[0] - 1 - extract_vol = tomogram_data[ - round(float(min_z)) : round(float(max_z)), - round(float(particles_y[particle] - extract_width)) : round( - float(particles_y[particle] + extract_width) - ), - round(float(particles_x[particle] - extract_width)) : round( - float(particles_x[particle] + extract_width) - ), - ] - - # Run projection along x axis - flat_particle = extract_vol.mean(axis=0) - particle_subimage, failure_reason = enhance_single_particle( - particle_subimage=flat_particle, - extract_width=extract_width, - small_boxsize=extract_subtomo_params.boxsize, - bg_radius=round(0.375 * extract_subtomo_params.boxsize), - invert_contrast=True, - downscale=False, - norm=True, - plane_fit=True, - ) - if failure_reason: - self.log.warning( - f"Extraction failed for {particle}. Reason was {failure_reason}." - ) - continue - - # Add to output stack - if len(output_mrc_stack): - output_mrc_stack = np.append( - output_mrc_stack, [particle_subimage], axis=0 - ) - else: - output_mrc_stack = np.array([particle_subimage], dtype=np.float32) - - # Produce the mrc file of the extracted particles - output_mrc_file = ( - Path(extract_subtomo_params.output_star).parent - / Path(extract_subtomo_params.tomogram).with_suffix(".mrcs").name - ) - particle_count = np.shape(output_mrc_stack)[0] - self.log.info(f"Extracted {particle_count} particles") - with mrcfile.new(str(output_mrc_file), overwrite=True) as mrc: - mrc.set_data(output_mrc_stack.astype(np.float32)) - mrc.header.mx = extract_subtomo_params.relion_options.boxsize - mrc.header.my = extract_subtomo_params.relion_options.boxsize - mrc.header.mz = 1 - mrc.header.cella.x = ( - extract_subtomo_params.pixel_size - * extract_subtomo_params.relion_options.boxsize - ) - mrc.header.cella.y = ( - extract_subtomo_params.pixel_size - * extract_subtomo_params.relion_options.boxsize - ) - mrc.header.cella.z = 1 - - # Construct the output star file - if not Path(extract_subtomo_params.output_star).is_file(): - extracted_parts_doc = cif.Document() - optics_block = extracted_parts_doc.add_new_block("optics") - optics_loop = optics_block.init_loop( - "_rln", - [ - "Voltage", - "SphericalAberration", - "AmplitudeContrast", - "OpticsGroup", - "OpticsGroupName", - "CtfDataAreCtfPremultiplied", - "ImageDimensionality", - "ImagePixelSize", - "ImageSize", - ], - ) - optics_loop.add_row( - [ - "300.00", - "2.70", - "0.10", - "1", - "opticsGroup1", - "1", - "2", - str(extract_subtomo_params.pixel_size), - str(extract_subtomo_params.boxsize), - ] - ) - extracted_parts_block = extracted_parts_doc.add_new_block("particles") - extracted_parts_loop = extracted_parts_block.init_loop( - "_rln", - [ - "TomoName", - "OpticsGroup", - "TomoParticleName", - "ImageName", - ], - ) - else: - extracted_parts_doc = cif.read_file(extract_subtomo_params.output_star) - extracted_parts_block = extracted_parts_doc.find_block("particles") - extracted_parts_loop = extracted_parts_block.find_loop( - "_rlnTomoName" - ).get_loop() - for particle in range(particle_count): - extracted_parts_loop.add_row( - [ - _get_tilt_name_v5_12(Path(extract_subtomo_params.tomogram)), - "1", - f"{particle}@{output_mrc_file}", - f"{particle}@{output_mrc_file}", - ] - ) - extracted_parts_doc.write_file( - extract_subtomo_params.output_star, style=cif.Style.Simple - ) - - # Register the extract job with the node creator - self.log.info(f"Sending {self.job_type} to node creator") - node_creator_parameters = { - "job_type": self.job_type, - "input_file": extract_subtomo_params.cbox_3d_file, - "output_file": extract_subtomo_params.output_star, - "relion_options": dict(extract_subtomo_params.relion_options), - "command": "", - "stdout": "", - "stderr": "", - } - rw.send_to("node_creator", node_creator_parameters) - - self.log.info(f"Done {self.job_type} for {extract_subtomo_params.cbox_3d_file}") - rw.transport.ack(header) - - class ExtractSubTomoFor3D(CommonService): """ A service for extracting particles from cryolo autopicking for tomograms diff --git a/src/cryoemservices/services/select_particles.py b/src/cryoemservices/services/select_particles.py index d47f18c2..d701b1ce 100644 --- a/src/cryoemservices/services/select_particles.py +++ b/src/cryoemservices/services/select_particles.py @@ -18,6 +18,7 @@ class SelectParticlesParameters(BaseModel): batch_size: int image_size: int incomplete_batch_size: int = 10000 + tomo: bool = False relion_options: RelionServiceOptions @@ -92,7 +93,10 @@ def select_particles(self, rw, header: dict, message: dict): select_dir.mkdir(parents=True, exist_ok=True) extracted_parts_file = cif.read_file(select_params.input_file) - extracted_parts_block = extracted_parts_file.sole_block() + try: + extracted_parts_block = extracted_parts_file.sole_block() + except RuntimeError: + extracted_parts_block = extracted_parts_file.find_block("particles") extracted_parts_loop = extracted_parts_block.find_loop( "_rlnCoordinateX" ).get_loop() @@ -160,24 +164,35 @@ def select_particles(self, rw, header: dict, message: dict): ) new_split_block = new_particles_cif.add_new_block("particles") - new_split_loop = new_split_block.init_loop( - "_rln", - [ - "CoordinateX", - "CoordinateY", - "ImageName", - "MicrographName", - "OpticsGroup", - "CtfMaxResolution", - "CtfFigureOfMerit", - "DefocusU", - "DefocusV", - "DefocusAngle", - "CtfBfactor", - "CtfScalefactor", - "PhaseShift", - ], - ) + if select_params.tomo: + new_split_loop = new_split_block.init_loop( + "_rln", + [ + "TomoName", + "OpticsGroup", + "TomoParticleName", + "ImageName", + ], + ) + else: + new_split_loop = new_split_block.init_loop( + "_rln", + [ + "CoordinateX", + "CoordinateY", + "ImageName", + "MicrographName", + "OpticsGroup", + "CtfMaxResolution", + "CtfFigureOfMerit", + "DefocusU", + "DefocusV", + "DefocusAngle", + "CtfBfactor", + "CtfScalefactor", + "PhaseShift", + ], + ) num_prev_parts = 0 # While we have particles to add and the file is not full