From b73c2f0b53b984a89126337ceb913b43b9cad513 Mon Sep 17 00:00:00 2001 From: Doeke Hekstra Date: Mon, 29 Dec 2025 22:58:50 -0500 Subject: [PATCH 1/2] export toanalyse_panda_inspect.py in coot --- valdo/valdo_analyse_pandda_inspect.py | 1076 +++++++++++++++++++++++++ 1 file changed, 1076 insertions(+) create mode 100644 valdo/valdo_analyse_pandda_inspect.py diff --git a/valdo/valdo_analyse_pandda_inspect.py b/valdo/valdo_analyse_pandda_inspect.py new file mode 100644 index 0000000..2010de0 --- /dev/null +++ b/valdo/valdo_analyse_pandda_inspect.py @@ -0,0 +1,1076 @@ +# plugin for COOT to inspect and model pandda.analyse results +# +# Copyright (c) 2022, Tobias Krojer, MAX IV Laboratory +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +import os +import glob +import sys +import shutil + +import gtk +import coot +import __main__ + +import csv +import logging + +def init_logger(logfile): + logger = logging.getLogger() + logger.setLevel(logging.INFO) + formatter = logging.Formatter('%(asctime)s | %(levelname)s - INSPECT | %(message)s', '%m-%d-%Y %H:%M:%S') + + stdout_handler = logging.StreamHandler(sys.stdout) + stdout_handler.setLevel(logging.DEBUG) + stdout_handler.setFormatter(formatter) + logger.addHandler(stdout_handler) + + file_handler = logging.FileHandler(logfile) + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + return logger + + +class inspect_gui(object): + + def __init__(self): + + self.logger = init_logger('inspect.log') + self.logger.info('starting new session of pandda event map inspection') + + self.index = -1 + self.Todo = [] + self.cb_list = [] + self.mol_dict = { + 'protein': None, + 'emap': None, + 'ligand': None + } + + self.panddaDir = None + self.eventCSV = None + self.reset_params() + self.merged = False + +# self.ligand_confidence_button_labels = [ +# [0, 'unassigned'], +# [1, 'no ligand bound'], +# [2, 'unknown ligand'], +# [3, 'low confidence'], +# [4, 'high confidence'] +# ] + + self.ligand_confidence_button_labels = [ + [0, 'unassigned'], + [1, 'no ligand bound'], + [2, 'unknown ligand'], + [3, 'ambiguous density'], + [4, 'event map only'], + [5, '(2)fo-fc map'] + ] + + self.selection_criteria = [ + 'show all events', + 'show all events - sort by cluster size', + 'show all events - sort alphabetically', + 'show not viewed events', + 'show unassigned', + 'show no ligands bound', + 'show unknown ligands', + 'show low confidence ligands', + 'show high confidence ligands' + ] + + self.selected_selection_criterion = None + self.cb_changed_id = None + + + def startGUI(self): + self.window = gtk.Window(gtk.WINDOW_TOPLEVEL) + self.window.connect("delete_event", gtk.main_quit) + self.window.set_border_width(10) + self.window.set_default_size(400, 600) + self.window.set_title("inspect") + self.vbox = gtk.VBox() # this is the main container + + frame = gtk.Frame(label='PanDDA folder') + hbox = gtk.HBox() + select_pandda_folder_button = gtk.Button(label="select pandda directory") + hbox.add(select_pandda_folder_button) + select_pandda_folder_button.connect("clicked", self.select_pandda_folder) + frame.add(hbox) + self.vbox.pack_start(frame) + + frame = gtk.Frame(label='Event selection') + hbox = gtk.HBox() + self.select_events_combobox = gtk.combo_box_new_text() + # self.select_events_combobox.connect("changed", self.set_selection_mode) + for citeria in self.selection_criteria: + self.select_events_combobox.append_text(citeria) + hbox.pack_start(self.select_events_combobox) + select_events_button = gtk.Button(label="Go") + select_events_button.connect("clicked", self.select_events) + hbox.pack_start(select_events_button) + frame.add(hbox) + self.vbox.add(frame) + + outer_frame = gtk.Frame() + hbox = gtk.HBox() + + # +1 row to display Cluster size + table = gtk.Table(8, 2, False) + + frame = gtk.Frame() + frame.add(gtk.Label('Crystal')) + table.attach(frame, 0, 1, 0, 1) + frame = gtk.Frame() + self.xtal_label = gtk.Label('') + frame.add(self.xtal_label) + table.attach(frame, 1, 2, 0, 1) + + frame = gtk.Frame() + frame.add(gtk.Label('Resolution')) + table.attach(frame, 0, 1, 1, 2) + frame = gtk.Frame() + self.resolution_label = gtk.Label('') + frame.add(self.resolution_label) + table.attach(frame, 1, 2, 1, 2) + + frame = gtk.Frame() + frame.add(gtk.Label('Rwork')) + table.attach(frame, 0, 1, 2, 3) + frame = gtk.Frame() + self.r_work_label = gtk.Label('') + frame.add(self.r_work_label) + table.attach(frame, 1, 2, 2, 3) + + frame = gtk.Frame() + frame.add(gtk.Label('Rfree')) + table.attach(frame, 0, 1, 3, 4) + frame = gtk.Frame() + self.r_free_label = gtk.Label('') + frame.add(self.r_free_label) + table.attach(frame, 1, 2, 3, 4) + + frame = gtk.Frame() + frame.add(gtk.Label('Event')) + table.attach(frame, 0, 1, 4, 5) + frame = gtk.Frame() + self.event_label = gtk.Label('') + frame.add(self.event_label) + table.attach(frame, 1, 2, 4, 5) + + frame = gtk.Frame() + frame.add(gtk.Label('Site')) + table.attach(frame, 0, 1, 5, 6) + frame = gtk.Frame() + self.site_label = gtk.Label('') + frame.add(self.site_label) + table.attach(frame, 1, 2, 5, 6) + + frame = gtk.Frame() + frame.add(gtk.Label('BDC')) + table.attach(frame, 0, 1, 6, 7) + frame = gtk.Frame() + self.bdc_label = gtk.Label('') + frame.add(self.bdc_label) + table.attach(frame, 1, 2, 6, 7) + + # NEW: Cluster size row (row 7) + frame = gtk.Frame() + frame.add(gtk.Label('Cluster size')) + table.attach(frame, 0, 1, 7, 8) + frame = gtk.Frame() + self.cluster_size_label = gtk.Label('') + frame.add(self.cluster_size_label) + table.attach(frame, 1, 2, 7, 8) + + outer_frame.add(table) + hbox.add(outer_frame) + self.vbox.add(hbox) + + frame = gtk.Frame(label='Navigator') + vbox = gtk.VBox() + hbox = gtk.HBox() + previous_event_button = gtk.Button(label="<<< Event") + previous_event_button.connect("clicked", self.previous_event) + hbox.pack_start(previous_event_button) + next_event_button = gtk.Button(label="Event >>>") + next_event_button.connect("clicked", self.next_event) + hbox.pack_start(next_event_button) + vbox.add(hbox) + hbox = gtk.HBox() + previous_site_button = gtk.Button(label="<<< Site") + previous_site_button.connect("clicked", self.previous_site) + hbox.pack_start(previous_site_button) + next_site_button = gtk.Button(label="Site >>>") + next_site_button.connect("clicked", self.next_site) + hbox.pack_start(next_site_button) + vbox.add(hbox) + + self.vbox_sample_navigator = gtk.VBox() + self.cb = gtk.combo_box_new_text() + # store handler id so we can block it during combobox rebuilds + self.cb_changed_id = self.cb.connect("changed", self.select_crystal) + vbox.add(self.cb) + + self.crystal_progressbar = gtk.ProgressBar() + vbox.add(self.crystal_progressbar) + frame.add(vbox) + self.vbox.add(frame) + + frame = gtk.Frame(label='Toggle Maps') + hbox = gtk.HBox() + toggle_emap_button = gtk.Button(label="event map") + hbox.add(toggle_emap_button) + toggle_emap_button.connect("clicked", self.toggle_emap) + toggle_zmap_button = gtk.Button(label="Z-map") + hbox.add(toggle_zmap_button) + toggle_zmap_button.connect("clicked", self.toggle_zmap) + toggle_x_ray_maps_button = gtk.Button(label="(2)fofc maps") + hbox.add(toggle_x_ray_maps_button) + toggle_x_ray_maps_button.connect("clicked", self.toggle_x_ray_maps) + self.toggle_average_map_button = gtk.Button(label="average map") + hbox.add(self.toggle_average_map_button) + self.toggle_average_map_button.connect("clicked", self.toggle_average_map) + frame.add(hbox) + self.vbox.pack_start(frame) + + frame = gtk.Frame(label='Ligand Modeling') + hbox = gtk.HBox() + place_ligand_here_button = gtk.Button(label="Place Ligand here") + place_ligand_here_button.connect("clicked", self.place_ligand_here) + hbox.add(place_ligand_here_button) + merge_ligand_button = gtk.Button(label="Merge Ligand") + merge_ligand_button.connect("clicked", self.merge_ligand_into_protein) + hbox.add(merge_ligand_button) + reset_to_unfitted_button = gtk.Button(label="Revert to unfitted") + reset_to_unfitted_button.connect("clicked", self.reset_to_unfitted) + hbox.add(reset_to_unfitted_button) + frame.add(hbox) + self.vbox.pack_start(frame) + + frame = gtk.Frame(label='Annotation') + vbox = gtk.VBox() + self.ligand_confidence_button_list = [] + for n, item in enumerate(self.ligand_confidence_button_labels): + if n == 0: + button = gtk.RadioButton(None, item[1]) + else: + button = gtk.RadioButton(button, item[1]) + button.connect("toggled", self.set_ligand_confidence, item[1]) + self.ligand_confidence_button_list.append(button) + vbox.add(button) + button.show() + frame.add(vbox) + self.vbox.pack_start(frame) + + frame = gtk.Frame(label='Save') + hbox = gtk.HBox() + self.save_next_button = gtk.Button(label="Save Model") + hbox.add(self.save_next_button) + self.save_next_button.connect("clicked", self.save_next) + frame.add(hbox) + self.vbox.pack_start(frame) + + self.window.add(self.vbox) + self.window.show_all() + + def save_pandda_inspect_events_csv_file(self): + with open(os.path.join(self.analysis_folder, 'pandda_inspect_events.csv'), 'w') as csvfile: + self.logger.info('updating {0!s}'.format(os.path.join(self.analysis_folder, 'pandda_inspect_events.csv'))) + writer = csv.writer(csvfile) + writer.writerows(self.elist) + + def set_ligand_confidence(self, widget, data=None): +# for n, b in enumerate(self.ligand_confidence_button_list): +# print("***********",n,b.get_active()) + if widget.get_active(): + self.elist[self.index][self.ligand_confidence_index] = data + self.save_pandda_inspect_events_csv_file() +# with open(os.path.join(self.analysis_folder, 'pandda_inspect_events.csv'), 'w') as csvfile: +# print('INSPECT - INFO: updating {0!s}'.format( +# os.path.join(self.analysis_folder, 'pandda_inspect_events.csv'))) +# writer = csv.writer(csvfile) +# writer.writerows(self.elist) + + def save_event_as_viewed(self): + self.elist[self.index][self.viewed_index] = 'True' + self.save_pandda_inspect_events_csv_file() + + def set_ligand_confidence_button(self): + foundItem = False + for item in self.ligand_confidence_button_labels: + if item[1] == self.ligand_confidence: + self.ligand_confidence_button_list[item[0]].set_active(True) + foundItem = True + break + if not foundItem: + self.ligand_confidence_button_list[0].set_active(True) + + def select_pandda_folder(self, widget): + dlg = gtk.FileChooserDialog("Open..", None, gtk.FILE_CHOOSER_ACTION_SELECT_FOLDER, + (gtk.STOCK_CANCEL, gtk.RESPONSE_CANCEL, gtk.STOCK_OPEN, gtk.RESPONSE_OK)) + response = dlg.run() + self.panddaDir = dlg.get_filename() + + self.analysis_folder = '' + if os.path.isdir(os.path.join(self.panddaDir, 'results')): + self.analysis_folder = os.path.join(self.panddaDir, 'results') + elif os.path.isdir(os.path.join(self.panddaDir, 'analyses')): + self.analysis_folder = os.path.join(self.panddaDir, 'analyses') + elif os.path.isdir(os.path.join(self.panddaDir, 'analysis')): + self.analysis_folder = os.path.join(self.panddaDir, 'analysis') + + self.eventCSV = os.path.realpath(os.path.join(self.analysis_folder, 'pandda_inspect_events.csv')) + self.siteCSV = os.path.realpath(os.path.join(self.analysis_folder, 'pandda_inspect_sites.csv')) + + if not os.path.isfile(self.eventCSV): + analyse_csv = self.eventCSV.replace('pandda_inspect_events.csv', 'pandda_analyse_events.csv') + if not os.path.isfile(analyse_csv): + self.logger.error('something went wrong; cannot find {0!s}'.format(analyse_csv)) + return + else: + self.initialize_inspect_events_csv_file(analyse_csv) + + if not os.path.isfile(self.eventCSV): + self.logger.error('something went wrong; cannot find {0!s}'.format(self.eventCSV)) + return + + if not os.path.isfile(self.siteCSV): + analyse_csv = self.siteCSV.replace('pandda_inspect_sites.csv', 'pandda_analyse_sites.csv') + if not os.path.isfile(analyse_csv): + self.logger.error('something went wrong; cannot find {0!s}'.format(analyse_csv)) + return + else: + self.initialize_inspect_sites_csv_file(analyse_csv) + + if not os.path.isfile(self.siteCSV): + self.logger.error('something went wrong; cannot find {0!s}'.format(self.siteCSV)) + return + + dlg.destroy() + self.parsepanddaDir() + + def get_pdb(self, missing_files): + if os.path.isfile( + os.path.join(self.panddaDir, 'processed_datasets', self.xtal, 'modelled_structures', + '{0!s}-pandda-model.pdb'.format(self.xtal))): + pdb = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, 'modelled_structures', + '{0!s}-pandda-model.pdb'.format(self.xtal)) + self.logger.info('found pdb file in modelled_structures folder: {0!s}'.format(pdb)) + elif os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-pandda-input.pdb'.format(self.xtal))): + pdb = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-pandda-input.pdb'.format(self.xtal)) + self.logger.info('found pdb file: {0!s}'.format(pdb)) + else: + self.logger.error('did not find pdb file') + missing_files = True + return pdb, missing_files + + def load_pdb(self): + coot.set_nomenclature_errors_on_read("ignore") + imol = coot.handle_read_draw_molecule_with_recentre(self.pdb, 0) + self.mol_dict['protein'] = imol + coot.set_show_symmetry_master(1) # master switch to show symmetry molecules + coot.set_show_symmetry_molecule(imol, 1) # show symm for model + + def get_emap(self, missing_files): + emap = '' + new_pandda_output = False + event_number = (3 - len(str(self.event))) * '0' + str(self.event) + if os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-event_{1!s}_1-BDC_{2!s}_map.native.mtz'.format(self.xtal, self.event, self.bdc))): + emap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-event_{1!s}_1-BDC_{2!s}_map.native.mtz'.format(self.xtal, self.event, self.bdc)) + self.logger.info('found event map: {0!s}'.format(emap)) + elif os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-event_{1!s}_1-BDC_{2!s}_map.native.ccp4'.format(self.xtal, self.event, self.bdc))): + emap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-event_{1!s}_1-BDC_{2!s}_map.native.ccp4'.format(self.xtal, self.event, self.bdc)) + self.logger.info('found event map: {0!s}'.format(emap)) + elif os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-pandda-output-event-{1!s}.mtz'.format(self.xtal, event_number))): + emap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-pandda-output-event-{1!s}.mtz'.format(self.xtal, event_number)) + self.logger.info('found event map: {0!s}'.format(emap)) + new_pandda_output = True + else: + emap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-event_{1!s}_1-BDC_{2!s}_map.native.mtz'.format(self.xtal, self.event, self.bdc)) + self.logger.error('cannot find event map {0!s}'.format(emap)) + self.logger.info('REMINDER: make sure that all CCP4 maps are converted into MTZ format!') + missing_files = True + self.logger.info('new pandda file names: {0!s}'.format(new_pandda_output)) + return emap, new_pandda_output, missing_files + + def load_emap(self): + if self.new_pandda_output: +# imol = coot.map_from_mtz_by_calc_phases(self.emap, "FEVENT", "PHEVENT", self.mol_dict['protein']) + imol = coot.make_and_draw_map(self.emap, "FEVENT", "PHEVENT", "1", 0, 0) + self.mol_dict['emap'] = imol + elif self.emap.endswith(".ccp4"): + imol = coot.read_ccp4_map(self.emap, 0) + self.mol_dict['emap'] = imol + else: + # loads double-maps +# imol = coot.auto_read_make_and_draw_maps(self.emap) + # testing this command + imol = coot.make_and_draw_map(self.emap, "FWT", "PHWT", "1", 0, 0) + self.mol_dict['emap'] = imol + # may cause core dump +# imol = coot.map_from_mtz_by_calc_phases(self.emap, "FWT", "PHWT", self.mol_dict['protein']) +# self.mol_dict['emap'] = imol + coot.set_colour_map_rotation_on_read_pdb(0) + coot.set_last_map_colour(0, 0, 1) + self.show_emap = 1 + # event map contour level: + # if you divide it by (1-bdc) you get the contour level in RMSD. + # for 1-bdc = 0.3, then contouring at 0.3 is 1 RMSD, 0.6 is 2 RMSD, etc. + # note self.bdc is actually 1-bdc; however, that seems far too low in practice + # emap_level = 1.0 - float(self.bdc) + coot.set_contour_level_in_sigma(self.mol_dict['emap'], 1.0 - float(self.bdc)) + + def get_zmap(self, missing_files): + zmap = '' + if os.path.isfile( + os.path.join(self.panddaDir, 'processed_datasets', self.xtal, '{0!s}-z_map.native.mtz'.format(self.xtal))): + zmap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, '{0!s}-z_map.native.mtz'.format(self.xtal)) + self.logger.info('found z-map map: {0!s}'.format(zmap)) + elif os.path.isfile( + os.path.join(self.panddaDir, 'processed_datasets', self.xtal, '{0!s}-z_map.native.ccp4'.format(self.xtal))): + zmap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, '{0!s}-z_map.native.ccp4'.format(self.xtal)) + self.logger.info('found z-map map: {0!s}'.format(zmap)) + elif os.path.isfile( + os.path.join(self.panddaDir, 'processed_datasets', self.xtal, '{0!s}-pandda-output.mtz'.format(self.xtal))): + zmap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, '{0!s}-pandda-output.mtz'.format(self.xtal)) + self.logger.info('found z-map map: {0!s}'.format(zmap)) + else: + self.logger.error('cannot find z-map!!!') + missing_files = True + return zmap, missing_files + + def load_zmap(self): + coot.set_default_initial_contour_level_for_difference_map(3) + if self.new_pandda_output: +# imol = coot.map_from_mtz_by_calc_phases(self.zmap, "FZVALUES", "PHZVALUES", self.mol_dict['protein']) + imol = coot.make_and_draw_map(self.zmap, "FZVALUES", "PHZVALUES", "1", 0, 1) + self.mol_dict['zmap'] = imol + coot.set_map_is_difference_map(imol, True) + elif self.zmap.endswith(".ccp4"): + imol = coot.read_ccp4_map(self.zmap, 1) + self.mol_dict['zmap'] = imol + else: + # load double-maps + imol = coot.auto_read_make_and_draw_maps(self.zmap) + self.mol_dict['zmap'] = imol[0] + coot.set_contour_level_in_sigma(self.mol_dict['zmap'], 3) + # may cause core dump +# imol = coot.map_from_mtz_by_calc_phases(self.zmap, "DELWT", "PHDELWT", self.mol_dict['protein']) +# self.mol_dict['zmap'] = imol +# coot.set_map_is_difference_map(imol, True) +# coot.set_contour_level_in_sigma(imol[0], 3) + self.show_zmap = 1 + + def get_xraymap(self, missing_files): + xraymap = '' + if os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', + self.xtal, '{0!s}-pandda-input.mtz'.format(self.xtal))): + xraymap = os.path.join(self.panddaDir, 'processed_datasets', + self.xtal, '{0!s}-pandda-input.mtz'.format(self.xtal)) + self.logger.info('found xray map: {0!s}'.format(xraymap)) + else: + self.logger.error('did not find xray map') + missing_files = True + return xraymap, missing_files + + def load_xraymap(self): + imol = coot.auto_read_make_and_draw_maps(self.xraymap) + self.mol_dict['xraymap'] = imol + coot.set_colour_map_rotation_on_read_pdb(0) + __main__.toggle_display_map(self.mol_dict['xraymap'][0], self.show_xraymap) + __main__.toggle_display_map(self.mol_dict['xraymap'][1], self.show_xraymap) +# coot.set_last_map_colour(0, 0, 1) + + def get_averagemap(self): + averagemap = '' + if os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-ground-state-average-map.native.mtz'.format(self.xtal))): + averagemap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-ground-state-average-map.native.mtz'.format(self.xtal)) + self.logger.info('found average map: {0!s}'.format(averagemap)) + self.toggle_average_map_button.set_sensitive(True) + + elif os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-ground-state-average-map.native.ccp4'.format(self.xtal))): + averagemap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-ground-state-average-map.native.ccp4'.format(self.xtal)) + self.logger.info('found average map: {0!s}'.format(averagemap)) + self.toggle_average_map_button.set_sensitive(True) + elif os.path.isfile( + os.path.join(self.panddaDir, 'processed_datasets', self.xtal, '{0!s}-pandda-output.mtz'.format(self.xtal))): + averagemap = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, '{0!s}-pandda-output.mtz'.format(self.xtal)) + self.logger.info('found average map: {0!s}'.format(averagemap)) + self.toggle_average_map_button.set_sensitive(True) + else: + self.logger.warning('did not find average map; disabling "average map" button') + self.toggle_average_map_button.set_sensitive(False) + return averagemap + + def load_averagemap(self): + if self.new_pandda_output: +# imol = coot.map_from_mtz_by_calc_phases(self.zmap, "FGROUND", "PHGROUND", self.mol_dict['protein']) + imol = coot.make_and_draw_map(self.zmap, "FGROUND", "PHGROUND", "1", 0, 0) + self.mol_dict['averagemap'] = imol + elif self.averagemap.endswith(".ccp4"): + imol = coot.read_ccp4_map(self.averagemap, 0) + self.mol_dict['averagemap'] = imol + else: + # loads double-maps + imol = coot.auto_read_make_and_draw_maps(self.averagemap) + self.mol_dict['averagemap'] = imol[0] + # may case core-dump +# imol = coot.map_from_mtz_by_calc_phases(self.zmap, "FWT", "PHWT", self.mol_dict['protein']) +# self.mol_dict['averagemap'] = imol + coot.set_colour_map_rotation_on_read_pdb(0) + __main__.toggle_display_map(self.mol_dict['averagemap'], self.show_averagemap) + coot.set_last_map_colour(0, 0, 1) + + def get_ligcif(self): + foundCIF = False + ligcif = '' + if self.event: + if os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, self.event, 'rhofit', 'best.cif')): + ligcif = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, self.event, 'rhofit', 'best.cif') + self.logger.info('found ligand cif file: {0!s}'.format(ligcif)) + foundCIF = True + if not foundCIF: + for l in glob.glob(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, 'ligand_files', '*cif')): + ligcif = l + self.logger.info('found ligand cif file: {0!s}'.format(ligcif)) + foundCIF = True + break + if not foundCIF: + self.logger.warning('did not find ligand cif file! Check if this folder contains any files: {0!s}'.format( + os.path.join(self.panddaDir, 'processed_datasets', self.xtal, 'ligand_files'))) + return ligcif + + def load_ligcif(self): + if os.path.isfile(self.ligcif): + coot.read_cif_dictionary(os.path.join(self.ligcif)) + imol = coot.handle_read_draw_molecule_with_recentre(self.ligcif.replace('.cif','.pdb'), 0) +# imol = coot.handle_read_draw_molecule_with_recentre(self.ligcif.replace('.cif', '.pdb'), 1) + self.mol_dict['ligand'] = imol + coot.seqnum_from_serial_number(imol, "X", 0) + coot.set_b_factor_residue_range(imol, "X", 1, 1, 20.00) + coot.set_occupancy_residue_range(imol, "X", 1, 1, float(self.bdc)) + + def recentre_on_event(self): + coot.set_rotation_centre(self.x, self.y, self.z) + + def reset_params(self): + self.xtal = None + self.event = None + self.bdc = None + self.site = None + self.pdb = None + self.emap = None + self.new_pandda_output = False + self.zmap = None + self.xraymap = None + self.averagemap = None + self.ligcif = None + self.x = None + self.y = None + self.z = None + self.resolution = None + self.r_free = None + self.r_work = None + self.ligand_confidence = None + self.merged = False + self.cluster_size = "" # NEW + + def update_params(self): + missing_files = False + self.xtal = self.elist[self.index][self.xtal_index] + self.event = self.elist[self.index][self.event_index] + self.bdc = self.elist[self.index][self.bdc_index] + self.site = self.elist[self.index][self.site_index] + self.logger.info('checking if files exist for {0!s}, event: {1!s}, site: {2!s}'.format(self.xtal, self.event, + self.site)) + self.pdb, missing_files = self.get_pdb(missing_files) + self.emap, self.new_pandda_output, missing_files = self.get_emap(missing_files) + self.zmap, missing_files = self.get_zmap(missing_files) + self.xraymap, missing_files = self.get_xraymap(missing_files) + self.averagemap = self.get_averagemap() + self.ligcif = self.get_ligcif() + self.x = float(self.elist[self.index][self.x_index]) + self.y = float(self.elist[self.index][self.y_index]) + self.z = float(self.elist[self.index][self.z_index]) + self.logger.info('event coordinates -> x = {0!s}, y = {1!s}, z = {2!s}'.format(self.x, self.y, self.z)) + self.resolution = self.elist[self.index][self.resolution_index] + self.r_free = self.elist[self.index][self.r_free_index] + self.r_work = self.elist[self.index][self.r_work_index] + self.ligand_confidence = self.elist[self.index][self.ligand_confidence_index] + # NEW: cluster size if present + try: + self.cluster_size = str(self.elist[self.index][self.cluster_size_index]) + except Exception: + self.cluster_size = "" + return missing_files + + def update_labels(self): + self.xtal_label.set_label(self.xtal) + self.resolution_label.set_label(self.resolution) + self.r_free_label.set_label(self.r_free) + self.r_work_label.set_label(self.r_work) + self.event_label.set_label(self.event) + self.site_label.set_label(self.site) + self.bdc_label.set_label(self.bdc) + # NEW: + if hasattr(self, "cluster_size_label"): + self.cluster_size_label.set_label(self.cluster_size if self.cluster_size is not None else "") + + def current_sample_matches_selection_criteria(self): + show_event = False + crit = self.selected_selection_criterion or "show all events" + if crit.startswith("show all events"): + show_event = True + elif crit == "show no ligands bound": + if "no ligand bound" in self.ligand_confidence: + show_event = True + elif crit == "show unknown ligands": + if "unknown ligand" in self.ligand_confidence: + show_event = True + elif crit == "show low confidence ligands": + if "low confidence" in self.ligand_confidence: + show_event = True + elif crit == "show high confidence ligands": + if "high confidence" in self.ligand_confidence: + show_event = True + elif crit == 'show not viewed events': + if not 'True' in self.elist[self.index][self.viewed_index]: + show_event = True + return show_event + + def RefreshData(self): + + self.reset_params() + + if len(__main__.molecule_number_list()) > 0: + for item in __main__.molecule_number_list(): + coot.close_molecule(item) + + self.mol_dict = { + 'pdb': None, + 'emap': None, + 'zmap': None, + 'ligand': None, + 'xraymap': None, + 'averagemap': None + } + + self.show_emap = 0 + self.show_zmap = 0 + self.show_xraymap = 0 + self.show_averagemap = 0 + + if self.index < 1: + self.index = 1 + if self.index > len(self.elist) - 1: + self.index = len(self.elist) - 1 + self.logger.warning('you reached the end of available events!') + return None + + missing_files = self.update_params() + + # check if event fits selection criteria + if self.current_sample_matches_selection_criteria() and not missing_files: + self.logger.info('loading files for {0!s}, event: {1!s}, site: {2!s}'.format(self.xtal, self.event, self.site)) + self.set_ligand_confidence_button() + self.update_labels() + self.recentre_on_event() + self.load_ligcif() + self.load_pdb() + self.load_emap() + self.load_zmap() + self.load_xraymap() + if self.averagemap: + self.load_averagemap() + self.logger.info('setting event map as RSR map') + coot.set_imol_refinement_map(self.mol_dict['emap']) + + elif self.current_sample_matches_selection_criteria() and missing_files: + self.logger.error('essential files could not be found, check messages above; skipping...') + self.change_event(1) + else: + self.logger.warning('{0!s}, event: {1!s}, site: {2!s} does not match selection criteria; skipping'.format(self.xtal, self.event, self.site)) + self.change_event(1) + + + def place_ligand_here(self, widget): + self.logger.info('moving ligand to pointer') + self.logger.info('LIGAND: ', self.mol_dict['ligand']) + __main__.move_molecule_here(self.mol_dict['ligand']) + + def merge_ligand_into_protein(self, widget): + self.logger.info('merge ligand into protein structure') + # merge_molecules(list(imols), imol) e.g. merge_molecules([1],0) + coot.merge_molecules_py([self.mol_dict['ligand']], self.mol_dict['protein']) + self.logger.info('removing ligand from molecule list') + coot.close_molecule(self.mol_dict['ligand']) + self.merged = True + + def reset_to_unfitted(self, widget): + for imol in __main__.molecule_number_list(): + if 'pandda-model.pdb' in coot.molecule_name(imol): + self.pdb = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + '{0!s}-pandda-input.pdb'.format(self.xtal)) + coot.close_molecule(imol) + self.load_pdb() + break + + def check_if_modelled_structures_folder_exists(self): + modelled_structures = os.path.join(self.panddaDir, 'processed_datasets', self.xtal, 'modelled_structures') + if not os.path.isdir(os.path.join(modelled_structures)): + self.logger.info('creating folder {0!s}'.format(modelled_structures)) + os.mkdir(modelled_structures) + + def save_next(self, widget): + self.check_if_modelled_structures_folder_exists() + if os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + 'modelled_structures', 'fitted-v0001.pdb')): + n = [] + for p in sorted(glob.glob(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + 'modelled_structures', 'fitted-v*.pdb'))): + n.append(int(p[p.rfind('fitted-v')+8:].replace('.pdb',''))) + new = 'fitted-v' + (4-len(str(max(n)+1))) * '0' + str(max(n)+1) + '.pdb' + else: + new = 'fitted-v0001.pdb' + coot.write_pdb_file(self.mol_dict['protein'], os.path.join( + self.panddaDir, 'processed_datasets', self.xtal, 'modelled_structures', new)) + if os.path.isfile(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, + 'modelled_structures', '{0!s}-pandda-model.pdb'.format(self.xtal))): + os.remove(os.path.join(self.panddaDir,'processed_datasets', self.xtal, + 'modelled_structures', '{0!s}-pandda-model.pdb'.format(self.xtal))) + os.chdir(os.path.join(self.panddaDir, 'processed_datasets', self.xtal, 'modelled_structures')) + if os.name == 'nt': + os.popen('copy {0!s} {1!s}-pandda-model.pdb'.format(new, self.xtal)) + else: + os.system('/bin/cp {0!s} {1!s}-pandda-model.pdb'.format(new, self.xtal)) +# os.symlink(new, '{0!s}-pandda-model.pdb'.format(self.xtal)) + if self.merged: + self.elist[self.index][self.ligand_placed_index] = 'True' + self.save_pandda_inspect_events_csv_file() + + def select_events(self, widget): + self.selected_selection_criterion = self.select_events_combobox.get_active_text() + self.crystal_progressbar.set_fraction(0) + if self.selected_selection_criterion and self.selected_selection_criterion.startswith("show all events - sort by cluster size"): + if hasattr(self, 'cluster_size_index'): + header = self.elist[0] + del self.elist[0] + def _as_float_or_big(row): + try: + return float(row[self.cluster_size_index]) + except Exception: + return float('inf') + self.elist = sorted(self.elist, key=_as_float_or_big) + self.elist.insert(0, header) + self.init_crystal_selection_combobox() + else: + self.logger.warning("cluster_size column not present; cannot sort by cluster size") + elif self.selected_selection_criterion and self.selected_selection_criterion.startswith("show all events - sort alphabetically"): + self.logger.info("sorting event alphabetically") + header = self.elist[0] + del self.elist[0] + self.elist = sorted(self.elist, key=lambda x: x[self.xtal_index]) + self.elist.insert(0, header) + self.init_crystal_selection_combobox() + self.logger.info("you selected to {0!s}".format(self.selected_selection_criterion)) + self.index = -1 + + def previous_event(self, widget): + self.change_event(-1) + + def next_event(self, widget): + self.save_event_as_viewed() + for n, b in enumerate(self.ligand_confidence_button_list): +# print('===>', n, b.get_active()) + if b.get_active(): + for c in self.ligand_confidence_button_labels: + nc = c[0] + co = c[1] +# print(nc, co) + if nc == n: + self.elist[self.index][self.ligand_confidence_index] = co + self.logger.info("saving ligand confidence for event as '{0!s}'".format(co)) + self.save_pandda_inspect_events_csv_file() + break + break + self.change_event(1) + + def previous_site(self, widget): + self.logger.info('moving to previous site') + self.change_site(-1) + + def next_site(self, widget): + self.logger.info('moving to next site') + self.change_site(1) + + def change_site(self, n): + current_site = int(self.site) + self.logger.info('current site {0!s}'.format(current_site)) + new_site = current_site + n + self.logger.info('new site: {0!s}'.format(new_site)) + index_increment = 0 + for i, item in enumerate(self.elist): + self.logger.info('{0!s} - {1!s}'.format(i, item[2])) + if item[2] == str(new_site): + index_increment = i - self.index + break + self.change_event(index_increment) + + def change_event(self, n): + self.index += n + self.crystal_progressbar.set_fraction(float(self.index) / float(len(self.elist))) + self.update_crystal_selection_combobox() + self.RefreshData() + + def update_crystal_selection_combobox(self): + self.logger.info('updating crystal selection combobox') + x = self.elist[self.index][self.xtal_index] + e = self.elist[self.index][self.event_index] + s = self.elist[self.index][self.site_index] + text = '{0!s} - event: {1!s} - site: {2!s}'.format(x, e, s) + for n, i in enumerate(self.cb_list): + if i == text: + self.cb.set_active(n) + break + + def select_crystal(self, widget): + """ + Parse ' - event: - site: ' safely and jump there. + Tolerates transient empty selections fired during combobox rebuilds. + """ + text = widget.get_active_text() + if not text: + self.logger.info("combobox changed with no active text; ignoring") + return + if not isinstance(text, str): + text = str(text) + try: + left, rest = text.split(" - event: ", 1) + event_str, rest2 = rest.split(" - site: ", 1) + xtal = left.strip() + event = event_str.strip() + site = rest2.strip() + except Exception as e: + self.logger.warning("unexpected combobox text %r: %s; ignoring" % (text, e)) + return + index_increment = 0 + found = False + for n, _ in enumerate(self.elist): + if n == 0: + continue + x = self.elist[n][self.xtal_index] + e = self.elist[n][self.event_index] + s = self.elist[n][self.site_index] + if x == xtal and e == event and s == site: + index_increment = n - self.index + found = True + break + if not found: + self.logger.warning("could not locate event '%s', %s, %s; ignoring" % (xtal, event, site)) + return + self.change_event(index_increment) + + def make_secure_copy_of_original_csv(self, csv_file): + csv_original = csv_file + '.original' + if not os.path.isfile(csv_original): + self.logger.info('creating backup file of {0!s}'.format(csv_file)) + shutil.copy(csv_file, csv_original) + + + def initialize_inspect_events_csv_file(self, analyse_csv): + self.make_secure_copy_of_original_csv(analyse_csv) + r = csv.reader(open(analyse_csv)) + l = list(r) + for i, line in enumerate(l): + if i == 0: + l[i].extend(['Interesting','Ligand Placed','Ligand Confidence','Comment','Viewed']) + else: + l[i].extend(['False', 'False', 'Low', 'None', 'False']) + with open(os.path.join(self.analysis_folder,'pandda_inspect_events.csv'), 'w') as f: + writer = csv.writer(f) + writer.writerows(l) + + + def initialize_inspect_sites_csv_file(self, analyse_csv): + self.make_secure_copy_of_original_csv(analyse_csv) + r = csv.reader(open(analyse_csv)) + l = list(r) + for i, line in enumerate(l): + if i == 0: + l[i].extend(['Name','Comment']) + else: + l[i].extend(['None', 'None']) + with open(os.path.join(self.analysis_folder,'pandda_inspect_sites.csv'), 'w') as f: + writer = csv.writer(f) + writer.writerows(l) + + def parsepanddaDir(self): + self.logger.info("reading {0!s}".format(self.eventCSV)) + r = csv.reader(open(self.eventCSV)) + self.elist = list(r) + + self.logger.info("reading {0!s}".format(self.siteCSV)) + r = csv.reader(open(self.siteCSV)) + self.slist = list(r) + + self.logger.info("getting header fields from {0!s}".format(self.eventCSV)) + for n, item in enumerate(self.elist[0]): # number of columns at the end can differ + if item == 'dtag': + self.xtal_index = n + if item == 'Ligand Confidence': + self.ligand_confidence_index = n + if item == 'event_num' or item == 'event_idx': + self.event_index = n + if item == 'site_num' or item == 'site_idx': + self.site_index = n + if item == 'bdc' or item == '1-BDC': + self.bdc_index = n + if item == 'x': + self.x_index = n + if item == 'y': + self.y_index = n + if item == 'z': + self.z_index = n + if item == 'analysed_resolution' or item == 'high_resolution': + self.resolution_index = n + if item == 'r_work': + self.r_work_index = n + if item == 'r_free': + self.r_free_index = n + if item == 'Viewed': + self.viewed_index = n + if item == 'cluster_size': + self.cluster_size_index = n + if item == "Ligand Placed": + self.ligand_placed_index = n + self.show_content_of_event_csv_file() + + def show_content_of_event_csv_file(self): + self.logger.info("showing contents of {0!s}:".format(self.eventCSV)) + for n,i in enumerate(self.elist): + if n == 0: + continue + x = round(float(self.elist[n][self.x_index]), 1) + y = round(float(self.elist[n][self.y_index]), 1) + z = round(float(self.elist[n][self.z_index]), 1) + info = ( + ' xtal: {0!s}'.format(self.elist[n][self.xtal_index]) + + ' - event/site: {0!s}/{1!s}'.format(self.elist[n][self.event_index],self.elist[n][self.site_index]) + + ' - BDC: {0!s}'.format(self.elist[n][self.bdc_index]) + + ' - x,y,z: {0!s},{1!s},{2!s}'.format(x, y, z) + + ' - Resolution: {0!s}'.format(self.elist[n][self.resolution_index]) + + ' - Rwork/Rfree: {0!s}/{1!s}'.format(self.elist[n][self.r_work_index],self.elist[n][self.r_free_index]) + + ' - viewed: {0!s}'.format(self.elist[n][self.viewed_index]) + + ' - ligand confidence: {0!s}'.format(self.elist[n][self.ligand_confidence_index]) + ) + # Optional: include cluster_size in the log when present + try: + info += ' - cluster_size: {0!s}'.format(self.elist[n][self.cluster_size_index]) + except Exception: + pass + self.logger.info(info) + self.init_crystal_selection_combobox() + + def init_crystal_selection_combobox(self): + self.logger.info('removing all entries from crystal selection combobox') + + # Block 'changed' during rebuild to avoid spurious callbacks + if self.cb_changed_id is not None: + self.cb.handler_block(self.cb_changed_id) + try: + if len(self.elist) != 0: + # remove first entry repeatedly until empty + for _ in range(len(self.cb_list)): + self.cb.remove_text(0) + self.logger.info('adding new entries from crystal selection combobox') + self.cb_list = [] + for n,i in sorted(enumerate(self.elist)): + if n == 0: + continue + text = '{0!s} - event: {1!s} - site: {2!s}'.format(self.elist[n][self.xtal_index], + self.elist[n][self.event_index], + self.elist[n][self.site_index]) + self.cb_list.append(text) + self.cb.append_text(text) + # ensure a valid selection exists + if self.cb_list: + self.cb.set_active(0) + finally: + if self.cb_changed_id is not None: + self.cb.handler_unblock(self.cb_changed_id) + + + + def toggle_emap(self, widget): + if self.mol_dict['emap'] is not None: + if self.show_emap == 0: + self.show_emap = 1 + else: + self.show_emap = 0 + __main__.toggle_display_map(self.mol_dict['emap'], self.show_emap) + + def toggle_zmap(self, widget): + if self.mol_dict['zmap'] is not None: + if self.show_zmap == 0: + self.show_zmap = 1 + else: + self.show_zmap = 0 + __main__.toggle_display_map(self.mol_dict['zmap'], self.show_zmap) + + def toggle_x_ray_maps(self, widget): + if self.mol_dict['xraymap'] is not None: + if self.show_xraymap == 0: + self.show_xraymap = 1 + else: + self.show_xraymap = 0 + __main__.toggle_display_map(self.mol_dict['xraymap'][0], self.show_xraymap) + __main__.toggle_display_map(self.mol_dict['xraymap'][1], self.show_xraymap) + + def toggle_average_map(self, widget): + if self.mol_dict['averagemap'] is not None: + if self.show_averagemap == 0: + self.show_averagemap = 1 + else: + self.show_averagemap = 0 + __main__.toggle_display_map(self.mol_dict['averagemap'], self.show_averagemap) + + def CANCEL(self, widget): + self.window.destroy() + +if __name__ == '__main__': + inspect_gui().startGUI() From ac0094737c579ad938e4e7e8a4eb73b23f684970 Mon Sep 17 00:00:00 2001 From: Doeke Hekstra Date: Mon, 29 Dec 2025 22:59:27 -0500 Subject: [PATCH 2/2] export toanalyse_panda_inspect.py in coot --- notebooks/pipeline.ipynb | 1289 ++++++++++++++++++++++++-------------- valdo/helper.py | 261 +++++++- 2 files changed, 1075 insertions(+), 475 deletions(-) diff --git a/notebooks/pipeline.ipynb b/notebooks/pipeline.ipynb index b6e51ab..7d9a2c9 100644 --- a/notebooks/pipeline.ipynb +++ b/notebooks/pipeline.ipynb @@ -12,7 +12,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "c4824ce9-c211-4e77-9ccb-8da266a8c6ca", "metadata": {}, "outputs": [], @@ -31,8 +31,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "/n/hekstra_lab/people/dhekstra/conda_envs/valdo-gpu/lib/python3.10/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.25.2\n", - " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" + "/n/home12/dhekstra/.conda/envs/valdo-gpu-mamba/lib/python3.10/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.7.3' currently installed).\n", + " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", + "/n/home12/dhekstra/ipython_notebooks/drug-screening/valdo/__init__.py:2: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", + " import pkg_resources\n" ] }, { @@ -64,6 +66,7 @@ "from itertools import repeat\n", "\n", "import valdo\n", + "from valdo.helper import export_valdo_to_pandda_inspect\n", "\n", "pd.set_option(\"display.precision\", 3)\n", "plt.rc('figure', figsize=(4,2.5))\n", @@ -90,9 +93,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "We will use GPU for torch operations (esp. VAE training).\n", - "There are 24 CPUs available.\n", - "For multiprocessing, we will use 23 CPUs.\n" + "There are 32 CPUs available.\n", + "For multiprocessing, we will use 31 CPUs.\n" ] } ], @@ -192,14 +194,14 @@ "outputs": [], "source": [ "# use trailing slashes!\n", - "my_dir = \"/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/\" # directory where to keep all the processing steps\n", + "my_dir = \"/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/\" # directory where to keep all the processing steps\n", "original_data_path = \"/n/hekstra_lab/people/minhuan/projects/drug/phyllis_backup/pipeline/data/original_data/\" # the MTZs with measured F and sigF\n", "\n", "# keep the logs and pdb files in the same directory, e.g. with names xxxx.pdb, xxxx.mtz, and xxxx.log\n", - "refinement_paths=[\"/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline_run16/vae/reconstructed/full_recon/refine/short_names/\",\n", - " \"/n/holyscratch01/hekstra_lab/dhekstra/phyllis/PTP1B_DK/pandda_input_models_refined_waters/short/\", # we have these refinements, but not the \"short\" versions (lost on scratch)\n", + "refinement_paths=[\"/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline_run16/vae/reconstructed/full_recon/refine/short_names/\",\n", + " \"/n/netscratch/hekstra_lab/Lab/dhekstra/phyllis/PTP1B_DK/pandda_input_models_refined_waters/short/\", # we have these refinements, but not the \"short\" versions (lost on scratch)\n", " \"/n/hekstra_lab/people/minhuan/projects/drug/minhuan_backup/pipeline/data/refined/\", # contains refinements for both indexing solutions\n", - " \"/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/refine/dimple_round_1/\",\n", + " \"/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/refine/dimple_round_1/\",\n", " \"/net/holy-nfsisilon/ifs/rc_labs/hekstra_lab/people/minhuan/projects/drug/minhuan_backup/pipeline/data/refine_v2/refine_output/\"]\n", "\n", "refinement_phases = [(\"PH2FOFCWT\",\"PHFOFCWT\"),\\\n", @@ -313,17 +315,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/data/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/data/mtzs_origin/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/data/mtzs_reindex/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/data/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/data/mtzs_origin/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/data/mtzs_reindex/ already exists\n", "/n/hekstra_lab/people/minhuan/projects/drug/minhuan_backup/pipeline/data/refined/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/data/mtzs_scaled/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/vae/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/vae/reconstructed/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/vae/reconstructed_w_phases/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/vae/blobs/ already exists\n", - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/data/bound_models_reindexed/ already exists\n" + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/data/mtzs_scaled/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/vae/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/vae/reconstructed/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/vae/reconstructed_w_phases/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/vae/blobs/ already exists\n", + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/data/bound_models_reindexed/ already exists\n" ] } ], @@ -1559,7 +1561,7 @@ " outputmtz_path=scaled_path,\n", " prefix=run_prefix)\n", "else:\n", - " scaler = valdo.Scaler(reference_mtz=file_list[reference_idx])\n", + " scaler = valdo.Scaler(reference_mtz=file_list[reference_idx],columns=[amplitude_col, error_col])\n", " scaling_metrics = scaler.batch_scaling(mtz_path_list=file_list, \n", " outputmtz_path=scaled_path,\n", " prefix=run_prefix, \n", @@ -3605,7 +3607,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "c4330dcd-7e7c-435b-b883-467d4b28e80e", "metadata": {}, "outputs": [], @@ -3626,7 +3628,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 20, "id": "efa64286-a14e-48e2-bbd1-74819ac455e1", "metadata": {}, "outputs": [ @@ -3634,7 +3636,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/data/bound_models_reindexed/\n" + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/data/bound_models_reindexed/\n" ] } ], @@ -3660,7 +3662,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 21, "id": "1e958250-cfb1-4274-8ffb-3d51eb545444", "metadata": {}, "outputs": [ @@ -3668,16 +3670,29 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|████████████████████████████████████████| 168/168 [00:01<00:00, 146.37it/s]" + " 60%|████████████████████████▍ | 100/168 [00:04<00:02, 26.06it/s]" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No match for exclude\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|█████████████████████████████████████████| 168/168 [00:07<00:00, 21.67it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "No match for exclude\n", - "CPU times: user 31.7 ms, sys: 69 ms, total: 101 ms\n", - "Wall time: 1.15 s\n" + "CPU times: user 69.3 ms, sys: 106 ms, total: 175 ms\n", + "Wall time: 7.77 s\n" ] }, { @@ -3758,7 +3773,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 22, "id": "b50b2d69-ab5f-4c68-a953-86f74ba1ad17", "metadata": {}, "outputs": [ @@ -3797,78 +3812,78 @@ " \n", " \n", " 0\n", - " 1488_1\n", - " 15.779\n", - " 15.779\n", - " 3061.747\n", - " 43.687\n", - " 17.737\n", - " 10.808\n", - " 582.258\n", - " 5.180\n", + " 0001_0\n", + " 5.084\n", + " 5.084\n", + " 463.701\n", + " 0.512\n", + " 46.332\n", + " 27.907\n", + " 119.812\n", + " 3.058\n", " \n", " \n", " 1\n", - " 1488_1\n", - " 16.734\n", - " 16.734\n", - " 2903.690\n", - " 7.750\n", - " 49.098\n", - " 24.999\n", - " 540.365\n", - " 5.053\n", + " 0001_0\n", + " 5.275\n", + " 5.275\n", + " 344.380\n", + " 10.732\n", + " 35.926\n", + " 32.714\n", + " 84.752\n", + " 2.725\n", " \n", " \n", " 2\n", - " 1488_1\n", - " 7.136\n", - " 7.136\n", - " 448.032\n", - " 15.106\n", - " 41.170\n", - " 23.162\n", - " 97.144\n", - " 2.852\n", + " 0001_0\n", + " 5.631\n", + " 5.631\n", + " 263.501\n", + " 7.958\n", + " 48.484\n", + " 27.455\n", + " 63.717\n", + " 2.478\n", " \n", " \n", " 3\n", - " 1488_1\n", - " 4.936\n", - " 4.936\n", - " 139.682\n", - " 15.278\n", - " 44.311\n", - " 17.316\n", - " 34.810\n", - " 2.026\n", + " 0001_0\n", + " 5.445\n", + " 5.445\n", + " 204.157\n", + " -19.138\n", + " 51.096\n", + " -1.604\n", + " 50.150\n", + " 2.288\n", " \n", " \n", " 4\n", - " 1488_1\n", - " 5.981\n", - " 5.981\n", - " 108.761\n", - " 44.720\n", - " -3.698\n", - " 0.417\n", - " 24.893\n", - " 1.811\n", + " 0001_0\n", + " 4.857\n", + " 4.857\n", + " 140.316\n", + " 40.682\n", + " 2.447\n", + " 3.383\n", + " 36.126\n", + " 2.051\n", " \n", " \n", "\n", "" ], "text/plain": [ - " sample peakz peak score cenx ceny cenz volume radius\n", - "0 1488_1 15.779 15.779 3061.747 43.687 17.737 10.808 582.258 5.180\n", - "1 1488_1 16.734 16.734 2903.690 7.750 49.098 24.999 540.365 5.053\n", - "2 1488_1 7.136 7.136 448.032 15.106 41.170 23.162 97.144 2.852\n", - "3 1488_1 4.936 4.936 139.682 15.278 44.311 17.316 34.810 2.026\n", - "4 1488_1 5.981 5.981 108.761 44.720 -3.698 0.417 24.893 1.811" + " sample peakz peak score cenx ceny cenz volume radius\n", + "0 0001_0 5.084 5.084 463.701 0.512 46.332 27.907 119.812 3.058\n", + "1 0001_0 5.275 5.275 344.380 10.732 35.926 32.714 84.752 2.725\n", + "2 0001_0 5.631 5.631 263.501 7.958 48.484 27.455 63.717 2.478\n", + "3 0001_0 5.445 5.445 204.157 -19.138 51.096 -1.604 50.150 2.288\n", + "4 0001_0 4.857 4.857 140.316 40.682 2.447 3.383 36.126 2.051" ] }, - "execution_count": 30, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -3890,7 +3905,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 23, "id": "e3488259-cb23-4d7a-8bf4-d10f8da48c50", "metadata": {}, "outputs": [], @@ -3912,7 +3927,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 24, "id": "53bfa05c-337c-4aa6-a46a-a0b26c23ab8e", "metadata": { "tags": [] @@ -3922,15 +3937,15 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|███████████████████████████████████| 16486/16486 [00:05<00:00, 2940.83it/s]\n" + "100%|████████████████████████████████████| 16798/16798 [00:27<00:00, 611.18it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "CPU times: user 120 ms, sys: 180 ms, total: 300 ms\n", - "Wall time: 7.7 s\n" + "CPU times: user 151 ms, sys: 78.9 ms, total: 230 ms\n", + "Wall time: 36.7 s\n" ] } ], @@ -4304,7 +4319,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 25, "id": "07f1dc04-ae37-43bd-a002-2eeeba6d225f", "metadata": {}, "outputs": [], @@ -4314,7 +4329,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 26, "id": "0b794a6d-7781-493a-be20-0eed978338c5", "metadata": {}, "outputs": [], @@ -4324,7 +4339,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "e96327c3-f59a-4465-ac31-491c15182030", "metadata": {}, "outputs": [], @@ -4337,7 +4352,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 28, "id": "d54f2246-d2ac-408e-aab1-aed7e9d03c39", "metadata": {}, "outputs": [], @@ -4357,7 +4372,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 29, "id": "ca34f668-57cc-43e9-a3ca-b5751036f3c5", "metadata": {}, "outputs": [ @@ -4399,58 +4414,58 @@ " \n", " \n", " count\n", - " 14309.000\n", - " 14309.000\n", - " 14309.000\n", - " 14309.000\n", - " 14309.000\n", - " 14309.000\n", - " 14309.000\n", - " 14309.000\n", - " 14309.000\n", - " 14309.0\n", - " 14309.000\n", - " 14309.0\n", + " 14620.000\n", + " 14620.000\n", + " 14620.000\n", + " 14620.000\n", + " 14620.000\n", + " 14620.000\n", + " 14620.000\n", + " 14620.000\n", + " 14620.000\n", + " 14620.0\n", + " 14620.000\n", + " 14620.0\n", " \n", " \n", " mean\n", - " 5.488\n", - " 5.488\n", - " 219.463\n", - " 3.194\n", - " 40.875\n", - " 14.504\n", - " 47.644\n", - " 1.936\n", - " 0.061\n", + " 5.483\n", + " 5.483\n", + " 215.058\n", + " 3.241\n", + " 40.899\n", + " 14.624\n", + " 46.966\n", + " 1.935\n", + " 0.059\n", " 0.0\n", " 0.013\n", " 0.0\n", " \n", " \n", " std\n", - " 2.184\n", - " 2.184\n", - " 607.253\n", - " 16.738\n", - " 15.981\n", - " 13.696\n", - " 107.366\n", - " 0.721\n", - " 0.240\n", + " 2.108\n", + " 2.108\n", + " 593.301\n", + " 16.637\n", + " 15.979\n", + " 13.748\n", + " 105.694\n", + " 0.709\n", + " 0.236\n", " 0.0\n", - " 0.115\n", + " 0.112\n", " 0.0\n", " \n", " \n", " min\n", - " 3.696\n", - " 3.696\n", - " 36.220\n", - " -43.761\n", - " -3.698\n", - " -9.908\n", - " 10.003\n", + " 3.643\n", + " 3.643\n", + " 36.038\n", + " -43.483\n", + " -8.330\n", + " -11.755\n", + " 10.000\n", " 1.337\n", " 0.000\n", " 0.0\n", @@ -4459,14 +4474,14 @@ " \n", " \n", " 25%\n", - " 4.504\n", - " 4.504\n", - " 55.411\n", - " -8.733\n", - " 34.528\n", - " 1.450\n", - " 13.853\n", - " 1.490\n", + " 4.502\n", + " 4.502\n", + " 55.633\n", + " -8.786\n", + " 34.585\n", + " 1.403\n", + " 13.884\n", + " 1.491\n", " 0.000\n", " 0.0\n", " 0.000\n", @@ -4474,14 +4489,14 @@ " \n", " \n", " 50%\n", - " 5.001\n", - " 5.001\n", - " 84.714\n", - " 0.291\n", - " 41.831\n", - " 8.598\n", - " 20.980\n", - " 1.711\n", + " 5.019\n", + " 5.019\n", + " 85.721\n", + " 0.452\n", + " 41.912\n", + " 8.816\n", + " 21.140\n", + " 1.715\n", " 0.000\n", " 0.0\n", " 0.000\n", @@ -4489,14 +4504,14 @@ " \n", " \n", " 75%\n", - " 5.742\n", - " 5.742\n", - " 158.604\n", - " 11.852\n", - " 50.958\n", - " 29.341\n", - " 38.796\n", - " 2.100\n", + " 5.763\n", + " 5.763\n", + " 159.818\n", + " 11.788\n", + " 51.037\n", + " 29.416\n", + " 38.841\n", + " 2.101\n", " 0.000\n", " 0.0\n", " 0.000\n", @@ -4504,14 +4519,14 @@ " \n", " \n", " max\n", - " 43.262\n", - " 43.262\n", - " 10039.979\n", - " 52.831\n", - " 80.980\n", - " 49.009\n", - " 1978.535\n", - " 7.788\n", + " 43.344\n", + " 43.344\n", + " 9848.373\n", + " 52.787\n", + " 81.035\n", + " 46.832\n", + " 1897.483\n", + " 7.680\n", " 1.000\n", " 0.0\n", " 1.000\n", @@ -4523,27 +4538,27 @@ ], "text/plain": [ " peakz peak score cenx ceny cenz \\\n", - "count 14309.000 14309.000 14309.000 14309.000 14309.000 14309.000 \n", - "mean 5.488 5.488 219.463 3.194 40.875 14.504 \n", - "std 2.184 2.184 607.253 16.738 15.981 13.696 \n", - "min 3.696 3.696 36.220 -43.761 -3.698 -9.908 \n", - "25% 4.504 4.504 55.411 -8.733 34.528 1.450 \n", - "50% 5.001 5.001 84.714 0.291 41.831 8.598 \n", - "75% 5.742 5.742 158.604 11.852 50.958 29.341 \n", - "max 43.262 43.262 10039.979 52.831 80.980 49.009 \n", + "count 14620.000 14620.000 14620.000 14620.000 14620.000 14620.000 \n", + "mean 5.483 5.483 215.058 3.241 40.899 14.624 \n", + "std 2.108 2.108 593.301 16.637 15.979 13.748 \n", + "min 3.643 3.643 36.038 -43.483 -8.330 -11.755 \n", + "25% 4.502 4.502 55.633 -8.786 34.585 1.403 \n", + "50% 5.019 5.019 85.721 0.452 41.912 8.816 \n", + "75% 5.763 5.763 159.818 11.788 51.037 29.416 \n", + "max 43.344 43.344 9848.373 52.787 81.035 46.832 \n", "\n", " volume radius bound cys215 ligand duplicate \n", - "count 14309.000 14309.000 14309.000 14309.0 14309.000 14309.0 \n", - "mean 47.644 1.936 0.061 0.0 0.013 0.0 \n", - "std 107.366 0.721 0.240 0.0 0.115 0.0 \n", - "min 10.003 1.337 0.000 0.0 0.000 0.0 \n", - "25% 13.853 1.490 0.000 0.0 0.000 0.0 \n", - "50% 20.980 1.711 0.000 0.0 0.000 0.0 \n", - "75% 38.796 2.100 0.000 0.0 0.000 0.0 \n", - "max 1978.535 7.788 1.000 0.0 1.000 0.0 " + "count 14620.000 14620.000 14620.000 14620.0 14620.000 14620.0 \n", + "mean 46.966 1.935 0.059 0.0 0.013 0.0 \n", + "std 105.694 0.709 0.236 0.0 0.112 0.0 \n", + "min 10.000 1.337 0.000 0.0 0.000 0.0 \n", + "25% 13.884 1.491 0.000 0.0 0.000 0.0 \n", + "50% 21.140 1.715 0.000 0.0 0.000 0.0 \n", + "75% 38.841 2.101 0.000 0.0 0.000 0.0 \n", + "max 1897.483 7.680 1.000 0.0 1.000 0.0 " ] }, - "execution_count": 42, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -4564,7 +4579,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "f4feb7b8-5217-4dfa-893c-0810b92e85ad", "metadata": {}, "outputs": [], @@ -4574,7 +4589,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 31, "id": "066ddf84-d5da-4280-b934-c31871a3710a", "metadata": {}, "outputs": [ @@ -4608,464 +4623,464 @@ " \n", " \n", " \n", - " 13589\n", - " 1227_0\n", - " 21.452\n", - " 1052.345\n", - " 0\n", - " 0\n", - " \n", - " \n", - " 6016\n", - " 1525_1\n", - " 21.408\n", - " 4809.380\n", + " 15892\n", + " 1865_0\n", + " 21.824\n", + " 2651.008\n", " 1\n", " 1\n", " \n", " \n", - " 27\n", - " 0714_0\n", - " 21.284\n", - " 1942.200\n", - " 0\n", - " 0\n", + " 5416\n", + " 0652_1\n", + " 21.712\n", + " 6797.678\n", + " 1\n", + " 1\n", " \n", " \n", - " 16223\n", - " 0205_1\n", - " 21.281\n", - " 7414.799\n", + " 7493\n", + " 0914_0\n", + " 21.699\n", + " 6921.540\n", " 1\n", " 1\n", " \n", " \n", - " 6284\n", - " 1549_1\n", - " 21.281\n", - " 2367.889\n", + " 3317\n", + " 0382_0\n", + " 21.640\n", + " 1286.560\n", " 0\n", " 0\n", " \n", " \n", - " 13355\n", - " 1316_0\n", - " 21.229\n", - " 3977.842\n", - " 1\n", - " 1\n", + " 16080\n", + " 1886_1\n", + " 21.611\n", + " 2337.166\n", + " 0\n", + " 0\n", " \n", " \n", - " 9137\n", - " 1252_0\n", - " 21.121\n", - " 1935.243\n", + " 10312\n", + " 1227_0\n", + " 21.466\n", + " 952.191\n", " 0\n", " 0\n", " \n", " \n", - " 7357\n", - " 0180_0\n", - " 21.014\n", - " 7316.743\n", + " 1035\n", + " 0116_0\n", + " 21.200\n", + " 7571.263\n", " 1\n", " 1\n", " \n", " \n", - " 7573\n", - " 1886_1\n", - " 20.883\n", - " 2125.104\n", - " 0\n", - " 0\n", + " 16432\n", + " 1922_0\n", + " 21.100\n", + " 5470.046\n", + " 1\n", + " 1\n", " \n", " \n", - " 7012\n", - " 0652_1\n", - " 20.576\n", - " 6236.932\n", + " 11007\n", + " 1317_1\n", + " 20.998\n", + " 4164.073\n", " 1\n", " 1\n", " \n", " \n", - " 5024\n", - " 0995_0\n", - " 20.572\n", - " 2479.002\n", - " 0\n", - " 0\n", + " 6610\n", + " 0812_0\n", + " 20.922\n", + " 6507.950\n", + " 1\n", + " 1\n", " \n", " \n", - " 14205\n", - " 1317_1\n", - " 20.458\n", - " 4126.022\n", + " 8309\n", + " 1009_0\n", + " 20.876\n", + " 5539.081\n", " 1\n", " 1\n", " \n", " \n", - " 3393\n", - " 1559_0\n", - " 20.411\n", - " 3167.417\n", + " 12978\n", + " 1549_1\n", + " 20.861\n", + " 2219.043\n", " 0\n", " 0\n", " \n", " \n", - " 8049\n", - " 0118_0\n", - " 20.404\n", - " 4023.063\n", - " 1\n", - " 1\n", - " \n", - " \n", - " 11440\n", + " 11013\n", " 1318_0\n", - " 20.253\n", - " 3538.773\n", + " 20.642\n", + " 3698.278\n", " 1\n", " 1\n", " \n", " \n", - " 4751\n", - " 0175_0\n", - " 20.218\n", - " 2451.201\n", + " 600\n", + " 0072_0\n", + " 20.640\n", + " 1776.643\n", " 1\n", " 1\n", " \n", " \n", - " 15203\n", - " 1589_1\n", - " 20.149\n", - " 5325.159\n", - " 1\n", - " 1\n", + " 16734\n", + " 1960_0\n", + " 20.548\n", + " 1494.204\n", + " 0\n", + " 0\n", " \n", " \n", - " 6797\n", + " 14649\n", " 1710_0\n", - " 20.135\n", - " 2365.174\n", + " 20.523\n", + " 3415.625\n", " 1\n", " 1\n", " \n", " \n", - " 13531\n", - " 0209_0\n", - " 20.017\n", - " 5774.076\n", + " 6683\n", + " 0821_1\n", + " 20.187\n", + " 1781.894\n", " 0\n", " 0\n", " \n", " \n", - " 2499\n", - " 0997_1\n", - " 19.851\n", - " 858.152\n", + " 10772\n", + " 1286_1\n", + " 20.134\n", + " 1863.233\n", " 0\n", " 0\n", " \n", " \n", - " 4244\n", - " 0904_1\n", - " 19.845\n", - " 5658.720\n", - " 0\n", - " 0\n", + " 11004\n", + " 1316_0\n", + " 20.023\n", + " 3608.919\n", + " 1\n", + " 1\n", " \n", " \n", - " 10030\n", - " 0847_1\n", - " 19.743\n", - " 3548.238\n", + " 6448\n", + " 0772_0\n", + " 19.899\n", + " 6854.781\n", " 1\n", + " 1\n", + " \n", + " \n", + " 16068\n", + " 1883_1\n", + " 19.854\n", + " 6851.699\n", + " 0\n", " 0\n", " \n", " \n", - " 4289\n", - " 0835_0\n", - " 19.689\n", - " 2733.679\n", + " 11023\n", + " 1320_1\n", + " 19.841\n", + " 6604.750\n", " 1\n", " 1\n", " \n", " \n", - " 12428\n", - " 1192_1\n", - " 19.680\n", - " 4858.033\n", + " 15381\n", + " 1802_1\n", + " 19.683\n", + " 2053.694\n", " 0\n", " 0\n", " \n", " \n", - " 11804\n", - " 0270_0\n", - " 19.509\n", - " 1418.160\n", - " 0\n", - " 0\n", + " 15401\n", + " 1804_0\n", + " 19.526\n", + " 4824.636\n", + " 1\n", + " 1\n", " \n", " \n", - " 2712\n", - " 0058_0\n", - " 19.469\n", - " 5921.016\n", + " 12017\n", + " 1417_0\n", + " 19.448\n", + " 3390.013\n", " 1\n", " 1\n", " \n", " \n", - " 12599\n", + " 15492\n", " 1815_1\n", - " 19.420\n", - " 1822.286\n", + " 19.447\n", + " 1990.358\n", " 0\n", " 0\n", " \n", " \n", - " 3376\n", - " 1740_1\n", - " 19.375\n", - " 2014.695\n", - " 0\n", - " 0\n", + " 456\n", + " 0049_0\n", + " 19.395\n", + " 7475.256\n", + " 1\n", + " 1\n", " \n", " \n", - " 11950\n", - " 1292_0\n", - " 19.369\n", - " 1792.566\n", - " 0\n", - " 0\n", + " 4876\n", + " 0579_0\n", + " 19.229\n", + " 4376.239\n", + " 1\n", + " 1\n", " \n", " \n", - " 5891\n", - " 1417_0\n", - " 19.278\n", - " 3282.802\n", + " 1591\n", + " 0180_0\n", + " 19.183\n", + " 7058.989\n", " 1\n", " 1\n", " \n", " \n", - " 3827\n", - " 0821_1\n", - " 19.237\n", - " 1577.498\n", - " 0\n", - " 0\n", + " 1819\n", + " 0205_1\n", + " 19.020\n", + " 7126.758\n", + " 1\n", + " 1\n", " \n", " \n", - " 11139\n", - " 0713_1\n", - " 19.179\n", - " 2640.951\n", + " 2138\n", + " 0248_0\n", + " 18.924\n", + " 879.368\n", " 0\n", " 0\n", " \n", " \n", - " 14467\n", - " 0623_0\n", - " 19.152\n", - " 2231.004\n", + " 16703\n", + " 1957_1\n", + " 18.895\n", + " 4556.884\n", " 1\n", " 1\n", " \n", " \n", - " 2653\n", - " 1960_0\n", - " 19.112\n", - " 1291.189\n", + " 5864\n", + " 0713_1\n", + " 18.750\n", + " 2431.609\n", " 0\n", " 0\n", " \n", " \n", - " 6858\n", - " 1114_1\n", - " 19.059\n", - " 647.105\n", + " 10033\n", + " 1192_1\n", + " 18.719\n", + " 4546.768\n", " 0\n", " 0\n", " \n", " \n", - " 5481\n", - " 0935_0\n", - " 19.036\n", - " 3406.919\n", - " 1\n", - " 1\n", + " 7408\n", + " 0904_1\n", + " 18.619\n", + " 4429.908\n", + " 0\n", + " 0\n", " \n", " \n", - " 12643\n", - " 0579_0\n", - " 18.938\n", - " 4183.276\n", + " 5208\n", + " 0623_0\n", + " 18.606\n", + " 2128.298\n", " 1\n", " 1\n", " \n", " \n", - " 14549\n", - " 1045_1\n", - " 18.934\n", - " 1397.476\n", + " 9818\n", + " 1159_0\n", + " 18.569\n", + " 4193.988\n", " 0\n", " 0\n", " \n", " \n", - " 11047\n", - " 0294_1\n", - " 18.856\n", - " 4581.245\n", + " 8602\n", + " 1045_1\n", + " 18.402\n", + " 1495.768\n", " 0\n", " 0\n", " \n", " \n", - " 2822\n", - " 0933_1\n", - " 18.687\n", - " 1500.988\n", - " 0\n", - " 0\n", + " 525\n", + " 0058_0\n", + " 18.402\n", + " 7315.671\n", + " 1\n", + " 1\n", " \n", " \n", - " 1311\n", - " 1095_1\n", - " 18.681\n", - " 1678.772\n", + " 1849\n", + " 0209_0\n", + " 18.367\n", + " 5381.768\n", " 0\n", " 0\n", " \n", " \n", - " 16234\n", - " 0822_0\n", - " 18.534\n", - " 1649.853\n", + " 10471\n", + " 1242_0\n", + " 18.164\n", + " 3883.708\n", " 1\n", " 1\n", " \n", " \n", - " 1390\n", - " 1957_1\n", - " 18.478\n", - " 4849.318\n", + " 3124\n", + " 0363_0\n", + " 18.001\n", + " 5918.296\n", " 1\n", " 1\n", " \n", " \n", - " 4432\n", - " 1264_1\n", - " 18.457\n", - " 7644.919\n", - " 1\n", - " 1\n", + " 8212\n", + " 0997_1\n", + " 17.954\n", + " 779.251\n", + " 0\n", + " 0\n", " \n", " \n", - " 4490\n", - " 0932_0\n", - " 18.382\n", - " 3041.379\n", - " 0\n", + " 16074\n", + " 1885_0\n", + " 17.798\n", + " 1712.642\n", + " 1\n", " 0\n", " \n", " \n", - " 11691\n", - " 1781_1\n", - " 18.238\n", - " 2643.981\n", + " 5959\n", + " 0723_1\n", + " 17.770\n", + " 6004.795\n", " 1\n", " 1\n", " \n", " \n", - " 13351\n", - " 1242_0\n", - " 18.046\n", - " 4460.611\n", + " 15063\n", + " 1763_0\n", + " 17.381\n", + " 1560.972\n", " 1\n", " 1\n", " \n", " \n", - " 9851\n", - " 0597_0\n", - " 18.039\n", - " 6770.364\n", + " 10826\n", + " 1292_0\n", + " 17.362\n", + " 1447.342\n", " 0\n", " 0\n", " \n", " \n", - " 8805\n", - " 1652_1\n", - " 17.991\n", - " 2752.616\n", + " 1050\n", + " 0118_0\n", + " 17.358\n", + " 3549.397\n", " 1\n", " 1\n", " \n", " \n", - " 3263\n", - " 0072_0\n", - " 17.974\n", - " 1462.695\n", + " 5352\n", + " 0645_0\n", + " 17.352\n", + " 1470.828\n", " 1\n", " 1\n", " \n", + " \n", + " 15271\n", + " 1789_0\n", + " 17.284\n", + " 1550.430\n", + " 0\n", + " 0\n", + " \n", " \n", "\n", "" ], "text/plain": [ " sample peakz score bound ligand\n", - "13589 1227_0 21.452 1052.345 0 0\n", - "6016 1525_1 21.408 4809.380 1 1\n", - "27 0714_0 21.284 1942.200 0 0\n", - "16223 0205_1 21.281 7414.799 1 1\n", - "6284 1549_1 21.281 2367.889 0 0\n", - "13355 1316_0 21.229 3977.842 1 1\n", - "9137 1252_0 21.121 1935.243 0 0\n", - "7357 0180_0 21.014 7316.743 1 1\n", - "7573 1886_1 20.883 2125.104 0 0\n", - "7012 0652_1 20.576 6236.932 1 1\n", - "5024 0995_0 20.572 2479.002 0 0\n", - "14205 1317_1 20.458 4126.022 1 1\n", - "3393 1559_0 20.411 3167.417 0 0\n", - "8049 0118_0 20.404 4023.063 1 1\n", - "11440 1318_0 20.253 3538.773 1 1\n", - "4751 0175_0 20.218 2451.201 1 1\n", - "15203 1589_1 20.149 5325.159 1 1\n", - "6797 1710_0 20.135 2365.174 1 1\n", - "13531 0209_0 20.017 5774.076 0 0\n", - "2499 0997_1 19.851 858.152 0 0\n", - "4244 0904_1 19.845 5658.720 0 0\n", - "10030 0847_1 19.743 3548.238 1 0\n", - "4289 0835_0 19.689 2733.679 1 1\n", - "12428 1192_1 19.680 4858.033 0 0\n", - "11804 0270_0 19.509 1418.160 0 0\n", - "2712 0058_0 19.469 5921.016 1 1\n", - "12599 1815_1 19.420 1822.286 0 0\n", - "3376 1740_1 19.375 2014.695 0 0\n", - "11950 1292_0 19.369 1792.566 0 0\n", - "5891 1417_0 19.278 3282.802 1 1\n", - "3827 0821_1 19.237 1577.498 0 0\n", - "11139 0713_1 19.179 2640.951 0 0\n", - "14467 0623_0 19.152 2231.004 1 1\n", - "2653 1960_0 19.112 1291.189 0 0\n", - "6858 1114_1 19.059 647.105 0 0\n", - "5481 0935_0 19.036 3406.919 1 1\n", - "12643 0579_0 18.938 4183.276 1 1\n", - "14549 1045_1 18.934 1397.476 0 0\n", - "11047 0294_1 18.856 4581.245 0 0\n", - "2822 0933_1 18.687 1500.988 0 0\n", - "1311 1095_1 18.681 1678.772 0 0\n", - "16234 0822_0 18.534 1649.853 1 1\n", - "1390 1957_1 18.478 4849.318 1 1\n", - "4432 1264_1 18.457 7644.919 1 1\n", - "4490 0932_0 18.382 3041.379 0 0\n", - "11691 1781_1 18.238 2643.981 1 1\n", - "13351 1242_0 18.046 4460.611 1 1\n", - "9851 0597_0 18.039 6770.364 0 0\n", - "8805 1652_1 17.991 2752.616 1 1\n", - "3263 0072_0 17.974 1462.695 1 1" + "15892 1865_0 21.824 2651.008 1 1\n", + "5416 0652_1 21.712 6797.678 1 1\n", + "7493 0914_0 21.699 6921.540 1 1\n", + "3317 0382_0 21.640 1286.560 0 0\n", + "16080 1886_1 21.611 2337.166 0 0\n", + "10312 1227_0 21.466 952.191 0 0\n", + "1035 0116_0 21.200 7571.263 1 1\n", + "16432 1922_0 21.100 5470.046 1 1\n", + "11007 1317_1 20.998 4164.073 1 1\n", + "6610 0812_0 20.922 6507.950 1 1\n", + "8309 1009_0 20.876 5539.081 1 1\n", + "12978 1549_1 20.861 2219.043 0 0\n", + "11013 1318_0 20.642 3698.278 1 1\n", + "600 0072_0 20.640 1776.643 1 1\n", + "16734 1960_0 20.548 1494.204 0 0\n", + "14649 1710_0 20.523 3415.625 1 1\n", + "6683 0821_1 20.187 1781.894 0 0\n", + "10772 1286_1 20.134 1863.233 0 0\n", + "11004 1316_0 20.023 3608.919 1 1\n", + "6448 0772_0 19.899 6854.781 1 1\n", + "16068 1883_1 19.854 6851.699 0 0\n", + "11023 1320_1 19.841 6604.750 1 1\n", + "15381 1802_1 19.683 2053.694 0 0\n", + "15401 1804_0 19.526 4824.636 1 1\n", + "12017 1417_0 19.448 3390.013 1 1\n", + "15492 1815_1 19.447 1990.358 0 0\n", + "456 0049_0 19.395 7475.256 1 1\n", + "4876 0579_0 19.229 4376.239 1 1\n", + "1591 0180_0 19.183 7058.989 1 1\n", + "1819 0205_1 19.020 7126.758 1 1\n", + "2138 0248_0 18.924 879.368 0 0\n", + "16703 1957_1 18.895 4556.884 1 1\n", + "5864 0713_1 18.750 2431.609 0 0\n", + "10033 1192_1 18.719 4546.768 0 0\n", + "7408 0904_1 18.619 4429.908 0 0\n", + "5208 0623_0 18.606 2128.298 1 1\n", + "9818 1159_0 18.569 4193.988 0 0\n", + "8602 1045_1 18.402 1495.768 0 0\n", + "525 0058_0 18.402 7315.671 1 1\n", + "1849 0209_0 18.367 5381.768 0 0\n", + "10471 1242_0 18.164 3883.708 1 1\n", + "3124 0363_0 18.001 5918.296 1 1\n", + "8212 0997_1 17.954 779.251 0 0\n", + "16074 1885_0 17.798 1712.642 1 0\n", + "5959 0723_1 17.770 6004.795 1 1\n", + "15063 1763_0 17.381 1560.972 1 1\n", + "10826 1292_0 17.362 1447.342 0 0\n", + "1050 0118_0 17.358 3549.397 1 1\n", + "5352 0645_0 17.352 1470.828 1 1\n", + "15271 1789_0 17.284 1550.430 0 0" ] }, - "execution_count": 49, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -5096,7 +5111,7 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 32, "id": "75577c49-4b83-42fc-966b-e55ce46ea26e", "metadata": {}, "outputs": [], @@ -5119,7 +5134,7 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 33, "id": "1859f451-23a3-4ffa-a16e-9d8a50172f2c", "metadata": {}, "outputs": [ @@ -5127,14 +5142,28 @@ "name": "stdout", "output_type": "stream", "text": [ - "/n/holyscratch01/hekstra_lab/dhekstra/valdo-tests/pipeline/vae/blobs/run_23_11231_filtered_blob_stats_tagged.pkl\n", - "Total Number of Blobs: 14309\n", - "Total Number of Unique Samples: 1613\n" + "/n/netscratch/hekstra_lab/Lab/dhekstra/valdo-tests/pipeline/vae/blobs/run_23_11231_filtered_blob_stats_tagged.pkl\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/n/home12/dhekstra/.conda/envs/valdo-gpu-mamba/lib/python3.10/site-packages/sklearn/utils/_plotting.py:379: FutureWarning: `estimator_name` is deprecated in 1.7 and will be removed in 1.9. Use `name` instead.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Total Number of Blobs: 14620\n", + "Total Number of Unique Samples: 1614\n" ] }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD/CAYAAAAddgY2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8pXeV/AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA5B0lEQVR4nO3deVxU9f4/8NcAMwwgyFWUTQRESclMhVTwmmWAgamXm0ppLggmoilQcF36JlpJWSFuuCSCet3KtQWVSU1BLBXxWtItSxJlycBYZBlm+fz+4MdcxxlwzjDDwMz7+Xjw0PnM55x5v2fgzeFzPudzeIwxBkIIISbBzNABEEII6ThU9AkhxIRQ0SeEEBNCRZ8QQkwIFX1CCDEhVPQJIcSEUNEnhBATYmHoADqaXC5HaWkpbG1twePxDB0OIYS0G2MMtbW1cHFxgZlZ28fyJlf0S0tL4ebmZugwCCFE5+7cuYM+ffq02cfkir6trS2A5jfHzs5O4+0kEgmys7MRHBwMPp+vr/AMythzNPb8AOPPkfJTr6amBm5ubor61haTK/otQzp2dnaci761tTXs7OyM8psNMP4cjT0/wPhzpPzapsmQNZ3IJYQQE0JFnxBCTIhBi/758+cxceJEuLi4gMfj4dixY4/d5ty5c/D19YVQKES/fv2wdetW/QdKCCFGwqBFv66uDk8//TQ2bdqkUf+ioiKEhoZizJgxKCgowPLly7F48WIcPnxYz5ESQohxMOiJ3JCQEISEhGjcf+vWrejbty9SU1MBAIMGDcKVK1fw8ccf4+WXX9ZTlIQQY8QYQ4NEZugwlEgkUohlzbHpS5eavXPx4kUEBwcrtY0fPx7p6emQSCRqz3aLxWKIxWLF45qaGgDNZ8klEonGr93Sl8s2XY2x52js+QEdm6MhimZLUayuawSfL9V6P4wBr+64jJ/Ka3UYna5YYNw4MbpzuHiUy+fdpYp+eXk5HB0dldocHR0hlUpRUVEBZ2dnlW2Sk5OxatUqlfbs7GxYW1tzjkEkEnHepqsx9hy7Wn6MAU1ybtt8dVL/Oa7/0Rwl9Ya4qt0CuHTeAK/bcc6cOQNLc83719fXa9y3SxV9QHUeasufQa3NT122bBni4+MVj1suYggODuY8T18kEiEoKMgo5wcDxp+jIfJr79Fw5z4i7foGOdlif9Qz6CwrskgkUpw5cwYTxgdCIBBovF3LCIYmulTRd3JyQnl5uVLbvXv3YGFhgZ49e6rdxtLSEpaWlirtfD5fqx98bbfrSrpSjlyKqoTxIJY1/wum/59yxoCpW79DYZnmP5BdjY+zHT6P9u+woimRSHDqVDbGj9fNFblWfPNOtQaXRCKBpTkgEAg45celb5cq+v7+/vjyyy+V2rKzs+Hn59dlihTR3OMKenNRvcixqFog8dKZ9gfXwTQtrrouio/T0UVTwmOwNAesBRbg87tU+eo0DPquPXjwAL/++qvicVFREa5du4YePXqgb9++WLZsGUpKSrB7924AQHR0NDZt2oT4+HjMmzcPFy9eRHp6Ovbv32+oFIgOqCvu2hX0zkkXR8OaFlcqiuRxDPpdceXKFTz//POKxy1j77Nnz0ZmZibKyspQXFyseN7T0xNZWVmIi4vD5s2b4eLigg0bNtB0zU5MP0fryjrrUXCLzjaEQEybQYv+c8891+Z81MzMTJW2sWPH4urVq3qMimhDX0frmhR0OgomRHP0nU9apelJUn0WdzpKJkS3qOgbOU6zW/7/hS/1TVJYyHk6G1PX5dE6IaR9qOh3YrqY492Rs1voaJ2Qzo+KfifFGMOUrReRf/svg8bBZeYJFXdCOj8q+p1My9F9fZNMZwW/PbNbqJATYlyo6HcCLYW+teGYK28HwlrAYSGOR9DsFkJIC/rJNpDHFfoWfu5/Q08bAR1tE0J0gop+B3j0hOzjCv3DwzE0vEII0SUq+nqm6QlZKvSEkI5ARV+PGGOorGtqteBToSeEdDQq+nqi7gj/0ROyVOgJIR2Nir6eNEiUp1zSCVlCSGdARb8DXHk7kAo+IaRTMDN0AMaIMYb6pv/N1rEW0DAOIaRzoCN9HessyycQQog6dKSvY+rG8q342l9NSwghukRH+jr28D1haCyfENLZ0JG+DsnlDC9tzFU8prF8QkhnQ0VfRxhrLvhFFXUAmi+8omEdQkhnQ0VfRxokMsVaOp4ONvjqjb/TUT4hpNPRquhLpVJ888032LZtG2prawEApaWlePDggU6D60oeHsv/6o2/w8yMCj4hpPPhfCL39u3bePHFF1FcXAyxWIygoCDY2tpi7dq1aGxsxNatW/URZ6fGGMPUrRcVj+kAnxDSWXE+0l+yZAn8/Pzw119/wcrKStEeFhaG06dP6zS4ruLhoR0ayyeEdGacj/Rzc3Nx4cIFCAQCpXZ3d3eUlJToLLCuqnnVTDrUJ4R0TpyP9OVyOWQymUr73bt3YWtryzmAtLQ0eHp6QigUwtfXFzk5OW3237t3L55++mlYW1vD2dkZERERqKys5Py6uvTweD7Ve0JIZ8a56AcFBSE1NVXxmMfj4cGDB1i5ciVCQ0M57evgwYOIjY3FihUrUFBQgDFjxiAkJATFxcVq++fm5mLWrFmIjIzEjRs38Pnnn+Py5cuIiorimobOPDqeTwghnRnnor9u3TqcO3cOPj4+aGxsxPTp0+Hh4YGSkhJ8+OGHnPaVkpKCyMhIREVFYdCgQUhNTYWbmxu2bNmitv93330HDw8PLF68GJ6envj73/+O+fPn48qVK1zT0Jn6JhrPJ4R0HZzH9F1cXHDt2jUcOHAA+fn5kMvliIyMxIwZM5RO7D5OU1MT8vPzsXTpUqX24OBg5OXlqd0mICAAK1asQFZWFkJCQnDv3j0cOnQIEyZMaPV1xGIxxGKx4nFNTXOBlkgkkEgkGsfb0vfhbRhjmLLlO8XjfZF+kEqlGu+zs1GXozEx9vwA48+R8mt7O03wGHt4RPrxzp8/j4CAAFhYKP++kEqlyMvLw7PPPqvRfkpLS+Hq6ooLFy4gICBA0b5mzRrs2rULP//8s9rtDh06hIiICDQ2NkIqlWLSpEk4dOgQ+Hy+2v5JSUlYtWqVSvu+fftgbW2tUaytEcuAxEvN74OrNUPCEBmN6RNCOlx9fT2mT5+O6upq2NnZtdmXc9E3NzdHWVkZevfurdReWVmJ3r17qz3Jq05L0c/Ly4O/v7+i/f3338eePXvw3//+V2WbwsJCBAYGIi4uDuPHj0dZWRkSEhLwzDPPID09Xe3rqDvSd3NzQ0VFxWPfnIdJJBKIRCIEBQUpfsHUN0nx9LtnAADX3h4HG8uuvX6duhyNibHnBxh/jpSfejU1NXBwcNCo6HOuUowxtVMSKysrYWNjo/F+HBwcYG5ujvLycqX2e/fuwdHRUe02ycnJGD16NBISEgAAQ4YMgY2NDcaMGYP33nsPzs7OKttYWlrC0tJSpZ3P52v1TfPwdnz2v/dBIOCDz+/aRb+Ftu9NV2Hs+QHGnyPlp9pfUxpXqX/+858AmmfrzJkzR6mQymQyXL9+XWmY5nEEAgF8fX0hEokQFhamaBeJRJg8ebLaberr61WGlczNm0+ccvyDRScM8JKEENIuGhf97t27A2gurra2tkonbQUCAUaNGoV58+ZxevH4+HjMnDkTfn5+8Pf3x/bt21FcXIzo6GgAwLJly1BSUoLdu3cDACZOnIh58+Zhy5YtiuGd2NhYjBgxAi4uLpxeu71oqiYhpCvSuOhnZGQAADw8PPDWW29xGsppTXh4OCorK7F69WqUlZVh8ODByMrKgru7OwCgrKxMac7+nDlzUFtbi02bNuHNN9+Evb09xo0bx3mqqC7QVE1CSFfEeRB65cqVOg0gJiYGMTExap/LzMxUaXvjjTfwxhtv6DQGrh49yqelFwghXYVWZx4PHTqEzz77DMXFxWhqalJ67urVqzoJrDN7dIE1awEd5RNCugbOV+Ru2LABERER6N27NwoKCjBixAj07NkTt27dQkhIiD5i7NToKJ8Q0pVwLvppaWnYvn07Nm3aBIFAgMTERIhEIixevBjV1dX6iLHToQXWCCFdFeeiX1xcrJiaaWVlpbhz1syZM7F//37dRtcJ0awdQkhXxrnoOzk5KZYydnd3x3ffNa89U1RUZJC58h2NbphCCOnKOBf9cePG4csvvwQAREZGIi4uDkFBQQgPD1e6yMoU0Hg+IaSr4Tx7Z/v27ZDL5QCA6Oho9OjRA7m5uZg4caLioipTQfWeENLVcC76ZmZmMDP73x8I06ZNw7Rp0wAAJSUlcHV11V10hBBCdIrz8I465eXleOONN9C/f39d7I4QQoieaFz0q6qqMGPGDPTq1QsuLi7YsGED5HI53nnnHfTr1w/fffcddu7cqc9YCSGEtJPGwzvLly/H+fPnMXv2bJw8eRJxcXE4efIkGhsbceLECYwdO1afcRJCCNEBjYv+119/jYyMDAQGBiImJgb9+/eHt7e30k3SCSGEdG4aD++UlpbCx8cHANCvXz8IhUJERUXpLTBCCCG6p3HRl8vlSndnMTc318nyyoQQQjqOxsM7jDGlO2Y1NjYiOjpapfAfOXJEtxESQgjRGY2L/uzZs5Uev/baazoPhhBCiH5xvnOWqTOB5YUIIUZMJxdnmQrGgFd3XDZ0GIQQojUq+hw0yYGfypuXkqYVNgkhXREVfS3RCpuEkK6Iir6WqN4TQroiKvqEEGJCtCr6e/bswejRo+Hi4oLbt28DAFJTU3H8+HGdBkcIIUS3OBf9LVu2ID4+HqGhoaiqqoJMJgMA2Nvb0zo8hBDSyXEu+hs3bsSnn36KFStWwNz8f7NX/Pz88MMPP3AOIC0tDZ6enhAKhfD19UVOTk6b/cViMVasWAF3d3dYWlrCy8uLlnQmhBANcb5zVlFREYYNG6bSbmlpibq6Ok77OnjwIGJjY5GWlobRo0dj27ZtCAkJQWFhIfr27at2m2nTpuGPP/5Aeno6+vfvj3v37kEqlXJNgxBCTBLnou/p6Ylr167B3d1dqf3EiROKVTg1lZKSgsjISMVqnampqTh16hS2bNmC5ORklf4nT57EuXPncOvWLfTo0QMA4OHhwTUFQggxWZyLfkJCAhYuXIjGxkYwxnDp0iXs378fycnJ2LFjh8b7aWpqQn5+PpYuXarUHhwcjLy8PLXbfPHFF/Dz88PatWuxZ88e2NjYYNKkSXj33XdhZWWldhuxWAyxWKx4XFNTAwCQSCSQSCQax/toX4lEAgnPuNZkaMmRy/vSlRh7foDx50j5tb2dJjgX/YiICEilUiQmJqK+vh7Tp0+Hq6sr1q9fj1deeUXj/VRUVEAmk8HR0VGp3dHREeXl5Wq3uXXrFnJzcyEUCnH06FFUVFQgJiYG9+/fb3VcPzk5GatWrVJpz87OhrW1tcbxPurUqWxYGukFuSKRyNAh6JWx5wcYf46Un7L6+nqN+/IY034JsYqKCsjlcvTu3ZvztqWlpXB1dUVeXh78/f0V7e+//z727NmD//73vyrbBAcHIycnB+Xl5ejevTuA5qWcp0yZgrq6OrVH++qO9N3c3FBRUQE7OzuN45VIJPjqpAiJl5p/T/7n/8bBWsD5d2anJpFIIBKJEBQUpHTvBGNh7PkBxp8j5adeTU0NHBwcUF1d/di6xrlqrVq1Cq+99hq8vLzg4ODAdXMFBwcHmJubqxzV37t3T+Xov4WzszNcXV0VBR8ABg0aBMYY7t69iwEDBqhsY2lpqbgHwMP4fH67vmmatzeuot+ive9NZ2fs+QHGnyPlp9pfU5ynbB4+fBje3t4YNWoUNm3ahD///JPrLgAAAoEAvr6+Kn/GiEQiBAQEqN1m9OjRKC0txYMHDxRtv/zyC8zMzNCnTx+t4iCEEFPCuehfv34d169fx7hx45CSkgJXV1eEhoZi3759nMaVACA+Ph47duzAzp078dNPPyEuLg7FxcWIjo4GACxbtgyzZs1S9J8+fTp69uyJiIgIFBYW4vz580hISMDcuXNbPZFLCCHkf7RahuHJJ5/EmjVrcOvWLZw9exaenp6IjY2Fk5MTp/2Eh4cjNTUVq1evxtChQ3H+/HlkZWUppoOWlZWhuLhY0b9bt24QiUSoqqqCn58fZsyYgYkTJ2LDhg3apEEIISan3YPSNjY2sLKygkAgQG1tLeftY2JiEBMTo/a5zMxMlbaBAwca/Zl7QgjRF62O9IuKivD+++/Dx8cHfn5+uHr1KpKSklqdakkIIaRz4Hyk7+/vj0uXLuGpp55CRESEYp4+IYSQzo9z0X/++eexY8cOPPnkk/qIhxBCiB5xLvpr1qzRRxyEEEI6gEZFPz4+Hu+++y5sbGwQHx/fZt+UlBSdBEYIIUT3NCr6BQUFigV9CgoK9BoQIYQQ/dGo6J89e1bt/wkhhHQtnKdszp07V+18/Lq6OsydO1cnQXVGjDGs/9FIl9UkhJgMzkV/165daGhoUGlvaGjA7t27dRJUZ9QgkaGkngcA8HG2gxWffgEQQroejWfv1NTUgDEGxhhqa2shFAoVz8lkMmRlZWm1xHJX9Hm0P3g8nqHDIIQQzjQu+vb29uDxeODxePD29lZ5nsfjqb1ZiTGiek8I6ao0Lvpnz54FYwzjxo3D4cOHFfeoBZqXSXZ3d4eLi4tegiSEEKIbGhf9sWPHAmhed6dv3740vEEIIV2QRkX/+vXrGDx4MMzMzFBdXY0ffvih1b5DhgzRWXCEEEJ0S6OiP3ToUJSXl6N3794YOnQoeDwe1N1al8fjQSaT6TxIQgghuqFR0S8qKkKvXr0U/yeEENI1aVT0W+5k9ej/CSGEdC1aXZz19ddfKx4nJibC3t4eAQEBuH37tk6DI4QQoluci/6aNWsUNyG/ePEiNm3ahLVr18LBwQFxcXE6D5AQQojucF5P/86dO+jfvz8A4NixY5gyZQpef/11jB49Gs8995yu4yOEEKJDnI/0u3XrhsrKSgBAdnY2AgMDAQBCoVDtmjyEEEI6D85H+kFBQYiKisKwYcPwyy+/YMKECQCAGzduwMPDQ9fxEUII0SHOR/qbN2+Gv78//vzzTxw+fBg9e/YEAOTn5+PVV1/VeYCEEEJ0h/ORvr29PTZt2qTSbiqLrRFCSFfG+UgfAKqqqvDJJ58gKioK8+bNQ0pKCqqrq7UKIC0tDZ6enhAKhfD19UVOTo5G2124cAEWFhYYOnSoVq9LCCGmiHPRv3LlCry8vLBu3Trcv38fFRUVWLduHby8vHD16lVO+zp48CBiY2OxYsUKFBQUYMyYMQgJCUFxcXGb21VXV2PWrFl44YUXuIZPCCEmjXPRj4uLw6RJk/D777/jyJEjOHr0KIqKivDSSy8hNjaW075SUlIQGRmJqKgoDBo0CKmpqXBzc8OWLVva3G7+/PmYPn06/P39uYZPCCEmjfOY/pUrV/Dpp5/CwuJ/m1pYWCAxMRF+fn4a76epqQn5+flYunSpUntwcDDy8vJa3S4jIwO//fYb/v3vf+O999577OuIxWKIxWLF45qaGgCARCKBRCLROF6JRPrQ/yWQ8FQXnOvqWt4PLu9LV2Ls+QHGnyPl1/Z2muBc9O3s7FBcXIyBAwcqtd+5cwe2trYa76eiogIymQyOjo5K7Y6OjigvL1e7zc2bN7F06VLk5OQo/dJpS3JystqTzNnZ2bC2ttY4XrEMaHm7Tp3KhqUR3yJXJBIZOgS9Mvb8AOPPkfJTVl9fr3FfzkU/PDwckZGR+PjjjxEQEAAej4fc3FwkJCRoNWXz0ZuxMMbU3qBFJpNh+vTpWLVqldrbNbZm2bJliI+PVzyuqamBm5sbgoODYWdnp/F+qusagUvnAQDjxwfDWsD5rev0JBIJRCIRgoKCwOfzDR2Ozhl7foDx50j5qdcygqEJzpXr448/Bo/Hw6xZsyCVNg958Pl8LFiwAB988IHG+3FwcIC5ubnKUf29e/dUjv4BoLa2FleuXEFBQQEWLVoEAJDL5WCMwcLCAtnZ2Rg3bpzKdpaWlrC0tFRp5/P5nN5UPl/60P/54PONr+i34PredDXGnh9g/DlSfqr9NcW5cgkEAqxfvx7Jycn47bffwBhD//79OQ2VtOzH19cXIpEIYWFhinaRSITJkyer9Lezs1O5Y1daWhrOnDmDQ4cOwdPTk2sqhBBicjQu+vX19UhISMCxY8cgkUgQGBiIDRs2wMHBQesXj4+Px8yZM+Hn5wd/f39s374dxcXFiI6OBtA8NFNSUoLdu3fDzMwMgwcPVtq+d+/eEAqFKu2EEELU07jor1y5EpmZmZgxYwaEQiH279+PBQsW4PPPP9f6xcPDw1FZWYnVq1ejrKwMgwcPRlZWluJGLWVlZY+ds08IIURzGhf9I0eOID09Ha+88goA4LXXXsPo0aMhk8lgbq79VJaYmBjExMSofS4zM7PNbZOSkpCUlKT1axNCiKnR+OKsO3fuYMyYMYrHI0aMgIWFBUpLS/USGCGEEN3TuOjLZDIIBAKlNgsLC8UMHkIIIZ2fxsM7jDHMmTNHafpjY2MjoqOjYWNjo2g7cuSIbiMkhBCiMxoX/dmzZ6u0vfbaazoNhhBCiH5pXPQzMjL0GQchhJAOoNV6+oQQQromKvqEEGJCqOgTQogJoaJPCCEmhIo+IYSYEK2K/p49ezB69Gi4uLjg9u3bAIDU1FQcP35cp8ERQgjRLc5Ff8uWLYiPj0doaCiqqqogk8kAAPb29khNTdV1fIQQQnSIc9HfuHEjPv30U6xYsUJpoTU/Pz+V9e4JIYR0LpyLflFREYYNG6bSbmlpibq6Op0ERQghRD84F31PT09cu3ZNpf3EiRPw8fHRRUyEEEL0hPPtEhMSErBw4UI0NjaCMYZLly5h//79SE5Oxo4dO/QRIyGEEB3hXPQjIiIglUqRmJiI+vp6TJ8+Ha6urli/fr3iBiuEEEI6J85FHwDmzZuHefPmoaKiAnK5HL1799Z1XIQQQvRAq6Lfoj03RSeEENLxOBd9T09P8Hi8Vp+/detWuwIihBCiP5yLfmxsrNJjiUSCgoICnDx5EgkJCbqKixBCiB5wLvpLlixR275582ZcuXKl3QERQgjRH50tuBYSEoLDhw/raneEEEL0QGdF/9ChQ+jRo4eudkcIIUQPOBf9YcOGYfjw4YqvYcOGwdnZGcuXL8fy5cs5B5CWlgZPT08IhUL4+voiJyen1b5HjhxBUFAQevXqBTs7O/j7++PUqVOcX5MQQkwV5zH9f/zjH0qPzczM0KtXLzz33HMYOHAgp30dPHgQsbGxSEtLw+jRo7Ft2zaEhISgsLAQffv2Vel//vx5BAUFYc2aNbC3t0dGRgYmTpyI77//Xu16QIQQQpRxKvpSqRQeHh4YP348nJyc2v3iKSkpiIyMRFRUFIDmNflPnTqFLVu2IDk5WaX/o0s3r1mzBsePH8eXX35JRZ8QQjTAqehbWFhgwYIF+Omnn9r9wk1NTcjPz8fSpUuV2oODg5GXl6fRPuRyOWpra9s8lyAWiyEWixWPa2pqADRPNZVIJBrHK5FIH/q/BBIe03jbrqLl/eDyvnQlxp4fYPw5Un5tb6cJzsM7I0eOREFBAdzd3bluqqSiogIymQyOjo5K7Y6OjigvL9doH5988gnq6uowbdq0VvskJydj1apVKu3Z2dmwtrbWOF6xDGh5u06dyoaleZvduzSRSGToEPTK2PMDjD9Hyk9ZfX29xn05F/2YmBi8+eabuHv3Lnx9fWFjY6P0/JAhQzjt79GrexljbV7x22L//v1ISkrC8ePH21z7Z9myZYiPj1c8rqmpgZubG4KDg2FnZ6dxnNV1jcCl8wCA8eODYS1o1woWnZJEIoFIJEJQUBD4fL6hw9E5Y88PMP4cKT/1WkYwNKFx5Zo7dy5SU1MRHh4OAFi8eLHiOR6PpyjWLbdPfBwHBweYm5urHNXfu3dP5ej/UQcPHkRkZCQ+//xzBAYGttnX0tISlpaWKu18Pp/Tm8rnSx/6Px98vvEV/RZc35uuxtjzA4w/R8pPtb+mNK5cu3btwgcffICioiKNd94WgUAAX19fiEQihIWFKdpFIhEmT57c6nb79+/H3LlzsX//fkyYMEEnsRBCiKnQuOgz1nzisr1j+Q+Lj4/HzJkz4efnB39/f2zfvh3FxcWIjo4G0Dw0U1JSgt27dwNoLvizZs3C+vXrMWrUKMVfCVZWVujevbvO4iKEEGPFaYxCk7F2LsLDw1FZWYnVq1ejrKwMgwcPRlZWluIXS1lZGYqLixX9t23bBqlUioULF2LhwoWK9tmzZyMzM1OnsRFCiDHiVPS9vb0fW/jv37/PKYCYmBjExMSofe7RQv7tt99y2jchhBBlnIr+qlWraBiFEEK6ME5F/5VXXqFbIxJCSBem8YJruh7PJ4QQ0vE0Lvots3cIIYR0XRoP78jlcn3GQYjRY4xBKpVqfAGjNiQSCSwsLNDY2KjX1zEUU86Pz+fD3Lz9678Y72WlhHQiTU1NKCsr47RGijYYY3BycsKdO3eMckjWlPPj8Xjo06cPunXr1q7XoKJPiJ7J5XIUFRXB3NwcLi4uEAgEeitYcrkcDx48QLdu3WBmprMb43UappofYwx//vkn7t69iwEDBrTriJ+KPiF61tTUBLlcDjc3N04ru2pDLpejqakJQqHQaIuiqebXq1cv/P7775BIJO0q+sb3rhHSSRljkSIdR1d/HdJ3ISGEmBAq+oQQYkKo6BNCOJs5cybWrFlj6DCMyjPPPIMjR47o/XWo6BNCOLl+/Tq+/vprvPHGGyrP7du3D+bm5orl0R+WmZkJe3t7tfu0t7dXWWDx7NmzCA0NRc+ePWFtbQ0fHx+89dZbKC0t1UUaajHGkJSUBBcXF1hZWeG5557DjRs32txGIpFg9erV8PLyglAoxNNPP42TJ08q9fHw8ACPx1P5eni14P/7v//D8uXL9X5NFBV9QggnmzZtwtSpU2Fra6vy3M6dO5GYmIgDBw6065qEbdu2ITAwEE5OTjh8+DAKCwuxdetWVFdXY/Pmze0Jv01r165FSkoKNm3ahMuXL8PJyQlBQUGora1tdZu3334b27Ztw8aNG1FYWIjo6GiEhYWhoKBA0efy5csoKytTfLXcA3fq1KmKPhMmTEB1dTVOnz6tt/wAAMzEVFdXMwCsurqa03ZVD+qZ+7++Yu7/+orViSV6is6wmpqa2LFjx1hTU5OhQ9ELQ+XX0NDACgsLWUNDg6JNLpezOrFE51+1DWJW+kcFq20Qt9pHLpdrnYtMJmP29vbsq6++UnmuqKiIWVlZsaqqKjZy5Ei2a9cupeczMjJY9+7d1e63e/fuLCMjgzHG2J07d5hAIGCxsbFqX//3339nMplM6xxaI5fLmZOTE/vggw8UbY2Njax79+5s69atrW7n7OzMNm3apNQ2efJkNmPGjFa3WbJkCfPy8lL5LGbPns3Cw8PV5qfu+6gFl7pG8/QJMYAGiQw+75wyyGsXrh4Pa4F2P/rXr19HVVUV/Pz8VJ7buXMnJkyYgO7du+O1115Deno6Zs2axfk1Pv/8czQ1NSExMVHt820t7x4SEoKcnJw29//gwQO17UVFRSgvL0dwcLCizdLSEmPHjkVeXh7mz5+vdjuxWAyhUKjUZmVlhdzcXLX9m5qa8O9//xvx8fEq0zCfeeYZrF27ts3424uKPiFEY7///jvMzc1VlliXy+XIzMzExo0bATQvwx4fH49ff/0V/fv35/QaN2/ehJ2dHZydnTnHt2PHDjQ0NHDeDoDi9quOjo5K7Y6Ojrh9+3ar240fPx4pKSl49tln4eXlhdOnT+P48eOtrg107NgxVFVVYc6cOSrPubq64u7du5DL5Xq7roOKPiEGYMU3R+Hq8Trfr1wuR21NLWztbFstGlZ87a/mbGhogKWlpcoRanZ2Nurq6hASEgIAcHBwQHBwMHbu3Ml5lg9jTOsLkVxdXbXa7mGPvvbj4lm/fj3mzZuHgQMHgsfjwcvLCxEREcjIyFDbPz09HSEhIXBxcVF5zsrKCnK5HGKxGBYW+inPVPQJMQAej6f1EEtb5HI5pAJzWAss9HKk6ODggPr6ejQ1NUEgECjad+7cifv37ystMyGXy1FQUIB3330X5ubmsLOzw4MHDyCTyZSWEZDJZHjw4IFi2Mbb2xvV1dUoKyvjfLTfnuEdJycnAM1H/A+/7r1791SO/h/Wq1cvHDt2DI2NjaisrISLiwuWLl0KT09Plb63b9/GN9980+rUzJb30MrKqs0c2oNm7xBCNDZ06FAAQGFhoaKtsrISx48fx4EDB3Dt2jWlrwcPHuDEiRMAgIEDB0ImkynNagGAq1evQiaT4YknngAATJkyBQKBoNWx7erq6lbj27Fjh0oMj361xtPTE05OToqZNUDz+Pu5c+cQEBDQ5vsCAEKhEK6urpBKpTh8+DAmT56s0icjIwO9e/fGhAkT1O7jxo0bGDJkyGNfqz3oSF9DdA8ZQpqPaocPH47c3FzFL4A9e/agZ8+emDp1qspfFy+99BLS09Px0ksvwcfHByEhIZg7dy5SUlLg5eWF3377DfHx8QgJCYGPjw8AwM3NDevWrcOiRYtQU1ODWbNmwcPDA3fv3sWuXbsgEAiwYcMGtfG1Z3iHx+MhNjYWa9aswYABAzBgwACsWbMG1tbWmD59uqLfrFmz4OrqiuTkZADA999/j5KSEgwdOhQlJSVISkqCXC5XOREtl8uRkZGB2bNntzp0k5ubi3HjxmmdgyboSF8DjDG8uuOyocMgpFN4/fXXsXfvXsXjnTt3IiwsTO1w0ssvv4yvvvoKf/zxBwDgwIEDCAwMxIIFC+Dj44MFCxbghRdewP79+5W2i4mJQXZ2NkpKShAWFoaBAwciKioKdnZ2WLRokd5yS0xMRGxsLGJiYuDn54eSkhJkZ2crXZNQXFyMsrIyxePGxka8/fbb8PHxQVhYGFxdXZGbm6tyIdo333yD4uJizJ07V+1rl5SUIC8vT+kXjD7wGDOtY9iamhp0794d1dXVsLOz02ib+iapYnrdICdbZC0ZY5Q3cJBIJMjKykJoaCj4fL6hw9E5Q+XX2NiIoqIieHp6qkzt0zW5XI6amhrY2dnpbfZHY2MjnnjiCRw4cAD+/v56eY3WdER+hpKQkICqqip89NFHavNr6/uIS12j4R2O9kc9Y5QFnxBNCYVC7N69GxUVFYYOxaj07t0b8fHxen8dg/+qTEtLU/zm8vX1feyZ93PnzsHX1xdCoRD9+vXD1q1bOyjSZlTvCQHGjh2LiRMnGjoMo5KQkNDmLCFdMWjRP3jwIGJjY7FixQoUFBRgzJgxCAkJQXFxsdr+RUVFCA0NxZgxY1BQUIDly5dj8eLFOHz4cAdHTgghXZNBi35KSgoiIyMRFRWFQYMGITU1FW5ubtiyZYva/lu3bkXfvn2RmpqKQYMGISoqCnPnzsXHH3/cwZETQkjXZLAx/aamJuTn52Pp0qVK7cHBwcjLy1O7zcWLF5XWxQCaL4FOT0+HRCJRe3JOLBZDLBYrHtfU1ABoPqknkUg0ilUikSr9X9PtupqWvCg/3ZJKpWCMQSaT6X3Z3JZ5GYwxvb+WIZhyfjKZDIwxSKWqNYjL97TBin5FRQVkMpnadS5a1sB4VHl5udr+UqkUFRUVaq/eS05OxqpVq1Tas7OzNb5JtVgGtLxVZ86cgaX2V7F3CQ9fnGKMOjo/Ho8HZ2dn3L9/X+1yxPrQ1lLAxsAU86uvr0d9fT3Onj2r8guByzLWBp+9w3WdC3X91bW3WLZsmdIZ8ZqaGri5uSE4OFjjKZuMMYwbJ8aZM2cwYXyg0uXnxkQikUAkEiEoKMhop2waKr8//vgDNTU1EAqFsLa21tsMMMYY6urqYGNjY5SzzEw1P7lcjrq6OvTs2RNDhgxRyb1lBEMTBiv6Dg4OMDc3Vzmqb2udCycnJ7X9LSws0LNnT7XbWFpawtLSUqWdz+dz+sHvzuPB0hwQCARGWRAfxvW96WoMkZ+rqyvMzc31Ps2RMYaGhgZYWVkZbVE01fzMzMzg6uqq9qCTy/ezwYq+QCCAr68vRCIRwsLCFO0ikUjtmhUA4O/vjy+//FKpLTs7G35+fkZdpEjX1zLE07t3b72eU5BIJDh//jyeffZZo/yZMOX8BAKBTi5IM+jwTnx8PGbOnAk/Pz/4+/tj+/btKC4uVtxfc9myZSgpKcHu3bsBANHR0di0aRPi4+Mxb948XLx4Eenp6SqXcBPSWZmbmyutMKmP/UulUgiFQqMsipRf+xm06IeHh6OyshKrV69GWVkZBg8ejKysLLi7uwMAysrKlObse3p6IisrC3Fxcdi8eTNcXFywYcMGvPzyy4ZKgRBCuhSDn8iNiYlBTEyM2ucyMzNV2saOHYurV6/qOSpCCDFOBl+GgRBCSMcx+JF+R2uZ4sllihPQfIKlvr4eNTU1RjmWCBh/jsaeH2D8OVJ+6rXUM00WTTa5ot9y0YObm5uBIyGEEN2qra1V3HayNSa3nr5cLkdpaSlsbW05zfNtuajrzp07Gl/U1dUYe47Gnh9g/DlSfuoxxlBbWwsXF5fHTus0uSN9MzMz9OnTR+vt7ezsjPKb7WHGnqOx5wcYf46Un6rHHeG3oBO5hBBiQqjoE0KICaGiryFLS0usXLlS7To+xsLYczT2/ADjz5Hyaz+TO5FLCCGmjI70CSHEhFDRJ4QQE0JFnxBCTAgVfUIIMSFU9B+SlpYGT09PCIVC+Pr6Iicnp83+586dg6+vL4RCIfr164etW7d2UKTa45LjkSNHEBQUhF69esHOzg7+/v44depUB0bLHdfPsMWFCxdgYWGBoUOH6jfAduKan1gsxooVK+Du7g5LS0t4eXlh586dHRStdrjmuHfvXjz99NOwtraGs7MzIiIiUFlZ2UHRcnP+/HlMnDgRLi4u4PF4OHbs2GO30XmdYYQxxtiBAwcYn89nn376KSssLGRLlixhNjY27Pbt22r737p1i1lbW7MlS5awwsJC9umnnzI+n88OHTrUwZFrjmuOS5YsYR9++CG7dOkS++WXX9iyZcsYn89nV69e7eDINcM1vxZVVVWsX79+LDg4mD399NMdE6wWtMlv0qRJbOTIkUwkErGioiL2/fffswsXLnRg1NxwzTEnJ4eZmZmx9evXs1u3brGcnBz25JNPsn/84x8dHLlmsrKy2IoVK9jhw4cZAHb06NE2++ujzlDR//9GjBjBoqOjldoGDhzIli5dqrZ/YmIiGzhwoFLb/Pnz2ahRo/QWY3txzVEdHx8ftmrVKl2HphPa5hceHs7efvtttnLlyk5d9Lnmd+LECda9e3dWWVnZEeHpBNccP/roI9avXz+ltg0bNrA+ffroLUZd0aTo66PO0PAOgKamJuTn5yM4OFipPTg4GHl5eWq3uXjxokr/8ePH48qVK3q9B6q2tMnxUXK5HLW1tejRo4c+QmwXbfPLyMjAb7/9hpUrV+o7xHbRJr8vvvgCfn5+WLt2LVxdXeHt7Y233noLDQ0NHREyZ9rkGBAQgLt37yIrKwuMMfzxxx84dOgQJkyY0BEh650+6ozJLbimTkVFBWQyGRwdHZXaHR0dUV5ernab8vJytf2lUikqKirg7Oyst3i1oU2Oj/rkk09QV1eHadOm6SPEdtEmv5s3b2Lp0qXIycmBhUXn/lHQJr9bt24hNzcXQqEQR48eRUVFBWJiYnD//v1OOa6vTY4BAQHYu3cvwsPD0djYCKlUikmTJmHjxo0dEbLe6aPO0JH+Qx5dapkx1ubyy+r6q2vvTLjm2GL//v1ISkrCwYMH0bt3b32F126a5ieTyTB9+nSsWrUK3t7eHRVeu3H5/ORyOXg8Hvbu3YsRI0YgNDQUKSkpyMzM7LRH+wC3HAsLC7F48WK88847yM/Px8mTJ1FUVITo6OiOCLVD6LrOdO7Dmw7i4OAAc3NzlaOJe/fuqfyWbeHk5KS2v4WFBXr27Km3WLWlTY4tDh48iMjISHz++ecIDAzUZ5ha45pfbW0trly5goKCAixatAhAc5FkjMHCwgLZ2dkYN25ch8SuCW0+P2dnZ7i6uiotuTto0CAwxnD37l0MGDBArzFzpU2OycnJGD16NBISEgAAQ4YMgY2NDcaMGYP33nuv0/3FzZU+6gwd6QMQCATw9fWFSCRSaheJRAgICFC7jb+/v0r/7Oxs+Pn5dcrbuGmTI9B8hD9nzhzs27evU4+Tcs3Pzs4OP/zwA65du6b4io6OxhNPPIFr165h5MiRHRW6RrT5/EaPHo3S0lI8ePBA0fbLL7+0+54S+qJNjvX19So3DTE3Nweg2a0DOzu91BmtTwEbmZapYunp6aywsJDFxsYyGxsb9vvvvzPGGFu6dCmbOXOmon/LVKq4uDhWWFjI0tPTu8yUTU1z3LdvH7OwsGCbN29mZWVliq+qqipDpdAmrvk9qrPP3uGaX21tLevTpw+bMmUKu3HjBjt37hwbMGAAi4qKMlQKj8U1x4yMDGZhYcHS0tLYb7/9xnJzc5mfnx8bMWKEoVJoU21tLSsoKGAFBQUMAEtJSWEFBQWKKakdUWeo6D9k8+bNzN3dnQkEAjZ8+HB27tw5xXOzZ89mY8eOVer/7bffsmHDhjGBQMA8PDzYli1bOjhi7rjkOHbsWAZA5Wv27NkdH7iGuH6GD+vsRZ8x7vn99NNPLDAwkFlZWbE+ffqw+Ph4Vl9f38FRc8M1xw0bNjAfHx9mZWXFnJ2d2YwZM9jdu3c7OGrNnD17ts2fqY6oM7S0MiGEmBAa0yeEEBNCRZ8QQkwIFX1CCDEhVPQJIcSEUNEnhBATQkWfEEJMCBV9QggxIVT0CSHEhFDRJ51aZmYm7O3tDR2G1jw8PJCamtpmn6SkpE5/m0ZiPKjoE72bM2cOeDyeytevv/5q6NCQmZmpFJOzszOmTZuGoqIinez/8uXLeP311xWP1d0X9a233sLp06d18nqteTRPR0dHTJw4ETdu3OC8n678S5hQ0Scd5MUXX0RZWZnSl6enp6HDAtC84mZZWRlKS0uxb98+XLt2DZMmTYJMJmv3vnv16gVra+s2+3Tr1q1DluN+OM+vv/4adXV1mDBhApqamvT+2qTzoKJPOoSlpSWcnJyUvszNzZGSkoKnnnoKNjY2cHNzQ0xMjNJSwI/6z3/+g+effx62traws7ODr68vrly5ong+Ly8Pzz77LKysrODm5obFixejrq6uzdh4PB6cnJzg7OyM559/HitXrsSPP/6o+Etky5Yt8PLygkAgwBNPPIE9e/YobZ+UlIS+ffvC0tISLi4uWLx4seK5h4d3PDw8AABhYWHg8XiKxw8P75w6dQpCoRBVVVVKr7F48WKMHTtWZ3n6+fkhLi4Ot2/fxs8//6zo09bn8e233yIiIgLV1dWKvxiSkpIANN/qMDExEa6urrCxscHIkSPx7bffthkPMQwq+sSgzMzMsGHDBvz444/YtWsXzpw5g8TExFb7z5gxA3369MHly5eRn5+PpUuXKtYV/+GHHzB+/Hj885//xPXr13Hw4EHk5uYqbpKiKSsrKwCARCLB0aNHsWTJErz55pv48ccfMX/+fERERODs2bMAgEOHDmHdunXYtm0bbt68iWPHjuGpp55Su9/Lly8DaL4vb1lZmeLxwwIDA2Fvb4/Dhw8r2mQyGT777DPMmDFDZ3lWVVVh3759AKC0Lntbn0dAQABSU1MVfzGUlZXhrbfeAgBERETgwoULOHDgAK5fv46pU6fixRdfxM2bNzWOiXSQdq3RSYgGZs+ezczNzZmNjY3ia8qUKWr7fvbZZ6xnz56KxxkZGax79+6Kx7a2tiwzM1PttjNnzmSvv/66UltOTg4zMzNjDQ0Nard5dP937txho0aNYn369GFisZgFBASwefPmKW0zdepUFhoayhhj7JNPPmHe3t6sqalJ7f7d3d3ZunXrFI8BsKNHjyr1eXRJ58WLF7Nx48YpHp86dYoJBAJ2//79duUJgNnY2DBra2vFkr6TJk1S27/F4z4Pxhj79ddfGY/HYyUlJUrtL7zwAlu2bFmb+ycdj26XSDrE888/jy1btige29jYAADOnj2LNWvWoLCwEDU1NZBKpWhsbERdXZ2iz8Pi4+MRFRWFPXv2IDAwEFOnToWXlxcAID8/H7/++iv27t2r6M8Yg1wuR1FREQYNGqQ2turqanTr1g2MMdTX12P48OE4cuQIBAIBfvrpJ6UTsUDzHanWr18PAJg6dSpSU1PRr18/vPjiiwgNDcXEiRPbdaP1GTNmwN/fH6WlpXBxccHevXsRGhqKv/3tb+3K09bWFlevXoVUKsW5c+fw0UcfYevWrUp9uH4eAHD16lUwxlTuNSwWizvlrUNNHRV90iFsbGzQv39/pbbbt28jNDQU0dHRePfdd9GjRw/k5uYiMjISEolE7X6SkpIwffp0fP311zhx4gRWrlyJAwcOICwsDHK5HPPnz1caU2/Rt2/fVmNrKYZmZmZwdHRUKW5t3ajbzc0NP//8M0QiEb755hvExMTgo48+wrlz57S+nd2IESPg5eWFAwcOYMGCBTh69CgyMjIUz2ubp5mZmeIzGDhwIMrLyxEeHo7z588D0O7zaInH3Nwc+fn5ilsVtujWrRun3In+UdEnBnPlyhVIpVJ88sknivucfvbZZ4/dztvbG97e3oiLi8Orr76KjIwMhIWFYfjw4bhx44bKL5fHebgYPmrQoEHIzc3FrFmzFG15eXlKR9NWVlaYNGkSJk2ahIULF2LgwIH44YcfMHz4cJX98fl8jWYFTZ8+HXv37kWfPn1gZmamdH9ibfN8VFxcHFJSUnD06FGEhYVp9HkIBAKV+IcNGwaZTIZ79+5hzJgx7YqJ6B+dyCUG4+XlBalUio0bN+LWrVvYs2ePynDDwxoaGrBo0SJ8++23uH37Ni5cuIDLly8rCvC//vUvXLx4EQsXLsS1a9dw8+ZNfPHFF3jjjTe0jjEhIQGZmZnYunUrbt68iZSUFBw5ckRxAjMzMxPp6en48ccfFTlYWVnB3d1d7f48PDxw+vRplJeX46+//mr1dWfMmIGrV6/i/fffx5QpUyAUChXP6SpPOzs7REVFYeXKlWCMafR5eHh44MGDBzh9+jQqKipQX18Pb29vzJgxA7NmzcKRI0dQVFSEy5cv48MPP0RWVhanmEgHMOQJBWIaZs+ezSZPnqz2uZSUFObs7MysrKzY+PHj2e7duxkA9tdffzHGlE8cisVi9sorrzA3NzcmEAiYi4sLW7RokdLJy0uXLrGgoCDWrVs3ZmNjw4YMGcLef//9VmNTd2LyUWlpaaxfv36Mz+czb29vtnv3bsVzR48eZSNHjmR2dnbMxsaGjRo1in3zzTeK5x89kfvFF1+w/v37MwsLC+bu7s4Ya/3evM888wwDwM6cOaPynK7yvH37NrOwsGAHDx5kjD3+82CMsejoaNazZ08GgK1cuZIxxlhTUxN75513mIeHB+Pz+czJyYmFhYWx69evtxoTMQy6Ry4hhJgQGt4hhBATQkWfEEJMCBV9QggxIVT0CSHEhFDRJ4QQE0JFnxBCTAgVfUIIMSFU9AkhxIRQ0SeEEBNCRZ8QQkwIFX1CCDEh/w+xuYvO6SRo3QAAAABJRU5ErkJggg==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAREAAAEDCAYAAAASijAeAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAMe9JREFUeJzt3XtcFPX+P/DXAnsBFChAbiEghmBaKhwTvIUJBqYez9EsTRGRJEwFEsKv35OXTtIV0RQvSZBmaoraRUw3RRG1UsTjhb5pSoAIP0UF5LrL7uf3h4fNdRfc2QvLwvv5eOyj5jMzn3nPh923M5+Z+QyPMcZACCFaMjN2AIQQ00ZJhBCiE0oihBCdUBIhhOiEkgghRCeURAghOqEkQgjRCSURQohOKIkQQnRCSYQQohOjJpG8vDxMmDABrq6u4PF42L9//2PXOX78OPz9/SESidCnTx9s3LjR8IESQtpkYcyN19fX47nnnkNkZCT++c9/Pnb54uJihIeHIzo6Gl999RVOnjyJ2NhYODo6arQ+AMjlcty8eRM9e/YEj8fTdRcIMTmMMdy/fx+urq4wM9PDcQTrJACwffv2tbtMUlIS8/X1VSqbN28eGzZsmMbbKSsrYwDoQ59u/ykrK9Pmp6rCqEciXJ0+fRqhoaFKZePGjUNGRgakUin4fL7KOs3NzWhublZMs/8+tHzlyhWcPXsWwcHBatcjhiOVSpGbm0ttrwZjDI1SGRgDoraew+//r07v2/hh7kA8N8APPXv21Et9JpVEKisr4eTkpFTm5OSElpYWVFVVwcXFRWWdlJQUrFixQqX87NmzsLKywi+//GKweEnbOrrtGQMk8g7bnNbWXDJHecNfp9lmQiu9b+NcYSEA6O103qSSCKC6461HFm01yJIlS5CQkKCYrq2thbu7O4KDg/HLL78gJCSE/jXsYFKpFGKxWG3bt/5LrE+MAa9tOYPfKu/rtd6O4OfcEzvm/g36+L1LpS04evQoRga+oHtlDzGpJOLs7IzKykqlslu3bsHCwgL29vZq1xEKhRAKhSrlrV9ePp/fpZOIIX6UupIyHpplD/4L9tevgzFg6safUVRRa8TojK+/iw12xwSCxwMs+eZ6O2KQSqUQmgMCgUAv9bUyqSQSGBiI77//Xqns8OHDCAgI6NKJQFuMMUzZeBoFJfeMHYoaFkj69WiHbvHhH2dnps/E0RGMmkTq6urwxx9/KKaLi4tx/vx5PPnkk+jduzeWLFmC8vJybN26FQAQExODdevWISEhAdHR0Th9+jQyMjKwY8cOY+1Cp9R69NEgkXXSBNI+Q/3YTe3HaSqMmkRar460au27iIiIQFZWFioqKlBaWqqY7+XlhZycHMTHx2P9+vVwdXXF2rVrNb5HxJRoexry4JTgtMopwdn/HQsrgbm+wtOJVCrFoUOHMW5cqNojSPqxmxajJpEXXnhB0TGqTlZWlkrZ6NGjce7cOQNGZXxyOcPLn+XrrW8gwOMJ2FsLOs0PU8pjEJoDVgIL8PkmdUZN1KC/YCfw8FEHY8DLn+WjuKpepzoN1TlHyKMoiRjY405L2jr9AAAvB2v8sGCEVn0DlDhIR6EkoiVN+izaSxCP09/FBj8sGAEzM0oEpHOjJKIFffdZAKpXJOhIgpgKSiIcyeUML6Ye59RnocklS0oaxFRREuGAMabU6alpnwUlCNKVURLREGMMd+olilMYLwdrHEkYTX0WpNujJNKOhx/LfrSDlDo9CXmAkkgb2nvuJMDjiU5z9ychxkZJpA3qnjtp7SC1ElAfByGtKImowRjD1I2nFdOtz51QBykhqiiJqNEgkSn6P/q72HSq504I6WzovTOPePQo5MH9HZRACGkLJZFHNEqVj0KoA5WQ9lESaQcdhRDyeJREHvHw8CaUPwh5PEoiD3m0P4QQ8niURB7y6FUZSz71hxDyOJRE/ouuyhCiHUoi/0VXZQjRDiURNegohBDNURJRg/IHIZqjJPJf7by5ghDSDkoioEu7hOiCkghUO1Xp0i4hmqMk8gjqVCWEG0oij6D8QQg3WiWRlpYW/PTTT9i0aRPu378PALh58ybq6uo415Weng4vLy+IRCL4+/vjxIkT7S6/fft2PPfcc7CysoKLiwsiIyNx584dbXaDEKIHnJNISUkJBg4ciEmTJmH+/Pm4ffs2AOCjjz7C4sWLOdW1a9cuxMXFYenSpSgsLMTIkSMRFhaG0tJStcvn5+dj1qxZiIqKwuXLl7F7926cOXMGc+fO5bobhBA94ZxEFi1ahICAANy7dw+WlpaK8smTJ+PIkSOc6kpNTUVUVBTmzp0LPz8/pKWlwd3dHRs2bFC7/M8//wxPT08sXLgQXl5eGDFiBObNm4ezZ89y3Q1CiJ5wHh4xPz8fJ0+ehEAgUCr38PBAeXm5xvVIJBIUFBQgOTlZqTw0NBSnTp1Su05QUBCWLl2KnJwchIWF4datW9izZw/Gjx/f5naam5vR3NysmK6tfXAVRiqVKv4rkf/VESKVSiHl0U0jhvRw25OOY6h255xE5HI5ZDLVF1nfuHEDPXv21LieqqoqyGQyODk5KZU7OTmhsrJS7TpBQUHYvn07pk2bhqamJrS0tGDixIn47LPP2txOSkoKVqxYoVKem5sLKysrHD4sxscXzAE8SCSHDh2GkK7wdgixWGzsELql3NxcvdbHOYmEhIQgLS0NmzdvBgDweDzU1dVh2bJlCA8P5xzAo5dTGWNtXmItKirCwoUL8e6772LcuHGoqKhAYmIiYmJikJGRoXadJUuWICEhQTFdW1sLd3d3BAcH45dffsGIF8Yg7uc8AICfc0/8/eVhdInXwKRSKcRiMUJCQsDn840dTrfR2u7BwcF6rZdzElm9ejWCg4PRv39/NDU1Yfr06bh69SocHBywY8cOjetxcHCAubm5ylHHrVu3VI5OWqWkpGD48OFITEwEADz77LOwtrbGyJEj8e9//xsuLi4q6wiFQgiFQpXy1i8vn/9XE+x5MwgCAQ2A31H4fD4lESPQd5tz/sW4urri/Pnz2LlzJwoKCiCXyxEVFYUZM2YodbQ+jkAggL+/P8RiMSZPnqwoF4vFmDRpktp1GhoaYGGhHLK5+YNzD6aHh1/oAIQQ7jgnkby8PAQFBSEyMhKRkZGK8paWFuTl5WHUqFEa15WQkICZM2ciICAAgYGB2Lx5M0pLSxETEwPgwalIeXk5tm7dCgCYMGECoqOjsWHDBsXpTFxcHIYOHQpXV1euu0II0QPOSSQ4OBgVFRXo1auXUnlNTQ2Cg4PVdrq2Zdq0abhz5w5WrlyJiooKDBgwADk5OfDw8AAAVFRUKN0zMnv2bNy/fx/r1q3D22+/DTs7O4wZMwYffvgh190ghOgJ5yTSVsfnnTt3YG1tzTmA2NhYxMbGqp2XlZWlUrZgwQIsWLCA83YIIYahcRL5xz/+AeDB1ZTZs2crdVbKZDJcuHABQUFB+o+QENKpaZxEbG1tATw4EunZs6dSJ6pAIMCwYcMQHR2t/wgJIZ2axkkkMzMTAODp6YnFixdrdepCCOl6OPeJLFu2zBBxEEJMlFZ3Vu3ZswfffPMNSktLIZFIlOadO3dOL4ERQkwD56d4165di8jISPTq1QuFhYUYOnQo7O3tcf36dYSFhRkiRoOiAZoJ0Q3nJJKeno7Nmzdj3bp1EAgESEpKglgsxsKFC1FTU2OIGA2GMeC1LWeMHQYhJo1zEiktLVVcyrW0tFSMbDZz5kxOz850BhI58Fvlg/hpgGZCtMM5iTg7OyuGI/Tw8MDPP/8MACguLtbL8yvGQgM0E6IdzklkzJgx+P777wEAUVFRiI+PR0hICKZNm6b0IJ2pofxBiHY4X53ZvHkz5HI5ACAmJgZPPvkk8vPzMWHCBMWDc4SQ7oNzEjEzM4OZ2V8HMK+88gpeeeUVAEB5eTnc3Nz0Fx0hpNPTy3tnKisrsWDBAvTt21cf1RFCTIjGSaS6uhozZsyAo6MjXF1dsXbtWsjlcrz77rvo06cPfv75Z3zxxReGjJUQ0glpfDrzP//zP8jLy0NERAR+/PFHxMfH48cff0RTUxMOHjyI0aNHGzJOQkgnpXESOXDgADIzMzF27FjExsaib9++8PHxQVpamgHDI4R0dhqfzty8eRP9+/cHAPTp0wcikYjePEcI0TyJyOVypVGizc3NaTgAQojmpzOMMaURzZqamhATE6OSSPbu3avfCAkhnZrGSSQiIkJp+vXXX9d7MIQQ08N5ZDNCCHmYXm42M0WMMay5RE/tEqKrbptEGqUylDc8eOqOhgEgRHvdNok8jIYBIER7lERAwwAQogtKIoQQnWiVRLZt24bhw4fD1dUVJSUlAIC0tDR8++23nOtKT0+Hl5cXRCIR/P39ceLEiXaXb25uxtKlS+Hh4QGhUAhvb2968I8QI+KcRDZs2ICEhASEh4ejurpa8QJvOzs7zs/R7Nq1C3FxcVi6dCkKCwsxcuRIhIWFKb3E+1GvvPIKjhw5goyMDPz+++/YsWMHfH19ue4GIURfGEd+fn5s3759jDHGevTowa5du8YYY+zixYvM3t6eU11Dhw5lMTExSmW+vr4sOTlZ7fIHDx5ktra27M6dO1zDVqipqWEAWOnNSubxzg/M450fWH2zVOv6CHcSiYTt37+fSSQSY4fSrbS2e1VVFQPAampq9FIv5yOR4uJiDB48WKVcKBSivr5e43okEgkKCgoQGhqqVB4aGopTp06pXee7775DQEAAPvroI7i5ucHHxweLFy9GY2Mjt50ghOgN5+ERvby8cP78eXh4eCiVHzx4UPGUryaqqqogk8ng5OSkVO7k5ITKykq161y/fh35+fkQiUTYt28fqqqqEBsbi7t377bZL9Lc3Izm5mbFdG1tLQCgRdqiKJNKpZDyTHekelMjlUqV/ks6hqHanXMSSUxMxPz589HU1ATGGH799Vfs2LEDKSkp2LJlC+cAHr0/gzHW5j0bcrkcPB4P27dvh62tLQAgNTUVU6ZMwfr162FpaamyTkpKClasWKFSfjwvD4ANAODQocMQ0r1mHU4sFhs7hG4pNzdXr/VxTiKRkZFoaWlBUlISGhoaMH36dLi5uWHNmjV49dVXNa7HwcEB5ubmKkcdt27dUjk6aeXi4gI3NzdFAgEAPz8/MMZw48YNPP300yrrLFmyBAkJCYrp2tpauLu7Y/SoUcCl8wCAceNCYSXQ6rXERAtSqRRisRghISFKw0sQw2pt9+DgYL3Wq9UvJzo6GtHR0aiqqoJcLkevXr041yEQCODv7w+xWKz0vhqxWIxJkyapXWf48OHYvXs36urq0KNHDwDAlStXYGZmhqeeekrtOkKhUDF8wcMs+H/tOp/PB59PSaSjPWh3SiIdTd9tzrljdcWKFbh27RqAB0cT2iSQVgkJCdiyZQu++OIL/Pbbb4iPj0dpaani/TVLlizBrFmzFMtPnz4d9vb2iIyMRFFREfLy8pCYmIg5c+aoPZUhhBge5ySSnZ0NHx8fDBs2DOvWrcPt27e13vi0adOQlpaGlStXYtCgQcjLy0NOTo6i07aiokLpnpEePXpALBajuroaAQEBmDFjBiZMmIC1a9dqHQMhRDc8xri/QPfy5cvYvn07du7ciRs3bmDs2LF4/fXX8fe//x1WVlaGiFNvamtrYWtri9KblRi55iwAoGjlOOoT6UBSqRQ5OTkIDw+n05kO1NruI0aMgIODA2pqamBjY6NzvVrd9v7MM89g1apVuH79OnJzc+Hl5YW4uDg4OzvrHBAhxLTo/ACetbU1LC0tIRAI6Lo/Id2QVkmkuLgY77//Pvr374+AgACcO3cOy5cvb/MmMUJI18W5IyAwMBC//vorBg4ciMjISMV9IoSQ7olzEgkODsaWLVvwzDPPGCIeQoiJ4ZxEVq1aZYg4CCEmSqMkkpCQgPfeew/W1tZKt5Crk5qaqpfACCGmQaMkUlhYqLjyUlhYaNCACCGmRaMk8vBTf/p+ApAQYto4X+KdM2cO7t+/r1JeX1+POXPm6CUoQojp4JxEvvzyS7UjiTU2NmLr1q16CYoQYjo0vjpTW1sLxhgYY7h//z5EIpFinkwmQ05Ojk5P9BJCTJPGScTOzg48Hg88Hg8+Pj4q83k8ntoRxAghXZvGSSQ3NxeMMYwZMwbZ2dl48sknFfMEAgE8PDzg6upqkCAJIZ2Xxklk9OjRAB48N9O7d296dy0hBICGSeTChQsYMGAAzMzMUFNTg4sXL7a57LPPPqu34AghnZ9GSWTQoEGorKxEr169MGjQIPB4PKgby4jH4yneiEcI6R40SiLFxcVwdHRU/D8hhLTSKIk8/KKqR19aRQjp3rS62ezAgQOK6aSkJNjZ2SEoKAglJSV6DY4Q0vlxTiKrVq1SvJ7h9OnTWLduHT766CM4ODggPj5e7wESQjo3zuOJlJWVoW/fvgCA/fv3Y8qUKXjjjTcwfPhwvPDCC/qOjxDSyXE+EunRowfu3LkDADh8+DDGjh0LABCJRGqfqSGEdG2cj0RCQkIwd+5cDB48GFeuXMH48eMBPHgXjaenp77jI4R0cpyPRNavX4/AwEDcvn0b2dnZsLe3BwAUFBTgtdde03uAhJDOjfORiJ2dHdatW6dSTg/fEdI9afXuyOrqamRkZOC3334Dj8eDn58foqKiYGtrq+/4CCGdHOfTmbNnz8Lb2xurV6/G3bt3UVVVhdWrV8Pb2xvnzp3jHEB6ejq8vLwgEong7++PEydOaLTeyZMnYWFhgUGDBnHeJiFEfzgnkfj4eEycOBF//vkn9u7di3379qG4uBgvv/wy4uLiONW1a9cuxMXFYenSpSgsLMTIkSMRFhaG0tLSdterqanBrFmz8OKLL3INnxCiZ1odibzzzjuwsPjrTMjCwgJJSUk4e/Ysp7pSU1MRFRWFuXPnws/PD2lpaXB3d8eGDRvaXW/evHmYPn06AgMDuYavoOb5QUKIFjj3idjY2KC0tBS+vr5K5WVlZejZs6fG9UgkEhQUFCA5OVmpPDQ0FKdOnWpzvczMTFy7dg1fffUV/v3vfz92O83NzWhublZM19bWAgDmbC1QlEmlUkh5lFU6SuvrR+gF8B3LUO3OOYlMmzYNUVFR+OSTTxAUFAQej4f8/HwkJiZyusRbVVUFmUwGJycnpXInJ6c2Xwx+9epVJCcn48SJE0pHQu1JSUlRe+Xoyv+rh5nQCm5WDLniw6AxljqeWCw2dgjdkr5f+8I5iXzyySfg8XiYNWsWWlpaAAB8Ph9vvvkmPvjgA84BPDpCGmNM7ahpMpkM06dPx4oVK9SO8dqWJUuWKL21r7a2Fu7u7orp/QtHwc7aknPcRHtSqRRisRghISHg8/nGDqfbaG334OBgvdbLOYkIBAKsWbMGKSkpuHbtGhhj6Nu3L6ysrDjV4+DgAHNzc5Wjjlu3bqkcnQDA/fv3cfbsWRQWFuKtt94CAMjlcjDGYGFhgcOHD2PMmDEq6wmFQgiFwrb3h8+nL7KR8KntjULfba5xx2pDQwPmz58PNzc39OrVC3PnzoWLiwueffZZzgkEeJCM/P39VQ5pxWIxgoKCVJa3sbHBxYsXcf78ecUnJiYG/fr1w/nz5/H8889zjoEQojuNj0SWLVuGrKwszJgxAyKRCDt27MCbb76J3bt3a73xhIQEzJw5EwEBAQgMDMTmzZtRWlqKmJgYAA9ORcrLy7F161aYmZlhwIABSuv36tULIpFIpZwQ0nE0TiJ79+5FRkYGXn31VQDA66+/juHDh0Mmk8Hc3FyrjU+bNg137tzBypUrUVFRgQEDBiAnJ0cxelpFRcVj7xkhhBgXj6kbcVkNgUCA4uJiuLm5KcosLS1x5coVpY7Kzq62tha2trZwj/sGZkIr/OdfY2BLHasdSiqVIicnB+Hh4dQn0oFa233EiBFwcHBATU0NbGxsdK5X4z4RmUwGgUCgVGZhYaG4QkMI6Z40Pp1hjGH27NlKVzqampoQExMDa2trRdnevXv1GyEhpFPTOIlERESolL3++ut6DYYQYno0TiKZmZmGjIMQYqI4P4BHCCEPoyRCCNEJJRFCiE4oiRBCdEJJhBCiE62SyLZt2zB8+HC4uroq3r+blpaGb7/9Vq/BEUI6P85JZMOGDUhISEB4eDiqq6shk8kAPHiVRFpamr7jI4R0cpyTyGeffYbPP/8cS5cuVXrwLiAgABcvXtRrcISQzo9zEikuLsbgwYNVyoVCIerr6/USFCHEdHBOIl5eXjh//rxK+cGDB9G/f399xEQIMSGch0dMTEzE/Pnz0dTUBMYYfv31V+zYsQMpKSnYsmWLIWIkhHRinJNIZGQkWlpakJSUhIaGBkyfPh1ubm5Ys2aNYsAiQkj3odW7eKOjoxEdHY2qqirI5XL06tVL33ERQkyEVkmklYODg77iIISYKM5JxMvLS+17YVpdv35dp4AIIaaFcxJ59KXdUqkUhYWF+PHHH5GYmKivuAghJoJzElm0aJHa8vXr13N+oTchxPTp7QG8sLAwZGdn66s6QoiJ0FsS2bNnD5588kl9VUcIMRGcT2cGDx6s1LHKGENlZSVu376N9PR0vQZHCOn8OCeRv//970rTZmZmcHR0xAsvvABfX199xUUIMRGckkhLSws8PT0xbtw4ODs7GyomQogJ4dQnYmFhgTfffBPNzc2GiocQYmI4d6w+//zzKCws1FsA6enp8PLygkgkgr+/P06cONHmsnv37kVISAgcHR1hY2ODwMBAHDp0SG+xEEK449wnEhsbi7fffhs3btyAv7+/0is0AeDZZ5/VuK5du3YhLi4O6enpGD58ODZt2oSwsDAUFRWhd+/eKsvn5eUhJCQEq1atgp2dHTIzMzFhwgT88ssvasc4IYR0AKahyMhIVlNTw3g8nsrHzMxM8V8uhg4dymJiYpTKfH19WXJyssZ19O/fn61YsULj5WtqahgA5h73DfN45wdWXdeg8bpEPyQSCdu/fz+TSCTGDqVbaW33qqoqBoDV1NTopV6Nj0S+/PJLfPDBByguLtZL8pJIJCgoKEBycrJSeWhoKE6dOqVRHXK5HPfv32/3/pTm5malPpza2lql+VJpC6RSKYfIia5a25vavWMZqt01TiKMMQCAh4eHXjZcVVUFmUwGJycnpXInJydUVlZqVMenn36K+vp6vPLKK20uk5KSghUrVrQ5/+jRoxCatzmbGJBYLDZ2CN1Sbm6uXuvj1CfS3tO72nq0TsaYRtvZsWMHli9fjm+//bbd8UyWLFmChIQExXRtbS3c3d0V02PGjIGttUiLyIm2pFIpxGIxQkJCwOfzjR1Ot9Ha7sHBwXqtl1MS8fHxeewP/O7duxrV5eDgAHNzc5Wjjlu3bqkcnTxq165diIqKwu7duzF27Nh2lxUKhRAKhW3O5/Mt6ItsJHw+n9reCPTd5pySyIoVK2Bra6uXDQsEAvj7+0MsFmPy5MmKcrFYjEmTJrW53o4dOzBnzhzs2LED48eP10sshBDtcUoir776ql6HQkxISMDMmTMREBCAwMBAbN68GaWlpYiJiQHw4FSkvLwcW7duBfAggcyaNQtr1qzBsGHDFEcxlpaWektuhBBuNE4ihugPmTZtGu7cuYOVK1eioqICAwYMQE5OjqLztqKiAqWlpYrlN23ahJaWFsyfPx/z589XlEdERCArK0vv8RFCHo/z1Rl9i42NRWxsrNp5jyaGY8eOGSQGQoj2NE4icrnckHGQR8hksi57H4VUKoWFhQWampoU73Im+mNubg4LCwuDnD2oo9No78Qw6urqcOPGDYMd/RkbYwzOzs4oKyvrsC96d2NlZQUXFxcIBAKDb4uSSCcjk8lw48YNWFlZwdHRsUv+yORyOerq6tCjRw+YmeltcD2CBwlaIpHg9u3bKC4uxtNPP23wNqYk0slIpVIwxuDo6AhLS0tjh2MQcrkcEokEIpGIkogBWFpags/no6SkRNHOhkR/wU6qKx6BkI7TkcmZkgghRCeURAghOqEkQgxq5syZWLVqlbHD6FL+9re/Ye/evcYOQ4GSCDGYCxcu4MCBA1iwYIHKvN27d4PP5ysecXhYVlYW7Ozs1NZpZ2enchNibm4uwsPDYW9vDysrK/Tv3x9vv/02ysvL9bEbajHGsHz5cri6usLS0hIvvPACLl++3O46UqkUK1euhLe3N0QiEZ577jn8+OOPSst4enqCx+OpfB6+Q/tf//oXkpOTO829W5REiMGsW7cOU6dORc+ePVXmbd++HYmJidi5cycaGhq03samTZswduxYODs7Izs7G0VFRdi4cSNqamrw6aef6hJ+uz766COkpqZi3bp1OHPmDJydnRESEoL79++3uc7//u//YtOmTfjss89QVFSEmJgYTJ48WWnM4jNnzqCiokLxaR1zZerUqYplxo8fj5qams4zvrBexkczIZ19eMTGxkZWVFTEGhsbGWOMyeVyVt8sNcpHLpdrvR8ymYzZ2dmxH374QWXetWvXmKWlJbt79y57/vnn2Zdffqk0PzMzk9na2qqt19bWlmVmZjLGGCsrK2MCgYDFxcWpXfbevXtax98euVzOnJ2d2QcffKAoa2pqYra2tmzjxo1trufi4sLWrVunVDZp0iQ2Y8aMNtdZtGgR8/b2VvlbzJ49m82cObPN9R79HjHWCYZHJMbRKJWh/7vG+RenaOU4WAm0+4pcuHAB1dXVCAgIUJmXmZmJ0NBQ2Nra4vXXX0dGRgZmzZrFeRu7d++GRCJBUlKS2vltnRIBD94d3d6bBYAHdw6rU1xcjMrKSoSGhirKhEIhRo8ejVOnTmHevHlq12tubla5Z8PS0hL5+flql5dIJPjqq6+QkJCgcsl/6NCh+Oijj9qNv6NQEiEG8eeff8Lc3Fxl6Ai5XK4Yrxd4MLxEQkIC/vjjD/Tt25fTNq5evQobGxu4uLhwjm/Lli1obGzkvB4AxRAU6ob2LCkpaXO9cePGITU1FaNGjYK3tzeOHDmCb7/9ts3nh/bv34/q6mrMnj1bZZ6bmxtKS0shl8uNfsMeJZFOzpJvjqKV44y2bW01NjZCKBSq/At6+PBh1NfXK0akc3BwQGhoKL744gvOV3GYhkNpquPm5qbVeg/jOrTnmjVrEB0dDV9fX/B4PHh7eyMyMhKZmZlql8/IyEBYWBhcXV1V5llaWkIul6O5udnodzZTEunkeDye1qcUxuTg4ICGhgZIJBKlh8C++OIL3L17V+mHIZfLUVhYiPfeew/m5uawsbFBXV0dZDIZzM3/SmQymQx1dXWKAah8fHxQU1ODiooKzkcjupzOtL5CtrKyUmm7jxva09HREfv370dTUxPu3LkDV1dXJCcnw8vLS2XZkpIS/PTTT21eyr179y6srKyMnkAAujpDDGTQoEEAgKKiIkXZnTt38O233+Lrr79GXl4ezp07h/Pnz+P8+fOoq6vDwYMHAQC+vr6QyWQqb1o8d+4cZDIZ+vXrBwCYMmUKBAJBm30D1dXVbca3ZcsWxbbb+rTFy8sLzs7OSqPVSyQSHD9+HEFBQe01CwBAJBLBzc0NLS0tyM7OVjscaGZmJnr16tXmEKCXLl3CkCFDHrutjmB6/8QRk+Do6IghQ4YgPz9fkVC2bdsGe3t7TJ06FXV1dbCxsVGcz7/88svIyMjAyy+/jP79+yMsLAxz5sxBamoqvL29ce3aNSQkJCAsLAz9+/cHALi7u2P16tV46623UFtbi1mzZsHT0xM3btzA1q1b0aNHjzYv8+pyOsPj8RAXF4dVq1bh6aefxtNPP41Vq1bBysoK06dPVyw3a9YsuLm5ISUlBQDwyy+/oLy8HIMGDUJ5eTmWL18OuVyu0jEsl8uRmZmJiIgIWFio/4meOHFCqWPXqPRyjceEmNolXlO2ceNGNmzYMMX0wIEDWWxsLJPJZOzevXtMJpMp5mVnZzMLCwtWWVnJGHvwd4qPj2d9+/ZlIpGI9e3bl8XFxbHq6mqV7YjFYjZu3Dj2xBNPMJFIxHx9fdnixYvZzZs3DbZvcrmcLVu2jDk7OzOhUMhGjRrFLl68qLTM6NGjWUREhGL62LFjzM/PjwmFQmZvb89mzpzJysvLVeo+dOgQA8B+//13tdu+ceMG4/P5rKysrM34OvISL4+xLjryTRtqa2tha2sL97hvYCa0wn/+NQa21sY/r2zV1NSE4uJixUvOTVlTUxP69euHnTt3IjAwUFEul8tRW1urdCRCNJeYmIiamhps3ry5zWXUfY+kUilycnIwYsQIODg4oKamBjY2NjrHQ6czxGBEIhG2bt2KqqoqY4fSpfTq1QuLFy82dhgKlESIQY0ePdrYIXQ5iYmJxg5BCR1LEkJ0QkmEEKITSiKdVDfr7yZ61pHfH0oinUzrHZoSicTIkRBT1jq8Qke8MJ06VjsZCwsLWFlZ4fbt2+Dz+V3yEmjraO9NTU1dcv+MiTGGhoYG3Lp1C3Z2dkqPDRgKJZFOhsfjwcXFBcXFxe0+EWrKGGNobGyEpaUljWpvIHZ2dopnfAzN6EkkPT0dH3/8MSoqKvDMM88gLS0NI0eObHP548ePIyEhAZcvX4arqyuSkpLUDrFnygQCAZ5++ukue0ojlUqRl5eHUaNGdcjhdnfD5/M75AiklVGTyK5duxAXF4f09HQMHz4cmzZtQlhYGIqKitC7d2+V5YuLixEeHo7o6Gh89dVXOHnyJGJjY+Ho6Ih//vOfRtgDwzEzMzP5O1bbYm5ujpaWFohEIkoiXYBRT0hTU1MRFRWFuXPnws/PD2lpaXB3d8eGDRvULr9x40b07t0baWlp8PPzw9y5czFnzhx88sknHRw5IaSV0Y5EJBIJCgoKkJycrFQeGhqKU6dOqV3n9OnTKk8ujhs3DhkZGZBKpWr/VWtubkZzc7Niura2Vmm+VNoCqVSq7W4QLbS2N7V7xzJUuxstiVRVVUEmk6kdYq51+LlHVVZWql2+paUFVVVVagemSUlJwYoVK9qM4+jRoxB23OkjecjD43GQjpObm6vX+ozescp1iDl1y6srb7VkyRIkJCQopmtqatC7d2/8MHcgzhUWYmTgC0ojbxHDk0qlyM3NRXBwMPWJdKDWdm8dPFtfN6QZLYk4ODjA3Nxc5aijvSHmnJ2d1S5vYWEBe3t7tesIhUIIhULFdOvpzHMD/HQJnxCTd//+fcVQk7owWhIRCATw9/eHWCzG5MmTFeVisVjtcHEAEBgYiO+//16p7PDhwwgICND4XzRXV1eUlZWBMYbevXujrKxML2MqEM3V1tbC3d2d2r6DtbZ7aWkpeDye2gGgtaKXoY20tHPnTsbn81lGRgYrKipicXFxzNramv3555+MMcaSk5OVXtBz/fp1ZmVlxeLj41lRURHLyMhgfD6f7dmzh/O2W0c409foTkRz1PbGYah2N2qfyLRp03Dnzh2sXLkSFRUVGDBgAHJycuDh4QEAqKioQGlpqWJ5Ly8v5OTkID4+HuvXr4erqyvWrl3b5e4RIcSUdLvhEVu1DpOoryHiiOao7Y3DUO3ebZ9+EgqFWLZsmVKnK+kY1PbGYah277ZHIoQQ/ei2RyKEEP2gJEII0QklEUKITiiJEEJ00qWTSHp6uuINYP7+/o99C/zx48fh7+8PkUiEPn36YOPGjR0UadfCpd2PHTsGHo+n8vm///u/Doy4a8jLy8OECRPg6uoKHo+H/fv3P3YdvXzn9XrrWifSejfs559/zoqKitiiRYuYtbU1KykpUbt8692wixYtYkVFRezzzz/X+m7Y7oxru+fm5ireO1tRUaH4tLS0dHDkpi8nJ4ctXbqUZWdnMwBs37597S6vr+98l00iQ4cOZTExMUplvr6+LDk5We3ySUlJzNfXV6ls3rx5Si+kJo/Htd1bk8i9e/c6ILruQ5Mkoq/vfJc8nWkd8OjRAYy0GfDo7NmzNHiOhrRp91aDBw+Gi4sLXnzxRb2Pd0HU09d3vksmEUMMeEQeT5t2d3FxwebNm5GdnY29e/eiX79+ePHFF5GXl9cRIXdr+vrOG31QIkMy9IBHRD0u7d6vXz/069dPMR0YGIiysjJ88sknGDVqlEHjJPr5znfJI5GOGvCIKNOm3dUZNmwYrl69qu/wyCP09Z3vkknk4QGPHiYWixEUFKR2ncDAQJXluQ541N1p0+7qFBYWqh0vl+iX3r7znLphTYgxBzzqzri2++rVq9m+ffvYlStX2KVLl1hycjIDwLKzs421Cybr/v37rLCwkBUWFjIALDU1lRUWFiourxvqO99lkwhjjK1fv555eHgwgUDAhgwZwo4fP66YFxERwUaPHq20/LFjx9jgwYOZQCBgnp6ebMOGDR0ccdfApd0//PBD5u3tzUQiEXviiSfYiBEj2IEDB4wQtelrvVz+6CciIoIxZrjvPA0FQAjRSZfsEyGEdBxKIoQQnVASIYTohJIIIUQnlEQIITqhJEII0QklEUKITiiJmKCsrCzY2dkZOwyteXp6Ii0trd1lli9fjkGDBnVIPEQ3lESMZPbs2WqHBfzjjz+MHRqysrKUYnJxccErr7yC4uJivdR/5swZvPHGG4ppdUP5LV68GEeOHNHL9try6H46OTlhwoQJuHz5Mud6TDmp64qSiBG99NJLqKioUPp4eXkZOywAgI2NDSoqKnDz5k18/fXXOH/+PCZOnAiZTKZz3Y6OjrCysmp3mR49enTI09MP7+eBAwdQX1+P8ePHQyKRGHzbXQUlESMSCoVwdnZW+pibmyM1NRUDBw6EtbU13N3dERsbi7q6ujbr+c9//oPg4GD07NkTNjY28Pf3x9mzZxXzT506hVGjRsHS0hLu7u5YuHAh6uvr242Nx+PB2dkZLi4uCA4OxrJly3Dp0iXFkdKGDRvg7e0NgUCAfv36Ydu2bUrrL1++HL1794ZQKISrqysWLlyomPfw6YynpycAYPLkyeDxeIrph09nDh06BJFIhOrqaqVtLFy4EKNHj9bbfgYEBCA+Ph4lJSX4/fffFcu09/c4duwYIiMjUVNToziiWb58OYAHI70lJSXBzc0N1tbWeP7553Hs2LF24zFFlEQ6ITMzM6xduxaXLl3Cl19+iaNHjyIpKanN5WfMmIGnnnoKZ86cQUFBAZKTkxWPcl+8eBHjxo3DP/7xD1y4cAG7du1Cfn4+3nrrLU4xWVpaAgCkUin27duHRYsW4e2338alS5cwb948REZGKoY13LNnD1avXo1Nmzbh6tWr2L9/PwYOHKi23jNnzgAAMjMzUVFRoZh+2NixY2FnZ4fs7GxFmUwmwzfffIMZM2bobT+rq6vx9ddfA4DSo/Dt/T2CgoKQlpamOKKpqKjA4sWLAQCRkZE4efIkdu7ciQsXLmDq1Kl46aWXut5YKTo/Oki0EhERwczNzZm1tbXiM2XKFLXLfvPNN8ze3l4xnZmZyWxtbRXTPXv2ZFlZWWrXnTlzJnvjjTeUyk6cOMHMzMxYY2Oj2nUerb+srIwNGzaMPfXUU6y5uZkFBQWx6OhopXWmTp3KwsPDGWOMffrpp8zHx4dJJBK19Xt4eLDVq1crpqFmUOFly5ax5557TjG9cOFCNmbMGMX0oUOHmEAgYHfv3tVpPwEwa2trZmVlpXjqdeLEiWqXb/W4vwdjjP3xxx+Mx+Ox8vJypfIXX3yRLVmypN36TU2XHh6xswsODsaGDRsU09bW1gCA3NxcrFq1CkVFRaitrUVLSwuamppQX1+vWOZhCQkJmDt3LrZt24axY8di6tSp8Pb2BgAUFBTgjz/+wPbt2xXLM8Ygl8tRXFwMPz8/tbHV1NSgR48eYIyhoaEBQ4YMwd69eyEQCPDbb78pdYwCwPDhw7FmzRoAwNSpU5GWloY+ffrgpZdeQnh4OCZMmAALC+2/bjNmzEBgYCBu3rwJV1dXbN++HeHh4XjiiSd02s+ePXvi3LlzaGlpwfHjx/Hxxx+rvHuF698DAM6dOwfGGHx8fJTKm5ubu9xIeZREjMja2hp9+/ZVKispKUF4eDhiYmLw3nvv4cknn0R+fj6ioqLaHIF7+fLlmD59Og4cOICDBw9i2bJl2LlzJyZPngy5XI558+Yp9Um06t27d5uxtf64zMzM4OTkpPJjaW8cVXd3d/z+++8Qi8X46aefEBsbi48//hjHjx/XepS4oUOHwtvbGzt37sSbb76Jffv2ITMzUzFf2/00MzNT/A18fX1RWVmJadOmKQaK1ubv0RqPubk5CgoKYG5urjSvR48enPa9s6Mk0smcPXsWLS0t+PTTT2Fm9qDL6ptvvnnsej4+PvDx8UF8fDxee+01ZGZmYvLkyRgyZAguX76skqwe5+Ef16P8/PyQn5+PWbNmKcpOnTql9K+9paUlJk6ciIkTJ2L+/Pnw9fXFxYsXMWTIEJX6+Hy+Rld9pk+fju3bt+Opp56CmZkZxo8fr5in7X4+Kj4+Hqmpqdi3bx8mT56s0d9DIBCoxD948GDIZDLcunULI0eO1Cmmzo46VjsZb29vtLS04LPPPsP169exbdu2dl9t2NjYiLfeegvHjh1DSUkJTp48iTNnzih+0O+88w5Onz6N+fPn4/z587h69Sq+++47LFiwQOsYExMTkZWVhY0bN+Lq1atITU3F3r17FR2KWVlZyMjIwKVLlxT7YGlpCQ8PD7X1eXp64siRI6isrMS9e/fa3O6MGTNw7tw5vP/++5gyZQpEIpFinr7208bGBnPnzsWyZcvAGNPo7+Hp6Ym6ujocOXIEVVVVaGhogI+PD2bMmIFZs2Zh7969KC4uxpkzZ/Dhhx8iJyeHU0ydnjE7ZLqziIgINmnSJLXzUlNTmYuLC7O0tGTjxo1jW7duVXpL3MMdec3NzezVV19l7u7uTCAQMFdXV/bWW28pdSb++uuvLCQkhPXo0YNZW1uzZ599lr3//vttxqauo/BR6enprE+fPozP5zMfHx+2detWxbx9+/ax559/ntnY2DBra2s2bNgw9tNPPynmP9qx+t1337G+ffsyCwsL5uHhwRhT7Vht9be//Y0BYEePHlWZp6/9LCkpYRYWFmzXrl2Mscf/PRhjLCYmhtnb2zMAbNmyZYwxxiQSCXv33XeZp6cn4/P5zNnZmU2ePJlduHChzZhMEQ2PSAjRCZ3OEEJ0QkmEEKITSiKEEJ1QEiGE6ISSCCFEJ5RECCE6oSRCCNEJJRFCiE4oiRBCdEJJhBCiE0oihBCdUBIhhOjk/wPQxe79z4b7gQAAAABJRU5ErkJggg==", "text/plain": [ "
" ] @@ -5160,11 +5189,323 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "5d8f7366-c67d-4428-a760-f51e10051480", + "execution_count": 12, + "id": "63777c9d-d283-49dc-bed4-bad3601e81e6", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# import os, shutil, warnings\n", + "# import numpy as np\n", + "# import pandas as pd\n", + "# import reciprocalspaceship as rs\n", + "# import gemmi\n", + "# from multiprocessing import Pool, cpu_count\n", + "\n", + "\n", + "# # avoid OpenMP oversubscription when also using multiprocessing\n", + "# os.environ.setdefault(\"OMP_NUM_THREADS\", \"1\")\n", + "\n", + "\n", + "# def _pick(df_cols, *candidates, required=True, label=\"column\"):\n", + "# \"\"\"Find the first matching column name from candidates (case-insensitive).\"\"\"\n", + "# cols_l = {str(c).lower(): c for c in df_cols}\n", + "# for cand in candidates:\n", + "# if str(cand).lower() in cols_l:\n", + "# return cols_l[str(cand).lower()]\n", + "# if required:\n", + "# raise KeyError(f\"Missing required {label}. Tried: {candidates}\")\n", + "# return None\n", + "\n", + "\n", + "# def _export_one_dataset(args):\n", + "# \"\"\"\n", + "# Worker: writes maps for one dtag and returns (dtag, ev_rows, site_rows, log_msgs).\n", + "# Expects df_ev to have ONLY normalized columns:\n", + "# ['_event_num','_site_num','_x','_y','_z','_cluster_size']\n", + "# \"\"\"\n", + "# (dtag, df_ev, mtz_dir, out_root, wdf_label, phase_label, esfn, recon_label,\n", + "# mtz_pattern, input_pdb_dir, input_pdb_pattern, input_map_mtz_dir,\n", + "# input_map_mtz_pattern, use_new_style, default_rwork, default_rfree) = args\n", + "\n", + "# log = []\n", + "# ev_rows = []\n", + "# site_rows = []\n", + "\n", + "# try:\n", + "# dtag = str(dtag)\n", + "# ddir = os.path.join(out_root, \"processed_datasets\", dtag)\n", + "# os.makedirs(ddir, exist_ok=True)\n", + "\n", + "# mtz_path = os.path.join(mtz_dir, mtz_pattern.format(dtag=dtag))\n", + "# if not os.path.exists(mtz_path):\n", + "# log.append(f\"[{dtag}] MTZ not found: {mtz_path}\")\n", + "# return (dtag, ev_rows, site_rows, log)\n", + "\n", + "# # read mtz\n", + "# try:\n", + "# ds = rs.read_mtz(mtz_path)\n", + "# except Exception as e:\n", + "# log.append(f\"[{dtag}] Failed to read MTZ: {mtz_path} ({e})\")\n", + "# return (dtag, ev_rows, site_rows, log)\n", + "\n", + "# esf_label = f\"ESF_{esfn}\"\n", + "# for need in (wdf_label, phase_label, esf_label):\n", + "# if need not in ds.columns:\n", + "# raise KeyError(f\"[{dtag}] Missing column '{need}' in {os.path.basename(mtz_path)}\")\n", + "\n", + "# # optional inputs for (2)Fo-Fc/Fo-Fc and model\n", + "# if input_pdb_dir:\n", + "# src_pdb = os.path.join(input_pdb_dir, input_pdb_pattern.format(dtag=dtag))\n", + "# if os.path.exists(src_pdb):\n", + "# shutil.copy(src_pdb, os.path.join(ddir, f\"{dtag}-pandda-input.pdb\"))\n", + "# else:\n", + "# log.append(f\"[{dtag}] Input PDB not found: {src_pdb}\")\n", + "# if input_map_mtz_dir:\n", + "# src_mapmtz = os.path.join(input_map_mtz_dir, input_map_mtz_pattern.format(dtag=dtag))\n", + "# if os.path.exists(src_mapmtz):\n", + "# shutil.copy(src_mapmtz, os.path.join(ddir, f\"{dtag}-pandda-input.mtz\"))\n", + "# else:\n", + "# log.append(f\"[{dtag}] Input map MTZ not found: {src_mapmtz}\")\n", + "\n", + "# # --- Z / average MTZ ---\n", + "# out_zavg = os.path.join(ddir, f\"{dtag}-pandda-output.mtz\")\n", + "# zavg = ds[[wdf_label, phase_label]].copy()\n", + "# zavg = zavg.rename(columns={wdf_label: \"FZVALUES\", phase_label: \"PHZVALUES\"})\n", + "# zavg[\"FZVALUES\"] = zavg[\"FZVALUES\"].astype(\"F\")\n", + "# zavg[\"PHZVALUES\"] = zavg[\"PHZVALUES\"].astype(\"P\")\n", + "# if recon_label and recon_label in ds.columns:\n", + "# zavg[\"FGROUND\"] = ds[recon_label].astype(\"F\")\n", + "# zavg[\"PHGROUND\"] = ds[phase_label].astype(\"P\")\n", + "# zavg.write_mtz(out_zavg)\n", + "\n", + "# one_minus_bdc = 1.0 / float(esfn)\n", + "\n", + "# # Per-event MTZs + CSV rows\n", + "# for _, ev in df_ev.iterrows():\n", + "# i_ev = int(ev[\"_event_num\"])\n", + "\n", + "# if use_new_style:\n", + "# out_event = os.path.join(ddir, f\"{dtag}-pandda-output-event-{i_ev:03d}.mtz\")\n", + "# evds = ds[[esf_label, phase_label]].copy()\n", + "# evds = evds.rename(columns={esf_label: \"FEVENT\", phase_label: \"PHEVENT\"})\n", + "# evds[\"FEVENT\"] = evds[\"FEVENT\"].astype(\"F\")\n", + "# evds[\"PHEVENT\"] = evds[\"PHEVENT\"].astype(\"P\")\n", + "# evds.write_mtz(out_event)\n", + "# else:\n", + "# out_event = os.path.join(ddir, f\"{dtag}-event_{i_ev}_1-BDC_{one_minus_bdc:.3f}_map.native.mtz\")\n", + "# evds = ds[[esf_label, phase_label]].copy()\n", + "# evds = evds.rename(columns={esf_label: \"FWT\", phase_label: \"PHWT\"})\n", + "# evds[\"FWT\"] = evds[\"FWT\"].astype(\"F\")\n", + "# evds[\"PHWT\"] = evds[\"PHWT\"].astype(\"P\")\n", + "# evds.write_mtz(out_event)\n", + "\n", + "# # analysed_resolution via Gemmi\n", + "# mtz_g = gemmi.read_mtz_file(os.fspath(mtz_path))\n", + "# analysed_resolution = f\"{mtz_g.resolution_high():.3f}\"\n", + "\n", + "# ev_rows.append({\n", + "# \"dtag\": dtag,\n", + "# \"event_num\": i_ev,\n", + "# \"site_num\": int(ev[\"_site_num\"]) if pd.notna(ev[\"_site_num\"]) else i_ev,\n", + "# \"1-BDC\": f\"{one_minus_bdc:.6f}\",\n", + "# \"x\": float(ev[\"_x\"]),\n", + "# \"y\": float(ev[\"_y\"]),\n", + "# \"z\": float(ev[\"_z\"]),\n", + "# \"analysed_resolution\": analysed_resolution,\n", + "# \"r_work\": default_rwork,\n", + "# \"r_free\": default_rfree,\n", + "# \"Viewed\": \"False\",\n", + "# \"cluster_size\": (int(ev[\"_cluster_size\"]) if pd.notna(ev.get(\"_cluster_size\")) else \"\"),\n", + "# })\n", + "\n", + "# # Sites table (unique per dataset)\n", + "# for s in sorted(set(pd.to_numeric(df_ev[\"_site_num\"], errors=\"coerce\").dropna().astype(int).tolist())):\n", + "# site_rows.append({\"site_num\": int(s), \"dtag\": dtag})\n", + "\n", + "# except Exception as e:\n", + "# log.append(f\"[{dtag}] ERROR: {e}\")\n", + "\n", + "# return (dtag, ev_rows, site_rows, log)\n", + "\n", + "\n", + "# def export_valdo_to_pandda_inspect(\n", + "# blobs_pickle,\n", + "# mtz_dir,\n", + "# out_root,\n", + "# *,\n", + "# wdf_label=\"WDF\",\n", + "# phase_label=\"Refine_PH2FOFCWT\",\n", + "# esfn=8,\n", + "# recon_label=None,\n", + "# mtz_pattern=\"{dtag}.mtz\",\n", + "# input_pdb_dir=None,\n", + "# input_pdb_pattern=\"{dtag}.pdb\",\n", + "# input_map_mtz_dir=None,\n", + "# input_map_mtz_pattern=\"{dtag}.mtz\",\n", + "# use_new_style=True,\n", + "# default_rwork=\"NA\",\n", + "# default_rfree=\"NA\",\n", + "# top_n=200, # export only the top-N blobs globally (by score if present)\n", + "# n_procs=1, # number of worker processes\n", + "# ):\n", + "# \"\"\"\n", + "# Faster VALDO → PanDDA export with top-N filter and per-dataset parallelization.\n", + "\n", + "# - top_n: If 'score' exists, pick the top-N rows across ALL blobs by descending score.\n", + "# Else, take the first N rows in the pickle. Events renumber per-dtag.\n", + "# - n_procs: Number of worker processes (<= cpu_count()).\n", + "# \"\"\"\n", + "# # Normalize paths\n", + "# blobs_pickle = os.fspath(blobs_pickle)\n", + "# mtz_dir = os.fspath(mtz_dir)\n", + "# out_root = os.fspath(out_root)\n", + "# if input_pdb_dir is not None:\n", + "# input_pdb_dir = os.fspath(input_pdb_dir)\n", + "# if input_map_mtz_dir is not None:\n", + "# input_map_mtz_dir = os.fspath(input_map_mtz_dir)\n", + "\n", + "# os.makedirs(os.path.join(out_root, \"processed_datasets\"), exist_ok=True)\n", + "# os.makedirs(os.path.join(out_root, \"results\"), exist_ok=True)\n", + "\n", + "# # Load blobs\n", + "# blobs = pd.read_pickle(blobs_pickle)\n", + "# if not isinstance(blobs, pd.DataFrame):\n", + "# raise TypeError(f\"Pickle did not contain a pandas DataFrame: {type(blobs)}\")\n", + "\n", + "# # Resolve columns\n", + "# col_dtag = _pick(blobs.columns, \"dtag\", \"dataset\", \"sample\", \"id\", label=\"dtag\")\n", + "# col_x = _pick(blobs.columns, \"x\", \"cart_x\", \"x_Å\", \"x_A\", \"cenx\", \"center_x\", label=\"x\")\n", + "# col_y = _pick(blobs.columns, \"y\", \"cart_y\", \"y_Å\", \"y_A\", \"ceny\", \"center_y\", label=\"y\")\n", + "# col_z = _pick(blobs.columns, \"z\", \"cart_z\", \"z_Å\", \"z_A\", \"cenz\", \"center_z\", label=\"z\")\n", + "# col_site = _pick(blobs.columns, \"site\", \"site_id\", \"site_num\", required=False, label=\"site\")\n", + "# col_score = _pick(blobs.columns, \"score\", \"zscore\", \"z\", \"peak_value\", required=False, label=\"score\")\n", + "# col_size = _pick(blobs.columns, \"cluster_size\", \"n_voxels\", \"size\", \"members\", required=False, label=\"cluster_size\")\n", + "\n", + "# # Top-N across ALL blobs\n", + "# if col_score and col_score in blobs.columns:\n", + "# blobs_top = blobs.sort_values(col_score, ascending=False).head(int(top_n)).copy()\n", + "# else:\n", + "# blobs_top = blobs.head(int(top_n)).copy()\n", + "\n", + "# # Pre-normalize for workers\n", + "# blobs_top[\"_dtag\"] = blobs_top[col_dtag].astype(str)\n", + "# blobs_top[\"_x\"] = pd.to_numeric(blobs_top[col_x], errors=\"coerce\")\n", + "# blobs_top[\"_y\"] = pd.to_numeric(blobs_top[col_y], errors=\"coerce\")\n", + "# blobs_top[\"_z\"] = pd.to_numeric(blobs_top[col_z], errors=\"coerce\")\n", + "# blobs_top[\"_site_num\"] = (\n", + "# pd.to_numeric(blobs_top[col_site], errors=\"coerce\") if (col_site and col_site in blobs_top.columns)\n", + "# else pd.NA\n", + "# )\n", + "# blobs_top[\"_cluster_size\"] = (\n", + "# pd.to_numeric(blobs_top[col_size], errors=\"coerce\") if (col_size and col_size in blobs_top.columns)\n", + "# else pd.NA\n", + "# )\n", + "\n", + "# # Per-dtag group with local event numbering\n", + "# groups = []\n", + "# for dtag, df in blobs_top.groupby(\"_dtag\"):\n", + "# if col_score and col_score in df.columns:\n", + "# df = df.sort_values(col_score, ascending=False).reset_index(drop=True)\n", + "# else:\n", + "# df = df.reset_index(drop=True)\n", + "# df[\"_event_num\"] = np.arange(1, len(df) + 1, dtype=int)\n", + "# if df[\"_site_num\"].isna().all():\n", + "# df[\"_site_num\"] = df[\"_event_num\"]\n", + "# groups.append((dtag, df))\n", + "\n", + "# # Build worker args\n", + "# worker_args = [\n", + "# (dtag, df_ev[[\"_event_num\", \"_site_num\", \"_x\", \"_y\", \"_z\", \"_cluster_size\"]],\n", + "# mtz_dir, out_root, wdf_label, phase_label, esfn, recon_label,\n", + "# mtz_pattern, input_pdb_dir, input_pdb_pattern, input_map_mtz_dir,\n", + "# input_map_mtz_pattern, use_new_style, default_rwork, default_rfree)\n", + "# for (dtag, df_ev) in groups\n", + "# ]\n", + "\n", + "# # Run workers\n", + "# results = []\n", + "# if int(n_procs) > 1:\n", + "# procs = min(int(n_procs), cpu_count())\n", + "# chunksize = max(1, len(worker_args) // (procs * 4))\n", + "# with Pool(processes=procs) as pool:\n", + "# for r in pool.imap_unordered(_export_one_dataset, worker_args, chunksize=chunksize):\n", + "# results.append(r)\n", + "# else:\n", + "# for args in worker_args:\n", + "# results.append(_export_one_dataset(args))\n", + "\n", + "# # Collect rows / logs\n", + "# all_ev_rows, all_site_rows = [], []\n", + "# msgs = []\n", + "# for dtag, ev_rows, site_rows, log in results:\n", + "# all_ev_rows.extend(ev_rows)\n", + "# all_site_rows.extend(site_rows)\n", + "# msgs.extend(log)\n", + "\n", + "# # Write CSVs\n", + "# ev_df = pd.DataFrame(all_ev_rows, columns=[\n", + "# \"dtag\",\"event_num\",\"site_num\",\"1-BDC\",\"x\",\"y\",\"z\",\n", + "# \"analysed_resolution\",\"r_work\",\"r_free\",\"Viewed\",\"cluster_size\"\n", + "# ])\n", + "# si_df = pd.DataFrame(all_site_rows, columns=[\"site_num\",\"dtag\"])\n", + "\n", + "# ev_out = os.path.join(out_root, \"results\", \"pandda_analyse_events.csv\")\n", + "# si_out = os.path.join(out_root, \"results\", \"pandda_analyse_sites.csv\")\n", + "# ev_df.to_csv(ev_out, index=False)\n", + "# si_df.to_csv(si_out, index=False)\n", + "\n", + "# for m in msgs:\n", + "# warnings.warn(m)\n", + "\n", + "# print(f\"Wrote {len(ev_df)} events from {ev_df['dtag'].nunique()} datasets → {out_root} (top_n={top_n}, n_procs={n_procs})\")\n", + "# return out_root\n" + ] + }, + { + "cell_type": "markdown", + "id": "3dbf4ed5-f45f-4b73-8017-4ce544f7d97d", + "metadata": {}, + "source": [ + "## For use with the analyse_pandda_inspect.py plugin script in COOT:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5d8f7366-c67d-4428-a760-f51e10051480", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Wrote 200 events from 187 datasets → pandda_like_output (top_n=200, n_procs=29)\n", + "CPU times: user 987 ms, sys: 376 ms, total: 1.36 s\n", + "Wall time: 11.1 s\n" + ] + } + ], + "source": [ + "%%time\n", + "# from valdo.helper import export_valdo_to_pandda_inspect\n", + "_ = export_valdo_to_pandda_inspect(\n", + " blobs_pickle=blob_path + run_prefix + 'filtered_blob_stats_tagged.pkl', # <- pickle from valdo.blobs.generate_blobs()\n", + " mtz_dir=vae_reconstructed_with_phases_path, # <- MTZs with ESF_8 (or ESF_N), WDF, Refine_PH2FOFCWT\n", + " out_root=\"pandda_like_output\", # <- will create processed_datasets/* and results/*\n", + " esfn=8, # -> 1-BDC = 1/8 = 0.125 in CSV\n", + " wdf_label=\"WDF\",\n", + " phase_label=\"refine_PH2FOFCWT\",\n", + " recon_label='recons', # was None. e.g. \"VAE_recon\" if you want an average map\n", + " # The next three are OPTIONAL but recommended so the inspector shows 2FoFc/FoFc:\n", + " input_pdb_dir=refined_path, # contains {dtag}.pdb\n", + " input_pdb_pattern=\"refine_{dtag}_001.pdb\",\n", + " input_map_mtz_dir=refined_path,# contains {dtag}.mtz with 2FoFc/FoFc map coeffs\n", + " input_map_mtz_pattern=\"refine_{dtag}_001.mtz\",\n", + " use_new_style=True, # FEVENT/PHEVENT; FZVALUES/PHZVALUES\n", + " top_n=200, # export only the top-N blobs globally (by score if present)\n", + " n_procs=ncpu-2, # number of worker processes\n", + ")\n" + ] }, { "cell_type": "code", @@ -5199,7 +5540,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.19" }, "toc": { "base_numbering": 1, diff --git a/valdo/helper.py b/valdo/helper.py index c765562..5f0d9ca 100644 --- a/valdo/helper.py +++ b/valdo/helper.py @@ -4,13 +4,14 @@ import re import shutil import warnings -from multiprocessing import Pool +from multiprocessing import Pool, cpu_count from itertools import repeat from tqdm import tqdm import torch import pandas as pd import numpy as np import reciprocalspaceship as rs +import gemmi def try_gpu(i=0): if torch.cuda.device_count() >= i + 1: @@ -544,3 +545,261 @@ def extrapolate(file_list, F_col="F-obs-scaled", sigF_col="SIGF-obs-scaled", rec result.append(extrapolate_single_file(filename, additional_args)) return result + +# avoid OpenMP oversubscription when also using multiprocessing +os.environ.setdefault("OMP_NUM_THREADS", "1") + +def _pick(df_cols, *candidates, required=True, label="column"): + """Find the first matching column name from candidates (case-insensitive).""" + cols_l = {str(c).lower(): c for c in df_cols} + for cand in candidates: + if str(cand).lower() in cols_l: + return cols_l[str(cand).lower()] + if required: + raise KeyError(f"Missing required {label}. Tried: {candidates}") + return None + + +def _export_one_dataset(args): + """ + Worker: writes maps for one dtag and returns (dtag, ev_rows, site_rows, log_msgs). + Expects df_ev to have ONLY normalized columns: + ['_event_num','_site_num','_x','_y','_z','_cluster_size'] + """ + (dtag, df_ev, mtz_dir, out_root, wdf_label, phase_label, esfn, recon_label, + mtz_pattern, input_pdb_dir, input_pdb_pattern, input_map_mtz_dir, + input_map_mtz_pattern, use_new_style, default_rwork, default_rfree) = args + + log = [] + ev_rows = [] + site_rows = [] + + try: + dtag = str(dtag) + ddir = os.path.join(out_root, "processed_datasets", dtag) + os.makedirs(ddir, exist_ok=True) + + mtz_path = os.path.join(mtz_dir, mtz_pattern.format(dtag=dtag)) + if not os.path.exists(mtz_path): + log.append(f"[{dtag}] MTZ not found: {mtz_path}") + return (dtag, ev_rows, site_rows, log) + + # read mtz + try: + ds = rs.read_mtz(mtz_path) + except Exception as e: + log.append(f"[{dtag}] Failed to read MTZ: {mtz_path} ({e})") + return (dtag, ev_rows, site_rows, log) + + esf_label = f"ESF_{esfn}" + for need in (wdf_label, phase_label, esf_label): + if need not in ds.columns: + raise KeyError(f"[{dtag}] Missing column '{need}' in {os.path.basename(mtz_path)}") + + # optional inputs for (2)Fo-Fc/Fo-Fc and model + if input_pdb_dir: + src_pdb = os.path.join(input_pdb_dir, input_pdb_pattern.format(dtag=dtag)) + if os.path.exists(src_pdb): + shutil.copy(src_pdb, os.path.join(ddir, f"{dtag}-pandda-input.pdb")) + else: + log.append(f"[{dtag}] Input PDB not found: {src_pdb}") + if input_map_mtz_dir: + src_mapmtz = os.path.join(input_map_mtz_dir, input_map_mtz_pattern.format(dtag=dtag)) + if os.path.exists(src_mapmtz): + shutil.copy(src_mapmtz, os.path.join(ddir, f"{dtag}-pandda-input.mtz")) + else: + log.append(f"[{dtag}] Input map MTZ not found: {src_mapmtz}") + + # --- Z / average MTZ --- + out_zavg = os.path.join(ddir, f"{dtag}-pandda-output.mtz") + zavg = ds[[wdf_label, phase_label]].copy() + zavg = zavg.rename(columns={wdf_label: "FZVALUES", phase_label: "PHZVALUES"}) + zavg["FZVALUES"] = zavg["FZVALUES"].astype("F") + zavg["PHZVALUES"] = zavg["PHZVALUES"].astype("P") + if recon_label and recon_label in ds.columns: + zavg["FGROUND"] = ds[recon_label].astype("F") + zavg["PHGROUND"] = ds[phase_label].astype("P") + zavg.write_mtz(out_zavg) + + one_minus_bdc = 1.0 / float(esfn) + + # Per-event MTZs + CSV rows + for _, ev in df_ev.iterrows(): + i_ev = int(ev["_event_num"]) + + if use_new_style: + out_event = os.path.join(ddir, f"{dtag}-pandda-output-event-{i_ev:03d}.mtz") + evds = ds[[esf_label, phase_label]].copy() + evds = evds.rename(columns={esf_label: "FEVENT", phase_label: "PHEVENT"}) + evds["FEVENT"] = evds["FEVENT"].astype("F") + evds["PHEVENT"] = evds["PHEVENT"].astype("P") + evds.write_mtz(out_event) + else: + out_event = os.path.join(ddir, f"{dtag}-event_{i_ev}_1-BDC_{one_minus_bdc:.3f}_map.native.mtz") + evds = ds[[esf_label, phase_label]].copy() + evds = evds.rename(columns={esf_label: "FWT", phase_label: "PHWT"}) + evds["FWT"] = evds["FWT"].astype("F") + evds["PHWT"] = evds["PHWT"].astype("P") + evds.write_mtz(out_event) + + # analysed_resolution via Gemmi + mtz_g = gemmi.read_mtz_file(os.fspath(mtz_path)) + analysed_resolution = f"{mtz_g.resolution_high():.3f}" + + ev_rows.append({ + "dtag": dtag, + "event_num": i_ev, + "site_num": int(ev["_site_num"]) if pd.notna(ev["_site_num"]) else i_ev, + "1-BDC": f"{one_minus_bdc:.6f}", + "x": float(ev["_x"]), + "y": float(ev["_y"]), + "z": float(ev["_z"]), + "analysed_resolution": analysed_resolution, + "r_work": default_rwork, + "r_free": default_rfree, + "Viewed": "False", + "cluster_size": (int(ev["_cluster_size"]) if pd.notna(ev.get("_cluster_size")) else ""), + }) + + # Sites table (unique per dataset) + for s in sorted(set(pd.to_numeric(df_ev["_site_num"], errors="coerce").dropna().astype(int).tolist())): + site_rows.append({"site_num": int(s), "dtag": dtag}) + + except Exception as e: + log.append(f"[{dtag}] ERROR: {e}") + + return (dtag, ev_rows, site_rows, log) + + +def export_valdo_to_pandda_inspect( + blobs_pickle, + mtz_dir, + out_root, + *, + wdf_label="WDF", + phase_label="Refine_PH2FOFCWT", + esfn=8, + recon_label=None, + mtz_pattern="{dtag}.mtz", + input_pdb_dir=None, + input_pdb_pattern="{dtag}.pdb", + input_map_mtz_dir=None, + input_map_mtz_pattern="{dtag}.mtz", + use_new_style=True, + default_rwork="NA", + default_rfree="NA", + top_n=200, # export only the top-N blobs globally (by score if present) + n_procs=1, # number of worker processes +): + """ + Faster VALDO → PanDDA export with top-N filter and per-dataset parallelization. + + - top_n: If 'score' exists, pick the top-N rows across ALL blobs by descending score. + Else, take the first N rows in the pickle. Events renumber per-dtag. + - n_procs: Number of worker processes (<= cpu_count()). + """ + # Normalize paths + blobs_pickle = os.fspath(blobs_pickle) + mtz_dir = os.fspath(mtz_dir) + out_root = os.fspath(out_root) + if input_pdb_dir is not None: + input_pdb_dir = os.fspath(input_pdb_dir) + if input_map_mtz_dir is not None: + input_map_mtz_dir = os.fspath(input_map_mtz_dir) + + os.makedirs(os.path.join(out_root, "processed_datasets"), exist_ok=True) + os.makedirs(os.path.join(out_root, "results"), exist_ok=True) + + # Load blobs + blobs = pd.read_pickle(blobs_pickle) + if not isinstance(blobs, pd.DataFrame): + raise TypeError(f"Pickle did not contain a pandas DataFrame: {type(blobs)}") + + # Resolve columns + col_dtag = _pick(blobs.columns, "dtag", "dataset", "sample", "id", label="dtag") + col_x = _pick(blobs.columns, "x", "cart_x", "x_Å", "x_A", "cenx", "center_x", label="x") + col_y = _pick(blobs.columns, "y", "cart_y", "y_Å", "y_A", "ceny", "center_y", label="y") + col_z = _pick(blobs.columns, "z", "cart_z", "z_Å", "z_A", "cenz", "center_z", label="z") + col_site = _pick(blobs.columns, "site", "site_id", "site_num", required=False, label="site") + col_score = _pick(blobs.columns, "score", "zscore", "z", "peak_value", required=False, label="score") + col_size = _pick(blobs.columns, "cluster_size", "n_voxels", "size", "members", required=False, label="cluster_size") + + # Top-N across ALL blobs + if col_score and col_score in blobs.columns: + blobs_top = blobs.sort_values(col_score, ascending=False).head(int(top_n)).copy() + else: + blobs_top = blobs.head(int(top_n)).copy() + + # Pre-normalize for workers + blobs_top["_dtag"] = blobs_top[col_dtag].astype(str) + blobs_top["_x"] = pd.to_numeric(blobs_top[col_x], errors="coerce") + blobs_top["_y"] = pd.to_numeric(blobs_top[col_y], errors="coerce") + blobs_top["_z"] = pd.to_numeric(blobs_top[col_z], errors="coerce") + blobs_top["_site_num"] = ( + pd.to_numeric(blobs_top[col_site], errors="coerce") if (col_site and col_site in blobs_top.columns) + else pd.NA + ) + blobs_top["_cluster_size"] = ( + pd.to_numeric(blobs_top[col_size], errors="coerce") if (col_size and col_size in blobs_top.columns) + else pd.NA + ) + + # Per-dtag group with local event numbering + groups = [] + for dtag, df in blobs_top.groupby("_dtag"): + if col_score and col_score in df.columns: + df = df.sort_values(col_score, ascending=False).reset_index(drop=True) + else: + df = df.reset_index(drop=True) + df["_event_num"] = np.arange(1, len(df) + 1, dtype=int) + if df["_site_num"].isna().all(): + df["_site_num"] = df["_event_num"] + groups.append((dtag, df)) + + # Build worker args + worker_args = [ + (dtag, df_ev[["_event_num", "_site_num", "_x", "_y", "_z", "_cluster_size"]], + mtz_dir, out_root, wdf_label, phase_label, esfn, recon_label, + mtz_pattern, input_pdb_dir, input_pdb_pattern, input_map_mtz_dir, + input_map_mtz_pattern, use_new_style, default_rwork, default_rfree) + for (dtag, df_ev) in groups + ] + + # Run workers + results = [] + if int(n_procs) > 1: + procs = min(int(n_procs), cpu_count()) + chunksize = max(1, len(worker_args) // (procs * 4)) + with Pool(processes=procs) as pool: + for r in pool.imap_unordered(_export_one_dataset, worker_args, chunksize=chunksize): + results.append(r) + else: + for args in worker_args: + results.append(_export_one_dataset(args)) + + # Collect rows / logs + all_ev_rows, all_site_rows = [], [] + msgs = [] + for dtag, ev_rows, site_rows, log in results: + all_ev_rows.extend(ev_rows) + all_site_rows.extend(site_rows) + msgs.extend(log) + + # Write CSVs + ev_df = pd.DataFrame(all_ev_rows, columns=[ + "dtag","event_num","site_num","1-BDC","x","y","z", + "analysed_resolution","r_work","r_free","Viewed","cluster_size" + ]) + si_df = pd.DataFrame(all_site_rows, columns=["site_num","dtag"]) + + ev_out = os.path.join(out_root, "results", "pandda_analyse_events.csv") + si_out = os.path.join(out_root, "results", "pandda_analyse_sites.csv") + ev_df.to_csv(ev_out, index=False) + si_df.to_csv(si_out, index=False) + + for m in msgs: + warnings.warn(m) + + print(f"Wrote {len(ev_df)} events from {ev_df['dtag'].nunique()} datasets → {out_root} (top_n={top_n}, n_procs={n_procs})") + return out_root +