From afbcbe52e75a3a7633c01adc7abd7911bf302162 Mon Sep 17 00:00:00 2001 From: HavardStridBuholdt Date: Wed, 27 May 2026 18:05:15 +0200 Subject: [PATCH 1/2] First attempt at fixing quasi & target cat. modules. --- ppcpy/config/polly_global_config.json | 1 + ppcpy/interface/picassoProc.py | 206 ++++++++------ ppcpy/misc/helper.py | 248 ++++++++++++++--- ppcpy/qc/qualityMask.py | 39 +-- ppcpy/retrievals/quasi.py | 374 +++++++++++++++++--------- ppcpy/retrievals/quasiV1.py | 90 ++++--- ppcpy/retrievals/quasiV2.py | 133 ++++++--- 7 files changed, 757 insertions(+), 334 deletions(-) diff --git a/ppcpy/config/polly_global_config.json b/ppcpy/config/polly_global_config.json index daf33a0..94ad0cc 100644 --- a/ppcpy/config/polly_global_config.json +++ b/ppcpy/config/polly_global_config.json @@ -222,6 +222,7 @@ "flagUseRetrievedExt4LCCalc": true, "quasi_smooth_h": [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8], "quasi_smooth_t": [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10], + "flagOnlyUseValidQuasiData": true, "IWV_instrument": "AERONET", "maxIWVTLag": 0.0833, "tTwilight": 0.0347, diff --git a/ppcpy/interface/picassoProc.py b/ppcpy/interface/picassoProc.py index 9dcce38..e8abdc3 100644 --- a/ppcpy/interface/picassoProc.py +++ b/ppcpy/interface/picassoProc.py @@ -36,22 +36,21 @@ class PicassoProc: counter = 0 - def __init__(self, rawdata_dict, polly_config_dict, picasso_config_dict): - """initialize the data_cube + def __init__(self, rawdata_dict:dict, polly_config_dict:dict, picasso_config_dict:dict): + """Initialize the data_cube. Parameters ---------- - rawdata_dict - the dict returned by readPollyRawData.readPollyRawData(filename=rawfile) - polly_config_dict - the configuration specific to the specific polly loadConfigs.loadPollyConfig(polly_config_file_fullname, polly_default_config_file) - picasso_config_dict - the general picasso config loadConfigs.loadPicassoConfig(args.picasso_config_file,picasso_default_config_file) - + rawdata_dict : dict + The dict returned by readPollyRawData.readPollyRawData(filename=rawfile) + polly_config_dict : dict + The configuration specific to the specific polly loadConfigs.loadPollyConfig(polly_config_file_fullname, polly_default_config_file) + picasso_config_dict : dict + The general picasso config loadConfigs.loadPicassoConfig(args.picasso_config_file,picasso_default_config_file) Notes ----- - The `polly_default_dict` is not longer available as a separate variable, but is included into the `polly_config_dict` + - The `polly_default_dict` is not longer available as a separate variable, but is included into the `polly_config_dict` """ type(self).counter += 1 @@ -73,7 +72,7 @@ def __init__(self, rawdata_dict, polly_config_dict, picasso_config_dict): def mdate_filename(self): - """get the date from filename in YYYYMMDD""" + """Get the date from filename in YYYYMMDD.""" filename = self.rawdata_dict['filename'] mdate = re.split(r'_',filename)[0:3] YYYY = mdate[0] @@ -82,7 +81,7 @@ def mdate_filename(self): return f"{YYYY}{MM}{DD}" def gf(self, wavelength, meth, telescope): - """get flag shorthand + """Get flag shorthand. i.e., the following two calls are equivalent ``` @@ -127,12 +126,12 @@ def gf(self, wavelength, meth, telescope): # return mdate def mdate_infile(self): - """first date in file as string""" + """First date in file as string.""" mdate_infilename = self.rawdata_dict['measurement_time']['var_data'][0][0] return f"{mdate_infilename}" def check_for_correct_mshots(self): - """check if mshots are more than 1.1 * laser_prf * deltatime or smaller 0""" + """Check if mshots are more than 1.1 * laser_prf * deltatime or smaller 0.""" laser_rep_rate = self.rawdata_dict['laser_rep_rate']['var_data'] mShotsPerPrf = laser_rep_rate * self.polly_config_dict['deltaT'] mShots = self.rawdata_dict['measurement_shots']['var_data'] @@ -143,7 +142,7 @@ def check_for_correct_mshots(self): return condition_check_matrix def filter_or_correct_false_mshots(self): - """ filter or correct the mshots (currently only logging) + """Filter or correct the mshots (currently only logging). .. TODO:: that might be covered via the mcps conversion @@ -162,7 +161,7 @@ def filter_or_correct_false_mshots(self): return self def mdate_consistency(self) -> bool: - """check mdate consistency""" + """Check mdate consistency.""" if self.mdate_filename() == self.mdate_infile(): logging.info('... date in nc-file equals date of filename') return True @@ -171,7 +170,7 @@ def mdate_consistency(self) -> bool: return False def reset_date_infile(self): - """correct the date in the file """ + """Correct the date in the file.""" logging.info('date consistency-check... ') if self.mdate_consistency() == False: logging.info('date in nc-file will be replaced with date of filename.') @@ -182,7 +181,7 @@ def reset_date_infile(self): return self def setChannelTags(self): - """set the channel tags + """Set the channel tags. they are stored as dictionary in ``` @@ -275,17 +274,24 @@ def setChannelTags(self): return self def preprocessing(self, collect_debug:bool=False): - """ - Preprocessing of Lidar data. Including in the followin processes in order: + """Preprocessing of Lidar data. + + Including in the followin processes in order: Dead time correction Background correction Range correction - etc. + etc. + + Parameters + ---------- + collect_debug : bool + If true, collect debug information. Default is False. - Returns: - - Background corrected signal including the background per channel. - - Range corrected signal. - - etc. + Yeilds + ------ + - Background corrected signal including the background per channel. + - Range corrected signal. + - etc. .. TODO:: This is just a first draft for a docstring. Improve it. There is more processes and outputs of the function. """ @@ -333,17 +339,20 @@ def preprocessing(self, collect_debug:bool=False): return self def SaturationDetect(self): - """Saturation Detection - - - """ + """Saturation Detection.""" self.flagSaturation = pollySaturationDetect.pollySaturationDetect( data_cube = self, sigSaturateThresh = self.polly_config_dict['saturate_thresh']) def polarizationCaliD90(self, db_path:str=None): - """calibration with the Delta-90 method + """Calibration with the Delta-90 method. + + Parameters + ---------- + db_path : str + Path to database to read DC values from in case none + are successfully retrieved. Default is None. The stuff that starts here in the matlab version https://github.com/PollyNET/Pollynet_Processing_Chain/blob/5efd7d35596c67ef8672f5948e47d1f9d46ab867/lib/interface/picassoProcV3.m#L442 @@ -368,7 +377,7 @@ def polarizationCaliD90(self, db_path:str=None): def cloudScreen(self, collect_debug:bool=False): - """basic cloud screenting + """Basic cloud screenting. https://github.com/PollyNET/Pollynet_Processing_Chain/blob/b3b8ec7726b75d9db6287dcba29459587ca34491/lib/interface/picassoProcV3.m#L663 """ @@ -376,7 +385,7 @@ def cloudScreen(self, collect_debug:bool=False): def cloudFreeSeg(self): - """cloud free profile segmentation + """Cloud free profile segmentation. https://github.com/PollyNET/Pollynet_Processing_Chain/blob/b3b8ec7726b75d9db6287dcba29459587ca34491/lib/interface/picassoProcV3.m#L707 @@ -390,9 +399,17 @@ def cloudFreeSeg(self): """ self.clFreeGrps = profilesegment.segment(self) - def aggregate_profiles(self, var=None): - """Aggregate highres profiles over cloud free segments + def aggregate_profiles(self, var:str=None): + """Aggregate highres profiles over cloud free segments. + + Parameters + ---------- + var : str + Name of variables to aggregate. + Default is `RCS`, `sigBGCor`, and `BG`. + Notes + ----- .. TODO:: Decide on a consistent way for doing the aggregation, do not mix mean and sum """ @@ -442,8 +459,8 @@ def loadAOD(self): def calcMolecular(self): """Calculate the molecular scattering for the cloud free periods - with the strategy of first averaging the met data and then calculating the rayleigh scattering. - + with the strategy of first averaging the met data and then + calculating the rayleigh scattering. """ time_slices = [self.retrievals_highres['time64'][grp] for grp in self.clFreeGrps] @@ -454,23 +471,28 @@ def calcMolecular(self): def rayleighFit(self, collect_debug:bool=False): """Perform the rayleigh fit procedure. + + Parameters + ---------- + collect_debug : bool + If true, collects debug information. Default is False. - Direct translation from the matlab code. There might be noticeable numerical discrepancies (especially in the residual) - seemed to work ok for 532, 1064, but with issues for 355. - --> The Douglas Peucker algorithm works fine (gives the same result as the Matlab version). However, due the numerical discrepancies - In the residuals of the fit algorithm sometimes different refH segments are chosen. + Notes + ----- + - Direct translation from the matlab code. There might be noticeable numerical discrepancies (especially in the residual) + seemed to work ok for 532, 1064, but with issues for 355. + - The Douglas Peucker algorithm works fine (gives the same result as the Matlab version). However, due the numerical discrepancies + In the residuals of the fit algorithm sometimes different refH segments are chosen. """ - print('Start Rayleigh Fit') + logging.info('Start Rayleigh Fit') logging.warning(f'Potential for differences to matlab code due to numerical issues (subtraction of two small values)') self.retrievals_profile['refH'] = rayleighfit.rayleighfit(self, collect_debug) def polarizationCaliMol(self): - """calibration with molecular signal in reference height - - """ + """Calibration with molecular signal in reference height.""" logging.warning(f'not checked against the matlab code') if self.polly_config_dict['flagMolDepolCali']: @@ -480,7 +502,7 @@ def polarizationCaliMol(self): def transCor(self): - """transmission correction + """GHK-Transmission correction .. TODO:: @@ -506,8 +528,7 @@ def transCor(self): def retrievalKlett(self, oc=False, nr=False): - """apply the klett retrieval - """ + """Apply Klett retrieval.""" retrievalname = 'klett' kwargs = {} @@ -527,8 +548,7 @@ def retrievalKlett(self, oc=False, nr=False): def retrievalRaman(self, oc=False, nr=False, collect_debug=False): - """apply the raman retrieval (nighttime only) - """ + """Apply Raman retrieval (nighttime only).""" retrievalname = 'raman' kwargs = {} @@ -553,11 +573,18 @@ def retrievalRaman(self, oc=False, nr=False, collect_debug=False): self.retrievals_profile['avail_optical_profiles'].append(retrievalname) - def overlapCalc(self, collect_debug=False): - """estimate the overlap function + def overlapCalc(self, collect_debug:bool=False): + """Estimate the overlap function. - different to the matlab version, where an average over all cloud - free periods is taken, it is done here per cloud free segment + Parameters + ---------- + collect_debug : bool + If true, collects debug information. Default is False. + + Notes + ----- + - Different to the matlab version, where an average over all cloud + free periods is taken, it is done here per cloud free segment .. TODO:: The data structure of retrievals_profile['overlap'] differs from @@ -571,7 +598,7 @@ def overlapCalc(self, collect_debug=False): self.retrievals_profile['overlap']['raman'] = overlapEst.run_raman_cldFreeGrps(self, collect_debug=collect_debug) def overlapFixLowestBins(self): - """the lowest bins are affected by stange near range effects""" + """The lowest bins are affected by stange near range effects.""" if not self.polly_config_dict["flagOLCor"]: return height = self.retrievals_highres['range'] @@ -581,7 +608,7 @@ def overlapFixLowestBins(self): def overlapCor(self): - """overlap correction + """Overlap correction the overlap correction is implemented differently to the matlab version first a 2d (time, height) correction array is constructed then it is applied. @@ -605,8 +632,7 @@ def overlapCor(self): def calcDepol(self): - """calculate the volume depol and the particle depol - """ + """Calculate the volume depol and the particle depol.""" for ret_prof_name in self.retrievals_profile['avail_optical_profiles']: print(ret_prof_name) @@ -617,15 +643,13 @@ def calcDepol(self): self, ret_prof_name) def estQualityMask(self): - """estimate the quality mask - - """ + """Estimate the quality mask.""" + logging.info("Estimate quality mask") self.retrievals_highres['quality_mask'] = qualityMask.qualityMask(self) def Angstroem(self): - """calculate the angstrom exponent - """ + """Calculate the angstrom exponent.""" for ret_prof_name in self.retrievals_profile['avail_optical_profiles']: print(ret_prof_name) @@ -669,8 +693,7 @@ def LidarCalibration(self, db_path:str=None, collect_debug:bool=False): def attBsc_volDepol(self): - """highres attBsc and voldepol in 2d - """ + """highres attBsc and voldepol in 2D.""" # for now try with mutable state in data_cube logging.info('attBsc 2d retrieval') @@ -681,8 +704,7 @@ def attBsc_volDepol(self): def molecularHighres(self): - """calculate the molecular signal for the 2d high resolution - """ + """calculate the molecular signal for the 2d high resolution.""" self.mol_2d = molecular.calc_2d( self.met.ds, @@ -691,25 +713,37 @@ def molecularHighres(self): def quasiV1(self): - """quasiV1 retrivals and target categorisation. - """ + """QuasiV1 retrivals and target categorisation.""" + logging.info('Calculating Quasi V1 particle backscatter coefficient') quasiV1.quasi_bsc(self) + + logging.info('Calculating Quasi V1 particle Depolarization ratio') quasi.quasi_pdr(self, version='V1') + + logging.info('Calculating Quasi V1 Ångström Exponent') quasi.quasi_angstrom(self, version='V1') + + logging.info('Producing V1 target categorization') quasi.target_cat(self, version='V1') def quasiV2(self): - """quasiV2 retrivals and target categorisation. - """ + """QuasiV2 retrivals and target categorisation.""" + logging.info('Calculating Quasi V2 particle backscatter coefficient') quasiV2.quasi_bsc(self) + + logging.info('Calculating Quasi V2 particle Depolarization ratio') quasi.quasi_pdr(self, version='V2') + + logging.info('Calculating Quasi V2 Ångström Exponent') quasi.quasi_angstrom(self, version='V2') + + logging.info('Producing V2 target categorization') quasi.target_cat(self, version='V2') def write_2_sql_db(self, parameter:str, db_path:str|None=None, method:str|None=None): - """ write LC or eta to sqlite db table + """Write LC or eta to sqlite db table. Parameters ---------- @@ -723,10 +757,10 @@ def write_2_sql_db(self, parameter:str, db_path:str|None=None, method:str|None=N Notes ----- - The unique columns are needed that new entries overwrite old ones, - otherwise they are just added to the table with same timestamps. - + - The unique columns are needed that new entries overwrite old ones, + otherwise they are just added to the table with same timestamps. """ + if db_path == None: db_path = self.polly_config_dict['calibrationDB'] logging.info(f"read db_path from polly_config_dict {db_path}") @@ -755,12 +789,18 @@ def write_2_sql_db(self, parameter:str, db_path:str|None=None, method:str|None=N sql_db.write_rows_to_sql_db(db_path, table_name, column_names, rows_to_insert) + def read_calibration_db(self, db_path:str|None=None): - """read the calibration constants from database - - time interval includes 24h before and after the actual date + """Read the calibration constants from database. + + Parameters + ---------- + db_path : str + path to database of calibration values. + Time interval includes 24h before and after the actual date """ + if db_path == None: db_path = self.polly_config_dict['calibrationDB'] logging.info(f"read db_path from polly_config_dict {db_path}") @@ -775,16 +815,10 @@ def read_calibration_db(self, db_path:str|None=None): def adding_retrieving_infos_2_polly_config_dict(self): - """ some infos from the polly_config_dict should have there own keys, e.g. reference_search_range - - Parameters - ---------- - self - - Returns - ------- - self (with added polly_config dict_keys) + """Some infos from the polly_config_dict should have there own keys, + e.g. reference_search_range. """ + lower_overlap = np.array(self.polly_config_dict['heightFullOverlap']) reference_search_range_355_total_FR = [int(lower_overlap[self.flag_355_total_FR][0]),self.polly_config_dict['maxDecomHeight355']] reference_search_range_532_total_FR = [int(lower_overlap[self.flag_532_total_FR][0]),self.polly_config_dict['maxDecomHeight532']] diff --git a/ppcpy/misc/helper.py b/ppcpy/misc/helper.py index 36df370..db69f56 100644 --- a/ppcpy/misc/helper.py +++ b/ppcpy/misc/helper.py @@ -16,8 +16,24 @@ from scipy.signal import savgol_coeffs -def detect_path_type(fullpath): - """Detect the type of path (Windows or Linux) based on the input.""" +def detect_path_type(fullpath:str): + """Detect the type of path (Windows or Linux) based on the input. + + Parameters + ---------- + fullpath : str + ... + + Returns + ------- + Path + ... + + Notes + ----- + - TODO: Finish docstring. + """ + def is_windows_path(fullpath): """returns True or False""" return '\\' in str(fullpath) @@ -42,12 +58,32 @@ def is_linux_path(fullpath): def os_name(): - """""" + """...""" return platform.system() def get_input_path(timestamp, device, raw_folder): - """""" + """... + + Parameters + ---------- + timestamp : ... + ... + device : ... + ... + raw_folder : ... + ... + + Returns + ------- + input_path : ... + ... + + Notes + ----- + - TODO: Finish docstring. + """ + YYYY=timestamp[0:4] MM=timestamp[4:6] DD=timestamp[6:8] @@ -56,13 +92,32 @@ def get_input_path(timestamp, device, raw_folder): return input_path -def get_pollyxt_files(timestamp, device, raw_folder, output_path): - """ - This function locates multiple pollyxt level0 nc-zip files from one day measurements, - unzipps the files to output_path - and returns a list of files to be merged - and the title of the new merged nc-file +def get_pollyxt_files(timestamp, device, raw_folder, output_path) -> list: + """This function locates multiple pollyxt level0 nc-zip files from one day measurements, + unzipps the files to output_path and returns a list of files to be merged and the title + of the new merged nc-file. + + Parameters + ---------- + timestamp : ... + ... + device : ... + ... + raw_folder : ... + ... + output_path : ... + ... + + Returns + ------- + polly_files_list : list + ... + + Notes + ----- + - TODO: Finish docstring. """ + input_path = get_input_path(timestamp, device, raw_folder) path_exist = Path(input_path) @@ -159,11 +214,26 @@ def get_pollyxt_files(timestamp, device, raw_folder, output_path): def get_pollyxt_logbook_files(timestamp, device, raw_folder, output_path): + """This function locates multiple pollyxt logbook-zip files from one day measurements, + unzipps the files to output_path and merge them to one file. + + Parameters + ---------- + timestamp : ... + ... + device : ... + ... + raw_folder : ... + ... + output_path : ... + ... + + Notes + ----- + - TODO: Finish docstring. + - TODO: Is returning () needed?? """ - This function locates multiple pollyxt logbook-zip files from one day measurements, - unzipps the files to output_path - and merge them to one file - """ + input_path = get_input_path(timestamp, device, raw_folder) path_exist = Path(input_path) @@ -244,7 +314,22 @@ def get_pollyxt_logbook_files(timestamp, device, raw_folder, output_path): def add_to_list(element, from_list, to_list): - """""" + """Add element form dict to list. + + Parameters + ---------- + element : str + ... + from_list : dict + ... + to_list : list + ... + + Notes + ----- + - TODO: Finish docstring. + """ + if from_list[element] in to_list: pass else: @@ -252,8 +337,29 @@ def add_to_list(element, from_list, to_list): -def checking_vars(timestamp, device, raw_folder, output_path): - """""" +def checking_vars(timestamp, device, raw_folder, output_path:str): + """Check variables ... + + Parameters + ---------- + timestamp : ... + ... + device : ... + ... + raw_folder : ... + ... + output_path : str + ... + + Returns + ------- + ... + + Notes + ----- + - TODO: Finish docstring. + """ + ## select only those nc-files where the values of some specific variables haven't changed vars_of_interest = [ 'measurement_height_resolution', @@ -350,8 +456,12 @@ def checking_attr(timestamp, device, raw_folder, output_path): ------- ... + Notes + ----- + .. TODO:: Finish docstring. .. TODO:: Variables 'force' and 'polly_files_list' are not defined anywhere """ + ## select only those nc-files where the global attributes and the var-attributes haven't changed selected_var_nc_ls = checking_vars(timestamp, device, raw_folder, output_path) if len(selected_var_nc_ls) == 1: @@ -453,7 +563,28 @@ def checking_attr(timestamp, device, raw_folder, output_path): def checking_timestamp(timestamp, device, raw_folder, output_path): - """""" + """Check timestamps of ... + + Parameters + ---------- + timestamp : ... + ... + device : ... + ... + raw_folder : ... + ... + output_ path : ... + ... + + Returns + ------- + ... + + Notes + ----- + - TODO: Finish docstring. + """ + selected_timestamp_nc_ls = checking_attr(timestamp, device, raw_folder, output_path) if len(selected_timestamp_nc_ls) == 1: return selected_timestamp_nc_ls @@ -547,13 +678,34 @@ def checking_timestamp(timestamp, device, raw_folder, output_path): return selected_cor_timestamp_nc_ls -def concat_files(timestamp, device, raw_folder, output_path): - """""" +def concat_files(timestamp, device, raw_folder, output_path:str): + """Concatinate ... files + + Parameters + ---------- + timestamp : ... + ... + device : ... + ... + raw_folder : ... + ... + output_path : str + ... + + Returns + ------- + ... + + Notes + ----- + - TODO: Finish docstring. + """ + ## merge selected files # concat='concat' - sel_polly_files_list = checking_timestamp(timestamp,device,raw_folder,output_path) + sel_polly_files_list = checking_timestamp(timestamp, device, raw_folder, output_path) if len(sel_polly_files_list) == 0: print('no files found for this day. no merging.') @@ -565,7 +717,7 @@ def concat_files(timestamp, device, raw_folder, output_path): if len(sel_polly_files_list) == 1: print("\nOnly one file found. Nothing to merge!\n") - os.rename(sel_polly_files_list[0],Path(output_path,filestring)) + os.rename(sel_polly_files_list[0], Path(output_path, filestring)) return () else: # sel_polly_files_list = [ str(el) for el in sel_polly_files_list] @@ -595,7 +747,7 @@ def write_netcdf(ds: xarray.Dataset, out_file: Path) -> None: ds.to_netcdf(out_file, format="NETCDF4", engine="netcdf4", encoding=enc) - write_netcdf(ds=ds,out_file=Path(output_path,filestring_dummy)) + write_netcdf(ds=ds, out_file=Path(output_path, filestring_dummy)) ds.close() @@ -603,10 +755,10 @@ def write_netcdf(ds: xarray.Dataset, out_file: Path) -> None: for el in sel_polly_files_list: print(el) os.remove(el) - destination_file = Path(output_path,filestring) + destination_file = Path(output_path, filestring) if os.path.exists(destination_file): os.remove(destination_file) # Remove the existing destination file - os.rename(Path(output_path,filestring_dummy),destination_file) + os.rename(Path(output_path, filestring_dummy), destination_file) print('done!') return destination_file @@ -624,23 +776,27 @@ def remove_whitespaces_and_replace_dash_with_underscore(string:str) -> str: new_string : str Modified string """ + new_string = string.replace(" ", "").replace("-", "_") return new_string -def find_matching_dimension(array, reference_list): - """ - Finds the dimension of a 3D array that matches the length of the reference list. +def find_matching_dimension(array:np.ndarray, reference_list:list) -> int: + """Finds the dimension of a 3D array that matches the length of the reference list. Parameters ---------- - array (np.ndarray): The 3D NumPy array to check. - reference_list (list): The list to compare the dimension lengths with. This can also be a dict. + array : np.ndarray + The 3D NumPy array to check. + reference_list : list + The list to compare the dimension lengths with. This can also be a dict. Returns ------- - int: The index of the matching dimension, or -1 if no match is found. + int + The index of the matching dimension, or -1 if no match is found. """ + list_length = len(reference_list) for dim_index, dim_size in enumerate(array.shape): if dim_size == list_length: @@ -649,6 +805,21 @@ def find_matching_dimension(array, reference_list): def channel_2_variable_mapping(data_retrievals, var, channeltags_dict): + """... + + Parameters + ---------- + data_retrievals : ... + ... + var : ... + ... + channeltags_dict : ... + ... + + Notes + ----- + - TODO: Finish docstring. + """ ## check length of channeltags_dict vs. length of data_retrievals[var].shape channel_dim = find_matching_dimension(array=data_retrievals[var], reference_list=channeltags_dict) for ch in channeltags_dict.keys(): @@ -957,12 +1128,20 @@ def smooth2a(matrix_in:np.ndarray, Nr:int, Nc:int=None) -> np.ndarray: ------- matrix_out : ndarray Smoothed version of the input matrix. + + Notes + ----- + .. TODO:: change smoothing inputs Nr and Nc to be window size insted of int(window size / 2) References ---------- - Written by Greg Reeves, March 2009, Division of Biology, Caltech. - Inspired by "smooth2" by Kelly Hilands, October 2004, Applied Research Laboratory, Penn State University. - Developed from code by Olof Liungman, 1997, Dept. of Oceanography, Earth Sciences Centre, Göteborg University. + + ** History ** + + - xxxx-xx-xx: Translated to python """ if Nc is None: @@ -1033,6 +1212,7 @@ def idx2time(cldFreeIdx:np.ndarray[int, int], nIdx:int, nHour:int) -> str: out : str Time stamps of cldFreeIdx as a string (eg. 0920_1020) """ + minPerIdx = (nHour*60)/nIdx cldFreeMin = cldFreeIdx*minPerIdx cldFreeHour = cldFreeMin//60 @@ -1048,10 +1228,8 @@ def idx2time(cldFreeIdx:np.ndarray[int, int], nIdx:int, nHour:int) -> str: return out - - -def default_to_regular(d): - """defaultdict to regular dict """ +def default_to_regular(d:defaultdict): + """defaultdict to regular dict""" if isinstance(d, defaultdict): d = {k: default_to_regular(v) for k, v in d.items()} return d \ No newline at end of file diff --git a/ppcpy/qc/qualityMask.py b/ppcpy/qc/qualityMask.py index 6123c5b..46aee88 100644 --- a/ppcpy/qc/qualityMask.py +++ b/ppcpy/qc/qualityMask.py @@ -1,30 +1,39 @@ import numpy as np +import logging def qualityMask(data_cube): - """ + """Estimate quality mask. + + Categories: + 0 : good data + 1 : low-SNR data + 2 : depolarization calibration periods + 3 : shutter on + 4 : fog + 5 : saturated (NEW) - # 0 in quality_mask means good data - # 1 in quality_mask means low-SNR data - # 2 in quality_mask means depolarization calibration periods - # 3 in quality_mask means shutter on - # 4 in quality_mask means fog - # 5 in quality_mask means saturated (NEW) + Parameters + ---------- + data_cube : object + Main picassoProc object. - The lowSNRMask is actually calculated twice, once in pollyPreprocess.m - and then again when the quality mask is evaluated in picassoProcV3.m. - Also the original processing chain has a quality_mask_vdr, which should be a composite - of cross and total, maybe this can be handled more logically here. + Notes + ----- + - The lowSNRMask is actually calculated twice, once in pollyPreprocess.m + and then again when the quality mask is evaluated in picassoProcV3.m. + Also the original processing chain has a quality_mask_vdr, which should be a composite + of cross and total, maybe this can be handled more logically here. - """ + ** History ** - print(data_cube.retrievals_highres['channel']) + - xxxx-xx-xx: First edition by ... + """ quality_mask = np.zeros_like(data_cube.retrievals_highres['sigBGCor']).astype(int) - print('shape of quality mask', quality_mask.shape) for ich, ch in enumerate(data_cube.retrievals_highres['channel']): - print(ich, ch) + logging.info(f"channel {ich}, {ch}") quality_mask[:, :, ich][data_cube.retrievals_highres['lowSNRMask'][:, :, ich]] = 1 quality_mask[data_cube.retrievals_highres['depCalMask'], :, ich] = 2 quality_mask[data_cube.retrievals_highres['shutterOnMask'], :, ich] = 3 diff --git a/ppcpy/retrievals/quasi.py b/ppcpy/retrievals/quasi.py index ffc8691..ed6909e 100644 --- a/ppcpy/retrievals/quasi.py +++ b/ppcpy/retrievals/quasi.py @@ -7,14 +7,32 @@ from scipy.interpolate import interp1d -def quasi_pdr(data_cube, wvs=[532], version='V1'): - """ +def quasi_pdr(data_cube, wvs:list=[532], version:str='V1'): + """High resolution particle and volume depolarization ratio. + + Parameters + ---------- + data_cube : object + Main PicassoProc object. + wvs : list, optional + Wavelengths to do the retrieval for. Default is [521]. + version : str, optional + Name of qusi version ('V1' or 'V2'). Default is 'V1'. + + Notes + ----- + + ** History ** + + - xxxx-xx-xx: First edition by ... + - xxxx-xx-xx: AI based translation to python + - 2026-05-27: Added flag to use only valid data """ rgs = data_cube.retrievals_highres['range'] time = data_cube.retrievals_highres['time64'] config_dict = data_cube.polly_config_dict - #hres = data_cube.rawdata_dict['measurement_height_resolution']['var_data'] + # hres = data_cube.rawdata_dict['measurement_height_resolution']['var_data'] t = 'total' tel = 'FR' @@ -22,63 +40,109 @@ def quasi_pdr(data_cube, wvs=[532], version='V1'): flagt = data_cube.gf(wv, t, tel) flagc = data_cube.gf(wv, 'cross', tel) - sigt = np.squeeze( - data_cube.retrievals_highres[f'sigBGCor'][:,:,flagt]) - sigc = np.squeeze( - data_cube.retrievals_highres[f'sigBGCor'][:,:,flagc]) + # Extract and smooth total and corss channels + sigt = np.squeeze(data_cube.retrievals_highres[f'sigBGCor'][:, :, flagt].copy()) + sigc = np.squeeze(data_cube.retrievals_highres[f'sigBGCor'][:, :, flagc].copy()) sigt[data_cube.retrievals_highres['depCalMask'], :] = np.nan sigc[data_cube.retrievals_highres['depCalMask'], :] = np.nan # TODO check if halving the window is needed smooth_t = int(np.array(config_dict['quasi_smooth_t'])[data_cube.gf(wv, t, tel)][0] / 2) smooth_h = int(np.array(config_dict['quasi_smooth_h'])[data_cube.gf(wv, t, tel)][0] / 2) - sigt = helper.smooth2a(sigt, smooth_t, smooth_h) sigc = helper.smooth2a(sigc, smooth_t, smooth_h) - f_out = interp1d(data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), - data_cube.mol_2d[f'mBsc_{wv}'].values, axis=0) + # Interpolate molecular backscatter + f_out = interp1d( + data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), + data_cube.mol_2d[f'mBsc_{wv}'].values, axis=0 + ) mBsc = f_out(time.astype('datetime64[s]').astype(int)) - + + # Retrieve volume depolarization ratio vdr, _ = depolarization.calc_profile_vdr( - sigt, sigc, config_dict['G'][flagt], config_dict['G'][flagc], - config_dict['H'][flagt], config_dict['H'][flagc], - data_cube.etaused[f'{wv}_{tel}'], config_dict[f'voldepol_error_{wv}'], - window=1) - - if f"quasiBsc{version}_{wv}_{t}_{tel}" in data_cube.retrievals_highres.keys(): - pass - else: + sigt=sigt, sigc=sigc, + Gt=config_dict['G'][flagt], Gr=config_dict['G'][flagc], + Ht=config_dict['H'][flagt], Hr=config_dict['H'][flagc], + eta=data_cube.etaused[f"{wv}_{tel}"], + voldepol_error=config_dict[f'voldepol_error_{wv}'], + window=1 + ) + + if f"quasiBsc{version}_{wv}_{t}_{tel}" not in data_cube.retrievals_highres.keys(): + logging.warning(f"No quasiBsc{version}_{wv}_{t}_{tel} found, skipping retrival for this channel.") continue - quasi_bsc = data_cube.retrievals_highres[f"quasiBsc{version}_{wv}_{t}_{tel}"] - molDepol = config_dict[f'molDepol{wv}'] - #quasi_pdr = (vdr + 1) / \jjj - # (mBsc * (molDepol - vdr)) * (quasi_bsc * (1 + molDepol) + 1) - 1 + quasi_bsc = data_cube.retrievals_highres[f"quasiBsc{version}_{wv}_{t}_{tel}"].copy() + molDepol = config_dict[f"molDepol{wv}"] + + # quasi_pdr = (vdr + 1) / (mBsc * (molDepol - vdr)) * (quasi_bsc * (1 + molDepol) + 1) - 1 quasi_pdr = (vdr + 1) / (mBsc * (molDepol - vdr) / quasi_bsc / (1 + molDepol) + 1) - 1 + # Flag unvalid data. + if config_dict['flagOnlyUseValidQuasiData']: + quality_mask = np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(wv, t, tel)]) + quasi_pdr[quality_mask != 0] = np.nan + vdr[quality_mask != 0] = np.nan + # quality_mask_vdr = data_cube.retrievals_highres['quality_mask_vdr_532'] # This does not exist yet in PicassoPy + # quasi_pdr[quality_mask_vdr != 0] = np.nan + # vdr[quality_mask_pdr != 0] = np.nan + data_cube.retrievals_highres[f"quasiPdr{version}_{wv}_{t}_{tel}"] = quasi_pdr data_cube.retrievals_highres[f"quasiVdr{version}_{wv}_{t}_{tel}"] = vdr -def quasi_angstrom(data_cube, version='V1'): - """ """ +def quasi_angstrom(data_cube, version:str='V1'): + """High resolution Ångström ratios. + + Parameters + ---------- + data_cube : object + Main PicassoProc object. + version : str, optional + Name of qusi version ('V1' or 'V2'). Default is 'V1'. + + Notes + ----- + + ** History ** + + - xxxx-xx-xx: First edition by ... + - xxxx-xx-xx: AI based translation to python + - 2026-05-21: Fixed calculation + """ t = 'total' tel = 'FR' - if f'quasiBsc{version}_532_{t}_{tel}' in data_cube.retrievals_highres.keys() and f'quasiBsc{version}_1064_{t}_{tel}'in data_cube.retrievals_highres.keys(): - pass - else: - return None - ratio_par_bsc = data_cube.retrievals_highres[f'quasiBsc{version}_532_{t}_{tel}'] / \ - data_cube.retrievals_highres[f'quasiBsc{version}_1064_{t}_{tel}'] - ratio_par_bsc[ratio_par_bsc < 0] = np.nan - data_cube.retrievals_highres[f"quasiAE{version}_532_1064"] = ratio_par_bsc / np.log(532/1064) + if not {f"quasiBsc{version}_532_{t}_{tel}", f"quasiBsc{version}_1064_{t}_{tel}"}.issubset(data_cube.retrievals_highres): + logging.warning(f"Skipping quasiAE{version}_532_1064 retrieval. Missing necessery retrievals.") + return + + ratio_par_bsc = data_cube.retrievals_highres[f'quasiBsc{version}_1064_{t}_{tel}'] / \ + data_cube.retrievals_highres[f'quasiBsc{version}_532_{t}_{tel}'] + ratio_par_bsc[ratio_par_bsc <= 0] = np.nan + data_cube.retrievals_highres[f"quasiAE{version}_532_1064"] = np.log(ratio_par_bsc) / np.log(532/1064) + +def target_cat(data_cube, version:str='V1'): + """Run target categorization. + + Parameters + ---------- + data_cube : object + Main PicassoProc object. + version : str, optional + Name of qusi version ('V1' or 'V2'). Default is 'V1'. + Notes + ----- + + ** History ** -def target_cat(data_cube, version='V1'): - """ """ + - xxxx-xx-xx: First edition by ... + - xxxx-xx-xx: AI based translation to python + - 2026-05-21: Added dependency on Quality mask + """ config_dict = data_cube.polly_config_dict heightFullOverlap = np.array(config_dict['heightFullOverlap']) @@ -96,15 +160,15 @@ def target_cat(data_cube, version='V1'): logging.warning(f"Failed to produce tcMask{version}, missing necessary retrievals.") return - - tcMask = target_classify(data_cube.retrievals_highres['range'], - data_cube.retrievals_highres['attBsc_532_total_FR'], - data_cube.retrievals_highres[f'quasiBsc{version}_1064_total_FR'], - data_cube.retrievals_highres[f'quasiBsc{version}_532_total_FR'], - data_cube.retrievals_highres[f'quasiPdr{version}_532_total_FR'], - data_cube.retrievals_highres[f'quasiVdr{version}_532_total_FR'], - data_cube.retrievals_highres[f'quasiAE{version}_532_1064'], - + tcMask = target_classify( + height=data_cube.retrievals_highres['range'].copy(), # Should target_calssify use range or height? in matlab height is used. + attBeta532=data_cube.retrievals_highres['attBsc_532_total_FR'].copy(), + quasiBsc1064=data_cube.retrievals_highres[f'quasiBsc{version}_1064_total_FR'].copy(), + quasiBsc532=data_cube.retrievals_highres[f'quasiBsc{version}_532_total_FR'].copy(), + quasiPDR532=data_cube.retrievals_highres[f'quasiPdr{version}_532_total_FR'].copy(), + VDR532=data_cube.retrievals_highres[f'quasiVdr{version}_532_total_FR'].copy(), + quasiAE=data_cube.retrievals_highres[f'quasiAE{version}_532_1064'].copy(), + # Keyword Arguments: clearThresBsc1064=config_dict['clear_thres_par_beta_1064'], turbidThresBsc1064=config_dict['turbid_thres_par_beta_1064'], turbidThresBsc532=config_dict['turbid_thres_par_beta_532'], @@ -121,62 +185,98 @@ def target_cat(data_cube, version='V1'): searchCloudBelow=config_dict['search_cloud_below'], hFullOL=hFullOL ) - - tcMask[data_cube.retrievals_highres['depCalMask'], :] = 0 - # add fog mask - # add low SNR mask - # set the value during the depolarization calibration period or in fog conditions to 0 - #data.tcMaskV1(:, data.depCalMask | data.fogMask) = 0; - # set the value with low SNR to 0 - #data.tcMaskV1((data.quality_mask_532 ~= 0) | (data.quality_mask_1064 ~= 0) | (data.quality_mask_vdr_532 ~= 0)) = 0; + + tcMask[np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(532, 'total', 'FR')]) != 0] = 0 + tcMask[np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(1064, 'total', 'FR')]) != 0] = 0 + # tcMask[data_cube.retrievals_highres['quality_mask_vdr_532_FR'] != 0, :] = 0 data_cube.retrievals_highres[f"tcMask{version}"] = tcMask -def target_classify(height, attBeta532, quasiBsc1064, quasiBsc532, quasiPDR532, VDR532, quasiAE, **kwargs): - """aerosol/cloud target classification. +def target_classify(height:np.ndarray, attBeta532:np.ndarray, quasiBsc1064:np.ndarray, quasiBsc532:np.ndarray, + quasiPDR532:np.ndarray, VDR532:np.ndarray, quasiAE:np.ndarray, **kwargs:dict) -> np.ndarray: + """Aerosol & cloud target classification. Parameters ---------- - height (ndarray): Height array (m). - attBeta532 (ndarray): Attenuated backscatter at 532 nm. - quasiBsc1064 (ndarray): Quasi particle backscatter at 1064 nm. (m^{-1}sr^{-1}) - quasiBsc532 (ndarray): Quasi particle backscatter at 532 nm. (m^{-1}sr^{-1}) - quasiPDR532 (ndarray): Quasi particle depolarization ratio at 532 nm. - VDR532 (ndarray): Volume depolarization ratio at 532 nm. - quasiAE (ndarray): Quasi Ångström exponents. - - clearThresBsc1064 (float): Default 1e-8. - turbidThresBsc1064 (float): Default 2e-7. - turbidThresBsc532 (float): Default 2e-7. - dropletThresPDR (float): Default 0.05. - spheriodThresPDR (float): Default 0.07. - unspheroidThresPDR (float): Default 0.2. - iceThresVDR (float): Default 0.3. - iceThresPDR (float): Default 0.35. - largeThresAE (float): Default 0.75. - smallThresAE (float): Default 0.5. - cloudThresBsc1064 (float): Default 2e-5. - minAttnRatioBsc1064 (float): Default 10. - searchCloudAbove (float): Default 300. - searchCloudBelow (float): Default 100. - hFullOL (float): Default 600. + height : ndarray + Height array [m]. + attBeta532 : ndarray + Attenuated backscatter at 532 nm (time x height). + quasiBsc1064 : ndarray + Quasi particle backscatter at 1064 nm (time x height) [m^{-1}sr^{-1}]. + quasiBsc532 : ndarray + Quasi particle backscatter at 532 nm (time x height) [m^{-1}sr^{-1}]. + quasiPDR532 : ndarray + Quasi particle depolarization ratio at 532 nm (time x height). + VDR532 : ndarray + Volume depolarization ratio at 532 nm (time x height). + quasiAE : ndarray + Quasi Ångström exponents 532nm-1064nm (time x height). + kwargs : dict, optional + clearThresBsc1064 : float + Threshold for discriminating clear atmosphere based on particle backscatter at + 1064nm [m^{-1}]. Default 1e-8. + turbidThresBsc1064 : float + Threshold for discriminating turbid atmosphere based on particle backscatter at + 1064nm [m^{-1}]. Default 2e-7. + turbidThresBsc532 : float + Threshold for discriminating turbid atmosphere based on particle backscatter at + 532nm [m^{-1}]. Default 2e-7. + dropletThresPDR : float + Threshold for discriminating cloud droplets based on particle depolarization ratio + at 532nm. Default 0.05. + spheriodThresPDR : float + Threshold for discriminating spheriod paricles based on particle depolarization + ratio at 532nm. Default 0.07. + unspheroidThresPDR : float + Threshold for discriminating unspheriod paricles based on particle depolarization + ratio at 532nm. Default 0.2. + iceThresVDR : float + Threshold for discriminating ice crystals based on volume depolarization ratio + at 532nm. Default 0.3. + iceThresPDR : float + Threshold for discriminating ice crystals based on particle depolarization + ratio at 532nm. Default 0.35. + largeThresAE : float + Threshold for discriminating large particles based on angstroem exponent. + Default 0.75. + smallThresAE : float + Threshold for discriminating small particles based on angstroem exponent. + Default 0.5. + cloudThresBsc1064 : float + Threshold for discriminating cloud layers based on quasi particle backscatter + at 1064nm [m^{-1}]. Default 2e-5. + minAttnRatioBsc1064 : float + Mminimum attenuation factor which could be expected at the first 250m + penatration depth. Default 10. + searchCloudAbove : float + Parameter used in cloud top detection. The cloud top will be searched + between the first bin with quasi particle backscatter at 1064nm larger than + `cloud_thres_par_beta_1064` and + `search_height_above` [m]. Default 300. + searchCloudBelow : float + Parameter used in cloud base detection. The cloud base will be searched + between the first bin with quasi particle backscatter at 1064nm larger than + `cloud_thres_par_beta_1064` and - `search_height_below` [m]. Default 100. + hFullOL : float + Height full overlap [m] Default 600. Returns ------- - ndarray: Classification mask. - 0: No signal - 1: Clean atmosphere - 2: Non-typed particles/low conc. - 3: Aerosol: small - 4: Aerosol: large, spherical - 5: Aerosol: mixture, partly non-spherical - 6: Aerosol: large, non-spherical - 7: Cloud: non-typed - 8: Cloud: water droplets - 9: Cloud: likely water droplets - 10: Cloud: ice crystals - 11: Cloud: likely ice crystal + tc_mask : ndarray + Classification mask (time x height). + 0: No signal + 1: Clean atmosphere + 2: Non-typed particles/low conc. + 3: Aerosol: small + 4: Aerosol: large, spherical + 5: Aerosol: mixture, partly non-spherical + 6: Aerosol: large, non-spherical + 7: Cloud: non-typed + 8: Cloud: water droplets + 9: Cloud: likely water droplets + 10: Cloud: ice crystals + 11: Cloud: likely ice crystal References ---------- @@ -189,6 +289,7 @@ def target_classify(height, attBeta532, quasiBsc1064, quasiBsc532, quasiPDR532, - 2021-06-05: First edition by Zhenping - 2025-03-25: AI based translation to python + - 2026-05-21: Fixed dimension issue """ # Default parameter values @@ -210,7 +311,7 @@ def target_classify(height, attBeta532, quasiBsc1064, quasiBsc532, quasiPDR532, "hFullOL": 600, } - # Override defaults with user-provided values + # Overwrite defaults with user-provided values params.update(kwargs) # Initialize classification mask @@ -240,7 +341,13 @@ def target_classify(height, attBeta532, quasiBsc1064, quasiBsc532, quasiPDR532, tc_mask[flag_large_par_beta_1064 & ~flag_large_ang & flag_small_par_depol] = 4 # Cloud mask - flag_cloud = detect_liquid_bits(height, quasiBsc1064, **kwargs) + flag_cloud = detect_liquid_bits( + height, quasiBsc1064.copy(), + cloudThresBsc1064=params['cloudThresBsc1064'], + minAttnRatioBsc1064=params['minAttnRatioBsc1064'], + searchCloudAbove=params['searchCloudAbove'], + searchCloudBelow=params['searchCloudBelow'] + ) tc_mask[flag_cloud] = 7 tc_mask[flag_cloud & flag_water_par_depol] = 9 tc_mask[flag_cloud & flag_water_par_depol & flag_small_ang] = 8 @@ -250,49 +357,62 @@ def target_classify(height, attBeta532, quasiBsc1064, quasiBsc532, quasiPDR532, tc_mask[flag_large_par_beta_1064 & flag_large_par_beta_532 & flag_ice_par_depol] = 10 # Post-processing - for iPrf in range(attBeta532.shape[1]): - cloud_index = np.where((tc_mask[:, iPrf] > 6) & (tc_mask[:, iPrf] < 10))[0] + for iPrf in range(attBeta532.shape[0]): + cloud_index = np.where((tc_mask[iPrf, :] > 6) & (tc_mask[iPrf, :] < 10))[0] if cloud_index.size > 0: cloudIndx = cloud_index[0] - non_cloud_above = np.where((tc_mask[cloudIndx:, iPrf] < 7) | (tc_mask[cloudIndx:, iPrf] > 9))[0] + non_cloud_above = np.where((tc_mask[iPrf, cloudIndx:] < 7) | (tc_mask[iPrf, cloudIndx:] > 9))[0] if non_cloud_above.size > 0: - tc_mask[non_cloud_above + cloudIndx, iPrf] = 0 + tc_mask[iPrf, non_cloud_above + cloudIndx] = 0 # Set mask to 0 below full overlap height hIndxFullOverlap = np.searchsorted(height, params["hFullOL"]) if hIndxFullOverlap == len(height): hIndxFullOverlap = 70 - tc_mask[:hIndxFullOverlap, :] = 0 + tc_mask[:, :hIndxFullOverlap+1] = 0 return tc_mask -def detect_liquid_bits(height, bsc1064, cloudThresBsc1064=2e-5, minAttnRatioBsc1064=10, - searchCloudAbove=300, searchCloudBelow=100, **kwargs): - """ detect liquid cloud bits. +def detect_liquid_bits(height:np.ndarray, bsc1064:np.ndarray, cloudThresBsc1064:float=2e-5, minAttnRatioBsc1064:float=10, + searchCloudAbove:float=300, searchCloudBelow:float=100) -> np.ndarray: + """Detect liquid cloud bits. Parameters ---------- - height (ndarray): Height array (m). - bsc1064 (ndarray): Particle backscatter at 1064 nm (height x time). - cloudThresBsc1064 (float, optional): Threshold of cloud backscatter at 1064 nm. Default is 2e-5. - minAttnRatioBsc1064 (float, optional): Minimum attenuation required to detect liquid cloud. Default is 10. - searchCloudAbove (float, optional): Cloud search window above current bit (m). Default is 300. - searchCloudBelow (float, optional): Cloud search window below current bit (m). Default is 100. + height : ndarray + Height array [m]. + bsc1064 : ndarray + Particle backscatter at 1064 nm (time x height). + cloudThresBsc1064 : float, optional + Threshold of cloud backscatter at 1064 nm. Default is 2e-5. + minAttnRatioBsc1064 : float, optional + Minimum attenuation required to detect liquid cloud. Default is 10. + searchCloudAbove : float, optional + Cloud search window above current bit [m]. Default is 300. + searchCloudBelow : float, optional + Cloud search window below current bit [m]. Default is 100. Returns ------- - ndarray: Logical mask (height x time) for detected liquid cloud regions. + falgLiquid : ndarray + Boolean mask (time x height) for detected liquid cloud regions. Notes ----- + - Warning: Still under testing! + .. TODO:: Are the indices correctlly translated from matlab? **History** - 2021-06-05: First edition by Zhenping - 2025-03-25: AI based translation to python + - 2026-05-21: Fixed dimension issue """ - bsc1064 = np.nan_to_num(bsc1064) # Replace NaN and Inf with 0 + + logging.warning("Still in testing phase, may show strange classifications.") + # bsc1064 = np.nan_to_num(bsc1064) # Replace NaN 0 and inf with large positive or negative numbers + bsc1064[~np.isfinite(bsc1064)] = 0 # Replace NaN and inf with 0 flagLiquid = np.zeros_like(bsc1064, dtype=bool) hRes = height[1] - height[0] @@ -301,47 +421,53 @@ def detect_liquid_bits(height, bsc1064, cloudThresBsc1064=2e-5, minAttnRatioBsc1 if searchCloudAbove < jump_distance: raise ValueError(f'searchCloudAbove should be larger than jump_distance ({jump_distance}).') - - search_bins_above = int(np.ceil(searchCloudAbove / hRes)) - search_bins_below = int(np.ceil(searchCloudBelow / hRes)) + + # search_bins_above = int(np.ceil(searchCloudAbove / hRes)) # old + # search_bins_below = int(np.ceil(searchCloudBelow / hRes)) # old + search_bins_above = np.searchsorted(height, searchCloudAbove) + search_bins_below = np.searchsorted(height, searchCloudBelow) diff_factor = 0.25 - - for iTime in range(bsc1064.shape[1]): + for iTime in range(bsc1064.shape[0]): start_bin = 1 - while start_bin <= (bsc1064.shape[0] - jump_hBins): - hIndLargeBsc_candidates = np.where(bsc1064[start_bin:(bsc1064.shape[0] - search_bins_above), iTime] > cloudThresBsc1064)[0] + while start_bin <= (bsc1064.shape[1] - jump_hBins): + # hIndLargeBsc_candidates = np.where(bsc1064[iTime, start_bin:(bsc1064.shape[1]-search_bins_above)] > cloudThresBsc1064)[0] # old + hIndLargeBsc_candidates = np.where(bsc1064[iTime, start_bin:(bsc1064.shape[1]-search_bins_above)+1] > cloudThresBsc1064)[0] if hIndLargeBsc_candidates.size == 0: break - + hIndLargeBsc = hIndLargeBsc_candidates[0] + start_bin - if np.min(bsc1064[hIndLargeBsc:(hIndLargeBsc + jump_hBins), iTime] / bsc1064[hIndLargeBsc, iTime]) < (1 / minAttnRatioBsc1064): + # if np.min(bsc1064[iTime, hIndLargeBsc:(hIndLargeBsc+jump_hBins)] / bsc1064[iTime, hIndLargeBsc]) < (1 / minAttnRatioBsc1064): # old + if np.min(bsc1064[iTime, hIndLargeBsc:(hIndLargeBsc+jump_hBins)+1] / bsc1064[iTime, hIndLargeBsc]) < (1 / minAttnRatioBsc1064): + search_start = max(0, hIndLargeBsc - search_bins_below) - diff_bsc1064 = np.diff(bsc1064[search_start:hIndLargeBsc + 1, iTime]) + diff_bsc1064 = np.diff(bsc1064[iTime, search_start:hIndLargeBsc+1]) if diff_bsc1064.size == 0: start_bin = hIndLargeBsc + 1 continue - + max_diff = np.max(diff_bsc1064) - base_cloud_candidates = np.where(diff_bsc1064 > max_diff * diff_factor)[0] + base_cloud_candidates = np.where(diff_bsc1064 > max_diff*diff_factor)[0] base_cloud = (base_cloud_candidates[0] + search_start) if base_cloud_candidates.size > 0 else hIndLargeBsc - top_cloud_candidates = np.where(bsc1064[(hIndLargeBsc + 1):(hIndLargeBsc + search_bins_above), iTime] != 0)[0] + # top_cloud_candidates = np.where(bsc1064[iTime, (hIndLargeBsc+1):(hIndLargeBsc+search_bins_above)] != 0)[0] # old + top_cloud_candidates = np.where(bsc1064[iTime, (hIndLargeBsc+1):(hIndLargeBsc+search_bins_above)+1] != 0)[0] top_cloud = (top_cloud_candidates[-1] + hIndLargeBsc) if top_cloud_candidates.size > 0 else None if top_cloud is None: - diff_bsc1064_top = np.diff(bsc1064[hIndLargeBsc:(hIndLargeBsc + search_bins_above), iTime]) + # diff_bsc1064_top = np.diff(bsc1064[iTime, hIndLargeBsc:(hIndLargeBsc+search_bins_above)]) # old + diff_bsc1064_top = np.diff(bsc1064[iTime, hIndLargeBsc:(hIndLargeBsc+search_bins_above)+1]) if diff_bsc1064_top.size > 0: max_diff_top = np.max(-diff_bsc1064_top) - top_cloud_candidates = np.where(-diff_bsc1064_top > max_diff_top * diff_factor)[0] + top_cloud_candidates = np.where(-diff_bsc1064_top > max_diff_top*diff_factor)[0] top_cloud = (top_cloud_candidates[-1] + hIndLargeBsc) if top_cloud_candidates.size > 0 else hIndLargeBsc else: top_cloud = hIndLargeBsc - flagLiquid[base_cloud:top_cloud + 1, iTime] = True + flagLiquid[iTime, base_cloud:top_cloud+1] = True start_bin = top_cloud + 1 else: start_bin = hIndLargeBsc + 1 diff --git a/ppcpy/retrievals/quasiV1.py b/ppcpy/retrievals/quasiV1.py index 113b7cc..06aaab4 100644 --- a/ppcpy/retrievals/quasiV1.py +++ b/ppcpy/retrievals/quasiV1.py @@ -6,81 +6,101 @@ from scipy.interpolate import interp1d def quasi_bsc(data_cube): - """calculate the quasi backscatter for a datacube + """Run Quasi Backscatter retrieval Version 1. + Parameters + ---------- + data_cube : object + Main PicassoProc object. + + Notes + ----- + + ** History ** + + - xxxx-xx-xx: First edition by ... + - xxxx-xx-xx: AI based translation to python """ rgs = data_cube.retrievals_highres['range'] time = data_cube.retrievals_highres['time64'] config_dict = data_cube.polly_config_dict - hres = data_cube.rawdata_dict['measurement_height_resolution']['var_data'] + # hres = data_cube.rawdata_dict['measurement_height_resolution']['var_data'] heightFullOverlap = np.array(config_dict['heightFullOverlap']) channels = [(355, 'total', 'FR'), (532, 'total', 'FR'), (1064, 'total', 'FR')] #channels = [(532, 'total', 'FR')] for wv, t, tel in channels: - if f'attBsc_{wv}_{t}_{tel}' in data_cube.retrievals_highres.keys(): - pass - else: - logging.info(f'{wv} {t} {tel} skipped at quasi bsc') + if f'attBsc_{wv}_{t}_{tel}' not in data_cube.retrievals_highres.keys(): + logging.warning(f'{wv} {t} {tel} skipped at quasi bsc') continue - att_beta_qsi = data_cube.retrievals_highres[f'attBsc_{wv}_{t}_{tel}'].copy() - # TODO check if halving the window is needed + # Extract and smooth elastic attenuated backscatter + att_beta_qsi = data_cube.retrievals_highres[f"attBsc_{wv}_{t}_{tel}"].copy() + if config_dict['flagOnlyUseValidQuasiData']: + quality_mask = np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(wv, t, tel)]) + att_beta_qsi[quality_mask != 0] = np.nan + # TODO check if halving the window is needed: Yes at the moment they need to be halved. smooth_t = int(np.array(config_dict['quasi_smooth_t'])[data_cube.gf(wv, t, tel)][0] / 2) smooth_h = int(np.array(config_dict['quasi_smooth_h'])[data_cube.gf(wv, t, tel)][0] / 2) - - print(att_beta_qsi.shape, smooth_t, smooth_h) att_beta_qsi = helper.smooth2a(att_beta_qsi, smooth_t, smooth_h) - f_out = interp1d(data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), - data_cube.mol_2d[f'mBsc_{wv}'].values, axis=0) + # Interpolat molecular profiles + f_out = interp1d( + data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), + data_cube.mol_2d[f'mBsc_{wv}'].values, axis=0 + ) mBsc = f_out(time.astype('datetime64[s]').astype(int)) - f_out = interp1d(data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), - data_cube.mol_2d[f'mExt_{wv}'].values, axis=0) + f_out = interp1d( + data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), + data_cube.mol_2d[f'mExt_{wv}'].values, axis=0 + ) mExt = f_out(time.astype('datetime64[s]').astype(int)) - print(mBsc.shape, mExt.shape) + # Interpolate attenuated backscatter under height full overlap hFullOverlap = heightFullOverlap[data_cube.gf(wv, t, tel)][0] hBaseInd = np.argmax(rgs >= hFullOverlap) - print('hFullOverlap', hFullOverlap, hBaseInd) + att_beta_qsi[:, :hBaseInd] = np.repeat(att_beta_qsi[:, hBaseInd][:, np.newaxis], hBaseInd, axis=1) - att_beta_qsi[:, :hBaseInd] = np.repeat(att_beta_qsi[:,hBaseInd][:,np.newaxis], hBaseInd, axis=1) + # Retrieve Backscatter and Extinction quasi_par_bsc, quasi_par_ext = quasi_retrieval( - rgs, att_beta_qsi, mExt, mBsc, config_dict[f'LR{wv}'], nIters=6 + height=rgs, + att_beta=att_beta_qsi, + molExt=mExt, + molBsc=mBsc, + LRaer=config_dict[f'LR{wv}'], + nIters=6 ) data_cube.retrievals_highres[f"quasiBscV1_{wv}_{t}_{tel}"] = quasi_par_bsc data_cube.retrievals_highres[f"quasiExtV1_{wv}_{t}_{tel}"] = quasi_par_ext - - - -def quasi_retrieval(height, att_beta, molExt, molBsc, LRaer, nIters=2): +def quasi_retrieval(height:np.ndarray, att_beta:np.ndarray, molExt:np.ndarray, + molBsc:np.ndarray, LRaer:float, nIters:int=2) -> tuple: """Retrieve aerosol optical properties using the quasi-retrieving method. Parameters ---------- - height (array): - Height in meters [m]. - att_beta (ndarray): + height : ndarray + Height [m]. + att_beta : ndarray Attenuated backscatter [m^{-1}Sr^{-1}]. - molExt (ndarray): + molExt : ndarray Molecular extinction coefficient [m^{-1}]. - molBsc (ndarray): + molBsc : ndarray Molecular backscatter coefficient [m^{-1}Sr^{-1}]. - LRaer (float): + LRaer : float Aerosol lidar ratio [Sr]. - nIters (int, optional): - Number of iterations (default is 2). + nIters : int, optional + Number of iterations. Default is 2. Returns ------- - quasi_par_bsc (ndarray): + quasi_par_bsc : ndarray Quasi particle backscatter coefficient [m^{-1}Sr^{-1}]. - quasi_par_ext (ndarray): + quasi_par_ext : ndarray Quasi particle extinction coefficient [m^{-1}]. References @@ -98,18 +118,18 @@ def quasi_retrieval(height, att_beta, molExt, molBsc, LRaer, nIters=2): """ # Compute differential heights - diff_height = np.repeat(np.hstack(([height[0]], np.diff(height)))[np.newaxis,:], att_beta.shape[0], axis=0) - #print('diff_height', diff_height.shape) + diff_height = np.repeat(np.hstack(([height[0]], np.diff(height)))[np.newaxis, :], att_beta.shape[0], axis=0) # Compute molecular attenuation mol_att = np.exp(-np.cumsum(molExt * diff_height, axis=1)) + # Initialize quasi particle extinction coefficient quasi_par_ext = np.zeros_like(molBsc) # Iterative retrieval process for _ in range(nIters): quasi_par_att = np.exp(-np.nancumsum(quasi_par_ext * diff_height, axis=1)) - quasi_par_bsc = att_beta / (mol_att * quasi_par_att) ** 2 - molBsc + quasi_par_bsc = att_beta / (mol_att * quasi_par_att)**2 - molBsc quasi_par_bsc[quasi_par_bsc < 0] = 0 # Ensure no negative values quasi_par_ext = quasi_par_bsc * LRaer diff --git a/ppcpy/retrievals/quasiV2.py b/ppcpy/retrievals/quasiV2.py index 4a3d222..53f0f51 100644 --- a/ppcpy/retrievals/quasiV2.py +++ b/ppcpy/retrievals/quasiV2.py @@ -10,7 +10,20 @@ def quasi_bsc(data_cube): - """ + """Run Quasi Backscatter retrieval Version 2. + + Parameters + ---------- + data_cube : object + Main PicassoProc object. + + Notes + ----- + + ** History ** + + - xxxx-xx-xx: First edition by ... + - xxxx-xx-xx: AI based translation to python """ rgs = data_cube.retrievals_highres['range'] @@ -23,65 +36,105 @@ def quasi_bsc(data_cube): for (wv, t, tel), (wv_r, t_r, tel_r) in channels: if not {f'attBsc_{wv}_{t}_{tel}', f'attBsc_{wv_r}_{t}_{tel}'}.issubset(data_cube.retrievals_highres): - logging.info(f"{wv}_{t}_{tel} skipped at quasi bsc") + logging.warning(f"{wv}_{t}_{tel} skipped at quasi bsc") continue + # Extract and smooth elastic attenuated backscatter att_beta_qsi = data_cube.retrievals_highres[f'attBsc_{wv}_{t}_{tel}'].copy() - # TODO check if halving the window is needed + if config_dict['flagOnlyUseValidQuasiData']: + quality_mask = np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(wv, t, tel)]) + att_beta_qsi[quality_mask != 0] = np.nan + # TODO check if halving the window is needed: Yes at the moment they need to be halved. smooth_t = int(np.array(config_dict['quasi_smooth_t'])[data_cube.gf(wv, t, tel)][0] / 2) smooth_h = int(np.array(config_dict['quasi_smooth_h'])[data_cube.gf(wv, t, tel)][0] / 2) att_beta_qsi = helper.smooth2a(att_beta_qsi, smooth_t, smooth_h) + # Extract and smooth raman attenuated backscatter att_beta_r_qsi = data_cube.retrievals_highres[f'attBsc_{wv_r}_{t}_{tel}'].copy() - # TODO check if halving the window is needed - smooth_t = int(np.array(config_dict['quasi_smooth_t'])[data_cube.gf(wv_r, t, tel)][0] / 2) - smooth_h = int(np.array(config_dict['quasi_smooth_h'])[data_cube.gf(wv_r, t, tel)][0] / 2) - att_beta_r_qsi = helper.smooth2a(att_beta_r_qsi, smooth_t, smooth_h) - - - f_out = interp1d(data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), - data_cube.mol_2d[f'mBsc_{wv}'].values, axis=0) + if config_dict['flagOnlyUseValidQuasiData']: + quality_mask_r = np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(wv_r, t_r, tel_r)]) + att_beta_r_qsi[quality_mask_r != 0] = np.nan + # TODO check if halving the window is needed: Yes at the moment they need to be halved. + smooth_t_r = int(np.array(config_dict['quasi_smooth_t'])[data_cube.gf(wv_r, t_r, tel_r)][0] / 2) + smooth_h_r = int(np.array(config_dict['quasi_smooth_h'])[data_cube.gf(wv_r, t_r, tel_r)][0] / 2) + att_beta_r_qsi = helper.smooth2a(att_beta_r_qsi, smooth_t_r, smooth_h_r) + + # Interpolate the molecular profiles + f_out = interp1d( + data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), + data_cube.mol_2d[f'mBsc_{wv}'].values, axis=0 + ) mBsc = f_out(time.astype('datetime64[s]').astype(int)) - f_out = interp1d(data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), - data_cube.mol_2d[f'mExt_{wv}'].values, axis=0) + f_out = interp1d( + data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), + data_cube.mol_2d[f'mExt_{wv}'].values, axis=0 + ) mExt = f_out(time.astype('datetime64[s]').astype(int)) - - f_out = interp1d(data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), - data_cube.mol_2d[f'mExt_{wv_r}'].values, axis=0) + f_out = interp1d( + data_cube.mol_2d['time'].values.astype('datetime64[s]').astype(int), + data_cube.mol_2d[f'mExt_{wv_r}'].values, axis=0 + ) mExt_r = f_out(time.astype('datetime64[s]').astype(int)) - + # Retrieve Backscatter and Extinction quasi_par_bsc, quasi_par_ext = quasi_retrieval2( - rgs, att_beta_qsi, att_beta_r_qsi, float(wv), float(wv_r), - mExt, mBsc, mExt_r, 0.5, config_dict[f'LR{wv}'], nIters=3 + height=rgs, + att_beta_el=att_beta_qsi, + att_beta_ra=att_beta_r_qsi, + wv=float(wv), + wv_r=float(wv_r), + molExtEl=mExt, + molBscEl=mBsc, + molExtRa=mExt_r, + AE=0.5, + LR=config_dict[f'LR{wv}'], + nIters=3 ) + # .. TODO:: In matlab the output ´quasi_par_bsc´ is smoothed again. + # For some reason this causes only two categorize to be classified in the target cat. V2 for PicassoPy (class 0 and 1). + # quasi_par_bsc = helper.smooth2a(quasi_par_bsc, smooth_t, smooth_h) + # quasi_par_bsc = helper.smooth2a(quasi_par_ext, smooth_t, smooth_h) + data_cube.retrievals_highres[f"quasiBscV2_{wv}_{t}_{tel}"] = quasi_par_bsc data_cube.retrievals_highres[f"quasiExtV2_{wv}_{t}_{tel}"] = quasi_par_ext - - -def quasi_retrieval2(height, att_beta_el, att_beta_ra, wv, wv_r, molExtEl, molBscEl, molExtRa, AE, LR, nIters=1): +def quasi_retrieval2(height:np.ndarray, att_beta_el:np.ndarray, att_beta_ra:np.ndarray, wv:float, wv_r:float, + molExtEl:np.ndarray, molBscEl:np.ndarray, molExtRa:np.ndarray, AE:float, LR:float, nIters:int=1) -> tuple: """Retrieve aerosol optical properties using quasi retrieval method (V2), improved by utilizing Raman signals. Parameters ---------- - height (ndarray): Height array [m]. - att_beta_el (ndarray): Attenuated backscatter at elastic wavelength [m^{-1}sr^{-1}]. - att_beta_ra (ndarray): Attenuated backscatter at Raman wavelength [m^{-1}sr^{-1}]. - wavelength (int): Elastic backscatter wavelength [nm]. - molExtEl (ndarray): Molecular extinction coefficient at elastic wavelength [m^{-1}]. - molBscEl (ndarray): Molecular backscatter coefficient at elastic wavelength [m^{-1}sr^{-1}]. - molExtRa (ndarray): Molecular extinction coefficient at Raman wavelength [m^{-1}]. - AE (float): Extinction-related Ångström exponent. - LR (float): Aerosol lidar ratio [sr]. - nIters (int, optional): Number of iterations. Default is 1. + height : ndarray + Height array [m]. + att_beta_el : ndarray + Attenuated backscatter at elastic wavelength [m^{-1}sr^{-1}]. + att_beta_ra : ndarray + Attenuated backscatter at Raman wavelength [m^{-1}sr^{-1}]. + wv : float + Elastic backscatter wavelength [nm]. + wv_r : float + Raman backscatter wavelength [nm]. + molExtEl : ndarray + Molecular extinction coefficient at elastic wavelength [m^{-1}]. + molBscEl : ndarray + Molecular backscatter coefficient at elastic wavelength [m^{-1}sr^{-1}]. + molExtRa : ndarray + Molecular extinction coefficient at Raman wavelength [m^{-1}]. + AE : float + Extinction-related Ångström exponent. + LR : float + Aerosol lidar ratio [sr]. + nIters : int, optional + Number of iterations. Default is 1. Returns ------- - quasi_par_bsc (ndarray): Quasi particle backscatter coefficient [m^{-1}sr^{-1}]. - quasi_par_ext (ndarray): Quasi particle extinction coefficient [m^{-1}]. + quasi_par_bsc : ndarray + Quasi particle backscatter coefficient [m^{-1}sr^{-1}]. + quasi_par_ext : ndarray + Quasi particle extinction coefficient [m^{-1}]. References ---------- @@ -95,23 +148,25 @@ def quasi_retrieval2(height, att_beta_el, att_beta_ra, wv, wv_r, molExtEl, molBs - 2021-06-07: first edition by Zhenping - 2025-03-30: AI translation to python """ - diff_height = np.vstack((np.diff(height, prepend=height[0]))).T + + # diff_height = np.vstack((np.diff(height, prepend=height[0]))).T # old + diff_height = np.repeat(np.hstack(([height[0]], np.diff(height)))[np.newaxis, :], att_beta_el.shape[0], axis=0) quasi_par_ext = np.zeros_like(molBscEl) OD_mol = np.nancumsum(molExtEl * diff_height, axis=1) OD_mol_r = np.nancumsum(molExtRa * diff_height, axis=1) if wv == 1064 and wv_r == 607: - molBsc532 = molBscEl * (1064 / 532) ** 4 - OD_mol_532 = np.nancumsum(molExtRa * (607 / 532) ** 4 * diff_height, axis=1) + molBsc532 = molBscEl * (1064 / 532)**4 + OD_mol_532 = np.nancumsum(molExtRa * (607 / 532)**4 * diff_height, axis=1) for _ in range(nIters): OD_par = np.nancumsum(quasi_par_ext * diff_height, axis=1) if wv == 1064 and wv_r == 607: - quasi_par_att = np.exp((2 - (1064 / 607) ** AE - (1064 / 532) ** AE) * OD_par + (2 * OD_mol - OD_mol_532 - OD_mol)) * molBsc532 + quasi_par_att = np.exp((2 - (1064 / 607)**AE - (1064 / 532) ** AE) * OD_par + (2 * OD_mol - OD_mol_532 - OD_mol)) * molBsc532 else: - quasi_par_att = np.exp((1 - (wv / wv_r) ** AE) * OD_par + (OD_mol - OD_mol_r)) * molBscEl + quasi_par_att = np.exp((1 - (wv / wv_r)**AE) * OD_par + (OD_mol - OD_mol_r)) * molBscEl quasi_par_bsc = (att_beta_el / att_beta_ra) * quasi_par_att - molBscEl quasi_par_ext = quasi_par_bsc * LR From 79041a4ea9ea00e66a0b33b65e5f548ada35168f Mon Sep 17 00:00:00 2001 From: HavardStridBuholdt Date: Thu, 28 May 2026 17:06:09 +0200 Subject: [PATCH 2/2] Added missing flagging based on quality masks in Quasi and Target Cat. retrieval. - Matlabs quality_mask_vdr is now substituted by combining the total and cross masks when applied. - Target Cat. V2 now depends on quality_mask_607. --- ppcpy/retrievals/quasi.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/ppcpy/retrievals/quasi.py b/ppcpy/retrievals/quasi.py index ed6909e..0c0de04 100644 --- a/ppcpy/retrievals/quasi.py +++ b/ppcpy/retrievals/quasi.py @@ -81,12 +81,10 @@ def quasi_pdr(data_cube, wvs:list=[532], version:str='V1'): # Flag unvalid data. if config_dict['flagOnlyUseValidQuasiData']: - quality_mask = np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(wv, t, tel)]) - quasi_pdr[quality_mask != 0] = np.nan - vdr[quality_mask != 0] = np.nan - # quality_mask_vdr = data_cube.retrievals_highres['quality_mask_vdr_532'] # This does not exist yet in PicassoPy - # quasi_pdr[quality_mask_vdr != 0] = np.nan - # vdr[quality_mask_pdr != 0] = np.nan + quality_mask_t = np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, flagt]) + quality_mask_c = np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, flagc]) + quasi_pdr[(quality_mask_t != 0) | (quality_mask_c != 0)] = np.nan + vdr[(quality_mask_t != 0) | (quality_mask_c != 0)] = np.nan data_cube.retrievals_highres[f"quasiPdr{version}_{wv}_{t}_{tel}"] = quasi_pdr data_cube.retrievals_highres[f"quasiVdr{version}_{wv}_{t}_{tel}"] = vdr @@ -147,6 +145,7 @@ def target_cat(data_cube, version:str='V1'): config_dict = data_cube.polly_config_dict heightFullOverlap = np.array(config_dict['heightFullOverlap']) + # Version spesific configurations if version == 'V1': hFullOL = np.max([ heightFullOverlap[data_cube.gf(532, 'total', 'FR')][0], @@ -154,12 +153,14 @@ def target_cat(data_cube, version:str='V1'): else: hFullOL = 0 + # Check for necessary inputs if not {'attBsc_532_total_FR', f'quasiBsc{version}_1064_total_FR', f'quasiBsc{version}_532_total_FR', f'quasiPdr{version}_532_total_FR', f'quasiVdr{version}_532_total_FR', f'quasiAE{version}_532_1064' }.issubset(data_cube.retrievals_highres): logging.warning(f"Failed to produce tcMask{version}, missing necessary retrievals.") return + # Retrieve target categories tcMask = target_classify( height=data_cube.retrievals_highres['range'].copy(), # Should target_calssify use range or height? in matlab height is used. attBeta532=data_cube.retrievals_highres['attBsc_532_total_FR'].copy(), @@ -186,9 +187,13 @@ def target_cat(data_cube, version:str='V1'): hFullOL=hFullOL ) - tcMask[np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(532, 'total', 'FR')]) != 0] = 0 - tcMask[np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(1064, 'total', 'FR')]) != 0] = 0 - # tcMask[data_cube.retrievals_highres['quality_mask_vdr_532_FR'] != 0, :] = 0 + # Mask invalid data + if 'quality_mask' in data_cube.retrievals_highres: + tcMask[np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(532, 'total', 'FR')]) != 0] = 0 + tcMask[np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(532, 'cross', 'FR')]) != 0] = 0 + tcMask[np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(1064, 'total', 'FR')]) != 0] = 0 + if version == 'V2': + tcMask[np.squeeze(data_cube.retrievals_highres['quality_mask'][:, :, data_cube.gf(607, 'total', 'FR')]) != 0] = 0 data_cube.retrievals_highres[f"tcMask{version}"] = tcMask