HyperInSPACE/run_sample.py at master · juanchossn/HyperInSPACE · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import multiprocessing
import os
import glob
import subprocess
import sys

from Main import Command, cmd

'''
This script is an example of how to run HyperCP in batch mode without using the GUI.
NB: You should launch the GUI at least once beforehand to obtain the ancillary data needed to perform the Zhang surface correction (see README.md).
If you are just beginning to use HyperCP, please do not modify anything outside the CUSTOM SET UP section.
'''
################################################### CUSTOM SET UP ###################################################

# When False, process_raw_to_l2 skips a file whose output (or failure report) is already present.
clobber = False # If True, existing output files are overwritten instead of being skipped
# Root directory of the HyperCP installation; the Config directory (and the sample data) are resolved relative to it.
PATH_HCP = '/tcenas/s3optcal/workdata/ocean_colour/HyperCP/HyperCP'   # Adjust to the /full/path/to/HyperCP directory on the local computer
# Selects the raw file extension and the processing path (one file per subprocess vs. a list of files).
INST_TYPE = 'SEABIRD' # SEABIRD or TRIOS

# Settings for testing with the sample data provided within HyperCP
PLATFORM_TYPE = 'NOTRACKER' # pySAS, SOLARTRACKER, or NOTRACKER. Adjust to the desired acquisition platform type
PATH_DATA = os.path.join('Data','Sample_Data')   # Sample data location, relative to PATH_HCP
PATH_WK = os.path.join(PATH_HCP,PATH_DATA)  # Working directory: holds RAW, the per-level output directories and Reports
PATH_ANC = os.path.join(PATH_WK,f'SAMPLE_{INST_TYPE}_{PLATFORM_TYPE}_Ancillary.sb') # Ancillary file shipped with the samples

# For batching collections, adjust to your local settings:
# PATH_DATA = '/Users/daurin/Projects/HyperPACE/field_data/HyperSAS/EXPORTSNP'   # Adjust with full path on local computer
# PATH_WK = os.path.join(PATH_DATA)  # Adjust with full path on local computer
# PATH_ANC = os.path.join(PATH_DATA,f'*_Ancillary.sb') # Adjust with full path on local computer

################################################# END CUSTOM SET UP #################################################

## Setup Globals ##
# Configuration file matching the selected instrument and platform.
PATH_CFG = os.path.join(PATH_HCP, 'Config', 'sample_' + INST_TYPE + '_' + PLATFORM_TYPE + '.cfg')

# Each processing step reads FROM_LEVELS[i] and writes TO_LEVELS[i]; the input
# of a step is the output of the previous one, starting from the raw files.
TO_LEVELS = ['L1A', 'L1AQC', 'L1B', 'L1BQC', 'L2']
FROM_LEVELS = ['RAW'] + TO_LEVELS[:-1]

# Suffix of the input file of each step: the instrument-specific raw extension
# first, then the HDF product written by the preceding level.
FILE_EXT = ['.raw' if INST_TYPE == 'SEABIRD' else '.mlb'] + ['_' + level + '.hdf' for level in TO_LEVELS[:-1]]

os.environ['HYPERINSPACE_CMD'] = 'TRUE'


def process_raw_to_l2(filename):
    ''' Run either directly or using multiprocessor pool below.

    Hands one SeaBird raw file, or one list of TriOS raw files, to Command,
    unless its L1A output (or an L1A failure report) already exists and
    clobber is False.

    filename: for SEABIRD, the path of a single raw file; only its base name
              is used and the file is looked up in PATH_WK/RAW. For TRIOS, a
              list of raw file paths passed to Command together; a single path
              string is also accepted and treated as a one-element list.

    Raises ValueError if INST_TYPE is neither SEABIRD nor TRIOS.
    '''
    if INST_TYPE == 'SEABIRD':
        # Base name and extension of the raw file (directory part is dropped)
        rawFPs, rawExt = os.path.splitext(os.path.basename(filename))
    elif INST_TYPE == 'TRIOS':
        # Always work with a list so that indexing below means "first file", not "first character"
        rawFPs = [filename] if isinstance(filename, str) else filename
        if not rawFPs:
            # Nothing to process; avoid an IndexError on rawFPs[0] below
            print('***********************************')
            print(f'*** [{rawFPs}] STOPPED PROCESSING ***')
            print('***********************************')
            return
    else:
        raise ValueError(f'Unsupported INST_TYPE {INST_TYPE!r}: expected SEABIRD or TRIOS')

    # This will skip the file if either 1) the result exists and no clobber, or 2) the Level failed and produced a report.
    # Override with clobber, above.
    to_skip = {level: {os.path.basename(f).split('_' + level)[0]
                       for f in glob.glob(os.path.join(PATH_WK, level, '*'))
                       + glob.glob(os.path.join(PATH_WK, 'Reports', f'*_{level}_fail.pdf'))}
               for level in TO_LEVELS}

    for from_level, to_level, ext in zip(FROM_LEVELS, TO_LEVELS, FILE_EXT):
        # Single level CLI deprecated. Multi-level used. Raw-L2 only
        if to_level != 'L1A':
            continue

        if INST_TYPE == 'SEABIRD':
            # One file at a time
            l1aFileBase = rawFPs
            if from_level == 'RAW' and rawExt:
                # Keep the extension of the file actually found (e.g. '.RAW'), so the
                # rebuilt path also matches on case-sensitive file systems
                ext = rawExt
            inputFP = os.path.join(PATH_WK, from_level, rawFPs + ext)
            test = os.path.exists(inputFP)
        else:
            # All L0 files: the name used for the skip check is derived from the first one
            l0FileBase = os.path.splitext(os.path.basename(rawFPs[0]))[0]
            nameParts = l0FileBase.split('SPECTRUM_')
            # Fall back to the full base name rather than raising IndexError when the
            # 'SPECTRUM_' marker is missing (the skip check may then not match -- TODO confirm naming)
            l1aFileBase = nameParts[1] if len(nameParts) > 1 else l0FileBase
            inputFP = rawFPs  # a list
            # As before, processing goes ahead as soon as at least one listed file exists
            test = any(os.path.exists(fp) for fp in rawFPs)

        if not test:
            print('***********************************')
            print(f'*** [{rawFPs}] STOPPED PROCESSING ***')
            print('***********************************')
            break
        if l1aFileBase in to_skip[to_level] and not clobber:
            print('************************************************')
            print(f'*** [{l1aFileBase}] ALREADY PROCESSED TO {to_level} ***')
            print('************************************************')
            continue
        print('************************************************')
        print(f'*** [{rawFPs}] PROCESSING TO {to_level} ***')
        print('************************************************')
        # SEABIRD: path of one file; TRIOS: list of L0 .mlb paths
        Command(PATH_CFG, from_level, inputFP, PATH_WK, to_level, PATH_ANC)


# %% One thread
# raw_filenames = sorted(glob.glob(os.path.join(PATH_WK, 'L0B', 'EXPORTS-EXPORTSNA-JC214-Process-*.raw')))
# for raw in raw_filenames[:3]:
#     process_raw_to_l2(raw)

# %% Multiprocess
#   Can't pickle HyperInSPACE, so start subprocesses instead
if len(sys.argv) > 1:
    # Code executed in subprocesses only.
    # worker() launches this script as: run_sample.py -i <raw file>, so the
    # file to process is the second argument.
    if len(sys.argv) < 3:
        # A flag without a file used to end in an IndexError; fail with a usable message instead
        sys.exit(f'Usage: {os.path.basename(sys.argv[0])} -i <raw_file>')
    process_raw_to_l2(sys.argv[2])
    sys.exit(0)


def worker(raw_filename):
    ''' Pool target: process one raw file in a separate Python interpreter.

    raw_filename: path of a single raw file. It is passed to a new run of
                  run_sample.py (as: -i <path>), which is looked up relative
                  to the current working directory.
                  A list of paths is accepted as well, but is only reported on
                  the console: no subprocess is started for it (in the main
                  section, lists of files go straight to process_raw_to_l2).
    '''
    if isinstance(raw_filename, list):
        names = [os.path.basename(file) for file in raw_filename]
        for name in names:
            print(f'### Processing {name} ...')
    else:
        names = [os.path.basename(raw_filename)]
        print(f'### Processing {names[0]} ...')
        proc = subprocess.run([sys.executable, 'run_sample.py', '-i', raw_filename])
        if proc.returncode != 0:
            # Make a failed child visible; the pool would otherwise carry on silently
            print(f'### FAILED {names[0]} (exit code {proc.returncode})')

    # One line per file, so that a list no longer breaks os.path.basename
    for name in names:
        print(f'### Finished {name}')

## Watch for raw suffix below
if __name__ == '__main__':
    # Collect the raw files of the selected instrument from PATH_WK/RAW
    raw_dir = os.path.join(PATH_WK, 'RAW')
    if INST_TYPE == 'SEABIRD':
        raw_filenames = sorted(glob.glob(os.path.join(raw_dir, f'*{INST_TYPE}_{PLATFORM_TYPE}.raw'))) # For use with sample data
        # raw_filenames = sorted(glob.glob(os.path.join(PATH_WK, 'RAW', '*.raw')))
        if not raw_filenames:
            # No sample-style name found: fall back to any upper-case .RAW file
            raw_filenames = sorted(glob.glob(os.path.join(raw_dir, '*.RAW')))
    elif INST_TYPE == 'TRIOS':
        raw_filenames = sorted(glob.glob(os.path.join(raw_dir, '*.mlb')))
    else:
        # Previously this ended in a NameError on raw_filenames
        sys.exit(f'Unsupported INST_TYPE {INST_TYPE!r}: expected SEABIRD or TRIOS')

    print(f'Processing {raw_filenames}')
    print(f'Using configuration {PATH_CFG}')
    print(f'with ancillary data {PATH_ANC}')

    if not raw_filenames:
        # An empty list must not reach process_raw_to_l2, which indexes its first element for TriOS
        print(f'No raw files found in {raw_dir}; nothing to process.')
    elif INST_TYPE == 'SEABIRD':
        # If Zhang et al. 2017 correction is enabled a significant amount of memory is used (~3 GB) for each process
        # so you might not be able to use all cores of the system. No more processes than files are started.
        with multiprocessing.Pool(min(4, len(raw_filenames))) as pool:
            pool.map(worker, raw_filenames) # Sends one file at a time to processor
    else:
        process_raw_to_l2(raw_filenames) # Sends list of files to... for TriOS raw .mlb triplets. No subprocessors