From 6a95aba231f9635163df9104b72901899df6725b Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 24 Jul 2024 12:15:20 -0300 Subject: [PATCH 001/129] Update feature extraction for heatmap visualization Modified feature extraction to use new FeatureExtractor interface. --- libemg/data_handler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index b2aa52b2..66f3b1aa 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -792,8 +792,9 @@ def extract_data(): data = data[:num_samples] # Extract features along each channel windows = data[np.newaxis].transpose(0, 2, 1) # add axis and tranpose to convert to (windows x channels x samples) - fe = FeatureExtractor() - feature_set_dict = fe.extract_features(feature_list, windows) + fe = FeatureExtractor(feature_list) + feature_set_dict = fe(windows, array=False) + assert isinstance(feature_set_dict, dict), f"Expected dictionary of features. Got: {type(feature_set_dict)}." if remap_function is not None: # Remap raw data to image format for key in feature_set_dict: From 1bf5dc35ae83264cabcc6bc614336a9b91d0fa3b Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 24 Jul 2024 12:17:56 -0300 Subject: [PATCH 002/129] Change features to fe in OnlineStreamer Replaced list of features with FeatureExtractor. This allows the user to pass in a FeatureExtractor object with feature parameters + standardization. 
--- libemg/emg_predictor.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 60d6c84f..a8225ece 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -590,14 +590,14 @@ def __init__(self, online_data_handler, file_path, file, smm, smm_items, - features, + fe, port, ip, std_out, tcp): self.window_size = window_size self.window_increment = window_increment self.odh = online_data_handler - self.features = features + self.fe = fe self.port = port self.ip = ip self.predictor = offline_predictor @@ -705,8 +705,6 @@ def _run_helper(self): self.odh.prepare_smm() - if self.features is not None: - fe = FeatureExtractor() self.expected_count = {mod:self.window_size for mod in self.odh.modalities} # todo: deal with different sampling frequencies for different modalities @@ -732,12 +730,11 @@ def _run_helper(self): window = {mod:get_windows(data[mod], self.window_size, self.window_increment) for mod in self.odh.modalities} # Dealing with the case for CNNs when no features are used - if self.features: + if self.fe is not None: model_input = None for mod in self.odh.modalities: # todo: features for each modality can be different - mod_features = fe.extract_features(self.features, window[mod], self.predictor.feature_params) - mod_features = self._format_data_sample(mod_features) + mod_features = self.fe(window[mod], array=True) if model_input is None: model_input = mod_features else: From 3bd2657b9a2ca9e73485a58325c29432f441db9a Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 8 Aug 2024 16:02:11 -0300 Subject: [PATCH 003/129] Offline regression example Added offline regression example .md and .rst files. Also added link to this example in index.rst. 
--- .../offline_regression.md | 98 +++++++++++++++++++ .../offline_regression_example.rst | 4 + docs/source/index.rst | 1 + 3 files changed, 103 insertions(+) create mode 100644 docs/source/examples/offline_regression_example/offline_regression.md create mode 100644 docs/source/examples/offline_regression_example/offline_regression_example.rst diff --git a/docs/source/examples/offline_regression_example/offline_regression.md b/docs/source/examples/offline_regression_example/offline_regression.md new file mode 100644 index 00000000..7ecb798b --- /dev/null +++ b/docs/source/examples/offline_regression_example/offline_regression.md @@ -0,0 +1,98 @@ +[View Source Code](https://github.com/LibEMG/LibEMG_OfflineRegression_Showcase) + + + +This simple offline example showcases some of the offline capabilities for regression analysis. In this example, we will load in the OneSubjectEMaGerDataset and assess the performance of multiple regressors. All code can be found in `main.py`. + +## Step 1: Importing LibEMG + +The very first step involves importing the modules needed. In general, each of LibEMG's modules has its own import. Make sure that you have successfully installed libemg through pip. + +```Python +import numpy as np +import matplotlib.pyplot as plt +from libemg.offline_metrics import OfflineMetrics +from libemg.datasets import OneSubjectEMaGerDataset +from libemg.feature_extractor import FeatureExtractor +from libemg.emg_predictor import EMGRegressor +``` + +## Step 2: Setting up Constants + +Preprocessing parameters, such as window size, window increment, and the feature set must be decided before EMG data can be prepared for estimation. LibEMG defines window and increment sizes as the number of samples. In this case, the dataset was recorded from the EMaGer cuff, which samples at 1 kHz, so a window of 150 samples corresponds to 150ms. + +The window increment, window size, and feature set default to 40, 150, and 'HTD', respectively. 
These variables can be customized in this script using the provided CLI. Use `python main.py -h` for an explanation of the CLI. Example usage is also provided below: + +```Bash +python main.py --window_size 200 --window_increment 50 --feature_set MSWT +``` + +# Step 3: Loading in Dataset + +This example uses the `OneSubjectEMaGerDataset`. Instantiating the `Dataset` will automatically download the data into the specified directory, and calling the `prepare_data()` method will load EMG data and metadata (e.g., reps, movements, labels) into an `OfflineDataHandler`. This dataset consists of 5 repetitions, so we use 4 for training data and 1 for testing data. After splitting our data into training and test splits, we perform windowing on the raw EMG data. By default, the metadata assigned to each window will be based on the mode of that window. Since we are analyzing regression data, we pass in a function that tells the `OfflineDataHandler` to grab the label from the last sample in the window instead of taking the mode of the window. We can specify how we want to handle windowing of each type of metadata by passing in a `metadata_operations` dictionary. + +```Python +# Load data +odh = OneSubjectEMaGerDataset().prepare_data() + +# Split into train/test reps +train_odh = odh.isolate_data('reps', [0, 1, 2, 3]) +test_odh = odh.isolate_data('reps', [4]) + +# Extract windows +metadata_operations = {'labels': lambda x: x[-1]} # grab label of last sample in window +train_windows, train_metadata = train_odh.parse_windows(args.window_size, args.window_increment, metadata_operations=metadata_operations) +test_windows, test_metadata = test_odh.parse_windows(args.window_size, args.window_increment, metadata_operations=metadata_operations) +``` + +# Step 4: Feature Extraction + +We then extract features using the `FeatureExtractor` for our training and test data. 
The `fit()` method expects a dictionary with the keys `training_features` and `training_labels`, so we create one and pass in our extracted features and training labels. + +```Python +training_features = fe.extract_feature_group(args.feature_set, train_windows, array=True) +training_labels = train_metadata['labels'] +test_features = fe.extract_feature_group(args.feature_set, test_windows, array=True) +test_labels = test_metadata['labels'] + +training_set = { + 'training_features': training_features, + 'training_labels': training_labels +} +``` + +# Step 5: Regression + +`LibEMG` allows you to pass in custom models, but you can also pass in a string that will create a model for you. In this example, we compare a linear regressor to a gradient boosting regressor. We iterate through a list of the models we want to observe, fit the model to the training data, and calculate metrics based on predictions on the test data. We then store these metrics for plotting later. + +```Python +results = {metric: [] for metric in ['R2', 'NRMSE', 'MAE']} +for model in models: + reg = EMGRegressor(model) + + # Fit and run model + print(f"Fitting {model}...") + reg.fit(training_set.copy()) + predictions = reg.run(test_features) + + metrics = om.extract_offline_metrics(results.keys(), test_labels, predictions) + for metric in metrics: + results[metric].append(metrics[metric].mean()) +``` + +# Step 6: Visualization + +Finally, we visualize our results. We first plot the decision stream for each model. After each model is fitted, we plot the offline metrics for each type of model. + +```Python +# Note: this will block the main thread once the plot is shown. Close the plot to continue execution. 
+reg.visualize(test_labels, predictions) + +fig, axs = plt.subplots(nrows=len(results), layout='constrained', figsize=(8, 8), sharex=True) +for metric, ax in zip(results.keys(), axs): + ax.bar(models, np.array(results[metric]) * 100) + ax.set_ylabel(f"{metric} (%)") + +fig.suptitle('Metrics Summary') +plt.show() +``` diff --git a/docs/source/examples/offline_regression_example/offline_regression_example.rst b/docs/source/examples/offline_regression_example/offline_regression_example.rst new file mode 100644 index 00000000..7e8707f7 --- /dev/null +++ b/docs/source/examples/offline_regression_example/offline_regression_example.rst @@ -0,0 +1,4 @@ +Offline Regression Analysis +========================================== +.. include:: offline_regression.md + :parser: myst_parser.sphinx_ \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 90ff5a3d..4e70e20a 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -43,6 +43,7 @@ LibEMG examples/features_and_group_example/features_and_group_example examples/feature_optimization_example/feature_optimization_example examples/deep_learning_example/deep_learning_example + examples/offline_regression_example/offline_regression_example .. 
toctree:: :maxdepth: 1 From 353e2866ed4c3b386dfadd00e412da0883d96915 Mon Sep 17 00:00:00 2001 From: eeddy Date: Wed, 14 Aug 2024 09:30:07 -0300 Subject: [PATCH 004/129] Example for new streamer --- libemg/_streamers/__init__.py | 1 + libemg/_streamers/__new_streaner.py | 24 ++++++++++++++++++++++++ libemg/streamers.py | 22 ++++++++++++++++++++++ 3 files changed, 47 insertions(+) create mode 100644 libemg/_streamers/__new_streaner.py diff --git a/libemg/_streamers/__init__.py b/libemg/_streamers/__init__.py index 79da57c6..65d10924 100644 --- a/libemg/_streamers/__init__.py +++ b/libemg/_streamers/__init__.py @@ -11,3 +11,4 @@ from libemg._streamers import _oymotion_windows_streamer from libemg._streamers import _emager_streamer from libemg._streamers import _leap_streamer +from libemg._streamers import __new_streaner \ No newline at end of file diff --git a/libemg/_streamers/__new_streaner.py b/libemg/_streamers/__new_streaner.py new file mode 100644 index 00000000..d49335c3 --- /dev/null +++ b/libemg/_streamers/__new_streaner.py @@ -0,0 +1,24 @@ +from libemg.shared_memory_manager import SharedMemoryManager +from multiprocessing import Process + + +""" +This class will inherit from the Process class. The goal is to read data and pass it to the Shared Memory object. +""" +class NewStreamer(Process): + def __init__(self, shared_memory_items: list = []): + Process.__init__(self, daemon=True) + self.shared_memory_items = shared_memory_items + # TODO: Pass in whatever parameters you will need here. + + """ + This function is required for the streamer to work. In this function you should have a while loop + that continuously listens for new data from the device and update the shared memory object. 
+ """ + def run(self): + self.smm = SharedMemoryManager() + for item in self.shared_memory_items: + self.smm.create_variable(*item) + + #TODO: Fille out the rest (see any of the other streamers in the _streamers folder for examples) + diff --git a/libemg/streamers.py b/libemg/streamers.py index 2543d065..1d19a0d5 100644 --- a/libemg/streamers.py +++ b/libemg/streamers.py @@ -14,6 +14,28 @@ from libemg._streamers._emager_streamer import EmagerStreamer from libemg._streamers._sifi_bridge_streamer import SiFiBridgeStreamer from libemg._streamers._leap_streamer import LeapStreamer +from libemg._streamers.__new_streaner import NewStreamer + +def new_streamer(shared_memory_items = None): + """ + TODO: Update docs like other functions. + """ + # Need to make shared memory items to define the size of the shared memory buffer. This is a buffer + # of 5000 samples by 8 channels. + if shared_memory_items is None: + shared_memory_items = [] + shared_memory_items.append(["emg", (5000,8), np.double]) + shared_memory_items.append(["emg_count", (1,1), np.int32]) + + for item in shared_memory_items: + item.append(Lock()) + + # TODO: Update this + ns = NewStreamer(shared_memory_items) + ns.start() + return ns, shared_memory_items + + def sifibridge_streamer(version="1_1", shared_memory_items = None, From df083175f6e2080716ba14e369c1ced5a1f505b5 Mon Sep 17 00:00:00 2001 From: eeddy Date: Wed, 14 Aug 2024 09:35:57 -0300 Subject: [PATCH 005/129] Revert "Example for new streamer" This reverts commit 353e2866ed4c3b386dfadd00e412da0883d96915. 
--- libemg/_streamers/__init__.py | 1 - libemg/_streamers/__new_streaner.py | 24 ------------------------ libemg/streamers.py | 22 ---------------------- 3 files changed, 47 deletions(-) delete mode 100644 libemg/_streamers/__new_streaner.py diff --git a/libemg/_streamers/__init__.py b/libemg/_streamers/__init__.py index 65d10924..79da57c6 100644 --- a/libemg/_streamers/__init__.py +++ b/libemg/_streamers/__init__.py @@ -11,4 +11,3 @@ from libemg._streamers import _oymotion_windows_streamer from libemg._streamers import _emager_streamer from libemg._streamers import _leap_streamer -from libemg._streamers import __new_streaner \ No newline at end of file diff --git a/libemg/_streamers/__new_streaner.py b/libemg/_streamers/__new_streaner.py deleted file mode 100644 index d49335c3..00000000 --- a/libemg/_streamers/__new_streaner.py +++ /dev/null @@ -1,24 +0,0 @@ -from libemg.shared_memory_manager import SharedMemoryManager -from multiprocessing import Process - - -""" -This class will inherit from the Process class. The goal is to read data and pass it to the Shared Memory object. -""" -class NewStreamer(Process): - def __init__(self, shared_memory_items: list = []): - Process.__init__(self, daemon=True) - self.shared_memory_items = shared_memory_items - # TODO: Pass in whatever parameters you will need here. - - """ - This function is required for the streamer to work. In this function you should have a while loop - that continuously listens for new data from the device and update the shared memory object. 
- """ - def run(self): - self.smm = SharedMemoryManager() - for item in self.shared_memory_items: - self.smm.create_variable(*item) - - #TODO: Fille out the rest (see any of the other streamers in the _streamers folder for examples) - diff --git a/libemg/streamers.py b/libemg/streamers.py index 1d19a0d5..2543d065 100644 --- a/libemg/streamers.py +++ b/libemg/streamers.py @@ -14,28 +14,6 @@ from libemg._streamers._emager_streamer import EmagerStreamer from libemg._streamers._sifi_bridge_streamer import SiFiBridgeStreamer from libemg._streamers._leap_streamer import LeapStreamer -from libemg._streamers.__new_streaner import NewStreamer - -def new_streamer(shared_memory_items = None): - """ - TODO: Update docs like other functions. - """ - # Need to make shared memory items to define the size of the shared memory buffer. This is a buffer - # of 5000 samples by 8 channels. - if shared_memory_items is None: - shared_memory_items = [] - shared_memory_items.append(["emg", (5000,8), np.double]) - shared_memory_items.append(["emg_count", (1,1), np.int32]) - - for item in shared_memory_items: - item.append(Lock()) - - # TODO: Update this - ns = NewStreamer(shared_memory_items) - ns.start() - return ns, shared_memory_items - - def sifibridge_streamer(version="1_1", shared_memory_items = None, From 34de77a8feee32449124524ea7df82dbc5b76010 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 16 Aug 2024 11:26:12 -0300 Subject: [PATCH 006/129] Rename ColumnFetch to ColumnFetcher Class name was accidentally changed during a previous commit. Reverted so its name is more consistent with other classes. 
--- libemg/data_handler.py | 2 +- libemg/datasets.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index ec057c98..53c460a6 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -181,7 +181,7 @@ def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[st return packaged_file_data -class ColumnFetch(MetadataFetcher): +class ColumnFetcher(MetadataFetcher): def __init__(self, description: str, column_mask: Sequence[int] | int, values: Sequence | None = None): """Fetch metadata from columns within data file. diff --git a/libemg/datasets.py b/libemg/datasets.py index 04f67e8c..2c6c5ef7 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -2,7 +2,7 @@ import numpy as np import zipfile import scipy.io as sio -from libemg.data_handler import ColumnFetch, MetadataFetcher, OfflineDataHandler, RegexFilter, FilePackager +from libemg.data_handler import ColumnFetcher, MetadataFetcher, OfflineDataHandler, RegexFilter, FilePackager from libemg.utils import make_regex from glob import glob from os import walk @@ -243,7 +243,7 @@ def split_filename(f): return session_idx * np.ones((file_data.shape[0], 1), dtype=int) -class _RepFetcher(ColumnFetch): +class _RepFetcher(ColumnFetcher): def __call__(self, filename, file_data, all_files): column_data = super().__call__(filename, file_data, all_files) @@ -318,7 +318,7 @@ def prepare_data(self, format=OfflineDataHandler, subjects = None, sessions = No ] metadata_fetchers = [ _SessionFetcher(), - ColumnFetch('labels', column_mask), + ColumnFetcher('labels', column_mask), _RepFetcher('reps', list(range(36, 40))) ] odh = OfflineDataHandler() From 2da261284e10beb1b71cf9ef55b806a8631430f2 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 16 Aug 2024 11:38:11 -0300 Subject: [PATCH 007/129] Remove duplicate method Previous commit added a duplicate write_output method definition in OnlineStreamer. 
--- libemg/emg_predictor.py | 43 ----------------------------------------- 1 file changed, 43 deletions(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 2528861c..91a2c903 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -637,49 +637,6 @@ def start_stream(self, block=True): self._run_helper() else: self.process.start() - - def write_output(self, prediction, probabilities, probability, calculated_velocity, model_input): - time_stamp = time.time() - if calculated_velocity == "": - printed_velocity = "-1" - else: - printed_velocity = float(calculated_velocity) - if self.options['std_out']: - print(f"{int(prediction)} {printed_velocity} {time.time()}") - # Write classifier output: - if self.options['file']: - if not 'file_handle' in self.files.keys(): - self.files['file_handle'] = open(self.options['file_path'] + 'classifier_output.txt', "a", newline="") - writer = csv.writer(self.files['file_handle']) - feat_str = str(model_input[0]).replace('\n','')[1:-1] - row = [f"{time_stamp} {prediction} {probability[0]} {printed_velocity} {feat_str}"] - writer.writerow(row) - self.files['file_handle'].flush() - if "smm" in self.options.keys(): - # assumed to have "classifier_input" and "classifier_output" keys - # these are (1+) - def insert_classifier_input(data): - input_size = self.options['smm'].variables['classifier_input']["shape"][0] - data[:] = np.vstack((np.hstack([time_stamp, model_input[0]]), data))[:input_size,:] - return data - def insert_classifier_output(data): - output_size = self.options['smm'].variables['classifier_output']["shape"][0] - data[:] = np.vstack((np.hstack([time_stamp, prediction, probability[0], float(printed_velocity)]), data))[:output_size,:] - return data - self.options['smm'].modify_variable("classifier_input", - insert_classifier_input) - self.options['smm'].modify_variable("classifier_output", - insert_classifier_output) - self.options['classifier_smm_writes'] += 1 - - if self.output_format == 
"predictions": - message = str(prediction) + calculated_velocity + '\n' - elif self.output_format == "probabilities": - message = ' '.join([f'{i:.2f}' for i in probabilities[0]]) + calculated_velocity + " " + str(time_stamp) - if not self.tcp: - self.sock.sendto(bytes(message, 'utf-8'), (self.ip, self.port)) - else: - self.conn.sendall(str.encode(message)) def prepare_smm(self): for i in self.smm_items: From bc0972f0288d7969d38ff024d205d09da0ab2db7 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 16 Aug 2024 15:41:42 -0300 Subject: [PATCH 008/129] Add feature queue for time series models Some time series models operate by predicting on a sequence of windows instead of raw EMG data. Added option to queue windows instead of just passing in a single window. --- libemg/emg_predictor.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 91a2c903..63d3267d 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -585,6 +585,10 @@ class OnlineStreamer(ABC): If True, prints predictions to std_out. tcp: bool (optional), default = False If True, will stream predictions over TCP instead of UDP. + feature_queue_length: int (optional), default = 0 + Number of windows to include in the feature queue (i.e., sequence length). Mainly used in temporal models that make predictions on + a sequence of feature windows (batch x feature_queue_length x features) instead of on raw EMG. If 0, a queue is not created and features are passed as usual + (features). 
""" def __init__(self, @@ -597,7 +601,9 @@ def __init__(self, features, port, ip, std_out, - tcp): + tcp, + feature_queue_length = 0): + self.window_size = window_size self.window_increment = window_increment self.odh = online_data_handler @@ -605,6 +611,8 @@ def __init__(self, self.port = port self.ip = ip self.predictor = offline_predictor + self.feature_queue_length = feature_queue_length + self.queue = deque() if self.feature_queue_length > 0 else None self.options = {'file': file, 'file_path': file_path, 'std_out': std_out} @@ -749,6 +757,18 @@ def _run_helper(self): model_input = mod_features else: model_input = np.hstack((model_input, mod_features)) + + if self.queue: + # Queue features from previous windows + self.queue.popleft() + self.queue.append(model_input) + + model_input = np.concatenate(self.queue, axis=0) + print(model_input.shape) + # Do I need to cast to 3D here (for time series models) or should I expect the user will do that? + model_input = np.expand_dims(model_input, axis=0) + print(model_input.shape) + # TODO: Verify that this works then add queue parameter to child classes (then add to feature-extractor-rework branch) else: model_input = window[list(window.keys())[0]] #TODO: Change this From ae71d0f7d7cf6cea686b2361ff98329e2cef9d5d Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 19 Aug 2024 10:53:42 -0300 Subject: [PATCH 009/129] Add explicit conditional check instead of relying on casting --- libemg/emg_predictor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 63d3267d..2d086320 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -758,7 +758,7 @@ def _run_helper(self): else: model_input = np.hstack((model_input, mod_features)) - if self.queue: + if self.queue is not None: # Queue features from previous windows self.queue.popleft() self.queue.append(model_input) From 8e71705dc0eeb3c5a0aa9f2e7a5213bf23d3e059 Mon Sep 17 00:00:00 2001 
From: Christian Morrell Date: Mon, 19 Aug 2024 13:32:26 -0300 Subject: [PATCH 010/129] Only pop if queue is at max length --- libemg/emg_predictor.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 2d086320..6e1664e6 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -760,7 +760,8 @@ def _run_helper(self): if self.queue is not None: # Queue features from previous windows - self.queue.popleft() + if len(self.queue) == self.feature_queue_length: + self.queue.popleft() self.queue.append(model_input) model_input = np.concatenate(self.queue, axis=0) @@ -1065,7 +1066,7 @@ def __init__(self, offline_regressor, window_size, window_increment, online_data assert 'model_input' in [item[0] for item in smm_items], f"'model_input' tag not found in smm_items. Got: {smm_items}." assert 'model_output' in [item[0] for item in smm_items], f"'model_output' tag not found in smm_items. Got: {smm_items}." super(OnlineEMGRegressor, self).__init__(offline_regressor, window_size, window_increment, online_data_handler, file_path, - file, smm, smm_items, features, port, ip, std_out, tcp) + file, smm, smm_items, features, port, ip, std_out, tcp, feature_queue_length=32) self.smi = smm_items def run(self, block=True): From cfc9a845f8ff8143d65e8b6cc04ed7aa5941417a Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 19 Aug 2024 13:44:27 -0300 Subject: [PATCH 011/129] Add feature queue parameter to child classes Feature queue parameter was in OnlineStreamer, but not the online classifier and regressor. Added documentation to those classes and implemented parameters to create models that feature queue. 
--- libemg/emg_predictor.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 6e1664e6..754f5e1c 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -586,9 +586,9 @@ class OnlineStreamer(ABC): tcp: bool (optional), default = False If True, will stream predictions over TCP instead of UDP. feature_queue_length: int (optional), default = 0 - Number of windows to include in the feature queue (i.e., sequence length). Mainly used in temporal models that make predictions on - a sequence of feature windows (batch x feature_queue_length x features) instead of on raw EMG. If 0, a queue is not created and features are passed as usual - (features). + Number of windows to include in online feature queue. Used for time series models that make a prediction on a sequence of feature windows + (batch x feature_queue_length x features) instead of raw EMG. If the value is greater than 0, creates a queue and passes the data to the model as + a 1 x feature_queue_length x num_features. If the value is 0, no feature queue is created and predictions are made on a single window (1 x features). Defaults to 0. """ def __init__(self, @@ -602,7 +602,7 @@ def __init__(self, port, ip, std_out, tcp, - feature_queue_length = 0): + feature_queue_length): self.window_size = window_size self.window_increment = window_increment @@ -765,10 +765,7 @@ def _run_helper(self): self.queue.append(model_input) model_input = np.concatenate(self.queue, axis=0) - print(model_input.shape) - # Do I need to cast to 3D here (for time series models) or should I expect the user will do that? 
- model_input = np.expand_dims(model_input, axis=0) - print(model_input.shape) + model_input = np.expand_dims(model_input, axis=0) # cast to 3D here for time series models # TODO: Verify that this works then add queue parameter to child classes (then add to feature-extractor-rework branch) else: @@ -834,12 +831,16 @@ class OnlineEMGClassifier(OnlineStreamer): If True, will stream predictions over TCP instead of UDP. output_format: str (optional), default=predictions If predictions, it will broadcast an integer of the prediction, if probabilities it broacasts the posterior probabilities + feature_queue_length: int (optional), default = 0 + Number of windows to include in online feature queue. Used for time series models that make a prediction on a sequence of feature windows + (batch x feature_queue_length x features) instead of raw EMG. If the value is greater than 0, creates a queue and passes the data to the model as + a 1 x feature_queue_length x num_features. If the value is 0, no feature queue is created and predictions are made on a single window (1 x features). Defaults to 0. """ def __init__(self, offline_classifier, window_size, window_increment, online_data_handler, features, file_path = '.', file=False, smm=False, smm_items= None, port=12346, ip='127.0.0.1', std_out=False, tcp=False, - output_format="predictions"): + output_format="predictions", feature_queue_length = 0): if smm_items is None: smm_items = [ @@ -849,7 +850,7 @@ def __init__(self, offline_classifier, window_size, window_increment, online_dat assert 'classifier_input' in [item[0] for item in smm_items], f"'model_input' tag not found in smm_items. Got: {smm_items}." assert 'classifier_output' in [item[0] for item in smm_items], f"'model_output' tag not found in smm_items. Got: {smm_items}." 
super(OnlineEMGClassifier, self).__init__(offline_classifier, window_size, window_increment, online_data_handler, - file_path, file, smm, smm_items, features, port, ip, std_out, tcp) + file_path, file, smm, smm_items, features, port, ip, std_out, tcp, feature_queue_length) self.output_format = output_format self.previous_predictions = deque(maxlen=self.predictor.majority_vote) self.smi = smm_items @@ -1053,10 +1054,14 @@ class OnlineEMGRegressor(OnlineStreamer): If True, prints predictions to std_out. tcp: bool (optional), default = False If True, will stream predictions over TCP instead of UDP. + feature_queue_length: int (optional), default = 0 + Number of windows to include in online feature queue. Used for time series models that make a prediction on a sequence of windows instead of raw EMG. + If the value is greater than 0, creates a queue and passes the data to the model as a 1 (window) x feature_queue_length x num_features. + If the value is 0, no feature queue is created and predictions are made on a single window. Defaults to 0. """ def __init__(self, offline_regressor, window_size, window_increment, online_data_handler, features, file_path = '.', file = False, smm = False, smm_items = None, - port=12346, ip='127.0.0.1', std_out=False, tcp=False): + port = 12346, ip = '127.0.0.1', std_out = False, tcp = False, feature_queue_length = 0): if smm_items is None: # I think probably just have smm_items default to None and remove the smm flag. Then if the user wants to track stuff, they can pass in smm_items and a function to handle them? smm_items = [ @@ -1066,7 +1071,7 @@ def __init__(self, offline_regressor, window_size, window_increment, online_data assert 'model_input' in [item[0] for item in smm_items], f"'model_input' tag not found in smm_items. Got: {smm_items}." assert 'model_output' in [item[0] for item in smm_items], f"'model_output' tag not found in smm_items. Got: {smm_items}." 
super(OnlineEMGRegressor, self).__init__(offline_regressor, window_size, window_increment, online_data_handler, file_path, - file, smm, smm_items, features, port, ip, std_out, tcp, feature_queue_length=32) + file, smm, smm_items, features, port, ip, std_out, tcp, feature_queue_length) self.smi = smm_items def run(self, block=True): From 6d83b9620f682833d1da77d8506d432b8f9f52d2 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 19 Aug 2024 14:26:41 -0300 Subject: [PATCH 012/129] Add online channel mask Channels could not be isolated for online data. Added functionality to only grab certain channels online. --- libemg/data_handler.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index ec057c98..3d105261 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -536,13 +536,17 @@ class OnlineDataHandler(DataHandler): ---------- shared_memory_items: Object The shared memory object returned from the streamer. + channel_mask: list or None (optional), default=None + Mask of active channels to use online. Allows certain channels to be ignored when streaming in real-time. If None, all channels are used. + Defaults to None. """ - def __init__(self, shared_memory_items): + def __init__(self, shared_memory_items, channel_mask = None): self.shared_memory_items = shared_memory_items self.prepare_smm() self.log_signal = Event() self.visualize_signal = Event() self.fi = None + self.channel_mask = channel_mask def prepare_smm(self): self.modalities = [] @@ -584,6 +588,17 @@ def install_filter(self, fi): """ self.fi = fi + def install_channel_mask(self, mask): + """Install a channel mask to isolate certain channels for online streaming. + + Parameters + ---------- + mask: list or None (optional), default=None + Mask of active channels to use online. Allows certain channels to be ignored when streaming in real-time. If None, all channels are used. + Defaults to None. 
+ """ + self.channel_mask = mask + def analyze_hardware(self, analyze_time=10): """Analyzes several metrics from the hardware: @@ -949,6 +964,8 @@ def get_data(self, N=0, filter=True): val[mod] = data[:N,:] else: val[mod] = data[:,:] + if self.channel_mask is not None: + val[mod] = val[mod][:, self.channel_mask] count[mod] = self.smm.get_variable(mod+"_count") return val,count From 19f204982acab83f7bddd0ed77c70dde025c7bb6 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 19 Aug 2024 15:15:17 -0300 Subject: [PATCH 013/129] Revert "Revert "Rework FeatureExtractor"" This reverts commit ec5276332e7799b81679e36700f08cc026f66c6d. --- libemg/feature_extractor.py | 81 +++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/libemg/feature_extractor.py b/libemg/feature_extractor.py index 1e513d8e..69b43d24 100644 --- a/libemg/feature_extractor.py +++ b/libemg/feature_extractor.py @@ -5,6 +5,7 @@ from sklearn.decomposition import PCA, KernelPCA, FastICA from sklearn.manifold import TSNE, Isomap from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA +from sklearn.preprocessing import StandardScaler from scipy.stats import skew, kurtosis from librosa import lpc from pywt import wavedec, upcoef @@ -13,7 +14,31 @@ class FeatureExtractor: """ Feature extraction class including feature groups, feature list, and feature extraction code. 
""" - def get_feature_groups(self): + def __init__(self, feature_names, feature_dic = None, standardize = False): + if isinstance(feature_names, str): + # Expects list + feature_names = [feature_names] + + if feature_dic is None: + feature_dic = {} + + available_feature_groups = list(self.get_feature_groups().keys()) + feature_list = [] + for feature_name in feature_names: + if feature_name in available_feature_groups: + # Passed in feature group - get corresponding list of features + feature_name = self.get_feature_groups()[feature_name] + else: + feature_name = [feature_name] # cast to list so .extend() appends the item + feature_list.extend(feature_name) + + self.feature_list = feature_list + self.feature_dic = feature_dic + self.standardize = standardize + self.scaler = None + + @staticmethod + def get_feature_groups(): """Gets a list of all available feature groups. Returns @@ -35,7 +60,8 @@ def get_feature_groups(self): } return feature_groups - def get_feature_list(self): + @staticmethod + def get_feature_list(): """Gets a list of all available features. Returns @@ -106,34 +132,7 @@ def get_projection_list(self): projection_list = ['pca', 'kernelpca', 'ica', 'lda', 'tsne', 'isomap'] return projection_list - def extract_feature_group(self, feature_group, windows, feature_dic={}, array=False): - """Extracts a group of features. - - Parameters - ---------- - feature_group: string - The group of features to extract. See the get_feature_list() function for valid options. - windows: list - A list of windows - should be computed directly from the OfflineDataHandler or the utils.get_windows() method. - feature_dic: dict - A dictionary containing the parameters you'd like passed to each feature. ex. {"MDF_sf":1000} - array: bool (optional), default=False - If True, the dictionary will get converted to a list. 
- Returns - ---------- - dictionary or list - A dictionary where each key is a specific feature and its value is a list of the computed - features for each window. - """ - features = {} - if not feature_group in self.get_feature_groups(): - return features - feats = self.extract_features(self.get_feature_groups()[feature_group], windows, feature_dic) - if array: - return self._format_data(feats) - return feats - - def extract_features(self, feature_list, windows, feature_dic={}, array=False): + def __call__(self, windows, array=False): """Extracts a list of features. Parameters @@ -153,15 +152,29 @@ def extract_features(self, feature_list, windows, feature_dic={}, array=False): A dictionary where each key is a specific feature and its value is a list of the computed features for each window. """ + if self.standardize and not array: + raise ValueError('Cannot standardize data when it is returned as a dictionary. Please disable standardization or set array=True.') features = {} - for feature in feature_list: + for feature in self.feature_list: if feature in self.get_feature_list(): method_to_call = getattr(self, 'get' + feature + 'feat') - valid_keys = [i for i in list(feature_dic.keys()) if feature+"_" in i] - smaller_dictionary = dict((k, feature_dic[k]) for k in valid_keys if k in feature_dic) + valid_keys = [i for i in list(self.feature_dic.keys()) if feature+"_" in i] + smaller_dictionary = dict((k, self.feature_dic[k]) for k in valid_keys if k in self.feature_dic) features[feature] = method_to_call(windows, **smaller_dictionary) if array: - return self._format_data(features) + features = self._format_data(features) + + features = self.scale(features) + return features + + def scale(self, features): + if self.standardize: + if self.scaler is None: + # Fit scaler + self.scaler = StandardScaler() + self.scaler.fit(features) + features = self.scaler.transform(features) + return features def check_features(self, features, silent=False): From 
81318c148a6383d547ad95d0bfce90e43d48d3a1 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 19 Aug 2024 17:37:41 -0300 Subject: [PATCH 014/129] Add skip until buffer fills up --- libemg/emg_predictor.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 19ef4f21..75bc4844 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -758,9 +758,14 @@ def _run_helper(self): if self.queue is not None: # Queue features from previous windows if len(self.queue) == self.feature_queue_length: + # Remove oldest window self.queue.popleft() self.queue.append(model_input) + if len(self.queue) < self.feature_queue_length: + # Skip until buffer fills up + continue + model_input = np.concatenate(self.queue, axis=0) model_input = np.expand_dims(model_input, axis=0) # cast to 3D here for time series models # TODO: Verify that this works then add queue parameter to child classes (then add to feature-extractor-rework branch) From e8701ad085d2b63c5089af7570ce64254a2d0561 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Tue, 20 Aug 2024 15:06:04 -0300 Subject: [PATCH 015/129] Add TODO --- libemg/feature_extractor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libemg/feature_extractor.py b/libemg/feature_extractor.py index 69b43d24..255d9791 100644 --- a/libemg/feature_extractor.py +++ b/libemg/feature_extractor.py @@ -36,6 +36,7 @@ def __init__(self, feature_names, feature_dic = None, standardize = False): self.feature_dic = feature_dic self.standardize = standardize self.scaler = None + # TODO: Maybe add an install_scaler() method instead? Then just call that whenever you extract features? 
@staticmethod def get_feature_groups(): From 5a6bd18da1cd28280add33c3195f8e1bdbab24ee Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 21 Aug 2024 11:37:18 -0300 Subject: [PATCH 016/129] Add wildcard to regex helper Default value for regex helper function often threw an error when searching for a pattern. Replaced the default value with the wildcard, so users can use this to grab the potential values without knowing them. --- libemg/utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/libemg/utils.py b/libemg/utils.py index 765beb29..8422dd5a 100644 --- a/libemg/utils.py +++ b/libemg/utils.py @@ -62,7 +62,7 @@ def _get_fn_windows(data, window_size, window_increment, fn): fn_of_windows = np.apply_along_axis(lambda x: fn(x), axis=2, arr=windows) return fn_of_windows.squeeze() -def make_regex(left_bound, right_bound, values=[]): +def make_regex(left_bound, right_bound, values = None): """Regex creation helper for the data handler. The OfflineDataHandler relies on regexes to parse the file/folder structures and extract data. @@ -74,8 +74,8 @@ def make_regex(left_bound, right_bound, values=[]): The left bound of the regex. right_bound: string The right bound of the regex. - values: list - The values between the two regexes. + values: list or None (optional), default = None + The values between the two regexes. If None, will try to find the values using a wildcard. Defaults to None. 
Returns ---------- @@ -87,10 +87,16 @@ def make_regex(left_bound, right_bound, values=[]): >>> make_regex(left_bound = "_C_", right_bound="_EMG.csv", values = [0,1,2,3,4,5]) """ left_bound_str = "(?<="+ left_bound +")" - mid_str = "(?:" - for i in values: - mid_str += i + "|" - mid_str = mid_str[:-1] - mid_str += ")" + + if values is None: + # Apply wildcard + mid_str = '(.*?)' + else: + mid_str = "(?:" + for i in values: + mid_str += i + "|" + mid_str = mid_str[:-1] + mid_str += ")" + right_bound_str = "(?=" + right_bound +")" return left_bound_str + mid_str + right_bound_str From 88821effa3549fdb66ac7fae7f6fb87a5f6ef034 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 21 Aug 2024 11:40:07 -0300 Subject: [PATCH 017/129] Add check for None in RegexFilter Since the regex helper function can take None values, users may pass in None to RegexFilter. This wouldn't work since we store metadata as an index of the values they pass in. Added a check to ensure this doesn't happen. --- libemg/data_handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index d2913ba0..1049453b 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -43,6 +43,8 @@ def __init__(self, left_bound: str, right_bound: str, values: Sequence, descript description: str Description of filter - used to name the metadata field. """ + if values is None: + raise ValueError('Expected a list of values for RegexFilter, but got None. Using regex wildcard is not supported with the RegexFilter.') self.pattern = make_regex(left_bound, right_bound, values) self.values = values self.description = description From c5c87489bb7437bcce622f63ad0e6789fd6c9fd7 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 21 Aug 2024 11:37:18 -0300 Subject: [PATCH 018/129] Add wildcard to regex helper Default value for regex helper function often threw an error when searching for a pattern. 
Replaced the default value with the wildcard, so users can use this to grab the potential values without knowing them. --- libemg/utils.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/libemg/utils.py b/libemg/utils.py index 765beb29..8422dd5a 100644 --- a/libemg/utils.py +++ b/libemg/utils.py @@ -62,7 +62,7 @@ def _get_fn_windows(data, window_size, window_increment, fn): fn_of_windows = np.apply_along_axis(lambda x: fn(x), axis=2, arr=windows) return fn_of_windows.squeeze() -def make_regex(left_bound, right_bound, values=[]): +def make_regex(left_bound, right_bound, values = None): """Regex creation helper for the data handler. The OfflineDataHandler relies on regexes to parse the file/folder structures and extract data. @@ -74,8 +74,8 @@ def make_regex(left_bound, right_bound, values=[]): The left bound of the regex. right_bound: string The right bound of the regex. - values: list - The values between the two regexes. + values: list or None (optional), default = None + The values between the two regexes. If None, will try to find the values using a wildcard. Defaults to None. 
Returns ---------- @@ -87,10 +87,16 @@ def make_regex(left_bound, right_bound, values=[]): >>> make_regex(left_bound = "_C_", right_bound="_EMG.csv", values = [0,1,2,3,4,5]) """ left_bound_str = "(?<="+ left_bound +")" - mid_str = "(?:" - for i in values: - mid_str += i + "|" - mid_str = mid_str[:-1] - mid_str += ")" + + if values is None: + # Apply wildcard + mid_str = '(.*?)' + else: + mid_str = "(?:" + for i in values: + mid_str += i + "|" + mid_str = mid_str[:-1] + mid_str += ")" + right_bound_str = "(?=" + right_bound +")" return left_bound_str + mid_str + right_bound_str From 2e63dfd26e0d76ec1eac40a19b50da9265e61a07 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 21 Aug 2024 11:40:07 -0300 Subject: [PATCH 019/129] Add check for None in RegexFilter Since the regex helper function can take None values, users may pass in None to RegexFilter. This wouldn't work since we store metadata as an index of the values they pass in. Added a check to ensure this doesn't happen. --- libemg/data_handler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 3d105261..e18cf808 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -43,6 +43,8 @@ def __init__(self, left_bound: str, right_bound: str, values: Sequence, descript description: str Description of filter - used to name the metadata field. """ + if values is None: + raise ValueError('Expected a list of values for RegexFilter, but got None. 
Using regex wildcard is not supported with the RegexFilter.') self.pattern = make_regex(left_bound, right_bound, values) self.values = values self.description = description From 34cecb3bc354547cf48448034a17dc5c3685083c Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 19 Aug 2024 17:37:41 -0300 Subject: [PATCH 020/129] Add skip until buffer fills up --- libemg/emg_predictor.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 754f5e1c..7a526b2a 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -761,9 +761,14 @@ def _run_helper(self): if self.queue is not None: # Queue features from previous windows if len(self.queue) == self.feature_queue_length: + # Remove oldest window self.queue.popleft() self.queue.append(model_input) + if len(self.queue) < self.feature_queue_length: + # Skip until buffer fills up + continue + model_input = np.concatenate(self.queue, axis=0) model_input = np.expand_dims(model_input, axis=0) # cast to 3D here for time series models # TODO: Verify that this works then add queue parameter to child classes (then add to feature-extractor-rework branch) From a751cbb0f4d9bb0cc12f59421038b686a7fc1c9f Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 26 Aug 2024 11:53:04 -0300 Subject: [PATCH 021/129] Add string parameters for common metadata operations Users could pass in function handles to change metadata operations, but there are some operations that are very common and require users to define function handles (like grabbing the last sample). Added ability to pass in strings for some common operations. 
--- libemg/data_handler.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 53c460a6..b9173a5c 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -395,6 +395,13 @@ def parse_windows(self, window_size, window_increment, metadata_operations=None) The number of samples in a window. window_increment: int The number of samples that advances before next window. + metadata_operations: dict or None (optional),default=None + Specifies which operations should be performed on metadata attributes when performing windowing. By default, + all metadata is stored as its mode in a window. To change this behaviour, specify the metadata attribute as the key and + the operation as the value in the dictionary. The operation (value) should either be an accepted string (mean, median, last_sample) or + a function handle that takes in an ndarray of size (window_size, ) and returns a single value to represent the metadata for that window. Passing in a string + will map from that string to the specified operation. The windowing of only the attributes specified in this dictionary will be modified - all other + attributes will default to the mode. If None, all attributes default to the mode. Defaults to None. 
Returns ---------- @@ -407,6 +414,12 @@ def parse_windows(self, window_size, window_increment, metadata_operations=None) return self._parse_windows_helper(window_size, window_increment, metadata_operations) def _parse_windows_helper(self, window_size, window_increment, metadata_operations): + common_metadata_operations = { + 'mean': np.mean, + 'median': np.median, + 'last_sample': lambda x: x[-1] + } + metadata_ = {} for i, file in enumerate(self.data): # emg data windowing @@ -423,7 +436,14 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio if metadata_operations is not None: if k in metadata_operations.keys(): # do the specified operation - file_metadata = _get_fn_windows(getattr(self,k)[i], window_size, window_increment, metadata_operations[k]) + operation = metadata_operations[k] + + if isinstance(operation, str): + try: + operation = common_metadata_operations[operation] + except KeyError as e: + raise KeyError(f"Unexpected metadata operation string. Please pass in a function or an accepted string {tuple(common_metadata_operations.keys())}. 
Got: {operation}.") + file_metadata = _get_fn_windows(getattr(self,k)[i], window_size, window_increment, operation) else: file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) else: From 60249b094caf3183464c34f33e1a84f32af78443 Mon Sep 17 00:00:00 2001 From: Amir Reza Hariri Date: Tue, 27 Aug 2024 11:03:07 -0300 Subject: [PATCH 022/129] ninapro db2 dataglove support added --- libemg/data_handler.py | 14 +++++++++----- libemg/datasets.py | 35 ++++++++++++++++++++++++++--------- libemg/filtering.py | 8 ++++++-- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index b9173a5c..86036e9d 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -234,8 +234,9 @@ class OfflineDataHandler(DataHandler): The purpose of this class is to facilitate the process of accumulating offline training and testing data. This class is extensible to a wide range of file and folder structures. """ - def __init__(self): + def __init__(self, dataglove): super().__init__() + self.dataglove = dataglove def __add__(self, other): # Concatenate two OfflineDataHandlers together @@ -317,7 +318,8 @@ def append_to_attribute(name, value): print(f"{len(data_files)} data files fetched out of {len(all_files)} files.") # Read data from files - for file in data_files: + for f_num, file in enumerate(data_files): + print(f_num, len(data_files)) if '.hea' in file: # The key is the emg key that is in the mrdf file file_data = (wfdb.rdrecord(file.replace('.hea',''))).__getattribute__(mrdf_key) @@ -386,7 +388,7 @@ def active_threshold(self, nm_windows, active_windows, active_labels, num_std=3, print(f"{num_relabeled} of {len(active_labels)} active class windows were relabelled to no motion.") return active_labels - def parse_windows(self, window_size, window_increment, metadata_operations=None): + def parse_windows(self, window_size, window_increment, metadata_operations=None, dataglove=False): """Parses 
windows based on the acquired data from the get_data function. Parameters @@ -419,9 +421,10 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio 'median': np.median, 'last_sample': lambda x: x[-1] } - + metadata_ = {} for i, file in enumerate(self.data): + print(i, len(self.data)) # emg data windowing windows = get_windows(file,window_size,window_increment) if "windows_" in locals(): @@ -444,6 +447,7 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio except KeyError as e: raise KeyError(f"Unexpected metadata operation string. Please pass in a function or an accepted string {tuple(common_metadata_operations.keys())}. Got: {operation}.") file_metadata = _get_fn_windows(getattr(self,k)[i], window_size, window_increment, operation) + else: file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) else: @@ -454,7 +458,7 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio metadata_[k] = np.concatenate((metadata_[k], file_metadata)) - return windows_, metadata_ + return (windows_[:, :-self.dataglove, :], windows_[:, -self.dataglove:, :], metadata_) if self.dataglove else (windows_, metadata_) def isolate_channels(self, channels): diff --git a/libemg/datasets.py b/libemg/datasets.py index 2c6c5ef7..c3867053 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -63,7 +63,7 @@ def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for return odh class Ninapro(Dataset): - def __init__(self, save_dir='.', dataset_name="Ninapro"): + def __init__(self, save_dir='.', dataset_name="Ninapro", dataglove=False): # downloading the Ninapro dataset is not supported (no permission given from the authors)' # however, you can download it from http://ninapro.hevs.ch/DB8 # the subject zip files should be placed at: /NinaproDB8/DB8_s#.zip @@ -71,8 +71,9 @@ def __init__(self, save_dir='.', dataset_name="Ninapro"): self.dataset_name = dataset_name 
self.dataset_folder = os.path.join(self.save_dir , self.dataset_name, "") self.exercise_step = [] + self.dataglove = dataglove - def convert_to_compatible(self): + def convert_to_compatible(self, step=2): # get the zip files (original format they're downloaded in) zip_files = find_all_files_of_type_recursively(self.dataset_folder,".zip") # unzip the files -- if any are there (successive runs skip this) @@ -83,9 +84,9 @@ def convert_to_compatible(self): # get the mat files (the files we want to convert to csv) mat_files = find_all_files_of_type_recursively(self.dataset_folder,".mat") for mat_file in mat_files: - self.convert_to_csv(mat_file) + self.convert_to_csv(mat_file, step, self.dataglove) - def convert_to_csv(self, mat_file): + def convert_to_csv(self, mat_file, step, dataglove): # read the mat file mat_file = mat_file.replace("\\", "/") mat_dir = mat_file.split('/') @@ -95,16 +96,25 @@ def convert_to_csv(self, mat_file): exercise = int(mat_file.split('_')[3][1]) exercise_offset = self.exercise_step[exercise-1] # 0 reps already included data = mat['emg'] + if dataglove: + try: + target = mat['glove'] + except: + return restimulus = mat['restimulus'] rerepetition = mat['rerepetition'] if data.shape[0] != restimulus.shape[0]: # this happens in some cases min_shape = min([data.shape[0], restimulus.shape[0]]) data = data[:min_shape,:] + if dataglove: + target = target[:min_shape,] restimulus = restimulus[:min_shape,] rerepetition = rerepetition[:min_shape,] # remove 0 repetition - collection buffer remove_mask = (rerepetition != 0).squeeze() data = data[remove_mask,:] + if dataglove: + target = target[remove_mask,:] restimulus = restimulus[remove_mask] rerepetition = rerepetition[remove_mask] # important little not here: @@ -116,6 +126,8 @@ def convert_to_csv(self, mat_file): # so we remove the rest class too remove_mask = (restimulus != 0).squeeze() data = data[remove_mask,:] + if dataglove: + target = target[remove_mask,:] restimulus = restimulus[remove_mask] 
rerepetition = rerepetition[remove_mask] tail = 0 @@ -129,8 +141,11 @@ def convert_to_csv(self, mat_file): else: head = head[0] + tail # downsample to 1kHz from 2kHz using decimation - data_for_file = data[tail:head,:] - data_for_file = data_for_file[::2, :] + if dataglove: + data_for_file = np.concatenate((data[tail:head,:], target[tail:head,:]), 1) + else: + data_for_file = data[tail:head,:] + data_for_file = data_for_file[::step, :] # write to csv csv_file = mat_dir + 'C' + str(motion-1) + 'R' + str(rep-1 + exercise_offset) + '.csv' np.savetxt(csv_file, data_for_file, delimiter=',') @@ -159,8 +174,10 @@ def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for return odh class NinaproDB2(Ninapro): - def __init__(self, save_dir='.', dataset_name="NinaproDB2"): - Ninapro.__init__(self, save_dir, dataset_name) + def __init__(self, save_dir='.', dataset_name="NinaproDB2", dataglove=False): + if dataglove: + dataglove = 22 # Number of dataglove ccolumns in DB2 + Ninapro.__init__(self, save_dir, dataset_name, dataglove) self.class_list = ["TODO"] self.exercise_step = [0,0,0] @@ -174,7 +191,7 @@ def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), RegexFilter(left_bound="DB2_s", right_bound="/",values=subjects_values, description='subjects') ] - odh = OfflineDataHandler() + odh = OfflineDataHandler(self.dataglove) odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") return odh diff --git a/libemg/filtering.py b/libemg/filtering.py index 5e3989ab..f4a8820f 100644 --- a/libemg/filtering.py +++ b/libemg/filtering.py @@ -123,7 +123,7 @@ def _filter_offline_data_handler(self, data): ''' assert hasattr(data,"data") for f in range(len(data.data)): - data.data[f] = self._run_filter(data.data[f]) + data.data[f] = self._run_filter(data.data[f], data.dataglove) def _filter_np_ndarray(self, data): ''' 
Helper function that runs the installed filters on an np.ndarray. @@ -140,7 +140,7 @@ def _filter_np_ndarray(self, data): ''' return self._run_filter(data) - def _run_filter(self, matrix): + def _run_filter(self, matrix, dataglove=False): ''' Helper function that actually runs the installed filters on an np.ndarray. This is where the actual processing happens. Parameters @@ -153,6 +153,10 @@ def _run_filter(self, matrix): matrix: np.ndarray Data that has been filtered. ''' + + if dataglove: + matrix = matrix[:, :-dataglove] + for fl in range(len(self.filters)): if self.filters[fl]["name"] == "standardize": matrix = (matrix - self.filters[fl]["mean"]) / self.filters[fl]["std"] From 29e677c1416a6e5da8a8b49981670bc202a4dc67 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Tue, 27 Aug 2024 14:18:46 -0300 Subject: [PATCH 023/129] Revert "Revert "Revert "Rework FeatureExtractor""" This reverts commit 19f204982acab83f7bddd0ed77c70dde025c7bb6. --- libemg/feature_extractor.py | 82 +++++++++++++++---------------------- 1 file changed, 34 insertions(+), 48 deletions(-) diff --git a/libemg/feature_extractor.py b/libemg/feature_extractor.py index 255d9791..1e513d8e 100644 --- a/libemg/feature_extractor.py +++ b/libemg/feature_extractor.py @@ -5,7 +5,6 @@ from sklearn.decomposition import PCA, KernelPCA, FastICA from sklearn.manifold import TSNE, Isomap from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA -from sklearn.preprocessing import StandardScaler from scipy.stats import skew, kurtosis from librosa import lpc from pywt import wavedec, upcoef @@ -14,32 +13,7 @@ class FeatureExtractor: """ Feature extraction class including feature groups, feature list, and feature extraction code. 
""" - def __init__(self, feature_names, feature_dic = None, standardize = False): - if isinstance(feature_names, str): - # Expects list - feature_names = [feature_names] - - if feature_dic is None: - feature_dic = {} - - available_feature_groups = list(self.get_feature_groups().keys()) - feature_list = [] - for feature_name in feature_names: - if feature_name in available_feature_groups: - # Passed in feature group - get corresponding list of features - feature_name = self.get_feature_groups()[feature_name] - else: - feature_name = [feature_name] # cast to list so .extend() appends the item - feature_list.extend(feature_name) - - self.feature_list = feature_list - self.feature_dic = feature_dic - self.standardize = standardize - self.scaler = None - # TODO: Maybe add an install_scaler() method instead? Then just call that whenever you extract features? - - @staticmethod - def get_feature_groups(): + def get_feature_groups(self): """Gets a list of all available feature groups. Returns @@ -61,8 +35,7 @@ def get_feature_groups(): } return feature_groups - @staticmethod - def get_feature_list(): + def get_feature_list(self): """Gets a list of all available features. Returns @@ -133,7 +106,34 @@ def get_projection_list(self): projection_list = ['pca', 'kernelpca', 'ica', 'lda', 'tsne', 'isomap'] return projection_list - def __call__(self, windows, array=False): + def extract_feature_group(self, feature_group, windows, feature_dic={}, array=False): + """Extracts a group of features. + + Parameters + ---------- + feature_group: string + The group of features to extract. See the get_feature_list() function for valid options. + windows: list + A list of windows - should be computed directly from the OfflineDataHandler or the utils.get_windows() method. + feature_dic: dict + A dictionary containing the parameters you'd like passed to each feature. ex. {"MDF_sf":1000} + array: bool (optional), default=False + If True, the dictionary will get converted to a list. 
+ Returns + ---------- + dictionary or list + A dictionary where each key is a specific feature and its value is a list of the computed + features for each window. + """ + features = {} + if not feature_group in self.get_feature_groups(): + return features + feats = self.extract_features(self.get_feature_groups()[feature_group], windows, feature_dic) + if array: + return self._format_data(feats) + return feats + + def extract_features(self, feature_list, windows, feature_dic={}, array=False): """Extracts a list of features. Parameters @@ -153,29 +153,15 @@ def __call__(self, windows, array=False): A dictionary where each key is a specific feature and its value is a list of the computed features for each window. """ - if self.standardize and not array: - raise ValueError('Cannot standardize data when it is returned as a dictionary. Please disable standardization or set array=True.') features = {} - for feature in self.feature_list: + for feature in feature_list: if feature in self.get_feature_list(): method_to_call = getattr(self, 'get' + feature + 'feat') - valid_keys = [i for i in list(self.feature_dic.keys()) if feature+"_" in i] - smaller_dictionary = dict((k, self.feature_dic[k]) for k in valid_keys if k in self.feature_dic) + valid_keys = [i for i in list(feature_dic.keys()) if feature+"_" in i] + smaller_dictionary = dict((k, feature_dic[k]) for k in valid_keys if k in feature_dic) features[feature] = method_to_call(windows, **smaller_dictionary) if array: - features = self._format_data(features) - - features = self.scale(features) - return features - - def scale(self, features): - if self.standardize: - if self.scaler is None: - # Fit scaler - self.scaler = StandardScaler() - self.scaler.fit(features) - features = self.scaler.transform(features) - + return self._format_data(features) return features def check_features(self, features, silent=False): From 8aaf19d02825645d7a13f17d347a2c81c6290606 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Tue, 27 
Aug 2024 15:21:17 -0300 Subject: [PATCH 024/129] Add online standardization method Data could not be standardized online. Added a method to install standardization to online models. --- libemg/emg_predictor.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 75bc4844..3afec198 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -7,6 +7,7 @@ from sklearn.naive_bayes import GaussianNB from sklearn.neural_network import MLPClassifier, MLPRegressor from sklearn.svm import SVC, SVR +from sklearn.preprocessing import StandardScaler from libemg.feature_extractor import FeatureExtractor from libemg.shared_memory_manager import SharedMemoryManager from multiprocessing import Process, Lock @@ -613,7 +614,7 @@ def __init__(self, self.predictor = offline_predictor self.feature_queue_length = feature_queue_length self.queue = deque() if self.feature_queue_length > 0 else None - + self.scaler = None self.options = {'file': file, 'file_path': file_path, 'std_out': std_out} @@ -755,6 +756,9 @@ def _run_helper(self): else: model_input = np.hstack((model_input, mod_features)) + if self.scaler is not None: + model_input = self.scaler.transform(model_input) + if self.queue is not None: # Queue features from previous windows if len(self.queue) == self.feature_queue_length: @@ -768,7 +772,6 @@ def _run_helper(self): model_input = np.concatenate(self.queue, axis=0) model_input = np.expand_dims(model_input, axis=0) # cast to 3D here for time series models - # TODO: Verify that this works then add queue parameter to child classes (then add to feature-extractor-rework branch) else: model_input = window[list(window.keys())[0]] #TODO: Change this @@ -778,6 +781,22 @@ def _run_helper(self): self.write_output(model_input, window) + def install_standardization(self, standardization: np.ndarray | StandardScaler): + """Install standardization to online model. 
Standardizes each feature based on training data (i.e., standardizes across windows). + Standardization is only applied when features are extracted and is applied before feature queueing (i.e., features are standardized then queued). + To standardize data, use the standardize Filter. + + :param standardization: Standardization data. If an array, creates a scaler and fits to the provided array. If a StandardScaler, uses the StandardScaler. + :type standardization: np.ndarray | StandardScaler + """ + scaler = standardization + + if not isinstance(scaler, StandardScaler): + # Fit scaler to provided data + scaler = StandardScaler().fit(np.array(standardization)) + + self.scaler = scaler + # ----- All of these are unique to each online streamer ---------- def run(self): pass From 80969d8a84f6541e8d9ba01772570e86a40d359e Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Tue, 27 Aug 2024 15:34:12 -0300 Subject: [PATCH 025/129] Use proper FeatureExtractor interface Some changes were not reverted properly, causing the FeatureExtractor in OnlineStreamer to use the old interface. Updated to use the current interface (no breaking API). 
--- libemg/emg_predictor.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 3afec198..e640bde9 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -599,7 +599,7 @@ def __init__(self, online_data_handler, file_path, file, smm, smm_items, - fe, + features, port, ip, std_out, tcp, @@ -608,7 +608,7 @@ def __init__(self, self.window_size = window_size self.window_increment = window_increment self.odh = online_data_handler - self.fe = fe + self.features = features self.port = port self.ip = ip self.predictor = offline_predictor @@ -727,6 +727,7 @@ def _run_helper(self): self.odh.reset() files = {} + fe = FeatureExtractor() while True: if self.smm: if not self.options["smm"].get_variable("active_flag")[0,0]: @@ -746,11 +747,11 @@ def _run_helper(self): window = {mod:get_windows(data[mod], self.window_size, self.window_increment) for mod in self.odh.modalities} # Dealing with the case for CNNs when no features are used - if self.fe is not None: + if self.features is not None: model_input = None for mod in self.odh.modalities: # todo: features for each modality can be different - mod_features = self.fe(window[mod], array=True) + mod_features = fe.extract_features(self.features, window[mod], array=True) if model_input is None: model_input = mod_features else: From 3466dad437c410d21660918f21dcb984bdaedca6 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 4 Sep 2024 11:06:05 -0300 Subject: [PATCH 026/129] Fix heatmap feature extraction Feature extraction in visualize_heatmap used old modified API for FeatureExtractor. Reverted to expected version. 
--- libemg/data_handler.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 1049453b..91e8f29e 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -787,8 +787,8 @@ def extract_data(): data = data[:num_samples] # Extract features along each channel windows = data[np.newaxis].transpose(0, 2, 1) # add axis and tranpose to convert to (windows x channels x samples) - fe = FeatureExtractor(feature_list) - feature_set_dict = fe(windows, array=False) + fe = FeatureExtractor() + feature_set_dict = fe.extract_features(feature_list, windows, array=False) assert isinstance(feature_set_dict, dict), f"Expected dictionary of features. Got: {type(feature_set_dict)}." if remap_function is not None: # Remap raw data to image format From 5f2058319414dd5977dacb54ae2a8dc0a9cf55a1 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 4 Sep 2024 12:44:54 -0300 Subject: [PATCH 027/129] Add clarification to install_standardization docstring --- libemg/emg_predictor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index e640bde9..2378eee3 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -784,7 +784,8 @@ def _run_helper(self): def install_standardization(self, standardization: np.ndarray | StandardScaler): """Install standardization to online model. Standardizes each feature based on training data (i.e., standardizes across windows). - Standardization is only applied when features are extracted and is applied before feature queueing (i.e., features are standardized then queued). + Standardization is only applied when features are extracted and is applied before feature queueing (i.e., features are standardized then queued) + if relevant. To standardize data, use the standardize Filter. :param standardization: Standardization data. If an array, creates a scaler and fits to the provided array. 
If a StandardScaler, uses the StandardScaler. From 6f5e72dac8d17f3a8b1ca43f7945f32cdb2c400a Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 5 Sep 2024 16:25:55 -0300 Subject: [PATCH 028/129] Check for correct MacOS string --- libemg/streamers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/streamers.py b/libemg/streamers.py index 2543d065..22c8c66b 100644 --- a/libemg/streamers.py +++ b/libemg/streamers.py @@ -337,7 +337,7 @@ def oymotion_streamer(shared_memory_items : list | None = None, operating_system = platform.system().lower() # I'm only addressing this atm. - if operating_system == "windows" or operating_system == 'mac': + if operating_system == "windows" or operating_system == 'darwin': oym = Gforce(sampling_rate, res, emg, imu, shared_memory_items) oym.start() else: From 318fc0cb576dcbdb8dce07e4b50723884676ecc3 Mon Sep 17 00:00:00 2001 From: Amir Reza Hariri Date: Fri, 6 Sep 2024 10:39:56 -0300 Subject: [PATCH 029/129] ninapro db2 --- libemg/data_handler.py | 5 +++-- libemg/datasets.py | 10 +++++++--- libemg/filtering.py | 10 +++++----- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 86036e9d..841b89ed 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -234,9 +234,10 @@ class OfflineDataHandler(DataHandler): The purpose of this class is to facilitate the process of accumulating offline training and testing data. This class is extensible to a wide range of file and folder structures. 
""" - def __init__(self, dataglove): + def __init__(self, dataset=None, dataglove=False): super().__init__() self.dataglove = dataglove + self.dataset = dataset def __add__(self, other): # Concatenate two OfflineDataHandlers together @@ -458,7 +459,7 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio metadata_[k] = np.concatenate((metadata_[k], file_metadata)) - return (windows_[:, :-self.dataglove, :], windows_[:, -self.dataglove:, :], metadata_) if self.dataglove else (windows_, metadata_) + return (windows_[:, :-self.dataglove, :], windows_[:, -self.dataglove:, -1], metadata_) if self.dataglove else (windows_, metadata_) def isolate_channels(self, channels): diff --git a/libemg/datasets.py b/libemg/datasets.py index c3867053..19320dea 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -174,9 +174,12 @@ def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for return odh class NinaproDB2(Ninapro): + EMG_LEN = 12 + DATAGLOVE_LEN = 22 + def __init__(self, save_dir='.', dataset_name="NinaproDB2", dataglove=False): if dataglove: - dataglove = 22 # Number of dataglove ccolumns in DB2 + dataglove = type(self).DATAGLOVE_LEN # Number of dataglove ccolumns in DB2 Ninapro.__init__(self, save_dir, dataset_name, dataglove) self.class_list = ["TODO"] self.exercise_step = [0,0,0] @@ -191,8 +194,9 @@ def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), RegexFilter(left_bound="DB2_s", right_bound="/",values=subjects_values, description='subjects') ] - odh = OfflineDataHandler(self.dataglove) - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + odh = OfflineDataHandler(self, self.dataglove) + column = slice(0, type(self).EMG_LEN) if not self.dataglove else None + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",", 
data_column=column) return odh # given a directory, return a list of files in that directory matching a format diff --git a/libemg/filtering.py b/libemg/filtering.py index f4a8820f..ee136472 100644 --- a/libemg/filtering.py +++ b/libemg/filtering.py @@ -123,7 +123,10 @@ def _filter_offline_data_handler(self, data): ''' assert hasattr(data,"data") for f in range(len(data.data)): - data.data[f] = self._run_filter(data.data[f], data.dataglove) + if data.dataglove: + data.data[f][:, :-data.dataglove] = self._run_filter(data.data[f][:, :-data.dataglove]) + else: + data.data[f] = self._run_filter(data.data[f]) def _filter_np_ndarray(self, data): ''' Helper function that runs the installed filters on an np.ndarray. @@ -140,7 +143,7 @@ def _filter_np_ndarray(self, data): ''' return self._run_filter(data) - def _run_filter(self, matrix, dataglove=False): + def _run_filter(self, matrix): ''' Helper function that actually runs the installed filters on an np.ndarray. This is where the actual processing happens. Parameters @@ -154,9 +157,6 @@ def _run_filter(self, matrix, dataglove=False): Data that has been filtered. ''' - if dataglove: - matrix = matrix[:, :-dataglove] - for fl in range(len(self.filters)): if self.filters[fl]["name"] == "standardize": matrix = (matrix - self.filters[fl]["mean"]) / self.filters[fl]["std"] From 24c19512e80594eaefc0111d23b59bb0f59f1f4a Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 6 Sep 2024 14:29:02 -0300 Subject: [PATCH 030/129] Remove unnecessary pop of oldest window There was a check for when the queue reached the max length that would manually pop the oldest window. This functionality is already built into deque by specifying the maxlen parameter, so replaced the check with maxlen. 
--- libemg/emg_predictor.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/libemg/emg_predictor.py b/libemg/emg_predictor.py index 2378eee3..4e2ce291 100644 --- a/libemg/emg_predictor.py +++ b/libemg/emg_predictor.py @@ -613,7 +613,7 @@ def __init__(self, self.ip = ip self.predictor = offline_predictor self.feature_queue_length = feature_queue_length - self.queue = deque() if self.feature_queue_length > 0 else None + self.queue = deque(maxlen=feature_queue_length) if self.feature_queue_length > 0 else None self.scaler = None self.options = {'file': file, 'file_path': file_path, 'std_out': std_out} @@ -762,10 +762,7 @@ def _run_helper(self): if self.queue is not None: # Queue features from previous windows - if len(self.queue) == self.feature_queue_length: - # Remove oldest window - self.queue.popleft() - self.queue.append(model_input) + self.queue.append(model_input) # oldest windows will automatically be dequeued if length exceeds maxlen if len(self.queue) < self.feature_queue_length: # Skip until buffer fills up From 25460b3150696092f2e81bd5f34261901f0d3219 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Mon, 9 Sep 2024 11:54:04 -0300 Subject: [PATCH 031/129] updates sifi streamer to accept bridge version. forked sifi bridge into bp and ba --- libemg/_streamers/_sifi_bridge_streamer.py | 12 +- libemg/streamers.py | 162 ++++++++++++++++++--- 2 files changed, 146 insertions(+), 28 deletions(-) diff --git a/libemg/_streamers/_sifi_bridge_streamer.py b/libemg/_streamers/_sifi_bridge_streamer.py index eea7735b..4d9e39b6 100644 --- a/libemg/_streamers/_sifi_bridge_streamer.py +++ b/libemg/_streamers/_sifi_bridge_streamer.py @@ -22,8 +22,8 @@ class SiFiBridgeStreamer(Process): Parameters ---------- - version : str - The version of the devie ('1_1 for bioarmband, 1_2 or 1_3 for biopoint). + name : str + The name of the device. 
shared_memory_items : list Shared memory configuration parameters for the streamer in format: ["tag", (size), datatype, Lock()]. @@ -62,7 +62,7 @@ class SiFiBridgeStreamer(Process): """ def __init__(self, - version: str = '1_2', + name: str | None = None, shared_memory_items: list = [], ecg: bool = False, emg: bool = True, @@ -96,7 +96,7 @@ def __init__(self, self.prepare_config_message(ecg, emg, eda, imu, ppg, notch_on, notch_freq, emgfir_on, emg_fir, eda_cfg, fc_lp, fc_hp, freq, streaming) - self.prepare_connect_message(version, mac) + self.prepare_connect_message(name, mac) self.prepare_executable(bridge_version) @@ -139,12 +139,12 @@ def prepare_config_message(self, self.config_message = bytes(self.config_message,"UTF-8") def prepare_connect_message(self, - version: str, + name: str, mac : str): if mac is not None: self.connect_message = '-c ' + str(mac) + '\n' else: - self.connect_message = '-c BioPoint_v' + str(version) + '\n' + self.connect_message = '-c ' + str(name) + '\n' self.connect_message = bytes(self.connect_message,"UTF-8") def prepare_executable(self, diff --git a/libemg/streamers.py b/libemg/streamers.py index 2543d065..6b43ae55 100644 --- a/libemg/streamers.py +++ b/libemg/streamers.py @@ -15,13 +15,13 @@ from libemg._streamers._sifi_bridge_streamer import SiFiBridgeStreamer from libemg._streamers._leap_streamer import LeapStreamer -def sifibridge_streamer(version="1_1", +def sifi_biopoint_streamer(name="BioPoint_v1_3", shared_memory_items = None, - ecg=False, + ecg=True, emg=True, - eda=False, - imu=False, - ppg=False, + eda=True, + imu=True, + ppg=True, notch_on=True, notch_freq=60, emg_fir_on = True, emg_fir=[20,450], @@ -30,26 +30,140 @@ def sifibridge_streamer(version="1_1", fc_hp = 5, # high pass eda freq = 250,# eda sampling frequency streaming=False, - mac= None): + mac= None, + bridge_version = "0.6.4"): # TODO, replace bridge_version with none after Sifi updates + """The streamer for the sifi biopoint. 
+ This function connects to the sifi bridge and streams its data to the SharedMemory. This is used + for the SiFi biopoint. + Note that the IMU is acc_x, acc_y, acc_z, quat_w, quat_x, quat_y, quat_z. + Parameters + ---------- + name: string (option), default = 'BioPoint_v1_3' + The name for the sifi device. + shared_memory_items, default = [] + The key, size, datatype, and multiprocessing Lock for all data to be shared between processes. + ecg, default = True + The flag to enable electrocardiography recording from the main sensor unit. + emg, default = True + The flag to enable electromyography recording. + eda, default = True + The flag to enable electrodermal recording. + imu, default = True + The flag to enable inertial measurement unit recording + ppg, default = True + The flag to enable photoplethysmography recording + notch_on, default = True + The flag to enable a fc Hz notch filter on device (firmware). + notch_freq, default = 60 + The cutoff frequency of the notch filter specified by notch_on. + emg_fir_on, default = True + The flag to enable a bandpass filter on device (firmware). + emg_fir, default = [20, 450] + The low and high cutoff frequency of the bandpass filter specified by emg_fir_on. + eda_cfg, default = True + The flag to specify if using high or low frequency current for EDA or bioimpedance. + fc_lp, default = 0 + The low cutoff frequency for the bioimpedance. + fc_hp, default = 5 + The high cutoff frequency for the bioimpedance. + freq, default = 250 + The sampling frequency for bioimpedance. + streaming, default = False + Whether to package the modalities together within packets for lower latency. + mac, default = None: + mac address of the device to be connected to + Returns + ---------- + Object: streamer + The sifi streamer process object. + Object: shared memory + The shared memory items list to be passed to the OnlineDataHandler. 
+ + Examples + --------- + >>> streamer, shared_memory = sifibridge_streamer() + """ + + if shared_memory_items is None: + shared_memory_items = [] + if emg: + shared_memory_items.append(["emg", (4000,1), np.double]) + shared_memory_items.append(["emg_count", (1,1), np.int32]) + if imu: + shared_memory_items.append(["imu", (200,7), np.double]) + shared_memory_items.append(["imu_count", (1,1), np.int32]) + if ecg: + shared_memory_items.append(["ecg", (1000,1), np.double]) + shared_memory_items.append(["ecg_count", (1,1), np.int32]) + if eda: + shared_memory_items.append(["eda", (200,1), np.double]) + shared_memory_items.append(["eda_count", (1,1), np.int32]) + if ppg: + shared_memory_items.append(["ppg", (200,4), np.double]) + shared_memory_items.append(["ppg_count", (1,1), np.int32]) + + for item in shared_memory_items: + item.append(Lock()) + sb = SiFiBridgeStreamer(name=name, + shared_memory_items=shared_memory_items, + notch_on=notch_on, + ecg=ecg, + emg=emg, + eda=eda, + imu=imu, + ppg=ppg, + notch_freq=notch_freq, + emgfir_on=emg_fir_on, + emg_fir = emg_fir, + eda_cfg = eda_cfg, + fc_lp = fc_lp, # low pass eda + fc_hp = fc_hp, # high pass eda + freq = freq,# eda sampling frequency + streaming=streaming, + mac = mac, + bridge_version=bridge_version) + sb.start() + return sb, shared_memory_items + + + +def sifi_bioarmband_streamer(name="BioPoint_v1_1", + shared_memory_items = None, + ecg=True, + emg=True, + eda=True, + imu=True, + ppg=True, + notch_on=False,#I'm pretty sure these aren't configured right for 1500Hz + notch_freq=60, + emg_fir_on = False,#I'm pretty sure these aren't configured right for 1500Hz + emg_fir=[20,450], + eda_cfg = True, + fc_lp = 0, # low pass eda + fc_hp = 5, # high pass eda + freq = 250,# eda sampling frequency + streaming=False, + mac= None, + bridge_version = "0.6.4"):# TODO, replace bridge_version with none after Sifi updates """The streamer for the sifi armband. 
This function connects to the sifi bridge and streams its data to the SharedMemory. This is used - for the SiFi biopoint and bioarmband. + for the SiFi bioarmband. Note that the IMU is acc_x, acc_y, acc_z, quat_w, quat_x, quat_y, quat_z. Parameters ---------- - version: string (option), default = '1_1' - The version for the sifi streamer. + name: string (option), default = 'BioPoint_v1_1' + The name for the sifi device. shared_memory_items, default = [] The key, size, datatype, and multiprocessing Lock for all data to be shared between processes. - ecg, default = False + ecg, default = True The flag to enable electrocardiography recording from the main sensor unit. emg, default = True The flag to enable electromyography recording. - eda, default = False + eda, default = True The flag to enable electrodermal recording. - imu, default = False + imu, default = True The flag to enable inertial measurement unit recording - ppg, default = False + ppg, default = True The flag to enable photoplethysmography recording notch_on, default = True The flag to enable a fc Hz notch filter on device (firmware). 
@@ -89,21 +203,21 @@ def sifibridge_streamer(version="1_1", shared_memory_items.append(["emg", (3000,8), np.double]) shared_memory_items.append(["emg_count", (1,1), np.int32]) if imu: - shared_memory_items.append(["imu", (100,10), np.double]) + shared_memory_items.append(["imu", (200,7), np.double]) shared_memory_items.append(["imu_count", (1,1), np.int32]) if ecg: - shared_memory_items.append(["ecg", (100,10), np.double]) + shared_memory_items.append(["ecg", (1000,1), np.double]) shared_memory_items.append(["ecg_count", (1,1), np.int32]) if eda: - shared_memory_items.append(["eda", (100,10), np.double]) + shared_memory_items.append(["eda", (200,1), np.double]) shared_memory_items.append(["eda_count", (1,1), np.int32]) if ppg: - shared_memory_items.append(["ppg", (100,10), np.double]) + shared_memory_items.append(["ppg", (200,4), np.double]) shared_memory_items.append(["ppg_count", (1,1), np.int32]) for item in shared_memory_items: item.append(Lock()) - sb = SiFiBridgeStreamer(version=version, + sb = SiFiBridgeStreamer(name=name, shared_memory_items=shared_memory_items, notch_on=notch_on, ecg=ecg, @@ -119,10 +233,14 @@ def sifibridge_streamer(version="1_1", fc_hp = fc_hp, # high pass eda freq = freq,# eda sampling frequency streaming=streaming, - mac = mac) + mac = mac, + bridge_version=bridge_version) sb.start() return sb, shared_memory_items + + + def myo_streamer( shared_memory_items : list | None = None, emg : bool = True, @@ -263,7 +381,7 @@ def delsys_api_streamer(license : str = None, Returns ---------- Object: streamer - The sifi streamer object. + The delsys streamer object. Object: shared memory The shared memory object. 
Examples @@ -308,7 +426,7 @@ def oymotion_streamer(shared_memory_items : list | None = None, Returns ---------- Object: streamer - The sifi streamer object + The oymotion streamer object Object: shared memory The shared memory object Examples @@ -362,7 +480,7 @@ def emager_streamer(shared_memory_items = None): Returns ---------- Object: streamer - The sifi streamer object. + The emager streamer object. Object: shared memory The shared memory object. Examples From 8a20f1badb9ffc8d7093f6f1786175fe37e25b68 Mon Sep 17 00:00:00 2001 From: Amir Reza Hariri Date: Mon, 9 Sep 2024 12:27:16 -0300 Subject: [PATCH 032/129] db2 dataglove support limited to NinaproDB2 class --- libemg/data_handler.py | 41 ++++++++++----- libemg/datasets.py | 113 ++++++++++++++++++++++++++++++++--------- libemg/filtering.py | 6 +-- 3 files changed, 119 insertions(+), 41 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 841b89ed..eaf13bc0 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -43,6 +43,8 @@ def __init__(self, left_bound: str, right_bound: str, values: Sequence, descript description: str Description of filter - used to name the metadata field. """ + if values is None: + raise ValueError('Expected a list of values for RegexFilter, but got None. Using regex wildcard is not supported with the RegexFilter.') self.pattern = make_regex(left_bound, right_bound, values) self.values = values self.description = description @@ -234,10 +236,8 @@ class OfflineDataHandler(DataHandler): The purpose of this class is to facilitate the process of accumulating offline training and testing data. This class is extensible to a wide range of file and folder structures. 
""" - def __init__(self, dataset=None, dataglove=False): + def __init__(self): super().__init__() - self.dataglove = dataglove - self.dataset = dataset def __add__(self, other): # Concatenate two OfflineDataHandlers together @@ -319,8 +319,7 @@ def append_to_attribute(name, value): print(f"{len(data_files)} data files fetched out of {len(all_files)} files.") # Read data from files - for f_num, file in enumerate(data_files): - print(f_num, len(data_files)) + for file in data_files: if '.hea' in file: # The key is the emg key that is in the mrdf file file_data = (wfdb.rdrecord(file.replace('.hea',''))).__getattribute__(mrdf_key) @@ -389,7 +388,7 @@ def active_threshold(self, nm_windows, active_windows, active_labels, num_std=3, print(f"{num_relabeled} of {len(active_labels)} active class windows were relabelled to no motion.") return active_labels - def parse_windows(self, window_size, window_increment, metadata_operations=None, dataglove=False): + def parse_windows(self, window_size, window_increment, metadata_operations=None): """Parses windows based on the acquired data from the get_data function. Parameters @@ -422,10 +421,9 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio 'median': np.median, 'last_sample': lambda x: x[-1] } - + metadata_ = {} for i, file in enumerate(self.data): - print(i, len(self.data)) # emg data windowing windows = get_windows(file,window_size,window_increment) if "windows_" in locals(): @@ -448,7 +446,6 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio except KeyError as e: raise KeyError(f"Unexpected metadata operation string. Please pass in a function or an accepted string {tuple(common_metadata_operations.keys())}. 
Got: {operation}.") file_metadata = _get_fn_windows(getattr(self,k)[i], window_size, window_increment, operation) - else: file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) else: @@ -459,7 +456,7 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio metadata_[k] = np.concatenate((metadata_[k], file_metadata)) - return (windows_[:, :-self.dataglove, :], windows_[:, -self.dataglove:, -1], metadata_) if self.dataglove else (windows_, metadata_) + return windows_, metadata_ def isolate_channels(self, channels): @@ -561,13 +558,17 @@ class OnlineDataHandler(DataHandler): ---------- shared_memory_items: Object The shared memory object returned from the streamer. + channel_mask: list or None (optional), default=None + Mask of active channels to use online. Allows certain channels to be ignored when streaming in real-time. If None, all channels are used. + Defaults to None. """ - def __init__(self, shared_memory_items): + def __init__(self, shared_memory_items, channel_mask = None): self.shared_memory_items = shared_memory_items self.prepare_smm() self.log_signal = Event() self.visualize_signal = Event() self.fi = None + self.channel_mask = channel_mask def prepare_smm(self): self.modalities = [] @@ -609,6 +610,17 @@ def install_filter(self, fi): """ self.fi = fi + def install_channel_mask(self, mask): + """Install a channel mask to isolate certain channels for online streaming. + + Parameters + ---------- + mask: list or None (optional), default=None + Mask of active channels to use online. Allows certain channels to be ignored when streaming in real-time. If None, all channels are used. + Defaults to None. 
+ """ + self.channel_mask = mask + def analyze_hardware(self, analyze_time=10): """Analyzes several metrics from the hardware: @@ -796,7 +808,8 @@ def extract_data(): # Extract features along each channel windows = data[np.newaxis].transpose(0, 2, 1) # add axis and tranpose to convert to (windows x channels x samples) fe = FeatureExtractor() - feature_set_dict = fe.extract_features(feature_list, windows) + feature_set_dict = fe.extract_features(feature_list, windows, array=False) + assert isinstance(feature_set_dict, dict), f"Expected dictionary of features. Got: {type(feature_set_dict)}." if remap_function is not None: # Remap raw data to image format for key in feature_set_dict: @@ -974,6 +987,8 @@ def get_data(self, N=0, filter=True): val[mod] = data[:N,:] else: val[mod] = data[:,:] + if self.channel_mask is not None: + val[mod] = val[mod][:, self.channel_mask] count[mod] = self.smm.get_variable(mod+"_count") return val,count @@ -1059,4 +1074,4 @@ def _check_streaming(self, timeout=15): def start_listening(self): print("LibEMG>v1.0 no longer requires online_data_handler.start_listening().\nThis is deprecated.") - pass + pass \ No newline at end of file diff --git a/libemg/datasets.py b/libemg/datasets.py index 19320dea..3c8a35e7 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -73,7 +73,7 @@ def __init__(self, save_dir='.', dataset_name="Ninapro", dataglove=False): self.exercise_step = [] self.dataglove = dataglove - def convert_to_compatible(self, step=2): + def convert_to_compatible(self): # get the zip files (original format they're downloaded in) zip_files = find_all_files_of_type_recursively(self.dataset_folder,".zip") # unzip the files -- if any are there (successive runs skip this) @@ -84,9 +84,9 @@ def convert_to_compatible(self, step=2): # get the mat files (the files we want to convert to csv) mat_files = find_all_files_of_type_recursively(self.dataset_folder,".mat") for mat_file in mat_files: - self.convert_to_csv(mat_file, step, 
self.dataglove) + self.convert_to_csv(mat_file) - def convert_to_csv(self, mat_file, step, dataglove): + def convert_to_csv(self, mat_file): # read the mat file mat_file = mat_file.replace("\\", "/") mat_dir = mat_file.split('/') @@ -96,25 +96,16 @@ def convert_to_csv(self, mat_file, step, dataglove): exercise = int(mat_file.split('_')[3][1]) exercise_offset = self.exercise_step[exercise-1] # 0 reps already included data = mat['emg'] - if dataglove: - try: - target = mat['glove'] - except: - return restimulus = mat['restimulus'] rerepetition = mat['rerepetition'] if data.shape[0] != restimulus.shape[0]: # this happens in some cases min_shape = min([data.shape[0], restimulus.shape[0]]) data = data[:min_shape,:] - if dataglove: - target = target[:min_shape,] restimulus = restimulus[:min_shape,] rerepetition = rerepetition[:min_shape,] # remove 0 repetition - collection buffer remove_mask = (rerepetition != 0).squeeze() data = data[remove_mask,:] - if dataglove: - target = target[remove_mask,:] restimulus = restimulus[remove_mask] rerepetition = rerepetition[remove_mask] # important little not here: @@ -126,8 +117,6 @@ def convert_to_csv(self, mat_file, step, dataglove): # so we remove the rest class too remove_mask = (restimulus != 0).squeeze() data = data[remove_mask,:] - if dataglove: - target = target[remove_mask,:] restimulus = restimulus[remove_mask] rerepetition = rerepetition[remove_mask] tail = 0 @@ -141,11 +130,8 @@ def convert_to_csv(self, mat_file, step, dataglove): else: head = head[0] + tail # downsample to 1kHz from 2kHz using decimation - if dataglove: - data_for_file = np.concatenate((data[tail:head,:], target[tail:head,:]), 1) - else: - data_for_file = data[tail:head,:] - data_for_file = data_for_file[::step, :] + data_for_file = data[tail:head,:] + data_for_file = data_for_file[::2, :] # write to csv csv_file = mat_dir + 'C' + str(motion-1) + 'R' + str(rep-1 + exercise_offset) + '.csv' np.savetxt(csv_file, data_for_file, delimiter=',') @@ -179,7 
+165,7 @@ class NinaproDB2(Ninapro): def __init__(self, save_dir='.', dataset_name="NinaproDB2", dataglove=False): if dataglove: - dataglove = type(self).DATAGLOVE_LEN # Number of dataglove ccolumns in DB2 + dataglove = self.DATAGLOVE_LEN # Number of dataglove ccolumns in DB2 Ninapro.__init__(self, save_dir, dataset_name, dataglove) self.class_list = ["TODO"] self.exercise_step = [0,0,0] @@ -194,10 +180,91 @@ def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), RegexFilter(left_bound="DB2_s", right_bound="/",values=subjects_values, description='subjects') ] - odh = OfflineDataHandler(self, self.dataglove) - column = slice(0, type(self).EMG_LEN) if not self.dataglove else None - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",", data_column=column) + odh = OfflineDataHandler() + metadata_fetchers = None if not self.dataglove else [ColumnFetcher('cyberglove', column_mask= + [idx for idx in range(self.EMG_LEN, self.EMG_LEN + self.dataglove)])] + emg_column_mask = [idx for idx in range(self.EMG_LEN)] + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",", data_column=emg_column_mask, metadata_fetchers=metadata_fetchers) return odh + + def convert_to_compatible(self): + # get the zip files (original format they're downloaded in) + zip_files = find_all_files_of_type_recursively(self.dataset_folder,".zip") + # unzip the files -- if any are there (successive runs skip this) + for zip_file in zip_files: + with zipfile.ZipFile(zip_file, 'r') as zip_ref: + zip_ref.extractall(zip_file[:-4]+'/') + os.remove(zip_file) + # get the mat files (the files we want to convert to csv) + mat_files = find_all_files_of_type_recursively(self.dataset_folder,".mat") + for mat_file in mat_files: + self.convert_to_csv(mat_file) + + def convert_to_csv(self, mat_file): + # read the mat file + 
mat_file = mat_file.replace("\\", "/") + mat_dir = mat_file.split('/') + mat_dir = os.path.join(*mat_dir[:-1],"") + mat = sio.loadmat(mat_file) + # get the data + exercise = int(mat_file.split('_')[3][1]) + exercise_offset = self.exercise_step[exercise-1] # 0 reps already included + data = mat['emg'] + if self.dataglove: + try: + target = mat['glove'] + except: + return + restimulus = mat['restimulus'] + rerepetition = mat['rerepetition'] + if data.shape[0] != restimulus.shape[0]: # this happens in some cases + min_shape = min([data.shape[0], restimulus.shape[0]]) + data = data[:min_shape,:] + if self.dataglove: + target = target[:min_shape,] + restimulus = restimulus[:min_shape,] + rerepetition = rerepetition[:min_shape,] + # remove 0 repetition - collection buffer + remove_mask = (rerepetition != 0).squeeze() + data = data[remove_mask,:] + if self.dataglove: + target = target[remove_mask,:] + restimulus = restimulus[remove_mask] + rerepetition = rerepetition[remove_mask] + # important little not here: + # the "rest" really is only the rest between motions, not a dedicated rest class. + # there will be many more rest repetitions (as it is between every class) + # so usually we really care about classifying rest as its important (most of the time we do nothing) + # but for this dataset it doesn't make sense to include (and not its just an offline showcase of the library) + # I encourage you to plot the restimulus to see what I mean. 
-> plt.plot(restimulus) + # so we remove the rest class too + remove_mask = (restimulus != 0).squeeze() + data = data[remove_mask,:] + if self.dataglove: + target = target[remove_mask,:] + restimulus = restimulus[remove_mask] + rerepetition = rerepetition[remove_mask] + tail = 0 + while tail < data.shape[0]-1: + rep = rerepetition[tail][0] # remove the 1 offset (0 was the collection buffer) + motion = restimulus[tail][0] # remove the 1 offset (0 was between motions "rest") + # find head + head = np.where(rerepetition[tail:] != rep)[0] + if head.shape == (0,): # last segment of data + head = data.shape[0] -1 + else: + head = head[0] + tail + # downsample to 1kHz from 2kHz using decimation + if self.dataglove: + data_for_file = np.concatenate((data[tail:head,:], target[tail:head,:]), 1) + else: + data_for_file = data[tail:head,:] + data_for_file = data_for_file[::2, :] + # write to csv + csv_file = mat_dir + 'C' + str(motion-1) + 'R' + str(rep-1 + exercise_offset) + '.csv' + np.savetxt(csv_file, data_for_file, delimiter=',') + tail = head + os.remove(mat_file) # given a directory, return a list of files in that directory matching a format # can be nested diff --git a/libemg/filtering.py b/libemg/filtering.py index ee136472..5e3989ab 100644 --- a/libemg/filtering.py +++ b/libemg/filtering.py @@ -123,10 +123,7 @@ def _filter_offline_data_handler(self, data): ''' assert hasattr(data,"data") for f in range(len(data.data)): - if data.dataglove: - data.data[f][:, :-data.dataglove] = self._run_filter(data.data[f][:, :-data.dataglove]) - else: - data.data[f] = self._run_filter(data.data[f]) + data.data[f] = self._run_filter(data.data[f]) def _filter_np_ndarray(self, data): ''' Helper function that runs the installed filters on an np.ndarray. @@ -156,7 +153,6 @@ def _run_filter(self, matrix): matrix: np.ndarray Data that has been filtered. 
''' - for fl in range(len(self.filters)): if self.filters[fl]["name"] == "standardize": matrix = (matrix - self.filters[fl]["mean"]) / self.filters[fl]["std"] From 6e436a919c7fa50d07ccf406e51ae8dbbe9ae173 Mon Sep 17 00:00:00 2001 From: Amir Reza Hariri Date: Mon, 9 Sep 2024 13:35:05 -0300 Subject: [PATCH 033/129] documentation for NinaproDB2 --- libemg/datasets.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 3c8a35e7..1a2410be 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -63,7 +63,7 @@ def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for return odh class Ninapro(Dataset): - def __init__(self, save_dir='.', dataset_name="Ninapro", dataglove=False): + def __init__(self, save_dir='.', dataset_name="Ninapro"): # downloading the Ninapro dataset is not supported (no permission given from the authors)' # however, you can download it from http://ninapro.hevs.ch/DB8 # the subject zip files should be placed at: /NinaproDB8/DB8_s#.zip @@ -71,7 +71,6 @@ def __init__(self, save_dir='.', dataset_name="Ninapro", dataglove=False): self.dataset_name = dataset_name self.dataset_folder = os.path.join(self.save_dir , self.dataset_name, "") self.exercise_step = [] - self.dataglove = dataglove def convert_to_compatible(self): # get the zip files (original format they're downloaded in) @@ -164,9 +163,20 @@ class NinaproDB2(Ninapro): DATAGLOVE_LEN = 22 def __init__(self, save_dir='.', dataset_name="NinaproDB2", dataglove=False): - if dataglove: - dataglove = self.DATAGLOVE_LEN # Number of dataglove ccolumns in DB2 - Ninapro.__init__(self, save_dir, dataset_name, dataglove) + """Ninapro DB2 class to fetch data from DB2 dataset matlab file for processing. 
+ + Parameters + ---------- + dataglove: bool + If True, cyberglove data will also be fetched from matlab files in convert_to_compatible method, and will set + metadata_fetchers in OfflineDataHandler.get_data() to put cyberglove data into metadata (for regression). Use the following + code for OfflineDataHandler().parse_windows(): + >>> metadata_operations = {'cyberglove': lambda x: x[-1]} # fetch the last sample for regression in time window + >>> inputs, metadata = odh.parse_windows(, , metadata_operations=metadata_operations) + >>> regression_targets = metadata['cyberglove'] + """ + self.dataglove = self.DATAGLOVE_LEN if dataglove else False + Ninapro.__init__(self, save_dir, dataset_name) self.class_list = ["TODO"] self.exercise_step = [0,0,0] From e631c6a4f935416a642910bb741f7eccbd2061a5 Mon Sep 17 00:00:00 2001 From: eeddy Date: Fri, 13 Sep 2024 12:17:07 -0300 Subject: [PATCH 034/129] Dataset updates --- libemg/_datasets/_3DC.py | 41 ++ libemg/_datasets/__init__.py | 3 + .../__pycache__/_3DC.cpython-310.pyc | Bin 0 -> 2653 bytes .../__pycache__/__init__.cpython-310.pyc | Bin 0 -> 238 bytes .../__pycache__/ciil.cpython-310.pyc | Bin 0 -> 3396 bytes .../continous_transitions.cpython-310.pyc | Bin 0 -> 2309 bytes .../__pycache__/dataset.cpython-310.pyc | Bin 0 -> 2218 bytes .../__pycache__/emg_epn612.cpython-310.pyc | Bin 0 -> 2053 bytes .../__pycache__/grab_myo.cpython-310.pyc | Bin 0 -> 2318 bytes .../one_subject_myo.cpython-310.pyc | Bin 0 -> 1819 bytes libemg/_datasets/ciil.py | 81 +++ libemg/_datasets/continous_transitions.py | 62 ++ libemg/_datasets/dataset.py | 185 ++++++ libemg/_datasets/emg_epn612.py | 52 ++ libemg/_datasets/grab_myo.py | 58 ++ libemg/_datasets/one_subject_myo.py | 38 ++ libemg/datasets.py | 614 +----------------- 17 files changed, 549 insertions(+), 585 deletions(-) create mode 100644 libemg/_datasets/_3DC.py create mode 100644 libemg/_datasets/__init__.py create mode 100644 libemg/_datasets/__pycache__/_3DC.cpython-310.pyc create mode 
100644 libemg/_datasets/__pycache__/__init__.cpython-310.pyc create mode 100644 libemg/_datasets/__pycache__/ciil.cpython-310.pyc create mode 100644 libemg/_datasets/__pycache__/continous_transitions.cpython-310.pyc create mode 100644 libemg/_datasets/__pycache__/dataset.cpython-310.pyc create mode 100644 libemg/_datasets/__pycache__/emg_epn612.cpython-310.pyc create mode 100644 libemg/_datasets/__pycache__/grab_myo.cpython-310.pyc create mode 100644 libemg/_datasets/__pycache__/one_subject_myo.cpython-310.pyc create mode 100644 libemg/_datasets/ciil.py create mode 100644 libemg/_datasets/continous_transitions.py create mode 100644 libemg/_datasets/dataset.py create mode 100644 libemg/_datasets/emg_epn612.py create mode 100644 libemg/_datasets/grab_myo.py create mode 100644 libemg/_datasets/one_subject_myo.py diff --git a/libemg/_datasets/_3DC.py b/libemg/_datasets/_3DC.py new file mode 100644 index 00000000..c49b1583 --- /dev/null +++ b/libemg/_datasets/_3DC.py @@ -0,0 +1,41 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os + +class _3DCDataset(Dataset): + def __init__(self, save_dir='.', redownload=False, dataset_name="_3DCDataset"): + Dataset.__init__(self, + 1000, + 10, + '3DC Armband (Prototype)', + 22, + ["Neutral", "Radial Deviation", "Wrist Flexion", "Ulnar Deviation", "Wrist Extension", "Supination", "Pronation", "Power Grip", "Open Hand", "Chuck Grip", "Pinch Grip"], + '8 (4 Train, 4 Test)', + "The 3DC dataset including 11 classes.", + "@article{cote2019deep, title={Deep learning for electromyographic hand gesture signal classification using transfer learning}, author={C{^o}t{'e}-Allard, Ulysse and Fall, Cheikh Latyr and Drouin, Alexandre and Campeau-Lecours, Alexandre and Gosselin, Cl{'e}ment and Glette, Kyrre and Laviolette, Fran{\c{c}}ois and Gosselin, Benoit}, journal={IEEE transactions on neural systems and rehabilitation engineering}, volume={27}, number={4}, 
pages={760--771}, year={2019}, publisher={IEEE} }", + save_dir, redownload) + self.url = "https://github.com/libemg/3DCDataset" + self.dataset_name = dataset_name + self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + + def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,23)], + sets_values = ["train", "test"], + reps_values = ["0","1","2","3"], + classes_values = [str(i) for i in range(11)]): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + elif (self.redownload): + self.remove_dataset(self.dataset_folder) + self.download(self.url, self.dataset_folder) + + if format == OfflineDataHandler: + regex_filters = [ + RegexFilter(left_bound = "/", right_bound="/EMG", values = sets_values, description='sets'), + RegexFilter(left_bound = "_", right_bound=".txt", values = classes_values, description='classes'), + RegexFilter(left_bound = "EMG_gesture_", right_bound="_", values = reps_values, description='reps'), + RegexFilter(left_bound="Participant", right_bound="/",values=subjects_values, description='subjects') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} \ No newline at end of file diff --git a/libemg/_datasets/__init__.py b/libemg/_datasets/__init__.py new file mode 100644 index 00000000..023df7a3 --- /dev/null +++ b/libemg/_datasets/__init__.py @@ -0,0 +1,3 @@ +from libemg._datasets import _3DC +from libemg._datasets import one_subject_myo + diff --git a/libemg/_datasets/__pycache__/_3DC.cpython-310.pyc b/libemg/_datasets/__pycache__/_3DC.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..abdf4923a8bcba9c75a6da7b9534feb453428f15 GIT binary patch literal 2653 
zcmZ`*&2QYs73Yv#?uxrw$+8j)XbbSrqFF%RwVWVMj5vs^uNHL`A&yaiXhTq(S?&OH<`TA=TdyRsUzT+Hab`FJ0{nfW~x>+8M+ z-(PmWAO4|jS>K~_^K(J@44V1~I%-iOtc-+&plk~}b3!L`L$}f$(FhyI7IkUkr9~Tz zTsomgou^jMyMoeYaN4w{ZSKiz@Z$fjvvtg`7EpY3f@V`5jb%n<;y#oORk7afScm(gA z%0d@iNSLn&k>sg#OcV%=2ns)T2T^n(fcNDi(V7bGEqqScq8C-Qby+K$rnq zKZ=Fe4faQjKOY58Vm*=7@PRB!O#3ll4$HtjF5Zu`F^kK)PgqivQvGoHxB#ng@4=ox ze;LblweMIkt=Vqy>xo=idJS8^vc_a9sn%@)TYqlK>Z3yC_7N!71E-07_nF zXAF?~0OWBDct4sw`0(D{yAK}h!N>&82o}Qm;dFT>xEkSVjAIcjrav8NJys6~gOux0 zdDc&gY#{g<%hJKyH`iN}Hr#X5iXxtK9Yw||r7)f4EsF9uW5zC&amTS98TY*4xoN5R zf<=@|(_O6_7J@R_bBzl~59J2h^d+OkH@PTcDuL&$*Ot5uf8IPCJX1`n0n>1g7^RcJ zL6Jb#xmK$HmvKf*^J;)J(H~Fb8W`}PsUM?2?B#DIxy1da=X+#_Ot)_^OMjJJ0@wfg z!^DwdkY4_=4P9!@t>1eKqOCc(B!9HOwx8A^zb3{}TE4c8-M?oDukxrn+-Lkx0DKJq zk3W3}jiKdjYx?fZ5Z*4i!XU2h;Kl|#5CGKG_=a6W!1$ga_YHaf|3)g&L+u+GO72{% z2(H9<{S~qZl(lSy_RpGOt=07rLW@n+b7eme+sk% ze3Ed@9tMW^Z^&Tk4i0~FjMr2vK-?E>sH3x@%&BQho{mrRGY^n0>9QaecA-g@tY}5}W5=G6EW?f?W zjfPeIRc4I>QDqRXlz2e`T=6ClUvY*UCFWn-)~(gBve96;yW;{l<8{Nbhs)+tm{R+mb``29u$Kt*5Sq~ zrKrXzY$KT03SzKQx=Wmvy1b8eoPv&i_<}lP*cQv$A>MEmK)2;~$QJQPhisCT-L<;} zf9}6LZ;J%*)TcW)U)W6(Cp|~vl|PaV=zC2hIOH-QI6sQAf|dg1P85Az#!$9#rXrUL z4$}bYPk~Hf?~O>`J}@+jO?9DbSw7j`@F31j(*5_+VYpLsSbv4H>aPGtb_j1Tr*SaC kdJ6xGy*&!qfi;z-|K;AaKB+VK6z_<_=iIRXA+l}%2f%aZ@Bjb+ literal 0 HcmV?d00001 diff --git a/libemg/_datasets/__pycache__/__init__.cpython-310.pyc b/libemg/_datasets/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6b4618cb6b31bd4b3f731c941a8cb8ffdf0cdc25 GIT binary patch literal 238 zcmd1j<>g`kf~hYar3C=##~=17mz{oSacWVqericZVqR)WN~OL_esXDUYF}3 literal 0 HcmV?d00001 diff --git a/libemg/_datasets/__pycache__/ciil.cpython-310.pyc b/libemg/_datasets/__pycache__/ciil.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e4e24266d20b37b09169f18103c9c821dcf7ad29 GIT binary patch literal 3396 zcmd5yc-j+4dFacL1()blq#{q(coUYx{(kmDkn 
zv?3d3<`3#MXg8r9Y=BVKGctxPqeJTUU`sCuZibe}^AJ^KY+xXDw=wEOvxCEC5uq|&83oIQ!t zM9^nAcAqdyg6C-XNunOK-?THx&c#ai1kHmp%3R;UngS9*+mHV$=%<=}w}IXIwf? zdd4$`fRnw5hf+3MqYwH@4dqqej~SaT)<0YBPDd*aLS>Vm+au7f$DhRZ*Be=Fxsoi0y`r6%2 z<3yJw>>;4B(2eOS1l^QVCiZRZO2)h544%6s{(u*9)&D{=A$_L$VZtbV>ff41DSaMq zu|Y5Y6HL8HKZ;}C8a@>#!9fw)!D&>8J^j7Z-2W6SC&}p4F9FxecMuVnnLh}SIcNww z38(_tQGnexS%wT-w)P;`#?~_c*0%GU$Q96C&=GLXbDLPnanOpQ3H_yPj2ZvR80zej zEE`lA0la750>8U9En}vdzPV|i12C_=AiFND(pt~nEvPwKr}c4xHt4bMU9jfCvgz@u zwLllgE?t^hC+NwsLzmI&xw}PG*|T;_ zNS#bSjXFK^Z{&%yn}?0cKR5%5@FIZbRZo*jXK|XuCNBV-h?iH66!C(@^#Izz5{Mog z;n&4oq9A}h@-O>a`#}bTeWD$JDPY8IUDPBt=*WZz^K@l{$5H`4hu7XhXXts!8nyrH zYKMXiLjeGUSXvuAjY2N3fs)S_c;y1=<&BYwcX0^9Q~U|Vdq<|}^U(YY2xL3)R}hl| zh^C_nMBdh9RTm)PAQfB%3~@+&#J!UHuC8#_Rlyb{HPuxS_xia}d=&B>CUuoE8HsqP zfN>7XzTDZ`W|4xN$`q;X`++8bc43!b+zbxgRwN(kl3@ZS56Ss`M<2@)F2LQRCtVjv z-$B>#x72gMK89h$1WyBII5#-g1#^$uMPA#l>%vgP39JqGoq4H6eHMKa05eJ@#aWoC z%QStQ@HC`i8O@4dd53NYHb{{PGAh&s5hgt*5Do$hhA#J*GH-P)mMITaHWp|G54JN0 zd(QSDuHx{dh35eY;&K85;%i80=$@n0|vZU2?AjF9gcb}2)^A3dG^LomiQ9J z&xH6AR7hqBr2|yTR)y3W0@VoGD-WLEb!fr>udEkMjJG*pML=nJ(2u*yg!RU{Dv<4< zHVR}sfu9}%%i-h%YIP8!V^H7_#pk`tS*7!DB;L^yo>{|-(xi)M#>&)%8j}i{*S^+h zBd;#-HStYLBAfsKrpSas#Sv#viCXRUUcGX8{ldjnXzjI2moBV}MT`V5OPH82 z>t_zlLlX$G8*{uo#`@O@?-rwhRJ zi&JZfo&csliPmeFzIMd)%XDRi>FYC0Ux#&1y+E!%ZMgo-rZ|U(`bQ9*#?g(N3;*4m zi8*RGbF}`t>zYl}H?wyLPJVYJxAREEdC-~#VUX0qULS-eYnrS#A*~IU19UqjCE}0= z?Oa*Sm9<=17ppkmu+chdS5P2*iNB%1k6G~{ifbr7LUA3%4HO@PXu9GK{Fz{Vn_!wx z3}M~{P5d3j7bpy6yyOie6!ia_Y3-an@_z03H$?}R`s&s7UM~7ic!4J;0u*S$xBW`? 
z&DP3UU$&<7Z$2G6i__QG8Ndf0{%>K5K~@%4&=V4#aW}Keqg8czBV*QUkhSoY2BK!o L18vXSOYi**5eq{y literal 0 HcmV?d00001 diff --git a/libemg/_datasets/__pycache__/continous_transitions.cpython-310.pyc b/libemg/_datasets/__pycache__/continous_transitions.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ba7c99796c1d9c05135b395cb483fdc4b7375ece GIT binary patch literal 2309 zcmZ`)&2Aev5GJ`lTCHSRkrZ3*A#jTJpfr$D2S$+~DDq<@1q!%9n_dgM) zVG;|4+fTb1?7Hdk2GA2A@c@`abDTi{DA9)d07M(ka7Gp-j4~rbK9R=IoLd2k0~U!>aYkUG(B$So%2eF%bZ8Q`6W;H9ey6?v#l5|~&WLr} zd;53pwj7@8I=&yqq4a%KrptKwaWG8gf-x>-Ca|GTt(k zDOl9wo7%O@8BJctQ4&xNXODhEybj$zUw6J2jEfGFgCJ&9Mreh`SiJuUi=g+Gc(> zze8vwoC`!8Z^NbBMl-mc8Ja`%hsFYn>#%BQnLY_=&d?P1QHFgh=eE9CB6?kQvnZ#RPvvAa_BgW@ggT4x(kC4WP;nnp)DGmw?K2 zbAdC9R(DVX>0WMR)(q3lCi;==pgvjPsdZuhwbgzb_Evy4VGp7;tqY?%&3C;5Y}z8N z(%Qn97PM|w0Lz6L25!%AF0JO?Kt}6LG$RM-<)f*Sk>Ry@Ewi&i=75FzHqs;A@8q@@ zI)Y?q&uh{h)-&rWJ#7>Ir|}aY$ah$J$Gi;w!2_-Bz**3G4^}gyf#~%n(t5`~1J*6@ zt7EmsTD%}z$Im^-@1Em#ufVV4T**q{xs>BC=J-o;i63O=_#Xp)oZ~NMrE~l+`T##Z z$M0UH_W^!9e2*VIT!sHFKHfUMt1}109XQ?oRVTyd;iTd|j~L{^-EJt^{auCK$;T_n z`las50oz?YyWJj7Mxf@1@zIcVrBH=#6bQjYi*N@X$eB)!lkH2NR_D1QclIV$dq8{p z3UlQy->^=M%790%>=ewWGEWNOUto&sN2qKO_TwNr`w!ZgmO%bCgOgC?E|tlHQQ9(; z@hFOv1t}I1lKFxO*=i^&&%7T>CXUf!J zQ+AN1ET*c&PEsf@kS0f>Rp!^UGdrOJNMNg;DJ=LN$i0#djUNR^khnc3pFn`TjeOJ{ z^!<>YKnV#wM}znES}7l`wz%A1Yvs{gvW9k9PB}{h4(S?xt_c$H7+4LLv4c%)8#cDF zd)~XGi5vKKPFr{je}eb$E!-elww~X{T?e_iTv-|~tLCzHw9ku1 zdv%TT;^x&?%Ub9|;n9VEc^Pym<~rxYjRbih6iF$+r3F?z9zl`$OlxQim;IA+9xS1J SsV}Rj04t*^gm?KhjQ;_u>q!g% literal 0 HcmV?d00001 diff --git a/libemg/_datasets/__pycache__/dataset.cpython-310.pyc b/libemg/_datasets/__pycache__/dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..851a674388eaab7a04b16c68c71a04d8fd7c1045 GIT binary patch literal 2218 zcmZ`*OK%%D5GJ`RX|wC3%MVe z=;x>qnN%^Zt+yLXKf$adb1KN3iIF>5rc?SInbp04e)vpLARS1+;6ez10P_OcU`jro~Q+0rLt1i%9 
z)dLz-0nol62jrU0CZP>n@EO8wvcp@^nhFwwGG3YAEv)*sq|DDKqrMbpYE6OGM9zX(=0ud`B6J? zQQxVaHq74Q+y zc@gDuzUj8`xXg2iMRx-=xrG~o)^qqB8c~hta1DIB9AgBv4T0Z)zXw7M{(?Es>;^QO zt$HdSf2)$Qx(jL(uSPj+0VZW8q?!+v%u5KS|H0TUH@oh`Ne*d%Nbj}R+?>%Uy@waE z-{8d%G310^v5LNe#8sUd8GeWL7m+!(UHNCKjh-+ESA7aAAKu#_K9=zxyjJ`S6f_zc z(AAw5&$_}#XecN<*W-<&gB=QIq_JwdNvZP4d}Me5xe+PIJP6GPtzm!7@VY{bhT4Wa z+H-~%z!*}&MTjQu3>1MA?-C_%hd9Z1k`FOSPC76T-OYF1e7BK$PUYK_l@S326 z0awvW_8pva+OIq)Z*2C4PA5I(lCYOS_Tj@4$U^8BDQ0A}Sb(5D2OzeSYFyn-(n9bk z%XpGzQu8w5=6oUH?TD93r)M%dpLDF}UMM3~o)(cQmF+m~c^?OkmaLEL+Ksu(!hZnh zSB+9geHuVfAJGxJN7-LKRk%Ls)PQf-uf`sxB!~FL7r%B0Ub$0UJO0KYHu%1F@Q;|8 O8sD=3O32U~Jp31?&;4Nl literal 0 HcmV?d00001 diff --git a/libemg/_datasets/__pycache__/emg_epn612.cpython-310.pyc b/libemg/_datasets/__pycache__/emg_epn612.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e608238afd1b2e573bf57f5b9d11a0829c1e8adf GIT binary patch literal 2053 zcmZ`)&2t+y6kqL^*I#KUY10-Cdtf^COz}EylYvYNb(@b~>SWRa#RIdmUdi@4^6plW zL*m&ChxBjY#33ms{u6H8b%jg*1BMe%Yr8c}S&yEir>Cc%p5FU8sa8uGe7`^Zv-if) zv_I5j@iSra2pZo9fi;Y%HbNbQ)L5tb$mkd&vt#CSgIXO6^aiy%cHS2{g}iq<#a#`X z*m|vDi=Z>3Q^Lk`t!7_>SFMI*VJi>;C!(gyt9!j3jbfrSPlFg!!ese?^vUr~Lk}6)=Z?Y7#nYg=J=l!N5ur&M#eFwq3FlLb zQGyBO^~vpl5GmjEJe)-Jg!R3(Mt!~c*_t;Yae|Zj{rknyR%ST;I2aMBCtR9o zAO_MLCQ&RM9vl%LM@%lw?Rp8tghASvoY3B{8oLf&|9<4VMV~AKiJz-UFQ-sEEfv}N2`^`t@Uv|+(H_+tjyIaj~@~>=CJ_^wC zY)T=w1Y{Cq2U>-qS=G*MAbgE}Mwr*;t z2J|-crYM|gr&eFfEZ@mxQDtUdhj~eqGXocI!YNzY_x11eQ~R`#S;NYCH7i`GldWO# zyegK?uZZQWaE5THr(dAVRC_Y;zM{>nocCDSD;h4}$}L_!$Wf^Cog9VZ&rvut6qOl@ z#tenCfWrA76tHIjg)@9F3WWhzS7%sX{g_#qvkEuM9K7_iB9<%d$x>;b65td!j0D+q zB`QsRnQ3tHLrjwZyJA4xGzt$X!S9NNC8JGup6BDZ8`5NqKOLRa`5<5RP0>jrHn31+R}anbRZsqP#jYU2P8c7fpECsY(@Do6U9aL zG$ztiL1G3hu#c2b{3@5pF#zxbAz0KM!@06ZVkk2f2;eeRS<((taD$~Dr{FOL6N8c% zs{dMkMlS+RzpI7@)St9z&;=6e*a;r^f=LufL}oLRm-bYwsvVUSo&cU9FJYzOO%PX6 z1v$t>6}^NU-9ff)BO8^_3g~a6Phh+_zk`3WDaYB)Sx-?5@&S zpia)QIrQFxom>7z5B(K;Z7==>1%jgJ%t~yY6m~eXk9mAEJ5*k;R}FZ6x$#&3ci{d+ 
z;q}J`;TjBa83;8fNsSQ+2tnRV&5;#YBRjCQY^6@%+%~99ou>wM7}>W1ms$^uj(Y^B zjSf-X%}7Rq$&RU7-}U=xk};gQ6J<1ITrJ&a1NP)rl1j$6V?6Wv)(94m&XLGmJjkcZom+T5j9{l*?wWXAj%dj?Q<7qHF>$!`(7|E zdQ1+Zj8VGRyP3z65zD0LrO6H(4a!$Y=kSQ$fJZywXfN-M_mm%oNtVbkM3uq>Vv}ie z;a0a?RHQp~HoEKG&09z4+R<4g=v&~SHc%FOAe9gDbr?E?EnuW^Knln(B1)|RB1?E|MUc0t-T3-i!|Sx*OVZ*w%Oj3WzP9iOA49IH#!7GZ^1G-Ark zg|f$y94dP^Pcr3;=rIdv!d0W(*UwYRc*oyvG1xl4q*Q8O?vr(mbMYbh5L{ zVkv-Q0+$6}#v}iO&nqaR7s!ayKwM(J^|Oinw_80;-bpb=qVeehBLN$TnydjbYfvqK ziE&`UjCDxFDy&*KhgdA|c-g4lOBs}VJ5D6K(pIE8{b4avhR~Kn)?TEty$HYE=lSSL zdnn~tTnZ*j?)eAMN;xrPVNjZ`yQ98A| zLl!BUv#|)4#u1lEoQ$JP@>5{*B{|!%)UkP22-7^C7kHiH*COoe&xKGG%F<*6H;wB? zbku~C02VeSUITKZ8xqd)28s?6Y!sl|x}K)WdB`K^NA^STu~X$(e*9x3*hD#&n(_xs z>U*N^lWHa+Pb0~~7&?2#QfO>gy~^QHHejk&F5|@pjk=p*Kpgv!YAi4aOQOvrif*#}4w&J0k)T0- z56LUvG^aZ>;a(;bpdy%2UZ(;^Dt*mr^jQKs3&K|!nP*8y#(W0F2q-0@4%LO0F-e- zN{E%3sCx*pToLM0kNPDk&C3=<+bE65EG>YRvd7MaTN+~zwsmO&&!-01ZywBl2W@j7 zz^nxh1luAzhEV4OI9*3OJG_uAv=1jN5A7o+Rp{+;cA6HsZtR{amP(2JK1DR$M&H_|S>$C=H6ssVW!BjFD3%yw*$1l1 zdq7!M$llC4=x@(@Bc&$tn|^FUDcXrW&Ai=0KWe*Y*tD|(YN(FR>E zZ22xC#9aJp%q8~UZo?&8WcL1Tarc(-CKLEjko##R0N0Z3_Fz#fxJB3A*2iWozs z0BsT@zXO;p0^mEf3_#$QbX(9QSJ-l+%4U)kM>H3uF|zv7FY$|MT2^v{mvEmR#SAHZVb&!lWzVB zomH$rqa3*W^o09%%BFCJ&*0$>(AV!WHizzpW!vDprfioYVbDl+7l3q zMXeu|S0l4c)`?5nWR3WyPw>|K^*6O0bS;5N+7p;BC=8eZ z*zPa`no*P$bjlHLN72vInAay&)76Ri9xmPR7CT#@%au0NC{ej@G|5`0w&3Zly03f7 vl*-;RxA=nL#^M;65zGesC;30?Vs)wgHnZCQws!eDEDMQU+%_OTWYhc?Q4r`6 literal 0 HcmV?d00001 diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py new file mode 100644 index 00000000..bed59df1 --- /dev/null +++ b/libemg/_datasets/ciil.py @@ -0,0 +1,81 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os + +class CIIL_MinimalData(Dataset): + def __init__(self, save_dir='.'): + Dataset.__init__(self, + 200, + 8, + 'Myo Armband', + 11, + ['Close', 'Open', 'Rest', 'Flexion', 'Extension'], + '1 Train (1s), 15 Test', + "The goal of this Myo dataset 
is to explore how well models perform when they have a limited amount of training data (1s per class).", + 'https://ieeexplore.ieee.org/abstract/document/10394393', + save_dir) + self.url = "https://github.com/LibEMG/CIILData" + self.dataset_folder = os.path.join(self.save_dir , 'CIILData') + + def prepare_data(self): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + elif (self.redownload): + self.remove_dataset(self.dataset_folder) + self.download(self.url, self.dataset_folder) + + subfolder = 'MinimalTrainingData' + subjects = [str(i) for i in range(0, 11)] + classes_values = [str(i) for i in range(0,5)] + reps_values = ["0","1","2"] + sets = ["train", "test"] + regex_filters = [ + RegexFilter(left_bound = "/", right_bound="/", values = sets, description='sets'), + RegexFilter(left_bound = "/subject", right_bound="/", values = subjects, description='subjects'), + RegexFilter(left_bound = "R_", right_bound="_", values = reps_values, description='reps'), + RegexFilter(left_bound = "C_", right_bound=".csv", values = classes_values, description='classes') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder + '/' + subfolder, regex_filters=regex_filters, delimiter=",") + + return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} + +class CIIL_ElectrodeShift(Dataset): + def __init__(self, save_dir='.'): + Dataset.__init__(self, + 200, + 8, + 'Myo Armband', + 21, + ['Close', 'Open', 'Rest', 'Flexion', 'Extension'], + '5 Train, 8 Test', + "An Electrode Shift confounding factors dataset.", + 'https://link.springer.com/article/10.1186/s12984-024-01355-4', + save_dir) + self.url = "https://github.com/LibEMG/CIILData" + self.dataset_folder = os.path.join(self.save_dir , 'CIILData') + + def prepare_data(self): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): 
+ self.download(self.url, self.dataset_folder) + elif (self.redownload): + self.remove_dataset(self.dataset_folder) + self.download(self.url, self.dataset_folder) + + subfolder = 'ElectrodeShift' + subjects = [str(i) for i in range(0, 21)] + classes_values = [str(i) for i in range(0,5)] + reps_values = ["0","1","2","3","4"] + sets = ["training", "trial_1", "trial_2", "trial_3", "trial_4"] + regex_filters = [ + RegexFilter(left_bound = "/", right_bound="/", values = sets, description='sets'), + RegexFilter(left_bound = "/subject", right_bound="/", values = subjects, description='subjects'), + RegexFilter(left_bound = "R_", right_bound="_", values = reps_values, description='reps'), + RegexFilter(left_bound = "C_", right_bound=".csv", values = classes_values, description='classes') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder + '/' + subfolder, regex_filters=regex_filters, delimiter=",") + + return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1,2,3,4])} \ No newline at end of file diff --git a/libemg/_datasets/continous_transitions.py b/libemg/_datasets/continous_transitions.py new file mode 100644 index 00000000..b846e0f4 --- /dev/null +++ b/libemg/_datasets/continous_transitions.py @@ -0,0 +1,62 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os +import pandas as pd +import h5py +import numpy as np + +class ContinuousTransitions(Dataset): + def __init__(self, save_dir='.', redownload=False, dataset_name="ContinuousTransitions"): + Dataset.__init__(self, + 2000, + 6, + 'Delsys', + 2, + [], + '', + "", + "https://doi.org/10.57922/mec.2503", + save_dir, redownload) + self.dataset_name = dataset_name + self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + + def prepare_data(self): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + print("Please 
download the dataset from: ") #TODO: Update + return + + # Training ODH + odh_tr = OfflineDataHandler() + odh_tr.subjects = [] + odh_tr.classes = [] + odh_tr.extra_attributes = ['subjects', 'classes'] + + # Testing ODH + odh_te = OfflineDataHandler() + odh_te.subjects = [] + odh_te.classes = [] + odh_te.extra_attributes = ['subjects', 'classes'] + + for s in [2,3]: + data = h5py.File('ContinuousTransitions/P' + f"{s:02}" + '.hdf5', "r") + cont_labels = data['continuous']['emg']['prompt'][()] + cont_labels = np.hstack([np.ones((1000)) * cont_labels[0], cont_labels[0:len(cont_labels)-1000]]) # Rolling about 0.5s as per Shri's suggestion + cont_emg = data['continuous']['emg']['signal'][()] + cont_chg_idxs = np.insert(np.where(cont_labels[:-1] != cont_labels[1:])[0], 0, -1) + cont_chg_idxs = np.insert(cont_chg_idxs, len(cont_chg_idxs), len(cont_emg)) + for i in range(0, len(cont_chg_idxs)-1): + odh_te.data.append(cont_emg[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]) + odh_te.classes.append(np.expand_dims(cont_labels[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]-1, axis=1)) + odh_te.subjects.append(np.ones((len(odh_te.data[-1]), 1)) * s-2) #TODO: Update + + ramp_emg = data['ramp']['emg']['signal'][()] + ramp_labels = data['ramp']['emg']['prompt'][()] + r_chg_idxs = np.insert(np.where(ramp_labels[:-1] != ramp_labels[1:])[0], 0, -1) + r_chg_idxs = np.insert(r_chg_idxs, len(r_chg_idxs), len(ramp_emg)) + for i in range(0, len(r_chg_idxs)-1): + odh_tr.data.append(ramp_emg[r_chg_idxs[i]+1:r_chg_idxs[i+1]]) + odh_tr.classes.append(np.expand_dims(ramp_labels[r_chg_idxs[i]+1:r_chg_idxs[i+1]]-1, axis=1)) + odh_tr.subjects.append(np.ones((len(odh_tr.data[-1]), 1)) * s-2) + + return {'All': odh_tr+odh_te, 'Train': odh_tr, 'Test': odh_te} \ No newline at end of file diff --git a/libemg/_datasets/dataset.py b/libemg/_datasets/dataset.py new file mode 100644 index 00000000..4db2ef32 --- /dev/null +++ b/libemg/_datasets/dataset.py @@ -0,0 +1,185 @@ +import os +from libemg.data_handler import 
OfflineDataHandler +# this assumes you have git downloaded (not pygit, but the command line program git) + +class Dataset: + def __init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation, save_dir='.', redownload=False, ): + self.save_dir = save_dir + self.redownload=redownload + + # Every class should have this + self.sampling=sampling + self.num_channels=num_channels + self.recording_device=recording_device + self.num_subjects=num_subjects + self.gestures=gestures + self.num_reps=num_reps + self.description=description + self.citation=citation + + def download(self, url, dataset_name): + clone_command = "git clone " + url + " " + dataset_name + os.system(clone_command) + + def remove_dataset(self, dataset_folder): + remove_command = "rm -rf " + dataset_folder + os.system(remove_command) + + def check_exists(self, dataset_folder): + return os.path.exists(dataset_folder) + + def prepare_data(self, format=OfflineDataHandler): + pass + + def get_info(self): + print(self.description + '\n' + 'Sampling Rate: ' + str(self.sampling) + '\nNumber of Channels: ' + str(self.num_channels) + + '\nDevice: ' + self.recording_device + '\nGestures: ' + str(self.gestures) + '\nNumber of Reps: ' + str(self.num_reps) + '\nNumber of Subjects: ' + str(self.num_subjects) + + '\nCitation: ' + str(self.citation)) + +# given a directory, return a list of files in that directory matching a format +# can be nested +# this is just a handly utility +def find_all_files_of_type_recursively(dir, terminator): + files = os.listdir(dir) + file_list = [] + for file in files: + if file.endswith(terminator): + file_list.append(dir+file) + else: + if os.path.isdir(dir+file): + file_list += find_all_files_of_type_recursively(dir+file+'/',terminator) + return file_list + + +# class GRABMyo(Dataset): +# def __init__(self, save_dir='.', redownload=False, subjects=list(range(1,44)), sessions=list(range(1,4)), dataset_name="GRABMyo"): +# 
Dataset.__init__(self, save_dir, redownload) +# self.url = "https://physionet.org/files/grabmyo/1.0.2/" +# self.dataset_name = dataset_name +# self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) +# self.subjects = subjects +# self.sessions = sessions + +# if (not self.check_exists(self.dataset_folder)): +# self.download_data() +# elif (self.redownload): +# self.remove_dataset(self.dataset_folder) +# self.download_data() +# else: +# print("Data Already Downloaded.") + +# def download_data(self): +# curl_command = "curl --create-dirs" + " -O --output-dir " + str(self.dataset_folder) + "/ " +# # Download files +# print("Starting download...") +# files = ['readme.txt', 'subject-info.csv', 'MotionSequence.txt'] +# for f in files: +# os.system(curl_command + self.url + f) +# for session in self.sessions: +# curl_command = "curl --create-dirs" + " -O --output-dir " + str(self.dataset_folder) + "/" + "Session" + str(session) + "/ " +# for p in self.subjects: +# for t in range(1,8): +# for g in range(1,18): +# endpoint = self.url + "Session" + str(session) + "/session" + str(session) + "_participant" + str(p) + "/session" + str(session) + "_participant" + str(p) + "_gesture" + str(g) + "_trial" + str(t) +# os.system(curl_command + endpoint + '.hea') +# os.system(curl_command + endpoint + '.dat') +# print("Download complete.") + +# def prepare_data(self, format=OfflineDataHandler, subjects=[str(i) for i in range(1,44)], sessions=["1","2","3"]): +# if format == OfflineDataHandler: +# sets_regex = make_regex(left_bound = "session", right_bound="_", values = sessions) +# classes_values = ["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"] +# classes_regex = make_regex(left_bound = "_gesture", right_bound="_", values = classes_values) +# reps_values = ["1","2","3","4","5","6","7"] +# reps_regex = make_regex(left_bound = "trial", right_bound=".hea", values = reps_values) +# subjects_regex = make_regex(left_bound="participant", 
right_bound="_",values=subjects) +# dic = { +# "sessions": sessions, +# "sessions_regex": sets_regex, +# "reps": reps_values, +# "reps_regex": reps_regex, +# "classes": classes_values, +# "classes_regex": classes_regex, +# "subjects": subjects, +# "subjects_regex": subjects_regex +# } +# odh = OfflineDataHandler() +# odh.get_data(folder_location=self.dataset_folder, filename_dic=dic, delimiter=",") +# return odh + +# def print_info(self): +# print('Reference: https://www.physionet.org/content/grabmyo/1.0.2/') +# print('Name: ' + self.dataset_name) +# print('Gestures: 17') +# print('Trials: 7') +# print('Time Per Rep: 5s') +# print('Subjects: 43') +# print("Forearm EMG (16): Columns 0-15\nWrist EMG (12): 18-23 and 26-31\nUnused (4): 16,23,24,31") + + +# class NinaDB1(Dataset): +# def __init__(self, dataset_dir, subjects): +# Dataset.__init__(self, dataset_dir) +# self.dataset_folder = dataset_dir +# self.subjects = subjects + +# if (not self.check_exists(self.dataset_folder)): +# print("The dataset does not currently exist... 
Please download it from: http://ninaweb.hevs.ch/data1") +# exit(1) +# else: +# filenames = next(walk(self.dataset_folder), (None, None, []))[2] +# if not any("csv" in f for f in filenames): +# self.setup(filenames) +# print("Extracted and set up repo.") +# self.prepare_data() + +# def setup(self, filenames): +# for f in filenames: +# if "zip" in f: +# file_path = os.path.join(self.dataset_folder, f) +# with zipfile.ZipFile(file_path, 'r') as zip_ref: +# zip_ref.extractall(self.dataset_folder) +# self.convert_data() + +# def convert_data(self): +# mat_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.mat'))] +# for f in mat_files: +# mat_dict = sio.loadmat(f) +# output_ = np.concatenate((mat_dict['emg'], mat_dict['restimulus'], mat_dict['rerepetition']), axis=1) +# mask_ids = output_[:,11] != 0 +# output_ = output_[mask_ids,:] +# np.savetxt(f[:-4]+'.csv', output_,delimiter=',') + +# def cleanup_data(self): +# mat_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.mat'))] +# zip_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.zip'))] +# files = mat_files + zip_files +# for f in files: +# os.remove(f) + +# def prepare_data(self, format=OfflineDataHandler): +# if format == OfflineDataHandler: +# classes_values = list(range(1,24)) +# classes_column = [10] +# classset_values = [str(i) for i in list(range(1,4))] +# classset_regex = make_regex(left_bound="_E", right_bound=".csv", values=classset_values) +# reps_values = list(range(1,11)) + +# reps_column = [11] +# subjects_values = [str(s) for s in self.subjects] +# subjects_regex = make_regex(left_bound="S", right_bound="_A", values=subjects_values) +# data_column = list(range(0,10)) +# dic = { +# "reps": reps_values, +# "reps_column": reps_column, +# "classes": classes_values, +# "classes_column": classes_column, +# "subjects": subjects_values, +# "subjects_regex": subjects_regex, +# "classset": classset_values, +# 
"classset_regex": classset_regex, +# "data_column": data_column +# } +# odh = OfflineDataHandler() +# odh.get_data(folder_location=self.dataset_folder, filename_dic=dic, delimiter=",") +# return odh diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py new file mode 100644 index 00000000..b4bb2552 --- /dev/null +++ b/libemg/_datasets/emg_epn612.py @@ -0,0 +1,52 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os +import pickle +import json +import numpy as np + +class EMGEPN612(Dataset): + def __init__(self): + Dataset.__init__(self, + 200, + 8, + 'Myo Armband', + 612, + ['Close', 'Open', 'Rest', 'Flexion', 'Extension'], + '50 (For 306 Users), 25 (For 306 Users)', + "A large 612 user dataset for developing cross user models.", + 'https://doi.org/10.5281/zenodo.4421500') + self.url = "https://github.com/libemg/OneSubjectMyoDataset" + self.dataset_name = 'EMGEPN612.pkl' + self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + + def prepare_data(self): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + print("Please download the pickled dataset from: https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EWf3sEvRxg9HuAmGoBG2vYkBDXh4xNst3FAXV0lNoodrAA?e=t6HPaR") #TODO: Fill this in + return + + file = open(self.dataset_folder, 'rb') + data = pickle.load(file) + + emg = data[0] + labels = data[2] + + odh_tr = OfflineDataHandler() + odh_tr.subjects = [] + odh_tr.classes = [] + odh_tr.extra_attributes = ['subjects', 'classes'] + for i, e in enumerate(emg['training']): + odh_tr.data.append(e) + odh_tr.classes.append(np.ones((len(e), 1)) * labels['training'][i]) + odh_tr.subjects.append(np.ones((len(e), 1)) * i//150) + odh_te = OfflineDataHandler() + odh_te.subjects = [] + odh_te.classes = [] + odh_te.extra_attributes = ['subjects', 'classes'] + for i, e in enumerate(emg['testing']): + 
odh_te.data.append(e) + odh_te.classes.append(np.ones((len(e), 1)) * labels['testing'][i]) + odh_te.subjects.append(np.ones((len(e), 1)) * (i//150 + 306)) + + return {'All': odh_tr+odh_te, 'Train': odh_tr, 'Test': odh_te} \ No newline at end of file diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py new file mode 100644 index 00000000..15c08f6b --- /dev/null +++ b/libemg/_datasets/grab_myo.py @@ -0,0 +1,58 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os + +class GRABMyo(Dataset): + def __init__(self): + pass + + def __init__(self, save_dir='.', version='1.0.2', redownload=False, subjects=list(range(1,44)), sessions=list(range(1,4)), dataset_name="grabmyo"): + Dataset.__init__(self, + 0, + 0, + '', + 3, + [], + '', + "", + "", + save_dir, redownload) + self.dataset_name = dataset_name + self.dataset_folder = os.path.join(self.save_dir , self.dataset_name, version) + + def download_data(self): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + print("Please download the GRABMyo dataset from: https://physionet.org/content/grabmyo/1.0.2/") #TODO: Fill this in + return + + def prepare_data(self, subjects=[str(i) for i in range(1,44)], sessions=["1"]): + print('\nPlease cite: ' + self.citation+'\n') + sessions = ["1"] # ["1", "2", "3"] + subjects = ["1", "2", "3", "4", "5"] #[str(i) for i in range(1,44)] + classes_values = ["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"] + reps_values = ["1","2","3","4","5","6","7"] + + regex_filters = [ + RegexFilter(left_bound = "session", right_bound="_", values = sessions, description='session'), + RegexFilter(left_bound = "_gesture", right_bound="_", values = classes_values, description='classes'), + RegexFilter(left_bound = "trial", right_bound=".hea", values = reps_values, description='reps'), + RegexFilter(left_bound="participant", 
right_bound="_",values=subjects, description='subjects') + ] + + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + + forearm_data = odh.isolate_channels(list(range(0,16))) + train_data = forearm_data.isolate_data('reps', [0,1,2,3,4]) + test_data = forearm_data.isolate_data('reps', [5,6]) + return {'All': forearm_data, 'Train': train_data, 'Test': test_data} + +# def print_info(self): +# print('Reference: https://www.physionet.org/content/grabmyo/1.0.2/') +# print('Name: ' + self.dataset_name) +# print('Gestures: 17') +# print('Trials: 7') +# print('Time Per Rep: 5s') +# print('Subjects: 43') +# print("Forearm EMG (16): Columns 0-15\nWrist EMG (12): 18-23 and 26-31\nUnused (4): 16,23,24,31") diff --git a/libemg/_datasets/one_subject_myo.py b/libemg/_datasets/one_subject_myo.py new file mode 100644 index 00000000..8337f180 --- /dev/null +++ b/libemg/_datasets/one_subject_myo.py @@ -0,0 +1,38 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os + +class OneSubjectMyoDataset(Dataset): + def __init__(self, save_dir='.', redownload=False, dataset_name="OneSubjectMyoDataset"): + Dataset.__init__(self, + 200, + 8, + 'Myo Armband', + 1, + ['Close', 'Open', 'Rest', 'Flexion', 'Extension'], + '6 (4 Train, 2 Test)', + "A simple Myo dataset that is used for some of the LibEMG offline demos.", + 'N/A', save_dir, redownload) + self.url = "https://github.com/libemg/OneSubjectMyoDataset" + self.dataset_name = dataset_name + self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + + def prepare_data(self, format=OfflineDataHandler): + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + elif (self.redownload): + self.remove_dataset(self.dataset_folder) + self.download(self.url, self.dataset_folder) + + if format == OfflineDataHandler: + sets_values = ["1","2","3","4","5","6"] + 
classes_values = ["0","1","2","3","4"] + reps_values = ["0","1"] + regex_filters = [ + RegexFilter(left_bound = "/trial_", right_bound="/", values = sets_values, description='sets'), + RegexFilter(left_bound = "C_", right_bound=".csv", values = classes_values, description='classes'), + RegexFilter(left_bound = "R_", right_bound="_", values = reps_values, description='reps') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + return {'All': odh, 'Train': odh.isolate_data("sets", [0,1,2,3,4]), 'Test': odh.isolate_data("sets", [5,6])} \ No newline at end of file diff --git a/libemg/datasets.py b/libemg/datasets.py index 1a2410be..1f4458d5 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -1,588 +1,32 @@ -import os -import numpy as np -import zipfile -import scipy.io as sio -from libemg.data_handler import ColumnFetcher, MetadataFetcher, OfflineDataHandler, RegexFilter, FilePackager -from libemg.utils import make_regex -from glob import glob -from os import walk -from pathlib import Path -from datetime import datetime -# this assumes you have git downloaded (not pygit, but the command line program git) - -class Dataset: - def __init__(self, save_dir='.', redownload=False): - self.save_dir = save_dir - self.redownload=redownload - - def download(self, url, dataset_name): - clone_command = "git clone " + url + " " + dataset_name - os.system(clone_command) - - def remove_dataset(self, dataset_folder): - remove_command = "rm -rf " + dataset_folder - os.system(remove_command) - - def check_exists(self, dataset_folder): - return os.path.exists(dataset_folder) - - def prepare_data(self, format=OfflineDataHandler): - pass - - -class _3DCDataset(Dataset): - def __init__(self, save_dir='.', redownload=False, dataset_name="_3DCDataset"): - Dataset.__init__(self, save_dir, redownload) - self.url = "https://github.com/libemg/3DCDataset" - self.dataset_name = dataset_name - self.dataset_folder = 
os.path.join(self.save_dir , self.dataset_name) - self.class_list = ["Neutral", "Radial Deviation", "Wrist Flexion", "Ulnar Deviation", "Wrist Extension", "Supination", - "Pronation", "Power Grip", "Open Hand", "Chuck Grip", "Pinch Grip"] - - if (not self.check_exists(self.dataset_folder)): - self.download(self.url, self.dataset_folder) - elif (self.redownload): - self.remove_dataset(self.dataset_folder) - self.download(self.url, self.dataset_folder) - - - - def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,23)], - sets_values = ["train", "test"], - reps_values = ["0","1","2","3"], - classes_values = [str(i) for i in range(11)]): - if format == OfflineDataHandler: - regex_filters = [ - RegexFilter(left_bound = "/", right_bound="/EMG", values = sets_values, description='sets'), - RegexFilter(left_bound = "_", right_bound=".txt", values = classes_values, description='classes'), - RegexFilter(left_bound = "EMG_gesture_", right_bound="_", values = reps_values, description='reps'), - RegexFilter(left_bound="Participant", right_bound="/",values=subjects_values, description='subjects') - ] - odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") - return odh - -class Ninapro(Dataset): - def __init__(self, save_dir='.', dataset_name="Ninapro"): - # downloading the Ninapro dataset is not supported (no permission given from the authors)' - # however, you can download it from http://ninapro.hevs.ch/DB8 - # the subject zip files should be placed at: /NinaproDB8/DB8_s#.zip - Dataset.__init__(self, save_dir) - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name, "") - self.exercise_step = [] - - def convert_to_compatible(self): - # get the zip files (original format they're downloaded in) - zip_files = find_all_files_of_type_recursively(self.dataset_folder,".zip") - # unzip the files -- if any are there (successive 
runs skip this) - for zip_file in zip_files: - with zipfile.ZipFile(zip_file, 'r') as zip_ref: - zip_ref.extractall(zip_file[:-4]+'/') - os.remove(zip_file) - # get the mat files (the files we want to convert to csv) - mat_files = find_all_files_of_type_recursively(self.dataset_folder,".mat") - for mat_file in mat_files: - self.convert_to_csv(mat_file) - - def convert_to_csv(self, mat_file): - # read the mat file - mat_file = mat_file.replace("\\", "/") - mat_dir = mat_file.split('/') - mat_dir = os.path.join(*mat_dir[:-1],"") - mat = sio.loadmat(mat_file) - # get the data - exercise = int(mat_file.split('_')[3][1]) - exercise_offset = self.exercise_step[exercise-1] # 0 reps already included - data = mat['emg'] - restimulus = mat['restimulus'] - rerepetition = mat['rerepetition'] - if data.shape[0] != restimulus.shape[0]: # this happens in some cases - min_shape = min([data.shape[0], restimulus.shape[0]]) - data = data[:min_shape,:] - restimulus = restimulus[:min_shape,] - rerepetition = rerepetition[:min_shape,] - # remove 0 repetition - collection buffer - remove_mask = (rerepetition != 0).squeeze() - data = data[remove_mask,:] - restimulus = restimulus[remove_mask] - rerepetition = rerepetition[remove_mask] - # important little not here: - # the "rest" really is only the rest between motions, not a dedicated rest class. - # there will be many more rest repetitions (as it is between every class) - # so usually we really care about classifying rest as its important (most of the time we do nothing) - # but for this dataset it doesn't make sense to include (and not its just an offline showcase of the library) - # I encourage you to plot the restimulus to see what I mean. 
-> plt.plot(restimulus) - # so we remove the rest class too - remove_mask = (restimulus != 0).squeeze() - data = data[remove_mask,:] - restimulus = restimulus[remove_mask] - rerepetition = rerepetition[remove_mask] - tail = 0 - while tail < data.shape[0]-1: - rep = rerepetition[tail][0] # remove the 1 offset (0 was the collection buffer) - motion = restimulus[tail][0] # remove the 1 offset (0 was between motions "rest") - # find head - head = np.where(rerepetition[tail:] != rep)[0] - if head.shape == (0,): # last segment of data - head = data.shape[0] -1 - else: - head = head[0] + tail - # downsample to 1kHz from 2kHz using decimation - data_for_file = data[tail:head,:] - data_for_file = data_for_file[::2, :] - # write to csv - csv_file = mat_dir + 'C' + str(motion-1) + 'R' + str(rep-1 + exercise_offset) + '.csv' - np.savetxt(csv_file, data_for_file, delimiter=',') - tail = head - os.remove(mat_file) - -class NinaproDB8(Ninapro): - def __init__(self, save_dir='.', dataset_name="NinaproDB8"): - Ninapro.__init__(self, save_dir, dataset_name) - self.class_list = ["Thumb Flexion/Extension", "Thumb Abduction/Adduction", "Index Finger Flexion/Extension", "Middle Finger Flexion/Extension", "Combined Ring and Little Fingers Flexion/Extension", - "Index Pointer", "Cylindrical Grip", "Lateral Grip", "Tripod Grip"] - self.exercise_step = [0,10,20] - - def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,13)], - reps_values = [str(i) for i in range(22)], - classes_values = [str(i) for i in range(9)]): - - if format == OfflineDataHandler: - regex_filters = [ - RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), - RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), - RegexFilter(left_bound="DB8_s", right_bound="/",values=subjects_values, description='subjects') - ] - odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, 
regex_filters=regex_filters, delimiter=",") - return odh - -class NinaproDB2(Ninapro): - EMG_LEN = 12 - DATAGLOVE_LEN = 22 - - def __init__(self, save_dir='.', dataset_name="NinaproDB2", dataglove=False): - """Ninapro DB2 class to fetch data from DB2 dataset matlab file for processing. - - Parameters - ---------- - dataglove: bool - If True, cyberglove data will also be fetched from matlab files in convert_to_compatible method, and will set - metadata_fetchers in OfflineDataHandler.get_data() to put cyberglove data into metadata (for regression). Use the following - code for OfflineDataHandler().parse_windows(): - >>> metadata_operations = {'cyberglove': lambda x: x[-1]} # fetch the last sample for regression in time window - >>> inputs, metadata = odh.parse_windows(, , metadata_operations=metadata_operations) - >>> regression_targets = metadata['cyberglove'] - """ - self.dataglove = self.DATAGLOVE_LEN if dataglove else False - Ninapro.__init__(self, save_dir, dataset_name) - self.class_list = ["TODO"] - self.exercise_step = [0,0,0] - - def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,41)], - reps_values = [str(i) for i in range(6)], - classes_values = [str(i) for i in range(50)]): - - if format == OfflineDataHandler: - regex_filters = [ - RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), - RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), - RegexFilter(left_bound="DB2_s", right_bound="/",values=subjects_values, description='subjects') - ] - odh = OfflineDataHandler() - metadata_fetchers = None if not self.dataglove else [ColumnFetcher('cyberglove', column_mask= - [idx for idx in range(self.EMG_LEN, self.EMG_LEN + self.dataglove)])] - emg_column_mask = [idx for idx in range(self.EMG_LEN)] - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",", data_column=emg_column_mask, 
metadata_fetchers=metadata_fetchers) - return odh - - def convert_to_compatible(self): - # get the zip files (original format they're downloaded in) - zip_files = find_all_files_of_type_recursively(self.dataset_folder,".zip") - # unzip the files -- if any are there (successive runs skip this) - for zip_file in zip_files: - with zipfile.ZipFile(zip_file, 'r') as zip_ref: - zip_ref.extractall(zip_file[:-4]+'/') - os.remove(zip_file) - # get the mat files (the files we want to convert to csv) - mat_files = find_all_files_of_type_recursively(self.dataset_folder,".mat") - for mat_file in mat_files: - self.convert_to_csv(mat_file) - - def convert_to_csv(self, mat_file): - # read the mat file - mat_file = mat_file.replace("\\", "/") - mat_dir = mat_file.split('/') - mat_dir = os.path.join(*mat_dir[:-1],"") - mat = sio.loadmat(mat_file) - # get the data - exercise = int(mat_file.split('_')[3][1]) - exercise_offset = self.exercise_step[exercise-1] # 0 reps already included - data = mat['emg'] - if self.dataglove: - try: - target = mat['glove'] - except: - return - restimulus = mat['restimulus'] - rerepetition = mat['rerepetition'] - if data.shape[0] != restimulus.shape[0]: # this happens in some cases - min_shape = min([data.shape[0], restimulus.shape[0]]) - data = data[:min_shape,:] - if self.dataglove: - target = target[:min_shape,] - restimulus = restimulus[:min_shape,] - rerepetition = rerepetition[:min_shape,] - # remove 0 repetition - collection buffer - remove_mask = (rerepetition != 0).squeeze() - data = data[remove_mask,:] - if self.dataglove: - target = target[remove_mask,:] - restimulus = restimulus[remove_mask] - rerepetition = rerepetition[remove_mask] - # important little not here: - # the "rest" really is only the rest between motions, not a dedicated rest class. 
- # there will be many more rest repetitions (as it is between every class) - # so usually we really care about classifying rest as its important (most of the time we do nothing) - # but for this dataset it doesn't make sense to include (and not its just an offline showcase of the library) - # I encourage you to plot the restimulus to see what I mean. -> plt.plot(restimulus) - # so we remove the rest class too - remove_mask = (restimulus != 0).squeeze() - data = data[remove_mask,:] - if self.dataglove: - target = target[remove_mask,:] - restimulus = restimulus[remove_mask] - rerepetition = rerepetition[remove_mask] - tail = 0 - while tail < data.shape[0]-1: - rep = rerepetition[tail][0] # remove the 1 offset (0 was the collection buffer) - motion = restimulus[tail][0] # remove the 1 offset (0 was between motions "rest") - # find head - head = np.where(rerepetition[tail:] != rep)[0] - if head.shape == (0,): # last segment of data - head = data.shape[0] -1 - else: - head = head[0] + tail - # downsample to 1kHz from 2kHz using decimation - if self.dataglove: - data_for_file = np.concatenate((data[tail:head,:], target[tail:head,:]), 1) - else: - data_for_file = data[tail:head,:] - data_for_file = data_for_file[::2, :] - # write to csv - csv_file = mat_dir + 'C' + str(motion-1) + 'R' + str(rep-1 + exercise_offset) + '.csv' - np.savetxt(csv_file, data_for_file, delimiter=',') - tail = head - os.remove(mat_file) - -# given a directory, return a list of files in that directory matching a format -# can be nested -# this is just a handly utility -def find_all_files_of_type_recursively(dir, terminator): - files = os.listdir(dir) - file_list = [] - for file in files: - if file.endswith(terminator): - file_list.append(dir+file) - else: - if os.path.isdir(dir+file): - file_list += find_all_files_of_type_recursively(dir+file+'/',terminator) - return file_list - - -class OneSubjectMyoDataset(Dataset): - def __init__(self, save_dir='.', redownload=False, 
dataset_name="OneSubjectMyoDataset"): - Dataset.__init__(self, save_dir, redownload) - self.url = "https://github.com/libemg/OneSubjectMyoDataset" - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) - - if (not self.check_exists(self.dataset_folder)): - self.download(self.url, self.dataset_folder) - elif (self.redownload): - self.remove_dataset(self.dataset_folder) - self.download(self.url, self.dataset_folder) - - def prepare_data(self, format=OfflineDataHandler): - if format == OfflineDataHandler: - sets_values = ["1","2","3","4","5","6"] - classes_values = ["0","1","2","3","4"] - reps_values = ["0","1"] - regex_filters = [ - RegexFilter(left_bound = "/trial_", right_bound="/", values = sets_values, description='sets'), - RegexFilter(left_bound = "C_", right_bound=".csv", values = classes_values, description='classes'), - RegexFilter(left_bound = "R_", right_bound="_", values = reps_values, description='reps') - ] - odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") - return odh - - -class _SessionFetcher(MetadataFetcher): - def __init__(self): - super().__init__('sessions') - - def __call__(self, filename, file_data, all_files): - def split_filename(f): - # Split date and name into separate variables - date_idx = f.find('2018') - date = datetime.strptime(Path(f[date_idx:]).stem, '%Y-%m-%d-%H-%M-%S-%f') - description = f[:date_idx] - return date, description - - data_file_date, data_file_description = split_filename(filename) - - # Grab the other file of a different date. 
Return the index of which session it is - same_subject_files = [f for f in all_files if data_file_description in f] - file_dates = [split_filename(subject_filename)[0] for subject_filename in same_subject_files] - file_dates.sort() - session_idx = file_dates.index(data_file_date) - return session_idx * np.ones((file_data.shape[0], 1), dtype=int) - - -class _RepFetcher(ColumnFetcher): - def __call__(self, filename, file_data, all_files): - column_data = super().__call__(filename, file_data, all_files) - - # Get rep transitions - diff = np.diff(column_data, axis=0) - rep_end_row_mask, rep_end_col_mask = np.nonzero((diff < 0) & (column_data[1:] == 0)) - unique_rep_end_row_mask = np.unique(rep_end_row_mask) # remove duplicate start indices (for combined movements) - # rest_end_row_mask = np.nonzero(np.diff(np.nonzero(column_data == 0)[0]) > 1)[0] - # rest_end_row_mask = np.nonzero(np.diff(np.nonzero(np.all(column_data == 0, axis=1))[0]) > 1)[0] - # unique_rep_end_row_mask = np.concatenate((unique_rep_end_row_mask, rest_end_row_mask)) - # unique_rep_end_row_mask = np.sort(unique_rep_end_row_mask) - - - # Populate metadata array - metadata = np.empty((column_data.shape[0], 1), dtype=np.int16) - rep_counters = [0 for _ in range(5)] # 5 different press types - previous_rep_start = 0 - for idx, rep_start in enumerate(unique_rep_end_row_mask): - movement_idx = 4 if np.sum(rep_end_row_mask == rep_start) > 1 else rep_end_col_mask[idx] # if multiple columns are nonzero then it's a combined movement - rep = rep_counters[movement_idx] - metadata[previous_rep_start:rep_start] = rep - rep_counters[movement_idx] += 1 - previous_rep_start = rep_start - - # Fill in final samples - metadata[rep_start:] = rep - - return metadata - - -class PutEMGForceDataset(Dataset): - def __init__(self, save_dir = '.', dataset_name = 'PutEMGForceDataset', data_filetype = None): - """Dataset wrapper for putEMG-Force dataset. Used for regression of finger forces. 
- - Parameters - ---------- - save_dir : str, default='.' - Base data directory. - dataset_name : str, default='PutEMGForceDataset' - Name of dataset. Looks for dataset in filepath created by appending save_dir and dataset_name. - data_filetype : list or None, default=None - Type of data file to use. Accepted values are 'repeats_long', 'repeats_short', 'sequential', or any combination of those. If None is passed, all will be used. - """ - # TODO: Implement downloading dataset using .sh or .py file - super().__init__(save_dir) - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir, self.dataset_name) - if data_filetype is None: - data_filetype = ['repeats_short', 'repeats_long', 'sequential'] - elif not isinstance(data_filetype, list): - data_filetype = [data_filetype] - self.data_filetype = data_filetype - - def prepare_data(self, format=OfflineDataHandler, subjects = None, sessions = None, reps = None, labels = 'forces', label_dof_mask = None): - if subjects is None: - subjects = [str(idx).zfill(2) for idx in range(60)] - - if labels == 'forces': - column_mask = np.arange(25, 35) - elif labels == 'trajectories': - column_mask = np.arange(36, 40) - else: - raise ValueError(f"Expected either 'forces' or trajectories' for labels parameter, but received {labels}.") - - if label_dof_mask is not None: - column_mask = column_mask[label_dof_mask] - - if format == OfflineDataHandler: - regex_filters = [ - RegexFilter(left_bound='/emg_force-', right_bound='-', values=subjects, description='subjects'), - RegexFilter(left_bound='-', right_bound='-', values=self.data_filetype, description='data_filetype'), - ] - metadata_fetchers = [ - _SessionFetcher(), - ColumnFetcher('labels', column_mask), - _RepFetcher('reps', list(range(36, 40))) - ] - odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers, delimiter=',', skiprows=1, data_column=list(range(1, 25))) - if 
sessions is not None: - odh = odh.isolate_data('sessions', sessions) - if reps is not None: - odh = odh.isolate_data('reps', reps) - return odh - - -class OneSubjectEMaGerDataset(Dataset): - def __init__(self, save_dir = '.', redownload = False, dataset_name = 'OneSubjectEMaGerDataset'): - super().__init__(save_dir, redownload) - self.url = 'https://github.com/LibEMG/OneSubjectEMaGerDataset' - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir, self.dataset_name) - - if (not self.check_exists(self.dataset_folder)): - self.download(self.url, self.dataset_folder) - elif (self.redownload): - self.remove_dataset(self.dataset_folder) - self.download(self.url, self.dataset_folder) - - def prepare_data(self, format=OfflineDataHandler): - if format == OfflineDataHandler: - regex_filters = [ - RegexFilter(left_bound='/', right_bound='/', values=['open-close', 'pro-sup'], description='movements'), - RegexFilter(left_bound='_R_', right_bound='_emg.csv', values=[str(idx) for idx in range(5)], description='reps') - ] - package_function = lambda x, y: Path(x).parent.absolute() == Path(y).parent.absolute() - metadata_fetchers = [FilePackager(RegexFilter(left_bound='/', right_bound='.txt', values=['labels'], description='labels'), package_function)] - odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - return odh - - -# class GRABMyo(Dataset): -# def __init__(self, save_dir='.', redownload=False, subjects=list(range(1,44)), sessions=list(range(1,4)), dataset_name="GRABMyo"): -# Dataset.__init__(self, save_dir, redownload) -# self.url = "https://physionet.org/files/grabmyo/1.0.2/" -# self.dataset_name = dataset_name -# self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) -# self.subjects = subjects -# self.sessions = sessions - -# if (not self.check_exists(self.dataset_folder)): -# self.download_data() -# elif (self.redownload): -# 
self.remove_dataset(self.dataset_folder) -# self.download_data() -# else: -# print("Data Already Downloaded.") - -# def download_data(self): -# curl_command = "curl --create-dirs" + " -O --output-dir " + str(self.dataset_folder) + "/ " -# # Download files -# print("Starting download...") -# files = ['readme.txt', 'subject-info.csv', 'MotionSequence.txt'] -# for f in files: -# os.system(curl_command + self.url + f) -# for session in self.sessions: -# curl_command = "curl --create-dirs" + " -O --output-dir " + str(self.dataset_folder) + "/" + "Session" + str(session) + "/ " -# for p in self.subjects: -# for t in range(1,8): -# for g in range(1,18): -# endpoint = self.url + "Session" + str(session) + "/session" + str(session) + "_participant" + str(p) + "/session" + str(session) + "_participant" + str(p) + "_gesture" + str(g) + "_trial" + str(t) -# os.system(curl_command + endpoint + '.hea') -# os.system(curl_command + endpoint + '.dat') -# print("Download complete.") - -# def prepare_data(self, format=OfflineDataHandler, subjects=[str(i) for i in range(1,44)], sessions=["1","2","3"]): -# if format == OfflineDataHandler: -# sets_regex = make_regex(left_bound = "session", right_bound="_", values = sessions) -# classes_values = ["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"] -# classes_regex = make_regex(left_bound = "_gesture", right_bound="_", values = classes_values) -# reps_values = ["1","2","3","4","5","6","7"] -# reps_regex = make_regex(left_bound = "trial", right_bound=".hea", values = reps_values) -# subjects_regex = make_regex(left_bound="participant", right_bound="_",values=subjects) -# dic = { -# "sessions": sessions, -# "sessions_regex": sets_regex, -# "reps": reps_values, -# "reps_regex": reps_regex, -# "classes": classes_values, -# "classes_regex": classes_regex, -# "subjects": subjects, -# "subjects_regex": subjects_regex -# } -# odh = OfflineDataHandler() -# odh.get_data(folder_location=self.dataset_folder, filename_dic=dic, 
delimiter=",") -# return odh - -# def print_info(self): -# print('Reference: https://www.physionet.org/content/grabmyo/1.0.2/') -# print('Name: ' + self.dataset_name) -# print('Gestures: 17') -# print('Trials: 7') -# print('Time Per Rep: 5s') -# print('Subjects: 43') -# print("Forearm EMG (16): Columns 0-15\nWrist EMG (12): 18-23 and 26-31\nUnused (4): 16,23,24,31") - - -# class NinaDB1(Dataset): -# def __init__(self, dataset_dir, subjects): -# Dataset.__init__(self, dataset_dir) -# self.dataset_folder = dataset_dir -# self.subjects = subjects - -# if (not self.check_exists(self.dataset_folder)): -# print("The dataset does not currently exist... Please download it from: http://ninaweb.hevs.ch/data1") -# exit(1) -# else: -# filenames = next(walk(self.dataset_folder), (None, None, []))[2] -# if not any("csv" in f for f in filenames): -# self.setup(filenames) -# print("Extracted and set up repo.") -# self.prepare_data() - -# def setup(self, filenames): -# for f in filenames: -# if "zip" in f: -# file_path = os.path.join(self.dataset_folder, f) -# with zipfile.ZipFile(file_path, 'r') as zip_ref: -# zip_ref.extractall(self.dataset_folder) -# self.convert_data() - -# def convert_data(self): -# mat_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.mat'))] -# for f in mat_files: -# mat_dict = sio.loadmat(f) -# output_ = np.concatenate((mat_dict['emg'], mat_dict['restimulus'], mat_dict['rerepetition']), axis=1) -# mask_ids = output_[:,11] != 0 -# output_ = output_[mask_ids,:] -# np.savetxt(f[:-4]+'.csv', output_,delimiter=',') +from libemg._datasets._3DC import _3DCDataset +from libemg._datasets.one_subject_myo import OneSubjectMyoDataset +from libemg._datasets.emg_epn612 import EMGEPN612 +from libemg._datasets.ciil import CIIL_MinimalData, CIIL_ElectrodeShift +from libemg._datasets.grab_myo import GRABMyo +from libemg._datasets.continous_transitions import ContinuousTransitions + +def get_dataset_list(): + """Gets a list of all available 
datasets. -# def cleanup_data(self): -# mat_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.mat'))] -# zip_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.zip'))] -# files = mat_files + zip_files -# for f in files: -# os.remove(f) + Returns + ---------- + dictionary + A dictionary with the all available datasets and their respective classes. + """ + return { + 'OneSubjectMyo': OneSubjectMyoDataset, + '3DC': _3DCDataset, + 'EMGEPN612': EMGEPN612, + 'CIIL_MinimalData': CIIL_MinimalData, + 'CIIL_ElectrodeShift': CIIL_ElectrodeShift, + 'GRABMyo': GRABMyo, + 'ContinuousTransitions': ContinuousTransitions, + } -# def prepare_data(self, format=OfflineDataHandler): -# if format == OfflineDataHandler: -# classes_values = list(range(1,24)) -# classes_column = [10] -# classset_values = [str(i) for i in list(range(1,4))] -# classset_regex = make_regex(left_bound="_E", right_bound=".csv", values=classset_values) -# reps_values = list(range(1,11)) +def get_dataset_info(dataset): + if dataset in get_dataset_list(): + get_dataset_list()[dataset]().get_info() + else: + print("ERROR: Invalid dataset name") -# reps_column = [11] -# subjects_values = [str(s) for s in self.subjects] -# subjects_regex = make_regex(left_bound="S", right_bound="_A", values=subjects_values) -# data_column = list(range(0,10)) -# dic = { -# "reps": reps_values, -# "reps_column": reps_column, -# "classes": classes_values, -# "classes_column": classes_column, -# "subjects": subjects_values, -# "subjects_regex": subjects_regex, -# "classset": classset_values, -# "classset_regex": classset_regex, -# "data_column": data_column -# } -# odh = OfflineDataHandler() -# odh.get_data(folder_location=self.dataset_folder, filename_dic=dic, delimiter=",") -# return odh + \ No newline at end of file From 0a89aa5a615dfd9565961b73aa5cac2b0799f6d3 Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 16 Sep 2024 10:27:47 -0300 Subject: [PATCH 035/129] Updates 
--- .gitignore | 9 +- libemg/_datasets/_3DC.py | 4 +- .../__pycache__/_3DC.cpython-310.pyc | Bin 2653 -> 0 bytes .../__pycache__/__init__.cpython-310.pyc | Bin 238 -> 0 bytes .../__pycache__/ciil.cpython-310.pyc | Bin 3396 -> 0 bytes .../continous_transitions.cpython-310.pyc | Bin 2309 -> 0 bytes .../__pycache__/dataset.cpython-310.pyc | Bin 2218 -> 0 bytes .../__pycache__/emg_epn612.cpython-310.pyc | Bin 2053 -> 0 bytes .../__pycache__/grab_myo.cpython-310.pyc | Bin 2318 -> 0 bytes .../one_subject_myo.cpython-310.pyc | Bin 1819 -> 0 bytes libemg/_datasets/ciil.py | 8 +- libemg/_datasets/continous_transitions.py | 14 +- libemg/_datasets/dataset.py | 140 +---------------- libemg/_datasets/emg_epn612.py | 6 +- libemg/_datasets/fors_emg.py | 44 ++++++ libemg/_datasets/grab_myo.py | 22 +-- libemg/_datasets/myodisco.py | 76 +++++++++ libemg/_datasets/nina_pro.py | 147 ++++++++++++++++++ libemg/_datasets/one_subject_myo.py | 25 ++- libemg/datasets.py | 6 + libemg/feature_extractor.py | 21 +-- 21 files changed, 329 insertions(+), 193 deletions(-) delete mode 100644 libemg/_datasets/__pycache__/_3DC.cpython-310.pyc delete mode 100644 libemg/_datasets/__pycache__/__init__.cpython-310.pyc delete mode 100644 libemg/_datasets/__pycache__/ciil.cpython-310.pyc delete mode 100644 libemg/_datasets/__pycache__/continous_transitions.cpython-310.pyc delete mode 100644 libemg/_datasets/__pycache__/dataset.cpython-310.pyc delete mode 100644 libemg/_datasets/__pycache__/emg_epn612.cpython-310.pyc delete mode 100644 libemg/_datasets/__pycache__/grab_myo.cpython-310.pyc delete mode 100644 libemg/_datasets/__pycache__/one_subject_myo.cpython-310.pyc create mode 100644 libemg/_datasets/fors_emg.py create mode 100644 libemg/_datasets/myodisco.py create mode 100644 libemg/_datasets/nina_pro.py diff --git a/.gitignore b/.gitignore index bfc40973..64dd7bcd 100644 --- a/.gitignore +++ b/.gitignore @@ -45,4 +45,11 @@ test_*.py *.csv .vscode/* test_delsys_api.py -resources/ \ No newline at end of 
file +resources/ +*.csv +*.txt +ContinuousTransitions/* +FORS-EMG/* +MyoDisCo/* +NinaProDB1/* +*.zip \ No newline at end of file diff --git a/libemg/_datasets/_3DC.py b/libemg/_datasets/_3DC.py index c49b1583..bd86335a 100644 --- a/libemg/_datasets/_3DC.py +++ b/libemg/_datasets/_3DC.py @@ -9,10 +9,10 @@ def __init__(self, save_dir='.', redownload=False, dataset_name="_3DCDataset"): 10, '3DC Armband (Prototype)', 22, - ["Neutral", "Radial Deviation", "Wrist Flexion", "Ulnar Deviation", "Wrist Extension", "Supination", "Pronation", "Power Grip", "Open Hand", "Chuck Grip", "Pinch Grip"], + {0: "Neutral", 1: "Radial Deviation", 2: "Wrist Flexion", 3: "Ulnar Deviation", 4: "Wrist Extension", 5: "Supination", 6: "Pronation", 7: "Power Grip", 8: "Open Hand", 9: "Chuck Grip", 10: "Pinch Grip"}, '8 (4 Train, 4 Test)', "The 3DC dataset including 11 classes.", - "@article{cote2019deep, title={Deep learning for electromyographic hand gesture signal classification using transfer learning}, author={C{^o}t{'e}-Allard, Ulysse and Fall, Cheikh Latyr and Drouin, Alexandre and Campeau-Lecours, Alexandre and Gosselin, Cl{'e}ment and Glette, Kyrre and Laviolette, Fran{\c{c}}ois and Gosselin, Benoit}, journal={IEEE transactions on neural systems and rehabilitation engineering}, volume={27}, number={4}, pages={760--771}, year={2019}, publisher={IEEE} }", + "https://ieeexplore.ieee.org/document/8630679", save_dir, redownload) self.url = "https://github.com/libemg/3DCDataset" self.dataset_name = dataset_name diff --git a/libemg/_datasets/__pycache__/_3DC.cpython-310.pyc b/libemg/_datasets/__pycache__/_3DC.cpython-310.pyc deleted file mode 100644 index abdf4923a8bcba9c75a6da7b9534feb453428f15..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2653 zcmZ`*&2QYs73Yv#?uxrw$+8j)XbbSrqFF%RwVWVMj5vs^uNHL`A&yaiXhTq(S?&OH<`TA=TdyRsUzT+Hab`FJ0{nfW~x>+8M+ z-(PmWAO4|jS>K~_^K(J@44V1~I%-iOtc-+&plk~}b3!L`L$}f$(FhyI7IkUkr9~Tz 
zTsomgou^jMyMoeYaN4w{ZSKiz@Z$fjvvtg`7EpY3f@V`5jb%n<;y#oORk7afScm(gA z%0d@iNSLn&k>sg#OcV%=2ns)T2T^n(fcNDi(V7bGEqqScq8C-Qby+K$rnq zKZ=Fe4faQjKOY58Vm*=7@PRB!O#3ll4$HtjF5Zu`F^kK)PgqivQvGoHxB#ng@4=ox ze;LblweMIkt=Vqy>xo=idJS8^vc_a9sn%@)TYqlK>Z3yC_7N!71E-07_nF zXAF?~0OWBDct4sw`0(D{yAK}h!N>&82o}Qm;dFT>xEkSVjAIcjrav8NJys6~gOux0 zdDc&gY#{g<%hJKyH`iN}Hr#X5iXxtK9Yw||r7)f4EsF9uW5zC&amTS98TY*4xoN5R zf<=@|(_O6_7J@R_bBzl~59J2h^d+OkH@PTcDuL&$*Ot5uf8IPCJX1`n0n>1g7^RcJ zL6Jb#xmK$HmvKf*^J;)J(H~Fb8W`}PsUM?2?B#DIxy1da=X+#_Ot)_^OMjJJ0@wfg z!^DwdkY4_=4P9!@t>1eKqOCc(B!9HOwx8A^zb3{}TE4c8-M?oDukxrn+-Lkx0DKJq zk3W3}jiKdjYx?fZ5Z*4i!XU2h;Kl|#5CGKG_=a6W!1$ga_YHaf|3)g&L+u+GO72{% z2(H9<{S~qZl(lSy_RpGOt=07rLW@n+b7eme+sk% ze3Ed@9tMW^Z^&Tk4i0~FjMr2vK-?E>sH3x@%&BQho{mrRGY^n0>9QaecA-g@tY}5}W5=G6EW?f?W zjfPeIRc4I>QDqRXlz2e`T=6ClUvY*UCFWn-)~(gBve96;yW;{l<8{Nbhs)+tm{R+mb``29u$Kt*5Sq~ zrKrXzY$KT03SzKQx=Wmvy1b8eoPv&i_<}lP*cQv$A>MEmK)2;~$QJQPhisCT-L<;} zf9}6LZ;J%*)TcW)U)W6(Cp|~vl|PaV=zC2hIOH-QI6sQAf|dg1P85Az#!$9#rXrUL z4$}bYPk~Hf?~O>`J}@+jO?9DbSw7j`@F31j(*5_+VYpLsSbv4H>aPGtb_j1Tr*SaC kdJ6xGy*&!qfi;z-|K;AaKB+VK6z_<_=iIRXA+l}%2f%aZ@Bjb+ diff --git a/libemg/_datasets/__pycache__/__init__.cpython-310.pyc b/libemg/_datasets/__pycache__/__init__.cpython-310.pyc deleted file mode 100644 index 6b4618cb6b31bd4b3f731c941a8cb8ffdf0cdc25..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 238 zcmd1j<>g`kf~hYar3C=##~=17mz{oSacWVqericZVqR)WN~OL_esXDUYF}3 diff --git a/libemg/_datasets/__pycache__/ciil.cpython-310.pyc b/libemg/_datasets/__pycache__/ciil.cpython-310.pyc deleted file mode 100644 index e4e24266d20b37b09169f18103c9c821dcf7ad29..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3396 zcmd5yc-j+4dFacL1()blq#{q(coUYx{(kmDkn zv?3d3<`3#MXg8r9Y=BVKGctxPqeJTUU`sCuZibe}^AJ^KY+xXDw=wEOvxCEC5uq|&83oIQ!t zM9^nAcAqdyg6C-XNunOK-?THx&c#ai1kHmp%3R;UngS9*+mHV$=%<=}w}IXIwf? 
zdd4$`fRnw5hf+3MqYwH@4dqqej~SaT)<0YBPDd*aLS>Vm+au7f$DhRZ*Be=Fxsoi0y`r6%2 z<3yJw>>;4B(2eOS1l^QVCiZRZO2)h544%6s{(u*9)&D{=A$_L$VZtbV>ff41DSaMq zu|Y5Y6HL8HKZ;}C8a@>#!9fw)!D&>8J^j7Z-2W6SC&}p4F9FxecMuVnnLh}SIcNww z38(_tQGnexS%wT-w)P;`#?~_c*0%GU$Q96C&=GLXbDLPnanOpQ3H_yPj2ZvR80zej zEE`lA0la750>8U9En}vdzPV|i12C_=AiFND(pt~nEvPwKr}c4xHt4bMU9jfCvgz@u zwLllgE?t^hC+NwsLzmI&xw}PG*|T;_ zNS#bSjXFK^Z{&%yn}?0cKR5%5@FIZbRZo*jXK|XuCNBV-h?iH66!C(@^#Izz5{Mog z;n&4oq9A}h@-O>a`#}bTeWD$JDPY8IUDPBt=*WZz^K@l{$5H`4hu7XhXXts!8nyrH zYKMXiLjeGUSXvuAjY2N3fs)S_c;y1=<&BYwcX0^9Q~U|Vdq<|}^U(YY2xL3)R}hl| zh^C_nMBdh9RTm)PAQfB%3~@+&#J!UHuC8#_Rlyb{HPuxS_xia}d=&B>CUuoE8HsqP zfN>7XzTDZ`W|4xN$`q;X`++8bc43!b+zbxgRwN(kl3@ZS56Ss`M<2@)F2LQRCtVjv z-$B>#x72gMK89h$1WyBII5#-g1#^$uMPA#l>%vgP39JqGoq4H6eHMKa05eJ@#aWoC z%QStQ@HC`i8O@4dd53NYHb{{PGAh&s5hgt*5Do$hhA#J*GH-P)mMITaHWp|G54JN0 zd(QSDuHx{dh35eY;&K85;%i80=$@n0|vZU2?AjF9gcb}2)^A3dG^LomiQ9J z&xH6AR7hqBr2|yTR)y3W0@VoGD-WLEb!fr>udEkMjJG*pML=nJ(2u*yg!RU{Dv<4< zHVR}sfu9}%%i-h%YIP8!V^H7_#pk`tS*7!DB;L^yo>{|-(xi)M#>&)%8j}i{*S^+h zBd;#-HStYLBAfsKrpSas#Sv#viCXRUUcGX8{ldjnXzjI2moBV}MT`V5OPH82 z>t_zlLlX$G8*{uo#`@O@?-rwhRJ zi&JZfo&csliPmeFzIMd)%XDRi>FYC0Ux#&1y+E!%ZMgo-rZ|U(`bQ9*#?g(N3;*4m zi8*RGbF}`t>zYl}H?wyLPJVYJxAREEdC-~#VUX0qULS-eYnrS#A*~IU19UqjCE}0= z?Oa*Sm9<=17ppkmu+chdS5P2*iNB%1k6G~{ifbr7LUA3%4HO@PXu9GK{Fz{Vn_!wx z3}M~{P5d3j7bpy6yyOie6!ia_Y3-an@_z03H$?}R`s&s7UM~7ic!4J;0u*S$xBW`? 
z&DP3UU$&<7Z$2G6i__QG8Ndf0{%>K5K~@%4&=V4#aW}Keqg8czBV*QUkhSoY2BK!o L18vXSOYi**5eq{y diff --git a/libemg/_datasets/__pycache__/continous_transitions.cpython-310.pyc b/libemg/_datasets/__pycache__/continous_transitions.cpython-310.pyc deleted file mode 100644 index ba7c99796c1d9c05135b395cb483fdc4b7375ece..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2309 zcmZ`)&2Aev5GJ`lTCHSRkrZ3*A#jTJpfr$D2S$+~DDq<@1q!%9n_dgM) zVG;|4+fTb1?7Hdk2GA2A@c@`abDTi{DA9)d07M(ka7Gp-j4~rbK9R=IoLd2k0~U!>aYkUG(B$So%2eF%bZ8Q`6W;H9ey6?v#l5|~&WLr} zd;53pwj7@8I=&yqq4a%KrptKwaWG8gf-x>-Ca|GTt(k zDOl9wo7%O@8BJctQ4&xNXODhEybj$zUw6J2jEfGFgCJ&9Mreh`SiJuUi=g+Gc(> zze8vwoC`!8Z^NbBMl-mc8Ja`%hsFYn>#%BQnLY_=&d?P1QHFgh=eE9CB6?kQvnZ#RPvvAa_BgW@ggT4x(kC4WP;nnp)DGmw?K2 zbAdC9R(DVX>0WMR)(q3lCi;==pgvjPsdZuhwbgzb_Evy4VGp7;tqY?%&3C;5Y}z8N z(%Qn97PM|w0Lz6L25!%AF0JO?Kt}6LG$RM-<)f*Sk>Ry@Ewi&i=75FzHqs;A@8q@@ zI)Y?q&uh{h)-&rWJ#7>Ir|}aY$ah$J$Gi;w!2_-Bz**3G4^}gyf#~%n(t5`~1J*6@ zt7EmsTD%}z$Im^-@1Em#ufVV4T**q{xs>BC=J-o;i63O=_#Xp)oZ~NMrE~l+`T##Z z$M0UH_W^!9e2*VIT!sHFKHfUMt1}109XQ?oRVTyd;iTd|j~L{^-EJt^{auCK$;T_n z`las50oz?YyWJj7Mxf@1@zIcVrBH=#6bQjYi*N@X$eB)!lkH2NR_D1QclIV$dq8{p z3UlQy->^=M%790%>=ewWGEWNOUto&sN2qKO_TwNr`w!ZgmO%bCgOgC?E|tlHQQ9(; z@hFOv1t}I1lKFxO*=i^&&%7T>CXUf!J zQ+AN1ET*c&PEsf@kS0f>Rp!^UGdrOJNMNg;DJ=LN$i0#djUNR^khnc3pFn`TjeOJ{ z^!<>YKnV#wM}znES}7l`wz%A1Yvs{gvW9k9PB}{h4(S?xt_c$H7+4LLv4c%)8#cDF zd)~XGi5vKKPFr{je}eb$E!-elww~X{T?e_iTv-|~tLCzHw9ku1 zdv%TT;^x&?%Ub9|;n9VEc^Pym<~rxYjRbih6iF$+r3F?z9zl`$OlxQim;IA+9xS1J SsV}Rj04t*^gm?KhjQ;_u>q!g% diff --git a/libemg/_datasets/__pycache__/dataset.cpython-310.pyc b/libemg/_datasets/__pycache__/dataset.cpython-310.pyc deleted file mode 100644 index 851a674388eaab7a04b16c68c71a04d8fd7c1045..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2218 zcmZ`*OK%%D5GJ`RX|wC3%MVe z=;x>qnN%^Zt+yLXKf$adb1KN3iIF>5rc?SInbp04e)vpLARS1+;6ez10P_OcU`jro~Q+0rLt1i%9 
z)dLz-0nol62jrU0CZP>n@EO8wvcp@^nhFwwGG3YAEv)*sq|DDKqrMbpYE6OGM9zX(=0ud`B6J? zQQxVaHq74Q+y zc@gDuzUj8`xXg2iMRx-=xrG~o)^qqB8c~hta1DIB9AgBv4T0Z)zXw7M{(?Es>;^QO zt$HdSf2)$Qx(jL(uSPj+0VZW8q?!+v%u5KS|H0TUH@oh`Ne*d%Nbj}R+?>%Uy@waE z-{8d%G310^v5LNe#8sUd8GeWL7m+!(UHNCKjh-+ESA7aAAKu#_K9=zxyjJ`S6f_zc z(AAw5&$_}#XecN<*W-<&gB=QIq_JwdNvZP4d}Me5xe+PIJP6GPtzm!7@VY{bhT4Wa z+H-~%z!*}&MTjQu3>1MA?-C_%hd9Z1k`FOSPC76T-OYF1e7BK$PUYK_l@S326 z0awvW_8pva+OIq)Z*2C4PA5I(lCYOS_Tj@4$U^8BDQ0A}Sb(5D2OzeSYFyn-(n9bk z%XpGzQu8w5=6oUH?TD93r)M%dpLDF}UMM3~o)(cQmF+m~c^?OkmaLEL+Ksu(!hZnh zSB+9geHuVfAJGxJN7-LKRk%Ls)PQf-uf`sxB!~FL7r%B0Ub$0UJO0KYHu%1F@Q;|8 O8sD=3O32U~Jp31?&;4Nl diff --git a/libemg/_datasets/__pycache__/emg_epn612.cpython-310.pyc b/libemg/_datasets/__pycache__/emg_epn612.cpython-310.pyc deleted file mode 100644 index e608238afd1b2e573bf57f5b9d11a0829c1e8adf..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2053 zcmZ`)&2t+y6kqL^*I#KUY10-Cdtf^COz}EylYvYNb(@b~>SWRa#RIdmUdi@4^6plW zL*m&ChxBjY#33ms{u6H8b%jg*1BMe%Yr8c}S&yEir>Cc%p5FU8sa8uGe7`^Zv-if) zv_I5j@iSra2pZo9fi;Y%HbNbQ)L5tb$mkd&vt#CSgIXO6^aiy%cHS2{g}iq<#a#`X z*m|vDi=Z>3Q^Lk`t!7_>SFMI*VJi>;C!(gyt9!j3jbfrSPlFg!!ese?^vUr~Lk}6)=Z?Y7#nYg=J=l!N5ur&M#eFwq3FlLb zQGyBO^~vpl5GmjEJe)-Jg!R3(Mt!~c*_t;Yae|Zj{rknyR%ST;I2aMBCtR9o zAO_MLCQ&RM9vl%LM@%lw?Rp8tghASvoY3B{8oLf&|9<4VMV~AKiJz-UFQ-sEEfv}N2`^`t@Uv|+(H_+tjyIaj~@~>=CJ_^wC zY)T=w1Y{Cq2U>-qS=G*MAbgE}Mwr*;t z2J|-crYM|gr&eFfEZ@mxQDtUdhj~eqGXocI!YNzY_x11eQ~R`#S;NYCH7i`GldWO# zyegK?uZZQWaE5THr(dAVRC_Y;zM{>nocCDSD;h4}$}L_!$Wf^Cog9VZ&rvut6qOl@ z#tenCfWrA76tHIjg)@9F3WWhzS7%sX{g_#qvkEuM9K7_iB9<%d$x>;b65td!j0D+q zB`QsRnQ3tHLrjwZyJA4xGzt$X!S9NNC8JGup6BDZ8`5NqKOLRa`5<5RP0>jrHn31+R}anbRZsqP#jYU2P8c7fpECsY(@Do6U9aL zG$ztiL1G3hu#c2b{3@5pF#zxbAz0KM!@06ZVkk2f2;eeRS<((taD$~Dr{FOL6N8c% zs{dMkMlS+RzpI7@)St9z&;=6e*a;r^f=LufL}oLRm-bYwsvVUSo&cU9FJYzOO%PX6 z1v$t>6}^NU-9ff)BO8^_3g~a6Phh+_zk`3WDaYB)Sx-?5@&S zpia)QIrQFxom>7z5B(K;Z7==>1%jgJ%t~yY6m~eXk9mAEJ5*k;R}FZ6x$#&3ci{d+ 
z;q}J`;TjBa83;8fNsSQ+2tnRV&5;#YBRjCQY^6@%+%~99ou>wM7}>W1ms$^uj(Y^B zjSf-X%}7Rq$&RU7-}U=xk};gQ6J<1ITrJ&a1NP)rl1j$6V?6Wv)(94m&XLGmJjkcZom+T5j9{l*?wWXAj%dj?Q<7qHF>$!`(7|E zdQ1+Zj8VGRyP3z65zD0LrO6H(4a!$Y=kSQ$fJZywXfN-M_mm%oNtVbkM3uq>Vv}ie z;a0a?RHQp~HoEKG&09z4+R<4g=v&~SHc%FOAe9gDbr?E?EnuW^Knln(B1)|RB1?E|MUc0t-T3-i!|Sx*OVZ*w%Oj3WzP9iOA49IH#!7GZ^1G-Ark zg|f$y94dP^Pcr3;=rIdv!d0W(*UwYRc*oyvG1xl4q*Q8O?vr(mbMYbh5L{ zVkv-Q0+$6}#v}iO&nqaR7s!ayKwM(J^|Oinw_80;-bpb=qVeehBLN$TnydjbYfvqK ziE&`UjCDxFDy&*KhgdA|c-g4lOBs}VJ5D6K(pIE8{b4avhR~Kn)?TEty$HYE=lSSL zdnn~tTnZ*j?)eAMN;xrPVNjZ`yQ98A| zLl!BUv#|)4#u1lEoQ$JP@>5{*B{|!%)UkP22-7^C7kHiH*COoe&xKGG%F<*6H;wB? zbku~C02VeSUITKZ8xqd)28s?6Y!sl|x}K)WdB`K^NA^STu~X$(e*9x3*hD#&n(_xs z>U*N^lWHa+Pb0~~7&?2#QfO>gy~^QHHejk&F5|@pjk=p*Kpgv!YAi4aOQOvrif*#}4w&J0k)T0- z56LUvG^aZ>;a(;bpdy%2UZ(;^Dt*mr^jQKs3&K|!nP*8y#(W0F2q-0@4%LO0F-e- zN{E%3sCx*pToLM0kNPDk&C3=<+bE65EG>YRvd7MaTN+~zwsmO&&!-01ZywBl2W@j7 zz^nxh1luAzhEV4OI9*3OJG_uAv=1jN5A7o+Rp{+;cA6HsZtR{amP(2JK1DR$M&H_|S>$C=H6ssVW!BjFD3%yw*$1l1 zdq7!M$llC4=x@(@Bc&$tn|^FUDcXrW&Ai=0KWe*Y*tD|(YN(FR>E zZ22xC#9aJp%q8~UZo?&8WcL1Tarc(-CKLEjko##R0N0Z3_Fz#fxJB3A*2iWozs z0BsT@zXO;p0^mEf3_#$QbX(9QSJ-l+%4U)kM>H3uF|zv7FY$|MT2^v{mvEmR#SAHZVb&!lWzVB zomH$rqa3*W^o09%%BFCJ&*0$>(AV!WHizzpW!vDprfioYVbDl+7l3q zMXeu|S0l4c)`?5nWR3WyPw>|K^*6O0bS;5N+7p;BC=8eZ z*zPa`no*P$bjlHLN72vInAay&)76Ri9xmPR7CT#@%au0NC{ej@G|5`0w&3Zly03f7 vl*-;RxA=nL#^M;65zGesC;30?Vs)wgHnZCQws!eDEDMQU+%_OTWYhc?Q4r`6 diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py index bed59df1..fd953956 100644 --- a/libemg/_datasets/ciil.py +++ b/libemg/_datasets/ciil.py @@ -9,7 +9,7 @@ def __init__(self, save_dir='.'): 8, 'Myo Armband', 11, - ['Close', 'Open', 'Rest', 'Flexion', 'Extension'], + {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, '1 Train (1s), 15 Test', "The goal of this Myo dataset is to explore how well models perform when they have a limited amount of training data (1s per class).", 
'https://ieeexplore.ieee.org/abstract/document/10394393', @@ -48,9 +48,9 @@ def __init__(self, save_dir='.'): 8, 'Myo Armband', 21, - ['Close', 'Open', 'Rest', 'Flexion', 'Extension'], - '5 Train, 8 Test', - "An Electrode Shift confounding factors dataset.", + {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, + '5 Train (Before Shift), 8 Test (After Shift)', + "An electrode shift confounding factors dataset.", 'https://link.springer.com/article/10.1186/s12984-024-01355-4', save_dir) self.url = "https://github.com/LibEMG/CIILData" diff --git a/libemg/_datasets/continous_transitions.py b/libemg/_datasets/continous_transitions.py index b846e0f4..d45ed73f 100644 --- a/libemg/_datasets/continous_transitions.py +++ b/libemg/_datasets/continous_transitions.py @@ -11,11 +11,11 @@ def __init__(self, save_dir='.', redownload=False, dataset_name="ContinuousTrans 2000, 6, 'Delsys', - 2, - [], - '', - "", - "https://doi.org/10.57922/mec.2503", + 43, + {0: 'No Motion', 1: 'Wrist Flexion', 2: 'Wrist Extension', 3: 'Wrist Pronation', 4: 'Wrist Supination', 5: 'Hand Close', 6: 'Hand Open'}, + '6 Training (Ramp), 42 Transitions (All combinations of Transitions) x 6 Reps', + "The testing set in this dataset has continuous transitions between classes which is a more realistic offline evaluation standard for myoelectric control.", + "https://ieeexplore.ieee.org/document/10254242", save_dir, redownload) self.dataset_name = dataset_name self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) @@ -38,7 +38,7 @@ def prepare_data(self): odh_te.classes = [] odh_te.extra_attributes = ['subjects', 'classes'] - for s in [2,3]: + for s in [2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47]: data = h5py.File('ContinuousTransitions/P' + f"{s:02}" + '.hdf5', "r") cont_labels = data['continuous']['emg']['prompt'][()] cont_labels = np.hstack([np.ones((1000)) * cont_labels[0], 
cont_labels[0:len(cont_labels)-1000]]) # Rolling about 0.5s as per Shri's suggestion @@ -48,7 +48,7 @@ def prepare_data(self): for i in range(0, len(cont_chg_idxs)-1): odh_te.data.append(cont_emg[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]) odh_te.classes.append(np.expand_dims(cont_labels[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]-1, axis=1)) - odh_te.subjects.append(np.ones((len(odh_te.data[-1]), 1)) * s-2) #TODO: Update + odh_te.subjects.append(np.ones((len(odh_te.data[-1]), 1)) * s-2) ramp_emg = data['ramp']['emg']['signal'][()] ramp_labels = data['ramp']['emg']['prompt'][()] diff --git a/libemg/_datasets/dataset.py b/libemg/_datasets/dataset.py index 4db2ef32..cb2d68b3 100644 --- a/libemg/_datasets/dataset.py +++ b/libemg/_datasets/dataset.py @@ -3,7 +3,7 @@ # this assumes you have git downloaded (not pygit, but the command line program git) class Dataset: - def __init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation, save_dir='.', redownload=False, ): + def __init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation, save_dir='.', redownload=False): self.save_dir = save_dir self.redownload=redownload @@ -32,7 +32,7 @@ def prepare_data(self, format=OfflineDataHandler): pass def get_info(self): - print(self.description + '\n' + 'Sampling Rate: ' + str(self.sampling) + '\nNumber of Channels: ' + str(self.num_channels) + + print(str(self.description) + '\n' + 'Sampling Rate: ' + str(self.sampling) + '\nNumber of Channels: ' + str(self.num_channels) + '\nDevice: ' + self.recording_device + '\nGestures: ' + str(self.gestures) + '\nNumber of Reps: ' + str(self.num_reps) + '\nNumber of Subjects: ' + str(self.num_subjects) + '\nCitation: ' + str(self.citation)) @@ -48,138 +48,4 @@ def find_all_files_of_type_recursively(dir, terminator): else: if os.path.isdir(dir+file): file_list += find_all_files_of_type_recursively(dir+file+'/',terminator) - return file_list - - -# 
class GRABMyo(Dataset): -# def __init__(self, save_dir='.', redownload=False, subjects=list(range(1,44)), sessions=list(range(1,4)), dataset_name="GRABMyo"): -# Dataset.__init__(self, save_dir, redownload) -# self.url = "https://physionet.org/files/grabmyo/1.0.2/" -# self.dataset_name = dataset_name -# self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) -# self.subjects = subjects -# self.sessions = sessions - -# if (not self.check_exists(self.dataset_folder)): -# self.download_data() -# elif (self.redownload): -# self.remove_dataset(self.dataset_folder) -# self.download_data() -# else: -# print("Data Already Downloaded.") - -# def download_data(self): -# curl_command = "curl --create-dirs" + " -O --output-dir " + str(self.dataset_folder) + "/ " -# # Download files -# print("Starting download...") -# files = ['readme.txt', 'subject-info.csv', 'MotionSequence.txt'] -# for f in files: -# os.system(curl_command + self.url + f) -# for session in self.sessions: -# curl_command = "curl --create-dirs" + " -O --output-dir " + str(self.dataset_folder) + "/" + "Session" + str(session) + "/ " -# for p in self.subjects: -# for t in range(1,8): -# for g in range(1,18): -# endpoint = self.url + "Session" + str(session) + "/session" + str(session) + "_participant" + str(p) + "/session" + str(session) + "_participant" + str(p) + "_gesture" + str(g) + "_trial" + str(t) -# os.system(curl_command + endpoint + '.hea') -# os.system(curl_command + endpoint + '.dat') -# print("Download complete.") - -# def prepare_data(self, format=OfflineDataHandler, subjects=[str(i) for i in range(1,44)], sessions=["1","2","3"]): -# if format == OfflineDataHandler: -# sets_regex = make_regex(left_bound = "session", right_bound="_", values = sessions) -# classes_values = ["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"] -# classes_regex = make_regex(left_bound = "_gesture", right_bound="_", values = classes_values) -# reps_values = 
["1","2","3","4","5","6","7"] -# reps_regex = make_regex(left_bound = "trial", right_bound=".hea", values = reps_values) -# subjects_regex = make_regex(left_bound="participant", right_bound="_",values=subjects) -# dic = { -# "sessions": sessions, -# "sessions_regex": sets_regex, -# "reps": reps_values, -# "reps_regex": reps_regex, -# "classes": classes_values, -# "classes_regex": classes_regex, -# "subjects": subjects, -# "subjects_regex": subjects_regex -# } -# odh = OfflineDataHandler() -# odh.get_data(folder_location=self.dataset_folder, filename_dic=dic, delimiter=",") -# return odh - -# def print_info(self): -# print('Reference: https://www.physionet.org/content/grabmyo/1.0.2/') -# print('Name: ' + self.dataset_name) -# print('Gestures: 17') -# print('Trials: 7') -# print('Time Per Rep: 5s') -# print('Subjects: 43') -# print("Forearm EMG (16): Columns 0-15\nWrist EMG (12): 18-23 and 26-31\nUnused (4): 16,23,24,31") - - -# class NinaDB1(Dataset): -# def __init__(self, dataset_dir, subjects): -# Dataset.__init__(self, dataset_dir) -# self.dataset_folder = dataset_dir -# self.subjects = subjects - -# if (not self.check_exists(self.dataset_folder)): -# print("The dataset does not currently exist... 
Please download it from: http://ninaweb.hevs.ch/data1") -# exit(1) -# else: -# filenames = next(walk(self.dataset_folder), (None, None, []))[2] -# if not any("csv" in f for f in filenames): -# self.setup(filenames) -# print("Extracted and set up repo.") -# self.prepare_data() - -# def setup(self, filenames): -# for f in filenames: -# if "zip" in f: -# file_path = os.path.join(self.dataset_folder, f) -# with zipfile.ZipFile(file_path, 'r') as zip_ref: -# zip_ref.extractall(self.dataset_folder) -# self.convert_data() - -# def convert_data(self): -# mat_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.mat'))] -# for f in mat_files: -# mat_dict = sio.loadmat(f) -# output_ = np.concatenate((mat_dict['emg'], mat_dict['restimulus'], mat_dict['rerepetition']), axis=1) -# mask_ids = output_[:,11] != 0 -# output_ = output_[mask_ids,:] -# np.savetxt(f[:-4]+'.csv', output_,delimiter=',') - -# def cleanup_data(self): -# mat_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.mat'))] -# zip_files = [y for x in os.walk(self.dataset_folder) for y in glob(os.path.join(x[0], '*.zip'))] -# files = mat_files + zip_files -# for f in files: -# os.remove(f) - -# def prepare_data(self, format=OfflineDataHandler): -# if format == OfflineDataHandler: -# classes_values = list(range(1,24)) -# classes_column = [10] -# classset_values = [str(i) for i in list(range(1,4))] -# classset_regex = make_regex(left_bound="_E", right_bound=".csv", values=classset_values) -# reps_values = list(range(1,11)) - -# reps_column = [11] -# subjects_values = [str(s) for s in self.subjects] -# subjects_regex = make_regex(left_bound="S", right_bound="_A", values=subjects_values) -# data_column = list(range(0,10)) -# dic = { -# "reps": reps_values, -# "reps_column": reps_column, -# "classes": classes_values, -# "classes_column": classes_column, -# "subjects": subjects_values, -# "subjects_regex": subjects_regex, -# "classset": classset_values, -# 
"classset_regex": classset_regex, -# "data_column": data_column -# } -# odh = OfflineDataHandler() -# odh.get_data(folder_location=self.dataset_folder, filename_dic=dic, delimiter=",") -# return odh + return file_list \ No newline at end of file diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index b4bb2552..5d3c9b10 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -12,8 +12,8 @@ def __init__(self): 8, 'Myo Armband', 612, - ['Close', 'Open', 'Rest', 'Flexion', 'Extension'], - '50 (For 306 Users), 25 (For 306 Users)', + {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, + '50 Reps x 306 Users (Train), 25 Reps x 306 Users (Test)', "A large 612 user dataset for developing cross user models.", 'https://doi.org/10.5281/zenodo.4421500') self.url = "https://github.com/libemg/OneSubjectMyoDataset" @@ -23,7 +23,7 @@ def __init__(self): def prepare_data(self): print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): - print("Please download the pickled dataset from: https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EWf3sEvRxg9HuAmGoBG2vYkBDXh4xNst3FAXV0lNoodrAA?e=t6HPaR") #TODO: Fill this in + print("Please download the pickled dataset from: https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EWf3sEvRxg9HuAmGoBG2vYkBDXh4xNst3FAXV0lNoodrAA?e=t6HPaR") return file = open(self.dataset_folder, 'rb') diff --git a/libemg/_datasets/fors_emg.py b/libemg/_datasets/fors_emg.py new file mode 100644 index 00000000..897f5a9f --- /dev/null +++ b/libemg/_datasets/fors_emg.py @@ -0,0 +1,44 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os +import scipy.io +import numpy as np + +class FORSEMG(Dataset): + def __init__(self): + Dataset.__init__(self, + 985, + 8, + 'Experimental Device', + 19, + {0: 'Thump Up', 1: 'Index', 2: 'Right Angle', 3: 'Peace', 4: 'Index Little', 5: 
'Thumb Little', 6: 'Hand Close', 7: 'Hand Open', 8: 'Wrist Flexion', 9: 'Wrist Extension', 10: 'Ulnar Deviation', 11: 'Radial Deviation'}, + '5 Train, 10 Test (2 Forarm Orientations x 5 Reps)', + "FORS-EMG: Twelve gestures elicited in three forearm orientations (neutral, pronation, and supination).", + 'https://arxiv.org/abs/2409.07484t') + self.dataset_name = 'FORS-EMG' + self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + + def prepare_data(self): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + print("Please download the pickled dataset from: https://www.kaggle.com/datasets/ummerummanchaity/fors-emg-a-novel-semg-dataset?resource=download") + return + + odh = OfflineDataHandler() + odh.subjects = [] + odh.classes = [] + odh.reps = [] + odh.orientation = [] + odh.extra_attributes = ['subjects', 'classes', 'reps', 'orientation'] + + for s in range(1, 20): + for g_i, g in enumerate(['Thumb_UP', 'Index', 'Right_Angle', 'Peace', 'Index_Little', 'Thumb_Little', 'Hand_Close', 'Hand_Open', 'Wrist_Flexion', 'Wrist_Extension', 'Radial_Deviation']): + for r in [1,2,3,4,5]: + for o_i, o in enumerate(['rest', 'pronation', 'supination']): + mat = scipy.io.loadmat('FORS-EMG/Subject' + str(s) + '/' + o + '/' + g + '-' + str(r) + '.mat') + odh.data.append(mat['value'].T) + odh.classes.append(np.ones((len(odh.data[-1]), 1)) * g_i) + odh.subjects.append(np.ones((len(odh.data[-1]), 1)) * s-1) + odh.reps.append(np.ones((len(odh.data[-1]), 1)) * r-1) + odh.orientation.append(np.ones((len(odh.data[-1]), 1)) * o_i) + return {'All': odh, 'Train': odh.isolate_data('orientation', [0]), 'Test': odh.isolate_data('orientation', [1,2])} \ No newline at end of file diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py index 15c08f6b..f600c63c 100644 --- a/libemg/_datasets/grab_myo.py +++ b/libemg/_datasets/grab_myo.py @@ -20,16 +20,13 @@ def __init__(self, save_dir='.', version='1.0.2', redownload=False, 
subjects=lis self.dataset_name = dataset_name self.dataset_folder = os.path.join(self.save_dir , self.dataset_name, version) - def download_data(self): - print('\nPlease cite: ' + self.citation+'\n') + def prepare_data(self, subjects=[str(i) for i in range(1,44)], sessions=["1"]): if (not self.check_exists(self.dataset_folder)): - print("Please download the GRABMyo dataset from: https://physionet.org/content/grabmyo/1.0.2/") #TODO: Fill this in + print("Please download the GRABMyo dataset from: https://physionet.org/content/grabmyo/1.0.2/") return - - def prepare_data(self, subjects=[str(i) for i in range(1,44)], sessions=["1"]): print('\nPlease cite: ' + self.citation+'\n') - sessions = ["1"] # ["1", "2", "3"] - subjects = ["1", "2", "3", "4", "5"] #[str(i) for i in range(1,44)] + sessions = ["1", "2", "3"] + subjects = [str(i) for i in range(1,44)] classes_values = ["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"] reps_values = ["1","2","3","4","5","6","7"] @@ -46,13 +43,4 @@ def prepare_data(self, subjects=[str(i) for i in range(1,44)], sessions=["1"]): forearm_data = odh.isolate_channels(list(range(0,16))) train_data = forearm_data.isolate_data('reps', [0,1,2,3,4]) test_data = forearm_data.isolate_data('reps', [5,6]) - return {'All': forearm_data, 'Train': train_data, 'Test': test_data} - -# def print_info(self): -# print('Reference: https://www.physionet.org/content/grabmyo/1.0.2/') -# print('Name: ' + self.dataset_name) -# print('Gestures: 17') -# print('Trials: 7') -# print('Time Per Rep: 5s') -# print('Subjects: 43') -# print("Forearm EMG (16): Columns 0-15\nWrist EMG (12): 18-23 and 26-31\nUnused (4): 16,23,24,31") + return {'All': forearm_data, 'Train': train_data, 'Test': test_data} \ No newline at end of file diff --git a/libemg/_datasets/myodisco.py b/libemg/_datasets/myodisco.py new file mode 100644 index 00000000..412a7364 --- /dev/null +++ b/libemg/_datasets/myodisco.py @@ -0,0 +1,76 @@ +from libemg._datasets.dataset 
import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +from libemg.utils import * +from libemg.feature_extractor import FeatureExtractor +import os + +class MyoDisCo(Dataset): + def __init__(self, save_dir='.', redownload=False, dataset_name="MyoDisCo", cross_day=False): + self.cross_day = cross_day + desc = 'The MyoDisCo dataset which includes both the across day and limb position confounds. (Limb Position Version)' + if self.cross_day: + desc = 'The MyoDisCo dataset which includes both the across day and limb position confounds. (Cross Day Version)' + Dataset.__init__(self, + 200, + 8, + 'Myo Armband', + 14, + {0: "Wrist Extension", 1: "Finger Gun", 2: "Wrist Flexion", 3: "Hand Close", 4: "Hand Open", 5: "Thumbs Up", 6: "Rest"}, + '20 (Train) and 20 (Test) - Each gesture ~0.5s', + desc, + "https://iopscience.iop.org/article/10.1088/1741-2552/ad4915/meta", + save_dir, redownload) + self.url = "https://github.com/libemg/MyoDisCo" + self.dataset_name = dataset_name + self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + + def prepare_data(self): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + elif (self.redownload): + self.remove_dataset(self.dataset_folder) + self.download(self.url, self.dataset_folder) + + + sets_values = ['day1', 'day2', 'positions'] + subjects_value = [str(i) for i in range(1,15)] + classes_values = ["1","2","3","4","5","8","9"] + reps_values = [str(i) for i in range(0,20)] + regex_filters = [ + RegexFilter(left_bound = "/", right_bound="/", values = sets_values, description='sets'), + RegexFilter(left_bound = "C_", right_bound="_EMG", values = classes_values, description='classes'), + RegexFilter(left_bound = "R_", right_bound="_C", values = reps_values, description='reps'), + RegexFilter(left_bound="S", right_bound="/",values=subjects_value, description='subjects') + ] + odh = OfflineDataHandler() + 
odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + + fe = FeatureExtractor() + # We need to parse each item to remove no motion + for i, d in enumerate(odh.data): + w = get_windows(d, 20, 10) + mav = fe.extract_features(['MAV'], w, array=True) + max_idx = np.argmax(np.mean(mav, axis=1)) * 10 + 20 + if odh.classes[i][0][0] == 6: + odh.data[i] = d[100:200] + else: + low = max_idx-50 + high = max_idx+50 + if low < 0: + high += np.abs(low) + low = 0 + elif high >= len(odh.data[i]): + low -= np.abs(len(odh.data[i])-high) + high = len(odh.data[i]) + odh.data[i] = d[low:high] + + odh.sets[i] = np.ones((len(odh.data[i]), 1)) * odh.sets[i][0][0] + odh.classes[i] = np.ones((len(odh.data[i]), 1)) * odh.classes[i][0][0] + odh.reps[i] = np.ones((len(odh.data[i]), 1)) * odh.reps[i][0][0] + odh.subjects[i] = np.ones((len(odh.data[i]), 1)) * odh.subjects[i][0][0] + + if self.cross_day: + return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} + + return {'All': odh, 'Train': odh.isolate_data("sets", [1]), 'Test': odh.isolate_data("sets", [2])} diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py new file mode 100644 index 00000000..14573922 --- /dev/null +++ b/libemg/_datasets/nina_pro.py @@ -0,0 +1,147 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os +import scipy.io as sio +import zipfile +import numpy as np + +def find_all_files_of_type_recursively(dir, terminator): + files = os.listdir(dir) + file_list = [] + for file in files: + if file.endswith(terminator): + file_list.append(dir+file) + else: + if os.path.isdir(dir+file): + file_list += find_all_files_of_type_recursively(dir+file+'/',terminator) + return file_list + +class Ninapro(Dataset): + def __init__(self, + sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation, + save_dir='.', + 
dataset_name="Ninapro"): + # downloading the Ninapro dataset is not supported (no permission given from the authors)' + # however, you can download it from http://ninapro.hevs.ch/DB8 + # the subject zip files should be placed at: /NinaproDB8/DB8_s#.zip + Dataset.__init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation, save_dir) + self.dataset_name = dataset_name + self.dataset_folder = os.path.join(self.save_dir , self.dataset_name, "") + self.exercise_step = [] + + def convert_to_compatible(self): + # get the zip files (original format they're downloaded in) + zip_files = find_all_files_of_type_recursively(self.dataset_folder,".zip") + # unzip the files -- if any are there (successive runs skip this) + for zip_file in zip_files: + with zipfile.ZipFile(zip_file, 'r') as zip_ref: + zip_ref.extractall(zip_file[:-4]+'/') + os.remove(zip_file) + # get the mat files (the files we want to convert to csv) + mat_files = find_all_files_of_type_recursively(self.dataset_folder,".mat") + for mat_file in mat_files: + self.convert_to_csv(mat_file) + + def convert_to_csv(self, mat_file): + # read the mat file + mat_file = mat_file.replace("\\", "/") + mat_dir = mat_file.split('/') + mat_dir = os.path.join(*mat_dir[:-1],"") + mat = sio.loadmat(mat_file) + # get the data + exercise = int(mat_file.split('_')[3][1]) + exercise_offset = self.exercise_step[exercise-1] # 0 reps already included + data = mat['emg'] + restimulus = mat['restimulus'] + rerepetition = mat['rerepetition'] + if data.shape[0] != restimulus.shape[0]: # this happens in some cases + min_shape = min([data.shape[0], restimulus.shape[0]]) + data = data[:min_shape,:] + restimulus = restimulus[:min_shape,] + rerepetition = rerepetition[:min_shape,] + # remove 0 repetition - collection buffer + remove_mask = (rerepetition != 0).squeeze() + data = data[remove_mask,:] + restimulus = restimulus[remove_mask] + rerepetition = rerepetition[remove_mask] + # important 
little not here: + # the "rest" really is only the rest between motions, not a dedicated rest class. + # there will be many more rest repetitions (as it is between every class) + # so usually we really care about classifying rest as its important (most of the time we do nothing) + # but for this dataset it doesn't make sense to include (and not its just an offline showcase of the library) + # I encourage you to plot the restimulus to see what I mean. -> plt.plot(restimulus) + # so we remove the rest class too + remove_mask = (restimulus != 0).squeeze() + data = data[remove_mask,:] + restimulus = restimulus[remove_mask] + rerepetition = rerepetition[remove_mask] + tail = 0 + while tail < data.shape[0]-1: + rep = rerepetition[tail][0] # remove the 1 offset (0 was the collection buffer) + motion = restimulus[tail][0] # remove the 1 offset (0 was between motions "rest") + # find head + head = np.where(rerepetition[tail:] != rep)[0] + if head.shape == (0,): # last segment of data + head = data.shape[0] -1 + else: + head = head[0] + tail + # downsample to 1kHz from 2kHz using decimation + data_for_file = data[tail:head,:] + data_for_file = data_for_file[::2, :] + # write to csv + csv_file = mat_dir + 'C' + str(motion-1) + 'R' + str(rep-1 + exercise_offset) + '.csv' + np.savetxt(csv_file, data_for_file, delimiter=',') + tail = head + os.remove(mat_file) + +class NinaproDB8(Ninapro): + def __init__(self, save_dir='.', dataset_name="NinaProDB8"): + Ninapro.__init__(self, save_dir, dataset_name) + self.class_list = ["Thumb Flexion/Extension", "Thumb Abduction/Adduction", "Index Finger Flexion/Extension", "Middle Finger Flexion/Extension", "Combined Ring and Little Fingers Flexion/Extension", + "Index Pointer", "Cylindrical Grip", "Lateral Grip", "Tripod Grip"] + self.exercise_step = [0,10,20] + + def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,13)], + reps_values = [str(i) for i in range(22)], + classes_values = [str(i) for i in 
range(9)]): + + if format == OfflineDataHandler: + regex_filters = [ + RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), + RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), + RegexFilter(left_bound="DB8_s", right_bound="/",values=subjects_values, description='subjects') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + return odh + +class NinaproDB2(Ninapro): + def __init__(self, save_dir='.', dataset_name="NinaProDB2"): + Ninapro.__init__(self, + 2000, + 12, + 'Delsys', + 40, + {0: 'See Exercises B and C from: https://ninapro.hevs.ch/instructions/DB2.html'}, + '4 Train, 2 Test', + "NinaProb DB2.", + 'https://ninapro.hevs.ch/', + save_dir, dataset_name) + self.exercise_step = [0,0,0] + + def prepare_data(self, subjects_values = [str(i) for i in range(1,41)], + reps_values = [str(i) for i in range(6)], + classes_values = [str(i) for i in range(50)]): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + print("Please download the NinaProDB2 dataset from: https://ninapro.hevs.ch/instructions/DB2.html") + return + self.convert_to_compatible() + regex_filters = [ + RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), + RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), + RegexFilter(left_bound="DB2_s", right_bound="/",values=subjects_values, description='subjects') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + return {'All': odh, 'Train': odh.isolate_data('reps', [0,1,2,3]), 'Test': odh.isolate_data('reps', [4,5])} \ No newline at end of file diff --git a/libemg/_datasets/one_subject_myo.py b/libemg/_datasets/one_subject_myo.py index 8337f180..24774f49 100644 --- 
a/libemg/_datasets/one_subject_myo.py +++ b/libemg/_datasets/one_subject_myo.py @@ -9,7 +9,7 @@ def __init__(self, save_dir='.', redownload=False, dataset_name="OneSubjectMyoDa 8, 'Myo Armband', 1, - ['Close', 'Open', 'Rest', 'Flexion', 'Extension'], + {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, '6 (4 Train, 2 Test)', "A simple Myo dataset that is used for some of the LibEMG offline demos.", 'N/A', save_dir, redownload) @@ -24,15 +24,14 @@ def prepare_data(self, format=OfflineDataHandler): self.remove_dataset(self.dataset_folder) self.download(self.url, self.dataset_folder) - if format == OfflineDataHandler: - sets_values = ["1","2","3","4","5","6"] - classes_values = ["0","1","2","3","4"] - reps_values = ["0","1"] - regex_filters = [ - RegexFilter(left_bound = "/trial_", right_bound="/", values = sets_values, description='sets'), - RegexFilter(left_bound = "C_", right_bound=".csv", values = classes_values, description='classes'), - RegexFilter(left_bound = "R_", right_bound="_", values = reps_values, description='reps') - ] - odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") - return {'All': odh, 'Train': odh.isolate_data("sets", [0,1,2,3,4]), 'Test': odh.isolate_data("sets", [5,6])} \ No newline at end of file + sets_values = ["1","2","3","4","5","6"] + classes_values = ["0","1","2","3","4"] + reps_values = ["0","1"] + regex_filters = [ + RegexFilter(left_bound = "/trial_", right_bound="/", values = sets_values, description='sets'), + RegexFilter(left_bound = "C_", right_bound=".csv", values = classes_values, description='classes'), + RegexFilter(left_bound = "R_", right_bound="_", values = reps_values, description='reps') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + return {'All': odh, 'Train': odh.isolate_data("sets", [0,1,2,3,4]), 'Test': odh.isolate_data("sets", [5,6])} \ No 
newline at end of file diff --git a/libemg/datasets.py b/libemg/datasets.py index 1f4458d5..298ecf19 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -4,6 +4,9 @@ from libemg._datasets.ciil import CIIL_MinimalData, CIIL_ElectrodeShift from libemg._datasets.grab_myo import GRABMyo from libemg._datasets.continous_transitions import ContinuousTransitions +from libemg._datasets.nina_pro import NinaproDB2 +from libemg._datasets.myodisco import MyoDisCo +from libemg._datasets.fors_emg import FORSEMG def get_dataset_list(): """Gets a list of all available datasets. @@ -21,6 +24,9 @@ def get_dataset_list(): 'CIIL_ElectrodeShift': CIIL_ElectrodeShift, 'GRABMyo': GRABMyo, 'ContinuousTransitions': ContinuousTransitions, + 'NinaProDB2': NinaproDB2, + 'MyoDisCo': MyoDisCo, + 'FORS-EMG': FORSEMG, } def get_dataset_info(dataset): diff --git a/libemg/feature_extractor.py b/libemg/feature_extractor.py index 1e513d8e..f174a292 100644 --- a/libemg/feature_extractor.py +++ b/libemg/feature_extractor.py @@ -1397,15 +1397,18 @@ def getWENGfeat(self, windows, WENG_fs = 1000): list The computed features associated with each window. Size: Wx((order+1)*Nchannels) """ - # get the highest power of 2 the nyquist rate is divisible by - order = math.floor(np.log(WENG_fs/2)/np.log(2) - 1) - # Khushaba et al suggests using sym8 - # note, this will often throw a WARNING saying the user specified order is too high -- but this is what the - # original paper suggests using as the order. 
- wavelets = wavedec(windows, wavelet='sym8', level=order,axis=2) - # for every order, compute the energy (sum of DWT) - total of the squared signal - features = np.hstack([np.log(np.sum(i**2, axis=2)+1e-10) for i in wavelets]) - return features + import warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # get the highest power of 2 the nyquist rate is divisible by + order = math.floor(np.log(WENG_fs/2)/np.log(2) - 1) + # Khushaba et al suggests using sym8 + # note, this will often throw a WARNING saying the user specified order is too high -- but this is what the + # original paper suggests using as the order. + wavelets = wavedec(windows, wavelet='sym8', level=order,axis=2) + # for every order, compute the energy (sum of DWT) - total of the squared signal + features = np.hstack([np.log(np.sum(i**2, axis=2)+1e-10) for i in wavelets]) + return features def getWVfeat(self, windows, WV_fs=1000): From c7a6a12db2fdfcfca2bea9445e1eab0aed3c8bc0 Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 16 Sep 2024 11:04:15 -0300 Subject: [PATCH 036/129] Added Grab Myo --- .gitignore | 3 +- dataset_tryout.py | 47 ++++++++++++++++++++++++ libemg/_datasets/grab_myo.py | 71 ++++++++++++++++++++++++++++-------- libemg/datasets.py | 5 ++- 4 files changed, 107 insertions(+), 19 deletions(-) create mode 100644 dataset_tryout.py diff --git a/.gitignore b/.gitignore index 64dd7bcd..eec03535 100644 --- a/.gitignore +++ b/.gitignore @@ -52,4 +52,5 @@ ContinuousTransitions/* FORS-EMG/* MyoDisCo/* NinaProDB1/* -*.zip \ No newline at end of file +*.zip +libemg/_datasets/__pycache__/* \ No newline at end of file diff --git a/dataset_tryout.py b/dataset_tryout.py new file mode 100644 index 00000000..8cd617a2 --- /dev/null +++ b/dataset_tryout.py @@ -0,0 +1,47 @@ +from libemg.datasets import * +from libemg.feature_extractor import * +from libemg.emg_predictor import EMGClassifier +from libemg.offline_metrics import OfflineMetrics + +print(get_dataset_list()) + +dataset = 
get_dataset_list()['MyoDisCo'](cross_day=True) +dataset.get_info() +data = dataset.prepare_data() + +train_data = data['Train'] +test_data = data['Test'] + +print("Loaded Data") + +accuracies = [] +for s in range(0, dataset.num_subjects): + print("Subject: " + str(s)) + s_train_dh = train_data.isolate_data('subjects', [s]) + s_test_dh = test_data.isolate_data('subjects', [s]) + train_windows, train_meta = s_train_dh.parse_windows(30, 5) + test_windows, test_meta = s_test_dh.parse_windows(30, 5) + + fe = FeatureExtractor() + train_feats = fe.extract_features(['WENG'], train_windows) + test_feats = fe.extract_features(['WENG'], test_windows) + + # fe.visualize_feature_space(train_feats, 'PCA', train_meta['classes']) + + model = EMGClassifier(model='LDA') + ds = { + 'training_features': train_feats, + 'training_labels': train_meta['classes'] + } + model.fit(ds) + + preds, probs = model.run(test_feats) + om = OfflineMetrics() + accuracies.append(om.get_CA(test_meta['classes'], preds)) + conf_mat = om.get_CONF_MAT(preds, test_meta['classes']) + # om.visualize_conf_matrix(conf_mat) + print(om.get_CA(test_meta['classes'], preds)) + +print('CA: ' + str(np.mean(accuracies)) + ' +/- ' + str(np.std(accuracies))) + + \ No newline at end of file diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py index f600c63c..27b6c4e3 100644 --- a/libemg/_datasets/grab_myo.py +++ b/libemg/_datasets/grab_myo.py @@ -3,28 +3,40 @@ import os class GRABMyo(Dataset): - def __init__(self): - pass - - def __init__(self, save_dir='.', version='1.0.2', redownload=False, subjects=list(range(1,44)), sessions=list(range(1,4)), dataset_name="grabmyo"): + """ + By default this just uses the 16 forearm electrodes. 
+ """ + def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo", baseline=False): + split = '7 Train, 14 Test (2 Seperate Days x 7 Reps)' + if baseline: + split = '5 Train, 2 Test (Basline)' Dataset.__init__(self, - 0, - 0, - '', - 3, - [], - '', - "", - "", - save_dir, redownload) + 2048, + 16, + 'EMGUSB2+ device (OT Bioelletronica, Italy)', + 19, + {0: 'Lateral Prehension', 1: 'Thumb Adduction', 2: 'Thumb and Little Finger Opposition', 3: 'Thumb and Index Finger Opposition', 4: 'Thumb and Index Finger Extension', 5: 'Thumb and Little Finger Extension', 6: 'Index and Middle Finger Extension', + 7: 'Little Finger Extension', 8: 'Index Finger Extension', 9: 'Thumb Finger Extension', 10: 'Wrist Extension', 11: 'Wrist Flexion', 12: 'Forearm Supination', 13: 'Forearm Pronation', 14: 'Hand Open', 15: 'Hand Close', 16: 'Rest'}, + split, + "GrabMyo: A large cross session dataset including 17 gestures elicited across 3 seperate sessions.", + 'https://www.nature.com/articles/s41597-022-01836-y') self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name, version) - - def prepare_data(self, subjects=[str(i) for i in range(1,44)], sessions=["1"]): + self.dataset_folder = os.path.join(save_dir , self.dataset_name, version) + + def check_if_exist(self): if (not self.check_exists(self.dataset_folder)): print("Please download the GRABMyo dataset from: https://physionet.org/content/grabmyo/1.0.2/") return print('\nPlease cite: ' + self.citation+'\n') + + +class GRABMyoCrossDay(GRABMyo): + def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo"): + GRABMyo.__init__(self, save_dir=save_dir, version=version, dataset_name=dataset_name, baseline=False) + + def prepare_data(self): + self.check_exists() + sessions = ["1", "2", "3"] subjects = [str(i) for i in range(1,44)] classes_values = ["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"] @@ -40,6 +52,33 @@ def prepare_data(self, 
subjects=[str(i) for i in range(1,44)], sessions=["1"]): odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + forearm_data = odh.isolate_channels(list(range(0,16))) + train_data = forearm_data.isolate_data('sets', [0]) + test_data = forearm_data.isolate_data('sets', [1]) + return {'All': forearm_data, 'Train': train_data, 'Test': test_data} + +class GRABMyoBaseline(GRABMyo): + def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo"): + GRABMyo.__init__(self, save_dir=save_dir, version=version, dataset_name=dataset_name, baseline=True) + + def prepare_data(self): + self.check_exists() + + sessions = ["1"] + subjects = [str(i) for i in range(1,44)] + classes_values = ["1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17"] + reps_values = ["1","2","3","4","5","6","7"] + + regex_filters = [ + RegexFilter(left_bound = "session", right_bound="_", values = sessions, description='session'), + RegexFilter(left_bound = "_gesture", right_bound="_", values = classes_values, description='classes'), + RegexFilter(left_bound = "trial", right_bound=".hea", values = reps_values, description='reps'), + RegexFilter(left_bound="participant", right_bound="_",values=subjects, description='subjects') + ] + + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + forearm_data = odh.isolate_channels(list(range(0,16))) train_data = forearm_data.isolate_data('reps', [0,1,2,3,4]) test_data = forearm_data.isolate_data('reps', [5,6]) diff --git a/libemg/datasets.py b/libemg/datasets.py index 298ecf19..afc2db69 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -2,7 +2,7 @@ from libemg._datasets.one_subject_myo import OneSubjectMyoDataset from libemg._datasets.emg_epn612 import EMGEPN612 from libemg._datasets.ciil import CIIL_MinimalData, CIIL_ElectrodeShift -from libemg._datasets.grab_myo import GRABMyo +from 
libemg._datasets.grab_myo import GRABMyoBaseline, GRABMyoCrossDay from libemg._datasets.continous_transitions import ContinuousTransitions from libemg._datasets.nina_pro import NinaproDB2 from libemg._datasets.myodisco import MyoDisCo @@ -22,7 +22,8 @@ def get_dataset_list(): 'EMGEPN612': EMGEPN612, 'CIIL_MinimalData': CIIL_MinimalData, 'CIIL_ElectrodeShift': CIIL_ElectrodeShift, - 'GRABMyo': GRABMyo, + 'GRABMyoBaseline': GRABMyoBaseline, + 'GRABMyoCrossDay': GRABMyoCrossDay, 'ContinuousTransitions': ContinuousTransitions, 'NinaProDB2': NinaproDB2, 'MyoDisCo': MyoDisCo, From cd7f61d38670805432a276b7e763027af5aceb0b Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 16 Sep 2024 11:18:11 -0300 Subject: [PATCH 037/129] Updates --- libemg/_datasets/grab_myo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py index 27b6c4e3..94ef08cf 100644 --- a/libemg/_datasets/grab_myo.py +++ b/libemg/_datasets/grab_myo.py @@ -35,7 +35,7 @@ def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo"): GRABMyo.__init__(self, save_dir=save_dir, version=version, dataset_name=dataset_name, baseline=False) def prepare_data(self): - self.check_exists() + self.check_if_exist() sessions = ["1", "2", "3"] subjects = [str(i) for i in range(1,44)] @@ -62,7 +62,7 @@ def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo"): GRABMyo.__init__(self, save_dir=save_dir, version=version, dataset_name=dataset_name, baseline=True) def prepare_data(self): - self.check_exists() + self.check_if_exist() sessions = ["1"] subjects = [str(i) for i in range(1,44)] From 4ea2ebce3132f55deb55ccf8260665a487eea10d Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 16 Sep 2024 11:29:09 -0300 Subject: [PATCH 038/129] Changed pathing --- libemg/_datasets/_3DC.py | 8 ++-- libemg/_datasets/ciil.py | 14 +++---- libemg/_datasets/continous_transitions.py | 8 ++-- libemg/_datasets/dataset.py | 5 +-- 
libemg/_datasets/emg_epn612.py | 5 +-- libemg/_datasets/fors_emg.py | 5 +-- libemg/_datasets/grab_myo.py | 15 ++++--- libemg/_datasets/myodisco.py | 8 ++-- libemg/_datasets/nina_pro.py | 48 +++++++++++------------ libemg/_datasets/one_subject_myo.py | 7 ++-- 10 files changed, 53 insertions(+), 70 deletions(-) diff --git a/libemg/_datasets/_3DC.py b/libemg/_datasets/_3DC.py index bd86335a..26a8bfd7 100644 --- a/libemg/_datasets/_3DC.py +++ b/libemg/_datasets/_3DC.py @@ -3,7 +3,7 @@ import os class _3DCDataset(Dataset): - def __init__(self, save_dir='.', redownload=False, dataset_name="_3DCDataset"): + def __init__(self, dataset_folder="_3DCDataset/"): Dataset.__init__(self, 1000, 10, @@ -12,11 +12,9 @@ def __init__(self, save_dir='.', redownload=False, dataset_name="_3DCDataset"): {0: "Neutral", 1: "Radial Deviation", 2: "Wrist Flexion", 3: "Ulnar Deviation", 4: "Wrist Extension", 5: "Supination", 6: "Pronation", 7: "Power Grip", 8: "Open Hand", 9: "Chuck Grip", 10: "Pinch Grip"}, '8 (4 Train, 4 Test)', "The 3DC dataset including 11 classes.", - "https://ieeexplore.ieee.org/document/8630679", - save_dir, redownload) + "https://ieeexplore.ieee.org/document/8630679") self.url = "https://github.com/libemg/3DCDataset" - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + self.dataset_folder = dataset_folder def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,23)], sets_values = ["train", "test"], diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py index fd953956..94833fce 100644 --- a/libemg/_datasets/ciil.py +++ b/libemg/_datasets/ciil.py @@ -3,7 +3,7 @@ import os class CIIL_MinimalData(Dataset): - def __init__(self, save_dir='.'): + def __init__(self, dataset_folder='CIILData/'): Dataset.__init__(self, 200, 8, @@ -12,10 +12,9 @@ def __init__(self, save_dir='.'): {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, '1 Train (1s), 15 Test', "The goal 
of this Myo dataset is to explore how well models perform when they have a limited amount of training data (1s per class).", - 'https://ieeexplore.ieee.org/abstract/document/10394393', - save_dir) + 'https://ieeexplore.ieee.org/abstract/document/10394393') self.url = "https://github.com/LibEMG/CIILData" - self.dataset_folder = os.path.join(self.save_dir , 'CIILData') + self.dataset_folder = dataset_folder def prepare_data(self): print('\nPlease cite: ' + self.citation+'\n') @@ -42,7 +41,7 @@ def prepare_data(self): return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} class CIIL_ElectrodeShift(Dataset): - def __init__(self, save_dir='.'): + def __init__(self, dataset_folder='CIILData/'): Dataset.__init__(self, 200, 8, @@ -51,10 +50,9 @@ def __init__(self, save_dir='.'): {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, '5 Train (Before Shift), 8 Test (After Shift)', "An electrode shift confounding factors dataset.", - 'https://link.springer.com/article/10.1186/s12984-024-01355-4', - save_dir) + 'https://link.springer.com/article/10.1186/s12984-024-01355-4') self.url = "https://github.com/LibEMG/CIILData" - self.dataset_folder = os.path.join(self.save_dir , 'CIILData') + self.dataset_folder = dataset_folder def prepare_data(self): print('\nPlease cite: ' + self.citation+'\n') diff --git a/libemg/_datasets/continous_transitions.py b/libemg/_datasets/continous_transitions.py index d45ed73f..bee3a8f4 100644 --- a/libemg/_datasets/continous_transitions.py +++ b/libemg/_datasets/continous_transitions.py @@ -6,7 +6,7 @@ import numpy as np class ContinuousTransitions(Dataset): - def __init__(self, save_dir='.', redownload=False, dataset_name="ContinuousTransitions"): + def __init__(self, dataset_folder="ContinuousTransitions/"): Dataset.__init__(self, 2000, 6, @@ -15,10 +15,8 @@ def __init__(self, save_dir='.', redownload=False, dataset_name="ContinuousTrans {0: 'No Motion', 1: 'Wrist Flexion', 2: 'Wrist 
Extension', 3: 'Wrist Pronation', 4: 'Wrist Supination', 5: 'Hand Close', 6: 'Hand Open'}, '6 Training (Ramp), 42 Transitions (All combinations of Transitions) x 6 Reps', "The testing set in this dataset has continuous transitions between classes which is a more realistic offline evaluation standard for myoelectric control.", - "https://ieeexplore.ieee.org/document/10254242", - save_dir, redownload) - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + "https://ieeexplore.ieee.org/document/10254242") + self.dataset_folder = dataset_folder def prepare_data(self): print('\nPlease cite: ' + self.citation+'\n') diff --git a/libemg/_datasets/dataset.py b/libemg/_datasets/dataset.py index cb2d68b3..3b232abe 100644 --- a/libemg/_datasets/dataset.py +++ b/libemg/_datasets/dataset.py @@ -3,10 +3,7 @@ # this assumes you have git downloaded (not pygit, but the command line program git) class Dataset: - def __init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation, save_dir='.', redownload=False): - self.save_dir = save_dir - self.redownload=redownload - + def __init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation): # Every class should have this self.sampling=sampling self.num_channels=num_channels diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 5d3c9b10..8bdcc3a2 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -6,7 +6,7 @@ import numpy as np class EMGEPN612(Dataset): - def __init__(self): + def __init__(self, dataset_file='EMGEPN612.pkl'): Dataset.__init__(self, 200, 8, @@ -17,8 +17,7 @@ def __init__(self): "A large 612 user dataset for developing cross user models.", 'https://doi.org/10.5281/zenodo.4421500') self.url = "https://github.com/libemg/OneSubjectMyoDataset" - self.dataset_name = 'EMGEPN612.pkl' - self.dataset_folder = 
os.path.join(self.save_dir , self.dataset_name) + self.dataset_name = dataset_file def prepare_data(self): print('\nPlease cite: ' + self.citation+'\n') diff --git a/libemg/_datasets/fors_emg.py b/libemg/_datasets/fors_emg.py index 897f5a9f..89171cf3 100644 --- a/libemg/_datasets/fors_emg.py +++ b/libemg/_datasets/fors_emg.py @@ -5,7 +5,7 @@ import numpy as np class FORSEMG(Dataset): - def __init__(self): + def __init__(self, dataset_folder='FORS-EMG/'): Dataset.__init__(self, 985, 8, @@ -15,8 +15,7 @@ def __init__(self): '5 Train, 10 Test (2 Forarm Orientations x 5 Reps)', "FORS-EMG: Twelve gestures elicited in three forearm orientations (neutral, pronation, and supination).", 'https://arxiv.org/abs/2409.07484t') - self.dataset_name = 'FORS-EMG' - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + self.dataset_folder = dataset_folder def prepare_data(self): print('\nPlease cite: ' + self.citation+'\n') diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py index 94ef08cf..694e0d1b 100644 --- a/libemg/_datasets/grab_myo.py +++ b/libemg/_datasets/grab_myo.py @@ -6,7 +6,7 @@ class GRABMyo(Dataset): """ By default this just uses the 16 forearm electrodes. 
""" - def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo", baseline=False): + def __init__(self, dataset_folder='GRABMyo/', baseline=False): split = '7 Train, 14 Test (2 Seperate Days x 7 Reps)' if baseline: split = '5 Train, 2 Test (Basline)' @@ -14,14 +14,13 @@ def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo", baseli 2048, 16, 'EMGUSB2+ device (OT Bioelletronica, Italy)', - 19, + 43, {0: 'Lateral Prehension', 1: 'Thumb Adduction', 2: 'Thumb and Little Finger Opposition', 3: 'Thumb and Index Finger Opposition', 4: 'Thumb and Index Finger Extension', 5: 'Thumb and Little Finger Extension', 6: 'Index and Middle Finger Extension', 7: 'Little Finger Extension', 8: 'Index Finger Extension', 9: 'Thumb Finger Extension', 10: 'Wrist Extension', 11: 'Wrist Flexion', 12: 'Forearm Supination', 13: 'Forearm Pronation', 14: 'Hand Open', 15: 'Hand Close', 16: 'Rest'}, split, "GrabMyo: A large cross session dataset including 17 gestures elicited across 3 seperate sessions.", 'https://www.nature.com/articles/s41597-022-01836-y') - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(save_dir , self.dataset_name, version) + self.dataset_folder = dataset_folder def check_if_exist(self): if (not self.check_exists(self.dataset_folder)): @@ -31,8 +30,8 @@ def check_if_exist(self): class GRABMyoCrossDay(GRABMyo): - def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo"): - GRABMyo.__init__(self, save_dir=save_dir, version=version, dataset_name=dataset_name, baseline=False) + def __init__(self, dataset_folder="GRABMyo"): + GRABMyo.__init__(self, dataset_folder=dataset_folder, baseline=False) def prepare_data(self): self.check_if_exist() @@ -58,8 +57,8 @@ def prepare_data(self): return {'All': forearm_data, 'Train': train_data, 'Test': test_data} class GRABMyoBaseline(GRABMyo): - def __init__(self, save_dir='.', version='1.0.2', dataset_name="grabmyo"): - GRABMyo.__init__(self, save_dir=save_dir, 
version=version, dataset_name=dataset_name, baseline=True) + def __init__(self, dataset_folder="GRABMyo"): + GRABMyo.__init__(self, dataset_folder=dataset_folder, baseline=True) def prepare_data(self): self.check_if_exist() diff --git a/libemg/_datasets/myodisco.py b/libemg/_datasets/myodisco.py index 412a7364..73ffe15d 100644 --- a/libemg/_datasets/myodisco.py +++ b/libemg/_datasets/myodisco.py @@ -5,7 +5,7 @@ import os class MyoDisCo(Dataset): - def __init__(self, save_dir='.', redownload=False, dataset_name="MyoDisCo", cross_day=False): + def __init__(self, dataset_folder="MyoDisCo/", cross_day=False): self.cross_day = cross_day desc = 'The MyoDisCo dataset which includes both the across day and limb position confounds. (Limb Position Version)' if self.cross_day: @@ -18,11 +18,9 @@ def __init__(self, save_dir='.', redownload=False, dataset_name="MyoDisCo", cros {0: "Wrist Extension", 1: "Finger Gun", 2: "Wrist Flexion", 3: "Hand Close", 4: "Hand Open", 5: "Thumbs Up", 6: "Rest"}, '20 (Train) and 20 (Test) - Each gesture ~0.5s', desc, - "https://iopscience.iop.org/article/10.1088/1741-2552/ad4915/meta", - save_dir, redownload) + "https://iopscience.iop.org/article/10.1088/1741-2552/ad4915/meta") self.url = "https://github.com/libemg/MyoDisCo" - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + self.dataset_folder = dataset_folder def prepare_data(self): print('\nPlease cite: ' + self.citation+'\n') diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index 14573922..736c82a8 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -19,14 +19,12 @@ def find_all_files_of_type_recursively(dir, terminator): class Ninapro(Dataset): def __init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation, - save_dir='.', - dataset_name="Ninapro"): + dataset_folder="Ninapro"): # downloading the Ninapro dataset is not supported 
(no permission given from the authors)' # however, you can download it from http://ninapro.hevs.ch/DB8 # the subject zip files should be placed at: /NinaproDB8/DB8_s#.zip - Dataset.__init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation, save_dir) - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name, "") + Dataset.__init__(self, sampling, num_channels, recording_device, num_subjects, gestures, num_reps, description, citation) + self.dataset_folder = dataset_folder self.exercise_step = [] def convert_to_compatible(self): @@ -94,29 +92,29 @@ def convert_to_csv(self, mat_file): tail = head os.remove(mat_file) -class NinaproDB8(Ninapro): - def __init__(self, save_dir='.', dataset_name="NinaProDB8"): - Ninapro.__init__(self, save_dir, dataset_name) - self.class_list = ["Thumb Flexion/Extension", "Thumb Abduction/Adduction", "Index Finger Flexion/Extension", "Middle Finger Flexion/Extension", "Combined Ring and Little Fingers Flexion/Extension", - "Index Pointer", "Cylindrical Grip", "Lateral Grip", "Tripod Grip"] - self.exercise_step = [0,10,20] +# class NinaproDB8(Ninapro): +# def __init__(self, save_dir='.', dataset_name="NinaProDB8"): +# Ninapro.__init__(self, save_dir, dataset_name) +# self.class_list = ["Thumb Flexion/Extension", "Thumb Abduction/Adduction", "Index Finger Flexion/Extension", "Middle Finger Flexion/Extension", "Combined Ring and Little Fingers Flexion/Extension", +# "Index Pointer", "Cylindrical Grip", "Lateral Grip", "Tripod Grip"] +# self.exercise_step = [0,10,20] - def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,13)], - reps_values = [str(i) for i in range(22)], - classes_values = [str(i) for i in range(9)]): +# def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,13)], +# reps_values = [str(i) for i in range(22)], +# classes_values = [str(i) for i in 
range(9)]): - if format == OfflineDataHandler: - regex_filters = [ - RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), - RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), - RegexFilter(left_bound="DB8_s", right_bound="/",values=subjects_values, description='subjects') - ] - odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") - return odh +# if format == OfflineDataHandler: +# regex_filters = [ +# RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), +# RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), +# RegexFilter(left_bound="DB8_s", right_bound="/",values=subjects_values, description='subjects') +# ] +# odh = OfflineDataHandler() +# odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") +# return odh class NinaproDB2(Ninapro): - def __init__(self, save_dir='.', dataset_name="NinaProDB2"): + def __init__(self, dataset_folder="NinaProDB2/"): Ninapro.__init__(self, 2000, 12, @@ -126,7 +124,7 @@ def __init__(self, save_dir='.', dataset_name="NinaProDB2"): '4 Train, 2 Test', "NinaProb DB2.", 'https://ninapro.hevs.ch/', - save_dir, dataset_name) + dataset_folder = dataset_folder) self.exercise_step = [0,0,0] def prepare_data(self, subjects_values = [str(i) for i in range(1,41)], diff --git a/libemg/_datasets/one_subject_myo.py b/libemg/_datasets/one_subject_myo.py index 24774f49..c4ffe7a7 100644 --- a/libemg/_datasets/one_subject_myo.py +++ b/libemg/_datasets/one_subject_myo.py @@ -3,7 +3,7 @@ import os class OneSubjectMyoDataset(Dataset): - def __init__(self, save_dir='.', redownload=False, dataset_name="OneSubjectMyoDataset"): + def __init__(self, dataset_folder="OneSubjectMyoDataset/"): Dataset.__init__(self, 200, 8, @@ -12,10 +12,9 @@ def __init__(self, save_dir='.', 
redownload=False, dataset_name="OneSubjectMyoDa {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, '6 (4 Train, 2 Test)', "A simple Myo dataset that is used for some of the LibEMG offline demos.", - 'N/A', save_dir, redownload) + 'N/A') self.url = "https://github.com/libemg/OneSubjectMyoDataset" - self.dataset_name = dataset_name - self.dataset_folder = os.path.join(self.save_dir , self.dataset_name) + self.dataset_folder = dataset_folder def prepare_data(self, format=OfflineDataHandler): if (not self.check_exists(self.dataset_folder)): From 9c1f741e4c93b787fcd739496cc134545cc3cd37 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 16 Sep 2024 12:45:35 -0300 Subject: [PATCH 039/129] Update OneSubjectEMaGerDataset to new format Added in OneSubjectEMaGerDataset for regression tasks. Updated for new Dataset format. --- libemg/_datasets/one_subject_emager.py | 33 ++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 libemg/_datasets/one_subject_emager.py diff --git a/libemg/_datasets/one_subject_emager.py b/libemg/_datasets/one_subject_emager.py new file mode 100644 index 00000000..ebc1f764 --- /dev/null +++ b/libemg/_datasets/one_subject_emager.py @@ -0,0 +1,33 @@ +from pathlib import Path + +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter, FilePackager + + +class OneSubjectEMaGerDataset(Dataset): + def __init__(self, dataset_folder = 'OneSubjectEMaGerDataset/'): + super().__init__( + sampling=1010, + num_channels=64, + recording_device='EMaGer', + num_subjects=1, + gestures={0: 'Hand Close (-) / Hand Open (+)', 1: 'Pronation (-) / Supination (+)'}, + num_reps=5, + description='A simple EMaGer dataset used for regression examples in LibEMG demos.', + citation='N/A' + ) + self.url = 'https://github.com/LibEMG/OneSubjectEMaGerDataset' + self.dataset_folder = dataset_folder + + def prepare_data(self): + if (not self.check_exists(self.dataset_folder)): + 
self.download(self.url, self.dataset_folder) + regex_filters = [ + RegexFilter(left_bound='/', right_bound='/', values=['open-close', 'pro-sup'], description='movements'), + RegexFilter(left_bound='_R_', right_bound='_emg.csv', values=[str(idx) for idx in range(self.num_reps)], description='reps') + ] + package_function = lambda x, y: Path(x).parent.absolute() == Path(y).parent.absolute() + metadata_fetchers = [FilePackager(RegexFilter(left_bound='/', right_bound='.txt', values=['labels'], description='labels'), package_function)] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + return {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3]), 'Test': odh.isolate_data('reps', [4])} From 75730046f71a0560801b43bbedfeb77b5b2ffc08 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 16 Sep 2024 13:36:36 -0300 Subject: [PATCH 040/129] Add split parameter to prepare_data Return value of prepare_data was changed to a dictionary, which would be a breaking change. Modified this so users can pass in a flag that will determine if it is returned as a dictionary or a data handler. 
--- libemg/_datasets/_3DC.py | 41 +++++++++++++---------- libemg/_datasets/ciil.py | 22 ++++++------ libemg/_datasets/continous_transitions.py | 9 +++-- libemg/_datasets/dataset.py | 2 +- libemg/_datasets/emg_epn612.py | 13 ++++--- libemg/_datasets/fors_emg.py | 9 +++-- libemg/_datasets/grab_myo.py | 18 +++++++--- libemg/_datasets/myodisco.py | 20 +++++++---- libemg/_datasets/nina_pro.py | 17 +++++++--- libemg/_datasets/one_subject_emager.py | 8 +++-- libemg/_datasets/one_subject_myo.py | 11 +++--- 11 files changed, 112 insertions(+), 58 deletions(-) diff --git a/libemg/_datasets/_3DC.py b/libemg/_datasets/_3DC.py index 26a8bfd7..8bc1c62b 100644 --- a/libemg/_datasets/_3DC.py +++ b/libemg/_datasets/_3DC.py @@ -16,24 +16,31 @@ def __init__(self, dataset_folder="_3DCDataset/"): self.url = "https://github.com/libemg/3DCDataset" self.dataset_folder = dataset_folder - def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,23)], - sets_values = ["train", "test"], - reps_values = ["0","1","2","3"], - classes_values = [str(i) for i in range(11)]): + def prepare_data(self, split = False, subjects_values = None, sets_values = None, reps_values = None, + classes_values = None): + if subjects_values is None: + subjects_values = [str(i) for i in range(1,23)] + if sets_values is None: + sets_values = ["train", "test"] + if reps_values is None: + reps_values = ["0","1","2","3"] + if classes_values is None: + classes_values = [str(i) for i in range(11)] + print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): self.download(self.url, self.dataset_folder) - elif (self.redownload): - self.remove_dataset(self.dataset_folder) - self.download(self.url, self.dataset_folder) - if format == OfflineDataHandler: - regex_filters = [ - RegexFilter(left_bound = "/", right_bound="/EMG", values = sets_values, description='sets'), - RegexFilter(left_bound = "_", right_bound=".txt", values = classes_values, 
description='classes'), - RegexFilter(left_bound = "EMG_gesture_", right_bound="_", values = reps_values, description='reps'), - RegexFilter(left_bound="Participant", right_bound="/",values=subjects_values, description='subjects') - ] - odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") - return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} \ No newline at end of file + regex_filters = [ + RegexFilter(left_bound = "/", right_bound="/EMG", values = sets_values, description='sets'), + RegexFilter(left_bound = "_", right_bound=".txt", values = classes_values, description='classes'), + RegexFilter(left_bound = "EMG_gesture_", right_bound="_", values = reps_values, description='reps'), + RegexFilter(left_bound="Participant", right_bound="/",values=subjects_values, description='subjects') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} + + return data \ No newline at end of file diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py index 94833fce..229b20d2 100644 --- a/libemg/_datasets/ciil.py +++ b/libemg/_datasets/ciil.py @@ -16,13 +16,10 @@ def __init__(self, dataset_folder='CIILData/'): self.url = "https://github.com/LibEMG/CIILData" self.dataset_folder = dataset_folder - def prepare_data(self): + def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): self.download(self.url, self.dataset_folder) - elif (self.redownload): - self.remove_dataset(self.dataset_folder) - self.download(self.url, self.dataset_folder) subfolder = 'MinimalTrainingData' subjects = [str(i) for i in range(0, 11)] @@ -38,7 +35,11 @@ def prepare_data(self): odh = OfflineDataHandler() 
odh.get_data(folder_location=self.dataset_folder + '/' + subfolder, regex_filters=regex_filters, delimiter=",") - return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} + + return data class CIIL_ElectrodeShift(Dataset): def __init__(self, dataset_folder='CIILData/'): @@ -54,13 +55,10 @@ def __init__(self, dataset_folder='CIILData/'): self.url = "https://github.com/LibEMG/CIILData" self.dataset_folder = dataset_folder - def prepare_data(self): + def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): self.download(self.url, self.dataset_folder) - elif (self.redownload): - self.remove_dataset(self.dataset_folder) - self.download(self.url, self.dataset_folder) subfolder = 'ElectrodeShift' subjects = [str(i) for i in range(0, 21)] @@ -76,4 +74,8 @@ def prepare_data(self): odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder + '/' + subfolder, regex_filters=regex_filters, delimiter=",") - return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1,2,3,4])} \ No newline at end of file + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1,2,3,4])} + + return data diff --git a/libemg/_datasets/continous_transitions.py b/libemg/_datasets/continous_transitions.py index bee3a8f4..2c7c3162 100644 --- a/libemg/_datasets/continous_transitions.py +++ b/libemg/_datasets/continous_transitions.py @@ -18,7 +18,7 @@ def __init__(self, dataset_folder="ContinuousTransitions/"): "https://ieeexplore.ieee.org/document/10254242") self.dataset_folder = dataset_folder - def prepare_data(self): + def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') if (not 
self.check_exists(self.dataset_folder)): print("Please download the dataset from: ") #TODO: Update @@ -57,4 +57,9 @@ def prepare_data(self): odh_tr.classes.append(np.expand_dims(ramp_labels[r_chg_idxs[i]+1:r_chg_idxs[i+1]]-1, axis=1)) odh_tr.subjects.append(np.ones((len(odh_tr.data[-1]), 1)) * s-2) - return {'All': odh_tr+odh_te, 'Train': odh_tr, 'Test': odh_te} \ No newline at end of file + odh_all = odh_tr + odh_te + data = odh_all + if split: + data = {'All': odh_all, 'Train': odh_tr, 'Test': odh_te} + + return data diff --git a/libemg/_datasets/dataset.py b/libemg/_datasets/dataset.py index 3b232abe..947b3909 100644 --- a/libemg/_datasets/dataset.py +++ b/libemg/_datasets/dataset.py @@ -25,7 +25,7 @@ def remove_dataset(self, dataset_folder): def check_exists(self, dataset_folder): return os.path.exists(dataset_folder) - def prepare_data(self, format=OfflineDataHandler): + def prepare_data(self, split = False): pass def get_info(self): diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 8bdcc3a2..2b98e4e7 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -19,13 +19,13 @@ def __init__(self, dataset_file='EMGEPN612.pkl'): self.url = "https://github.com/libemg/OneSubjectMyoDataset" self.dataset_name = dataset_file - def prepare_data(self): + def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') - if (not self.check_exists(self.dataset_folder)): + if (not self.check_exists(self.dataset_name)): print("Please download the pickled dataset from: https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EWf3sEvRxg9HuAmGoBG2vYkBDXh4xNst3FAXV0lNoodrAA?e=t6HPaR") return - file = open(self.dataset_folder, 'rb') + file = open(self.dataset_name, 'rb') data = pickle.load(file) emg = data[0] @@ -48,4 +48,9 @@ def prepare_data(self): odh_te.classes.append(np.ones((len(e), 1)) * labels['testing'][i]) odh_te.subjects.append(np.ones((len(e), 1)) * (i//150 + 306)) - return {'All': 
odh_tr+odh_te, 'Train': odh_tr, 'Test': odh_te} \ No newline at end of file + odh_all = odh_tr + odh_te + data = odh_all + if split: + data = {'All': odh_all, 'Train': odh_tr, 'Test': odh_te} + + return data \ No newline at end of file diff --git a/libemg/_datasets/fors_emg.py b/libemg/_datasets/fors_emg.py index 89171cf3..1f34f65c 100644 --- a/libemg/_datasets/fors_emg.py +++ b/libemg/_datasets/fors_emg.py @@ -17,7 +17,7 @@ def __init__(self, dataset_folder='FORS-EMG/'): 'https://arxiv.org/abs/2409.07484t') self.dataset_folder = dataset_folder - def prepare_data(self): + def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): print("Please download the pickled dataset from: https://www.kaggle.com/datasets/ummerummanchaity/fors-emg-a-novel-semg-dataset?resource=download") @@ -40,4 +40,9 @@ def prepare_data(self): odh.subjects.append(np.ones((len(odh.data[-1]), 1)) * s-1) odh.reps.append(np.ones((len(odh.data[-1]), 1)) * r-1) odh.orientation.append(np.ones((len(odh.data[-1]), 1)) * o_i) - return {'All': odh, 'Train': odh.isolate_data('orientation', [0]), 'Test': odh.isolate_data('orientation', [1,2])} \ No newline at end of file + + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data('orientation', [0]), 'Test': odh.isolate_data('orientation', [1,2])} + + return data diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py index 694e0d1b..1238cfc5 100644 --- a/libemg/_datasets/grab_myo.py +++ b/libemg/_datasets/grab_myo.py @@ -33,7 +33,7 @@ class GRABMyoCrossDay(GRABMyo): def __init__(self, dataset_folder="GRABMyo"): GRABMyo.__init__(self, dataset_folder=dataset_folder, baseline=False) - def prepare_data(self): + def prepare_data(self, split = False): self.check_if_exist() sessions = ["1", "2", "3"] @@ -54,13 +54,18 @@ def prepare_data(self): forearm_data = odh.isolate_channels(list(range(0,16))) train_data = forearm_data.isolate_data('sets', [0]) 
test_data = forearm_data.isolate_data('sets', [1]) - return {'All': forearm_data, 'Train': train_data, 'Test': test_data} + + data = forearm_data + if split: + data = {'All': forearm_data, 'Train': train_data, 'Test': test_data} + + return data class GRABMyoBaseline(GRABMyo): def __init__(self, dataset_folder="GRABMyo"): GRABMyo.__init__(self, dataset_folder=dataset_folder, baseline=True) - def prepare_data(self): + def prepare_data(self, split = False): self.check_if_exist() sessions = ["1"] @@ -81,4 +86,9 @@ def prepare_data(self): forearm_data = odh.isolate_channels(list(range(0,16))) train_data = forearm_data.isolate_data('reps', [0,1,2,3,4]) test_data = forearm_data.isolate_data('reps', [5,6]) - return {'All': forearm_data, 'Train': train_data, 'Test': test_data} \ No newline at end of file + + data = forearm_data + if split: + data = {'All': forearm_data, 'Train': train_data, 'Test': test_data} + + return data \ No newline at end of file diff --git a/libemg/_datasets/myodisco.py b/libemg/_datasets/myodisco.py index 73ffe15d..5f439a43 100644 --- a/libemg/_datasets/myodisco.py +++ b/libemg/_datasets/myodisco.py @@ -22,13 +22,10 @@ def __init__(self, dataset_folder="MyoDisCo/", cross_day=False): self.url = "https://github.com/libemg/MyoDisCo" self.dataset_folder = dataset_folder - def prepare_data(self): + def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): self.download(self.url, self.dataset_folder) - elif (self.redownload): - self.remove_dataset(self.dataset_folder) - self.download(self.url, self.dataset_folder) sets_values = ['day1', 'day2', 'positions'] @@ -68,7 +65,16 @@ def prepare_data(self): odh.reps[i] = np.ones((len(odh.data[i]), 1)) * odh.reps[i][0][0] odh.subjects[i] = np.ones((len(odh.data[i]), 1)) * odh.subjects[i][0][0] + if self.cross_day: - return {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} - - return {'All': odh, 
'Train': odh.isolate_data("sets", [1]), 'Test': odh.isolate_data("sets", [2])} + odh_train = odh.isolate_data('sets', [0]) + odh_test = odh.isolate_data('sets', [1]) + else: + odh_train = odh.isolate_data('sets', [1]) + odh_test = odh.isolate_data('sets', [2]) + + data = odh + if split: + data = {'All': odh, 'Train': odh_train, 'Test': odh_test} + + return data diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index 736c82a8..3fd65076 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -127,9 +127,14 @@ def __init__(self, dataset_folder="NinaProDB2/"): dataset_folder = dataset_folder) self.exercise_step = [0,0,0] - def prepare_data(self, subjects_values = [str(i) for i in range(1,41)], - reps_values = [str(i) for i in range(6)], - classes_values = [str(i) for i in range(50)]): + def prepare_data(self, split = False, subjects_values = None, reps_values = None, classes_values = None): + if subjects_values is None: + subjects_values = [str(i) for i in range(1,41)] + if reps_values is None: + reps_values = [str(i) for i in range(6)] + if classes_values is None: + classes_values = [str(i) for i in range(50)] + print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): print("Please download the NinaProDB2 dataset from: https://ninapro.hevs.ch/instructions/DB2.html") @@ -142,4 +147,8 @@ def prepare_data(self, subjects_values = [str(i) for i in range(1,41)], ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") - return {'All': odh, 'Train': odh.isolate_data('reps', [0,1,2,3]), 'Test': odh.isolate_data('reps', [4,5])} \ No newline at end of file + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data('reps', [0,1,2,3]), 'Test': odh.isolate_data('reps', [4,5])} + + return data \ No newline at end of file diff --git a/libemg/_datasets/one_subject_emager.py b/libemg/_datasets/one_subject_emager.py index 
ebc1f764..d248e387 100644 --- a/libemg/_datasets/one_subject_emager.py +++ b/libemg/_datasets/one_subject_emager.py @@ -19,7 +19,7 @@ def __init__(self, dataset_folder = 'OneSubjectEMaGerDataset/'): self.url = 'https://github.com/LibEMG/OneSubjectEMaGerDataset' self.dataset_folder = dataset_folder - def prepare_data(self): + def prepare_data(self, split = False): if (not self.check_exists(self.dataset_folder)): self.download(self.url, self.dataset_folder) regex_filters = [ @@ -30,4 +30,8 @@ def prepare_data(self): metadata_fetchers = [FilePackager(RegexFilter(left_bound='/', right_bound='.txt', values=['labels'], description='labels'), package_function)] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - return {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3]), 'Test': odh.isolate_data('reps', [4])} + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3]), 'Test': odh.isolate_data('reps', [4])} + + return data diff --git a/libemg/_datasets/one_subject_myo.py b/libemg/_datasets/one_subject_myo.py index c4ffe7a7..94488506 100644 --- a/libemg/_datasets/one_subject_myo.py +++ b/libemg/_datasets/one_subject_myo.py @@ -16,12 +16,9 @@ def __init__(self, dataset_folder="OneSubjectMyoDataset/"): self.url = "https://github.com/libemg/OneSubjectMyoDataset" self.dataset_folder = dataset_folder - def prepare_data(self, format=OfflineDataHandler): + def prepare_data(self, split = False): if (not self.check_exists(self.dataset_folder)): self.download(self.url, self.dataset_folder) - elif (self.redownload): - self.remove_dataset(self.dataset_folder) - self.download(self.url, self.dataset_folder) sets_values = ["1","2","3","4","5","6"] classes_values = ["0","1","2","3","4"] @@ -33,4 +30,8 @@ def prepare_data(self, format=OfflineDataHandler): ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, 
regex_filters=regex_filters, delimiter=",") - return {'All': odh, 'Train': odh.isolate_data("sets", [0,1,2,3,4]), 'Test': odh.isolate_data("sets", [5,6])} \ No newline at end of file + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data("sets", [0,1,2,3,4]), 'Test': odh.isolate_data("sets", [5,6])} + + return data From 642d26c81849e98d9017aab8730e268fb98d8915 Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 16 Sep 2024 14:53:23 -0300 Subject: [PATCH 041/129] Updates --- dataset_tryout.py | 86 ++++++++++++++++------------- libemg/_datasets/one_subject_myo.py | 5 +- libemg/datasets.py | 2 +- 3 files changed, 52 insertions(+), 41 deletions(-) diff --git a/dataset_tryout.py b/dataset_tryout.py index 8cd617a2..21f9296a 100644 --- a/dataset_tryout.py +++ b/dataset_tryout.py @@ -2,46 +2,54 @@ from libemg.feature_extractor import * from libemg.emg_predictor import EMGClassifier from libemg.offline_metrics import OfflineMetrics - -print(get_dataset_list()) +import pickle -dataset = get_dataset_list()['MyoDisCo'](cross_day=True) -dataset.get_info() -data = dataset.prepare_data() - -train_data = data['Train'] -test_data = data['Test'] +info = { + 'dataset': [], + 'features': [], + 'model': [], + 'accuracies': [], + 'subject': [] +} -print("Loaded Data") - -accuracies = [] -for s in range(0, dataset.num_subjects): - print("Subject: " + str(s)) - s_train_dh = train_data.isolate_data('subjects', [s]) - s_test_dh = test_data.isolate_data('subjects', [s]) - train_windows, train_meta = s_train_dh.parse_windows(30, 5) - test_windows, test_meta = s_test_dh.parse_windows(30, 5) - - fe = FeatureExtractor() - train_feats = fe.extract_features(['WENG'], train_windows) - test_feats = fe.extract_features(['WENG'], test_windows) +for d in get_dataset_list().keys(): + dataset = get_dataset_list()[d]() + dataset.get_info() + data = dataset.prepare_data(split=True) + + train_data = data['Train'] + test_data = data['Test'] + + for s in range(0, dataset.num_subjects): + 
s_train_dh = train_data.isolate_data('subjects', [s]) + s_test_dh = test_data.isolate_data('subjects', [s]) + train_windows, train_meta = s_train_dh.parse_windows(int(dataset.sampling/1000 * 300), int(dataset.sampling/1000 * 50)) + test_windows, test_meta = s_test_dh.parse_windows(int(dataset.sampling/1000 * 300), int(dataset.sampling/1000 * 50)) - # fe.visualize_feature_space(train_feats, 'PCA', train_meta['classes']) - - model = EMGClassifier(model='LDA') - ds = { - 'training_features': train_feats, - 'training_labels': train_meta['classes'] - } - model.fit(ds) - - preds, probs = model.run(test_feats) - om = OfflineMetrics() - accuracies.append(om.get_CA(test_meta['classes'], preds)) - conf_mat = om.get_CONF_MAT(preds, test_meta['classes']) - # om.visualize_conf_matrix(conf_mat) - print(om.get_CA(test_meta['classes'], preds)) - -print('CA: ' + str(np.mean(accuracies)) + ' +/- ' + str(np.std(accuracies))) - + for f_i, feats in enumerate([[['WENG'], {'WENG_fs': dataset.sampling}], [['MAV', 'SSC', 'WL', 'ZC'], {}]]): + fe = FeatureExtractor() + train_feats = fe.extract_features(feats[0], train_windows, feats[1]) + test_feats = fe.extract_features(feats[0], test_windows, feats[1]) + + model = EMGClassifier(model='LDA') + ds = { + 'training_features': train_feats, + 'training_labels': train_meta['classes'] + } + model.fit(ds) + + preds, probs = model.run(test_feats) + om = OfflineMetrics() + conf_mat = om.get_CONF_MAT(preds, test_meta['classes']) + print(om.get_CA(test_meta['classes'], preds)) + + info['accuracies'].append(om.get_CA(test_meta['classes'], preds)) + info['dataset'].append(d) + info['features'].append(f_i) + info['model'].append('LDA') + info['subject'].append(s) + + # Save info every iteration + with open('results.pickle', 'wb') as handle: + pickle.dump(info, handle, protocol=pickle.HIGHEST_PROTOCOL) \ No newline at end of file diff --git a/libemg/_datasets/one_subject_myo.py b/libemg/_datasets/one_subject_myo.py index 94488506..5c3156af 100644 --- 
a/libemg/_datasets/one_subject_myo.py +++ b/libemg/_datasets/one_subject_myo.py @@ -1,6 +1,6 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -import os +import numpy as np class OneSubjectMyoDataset(Dataset): def __init__(self, dataset_folder="OneSubjectMyoDataset/"): @@ -30,6 +30,9 @@ def prepare_data(self, split = False): ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + odh.subjects = [] + odh.subjects = [np.zeros((len(d), 1)) for d in odh.data] + odh.extra_attributes.append('subjects') data = odh if split: data = {'All': odh, 'Train': odh.isolate_data("sets", [0,1,2,3,4]), 'Test': odh.isolate_data("sets", [5,6])} diff --git a/libemg/datasets.py b/libemg/datasets.py index afc2db69..396ae513 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -19,7 +19,6 @@ def get_dataset_list(): return { 'OneSubjectMyo': OneSubjectMyoDataset, '3DC': _3DCDataset, - 'EMGEPN612': EMGEPN612, 'CIIL_MinimalData': CIIL_MinimalData, 'CIIL_ElectrodeShift': CIIL_ElectrodeShift, 'GRABMyoBaseline': GRABMyoBaseline, @@ -28,6 +27,7 @@ def get_dataset_list(): 'NinaProDB2': NinaproDB2, 'MyoDisCo': MyoDisCo, 'FORS-EMG': FORSEMG, + 'EMGEPN612': EMGEPN612, } def get_dataset_info(dataset): From a8759c84ca85eea37b4e3e5396a2ed38fa595d85 Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 16 Sep 2024 14:55:07 -0300 Subject: [PATCH 042/129] Updates --- libemg/_streamers/_oymotion_streamer.py | 60 ++++++++++++++----------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/libemg/_streamers/_oymotion_streamer.py b/libemg/_streamers/_oymotion_streamer.py index 8bdabada..3bd6dedf 100644 --- a/libemg/_streamers/_oymotion_streamer.py +++ b/libemg/_streamers/_oymotion_streamer.py @@ -68,8 +68,11 @@ def start_stream(self): ## BEGIN HARDWARE SPECIFIC CONFIG import platform if platform.system() == 'Linux': - from bluepy import btle - from bluepy.btle import 
DefaultDelegate, Scanner, Peripheral + try: + from bluepy import btle + from bluepy.btle import DefaultDelegate, Scanner, Peripheral + except: + pass from datetime import datetime, timedelta import struct from enum import Enum @@ -288,31 +291,34 @@ def __init__(self, _cmd, _timeoutTime, _cb): self._cb = _cb if platform.system() == 'Linux': - class MyDelegate(btle.DefaultDelegate): - def __init__(self, gforce): - super().__init__() - self.gforce = gforce - self.bluepy_thread = threading.Thread(target=self.bluepy_handler) - self.bluepy_thread.setDaemon(True) - self.bluepy_thread.start() - - def bluepy_handler(self): - while True: - if not self.gforce.send_queue.empty(): - cmd = self.gforce.send_queue.get_nowait() - self.gforce.cmdCharacteristic.write(cmd) - self.gforce.device.waitForNotifications(1) - - def handleNotification(self, cHandle, data): - # check cHandle - # self.gforce.lock.acquire() - if cHandle == self.gforce.cmdCharacteristic.getHandle(): - self.gforce._onResponse(data) - - # check cHandle - if cHandle == self.gforce.notifyCharacteristic.getHandle(): - self.gforce.handleDataNotification(data, self.gforce.onData) - # self.gforce.lock.release() + try: + class MyDelegate(btle.DefaultDelegate): + def __init__(self, gforce): + super().__init__() + self.gforce = gforce + self.bluepy_thread = threading.Thread(target=self.bluepy_handler) + self.bluepy_thread.setDaemon(True) + self.bluepy_thread.start() + + def bluepy_handler(self): + while True: + if not self.gforce.send_queue.empty(): + cmd = self.gforce.send_queue.get_nowait() + self.gforce.cmdCharacteristic.write(cmd) + self.gforce.device.waitForNotifications(1) + + def handleNotification(self, cHandle, data): + # check cHandle + # self.gforce.lock.acquire() + if cHandle == self.gforce.cmdCharacteristic.getHandle(): + self.gforce._onResponse(data) + + # check cHandle + if cHandle == self.gforce.notifyCharacteristic.getHandle(): + self.gforce.handleDataNotification(data, self.gforce.onData) + # 
self.gforce.lock.release() + except: + print('Bluepy not installed...') class GForceProfile(): From 0e830bdc7c3d18032efc22ae1d9f14c2e4e02b2b Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 16 Sep 2024 19:54:36 -0300 Subject: [PATCH 043/129] Fixed Grab Myo --- libemg/_datasets/grab_myo.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py index 1238cfc5..871dc190 100644 --- a/libemg/_datasets/grab_myo.py +++ b/libemg/_datasets/grab_myo.py @@ -42,7 +42,7 @@ def prepare_data(self, split = False): reps_values = ["1","2","3","4","5","6","7"] regex_filters = [ - RegexFilter(left_bound = "session", right_bound="_", values = sessions, description='session'), + RegexFilter(left_bound = "session", right_bound="_", values = sessions, description='sessions'), RegexFilter(left_bound = "_gesture", right_bound="_", values = classes_values, description='classes'), RegexFilter(left_bound = "trial", right_bound=".hea", values = reps_values, description='reps'), RegexFilter(left_bound="participant", right_bound="_",values=subjects, description='subjects') @@ -52,8 +52,8 @@ def prepare_data(self, split = False): odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") forearm_data = odh.isolate_channels(list(range(0,16))) - train_data = forearm_data.isolate_data('sets', [0]) - test_data = forearm_data.isolate_data('sets', [1]) + train_data = forearm_data.isolate_data('sessions', [0]) + test_data = forearm_data.isolate_data('sessions', [1,2]) data = forearm_data if split: From 48bdd449d4856b03435fb4a508edc44f7ff83a85 Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 17 Sep 2024 16:33:45 -0300 Subject: [PATCH 044/129] Added resp to EPN --- libemg/_datasets/emg_epn612.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 2b98e4e7..4fb79993 100644 --- 
a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -34,20 +34,27 @@ def prepare_data(self, split = False): odh_tr = OfflineDataHandler() odh_tr.subjects = [] odh_tr.classes = [] - odh_tr.extra_attributes = ['subjects', 'classes'] + odh_tr.reps = [] + tr_reps = [0,0,0,0,0,0] + odh_tr.extra_attributes = ['subjects', 'classes', 'reps'] for i, e in enumerate(emg['training']): odh_tr.data.append(e) odh_tr.classes.append(np.ones((len(e), 1)) * labels['training'][i]) odh_tr.subjects.append(np.ones((len(e), 1)) * i//150) + odh_tr.reps.append(np.ones((len(e), 1)) * tr_reps[labels['training'][i]]) + tr_reps[labels['training'][i]] += 1 odh_te = OfflineDataHandler() odh_te.subjects = [] odh_te.classes = [] - odh_te.extra_attributes = ['subjects', 'classes'] + odh_te.reps = [] + te_reps = [0,0,0,0,0,0] + odh_te.extra_attributes = ['subjects', 'classes', 'reps'] for i, e in enumerate(emg['testing']): odh_te.data.append(e) odh_te.classes.append(np.ones((len(e), 1)) * labels['testing'][i]) odh_te.subjects.append(np.ones((len(e), 1)) * (i//150 + 306)) - + odh_te.reps.append(np.ones((len(e), 1)) * te_reps[labels['training'][i]]) + tr_reps[labels['training'][i]] += 1 odh_all = odh_tr + odh_te data = odh_all if split: From fff712d27c8393508df8a87afa8769a30d0adbba Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 17 Sep 2024 17:43:01 -0300 Subject: [PATCH 045/129] Updated the data handler to run faster --- libemg/_datasets/emg_epn612.py | 8 +++-- libemg/data_handler.py | 58 +++++++++++++--------------------- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 4fb79993..4e991ee6 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -40,9 +40,11 @@ def prepare_data(self, split = False): for i, e in enumerate(emg['training']): odh_tr.data.append(e) odh_tr.classes.append(np.ones((len(e), 1)) * labels['training'][i]) - odh_tr.subjects.append(np.ones((len(e), 
1)) * i//150) + odh_tr.subjects.append(np.ones((len(e), 1)) * i//300) odh_tr.reps.append(np.ones((len(e), 1)) * tr_reps[labels['training'][i]]) tr_reps[labels['training'][i]] += 1 + if i % 300 == 0: + tr_reps = [0,0,0,0,0,0] odh_te = OfflineDataHandler() odh_te.subjects = [] odh_te.classes = [] @@ -54,7 +56,9 @@ def prepare_data(self, split = False): odh_te.classes.append(np.ones((len(e), 1)) * labels['testing'][i]) odh_te.subjects.append(np.ones((len(e), 1)) * (i//150 + 306)) odh_te.reps.append(np.ones((len(e), 1)) * te_reps[labels['training'][i]]) - tr_reps[labels['training'][i]] += 1 + te_reps[labels['training'][i]] += 1 + if i % 150 == 0: + te_reps = [0,0,0,0,0,0] odh_all = odh_tr + odh_te data = odh_all if split: diff --git a/libemg/data_handler.py b/libemg/data_handler.py index eaf13bc0..af8113a8 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -483,7 +483,7 @@ def isolate_channels(self, channels): new_odh.data[i] = new_odh.data[i][:,channels] return new_odh - def isolate_data(self, key, values): + def isolate_data(self, key, values, fast=False): """Entry point for isolating a single key of data within the offline data handler. First, error checking is performed within this method, then if it passes, the isolate_data_helper is called to make a new OfflineDataHandler that contains only that data. @@ -493,6 +493,8 @@ def isolate_data(self, key, values): The metadata key that will be used to filter (e.g., "subject", "rep", "class", "set", whatever you'd like). values: list A list of values that you want to isolate. (e.g. [0,1,2,3]). Indexing starts at 0. + fast: Boolean (default=False) + If true, it iterates over the median value for each EMG element. This should be used when parsing on things like reps, subjects, classes, etc. 
Returns ---------- @@ -501,47 +503,31 @@ def isolate_data(self, key, values): """ assert key in self.extra_attributes assert type(values) == list - return self._isolate_data_helper(key,values) + return self._isolate_data_helper(key,values,fast) - def _isolate_data_helper(self, key, values): + def _isolate_data_helper(self, key, values,fast): new_odh = OfflineDataHandler() setattr(new_odh, "extra_attributes", self.extra_attributes) key_attr = getattr(self, key) - - # if these end up being ndarrays, it means that the metadata was IN the csv file. - - if type(key_attr[0]) == np.ndarray: - # for every file (list element) - data = [] - for f in range(len(key_attr)): - # get the keep_mask + for e in self.extra_attributes: + setattr(new_odh, e, []) + + for f in range(len(key_attr)): + if fast: + if key_attr[f][0][0] in values: + keep_mask = [True] * len(key_attr[f]) + else: + keep_mask = [False] * len(key_attr[f]) + else: keep_mask = list([i in values for i in key_attr[f]]) - # append the valid data - if self.data[f][keep_mask,:].shape[0]> 0: - data.append(self.data[f][keep_mask,:]) - setattr(new_odh, "data", data) + + if self.data[f][keep_mask,:].shape[0]> 0: + new_odh.data.append(self.data[f][keep_mask,:]) + for e in self.extra_attributes: + updated_arr = getattr(new_odh, e) + updated_arr.append(getattr(self, e)[f][keep_mask]) + setattr(new_odh, e, updated_arr) - for k in self.extra_attributes: - key_value = getattr(self, k) - if type(key_value[0]) == np.ndarray: - # the other metadata that is in the csv file should be sliced the same way as the ndarray - key = [] - for f in range(len(key_attr)): - keep_mask = list([i in values for i in key_attr[f]]) - if key_value[f][keep_mask,:].shape[0]>0: - key.append(key_value[f][keep_mask,:]) - setattr(new_odh, k, key) - - else: - assert False # we should never get here - # # if the other metadata was not in the csv file (i.e. 
subject label in filename but classes in csv), then just keep it - # setattr(new_odh, k, key_value) - else: - assert False # we should never get here - # keep_mask = list([i in values for i in key_attr]) - # setattr(new_odh, "data", list(compress(self.data, keep_mask))) - # for k in self.extra_attributes: - # setattr(new_odh, k,list(compress(getattr(self, k), keep_mask))) return new_odh def visualize(): From dcaa0065fd3172a608ec006e4d05e50cecdbf4be Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 17 Sep 2024 18:10:54 -0300 Subject: [PATCH 046/129] Made them all parse fast --- libemg/_datasets/ciil.py | 2 +- libemg/_datasets/fors_emg.py | 2 +- libemg/_datasets/grab_myo.py | 8 ++++---- libemg/_datasets/myodisco.py | 8 ++++---- libemg/_datasets/nina_pro.py | 2 +- libemg/_datasets/one_subject_myo.py | 2 +- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py index 229b20d2..0014d0c1 100644 --- a/libemg/_datasets/ciil.py +++ b/libemg/_datasets/ciil.py @@ -37,7 +37,7 @@ def prepare_data(self, split = False): data = odh if split: - data = {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} + data = {'All': odh, 'Train': odh.isolate_data("sets", [0], fast=True), 'Test': odh.isolate_data("sets", [1], fast=True)} return data diff --git a/libemg/_datasets/fors_emg.py b/libemg/_datasets/fors_emg.py index 1f34f65c..b9d2f5c4 100644 --- a/libemg/_datasets/fors_emg.py +++ b/libemg/_datasets/fors_emg.py @@ -43,6 +43,6 @@ def prepare_data(self, split = False): data = odh if split: - data = {'All': odh, 'Train': odh.isolate_data('orientation', [0]), 'Test': odh.isolate_data('orientation', [1,2])} + data = {'All': odh, 'Train': odh.isolate_data('orientation', [0], fast=True), 'Test': odh.isolate_data('orientation', [1,2], fast=True)} return data diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py index 871dc190..62793c60 100644 --- a/libemg/_datasets/grab_myo.py 
+++ b/libemg/_datasets/grab_myo.py @@ -52,8 +52,8 @@ def prepare_data(self, split = False): odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") forearm_data = odh.isolate_channels(list(range(0,16))) - train_data = forearm_data.isolate_data('sessions', [0]) - test_data = forearm_data.isolate_data('sessions', [1,2]) + train_data = forearm_data.isolate_data('sessions', [0], fast=True) + test_data = forearm_data.isolate_data('sessions', [1,2], fast=True) data = forearm_data if split: @@ -84,8 +84,8 @@ def prepare_data(self, split = False): odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") forearm_data = odh.isolate_channels(list(range(0,16))) - train_data = forearm_data.isolate_data('reps', [0,1,2,3,4]) - test_data = forearm_data.isolate_data('reps', [5,6]) + train_data = forearm_data.isolate_data('reps', [0,1,2,3,4], fast=True) + test_data = forearm_data.isolate_data('reps', [5,6], fast=True) data = forearm_data if split: diff --git a/libemg/_datasets/myodisco.py b/libemg/_datasets/myodisco.py index 5f439a43..1b0f3285 100644 --- a/libemg/_datasets/myodisco.py +++ b/libemg/_datasets/myodisco.py @@ -67,11 +67,11 @@ def prepare_data(self, split = False): if self.cross_day: - odh_train = odh.isolate_data('sets', [0]) - odh_test = odh.isolate_data('sets', [1]) + odh_train = odh.isolate_data('sets', [0], fast=True) + odh_test = odh.isolate_data('sets', [1], fast=True) else: - odh_train = odh.isolate_data('sets', [1]) - odh_test = odh.isolate_data('sets', [2]) + odh_train = odh.isolate_data('sets', [1], fast=True) + odh_test = odh.isolate_data('sets', [2], fast=True) data = odh if split: diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index 3fd65076..516a9cc9 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -149,6 +149,6 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None 
odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") data = odh if split: - data = {'All': odh, 'Train': odh.isolate_data('reps', [0,1,2,3]), 'Test': odh.isolate_data('reps', [4,5])} + data = {'All': odh, 'Train': odh.isolate_data('reps', [0,1,2,3], fast=True), 'Test': odh.isolate_data('reps', [4,5], fast=True)} return data \ No newline at end of file diff --git a/libemg/_datasets/one_subject_myo.py b/libemg/_datasets/one_subject_myo.py index 5c3156af..d01737f0 100644 --- a/libemg/_datasets/one_subject_myo.py +++ b/libemg/_datasets/one_subject_myo.py @@ -35,6 +35,6 @@ def prepare_data(self, split = False): odh.extra_attributes.append('subjects') data = odh if split: - data = {'All': odh, 'Train': odh.isolate_data("sets", [0,1,2,3,4]), 'Test': odh.isolate_data("sets", [5,6])} + data = {'All': odh, 'Train': odh.isolate_data("sets", [0,1,2,3,4], fast=True), 'Test': odh.isolate_data("sets", [5,6], fast=True)} return data From 426797164589643a9e97fd42249cf898b529a4a7 Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 17 Sep 2024 21:04:21 -0300 Subject: [PATCH 047/129] Sped up window parsing --- libemg/data_handler.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index af8113a8..06f0cda1 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -421,19 +421,15 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio 'median': np.median, 'last_sample': lambda x: x[-1] } - - metadata_ = {} + window_data = [] + metadata = {k: [] for k in self.extra_attributes} for i, file in enumerate(self.data): # emg data windowing - windows = get_windows(file,window_size,window_increment) - if "windows_" in locals(): - windows_ = np.concatenate((windows_, windows)) - else: - windows_ = windows - # metadata windowing + window_data.append(get_windows(file,window_size,window_increment)) + for k in self.extra_attributes: if 
type(getattr(self,k)[i]) != np.ndarray: - file_metadata = np.ones((windows.shape[0])) * getattr(self, k)[i] + file_metadata = np.ones((window_data[-1].shape[0])) * getattr(self, k)[i] else: if metadata_operations is not None: if k in metadata_operations.keys(): @@ -450,13 +446,10 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) else: file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) - if k not in metadata_.keys(): - metadata_[k] = file_metadata - else: - metadata_[k] = np.concatenate((metadata_[k], file_metadata)) - + + metadata[k].append(file_metadata) - return windows_, metadata_ + return np.vstack(window_data), {np.hstack(metadata[k]) for k in metadata.keys()} def isolate_channels(self, channels): From e8284c74d7bbe5941c92e8f70705ea6dc726c0a8 Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 17 Sep 2024 21:12:18 -0300 Subject: [PATCH 048/129] Fixed data handler --- libemg/data_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 06f0cda1..5a331e3a 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -449,7 +449,7 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio metadata[k].append(file_metadata) - return np.vstack(window_data), {np.hstack(metadata[k]) for k in metadata.keys()} + return np.vstack(window_data), {k: np.hstack(metadata[k]) for k in metadata.keys()} def isolate_channels(self, channels): From 203cbc8ed3aa4b7e03c13d1dc649b413a4e6dc7e Mon Sep 17 00:00:00 2001 From: eeddy Date: Wed, 18 Sep 2024 16:02:01 -0300 Subject: [PATCH 049/129] Updated ref --- libemg/_datasets/_3DC.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/_datasets/_3DC.py b/libemg/_datasets/_3DC.py index 8bc1c62b..4b4f33b9 100644 --- a/libemg/_datasets/_3DC.py +++ 
b/libemg/_datasets/_3DC.py @@ -12,7 +12,7 @@ def __init__(self, dataset_folder="_3DCDataset/"): {0: "Neutral", 1: "Radial Deviation", 2: "Wrist Flexion", 3: "Ulnar Deviation", 4: "Wrist Extension", 5: "Supination", 6: "Pronation", 7: "Power Grip", 8: "Open Hand", 9: "Chuck Grip", 10: "Pinch Grip"}, '8 (4 Train, 4 Test)', "The 3DC dataset including 11 classes.", - "https://ieeexplore.ieee.org/document/8630679") + "https://doi.org/10.3389/fbioe.2020.00158") self.url = "https://github.com/libemg/3DCDataset" self.dataset_folder = dataset_folder From 30fef474b43083ea668a0681eb245a56137079d3 Mon Sep 17 00:00:00 2001 From: eeddy Date: Wed, 18 Sep 2024 18:13:42 -0300 Subject: [PATCH 050/129] Made faster --- .gitignore | 6 ++- dataset_tryout.py | 54 +----------------------- libemg/_datasets/_3DC.py | 2 +- libemg/_datasets/ciil.py | 2 +- libemg/_datasets/one_subject_emager.py | 2 +- libemg/datasets.py | 58 +++++++++++++++++++++++++- 6 files changed, 66 insertions(+), 58 deletions(-) diff --git a/.gitignore b/.gitignore index eec03535..12c3b8a2 100644 --- a/.gitignore +++ b/.gitignore @@ -53,4 +53,8 @@ FORS-EMG/* MyoDisCo/* NinaProDB1/* *.zip -libemg/_datasets/__pycache__/* \ No newline at end of file +libemg/_datasets/__pycache__/* +CIILData/* +EMGEPN612.pkl +OneSubjectMyoDataset/ +_3DCDataset/ diff --git a/dataset_tryout.py b/dataset_tryout.py index 21f9296a..d4e6d4f5 100644 --- a/dataset_tryout.py +++ b/dataset_tryout.py @@ -1,55 +1,3 @@ from libemg.datasets import * -from libemg.feature_extractor import * -from libemg.emg_predictor import EMGClassifier -from libemg.offline_metrics import OfflineMetrics -import pickle -info = { - 'dataset': [], - 'features': [], - 'model': [], - 'accuracies': [], - 'subject': [] -} - -for d in get_dataset_list().keys(): - dataset = get_dataset_list()[d]() - dataset.get_info() - data = dataset.prepare_data(split=True) - - train_data = data['Train'] - test_data = data['Test'] - - for s in range(0, dataset.num_subjects): - s_train_dh = 
train_data.isolate_data('subjects', [s]) - s_test_dh = test_data.isolate_data('subjects', [s]) - train_windows, train_meta = s_train_dh.parse_windows(int(dataset.sampling/1000 * 300), int(dataset.sampling/1000 * 50)) - test_windows, test_meta = s_test_dh.parse_windows(int(dataset.sampling/1000 * 300), int(dataset.sampling/1000 * 50)) - - for f_i, feats in enumerate([[['WENG'], {'WENG_fs': dataset.sampling}], [['MAV', 'SSC', 'WL', 'ZC'], {}]]): - fe = FeatureExtractor() - train_feats = fe.extract_features(feats[0], train_windows, feats[1]) - test_feats = fe.extract_features(feats[0], test_windows, feats[1]) - - model = EMGClassifier(model='LDA') - ds = { - 'training_features': train_feats, - 'training_labels': train_meta['classes'] - } - model.fit(ds) - - preds, probs = model.run(test_feats) - om = OfflineMetrics() - conf_mat = om.get_CONF_MAT(preds, test_meta['classes']) - print(om.get_CA(test_meta['classes'], preds)) - - info['accuracies'].append(om.get_CA(test_meta['classes'], preds)) - info['dataset'].append(d) - info['features'].append(f_i) - info['model'].append('LDA') - info['subject'].append(s) - - # Save info every iteration - with open('results.pickle', 'wb') as handle: - pickle.dump(info, handle, protocol=pickle.HIGHEST_PROTOCOL) - \ No newline at end of file +evaluate('LDA', 300, 100, included_datasets=['OneSubjectMyo', '3DC', 'CIIL_ElectrodeShift']) \ No newline at end of file diff --git a/libemg/_datasets/_3DC.py b/libemg/_datasets/_3DC.py index 4b4f33b9..04ce0ede 100644 --- a/libemg/_datasets/_3DC.py +++ b/libemg/_datasets/_3DC.py @@ -41,6 +41,6 @@ def prepare_data(self, split = False, subjects_values = None, sets_values = None odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") data = odh if split: - data = {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1])} + data = {'All': odh, 'Train': odh.isolate_data("sets", [0], fast=True), 'Test': odh.isolate_data("sets", [1], 
fast=True)} return data \ No newline at end of file diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py index 0014d0c1..7ebd3850 100644 --- a/libemg/_datasets/ciil.py +++ b/libemg/_datasets/ciil.py @@ -76,6 +76,6 @@ def prepare_data(self, split = False): data = odh if split: - data = {'All': odh, 'Train': odh.isolate_data("sets", [0]), 'Test': odh.isolate_data("sets", [1,2,3,4])} + data = {'All': odh, 'Train': odh.isolate_data("sets", [0], fast=True), 'Test': odh.isolate_data("sets", [1,2,3,4], fast=True)} return data diff --git a/libemg/_datasets/one_subject_emager.py b/libemg/_datasets/one_subject_emager.py index d248e387..e37139b4 100644 --- a/libemg/_datasets/one_subject_emager.py +++ b/libemg/_datasets/one_subject_emager.py @@ -32,6 +32,6 @@ def prepare_data(self, split = False): odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) data = odh if split: - data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3]), 'Test': odh.isolate_data('reps', [4])} + data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3], fast=True), 'Test': odh.isolate_data('reps', [4], fast=True)} return data diff --git a/libemg/datasets.py b/libemg/datasets.py index 396ae513..204a8651 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -7,6 +7,10 @@ from libemg._datasets.nina_pro import NinaproDB2 from libemg._datasets.myodisco import MyoDisCo from libemg._datasets.fors_emg import FORSEMG +from libemg.feature_extractor import FeatureExtractor +from libemg.emg_predictor import EMGClassifier +from libemg.offline_metrics import OfflineMetrics +from libemg.filtering import Filter def get_dataset_list(): """Gets a list of all available datasets. @@ -31,9 +35,61 @@ def get_dataset_list(): } def get_dataset_info(dataset): + """Prints out the information about a certain dataset. + + Parameters + ---------- + dataset: string + The name of the dataset you want the information of. 
+ """ if dataset in get_dataset_list(): get_dataset_list()[dataset]().get_info() else: print("ERROR: Invalid dataset name") - \ No newline at end of file +#TODO: Update docs +def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_datasets=['OneSubjectMyo', '3DC', 'CIIL_ElectrodeShift', 'GRABMyoCrossDay'], feature_dic={}): + """Evaluates an algorithm against all included datasets. + + Parameters + ---------- + window_size: int + The window size (**in ms**). + window_inc: int + The window increment (**in ms**). + """ + for d in included_datasets: + print('Evaluating ' + d + ' dataset...') + dataset = get_dataset_list()[d]() + dataset.get_info() + data = dataset.prepare_data(split=True) + + train_data = data['Train'] + test_data = data['Test'] + + filter = Filter(dataset.sampling) + filter.install_common_filters() + filter.filter(train_data) + filter.filter(test_data) + + for s in range(0, dataset.num_subjects): + print(str(s) + '/' + str(dataset.num_subjects) + ' completed.') + s_train_dh = train_data.isolate_data('subjects', [s]) + s_test_dh = test_data.isolate_data('subjects', [s]) + train_windows, train_meta = s_train_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc)) + test_windows, test_meta = s_test_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc)) + + fe = FeatureExtractor() + train_feats = fe.extract_features(feature_list, train_windows, feature_dic=feature_dic) + test_feats = fe.extract_features(feature_list, test_windows, feature_dic=feature_dic) + + model = EMGClassifier(model) + ds = { + 'training_features': train_feats, + 'training_labels': train_meta['classes'] + } + model.fit(ds) + + preds, _ = model.run(test_feats) + om = OfflineMetrics() + print(om.get_CA(test_meta['classes'], preds)) \ No newline at end of file From ebd500be4243710fe5c766fd935e7ebe562b1e6f Mon Sep 17 00:00:00 2001 From: eeddy Date: Wed, 18 Sep 2024 18:23:05 -0300 
Subject: [PATCH 051/129] Fixed --- libemg/datasets.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 204a8651..66cc6d96 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -61,7 +61,6 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_data for d in included_datasets: print('Evaluating ' + d + ' dataset...') dataset = get_dataset_list()[d]() - dataset.get_info() data = dataset.prepare_data(split=True) train_data = data['Train'] @@ -83,13 +82,13 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_data train_feats = fe.extract_features(feature_list, train_windows, feature_dic=feature_dic) test_feats = fe.extract_features(feature_list, test_windows, feature_dic=feature_dic) - model = EMGClassifier(model) + clf = EMGClassifier(model) ds = { 'training_features': train_feats, 'training_labels': train_meta['classes'] } - model.fit(ds) + clf.fit(ds) - preds, _ = model.run(test_feats) + preds, _ = clf.run(test_feats) om = OfflineMetrics() print(om.get_CA(test_meta['classes'], preds)) \ No newline at end of file From d8789d970274a1c5c94166f26855684025d620c7 Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 19 Sep 2024 13:37:58 -0300 Subject: [PATCH 052/129] Updated --- libemg/_datasets/emg_epn612.py | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 4e991ee6..fbad69f9 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -1,8 +1,7 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -import os import pickle -import json +import random import numpy as np class EMGEPN612(Dataset): @@ -20,6 +19,7 @@ def __init__(self, dataset_file='EMGEPN612.pkl'): self.dataset_name = dataset_file def prepare_data(self, split = False): + random.seed(1) 
print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_name)): print("Please download the pickled dataset from: https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EWf3sEvRxg9HuAmGoBG2vYkBDXh4xNst3FAXV0lNoodrAA?e=t6HPaR") @@ -59,9 +59,32 @@ def prepare_data(self, split = False): te_reps[labels['training'][i]] += 1 if i % 150 == 0: te_reps = [0,0,0,0,0,0] - odh_all = odh_tr + odh_te + + odh_all = odh_tr + odh_te # Has no cropping + odh_tr = self._update_odh(odh_tr) + odh_te = self._update_odh(odh_te) + data = odh_all if split: data = {'All': odh_all, 'Train': odh_tr, 'Test': odh_te} - return data \ No newline at end of file + return data + + def _update_odh(self, odh): + active = [c[0][0] != 0 for c in odh.classes] + lens = [len(e) for e in np.array(odh.data, dtype='object')[active]] + for i_e, e in enumerate(odh.data): + if odh.classes[i_e][0][0] == 0: + # It is no motion and we need to crop it (make datset even) + odh.data[i_e] = e[100:100+random.randint(min(lens), max(lens))] + else: + # Crop first and last 20% of each gesture + start_idx = int(len(e) * 0.2) + end_idx = len(e) - int(len(e) * 0.2) + odh.data[i_e] = e[start_idx:end_idx] + odh.subjects[i_e] = odh.subjects[i_e][start_idx:end_idx] + odh.classes[i_e] = odh.classes[i_e][start_idx:end_idx] + odh.reps[i_e] = odh.reps[i_e][start_idx:end_idx] + return odh + + \ No newline at end of file From d74f08b4f7bb6f94cb950b448df409175e6f5278 Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 19 Sep 2024 16:45:59 -0300 Subject: [PATCH 053/129] Updates --- libemg/_datasets/emg_epn612.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index fbad69f9..9e7c1675 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -77,14 +77,6 @@ def _update_odh(self, odh): if odh.classes[i_e][0][0] == 0: # It is no motion and we need to crop it (make datset even) odh.data[i_e] = 
e[100:100+random.randint(min(lens), max(lens))] - else: - # Crop first and last 20% of each gesture - start_idx = int(len(e) * 0.2) - end_idx = len(e) - int(len(e) * 0.2) - odh.data[i_e] = e[start_idx:end_idx] - odh.subjects[i_e] = odh.subjects[i_e][start_idx:end_idx] - odh.classes[i_e] = odh.classes[i_e][start_idx:end_idx] - odh.reps[i_e] = odh.reps[i_e][start_idx:end_idx] return odh \ No newline at end of file From 580094dbe1bf53ac8b7cd378042cb3a5f5c63a92 Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 19 Sep 2024 16:52:59 -0300 Subject: [PATCH 054/129] Updates --- libemg/_datasets/emg_epn612.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 9e7c1675..a667a0ab 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -77,6 +77,9 @@ def _update_odh(self, odh): if odh.classes[i_e][0][0] == 0: # It is no motion and we need to crop it (make datset even) odh.data[i_e] = e[100:100+random.randint(min(lens), max(lens))] + odh.subjects[i_e] = odh.subjects[i_e][100:100+random.randint(min(lens), max(lens))] + odh.classes[i_e] = odh.classes[i_e][100:100+random.randint(min(lens), max(lens))] + odh.reps[i_e] = odh.reps[i_e][100:100+random.randint(min(lens), max(lens))] return odh \ No newline at end of file From 06e8dec1905ff86f5b0d551c6f01afca17ce6139 Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 19 Sep 2024 16:58:47 -0300 Subject: [PATCH 055/129] Updates --- libemg/_datasets/emg_epn612.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index a667a0ab..a050d867 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -59,10 +59,10 @@ def prepare_data(self, split = False): te_reps[labels['training'][i]] += 1 if i % 150 == 0: te_reps = [0,0,0,0,0,0] - - odh_all = odh_tr + odh_te # Has no cropping + odh_tr = self._update_odh(odh_tr) odh_te = 
self._update_odh(odh_te) + odh_all = odh_tr + odh_te data = odh_all if split: @@ -75,11 +75,12 @@ def _update_odh(self, odh): lens = [len(e) for e in np.array(odh.data, dtype='object')[active]] for i_e, e in enumerate(odh.data): if odh.classes[i_e][0][0] == 0: + idx = random.randint(min(lens), max(lens)) # It is no motion and we need to crop it (make datset even) - odh.data[i_e] = e[100:100+random.randint(min(lens), max(lens))] - odh.subjects[i_e] = odh.subjects[i_e][100:100+random.randint(min(lens), max(lens))] - odh.classes[i_e] = odh.classes[i_e][100:100+random.randint(min(lens), max(lens))] - odh.reps[i_e] = odh.reps[i_e][100:100+random.randint(min(lens), max(lens))] + odh.data[i_e] = e[100:100+idx] + odh.subjects[i_e] = odh.subjects[i_e][100:100+idx] + odh.classes[i_e] = odh.classes[i_e][100:100+idx] + odh.reps[i_e] = odh.reps[i_e][100:100+idx] return odh \ No newline at end of file From a9fc17891c18976b0e90faf1b5c91cddfc144bbc Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 19 Sep 2024 20:55:12 -0300 Subject: [PATCH 056/129] Added cropping --- libemg/_datasets/emg_epn612.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index a050d867..4d817da0 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -81,6 +81,14 @@ def _update_odh(self, odh): odh.subjects[i_e] = odh.subjects[i_e][100:100+idx] odh.classes[i_e] = odh.classes[i_e][100:100+idx] odh.reps[i_e] = odh.reps[i_e][100:100+idx] + else: + # It is an active class and we are croppign it + min_idx = int(len(e) * 0.2) + max_idx = len(e) - int(len(e) * 0.2) + odh.data[i_e] = e[min_idx:max_idx] + odh.subjects[i_e] = odh.subjects[i_e][min_idx:max_idx] + odh.classes[i_e] = odh.classes[i_e][min_idx:max_idx] + odh.reps[i_e] = odh.reps[i_e][min_idx:max_idx] return odh \ No newline at end of file From fa6551377a896a032eed161b878c169961c7392a Mon Sep 17 00:00:00 2001 From: eeddy Date: Fri, 20 Sep 2024 09:39:46 -0300 
Subject: [PATCH 057/129] Updated to crop better --- libemg/_datasets/emg_epn612.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 4d817da0..96ec5300 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -1,8 +1,10 @@ from libemg._datasets.dataset import Dataset -from libemg.data_handler import OfflineDataHandler, RegexFilter +from libemg.data_handler import OfflineDataHandler import pickle import random import numpy as np +from libemg.feature_extractor import FeatureExtractor +from libemg.utils import * class EMGEPN612(Dataset): def __init__(self, dataset_file='EMGEPN612.pkl'): @@ -71,24 +73,26 @@ def prepare_data(self, split = False): return data def _update_odh(self, odh): - active = [c[0][0] != 0 for c in odh.classes] - lens = [len(e) for e in np.array(odh.data, dtype='object')[active]] + fe = FeatureExtractor() for i_e, e in enumerate(odh.data): if odh.classes[i_e][0][0] == 0: - idx = random.randint(min(lens), max(lens)) # It is no motion and we need to crop it (make datset even) - odh.data[i_e] = e[100:100+idx] - odh.subjects[i_e] = odh.subjects[i_e][100:100+idx] - odh.classes[i_e] = odh.classes[i_e][100:100+idx] - odh.reps[i_e] = odh.reps[i_e][100:100+idx] + odh.data[i_e] = e[100:200] + odh.subjects[i_e] = odh.subjects[i_e][100:200] + odh.classes[i_e] = odh.classes[i_e][100:200] + odh.reps[i_e] = odh.reps[i_e][100:200] else: - # It is an active class and we are croppign it - min_idx = int(len(e) * 0.2) - max_idx = len(e) - int(len(e) * 0.2) - odh.data[i_e] = e[min_idx:max_idx] - odh.subjects[i_e] = odh.subjects[i_e][min_idx:max_idx] - odh.classes[i_e] = odh.classes[i_e][min_idx:max_idx] - odh.reps[i_e] = odh.reps[i_e][min_idx:max_idx] + # It is an active class and we are croppign it + if len(e) > 100: + windows = get_windows(e, 20, 5) + feats = fe.extract_features(['MAV'], windows, array=True) + mval = 
np.argmax(np.mean(feats, axis=1)) * 5 + max_idx = min([len(e), mval + 50]) + min_idx = max([0, mval - 50]) + odh.data[i_e] = e[min_idx:max_idx] + odh.subjects[i_e] = odh.subjects[i_e][min_idx:max_idx] + odh.classes[i_e] = odh.classes[i_e][min_idx:max_idx] + odh.reps[i_e] = odh.reps[i_e][min_idx:max_idx] return odh \ No newline at end of file From 6ed821ace777470f72df66ad881206f0e3060770 Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 24 Sep 2024 16:09:37 -0300 Subject: [PATCH 058/129] Updates --- libemg/_datasets/emg_epn612.py | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 96ec5300..42d07858 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -21,7 +21,6 @@ def __init__(self, dataset_file='EMGEPN612.pkl'): self.dataset_name = dataset_file def prepare_data(self, split = False): - random.seed(1) print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_name)): print("Please download the pickled dataset from: https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EWf3sEvRxg9HuAmGoBG2vYkBDXh4xNst3FAXV0lNoodrAA?e=t6HPaR") @@ -73,26 +72,13 @@ def prepare_data(self, split = False): return data def _update_odh(self, odh): - fe = FeatureExtractor() for i_e, e in enumerate(odh.data): if odh.classes[i_e][0][0] == 0: # It is no motion and we need to crop it (make datset even) - odh.data[i_e] = e[100:200] - odh.subjects[i_e] = odh.subjects[i_e][100:200] - odh.classes[i_e] = odh.classes[i_e][100:200] - odh.reps[i_e] = odh.reps[i_e][100:200] - else: - # It is an active class and we are croppign it - if len(e) > 100: - windows = get_windows(e, 20, 5) - feats = fe.extract_features(['MAV'], windows, array=True) - mval = np.argmax(np.mean(feats, axis=1)) * 5 - max_idx = min([len(e), mval + 50]) - min_idx = max([0, mval - 50]) - odh.data[i_e] = e[min_idx:max_idx] - odh.subjects[i_e] = 
odh.subjects[i_e][min_idx:max_idx] - odh.classes[i_e] = odh.classes[i_e][min_idx:max_idx] - odh.reps[i_e] = odh.reps[i_e][min_idx:max_idx] + odh.data[i_e] = e[100:300] + odh.subjects[i_e] = odh.subjects[i_e][100:300] + odh.classes[i_e] = odh.classes[i_e][100:300] + odh.reps[i_e] = odh.reps[i_e][100:300] return odh \ No newline at end of file From bbf80c48b86820062f6d2767645d33e94a77005e Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 24 Sep 2024 16:31:16 -0300 Subject: [PATCH 059/129] Undo --- libemg/_datasets/emg_epn612.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 42d07858..afe4ec7e 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -72,6 +72,7 @@ def prepare_data(self, split = False): return data def _update_odh(self, odh): + fe = FeatureExtractor() for i_e, e in enumerate(odh.data): if odh.classes[i_e][0][0] == 0: # It is no motion and we need to crop it (make datset even) @@ -79,6 +80,18 @@ def _update_odh(self, odh): odh.subjects[i_e] = odh.subjects[i_e][100:300] odh.classes[i_e] = odh.classes[i_e][100:300] odh.reps[i_e] = odh.reps[i_e][100:300] + else: + # It is an active class and we are croppign it + if len(e) > 100: + windows = get_windows(e, 20, 5) + feats = fe.extract_features(['MAV'], windows, array=True) + mval = np.argmax(np.mean(feats, axis=1)) * 5 + max_idx = min([len(e), mval + 50]) + min_idx = max([0, mval - 50]) + odh.data[i_e] = e[min_idx:max_idx] + odh.subjects[i_e] = odh.subjects[i_e][min_idx:max_idx] + odh.classes[i_e] = odh.classes[i_e][min_idx:max_idx] + odh.reps[i_e] = odh.reps[i_e][min_idx:max_idx] return odh \ No newline at end of file From a65a8e9d45331911626b5cbd21d8d5a39bbbeeb8 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Tue, 24 Sep 2024 16:32:56 -0300 Subject: [PATCH 060/129] Hyser dataset Started working on Hyser dataset. Created parent class and started 1DOF class. 
Both classes still need to be tested. --- libemg/_datasets/hyser.py | 88 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 libemg/_datasets/hyser.py diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py new file mode 100644 index 00000000..56ce6409 --- /dev/null +++ b/libemg/_datasets/hyser.py @@ -0,0 +1,88 @@ +from pathlib import Path +from abc import ABC, abstractmethod + +from libemg.data_handler import RegexFilter, FilePackager, OfflineDataHandler +from libemg._datasets.dataset import Dataset + +class Hyser(Dataset, ABC): + def __init__(self, gestures, num_reps, description, dataset_folder): + # super().__init__( + # sampling=1010, + # num_channels=64, + # recording_device='EMaGer', + # num_subjects=1, + # gestures={0: 'Hand Close (-) / Hand Open (+)', 1: 'Pronation (-) / Supination (+)'}, + # num_reps=5, + # description='A simple EMaGer dataset used for regression examples in LibEMG demos.', + # citation='N/A' + # ) + super().__init__( + sampling=2048, + num_channels=256, + recording_device='OT Bioelettronica Quattrocento', + num_subjects=20, + gestures=gestures, + num_reps=num_reps, + description=description, + citation='https://doi.org/10.13026/ym7v-bh53' + ) + + self.url = 'https://www.physionet.org/content/hd-semg/1.0.0/#files-panel' + self.dataset_folder = dataset_folder + + def prepare_data(self, split = False): + if (not self.check_exists(self.dataset_folder)): + raise FileNotFoundError(f"Didn't find Hyser data in {self.dataset_folder}. Please download the dataset and store it in the appropriate directory before running + prepare_data(). 
See {self.url} for download details.") + return self._prepare_data_helper(split=split) + # regex_filters = [ + # RegexFilter(left_bound='/', right_bound='/', values=['open-close', 'pro-sup'], description='movements'), + # RegexFilter(left_bound='_R_', right_bound='_emg.csv', values=[str(idx) for idx in range(self.num_reps)], description='reps') + # ] + # package_function = lambda x, y: Path(x).parent.absolute() == Path(y).parent.absolute() + # metadata_fetchers = [FilePackager(RegexFilter(left_bound='/', right_bound='.txt', values=['labels'], description='labels'), package_function)] + # odh = OfflineDataHandler() + # odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + # data = odh + # if split: + # data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3], fast=True), 'Test': odh.isolate_data('reps', [4], fast=True)} + + # return data + + @abstractmethod + def _prepare_data_helper(self, split = False): + ... + + +class Hyser1DOF(Hyser): + def __init__(self, dataset_folder = 'Hyser1DOF'): + gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little Finger'} + definition = 'Hyser 1 DOF dataset. Includes within-DOF finger movements. Ground truth finger forces are recorded for use in finger force regression.' 
+ super().__init__(gestures=gestures, num_reps=3, description=definition, dataset_folder=dataset_folder) + + def _prepare_data_helper(self, split = False): + def package_function(metadata_file, data_file): + same_parent_directory = Path(metadata_file).parent.absolute() == Path(data_file).parent.absolute() + + finger_filter = RegexFilter(left_bound='_finger', right_bound='_sample', values=['1', '2', '3', '4', '5'], description='finger') + same_finger = finger_filter.get_metadata(metadata_file) == finger_filter.get_metadata(data_file) + return same_parent_directory and same_finger + + + regex_filters = [ + RegexFilter(left_bound='subject', right_bound='_session', values=[str(idx) for idx in range(self.num_subjects + 1)], description='subjects'), # +1 due to Python indexing + RegexFilter(left_bound='_session', right_bound='/1dof_', values=['1', '2'], description='session'), + RegexFilter(left_bound='1dof_', right_bound='_finger', values=['raw'], description='data_type'), + RegexFilter(left_bound='_finger', right_bound='_sample', values=['1', '2', '3', '4', '5'], description='finger'), + RegexFilter(left_bound='_sample', right_bound='.hea', values=['1', '2', '3'], description='rep') + ] + metadata_fetchers = [ + FilePackager(RegexFilter(left_bound='/1dof_', right_bound='_finger', values=['force'], description='data_type'), + package_function=package_function, load='p_signal') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + data = odh + if split: + data = {} + return data From 31517ae620c17bfc727bccc2683afd1f4c55df41 Mon Sep 17 00:00:00 2001 From: eeddy Date: Wed, 25 Sep 2024 13:06:25 -0300 Subject: [PATCH 061/129] Updated libemg --- libemg/_datasets/emg_epn612.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index afe4ec7e..81130faf 100644 --- a/libemg/_datasets/emg_epn612.py 
+++ b/libemg/_datasets/emg_epn612.py @@ -76,10 +76,10 @@ def _update_odh(self, odh): for i_e, e in enumerate(odh.data): if odh.classes[i_e][0][0] == 0: # It is no motion and we need to crop it (make datset even) - odh.data[i_e] = e[100:300] - odh.subjects[i_e] = odh.subjects[i_e][100:300] - odh.classes[i_e] = odh.classes[i_e][100:300] - odh.reps[i_e] = odh.reps[i_e][100:300] + odh.data[i_e] = e[100:200] + odh.subjects[i_e] = odh.subjects[i_e][100:200] + odh.classes[i_e] = odh.classes[i_e][100:200] + odh.reps[i_e] = odh.reps[i_e][100:200] else: # It is an active class and we are croppign it if len(e) > 100: From 0ed0e3043933f2495c4203963b4c12e1abace7a8 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 25 Sep 2024 13:23:53 -0300 Subject: [PATCH 062/129] Fix zoom logical error Parameter passed to scipy zoom created a value for each column instead of one for each axis. This didn't throw an error in the past because most data that had been tested was also 2 DOF. Modified to proper zoom factor. 
--- libemg/data_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 5a331e3a..eef61095 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -165,7 +165,7 @@ def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[st # Align with EMG data if self.align_method == 'zoom': zoom_rate = file_data.shape[0] / packaged_file_data.shape[0] - zoom_factor = [zoom_rate if idx == 0 else 1 for idx in range(packaged_file_data.shape[1])] # only align the 0th axis (samples) + zoom_factor = (zoom_rate, 1) # only align the 0th axis (samples) packaged_file_data = zoom(packaged_file_data, zoom=zoom_factor) elif callable(self.align_method): packaged_file_data = self.align_method(packaged_file_data, file_data) From 44c7df4e0b055b175b7e1e6adb17376a1c18a9a6 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 25 Sep 2024 13:46:26 -0300 Subject: [PATCH 063/129] Add regex filter packaging and .hea support to FilePackager FilePackager could not read .hea files and creating a package function to match based on filename was tedious. Added the option to pass in a list of RegexFilters that will match the regex metadata from two files to package them. 
--- libemg/data_handler.py | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index eef61095..ed60f400 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -117,32 +117,51 @@ def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[st class FilePackager(MetadataFetcher): - def __init__(self, regex_filter: RegexFilter, package_function: Callable[[str, str], bool], align_method: str | Callable[[npt.NDArray, npt.NDArray], npt.NDArray] = 'zoom', load = None, column_mask = None): + def __init__(self, regex_filter: RegexFilter, package_function: Callable[[str, str], bool] | Sequence[RegexFilter], + align_method: str | Callable[[npt.NDArray, npt.NDArray], npt.NDArray] = 'zoom', load: Callable[[str], npt.NDArray] | str | None = None, column_mask: Sequence[int] | None = None): """Package data file with another file that contains relevant metadata (e.g., a labels file). Cycles through all files that match the RegexFilter and packages a data file with a metadata file based on a packaging function. Parameters ---------- regex_filter: RegexFilter - Used to find the type of metadata files. - package_function: callable + Used to find the type of metadata files. The description of this RegexFilter is used to assign the name of the field for this metadata in the OfflineDataHandler. + package_function: callable or Sequence[RegexFilter] Function handle used to determine if two files should be packaged together (i.e., found the metadata file that goes with the data file). Takes in the filename of a metadata file and the filename of the data file. Should return True if the files should be packaged together and False if not. + Alternatively, a list of RegexFilters can be passed in and a function will be created that packages files only if the regex metadata of the data filename + and metadata filename match. 
align_method: str or callable, default='zoom' Method for aligning the samples of the metadata file and data file. Pass in 'zoom' for the metadata file to be zoomed using spline interpolation to the size of the data file or pass in a callable that takes in the metadata and the EMG data and returns the aligned metadata. - load: callable or None, default=None - Custom loading function for metadata file. If None is passed, the metadata is loaded based on the file extension (only .csv and .txt are supported). + load: callable, str, or None, default=None + Determines how metadata file is loaded. If a custom loading function, should take in the filename and return an array. If a string, + it is assumed to be the MRDF key of a .hea file. If None is passed, the metadata is loaded based on the file extension (only .csv and .txt are supported). column_mask: list or None, default=None List of integers corresponding to the indices of the columns that should be extracted from the raw file data. If None is passed, all columns are extracted. """ super().__init__(regex_filter.description) self.regex_filter = regex_filter + self.package_filters = None + + if isinstance(package_function, Sequence): + # Create function to ensure metadata matches + self.package_filters = copy.deepcopy(package_function) + package_function = self._match_regex_patterns self.package_function = package_function self.align_method = align_method self.load = load self.column_mask = column_mask + def _match_regex_patterns(self, metadata_file: str, data_file: str): + assert self.package_filters is not None, 'Attempting to match package filters, but None found.' 
+ for filter in self.package_filters: + matching_metadata = filter.get_metadata(metadata_file) == filter.get_metadata(data_file) + if not matching_metadata: + return False + return True + + def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[str]): potential_files = self.regex_filter.get_matching_files(all_files) packaged_files = [Path(potential_file) for potential_file in potential_files if self.package_function(potential_file, filename)] @@ -150,13 +169,19 @@ def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[st # I think it's easier to enforce a single file per FilePackager, but we could build in functionality to allow multiple files then just vstack all the data if there's a use case for that. raise ValueError(f"Found {len(packaged_files)} files to be packaged with {filename} when trying to package {self.regex_filter.description} file (1 file should be found). Please check filter and package functions.") packaged_file = packaged_files[0] + suffix = packaged_file.suffix + packaged_file = packaged_file.as_posix() if callable(self.load): # Passed in a custom loading function packaged_file_data = self.load(packaged_file) - elif packaged_file.suffix == '.txt': + elif isinstance(self.load, str): + # Passed in a MRDF key + assert suffix == '.hea', f"Provided string for load parameter, but packaged file doesn't have extension .hea. Please pass in a custom load function and/or ensure the correct file is packaged." 
+ packaged_file_data = (wfdb.rdrecord(packaged_file.replace('.hea', ''))).__getattribute__(self.load) + elif suffix == '.txt': packaged_file_data = np.loadtxt(packaged_file, delimiter=',') - elif packaged_file.suffix == '.csv': + elif suffix == '.csv': packaged_file_data = pd.read_csv(packaged_file) packaged_file_data = packaged_file_data.to_numpy() else: From a7916e2c39ed5042d88d9fcb95d975e4ee3e1211 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 25 Sep 2024 14:19:49 -0300 Subject: [PATCH 064/129] Add check in regex package function Some metadata files would throw an error when calling get_metadata because the file didn't match the original regex filter. Added a check so False is returned if it doesn't match the RegexFilters. --- libemg/data_handler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index ed60f400..aa67de90 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -156,6 +156,9 @@ def __init__(self, regex_filter: RegexFilter, package_function: Callable[[str, s def _match_regex_patterns(self, metadata_file: str, data_file: str): assert self.package_filters is not None, 'Attempting to match package filters, but None found.' for filter in self.package_filters: + if len(filter.get_matching_files([metadata_file])) == 0: + # Doesn't match filters + return False matching_metadata = filter.get_metadata(metadata_file) == filter.get_metadata(data_file) if not matching_metadata: return False From 12453e2113d57016ea3b84bb6430c3eeca7d32e9 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 25 Sep 2024 15:48:49 -0300 Subject: [PATCH 065/129] Fix Hyser1DOF Added correct RegexFilters, package function, and data split. 
--- libemg/_datasets/hyser.py | 70 ++++++++++++++------------------------- 1 file changed, 25 insertions(+), 45 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 56ce6409..1bf93ab3 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -1,21 +1,11 @@ -from pathlib import Path from abc import ABC, abstractmethod +from copy import deepcopy from libemg.data_handler import RegexFilter, FilePackager, OfflineDataHandler from libemg._datasets.dataset import Dataset class Hyser(Dataset, ABC): - def __init__(self, gestures, num_reps, description, dataset_folder): - # super().__init__( - # sampling=1010, - # num_channels=64, - # recording_device='EMaGer', - # num_subjects=1, - # gestures={0: 'Hand Close (-) / Hand Open (+)', 1: 'Pronation (-) / Supination (+)'}, - # num_reps=5, - # description='A simple EMaGer dataset used for regression examples in LibEMG demos.', - # citation='N/A' - # ) + def __init__(self, gestures, num_reps, description, dataset_folder, analysis = 'baseline'): super().__init__( sampling=2048, num_channels=256, @@ -27,27 +17,15 @@ def __init__(self, gestures, num_reps, description, dataset_folder): citation='https://doi.org/10.13026/ym7v-bh53' ) - self.url = 'https://www.physionet.org/content/hd-semg/1.0.0/#files-panel' + self.url = 'https://www.physionet.org/content/hd-semg/1.0.0/' self.dataset_folder = dataset_folder + self.analysis = analysis def prepare_data(self, split = False): if (not self.check_exists(self.dataset_folder)): - raise FileNotFoundError(f"Didn't find Hyser data in {self.dataset_folder}. Please download the dataset and store it in the appropriate directory before running - prepare_data(). See {self.url} for download details.") + raise FileNotFoundError(f"Didn't find Hyser data in {self.dataset_folder} directory. Please download the dataset and \ + store it in the appropriate directory before running prepare_data(). 
See {self.url} for download details.") return self._prepare_data_helper(split=split) - # regex_filters = [ - # RegexFilter(left_bound='/', right_bound='/', values=['open-close', 'pro-sup'], description='movements'), - # RegexFilter(left_bound='_R_', right_bound='_emg.csv', values=[str(idx) for idx in range(self.num_reps)], description='reps') - # ] - # package_function = lambda x, y: Path(x).parent.absolute() == Path(y).parent.absolute() - # metadata_fetchers = [FilePackager(RegexFilter(left_bound='/', right_bound='.txt', values=['labels'], description='labels'), package_function)] - # odh = OfflineDataHandler() - # odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - # data = odh - # if split: - # data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3], fast=True), 'Test': odh.isolate_data('reps', [4], fast=True)} - - # return data @abstractmethod def _prepare_data_helper(self, split = False): @@ -55,34 +33,36 @@ def _prepare_data_helper(self, split = False): class Hyser1DOF(Hyser): - def __init__(self, dataset_folder = 'Hyser1DOF'): + def __init__(self, dataset_folder = 'Hyser1DOF', analysis = 'baseline'): gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little Finger'} definition = 'Hyser 1 DOF dataset. Includes within-DOF finger movements. Ground truth finger forces are recorded for use in finger force regression.' 
- super().__init__(gestures=gestures, num_reps=3, description=definition, dataset_folder=dataset_folder) + super().__init__(gestures=gestures, num_reps=3, description=definition, dataset_folder=dataset_folder, analysis=analysis) def _prepare_data_helper(self, split = False): - def package_function(metadata_file, data_file): - same_parent_directory = Path(metadata_file).parent.absolute() == Path(data_file).parent.absolute() - - finger_filter = RegexFilter(left_bound='_finger', right_bound='_sample', values=['1', '2', '3', '4', '5'], description='finger') - same_finger = finger_filter.get_metadata(metadata_file) == finger_filter.get_metadata(data_file) - return same_parent_directory and same_finger - - - regex_filters = [ - RegexFilter(left_bound='subject', right_bound='_session', values=[str(idx) for idx in range(self.num_subjects + 1)], description='subjects'), # +1 due to Python indexing - RegexFilter(left_bound='_session', right_bound='/1dof_', values=['1', '2'], description='session'), - RegexFilter(left_bound='1dof_', right_bound='_finger', values=['raw'], description='data_type'), + sessions_values = ['1', '2'] if self.analysis == 'session' else ['1'] # only grab first session unless both are desired + common_filters = [ + RegexFilter(left_bound='subject', right_bound='_session', values=[str(idx).zfill(2) for idx in range(self.num_subjects + 1)], description='subjects'), # +1 due to Python indexing + RegexFilter(left_bound='_session', right_bound='/1dof_', values=sessions_values, description='session'), RegexFilter(left_bound='_finger', right_bound='_sample', values=['1', '2', '3', '4', '5'], description='finger'), RegexFilter(left_bound='_sample', right_bound='.hea', values=['1', '2', '3'], description='rep') ] + + regex_filters = deepcopy(common_filters) + regex_filters.append(RegexFilter(left_bound='1dof_', right_bound='_finger', values=['raw'], description='data_type')) + metadata_fetchers = [ - FilePackager(RegexFilter(left_bound='/1dof_', 
right_bound='_finger', values=['force'], description='data_type'), - package_function=package_function, load='p_signal') + FilePackager(RegexFilter(left_bound='/1dof_', right_bound='_finger', values=['force'], description='labels'), + package_function=common_filters, load='p_signal') ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) data = odh if split: - data = {} + # Can likely move this to parent class... + if self.analysis == 'session': + data = {'All': odh, 'Train': odh.isolate_data('session', [0], fast=True), 'Test': odh.isolate_data('session', [1], fast=True)} + elif self.analysis == 'baseline': + data = {'All': odh, 'Train': odh.isolate_data('rep', [0, 1], fast=True), 'Test': odh.isolate_data('rep', [2], fast=True)} + else: + raise ValueError(f"Unexpected value for analysis. Suported values are session, baseline. Got: {self.analysis}.") return data From 4f62584c53be2f2b6b5018e5311968bd1dc75b8e Mon Sep 17 00:00:00 2001 From: Ethan Date: Thu, 26 Sep 2024 12:39:18 -0300 Subject: [PATCH 066/129] Added CI dataset --- .gitignore | 1 + dataset_tryout.py | 2 +- libemg/_datasets/intensity.py | 41 +++++++++++++++++++++++++++++++++++ libemg/datasets.py | 9 ++++++++ 4 files changed, 52 insertions(+), 1 deletion(-) create mode 100644 libemg/_datasets/intensity.py diff --git a/.gitignore b/.gitignore index 12c3b8a2..ac273cd8 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,4 @@ CIILData/* EMGEPN612.pkl OneSubjectMyoDataset/ _3DCDataset/ +ContractionIntensity/ \ No newline at end of file diff --git a/dataset_tryout.py b/dataset_tryout.py index d4e6d4f5..c59eba21 100644 --- a/dataset_tryout.py +++ b/dataset_tryout.py @@ -1,3 +1,3 @@ from libemg.datasets import * -evaluate('LDA', 300, 100, included_datasets=['OneSubjectMyo', '3DC', 'CIIL_ElectrodeShift']) \ No newline at end of file +evaluate('LDA', 300, 100, feature_list=['MAV','SSC','ZC','WL'], 
included_datasets=['ContractionIntensity']) \ No newline at end of file diff --git a/libemg/_datasets/intensity.py b/libemg/_datasets/intensity.py new file mode 100644 index 00000000..2404d5ef --- /dev/null +++ b/libemg/_datasets/intensity.py @@ -0,0 +1,41 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +import os + +class ContractionIntensity(Dataset): + def __init__(self, dataset_folder="ContractionIntensity/"): + Dataset.__init__(self, + 1000, + 8, + 'BE328 by Liberating Technologies, Inc', + 10, + {0: "No Motion", 1: "Wrist Flexion", 2: "Wrist Flexion", 3: "Wrist Pronation", 4: "Wrist Supination", 5: "Chuck Grip", 6: "Hand Open"}, + '4 Ramp Reps (Train), 4 Reps x 20%, 30%, 40%, 50%, 60%, 70%, 80%, MVC (Test)', + "A contraction intensity dataset.", + "https://pubmed.ncbi.nlm.nih.gov/23894224/") + self.url = "https://github.com/libemg/ContractionIntensity" + self.dataset_folder = dataset_folder + + def prepare_data(self, split = False): + subjects_values = [str(i) for i in range(1,11)] + intensity_values = ["Ramp", "20P", "30P", "40P", "50P", "60P", "70P", "80P", "MVC"] + classes_values = [str(i) for i in range(7)] + reps_values = ["0","1","2","3"] + + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + + regex_filters = [ + RegexFilter(left_bound="/S", right_bound="/",values=subjects_values, description='subjects'), + RegexFilter(left_bound = "_", right_bound="_C", values = intensity_values, description='intensities'), + RegexFilter(left_bound = "_C", right_bound="_R", values = classes_values, description='classes'), + RegexFilter(left_bound = "_R", right_bound=".csv", values = reps_values, description='reps'), + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + data = odh + if split: + data = {'All': odh, 'Train': 
odh.isolate_data("intensities", [0], fast=True), 'Test': odh.isolate_data("intensities", list(range(1, len(intensity_values))), fast=True)} + + return data \ No newline at end of file diff --git a/libemg/datasets.py b/libemg/datasets.py index 66cc6d96..4e3852e7 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -7,10 +7,12 @@ from libemg._datasets.nina_pro import NinaproDB2 from libemg._datasets.myodisco import MyoDisCo from libemg._datasets.fors_emg import FORSEMG +from libemg._datasets.intensity import ContractionIntensity from libemg.feature_extractor import FeatureExtractor from libemg.emg_predictor import EMGClassifier from libemg.offline_metrics import OfflineMetrics from libemg.filtering import Filter +import numpy as np def get_dataset_list(): """Gets a list of all available datasets. @@ -32,6 +34,7 @@ def get_dataset_list(): 'MyoDisCo': MyoDisCo, 'FORS-EMG': FORSEMG, 'EMGEPN612': EMGEPN612, + 'ContractionIntensity': ContractionIntensity, } def get_dataset_info(dataset): @@ -77,6 +80,12 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_data s_test_dh = test_data.isolate_data('subjects', [s]) train_windows, train_meta = s_train_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc)) test_windows, test_meta = s_test_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc)) + + # This means that we need to relabel the dataset labels to start at 0 instead of 1 + if 0 not in train_meta['classes']: + train_meta['classes'] = np.array([c-min(train_meta['classes']) for c in train_meta['classes']]) + test_meta['classes'] = np.array([c-min(test_meta['classes']) for c in test_meta['classes']]) + fe = FeatureExtractor() train_feats = fe.extract_features(feature_list, train_windows, feature_dic=feature_dic) From 981352f3e7e2b63022f5071567b12e5ddaf24c81 Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 26 Sep 2024 15:15:41 -0300 Subject: [PATCH 
067/129] Updates --- .gitignore | 3 ++- libemg/_datasets/intensity.py | 4 ++-- libemg/datasets.py | 6 ------ 3 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index ac273cd8..d9032520 100644 --- a/.gitignore +++ b/.gitignore @@ -58,4 +58,5 @@ CIILData/* EMGEPN612.pkl OneSubjectMyoDataset/ _3DCDataset/ -ContractionIntensity/ \ No newline at end of file +ContractionIntensity/ +CIILData/ \ No newline at end of file diff --git a/libemg/_datasets/intensity.py b/libemg/_datasets/intensity.py index 2404d5ef..c7ea49f7 100644 --- a/libemg/_datasets/intensity.py +++ b/libemg/_datasets/intensity.py @@ -19,8 +19,8 @@ def __init__(self, dataset_folder="ContractionIntensity/"): def prepare_data(self, split = False): subjects_values = [str(i) for i in range(1,11)] intensity_values = ["Ramp", "20P", "30P", "40P", "50P", "60P", "70P", "80P", "MVC"] - classes_values = [str(i) for i in range(7)] - reps_values = ["0","1","2","3"] + classes_values = [str(i) for i in range(1,8)] + reps_values = ["1","2","3","4"] print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): diff --git a/libemg/datasets.py b/libemg/datasets.py index 4e3852e7..f67ea68c 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -80,12 +80,6 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_data s_test_dh = test_data.isolate_data('subjects', [s]) train_windows, train_meta = s_train_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc)) test_windows, test_meta = s_test_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc)) - - # This means that we need to relabel the dataset labels to start at 0 instead of 1 - if 0 not in train_meta['classes']: - train_meta['classes'] = np.array([c-min(train_meta['classes']) for c in train_meta['classes']]) - test_meta['classes'] = np.array([c-min(test_meta['classes']) for c in 
test_meta['classes']]) - fe = FeatureExtractor() train_feats = fe.extract_features(feature_list, train_windows, feature_dic=feature_dic) From 2d58dae0ebee6ec97c444fd167161ab962f30cef Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 26 Sep 2024 18:20:07 -0300 Subject: [PATCH 068/129] Updates --- dataset_tryout.py | 3 ++- libemg/datasets.py | 22 +++++++++++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/dataset_tryout.py b/dataset_tryout.py index c59eba21..2f0068e5 100644 --- a/dataset_tryout.py +++ b/dataset_tryout.py @@ -1,3 +1,4 @@ from libemg.datasets import * -evaluate('LDA', 300, 100, feature_list=['MAV','SSC','ZC','WL'], included_datasets=['ContractionIntensity']) \ No newline at end of file +accs = evaluate('LDA', 300, 100, feature_list=['MAV','SSC','ZC','WL'], included_datasets=['ContractionIntensity']) +print('\n' + str(accs)) \ No newline at end of file diff --git a/libemg/datasets.py b/libemg/datasets.py index f67ea68c..f6ce4940 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -50,8 +50,7 @@ def get_dataset_info(dataset): else: print("ERROR: Invalid dataset name") -#TODO: Update docs -def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_datasets=['OneSubjectMyo', '3DC', 'CIIL_ElectrodeShift', 'GRABMyoCrossDay'], feature_dic={}): +def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC']): """Evaluates an algorithm against all included datasets. Parameters @@ -60,7 +59,19 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_data The window size (**in ms**). window_inc: int The window increment (**in ms**). + feature_list: list (default=['MAV']) + A list of features. + feature_dic: dic (default={}) + A dictionary of parameters for the passed in features. + included_dataasets: list + The name of the datasets you want to evaluate your model on. 
+ + Returns + ---------- + dictionary + A dictionary with a set of accuracies for different datasets """ + accuracies = {} for d in included_datasets: print('Evaluating ' + d + ' dataset...') dataset = get_dataset_list()[d]() @@ -74,6 +85,7 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_data filter.filter(train_data) filter.filter(test_data) + accs = [] for s in range(0, dataset.num_subjects): print(str(s) + '/' + str(dataset.num_subjects) + ' completed.') s_train_dh = train_data.isolate_data('subjects', [s]) @@ -94,4 +106,8 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], included_data preds, _ = clf.run(test_feats) om = OfflineMetrics() - print(om.get_CA(test_meta['classes'], preds)) \ No newline at end of file + ca = om.get_CA(test_meta['classes'], preds) + accs.append(ca) + print(ca) + accuracies[d] = accs + return accuracies \ No newline at end of file From 6b609b85a899d4e4d092ba10f89a717fcaac80d2 Mon Sep 17 00:00:00 2001 From: eeddy Date: Fri, 27 Sep 2024 08:31:00 -0300 Subject: [PATCH 069/129] UpdaTes --- libemg/_datasets/_3DC.py | 1 - libemg/_datasets/ciil.py | 1 - libemg/_datasets/continous_transitions.py | 4 +-- libemg/_datasets/emg_epn612.py | 1 - libemg/_datasets/fors_emg.py | 1 - libemg/_datasets/grab_myo.py | 1 - libemg/_datasets/intensity.py | 1 - libemg/_datasets/myodisco.py | 1 - libemg/_datasets/radman_lp.py | 41 +++++++++++++++++++++++ 9 files changed, 42 insertions(+), 10 deletions(-) create mode 100644 libemg/_datasets/radman_lp.py diff --git a/libemg/_datasets/_3DC.py b/libemg/_datasets/_3DC.py index 04ce0ede..daacbced 100644 --- a/libemg/_datasets/_3DC.py +++ b/libemg/_datasets/_3DC.py @@ -1,6 +1,5 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -import os class _3DCDataset(Dataset): def __init__(self, dataset_folder="_3DCDataset/"): diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py index 7ebd3850..54904be1 
100644 --- a/libemg/_datasets/ciil.py +++ b/libemg/_datasets/ciil.py @@ -1,6 +1,5 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -import os class CIIL_MinimalData(Dataset): def __init__(self, dataset_folder='CIILData/'): diff --git a/libemg/_datasets/continous_transitions.py b/libemg/_datasets/continous_transitions.py index 2c7c3162..79e15857 100644 --- a/libemg/_datasets/continous_transitions.py +++ b/libemg/_datasets/continous_transitions.py @@ -1,7 +1,5 @@ from libemg._datasets.dataset import Dataset -from libemg.data_handler import OfflineDataHandler, RegexFilter -import os -import pandas as pd +from libemg.data_handler import OfflineDataHandler import h5py import numpy as np diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 81130faf..2f948b6f 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -1,7 +1,6 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler import pickle -import random import numpy as np from libemg.feature_extractor import FeatureExtractor from libemg.utils import * diff --git a/libemg/_datasets/fors_emg.py b/libemg/_datasets/fors_emg.py index b9d2f5c4..5c62a954 100644 --- a/libemg/_datasets/fors_emg.py +++ b/libemg/_datasets/fors_emg.py @@ -1,6 +1,5 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -import os import scipy.io import numpy as np diff --git a/libemg/_datasets/grab_myo.py b/libemg/_datasets/grab_myo.py index 62793c60..be3d88ed 100644 --- a/libemg/_datasets/grab_myo.py +++ b/libemg/_datasets/grab_myo.py @@ -1,6 +1,5 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -import os class GRABMyo(Dataset): """ diff --git a/libemg/_datasets/intensity.py b/libemg/_datasets/intensity.py index c7ea49f7..c61ccf9b 100644 --- 
a/libemg/_datasets/intensity.py +++ b/libemg/_datasets/intensity.py @@ -1,6 +1,5 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -import os class ContractionIntensity(Dataset): def __init__(self, dataset_folder="ContractionIntensity/"): diff --git a/libemg/_datasets/myodisco.py b/libemg/_datasets/myodisco.py index 1b0f3285..1caccd12 100644 --- a/libemg/_datasets/myodisco.py +++ b/libemg/_datasets/myodisco.py @@ -2,7 +2,6 @@ from libemg.data_handler import OfflineDataHandler, RegexFilter from libemg.utils import * from libemg.feature_extractor import FeatureExtractor -import os class MyoDisCo(Dataset): def __init__(self, dataset_folder="MyoDisCo/", cross_day=False): diff --git a/libemg/_datasets/radman_lp.py b/libemg/_datasets/radman_lp.py new file mode 100644 index 00000000..0c5666c3 --- /dev/null +++ b/libemg/_datasets/radman_lp.py @@ -0,0 +1,41 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter + +# TODO: Update +class ContractionIntensity(Dataset): + def __init__(self, dataset_folder="ContractionIntensity/"): + Dataset.__init__(self, + 1000, + 8, + 'BE328 by Liberating Technologies, Inc', + 10, + {0: "No Motion", 1: "Wrist Flexion", 2: "Wrist Flexion", 3: "Wrist Pronation", 4: "Wrist Supination", 5: "Chuck Grip", 6: "Hand Open"}, + '4 Ramp Reps (Train), 4 Reps x 20%, 30%, 40%, 50%, 60%, 70%, 80%, MVC (Test)', + "A contraction intensity dataset.", + "https://pubmed.ncbi.nlm.nih.gov/23894224/") + self.url = "https://github.com/libemg/LimbPosition" + self.dataset_folder = dataset_folder + + def prepare_data(self, split = False): + subjects_values = [str(i) for i in range(1,11)] + intensity_values = ["Ramp", "20P", "30P", "40P", "50P", "60P", "70P", "80P", "MVC"] + classes_values = [str(i) for i in range(1,8)] + reps_values = ["1","2","3","4"] + + print('\nPlease cite: ' + self.citation+'\n') + if (not 
self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + + regex_filters = [ + RegexFilter(left_bound="/S", right_bound="/",values=subjects_values, description='subjects'), + RegexFilter(left_bound = "_", right_bound="_C", values = intensity_values, description='intensities'), + RegexFilter(left_bound = "_C", right_bound="_R", values = classes_values, description='classes'), + RegexFilter(left_bound = "_R", right_bound=".csv", values = reps_values, description='reps'), + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data("intensities", [0], fast=True), 'Test': odh.isolate_data("intensities", list(range(1, len(intensity_values))), fast=True)} + + return data \ No newline at end of file From 45e5abf9341fac397613a4b595b842a561a7befd Mon Sep 17 00:00:00 2001 From: eeddy Date: Fri, 27 Sep 2024 09:59:55 -0300 Subject: [PATCH 070/129] added limb position --- .gitignore | 4 +++- dataset_tryout.py | 2 +- libemg/_datasets/fougner_lp.py | 40 ++++++++++++++++++++++++++++++++++ libemg/_datasets/intensity.py | 2 +- libemg/_datasets/radman_lp.py | 29 ++++++++++++------------ libemg/datasets.py | 24 ++++++++++++-------- 6 files changed, 74 insertions(+), 27 deletions(-) create mode 100644 libemg/_datasets/fougner_lp.py diff --git a/.gitignore b/.gitignore index d9032520..79c11978 100644 --- a/.gitignore +++ b/.gitignore @@ -59,4 +59,6 @@ EMGEPN612.pkl OneSubjectMyoDataset/ _3DCDataset/ ContractionIntensity/ -CIILData/ \ No newline at end of file +CIILData/ +*.pkl +LimbPosition/ \ No newline at end of file diff --git a/dataset_tryout.py b/dataset_tryout.py index 2f0068e5..908a8ba1 100644 --- a/dataset_tryout.py +++ b/dataset_tryout.py @@ -1,4 +1,4 @@ from libemg.datasets import * -accs = evaluate('LDA', 300, 100, feature_list=['MAV','SSC','ZC','WL'], included_datasets=['ContractionIntensity']) +accs = 
evaluate('LDA', 300, 100, feature_list=['MAV','SSC','ZC','WL'], included_datasets=['FougnerLP'], save_dir='') print('\n' + str(accs)) \ No newline at end of file diff --git a/libemg/_datasets/fougner_lp.py b/libemg/_datasets/fougner_lp.py new file mode 100644 index 00000000..df55c7c5 --- /dev/null +++ b/libemg/_datasets/fougner_lp.py @@ -0,0 +1,40 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter + +class FougnerLP(Dataset): + def __init__(self, dataset_folder="LimbPosition/"): + Dataset.__init__(self, + 1000, + 8, + 'BE328 by Liberating Technologies, Inc.', + 12, + {0: 'Wrist Flexion', 1: 'Wrist Extension', 2: 'Pronation', 3: 'Supination', 4: 'Hand Open', 5: 'Power Grip', 6: 'Pinch Grip', 7: 'Rest'}, + '10 Reps (Train), 10 Reps x 4 Positions', + "A limb position dataset (with 5 static limb positions).", + "https://ieeexplore.ieee.org/document/5985538") + self.url = "https://github.com/libemg/LimbPosition" + self.dataset_folder = dataset_folder + + def prepare_data(self, split = False): + subjects_values = [str(i) for i in range(1,13)] + position_values = ["P1", "P2", "P3", "P4", "P5"] + classes_values = ["1", "2", "3", "4", "5", "8", "9", "12"] + reps_values = ["1","2","3","4","5","6","7","8","9","10"] + + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + + regex_filters = [ + RegexFilter(left_bound="/S", right_bound="/",values=subjects_values, description='subjects'), + RegexFilter(left_bound = "_", right_bound="_R", values = position_values, description='positions'), + RegexFilter(left_bound = "_C", right_bound="_P", values = classes_values, description='classes'), + RegexFilter(left_bound = "_R", right_bound=".txt", values = reps_values, description='reps'), + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder + 'FougnerLimbPosition/', regex_filters=regex_filters, 
delimiter=",") + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data("positions", [0], fast=True), 'Test': odh.isolate_data("positions", list(range(1, len(position_values))), fast=True)} + + return data \ No newline at end of file diff --git a/libemg/_datasets/intensity.py b/libemg/_datasets/intensity.py index c61ccf9b..dbecd855 100644 --- a/libemg/_datasets/intensity.py +++ b/libemg/_datasets/intensity.py @@ -8,7 +8,7 @@ def __init__(self, dataset_folder="ContractionIntensity/"): 8, 'BE328 by Liberating Technologies, Inc', 10, - {0: "No Motion", 1: "Wrist Flexion", 2: "Wrist Flexion", 3: "Wrist Pronation", 4: "Wrist Supination", 5: "Chuck Grip", 6: "Hand Open"}, + {0: "No Motion", 1: "Wrist Flexion", 2: "Wrist Extension", 3: "Wrist Pronation", 4: "Wrist Supination", 5: "Chuck Grip", 6: "Hand Open"}, '4 Ramp Reps (Train), 4 Reps x 20%, 30%, 40%, 50%, 60%, 70%, 80%, MVC (Test)', "A contraction intensity dataset.", "https://pubmed.ncbi.nlm.nih.gov/23894224/") diff --git a/libemg/_datasets/radman_lp.py b/libemg/_datasets/radman_lp.py index 0c5666c3..2cbfb853 100644 --- a/libemg/_datasets/radman_lp.py +++ b/libemg/_datasets/radman_lp.py @@ -1,25 +1,24 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -# TODO: Update -class ContractionIntensity(Dataset): - def __init__(self, dataset_folder="ContractionIntensity/"): +class RadmanLP(Dataset): + def __init__(self, dataset_folder="LimbPosition/"): Dataset.__init__(self, 1000, - 8, - 'BE328 by Liberating Technologies, Inc', + 6, + 'DelsysTrigno', 10, - {0: "No Motion", 1: "Wrist Flexion", 2: "Wrist Flexion", 3: "Wrist Pronation", 4: "Wrist Supination", 5: "Chuck Grip", 6: "Hand Open"}, - '4 Ramp Reps (Train), 4 Reps x 20%, 30%, 40%, 50%, 60%, 70%, 80%, MVC (Test)', - "A contraction intensity dataset.", - "https://pubmed.ncbi.nlm.nih.gov/23894224/") + {'N/A': 'Uncertain'}, + '4 Reps (Train), 4 Reps x 15 Positions', + "A large limb position 
dataset (with 16 static limb positions).", + "https://pubmed.ncbi.nlm.nih.gov/25570046/") self.url = "https://github.com/libemg/LimbPosition" self.dataset_folder = dataset_folder def prepare_data(self, split = False): subjects_values = [str(i) for i in range(1,11)] - intensity_values = ["Ramp", "20P", "30P", "40P", "50P", "60P", "70P", "80P", "MVC"] - classes_values = [str(i) for i in range(1,8)] + position_values = ["P1", "P2", "P3", "P4", "P5", "P6", "P7", "P8", "P9", "P10", "P11", "P12", "P13", "P14", "P15", "P16"] + classes_values = [str(i) for i in range(1,9)] reps_values = ["1","2","3","4"] print('\nPlease cite: ' + self.citation+'\n') @@ -28,14 +27,14 @@ def prepare_data(self, split = False): regex_filters = [ RegexFilter(left_bound="/S", right_bound="/",values=subjects_values, description='subjects'), - RegexFilter(left_bound = "_", right_bound="_C", values = intensity_values, description='intensities'), - RegexFilter(left_bound = "_C", right_bound="_R", values = classes_values, description='classes'), + RegexFilter(left_bound = "_", right_bound="_R", values = position_values, description='positions'), + RegexFilter(left_bound = "_C", right_bound="_P", values = classes_values, description='classes'), RegexFilter(left_bound = "_R", right_bound=".csv", values = reps_values, description='reps'), ] odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + odh.get_data(folder_location=self.dataset_folder + 'RadmandLimbPosition/', regex_filters=regex_filters, delimiter=",") data = odh if split: - data = {'All': odh, 'Train': odh.isolate_data("intensities", [0], fast=True), 'Test': odh.isolate_data("intensities", list(range(1, len(intensity_values))), fast=True)} + data = {'All': odh, 'Train': odh.isolate_data("positions", [0], fast=True), 'Test': odh.isolate_data("positions", list(range(1, len(position_values))), fast=True)} return data \ No newline at end of file diff --git a/libemg/datasets.py 
b/libemg/datasets.py index f6ce4940..8196907d 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -7,12 +7,14 @@ from libemg._datasets.nina_pro import NinaproDB2 from libemg._datasets.myodisco import MyoDisCo from libemg._datasets.fors_emg import FORSEMG +from libemg._datasets.radman_lp import RadmanLP +from libemg._datasets.fougner_lp import FougnerLP from libemg._datasets.intensity import ContractionIntensity from libemg.feature_extractor import FeatureExtractor from libemg.emg_predictor import EMGClassifier from libemg.offline_metrics import OfflineMetrics -from libemg.filtering import Filter -import numpy as np +import pickle +import time def get_dataset_list(): """Gets a list of all available datasets. @@ -35,6 +37,8 @@ def get_dataset_list(): 'FORS-EMG': FORSEMG, 'EMGEPN612': EMGEPN612, 'ContractionIntensity': ContractionIntensity, + 'RadmandLP': RadmanLP, + 'FougnerLP': FougnerLP, } def get_dataset_info(dataset): @@ -50,7 +54,7 @@ def get_dataset_info(dataset): else: print("ERROR: Invalid dataset name") -def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC']): +def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC'], save_dir=None): """Evaluates an algorithm against all included datasets. Parameters @@ -65,7 +69,8 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ A dictionary of parameters for the passed in features. included_dataasets: list The name of the datasets you want to evaluate your model on. - + save_dir: string (default=None) + The name of the directory you want to incrementally save the results to (it will be a pickle file). 
Returns ---------- dictionary @@ -79,11 +84,6 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ train_data = data['Train'] test_data = data['Test'] - - filter = Filter(dataset.sampling) - filter.install_common_filters() - filter.filter(train_data) - filter.filter(test_data) accs = [] for s in range(0, dataset.num_subjects): @@ -110,4 +110,10 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ accs.append(ca) print(ca) accuracies[d] = accs + + # Save to pickle file + if save_dir is not None: + with open(save_dir + str(time.time()) + '.pkl', 'wb') as handle: + pickle.dump(accuracies, handle, protocol=pickle.HIGHEST_PROTOCOL) + return accuracies \ No newline at end of file From 275dd36694b5c998c986c3198443cab6734b333e Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Fri, 27 Sep 2024 18:30:14 -0300 Subject: [PATCH 071/129] radman->radmand --- libemg/_datasets/{radman_lp.py => radmand_lp.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename libemg/_datasets/{radman_lp.py => radmand_lp.py} (98%) diff --git a/libemg/_datasets/radman_lp.py b/libemg/_datasets/radmand_lp.py similarity index 98% rename from libemg/_datasets/radman_lp.py rename to libemg/_datasets/radmand_lp.py index 2cbfb853..e521b195 100644 --- a/libemg/_datasets/radman_lp.py +++ b/libemg/_datasets/radmand_lp.py @@ -1,7 +1,7 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter -class RadmanLP(Dataset): +class RadmandLP(Dataset): def __init__(self, dataset_folder="LimbPosition/"): Dataset.__init__(self, 1000, From a0a0e1ed47ee53a72f90231037d087ae34cb1668 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Fri, 27 Sep 2024 19:11:38 -0300 Subject: [PATCH 072/129] added h5py req --- requirements.txt | 1 + setup.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 619df973..df5d9559 100644 --- a/requirements.txt +++ 
b/requirements.txt @@ -12,6 +12,7 @@ wfdb bleak semantic-version requests +h5py # For Docs sphinx==5.0.0 sphinx_rtd_theme==1.0.0 diff --git a/setup.py b/setup.py index 436f1b7d..95cbfe00 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,8 @@ "opencv-python", "pythonnet", "bleak", - "dearpygui" + "dearpygui", + "h5py" ], keywords=[ "emg", From e5d7a8eb9eebb67acbb2faa75377819fc0465b4d Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Fri, 27 Sep 2024 19:21:55 -0300 Subject: [PATCH 073/129] added kaufmannMD --- libemg/datasets.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 8196907d..06ca2cd2 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -7,9 +7,10 @@ from libemg._datasets.nina_pro import NinaproDB2 from libemg._datasets.myodisco import MyoDisCo from libemg._datasets.fors_emg import FORSEMG -from libemg._datasets.radman_lp import RadmanLP +from libemg._datasets.radmand_lp import RadmandLP from libemg._datasets.fougner_lp import FougnerLP from libemg._datasets.intensity import ContractionIntensity +from libemg._datasets.kaufmann_md import KaufmannMD from libemg.feature_extractor import FeatureExtractor from libemg.emg_predictor import EMGClassifier from libemg.offline_metrics import OfflineMetrics @@ -37,8 +38,9 @@ def get_dataset_list(): 'FORS-EMG': FORSEMG, 'EMGEPN612': EMGEPN612, 'ContractionIntensity': ContractionIntensity, - 'RadmandLP': RadmanLP, + 'RadmandLP': RadmandLP, 'FougnerLP': FougnerLP, + "KaufmannMD": KaufmannMD } def get_dataset_info(dataset): From d1b52253473e0db9819671db8531f7c0b5607967 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Fri, 27 Sep 2024 19:22:08 -0300 Subject: [PATCH 074/129] created kaufmann class --- libemg/_datasets/kaufmann_md.py | 40 +++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 libemg/_datasets/kaufmann_md.py diff --git a/libemg/_datasets/kaufmann_md.py b/libemg/_datasets/kaufmann_md.py new file 
mode 100644 index 00000000..846e20a5 --- /dev/null +++ b/libemg/_datasets/kaufmann_md.py @@ -0,0 +1,40 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter + +class KaufmannMD(Dataset): + def __init__(self, dataset_folder="MultiDay/"): + Dataset.__init__(self, + 2048, + 4, + 'MindMedia', + 1, + {0: "No Motion", 1:"Wrist Extension", 2:"Wrist Flexion", 3:"Wrist Adduction", + 4:"Wrist Abduction", 5:"Wrist Supination", 6:"Wrist Pronation", 7:"Hand Open", + 8:"Hand Closed", 9:"Key Grip", 10:"Index Point"}, + '1 rep per day, 120 days total. 60/60 train-test split', + "A single subject, multi-day (120) collection.", + "https://ieeexplore.ieee.org/document/5627288") + self.url = "https://github.com/LibEMG/MultiDay" + self.dataset_folder = dataset_folder + + def prepare_data(self, split = False): + subjects_values = ["0"] + day_values = [str(i) for i in range(1,122)] + classes_values = [str(i) for i in range(11)] + + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + + regex_filters = [ + RegexFilter(left_bound="/S", right_bound="_D",values=subjects_values, description='subjects'), + RegexFilter(left_bound = "_D", right_bound="_C", values = day_values, description='days'), + RegexFilter(left_bound = "_C", right_bound=".csv", values = classes_values, description='classes'), + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=" ") + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data("days", list(range(60)), fast=True), 'Test': odh.isolate_data("days", list(range(60,121)), fast=True)} + + return data \ No newline at end of file From 218e0024ac5328ef9ae227c7badc97e3a2b6b80c Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Fri, 27 Sep 2024 19:24:43 -0300 Subject: [PATCH 075/129] added submodules to _dataset --- 
libemg/_datasets/__init__.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/libemg/_datasets/__init__.py b/libemg/_datasets/__init__.py index 023df7a3..34a73f80 100644 --- a/libemg/_datasets/__init__.py +++ b/libemg/_datasets/__init__.py @@ -1,3 +1,16 @@ from libemg._datasets import _3DC +from libemg._datasets import ciil +from libemg._datasets import continous_transitions +from libemg._datasets import dataset +from libemg._datasets import emg_epn612 +from libemg._datasets import fors_emg +from libemg._datasets import fougner_lp +from libemg._datasets import grab_myo +from libemg._datasets import hyser +from libemg._datasets import intensity +from libemg._datasets import kaufmann_md +from libemg._datasets import myodisco +from libemg._datasets import nina_pro +from libemg._datasets import one_subject_emager from libemg._datasets import one_subject_myo - +from libemg._datasets import radmand_lp From f3cd6b059eaf72f1f8f3f93e6b30d314a4f393db Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 30 Sep 2024 14:27:57 -0300 Subject: [PATCH 076/129] Updated myodisco --- libemg/_datasets/myodisco.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libemg/_datasets/myodisco.py b/libemg/_datasets/myodisco.py index 1caccd12..6b225e68 100644 --- a/libemg/_datasets/myodisco.py +++ b/libemg/_datasets/myodisco.py @@ -43,9 +43,9 @@ def prepare_data(self, split = False): fe = FeatureExtractor() # We need to parse each item to remove no motion for i, d in enumerate(odh.data): - w = get_windows(d, 20, 10) + w = get_windows(d, 20, 5) mav = fe.extract_features(['MAV'], w, array=True) - max_idx = np.argmax(np.mean(mav, axis=1)) * 10 + 20 + max_idx = np.argmax(np.mean(mav, axis=1)) * 5 if odh.classes[i][0][0] == 6: odh.data[i] = d[100:200] else: From 3c41d00612ccf226b5370858f0c63e0d44162b43 Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 30 Sep 2024 14:29:56 -0300 Subject: [PATCH 077/129] Updates --- libemg/_datasets/myodisco.py | 14 
++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/libemg/_datasets/myodisco.py b/libemg/_datasets/myodisco.py index 6b225e68..8674e704 100644 --- a/libemg/_datasets/myodisco.py +++ b/libemg/_datasets/myodisco.py @@ -45,19 +45,13 @@ def prepare_data(self, split = False): for i, d in enumerate(odh.data): w = get_windows(d, 20, 5) mav = fe.extract_features(['MAV'], w, array=True) - max_idx = np.argmax(np.mean(mav, axis=1)) * 5 if odh.classes[i][0][0] == 6: odh.data[i] = d[100:200] else: - low = max_idx-50 - high = max_idx+50 - if low < 0: - high += np.abs(low) - low = 0 - elif high >= len(odh.data[i]): - low -= np.abs(len(odh.data[i])-high) - high = len(odh.data[i]) - odh.data[i] = d[low:high] + mval = np.argmax(np.mean(mav, axis=1)) * 5 + max_idx = min([len(d), mval + 50]) + min_idx = max([0, mval - 50]) + odh.data[i] = d[min_idx:max_idx] odh.sets[i] = np.ones((len(odh.data[i]), 1)) * odh.sets[i][0][0] odh.classes[i] = np.ones((len(odh.data[i]), 1)) * odh.classes[i][0][0] From a3d4b2e76408009e99b1b9af7be9963bbb250b51 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Tue, 1 Oct 2024 13:56:05 -0300 Subject: [PATCH 078/129] added h5py --- requirements.txt | 1 + setup.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 619df973..1618bd49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,3 +28,4 @@ dearpygui opencv-python datetime websockets==8.1 +h5py \ No newline at end of file diff --git a/setup.py b/setup.py index 436f1b7d..95cbfe00 100644 --- a/setup.py +++ b/setup.py @@ -40,7 +40,8 @@ "opencv-python", "pythonnet", "bleak", - "dearpygui" + "dearpygui", + "h5py" ], keywords=[ "emg", From 16408219926434abc4860e33431bdfdee56ae1c5 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Tue, 1 Oct 2024 17:32:18 -0300 Subject: [PATCH 079/129] HyserNDOF and HyserRandom Classes Added classes for Hyser NDOF and random datasets. 
--- libemg/_datasets/hyser.py | 112 ++++++++++++++++++++++++++++++++------ 1 file changed, 96 insertions(+), 16 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 1bf93ab3..c8aa04d7 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -20,49 +20,129 @@ def __init__(self, gestures, num_reps, description, dataset_folder, analysis = ' self.url = 'https://www.physionet.org/content/hd-semg/1.0.0/' self.dataset_folder = dataset_folder self.analysis = analysis + + sessions_values = ['1', '2'] if self.analysis == 'sessions' else ['1'] # only grab first session unless both are desired + self.common_regex_filters = [ + RegexFilter(left_bound='subject', right_bound='_session', values=[str(idx + 1).zfill(2) for idx in range(self.num_subjects)], description='subjects'), # +1 due to Python indexing + RegexFilter(left_bound='_session', right_bound='/', values=sessions_values, description='sessions'), + RegexFilter(left_bound='_sample', right_bound='.hea', values=[str(idx + 1) for idx in range(self.num_reps)], description='reps') + ] def prepare_data(self, split = False): if (not self.check_exists(self.dataset_folder)): raise FileNotFoundError(f"Didn't find Hyser data in {self.dataset_folder} directory. Please download the dataset and \ store it in the appropriate directory before running prepare_data(). See {self.url} for download details.") return self._prepare_data_helper(split=split) - + @abstractmethod - def _prepare_data_helper(self, split = False): + def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ... class Hyser1DOF(Hyser): def __init__(self, dataset_folder = 'Hyser1DOF', analysis = 'baseline'): - gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little Finger'} + gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} definition = 'Hyser 1 DOF dataset. Includes within-DOF finger movements. 
Ground truth finger forces are recorded for use in finger force regression.' super().__init__(gestures=gestures, num_reps=3, description=definition, dataset_folder=dataset_folder, analysis=analysis) def _prepare_data_helper(self, split = False): - sessions_values = ['1', '2'] if self.analysis == 'session' else ['1'] # only grab first session unless both are desired - common_filters = [ - RegexFilter(left_bound='subject', right_bound='_session', values=[str(idx).zfill(2) for idx in range(self.num_subjects + 1)], description='subjects'), # +1 due to Python indexing - RegexFilter(left_bound='_session', right_bound='/1dof_', values=sessions_values, description='session'), - RegexFilter(left_bound='_finger', right_bound='_sample', values=['1', '2', '3', '4', '5'], description='finger'), - RegexFilter(left_bound='_sample', right_bound='.hea', values=['1', '2', '3'], description='rep') - ] + filename_filters = deepcopy(self.common_regex_filters) + filename_filters.append(RegexFilter(left_bound='_finger', right_bound='_sample', values=['1', '2', '3', '4', '5'], description='finger')) - regex_filters = deepcopy(common_filters) + regex_filters = deepcopy(filename_filters) regex_filters.append(RegexFilter(left_bound='1dof_', right_bound='_finger', values=['raw'], description='data_type')) metadata_fetchers = [ FilePackager(RegexFilter(left_bound='/1dof_', right_bound='_finger', values=['force'], description='labels'), - package_function=common_filters, load='p_signal') + package_function=filename_filters, load='p_signal') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + data = odh + if split: + if self.analysis == 'sessions': + data = {'All': odh, 'Train': odh.isolate_data('sessions', [0], fast=True), 'Test': odh.isolate_data('sessions', [1], fast=True)} + elif self.analysis == 'baseline': + data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1], fast=True), 'Test': 
odh.isolate_data('reps', [2], fast=True)} + else: + raise ValueError(f"Unexpected value for analysis. Suported values are session, baseline. Got: {self.analysis}.") + return data + + +class HyserNDOF(Hyser): + def __init__(self, dataset_folder = 'HyserNDOF', analysis = 'baseline'): + # TODO: Add a 'regression' flag... maybe add a 'DOFs' parameter instead of just gestures? + gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} + definition = 'Hyser N DOF dataset. Includes combined finger movements. Ground truth finger forces are recorded for use in finger force regression.' + super().__init__(gestures=gestures, num_reps=2, description=definition, dataset_folder=dataset_folder, analysis=analysis) + self.finger_combinations = { + 1: 'Thumb + Index', + 2: 'Thumb + Middle', + 3: 'Thumg + Ring', + 4: 'Thumb + Little', + 5: 'Index + Middle', + 6: 'Thumb + Index + Middle', + 7: 'Index + Middle + Ring', + 8: 'Middle + Ring + Little', + 9: 'Index + Middle + Ring + Little', + 10: 'All Fingers', + 11: 'Thumb + Index (Opposing)', + 12: 'Thumb + Middle (Opposing)', + 13: 'Thumg + Ring (Opposing)', + 14: 'Thumb + Little (Opposing)', + 15: 'Index + Middle (Opposing)' + } + + def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: + filename_filters = deepcopy(self.common_regex_filters) + filename_filters.append(RegexFilter(left_bound='_combination', right_bound='_sample', values=[str(idx + 1) for idx in range(len(self.finger_combinations))], description='finger_combinations')) + + regex_filters = deepcopy(filename_filters) + regex_filters.append(RegexFilter(left_bound='/ndof_', right_bound='_combination', values=['raw'], description='data_type')) + + metadata_fetchers = [ + FilePackager(RegexFilter(left_bound='/ndof_', right_bound='_combination', values=['force'], description='labels'), + package_function=filename_filters, load='p_signal') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, 
regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + data = odh + if split: + if self.analysis == 'sessions': + data = {'All': odh, 'Train': odh.isolate_data('sessions', [0], fast=True), 'Test': odh.isolate_data('sessions', [1], fast=True)} + elif self.analysis == 'baseline': + data = {'All': odh, 'Train': odh.isolate_data('reps', [0], fast=True), 'Test': odh.isolate_data('reps', [1], fast=True)} + else: + raise ValueError(f"Unexpected value for analysis. Suported values are session, baseline. Got: {self.analysis}.") + + return data + + +class HyserRandom(Hyser): + def __init__(self, dataset_folder = 'HyserRandom', analysis = 'baseline'): + gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} + definition = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' + super().__init__(gestures=gestures, num_reps=5, description=definition, dataset_folder=dataset_folder, analysis=analysis) + + def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: + filename_filters = deepcopy(self.common_regex_filters) + + regex_filters = deepcopy(filename_filters) + regex_filters.append(RegexFilter(left_bound='/random_', right_bound='_sample', values=['raw'], description='data_type')) + + metadata_fetchers = [ + FilePackager(RegexFilter(left_bound='/random_', right_bound='_sample', values=['force'], description='labels'), + package_function=filename_filters, load='p_signal') ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) data = odh if split: - # Can likely move this to parent class... 
- if self.analysis == 'session': - data = {'All': odh, 'Train': odh.isolate_data('session', [0], fast=True), 'Test': odh.isolate_data('session', [1], fast=True)} + if self.analysis == 'sessions': + data = {'All': odh, 'Train': odh.isolate_data('sessions', [0], fast=True), 'Test': odh.isolate_data('sessions', [1], fast=True)} elif self.analysis == 'baseline': - data = {'All': odh, 'Train': odh.isolate_data('rep', [0, 1], fast=True), 'Test': odh.isolate_data('rep', [2], fast=True)} + data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2], fast=True), 'Test': odh.isolate_data('reps', [3, 4], fast=True)} else: raise ValueError(f"Unexpected value for analysis. Suported values are session, baseline. Got: {self.analysis}.") + return data From 050598befee38f92f41b02c2f2797c1ab63d71a6 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 2 Oct 2024 11:48:21 -0300 Subject: [PATCH 080/129] Add type hint to RegexFilter Specified that values should be a list of strings. --- libemg/data_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index aa67de90..ab0392c8 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -29,7 +29,7 @@ from libemg.utils import get_windows, _get_fn_windows, _get_mode_windows, make_regex class RegexFilter: - def __init__(self, left_bound: str, right_bound: str, values: Sequence, description: str): + def __init__(self, left_bound: str, right_bound: str, values: Sequence[str], description: str): """Filters files based on filenames that match the associated regex pattern and grabs metadata based on the regex pattern. Parameters From 3242bb569c2e6aab712b3beb795f822460d35043 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 2 Oct 2024 13:28:39 -0300 Subject: [PATCH 081/129] Handle single values from MetadataFetcher MetadataFetcher stated in the docs that it expected an N x M array, but didn't throw an error as long as an array was returned. 
Added a check to ensure that single values aren't being returned. If a single value array is returned, it is cast to an N x 1 array. --- libemg/data_handler.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index ab0392c8..0c77d7ad 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -96,8 +96,9 @@ def __init__(self, description: str): self.description = description @abstractmethod - def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[str]): + def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[str]) -> npt.NDArray: """Fetch metadata. Must return a (N x M) numpy.ndarray, where N is the number of samples in the EMG data and M is the number of columns in the metadata. + If a single value array is returned (0D or 1D), it will be cast to a N x 1 array where all values are the original value. Parameters ---------- @@ -375,6 +376,9 @@ def append_to_attribute(name, value): # Fetch remaining metadata for metadata_fetcher in metadata_fetchers: metadata = metadata_fetcher(file, file_data, all_files) + if metadata.ndim == 0 or metadata.shape[0] == 1: + # Cast to array with the same # of samples as EMG data + metadata = np.full((file_data.shape[0], 1), fill_value=metadata) if metadata.ndim == 1: # Ensure that output is always 2D array metadata = np.expand_dims(metadata, axis=1) From 70d801948ee9435dc77cf017f3be969155b71213 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 2 Oct 2024 14:11:25 -0300 Subject: [PATCH 082/129] Hyser PR Dataset Implemented pattern recognition Hyser dataset. 
--- libemg/_datasets/hyser.py | 143 ++++++++++++++++++++++++++++++++++---- 1 file changed, 131 insertions(+), 12 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index c8aa04d7..545f248d 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -1,7 +1,10 @@ from abc import ABC, abstractmethod from copy import deepcopy +from pathlib import Path -from libemg.data_handler import RegexFilter, FilePackager, OfflineDataHandler +import numpy as np + +from libemg.data_handler import RegexFilter, FilePackager, OfflineDataHandler, MetadataFetcher from libemg._datasets.dataset import Dataset class Hyser(Dataset, ABC): @@ -24,8 +27,7 @@ def __init__(self, gestures, num_reps, description, dataset_folder, analysis = ' sessions_values = ['1', '2'] if self.analysis == 'sessions' else ['1'] # only grab first session unless both are desired self.common_regex_filters = [ RegexFilter(left_bound='subject', right_bound='_session', values=[str(idx + 1).zfill(2) for idx in range(self.num_subjects)], description='subjects'), # +1 due to Python indexing - RegexFilter(left_bound='_session', right_bound='/', values=sessions_values, description='sessions'), - RegexFilter(left_bound='_sample', right_bound='.hea', values=[str(idx + 1) for idx in range(self.num_reps)], description='reps') + RegexFilter(left_bound='_session', right_bound='/', values=sessions_values, description='sessions') ] def prepare_data(self, split = False): @@ -42,11 +44,12 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: class Hyser1DOF(Hyser): def __init__(self, dataset_folder = 'Hyser1DOF', analysis = 'baseline'): gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} - definition = 'Hyser 1 DOF dataset. Includes within-DOF finger movements. Ground truth finger forces are recorded for use in finger force regression.' 
- super().__init__(gestures=gestures, num_reps=3, description=definition, dataset_folder=dataset_folder, analysis=analysis) + description = 'Hyser 1 DOF dataset. Includes within-DOF finger movements. Ground truth finger forces are recorded for use in finger force regression.' + super().__init__(gestures=gestures, num_reps=3, description=description, dataset_folder=dataset_folder, analysis=analysis) def _prepare_data_helper(self, split = False): filename_filters = deepcopy(self.common_regex_filters) + filename_filters.append(RegexFilter(left_bound='_sample', right_bound='.hea', values=[str(idx + 1) for idx in range(self.num_reps)], description='reps')) filename_filters.append(RegexFilter(left_bound='_finger', right_bound='_sample', values=['1', '2', '3', '4', '5'], description='finger')) regex_filters = deepcopy(filename_filters) @@ -65,7 +68,7 @@ def _prepare_data_helper(self, split = False): elif self.analysis == 'baseline': data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1], fast=True), 'Test': odh.isolate_data('reps', [2], fast=True)} else: - raise ValueError(f"Unexpected value for analysis. Suported values are session, baseline. Got: {self.analysis}.") + raise ValueError(f"Unexpected value for analysis. Suported values are sessions, baseline. Got: {self.analysis}.") return data @@ -73,8 +76,8 @@ class HyserNDOF(Hyser): def __init__(self, dataset_folder = 'HyserNDOF', analysis = 'baseline'): # TODO: Add a 'regression' flag... maybe add a 'DOFs' parameter instead of just gestures? gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} - definition = 'Hyser N DOF dataset. Includes combined finger movements. Ground truth finger forces are recorded for use in finger force regression.' - super().__init__(gestures=gestures, num_reps=2, description=definition, dataset_folder=dataset_folder, analysis=analysis) + description = 'Hyser N DOF dataset. Includes combined finger movements. 
Ground truth finger forces are recorded for use in finger force regression.' + super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis) self.finger_combinations = { 1: 'Thumb + Index', 2: 'Thumb + Middle', @@ -95,6 +98,7 @@ def __init__(self, dataset_folder = 'HyserNDOF', analysis = 'baseline'): def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) + filename_filters.append(RegexFilter(left_bound='_sample', right_bound='.hea', values=[str(idx + 1) for idx in range(self.num_reps)], description='reps')) filename_filters.append(RegexFilter(left_bound='_combination', right_bound='_sample', values=[str(idx + 1) for idx in range(len(self.finger_combinations))], description='finger_combinations')) regex_filters = deepcopy(filename_filters) @@ -113,7 +117,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: elif self.analysis == 'baseline': data = {'All': odh, 'Train': odh.isolate_data('reps', [0], fast=True), 'Test': odh.isolate_data('reps', [1], fast=True)} else: - raise ValueError(f"Unexpected value for analysis. Suported values are session, baseline. Got: {self.analysis}.") + raise ValueError(f"Unexpected value for analysis. Suported values are sessions, baseline. Got: {self.analysis}.") return data @@ -121,11 +125,12 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: class HyserRandom(Hyser): def __init__(self, dataset_folder = 'HyserRandom', analysis = 'baseline'): gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} - definition = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' - super().__init__(gestures=gestures, num_reps=5, description=definition, dataset_folder=dataset_folder, analysis=analysis) + description = 'Hyser random dataset. 
Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' + super().__init__(gestures=gestures, num_reps=5, description=description, dataset_folder=dataset_folder, analysis=analysis) def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) + filename_filters.append(RegexFilter(left_bound='_sample', right_bound='.hea', values=[str(idx + 1) for idx in range(self.num_reps)], description='reps')) regex_filters = deepcopy(filename_filters) regex_filters.append(RegexFilter(left_bound='/random_', right_bound='_sample', values=['raw'], description='data_type')) @@ -143,6 +148,120 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: elif self.analysis == 'baseline': data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2], fast=True), 'Test': odh.isolate_data('reps', [3, 4], fast=True)} else: - raise ValueError(f"Unexpected value for analysis. Suported values are session, baseline. Got: {self.analysis}.") + raise ValueError(f"Unexpected value for analysis. Suported values are sessions, baseline. Got: {self.analysis}.") + + return data + + +class _PRLabelsFetcher(MetadataFetcher): + def __init__(self): + super().__init__(description='labels') + self.sample_regex = RegexFilter(left_bound='_sample', right_bound='.hea', values=[str(idx + 1) for idx in range(204)], description='samples') + + def _get_labels(self, filename): + label_filename_map = { + 'dynamic': 'label_dynamic.txt', + 'maintenance': 'label_maintenance.txt' + } + matches = [] + for task_type, labels_file in label_filename_map.items(): + if task_type in filename: + matches.append(labels_file) + + assert len(matches) == 1, f"Expected a single label file for this file, but got {len(matches)}. Got filename: {filename}. Filename should contain either 'dynamic' or 'maintenance'." 
+ + labels_file = matches[0] + parent = Path(filename).absolute().parent + labels_file = Path(parent, labels_file).as_posix() + return np.loadtxt(labels_file, delimiter=',', dtype=int) + + def __call__(self, filename, file_data, all_files): + labels = self._get_labels(filename) + sample_idx = self.sample_regex.get_metadata(filename) + return labels[sample_idx] + + +class _PRRepFetcher(_PRLabelsFetcher): + def __init__(self): + super().__init__() + self.description = 'reps' + + def __call__(self, filename, file_data, all_files): + label = super().__call__(filename, file_data, all_files) + labels = self._get_labels(filename) + same_label_mask = np.where(labels == label)[0] + sample_idx = self.sample_regex.get_metadata(filename) + rep_idx = list(same_label_mask).index(sample_idx) + if 'dynamic' in filename: + # Each trial is 3 dynamic reps, 1 maintenance rep + rep_idx = rep_idx // 3 + + assert rep_idx <= 1, f"Rep values should be 0 or 1 (2 total reps). Got: {rep_idx}." + return np.array(rep_idx) + + +class HyserPR(Hyser): + def __init__(self, dataset_folder = 'HyserPR', analysis = 'baseline'): + gestures = { + 1: 'Thumb Extension', + 2: 'Index Finger Extension', + 3: 'Middle Finger Extension', + 4: 'Ring Finger Extension', + 5: 'Little Finger Extension', + 6: 'Wrist Flexion', + 7: 'Wrist Extension', + 8: 'Wrist Radial', + 9: 'Wrist Ulnar', + 10: 'Wrist Pronation', + 11: 'Wrist Supination', + 12: 'Extension of Thumb and Index Fingers', + 13: 'Extension of Index and Middle Fingers', + 14: 'Wrist Flexion Combined with Hand Close', + 15: 'Wrist Extension Combined with Hand Close', + 16: 'Wrist Radial Combined with Hand Close', + 17: 'Wrist Ulnar Combined with Hand Close', + 18: 'Wrist Pronation Combined with Hand Close', + 19: 'Wrist Supination Combined with Hand Close', + 20: 'Wrist Flexion Combined with Hand Open', + 21: 'Wrist Extension Combined with Hand Open', + 22: 'Wrist Radial Combined with Hand Open', + 23: 'Wrist Ulnar Combined with Hand Open', + 24: 'Wrist 
Pronation Combined with Hand Open', + 25: 'Wrist Supination Combined with Hand Open', + 26: 'Extension of Thumb, Index and Middle Fingers', + 27: 'Extension of Index, Middle and Ring Fingers', + 28: 'Extension of Middle, Ring and Little Fingers', + 29: 'Extension of Index, Middle, Ring and Little Fingers', + 30: 'Hand Close', + 31: 'Hand Open', + 32: 'Thumb and Index Fingers Pinch', + 33: 'Thumb, Index and Middle Fingers Pinch', + 34: 'Thumb and Middle Fingers Pinch' + } + description = 'Hyser pattern recognition (PR) dataset. Includes dynamic and maintenance tasks for 34 hand gestures.' + super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis) # num_reps=2 b/c 2 trials + + def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: + filename_filters = deepcopy(self.common_regex_filters) + filename_filters.append(RegexFilter(left_bound='_sample', right_bound='.hea', values=[str(idx + 1) for idx in range(204)], description='samples')) # max # of dynamic tasks + filename_filters.append(RegexFilter(left_bound='/', right_bound='_', values=['dynamic', 'maintenance'], description='tasks')) + + regex_filters = deepcopy(filename_filters) + regex_filters.append(RegexFilter(left_bound='_', right_bound='_sample', values=['raw'], description='data_type')) + + metadata_fetchers = [ + _PRLabelsFetcher(), + _PRRepFetcher() + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + data = odh + if split: + if self.analysis == 'sessions': + data = {'All': odh, 'Train': odh.isolate_data('sessions', [0], fast=True), 'Test': odh.isolate_data('sessions', [1], fast=True)} + elif self.analysis == 'baseline': + data = {'All': odh, 'Train': odh.isolate_data('reps', [0], fast=True), 'Test': odh.isolate_data('reps', [1], fast=True)} + else: + raise ValueError(f"Unexpected value for analysis. 
Suported values are sessions, baseline. Got: {self.analysis}.") return data From ffb9564dcde275864350ca6661bed4e5ea828fed Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 2 Oct 2024 15:31:05 -0300 Subject: [PATCH 083/129] Remove subject 10 from random task dataset Subject 10 is missing a labels file, so removed this subject from the random task dataset. --- libemg/_datasets/hyser.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 545f248d..3304fd78 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -7,8 +7,9 @@ from libemg.data_handler import RegexFilter, FilePackager, OfflineDataHandler, MetadataFetcher from libemg._datasets.dataset import Dataset + class Hyser(Dataset, ABC): - def __init__(self, gestures, num_reps, description, dataset_folder, analysis = 'baseline'): + def __init__(self, gestures, num_reps, description, dataset_folder, analysis = 'baseline', subjects = None): super().__init__( sampling=2048, num_channels=256, @@ -20,13 +21,16 @@ def __init__(self, gestures, num_reps, description, dataset_folder, analysis = ' citation='https://doi.org/10.13026/ym7v-bh53' ) + if subjects is None: + subjects = [str(idx + 1).zfill(2) for idx in range(self.num_subjects)] + self.url = 'https://www.physionet.org/content/hd-semg/1.0.0/' self.dataset_folder = dataset_folder self.analysis = analysis sessions_values = ['1', '2'] if self.analysis == 'sessions' else ['1'] # only grab first session unless both are desired self.common_regex_filters = [ - RegexFilter(left_bound='subject', right_bound='_session', values=[str(idx + 1).zfill(2) for idx in range(self.num_subjects)], description='subjects'), # +1 due to Python indexing + RegexFilter(left_bound='subject', right_bound='_session', values=subjects, description='subjects'), # +1 due to Python indexing RegexFilter(left_bound='_session', right_bound='/', values=sessions_values, description='sessions') 
] @@ -126,7 +130,8 @@ class HyserRandom(Hyser): def __init__(self, dataset_folder = 'HyserRandom', analysis = 'baseline'): gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' - super().__init__(gestures=gestures, num_reps=5, description=description, dataset_folder=dataset_folder, analysis=analysis) + subjects = [str(idx + 1).zfill(2) for idx in range(20) if idx != 9] # subject 10 is missing the labels file for sample1 + super().__init__(gestures=gestures, num_reps=5, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) From a4b2d3ca440489e0bafdefbad4b054af6297deca Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 2 Oct 2024 15:33:32 -0300 Subject: [PATCH 084/129] Rename Hyser to _Hyser Added _ to signify that this is a hidden class. --- libemg/_datasets/hyser.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 3304fd78..66a2ebc2 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -8,7 +8,7 @@ from libemg._datasets.dataset import Dataset -class Hyser(Dataset, ABC): +class _Hyser(Dataset, ABC): def __init__(self, gestures, num_reps, description, dataset_folder, analysis = 'baseline', subjects = None): super().__init__( sampling=2048, @@ -45,7 +45,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ... -class Hyser1DOF(Hyser): +class Hyser1DOF(_Hyser): def __init__(self, dataset_folder = 'Hyser1DOF', analysis = 'baseline'): gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser 1 DOF dataset. Includes within-DOF finger movements. 
Ground truth finger forces are recorded for use in finger force regression.' @@ -76,7 +76,7 @@ def _prepare_data_helper(self, split = False): return data -class HyserNDOF(Hyser): +class HyserNDOF(_Hyser): def __init__(self, dataset_folder = 'HyserNDOF', analysis = 'baseline'): # TODO: Add a 'regression' flag... maybe add a 'DOFs' parameter instead of just gestures? gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} @@ -126,7 +126,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: return data -class HyserRandom(Hyser): +class HyserRandom(_Hyser): def __init__(self, dataset_folder = 'HyserRandom', analysis = 'baseline'): gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' @@ -205,7 +205,7 @@ def __call__(self, filename, file_data, all_files): return np.array(rep_idx) -class HyserPR(Hyser): +class HyserPR(_Hyser): def __init__(self, dataset_folder = 'HyserPR', analysis = 'baseline'): gestures = { 1: 'Thumb Extension', From dbbd5fefd0a762ce0d2f1f1421623f39816b5e14 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Wed, 2 Oct 2024 15:39:35 -0300 Subject: [PATCH 085/129] Hyser documentation Added documentation to Hyser dataset classes. --- libemg/_datasets/hyser.py | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 66a2ebc2..98baf942 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -47,6 +47,16 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: class Hyser1DOF(_Hyser): def __init__(self, dataset_folder = 'Hyser1DOF', analysis = 'baseline'): + """1 degree of freedom (DOF) Hyser dataset. 
+ + Parameters + ---------- + dataset_folder: str, default='Hyser1DOF' + Directory that contains Hyser 1 DOF dataset. + analysis: str, default='baseline' + Determines which type of data will be extracted and considered train/test splits. If 'baseline', only grabs data from the first session and splits based on + reps. If 'sessions', grabs data from both sessions and return the first session as train and the second session as test. + """ gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser 1 DOF dataset. Includes within-DOF finger movements. Ground truth finger forces are recorded for use in finger force regression.' super().__init__(gestures=gestures, num_reps=3, description=description, dataset_folder=dataset_folder, analysis=analysis) @@ -78,6 +88,16 @@ def _prepare_data_helper(self, split = False): class HyserNDOF(_Hyser): def __init__(self, dataset_folder = 'HyserNDOF', analysis = 'baseline'): + """N degree of freedom (DOF) Hyser dataset. + + Parameters + ---------- + dataset_folder: str, default='HyserNDOF' + Directory that contains Hyser N DOF dataset. + analysis: str, default='baseline' + Determines which type of data will be extracted and considered train/test splits. If 'baseline', only grabs data from the first session and splits based on + reps. If 'sessions', grabs data from both sessions and return the first session as train and the second session as test. + """ # TODO: Add a 'regression' flag... maybe add a 'DOFs' parameter instead of just gestures? gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser N DOF dataset. Includes combined finger movements. Ground truth finger forces are recorded for use in finger force regression.' @@ -128,6 +148,16 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: class HyserRandom(_Hyser): def __init__(self, dataset_folder = 'HyserRandom', analysis = 'baseline'): + """Random task (DOF) Hyser dataset. 
+ + Parameters + ---------- + dataset_folder: str, default='HyserRandom' + Directory that contains Hyser random task dataset. + analysis: str, default='baseline' + Determines which type of data will be extracted and considered train/test splits. If 'baseline', only grabs data from the first session and splits based on + reps. If 'sessions', grabs data from both sessions and return the first session as train and the second session as test. + """ gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' subjects = [str(idx + 1).zfill(2) for idx in range(20) if idx != 9] # subject 10 is missing the labels file for sample1 @@ -207,6 +237,16 @@ def __call__(self, filename, file_data, all_files): class HyserPR(_Hyser): def __init__(self, dataset_folder = 'HyserPR', analysis = 'baseline'): + """Pattern recognition (PR) Hyser dataset. + + Parameters + ---------- + dataset_folder: str, default='HyserPR' + Directory that contains Hyser PR dataset. + analysis: str, default='baseline' + Determines which type of data will be extracted and considered train/test splits. If 'baseline', only grabs data from the first session and splits based on + reps. If 'sessions', grabs data from both sessions and return the first session as train and the second session as test. 
+ """ gestures = { 1: 'Thumb Extension', 2: 'Index Finger Extension', From aa032c914b153ad1a36ed45e02a26a1a1fc07e05 Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 7 Oct 2024 16:04:15 -0300 Subject: [PATCH 086/129] Don't do any processing on the dataset --- libemg/_datasets/emg_epn612.py | 47 +++++++++++++++++----------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 2f948b6f..4881eef2 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -60,8 +60,8 @@ def prepare_data(self, split = False): if i % 150 == 0: te_reps = [0,0,0,0,0,0] - odh_tr = self._update_odh(odh_tr) - odh_te = self._update_odh(odh_te) + # odh_tr = self._update_odh(odh_tr) + # odh_te = self._update_odh(odh_te) odh_all = odh_tr + odh_te data = odh_all @@ -71,26 +71,27 @@ def prepare_data(self, split = False): return data def _update_odh(self, odh): - fe = FeatureExtractor() - for i_e, e in enumerate(odh.data): - if odh.classes[i_e][0][0] == 0: - # It is no motion and we need to crop it (make datset even) - odh.data[i_e] = e[100:200] - odh.subjects[i_e] = odh.subjects[i_e][100:200] - odh.classes[i_e] = odh.classes[i_e][100:200] - odh.reps[i_e] = odh.reps[i_e][100:200] - else: - # It is an active class and we are croppign it - if len(e) > 100: - windows = get_windows(e, 20, 5) - feats = fe.extract_features(['MAV'], windows, array=True) - mval = np.argmax(np.mean(feats, axis=1)) * 5 - max_idx = min([len(e), mval + 50]) - min_idx = max([0, mval - 50]) - odh.data[i_e] = e[min_idx:max_idx] - odh.subjects[i_e] = odh.subjects[i_e][min_idx:max_idx] - odh.classes[i_e] = odh.classes[i_e][min_idx:max_idx] - odh.reps[i_e] = odh.reps[i_e][min_idx:max_idx] - return odh + pass + # fe = FeatureExtractor() + # for i_e, e in enumerate(odh.data): + # if odh.classes[i_e][0][0] == 0: + # # It is no motion and we need to crop it (make datset even) + # odh.data[i_e] = e[100:200] + # odh.subjects[i_e] = 
odh.subjects[i_e][100:200] + # odh.classes[i_e] = odh.classes[i_e][100:200] + # odh.reps[i_e] = odh.reps[i_e][100:200] + # else: + # # It is an active class and we are croppign it + # if len(e) > 100: + # windows = get_windows(e, 20, 5) + # feats = fe.extract_features(['MAV'], windows, array=True) + # mval = np.argmax(np.mean(feats, axis=1)) * 5 + # max_idx = min([len(e), mval + 50]) + # min_idx = max([0, mval - 50]) + # odh.data[i_e] = e[min_idx:max_idx] + # odh.subjects[i_e] = odh.subjects[i_e][min_idx:max_idx] + # odh.classes[i_e] = odh.classes[i_e][min_idx:max_idx] + # odh.reps[i_e] = odh.reps[i_e][min_idx:max_idx] + # return odh \ No newline at end of file From de89e6d1adc162d64ed31d21682a16f112ef0d36 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 11 Oct 2024 11:54:32 -0300 Subject: [PATCH 087/129] Add NinaproDB8 --- libemg/_datasets/nina_pro.py | 77 +++++++++++++++++++++++++----------- libemg/datasets.py | 2 +- 2 files changed, 56 insertions(+), 23 deletions(-) diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index 516a9cc9..246cf395 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -47,7 +47,7 @@ def convert_to_csv(self, mat_file): mat_dir = os.path.join(*mat_dir[:-1],"") mat = sio.loadmat(mat_file) # get the data - exercise = int(mat_file.split('_')[3][1]) + exercise = int(mat_file.split('_')[-1][1]) exercise_offset = self.exercise_step[exercise-1] # 0 reps already included data = mat['emg'] restimulus = mat['restimulus'] @@ -92,26 +92,6 @@ def convert_to_csv(self, mat_file): tail = head os.remove(mat_file) -# class NinaproDB8(Ninapro): -# def __init__(self, save_dir='.', dataset_name="NinaProDB8"): -# Ninapro.__init__(self, save_dir, dataset_name) -# self.class_list = ["Thumb Flexion/Extension", "Thumb Abduction/Adduction", "Index Finger Flexion/Extension", "Middle Finger Flexion/Extension", "Combined Ring and Little Fingers Flexion/Extension", -# "Index Pointer", "Cylindrical Grip", 
"Lateral Grip", "Tripod Grip"] -# self.exercise_step = [0,10,20] - -# def prepare_data(self, format=OfflineDataHandler, subjects_values = [str(i) for i in range(1,13)], -# reps_values = [str(i) for i in range(22)], -# classes_values = [str(i) for i in range(9)]): - -# if format == OfflineDataHandler: -# regex_filters = [ -# RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), -# RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), -# RegexFilter(left_bound="DB8_s", right_bound="/",values=subjects_values, description='subjects') -# ] -# odh = OfflineDataHandler() -# odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") -# return odh class NinaproDB2(Ninapro): def __init__(self, dataset_folder="NinaProDB2/"): @@ -135,6 +115,9 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None if classes_values is None: classes_values = [str(i) for i in range(50)] + + # TODO: Ensure dataglove changes are implemented here + print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): print("Please download the NinaProDB2 dataset from: https://ninapro.hevs.ch/instructions/DB2.html") @@ -151,4 +134,54 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None if split: data = {'All': odh, 'Train': odh.isolate_data('reps', [0,1,2,3], fast=True), 'Test': odh.isolate_data('reps', [4,5], fast=True)} - return data \ No newline at end of file + return data + +class NinaproDB8(Ninapro): + def __init__(self, dataset_folder="NinaProDB8/"): + gestures = { + 0: "rest", + 1: "thumb flexion/extension", + 2: "thumb abduction/adduction", + 3: "index finger flexion/extension", + 4: "middle finger flexion/extension", + 5: "combined ring and little fingers flexion/extension", + 6: "index pointer", + 7: "cylindrical grip", + 8: "lateral grip", + 9: "tripod grip" + } + + super().__init__( + 
sampling=1111, + num_channels=16, + recording_device='Delsys Trigno', + num_subjects=12, + gestures=gestures, + num_reps=22, + description='Ninapro DB8 - designed for regression of finger kinematics. Ground truth labels are provided via cyberglove data.', + citation='https://ninapro.hevs.ch/', + dataset_folder=dataset_folder + ) + self.exercise_step = [0,10,20] + + def prepare_data(self, split = False, subjects_values = None, reps_values = None, classes_values = None): + if subjects_values is None: + subjects_values = [str(i) for i in range(1,self.num_subjects + 1)] + if reps_values is None: + reps_values = [str(i) for i in range(self.num_reps)] + if classes_values is None: + classes_values = [str(i) for i in range(9)] + + self.convert_to_compatible() + + regex_filters = [ + RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), + RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), + RegexFilter(left_bound="s", right_bound="/",values=subjects_values, description='subjects') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3], fast=True), 'Test': odh.isolate_data('reps', [4, 5], fast=True)} + return data diff --git a/libemg/datasets.py b/libemg/datasets.py index f6ce4940..62258628 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -4,7 +4,7 @@ from libemg._datasets.ciil import CIIL_MinimalData, CIIL_ElectrodeShift from libemg._datasets.grab_myo import GRABMyoBaseline, GRABMyoCrossDay from libemg._datasets.continous_transitions import ContinuousTransitions -from libemg._datasets.nina_pro import NinaproDB2 +from libemg._datasets.nina_pro import NinaproDB2, NinaproDB8 from libemg._datasets.myodisco import MyoDisCo from libemg._datasets.fors_emg import FORSEMG from libemg._datasets.intensity import 
ContractionIntensity From cce14fc6e5651529bf1325cd1728400610892b7f Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 11 Oct 2024 16:54:36 -0300 Subject: [PATCH 088/129] Add note to NinaproDB8 --- libemg/_datasets/nina_pro.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index 246cf395..043dc900 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -138,6 +138,7 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None class NinaproDB8(Ninapro): def __init__(self, dataset_folder="NinaProDB8/"): + # NOTE: This expects each subject's data to be in its own zip file, so the data files for one subject end up in a single directory once we unzip them gestures = { 0: "rest", 1: "thumb flexion/extension", From 0e9cf18795faddc7fa13341645105dc815d0efec Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 11 Oct 2024 16:55:17 -0300 Subject: [PATCH 089/129] Add OneSubjectEMaGerDataset import to datasets.py --- libemg/datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libemg/datasets.py b/libemg/datasets.py index 62258628..a56ca1eb 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -1,5 +1,6 @@ from libemg._datasets._3DC import _3DCDataset from libemg._datasets.one_subject_myo import OneSubjectMyoDataset +from libemg._datasets.one_subject_emager import OneSubjectEMaGerDataset from libemg._datasets.emg_epn612 import EMGEPN612 from libemg._datasets.ciil import CIIL_MinimalData, CIIL_ElectrodeShift from libemg._datasets.grab_myo import GRABMyoBaseline, GRABMyoCrossDay From 85b8a3a7a0ceb46b9125c0a7351f7f80018d897e Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 11 Oct 2024 16:57:54 -0300 Subject: [PATCH 090/129] Add OneSubjectEMaGerDataset to dataset list --- libemg/datasets.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libemg/datasets.py b/libemg/datasets.py index a56ca1eb..719df8cc 100644 --- a/libemg/datasets.py 
+++ b/libemg/datasets.py @@ -36,6 +36,7 @@ def get_dataset_list(): 'FORS-EMG': FORSEMG, 'EMGEPN612': EMGEPN612, 'ContractionIntensity': ContractionIntensity, + 'OneSubjectEMaGer': OneSubjectEMaGerDataset } def get_dataset_info(dataset): From 58bc45c7906bdbdbe3417130f2dbbd4f4aa2f22c Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 11 Oct 2024 17:55:35 -0300 Subject: [PATCH 091/129] Fix parse_windows for 2D metadata parse_windows calls np.hstack to stack metadata, which works for 1D arrays but throws an error for 2D arrays since different files likely won't have the exact same number of samples. We also don't want this behaviour anyway since we want to stack along the sample axis. Replaced np.hstack with np.concatenate so metadata is always concatenated along the 0th axis. --- libemg/data_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 0c77d7ad..66e544b0 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -481,7 +481,7 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio metadata[k].append(file_metadata) - return np.vstack(window_data), {k: np.hstack(metadata[k]) for k in metadata.keys()} + return np.vstack(window_data), {k: np.concatenate(metadata[k], axis=0) for k in metadata.keys()} def isolate_channels(self, channels): From f7e6178cc633b076c637b1a5904ed9e8a8b4fa24 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Fri, 11 Oct 2024 18:41:02 -0300 Subject: [PATCH 092/129] Reimplement NinaPro cyberglove data Parsing cyberglove data wasn't brought over when modifying datasets. Reimplemented cyberglove parsing. 
--- libemg/_datasets/nina_pro.py | 41 ++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index 043dc900..414f58fc 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -1,5 +1,5 @@ from libemg._datasets.dataset import Dataset -from libemg.data_handler import OfflineDataHandler, RegexFilter +from libemg.data_handler import OfflineDataHandler, RegexFilter, ColumnFetcher import os import scipy.io as sio import zipfile @@ -52,16 +52,25 @@ def convert_to_csv(self, mat_file): data = mat['emg'] restimulus = mat['restimulus'] rerepetition = mat['rerepetition'] + try: + cyberglove_data = mat['glove'] + except KeyError: + # No cyberglove data + cyberglove_data = None if data.shape[0] != restimulus.shape[0]: # this happens in some cases min_shape = min([data.shape[0], restimulus.shape[0]]) data = data[:min_shape,:] restimulus = restimulus[:min_shape,] rerepetition = rerepetition[:min_shape,] + if cyberglove_data is not None: + cyberglove_data = cyberglove_data[:min_shape,] # remove 0 repetition - collection buffer remove_mask = (rerepetition != 0).squeeze() data = data[remove_mask,:] restimulus = restimulus[remove_mask] rerepetition = rerepetition[remove_mask] + if cyberglove_data is not None: + cyberglove_data = cyberglove_data[remove_mask, :] # important little not here: # the "rest" really is only the rest between motions, not a dedicated rest class. 
# there will be many more rest repetitions (as it is between every class) @@ -73,6 +82,8 @@ def convert_to_csv(self, mat_file): data = data[remove_mask,:] restimulus = restimulus[remove_mask] rerepetition = rerepetition[remove_mask] + if cyberglove_data is not None: + cyberglove_data = cyberglove_data[remove_mask, :] tail = 0 while tail < data.shape[0]-1: rep = rerepetition[tail][0] # remove the 1 offset (0 was the collection buffer) @@ -83,8 +94,13 @@ def convert_to_csv(self, mat_file): head = data.shape[0] -1 else: head = head[0] + tail + if cyberglove_data is not None: + # Combine cyberglove and EMG data + data_for_file = np.concatenate((data[tail:head, :], cyberglove_data[tail:head, :]), axis=1) + else: + data_for_file = data[tail:head,:] + # downsample to 1kHz from 2kHz using decimation - data_for_file = data[tail:head,:] data_for_file = data_for_file[::2, :] # write to csv csv_file = mat_dir + 'C' + str(motion-1) + 'R' + str(rep-1 + exercise_offset) + '.csv' @@ -106,6 +122,7 @@ def __init__(self, dataset_folder="NinaProDB2/"): 'https://ninapro.hevs.ch/', dataset_folder = dataset_folder) self.exercise_step = [0,0,0] + self.num_cyberglove_dofs = 22 def prepare_data(self, split = False, subjects_values = None, reps_values = None, classes_values = None): if subjects_values is None: @@ -115,9 +132,6 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None if classes_values is None: classes_values = [str(i) for i in range(50)] - - # TODO: Ensure dataglove changes are implemented here - print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): print("Please download the NinaProDB2 dataset from: https://ninapro.hevs.ch/instructions/DB2.html") @@ -128,8 +142,12 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), RegexFilter(left_bound="DB2_s", right_bound="/",values=subjects_values, 
description='subjects') ] + metadata_fetchers = [ + ColumnFetcher('cyberglove', column_mask=[idx for idx in range(self.num_channels, self.num_channels + self.num_cyberglove_dofs)]) + ] + emg_column_mask = [idx for idx in range(self.num_channels)] # first columns should be EMG odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers, delimiter=",", data_column=emg_column_mask) data = odh if split: data = {'All': odh, 'Train': odh.isolate_data('reps', [0,1,2,3], fast=True), 'Test': odh.isolate_data('reps', [4,5], fast=True)} @@ -138,7 +156,7 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None class NinaproDB8(Ninapro): def __init__(self, dataset_folder="NinaProDB8/"): - # NOTE: This expects each subject's data to be in its own zip file, so the data files for one subject end up in a single directory once we unzip them + # NOTE: This expects each subject's data to be in its own zip file, so the data files for one subject end up in a single directory once we unzip them (e.g., DB8_s1) gestures = { 0: "rest", 1: "thumb flexion/extension", @@ -164,6 +182,7 @@ def __init__(self, dataset_folder="NinaProDB8/"): dataset_folder=dataset_folder ) self.exercise_step = [0,10,20] + self.num_cyberglove_dofs = 18 def prepare_data(self, split = False, subjects_values = None, reps_values = None, classes_values = None): if subjects_values is None: @@ -178,10 +197,14 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None regex_filters = [ RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), - RegexFilter(left_bound="s", right_bound="/",values=subjects_values, description='subjects') + 
RegexFilter(left_bound="DB8_s", right_bound="/",values=subjects_values, description='subjects') + ] + metadata_fetchers = [ + ColumnFetcher('cyberglove', column_mask=[idx for idx in range(self.num_channels, self.num_channels + self.num_cyberglove_dofs)]) ] + emg_column_mask = [idx for idx in range(self.num_channels)] # first columns should be EMG odh = OfflineDataHandler() - odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers, delimiter=",", data_column=emg_column_mask) data = odh if split: data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3], fast=True), 'Test': odh.isolate_data('reps', [4, 5], fast=True)} From 86e0aacb5dff06d7fd46aaa82a890f651f783f0f Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 17 Oct 2024 11:57:01 -0300 Subject: [PATCH 093/129] Allow empty strings in RegexFilter Added option to pass in an empty string as a description for a RegexFilter for cases where you want to filter files, like finding a labels file, but don't necessarily want that metadata to be stored. --- libemg/data_handler.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 66e544b0..05e0290d 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -41,7 +41,7 @@ def __init__(self, left_bound: str, right_bound: str, values: Sequence[str], des values: list The values between the two regexes. description: str - Description of filter - used to name the metadata field. + Description of filter - used to name the metadata field. Pass in an empty string to filter files without storing the values as metadata. """ if values is None: raise ValueError('Expected a list of values for RegexFilter, but got None. 
Using regex wildcard is not supported with the RegexFilter.') @@ -325,6 +325,9 @@ def get_data(self, folder_location: str, regex_filters: Sequence[RegexFilter], m Raises ValueError if folder_location is not a valid directory. """ def append_to_attribute(name, value): + if name == '': + # Don't want this data saved to data handler, so skip it + return if not hasattr(self, name): setattr(self, name, []) self.extra_attributes.append(name) From 20c585b44fe3fb9c1c550425854fa511118e7311 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 17 Oct 2024 11:58:30 -0300 Subject: [PATCH 094/129] Properly handle cyberglove data Previous implementation didn't consider that some files are skipped because they don't have cyberglove data. Added logic to parse all data, and then allow user to select what they want to grab based on if they're using the cyberglove. --- libemg/_datasets/nina_pro.py | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index 414f58fc..c65df0cc 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -1,3 +1,5 @@ +from pathlib import Path + from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter, ColumnFetcher import os @@ -54,9 +56,11 @@ def convert_to_csv(self, mat_file): rerepetition = mat['rerepetition'] try: cyberglove_data = mat['glove'] + cyberglove_directory = 'cyberglove' except KeyError: # No cyberglove data cyberglove_data = None + cyberglove_directory = '' if data.shape[0] != restimulus.shape[0]: # this happens in some cases min_shape = min([data.shape[0], restimulus.shape[0]]) data = data[:min_shape,:] @@ -103,14 +107,15 @@ def convert_to_csv(self, mat_file): # downsample to 1kHz from 2kHz using decimation data_for_file = data_for_file[::2, :] # write to csv - csv_file = mat_dir + 'C' + str(motion-1) + 'R' + str(rep-1 + exercise_offset) + '.csv' + csv_file = 
Path(mat_dir, cyberglove_directory, f"C{motion - 1}R{rep - 1 + exercise_offset}.csv") + csv_file.parent.mkdir(parents=True, exist_ok=True) np.savetxt(csv_file, data_for_file, delimiter=',') tail = head os.remove(mat_file) class NinaproDB2(Ninapro): - def __init__(self, dataset_folder="NinaProDB2/"): + def __init__(self, dataset_folder="NinaProDB2/", use_cyberglove: bool = False): Ninapro.__init__(self, 2000, 12, @@ -123,6 +128,7 @@ def __init__(self, dataset_folder="NinaProDB2/"): dataset_folder = dataset_folder) self.exercise_step = [0,0,0] self.num_cyberglove_dofs = 22 + self.use_cyberglove = use_cyberglove # needed b/c some files have EMG but no cyberglove def prepare_data(self, split = False, subjects_values = None, reps_values = None, classes_values = None): if subjects_values is None: @@ -134,17 +140,23 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): - print("Please download the NinaProDB2 dataset from: https://ninapro.hevs.ch/instructions/DB2.html") - return + raise FileNotFoundError("Please download the NinaProDB2 dataset from: https://ninapro.hevs.ch/instructions/DB2.html") self.convert_to_compatible() regex_filters = [ RegexFilter(left_bound = "/C", right_bound="R", values = classes_values, description='classes'), - RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), + RegexFilter(left_bound="R", right_bound=".csv", values=reps_values, description='reps'), RegexFilter(left_bound="DB2_s", right_bound="/",values=subjects_values, description='subjects') ] - metadata_fetchers = [ - ColumnFetcher('cyberglove', column_mask=[idx for idx in range(self.num_channels, self.num_channels + self.num_cyberglove_dofs)]) - ] + + if self.use_cyberglove: + # Only want cyberglove files + regex_filters.append(RegexFilter(left_bound="/", right_bound="/C", values=['cyberglove'], description='')) + 
metadata_fetchers = [ + ColumnFetcher('cyberglove', column_mask=[idx for idx in range(self.num_channels, self.num_channels + self.num_cyberglove_dofs)]) + ] + else: + metadata_fetchers = None + emg_column_mask = [idx for idx in range(self.num_channels)] # first columns should be EMG odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers, delimiter=",", data_column=emg_column_mask) From acbbf9079dc84ee0fa21594ae4d0e4e0c15a164a Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Thu, 17 Oct 2024 21:27:25 -0300 Subject: [PATCH 095/129] added tmr data --- libemg/_datasets/__init__.py | 1 + libemg/_datasets/tmr_shirleyryanabilitylab.py | 63 +++++++++++++++++++ libemg/datasets.py | 6 +- 3 files changed, 68 insertions(+), 2 deletions(-) create mode 100644 libemg/_datasets/tmr_shirleyryanabilitylab.py diff --git a/libemg/_datasets/__init__.py b/libemg/_datasets/__init__.py index 34a73f80..892b01cf 100644 --- a/libemg/_datasets/__init__.py +++ b/libemg/_datasets/__init__.py @@ -14,3 +14,4 @@ from libemg._datasets import one_subject_emager from libemg._datasets import one_subject_myo from libemg._datasets import radmand_lp +from libemg._datasets import tmr_shirleyryanabilitylab \ No newline at end of file diff --git a/libemg/_datasets/tmr_shirleyryanabilitylab.py b/libemg/_datasets/tmr_shirleyryanabilitylab.py new file mode 100644 index 00000000..8b3eabc9 --- /dev/null +++ b/libemg/_datasets/tmr_shirleyryanabilitylab.py @@ -0,0 +1,63 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter + +class TMRShirleyRyanAbilityLab(Dataset): + def __init__(self, dataset_folder="TMR/"): + Dataset.__init__(self, + 1000, + 32, + 'Ag/AgCl', + 6, + {0:"HandOpen", + 1:"KeyGrip", + 2:"PowerGrip", + 3:"FinePinchOpened", + 4:"FinePinchClosed", + 5:"TripodOpened", + 6:"TripodClosed", + 7:"Tool", + 8:"Hook", + 9:"IndexPoint", + 10:"ThumbFlexion", + 
11:"ThumbExtension", + 12:"ThumbAbduction", + 13:"ThumbAdduction", + 14:"IndexFlexion", + 15:"RingFlexion", + 16:"PinkyFlexion", + 17:"WristSupination", + 18:"WristPronation", + 19:"WristFlexion", + 20:"WristExtension", + 21:"RadialDeviation", + 22:"UlnarDeviation", + 23:"NoMotion"}, + 8, + '6 subjects, 8 reps, 24 motions, pre/post intervention', + "https://pmc.ncbi.nlm.nih.gov/articles/PMC9879512/") + self.url = "https://github.com/LibEMG/TMR_ShirleyRyanAbilityLab" + self.dataset_folder = dataset_folder + + def prepare_data(self, split = False): + subjects_values = ["1","2","3","4","7","10"] + reps_values = [str(i) for i in range(8)] + classes_values = [str(i) for i in range(24)] + intervention_values = ["preTMR","postTMR"] + + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + + regex_filters = [ + RegexFilter(left_bound="/S", right_bound="/",values=subjects_values, description='subjects'), + RegexFilter(left_bound = "_R", right_bound=".txt", values = reps_values, description='reps'), + RegexFilter(left_bound = "/C", right_bound="_R", values = classes_values, description='classes'), + RegexFilter(left_bound = "/", right_bound="/C", values = intervention_values, description='intervention') + ] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, delimiter=",") + data = odh + if split: + data = {'All': odh, 'Train': odh.isolate_data("reps", list(range(4)), fast=True), 'Test': odh.isolate_data("reps", list(range(4,8)), fast=True)} + + return data \ No newline at end of file diff --git a/libemg/datasets.py b/libemg/datasets.py index f7532943..ff9bac7c 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -12,6 +12,7 @@ from libemg._datasets.fougner_lp import FougnerLP from libemg._datasets.intensity import ContractionIntensity from libemg._datasets.kaufmann_md import KaufmannMD +from 
libemg._datasets.tmr_shirleyryanabilitylab import TMRShirleyRyanAbilityLab from libemg.feature_extractor import FeatureExtractor from libemg.emg_predictor import EMGClassifier from libemg.offline_metrics import OfflineMetrics @@ -41,8 +42,9 @@ def get_dataset_list(): 'ContractionIntensity': ContractionIntensity, 'RadmandLP': RadmandLP, 'FougnerLP': FougnerLP, - "KaufmannMD": KaufmannMD, - 'OneSubjectEMaGer': OneSubjectEMaGerDataset + 'KaufmannMD': KaufmannMD, + 'OneSubjectEMaGer': OneSubjectEMaGerDataset, + 'TMRShirleyRyanAbilityLab' : TMRShirleyRyanAbilityLab } def get_dataset_info(dataset): From f8dc05b414ab8643b3942fe79752dbef7b4f4c7b Mon Sep 17 00:00:00 2001 From: eeddy Date: Mon, 21 Oct 2024 19:00:32 -0300 Subject: [PATCH 096/129] Updates --- libemg/_datasets/fors_emg.py | 2 +- libemg/datasets.py | 38 +++++++++++++++++++++++++++++++----- 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/libemg/_datasets/fors_emg.py b/libemg/_datasets/fors_emg.py index 5c62a954..daa9ade3 100644 --- a/libemg/_datasets/fors_emg.py +++ b/libemg/_datasets/fors_emg.py @@ -19,7 +19,7 @@ def __init__(self, dataset_folder='FORS-EMG/'): def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): - print("Please download the pickled dataset from: https://www.kaggle.com/datasets/ummerummanchaity/fors-emg-a-novel-semg-dataset?resource=download") + print("Please download the dataset from: https://www.kaggle.com/datasets/ummerummanchaity/fors-emg-a-novel-semg-dataset?resource=download") return odh = OfflineDataHandler() diff --git a/libemg/datasets.py b/libemg/datasets.py index ff9bac7c..8e4927fa 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -11,6 +11,7 @@ from libemg._datasets.radmand_lp import RadmandLP from libemg._datasets.fougner_lp import FougnerLP from libemg._datasets.intensity import ContractionIntensity +from libemg._datasets.hyser import Hyser1DOF, HyserNDOF, HyserRandom, HyserPR 
from libemg._datasets.kaufmann_md import KaufmannMD from libemg._datasets.tmr_shirleyryanabilitylab import TMRShirleyRyanAbilityLab from libemg.feature_extractor import FeatureExtractor @@ -19,15 +20,25 @@ import pickle import time -def get_dataset_list(): +def get_dataset_list(type='CLASSIFICATION'): """Gets a list of all available datasets. + + Parameters + ---------- + type: str (default='CLASSIFICATION') + The type of datasets to return. Valid Options: 'CLASSIFICATION', 'REGRESSION', and 'ALL'. Returns ---------- dictionary A dictionary with the all available datasets and their respective classes. """ - return { + type = type.upper() + if type not in ['CLASSIFICATION', 'REGRESSION', 'ALL']: + print('Valid Options for type parameter: \'CLASSIFICATION\', \'REGRESSION\', or \'ALL\'.') + return {} + + classification = { 'OneSubjectMyo': OneSubjectMyoDataset, '3DC': _3DCDataset, 'CIIL_MinimalData': CIIL_MinimalData, @@ -36,17 +47,33 @@ def get_dataset_list(): 'GRABMyoCrossDay': GRABMyoCrossDay, 'ContinuousTransitions': ContinuousTransitions, 'NinaProDB2': NinaproDB2, - 'MyoDisCo': MyoDisCo, 'FORS-EMG': FORSEMG, 'EMGEPN612': EMGEPN612, 'ContractionIntensity': ContractionIntensity, 'RadmandLP': RadmandLP, 'FougnerLP': FougnerLP, 'KaufmannMD': KaufmannMD, + 'TMRShirleyRyanAbilityLab' : TMRShirleyRyanAbilityLab, + 'HyserPR': HyserPR, + } + + regression = { 'OneSubjectEMaGer': OneSubjectEMaGerDataset, - 'TMRShirleyRyanAbilityLab' : TMRShirleyRyanAbilityLab + 'NinaProDB8': NinaproDB8, + 'Hyser1DOF': HyserPR, + 'HyserNDOF': HyserNDOF, + 'HyserRandom': HyserRandom, } + if type == 'CLASSIFICATION': + return classification + elif type == 'REGRESSION': + return regression + else: + # Concatenate all datasets + classification.update(regression) + return classification + def get_dataset_info(dataset): """Prints out the information about a certain dataset. 
@@ -60,7 +87,7 @@ def get_dataset_info(dataset): else: print("ERROR: Invalid dataset name") -def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC'], save_dir=None): +def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC'], save_dir='.'): """Evaluates an algorithm against all included datasets. Parameters @@ -77,6 +104,7 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ The name of the datasets you want to evaluate your model on. save_dir: string (default=None) The name of the directory you want to incrementally save the results to (it will be a pickle file). + Returns ---------- dictionary From d0c1fe795eed77741476c09097b0645b06d45270 Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 22 Oct 2024 08:28:43 -0300 Subject: [PATCH 097/129] Updated logging --- libemg/datasets.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 8e4927fa..88ee6c00 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -102,7 +102,7 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ A dictionary of parameters for the passed in features. included_dataasets: list The name of the datasets you want to evaluate your model on. - save_dir: string (default=None) + save_dir: string (default='.') The name of the directory you want to incrementally save the results to (it will be a pickle file). 
Returns @@ -145,9 +145,8 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ print(ca) accuracies[d] = accs - # Save to pickle file - if save_dir is not None: - with open(save_dir + str(time.time()) + '.pkl', 'wb') as handle: - pickle.dump(accuracies, handle, protocol=pickle.HIGHEST_PROTOCOL) + print(save_dir + str(time.time()) + '.pkl') + with open(save_dir + str(time.time()) + '.pkl', 'wb') as handle: + pickle.dump(accuracies, handle, protocol=pickle.HIGHEST_PROTOCOL) return accuracies \ No newline at end of file From 64cea6e24a06d68cbc0b136465302ef19f3b9ed4 Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 22 Oct 2024 08:35:36 -0300 Subject: [PATCH 098/129] Updates --- libemg/_datasets/nina_pro.py | 2 ++ libemg/datasets.py | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index c65df0cc..3f969387 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -32,8 +32,10 @@ def __init__(self, def convert_to_compatible(self): # get the zip files (original format they're downloaded in) zip_files = find_all_files_of_type_recursively(self.dataset_folder,".zip") + print(zip_files) # unzip the files -- if any are there (successive runs skip this) for zip_file in zip_files: + print(zip_file) with zipfile.ZipFile(zip_file, 'r') as zip_ref: zip_ref.extractall(zip_file[:-4]+'/') os.remove(zip_file) diff --git a/libemg/datasets.py b/libemg/datasets.py index 88ee6c00..1e739f74 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -87,7 +87,7 @@ def get_dataset_info(dataset): else: print("ERROR: Invalid dataset name") -def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC'], save_dir='.'): +def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC'], output_file='out.pkl'): """Evaluates an algorithm against 
all included datasets. Parameters @@ -102,7 +102,7 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ A dictionary of parameters for the passed in features. included_dataasets: list The name of the datasets you want to evaluate your model on. - save_dir: string (default='.') + output_file: string (default='out.pkl') The name of the directory you want to incrementally save the results to (it will be a pickle file). Returns @@ -145,8 +145,7 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ print(ca) accuracies[d] = accs - print(save_dir + str(time.time()) + '.pkl') - with open(save_dir + str(time.time()) + '.pkl', 'wb') as handle: + with open(output_file, 'wb') as handle: pickle.dump(accuracies, handle, protocol=pickle.HIGHEST_PROTOCOL) return accuracies \ No newline at end of file From d31aa871a0c370a276588544cf6f71b923eb6011 Mon Sep 17 00:00:00 2001 From: eeddy Date: Tue, 22 Oct 2024 12:51:54 -0300 Subject: [PATCH 099/129] Updates --- libemg/_datasets/fors_emg.py | 9 +++++++-- libemg/_datasets/nina_pro.py | 2 -- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/libemg/_datasets/fors_emg.py b/libemg/_datasets/fors_emg.py index daa9ade3..24adea4f 100644 --- a/libemg/_datasets/fors_emg.py +++ b/libemg/_datasets/fors_emg.py @@ -32,8 +32,13 @@ def prepare_data(self, split = False): for s in range(1, 20): for g_i, g in enumerate(['Thumb_UP', 'Index', 'Right_Angle', 'Peace', 'Index_Little', 'Thumb_Little', 'Hand_Close', 'Hand_Open', 'Wrist_Flexion', 'Wrist_Extension', 'Radial_Deviation']): for r in [1,2,3,4,5]: - for o_i, o in enumerate(['rest', 'pronation', 'supination']): - mat = scipy.io.loadmat('FORS-EMG/Subject' + str(s) + '/' + o + '/' + g + '-' + str(r) + '.mat') + for o_i, o in enumerate(['Rest', 'Pronation', 'Supination']): + try: + mat = scipy.io.loadmat('FORS-EMG/Subject' + str(s) + '/' + o + '/' + g + '-' + str(r) + '.mat') + except: + o = o.lower() + mat = 
scipy.io.loadmat('FORS-EMG/Subject' + str(s) + '/' + o + '/' + g + '-' + str(r) + '.mat') + odh.data.append(mat['value'].T) odh.classes.append(np.ones((len(odh.data[-1]), 1)) * g_i) odh.subjects.append(np.ones((len(odh.data[-1]), 1)) * s-1) diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index 3f969387..c65df0cc 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -32,10 +32,8 @@ def __init__(self, def convert_to_compatible(self): # get the zip files (original format they're downloaded in) zip_files = find_all_files_of_type_recursively(self.dataset_folder,".zip") - print(zip_files) # unzip the files -- if any are there (successive runs skip this) for zip_file in zip_files: - print(zip_file) with zipfile.ZipFile(zip_file, 'r') as zip_ref: zip_ref.extractall(zip_file[:-4]+'/') os.remove(zip_file) From c1d2f064ee50cbc1e1b4ccc653d64c1653bcf540 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Tue, 22 Oct 2024 15:07:03 -0300 Subject: [PATCH 100/129] Add UserComplianceDataset --- libemg/_datasets/user_compliance.py | 52 +++++++++++++++++++++++++++++ libemg/datasets.py | 4 ++- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 libemg/_datasets/user_compliance.py diff --git a/libemg/_datasets/user_compliance.py b/libemg/_datasets/user_compliance.py new file mode 100644 index 00000000..2cfae0ca --- /dev/null +++ b/libemg/_datasets/user_compliance.py @@ -0,0 +1,52 @@ +from pathlib import Path + +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter, FilePackager + + +class UserComplianceDataset(Dataset): + def __init__(self, dataset_folder = 'UserComplianceDataset/', analysis = 'baseline'): + super().__init__( + sampling=1010, + num_channels=64, + recording_device='EMaGer', + num_subjects=6, + gestures={0: 'Hand Close (-) / Hand Open (+)', 1: 'Pronation (-) / Supination (+)'}, + num_reps=5, + description='Regression dataset used for investigation 
into user compliance during mimic training.', + citation='https://conferences.lib.unb.ca/index.php/mec/article/view/2507' + ) + self.url = 'https://github.com/LibEMG/UserComplianceDataset' + self.dataset_folder = dataset_folder + self.analysis = analysis + + def prepare_data(self, split = False): + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + + regex_filters = [ + RegexFilter(left_bound='/', right_bound='/', values=['open-close', 'pro-sup'], description='movements'), + RegexFilter(left_bound='_R_', right_bound='.csv', values=[str(idx) for idx in range(self.num_reps)], description='reps'), + RegexFilter(left_bound='/', right_bound='/', values=['anticipation', 'all-or-nothing', 'baseline'], description='behaviours'), + RegexFilter(left_bound='/', right_bound='/', values=[f"subject-{str(idx).zfill(3)}" for idx in range(1, 7)], description='subjects') + ] + package_function = lambda x, y: Path(x).parent.absolute() == Path(y).parent.absolute() + metadata_fetchers = [FilePackager(RegexFilter(left_bound='/', right_bound='.txt', values=['labels'], description='labels'), package_function)] + odh = OfflineDataHandler() + odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + data = odh + if split: + if self.analysis == 'baseline': + data = { + 'All': odh, + 'Train': odh.isolate_data('behaviours', [2], fast=True).isolate_data('reps', [0, 1, 2, 3], fast=True), + 'Test': odh.isolate_data('behaviours', [2], fast=True).isolate_data('reps', [4], fast=True) + } + elif self.analysis == 'all-or-nothing': + data = {'All': odh, 'Train': odh.isolate_data('behaviours', [1], fast=True), 'Test': odh.isolate_data('behaviours', [2], fast=True)} + elif self.analysis == 'anticipation': + data = {'All': odh, 'Train': odh.isolate_data('behaviours', [0], fast=True), 'Test': odh.isolate_data('behaviours', [2], fast=True)} + else: + raise ValueError(f"Unexpected value for analysis. 
Got: {self.analysis}.") + + return data diff --git a/libemg/datasets.py b/libemg/datasets.py index 1e739f74..28d7aa17 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -7,6 +7,7 @@ from libemg._datasets.continous_transitions import ContinuousTransitions from libemg._datasets.nina_pro import NinaproDB2, NinaproDB8 from libemg._datasets.myodisco import MyoDisCo +from libemg._datasets.user_compliance import UserComplianceDataset from libemg._datasets.fors_emg import FORSEMG from libemg._datasets.radmand_lp import RadmandLP from libemg._datasets.fougner_lp import FougnerLP @@ -60,9 +61,10 @@ def get_dataset_list(type='CLASSIFICATION'): regression = { 'OneSubjectEMaGer': OneSubjectEMaGerDataset, 'NinaProDB8': NinaproDB8, - 'Hyser1DOF': HyserPR, + 'Hyser1DOF': Hyser1DOF, 'HyserNDOF': HyserNDOF, 'HyserRandom': HyserRandom, + 'UserCompliance': UserComplianceDataset } if type == 'CLASSIFICATION': From 16ead39f078668fb89ed3e0cd3e41e2b40627c51 Mon Sep 17 00:00:00 2001 From: eeddy Date: Fri, 25 Oct 2024 20:23:15 -0300 Subject: [PATCH 101/129] Updates --- libemg/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 28d7aa17..4ab9567e 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -102,8 +102,8 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ A list of features. feature_dic: dic (default={}) A dictionary of parameters for the passed in features. - included_dataasets: list - The name of the datasets you want to evaluate your model on. + included_dataasets: list (str) or list (DataSets) + The name of the datasets you want to evaluate your model on. Either pass in strings (e.g., '3DC') for names or the dataset objects (e.g., _3DCDataset()). output_file: string (default='out.pkl') The name of the directory you want to incrementally save the results to (it will be a pickle file). 
From 3c50c7bc1019dd11a306b0ca95b94dfb5c003ff1 Mon Sep 17 00:00:00 2001 From: eeddy Date: Fri, 25 Oct 2024 20:25:47 -0300 Subject: [PATCH 102/129] Updates --- libemg/datasets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 4ab9567e..41d3b893 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -115,7 +115,10 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ accuracies = {} for d in included_datasets: print('Evaluating ' + d + ' dataset...') - dataset = get_dataset_list()[d]() + if isinstance(d, str): + dataset = get_dataset_list()[d]() + else: + dataset = d data = dataset.prepare_data(split=True) train_data = data['Train'] From b9edf73663a18fead3339ddfc4bba37a920b3d3a Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Mon, 28 Oct 2024 09:19:21 -0300 Subject: [PATCH 103/129] Convert labels field to classes in HyserPR --- libemg/_datasets/hyser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 98baf942..851a27b0 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -190,7 +190,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: class _PRLabelsFetcher(MetadataFetcher): def __init__(self): - super().__init__(description='labels') + super().__init__(description='classes') self.sample_regex = RegexFilter(left_bound='_sample', right_bound='.hea', values=[str(idx + 1) for idx in range(204)], description='samples') def _get_labels(self, filename): From a0cce07c031e81f9843839a2c4037ff1b8c6a046 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Mon, 28 Oct 2024 12:58:40 -0300 Subject: [PATCH 104/129] added CIIL_WS. 
Fixed dataset exist check for regression & WS --- libemg/datasets.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 41d3b893..d5e9f098 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -2,7 +2,7 @@ from libemg._datasets.one_subject_myo import OneSubjectMyoDataset from libemg._datasets.one_subject_emager import OneSubjectEMaGerDataset from libemg._datasets.emg_epn612 import EMGEPN612 -from libemg._datasets.ciil import CIIL_MinimalData, CIIL_ElectrodeShift +from libemg._datasets.ciil import CIIL_MinimalData, CIIL_ElectrodeShift, CIIL_WeaklySupervised from libemg._datasets.grab_myo import GRABMyoBaseline, GRABMyoCrossDay from libemg._datasets.continous_transitions import ContinuousTransitions from libemg._datasets.nina_pro import NinaproDB2, NinaproDB8 @@ -35,7 +35,7 @@ def get_dataset_list(type='CLASSIFICATION'): A dictionary with the all available datasets and their respective classes. """ type = type.upper() - if type not in ['CLASSIFICATION', 'REGRESSION', 'ALL']: + if type not in ['CLASSIFICATION', 'REGRESSION', 'WEAKLYSUPERVISED', 'ALL']: print('Valid Options for type parameter: \'CLASSIFICATION\', \'REGRESSION\', or \'ALL\'.') return {} @@ -66,14 +66,21 @@ def get_dataset_list(type='CLASSIFICATION'): 'HyserRandom': HyserRandom, 'UserCompliance': UserComplianceDataset } + + weaklysupervised = { + 'CIILWeaklySupervised': CIIL_WeaklySupervised + } if type == 'CLASSIFICATION': return classification elif type == 'REGRESSION': return regression + elif type == "WEAKLYSUPERVISED": + return weaklysupervised else: # Concatenate all datasets classification.update(regression) + classification.update(weaklysupervised) return classification def get_dataset_info(dataset): @@ -116,7 +123,7 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ for d in included_datasets: print('Evaluating ' + d + ' dataset...') if isinstance(d, str): - dataset = 
get_dataset_list()[d]() + dataset = get_dataset_list('ALL')[d]() else: dataset = d data = dataset.prepare_data(split=True) From 137c5b2f94f8ee5e2035611cfb1fce872d254e77 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Mon, 28 Oct 2024 12:58:50 -0300 Subject: [PATCH 105/129] initial commit for CIIL_WS --- libemg/_datasets/ciil.py | 70 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 68 insertions(+), 2 deletions(-) diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py index 54904be1..7aa8c6f8 100644 --- a/libemg/_datasets/ciil.py +++ b/libemg/_datasets/ciil.py @@ -1,6 +1,6 @@ from libemg._datasets.dataset import Dataset -from libemg.data_handler import OfflineDataHandler, RegexFilter - +from libemg.data_handler import OfflineDataHandler, RegexFilter, FilePackager +from pathlib import Path class CIIL_MinimalData(Dataset): def __init__(self, dataset_folder='CIILData/'): Dataset.__init__(self, @@ -78,3 +78,69 @@ def prepare_data(self, split = False): data = {'All': odh, 'Train': odh.isolate_data("sets", [0], fast=True), 'Test': odh.isolate_data("sets", [1,2,3,4], fast=True)} return data + + +class CIIL_WeaklySupervised(Dataset): + def __init__(self, dataset_folder='WS_CIILData/'): + Dataset.__init__(self, + 1000, + 8, + 'OyMotion gForcePro+ EMG Armband', + 16, + {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, + '30 min weakly supervised, 1 rep calibration, 14 reps test', + "A weakly supervised environment with sparse supervised calibration.", + 'In Submission') + self.url = "https://github.com/ECEEvanCampbell/ZSCIIL_Dataset" + self.dataset_folder = dataset_folder + + def prepare_data(self, split = False): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download(self.url, self.dataset_folder) + + # supervised odh loading + subjects = [str(i) for i in range(0, 16)] + classes_values = [str(i) for i in range(0,5)] + reps_values = [str(i) for i in range(0,15)] + setting_values 
= [".csv", ""] # this is arbitrary to get a field that separates WS from S + regex_filters = [ + RegexFilter(left_bound = "", right_bound="", values = setting_values, description='settings'), + RegexFilter(left_bound = "/subject", right_bound="/", values = subjects, description='subjects'), + RegexFilter(left_bound = "R_", right_bound="_", values = reps_values, description='reps'), + RegexFilter(left_bound = "C_", right_bound=".csv", values = classes_values, description='classes') + ] + odh_s = OfflineDataHandler() + odh_s.get_data(folder_location=self.dataset_folder, + regex_filters=regex_filters, + delimiter=",") + + # weakly supervised odh loading + subjects = [str(i) for i in range(0, 16)] + rep = [str(i) for i in range(3)] + setting_values = ["", ".csv"] # this is arbitrary to get a field that separates WS from S + regex_filters = [ + RegexFilter(left_bound = "", right_bound="", values = setting_values, description='settings'), + RegexFilter(left_bound = "/subject", right_bound="/", values = subjects, description='subjects'), + RegexFilter(left_bound = "WS", right_bound=".csv", values = reps_values, description='reps'), + ] + metadata_fetchers = [ + FilePackager(regex_filter=[ + RegexFilter(left_bound="", right_bound="targets.csv", values=["_"], description="classses") + ], + package_function=lambda x, y: (x[2] == y[2]) and (Path(x).parent == Path(y).parent) + ) + ] + odh_ws = OfflineDataHandler() + odh_ws.get_data(folder_location=self.dataset_folder, + regex_filters=regex_filters, + metadata_fetchers=metadata_fetchers, + delimiter=",") + + data = odh_s + odh_ws + if split: + data = {'All': data, 'Pretrain': odh_ws, + 'Train': odh_s.isolate_data("reps", [0], fast=True), + 'Test': odh_s.isolate_data("reps", list(range(1,15)), fast=True)} + + return data From d9def079277c70a9596476063cd476139a434a56 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Mon, 28 Oct 2024 14:16:15 -0300 Subject: [PATCH 106/129] added onedrive download method --- 
libemg/_datasets/ciil.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/libemg/_datasets/ciil.py b/libemg/_datasets/ciil.py index 7aa8c6f8..8b9e7a3d 100644 --- a/libemg/_datasets/ciil.py +++ b/libemg/_datasets/ciil.py @@ -1,6 +1,9 @@ from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter, FilePackager from pathlib import Path + + + class CIIL_MinimalData(Dataset): def __init__(self, dataset_folder='CIILData/'): Dataset.__init__(self, @@ -81,7 +84,7 @@ def prepare_data(self, split = False): class CIIL_WeaklySupervised(Dataset): - def __init__(self, dataset_folder='WS_CIILData/'): + def __init__(self, dataset_folder='CIIL_WeaklySupervised/'): Dataset.__init__(self, 1000, 8, @@ -91,13 +94,13 @@ def __init__(self, dataset_folder='WS_CIILData/'): '30 min weakly supervised, 1 rep calibration, 14 reps test', "A weakly supervised environment with sparse supervised calibration.", 'In Submission') - self.url = "https://github.com/ECEEvanCampbell/ZSCIIL_Dataset" + self.url = "https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EaABHYybhfJNslTVcvwPPwgB9WwqlTLCStui30maqY53kw?e=MbboMd" self.dataset_folder = dataset_folder def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') if (not self.check_exists(self.dataset_folder)): - self.download(self.url, self.dataset_folder) + self.download_via_onedrive(self.url, self.dataset_folder) # supervised odh loading subjects = [str(i) for i in range(0, 16)] @@ -106,40 +109,39 @@ def prepare_data(self, split = False): setting_values = [".csv", ""] # this is arbitrary to get a field that separates WS from S regex_filters = [ RegexFilter(left_bound = "", right_bound="", values = setting_values, description='settings'), - RegexFilter(left_bound = "/subject", right_bound="/", values = subjects, description='subjects'), - RegexFilter(left_bound = "R_", right_bound="_", values = reps_values, 
description='reps'), - RegexFilter(left_bound = "C_", right_bound=".csv", values = classes_values, description='classes') + RegexFilter(left_bound = "/S", right_bound="/", values = subjects, description='subjects'), + RegexFilter(left_bound = "R", right_bound=".csv", values = reps_values, description='reps'), + RegexFilter(left_bound = "C", right_bound="_R", values = classes_values, description='classes') ] odh_s = OfflineDataHandler() - odh_s.get_data(folder_location=self.dataset_folder, + odh_s.get_data(folder_location=self.dataset_folder+"CIIL_WeaklySupervised/", regex_filters=regex_filters, delimiter=",") # weakly supervised odh loading subjects = [str(i) for i in range(0, 16)] - rep = [str(i) for i in range(3)] + reps_values = [str(i) for i in range(3)] setting_values = ["", ".csv"] # this is arbitrary to get a field that separates WS from S regex_filters = [ RegexFilter(left_bound = "", right_bound="", values = setting_values, description='settings'), - RegexFilter(left_bound = "/subject", right_bound="/", values = subjects, description='subjects'), + RegexFilter(left_bound = "/S", right_bound="/", values = subjects, description='subjects'), RegexFilter(left_bound = "WS", right_bound=".csv", values = reps_values, description='reps'), ] metadata_fetchers = [ - FilePackager(regex_filter=[ - RegexFilter(left_bound="", right_bound="targets.csv", values=["_"], description="classses") - ], - package_function=lambda x, y: (x[2] == y[2]) and (Path(x).parent == Path(y).parent) + FilePackager(regex_filter=RegexFilter(left_bound="", right_bound="targets.csv", values=["_"], description="classes"), + package_function=lambda x, y: (x.split("WS")[1][0] == y.split("WS")[1][0]) and (Path(x).parent == Path(y).parent) ) ] odh_ws = OfflineDataHandler() - odh_ws.get_data(folder_location=self.dataset_folder, + odh_ws.get_data(folder_location=self.dataset_folder+"CIIL_WeaklySupervised/", regex_filters=regex_filters, metadata_fetchers=metadata_fetchers, delimiter=",") data = odh_s + 
odh_ws if split: - data = {'All': data, 'Pretrain': odh_ws, + data = {'All': data, + 'Pretrain': odh_ws, 'Train': odh_s.isolate_data("reps", [0], fast=True), 'Test': odh_s.isolate_data("reps", list(range(1,15)), fast=True)} From ecd68cfd2cbd095dbb3bf33762bd4a417621672d Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Mon, 28 Oct 2024 14:16:23 -0300 Subject: [PATCH 107/129] added onedrive download method --- libemg/_datasets/dataset.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/libemg/_datasets/dataset.py b/libemg/_datasets/dataset.py index 947b3909..1ca104c8 100644 --- a/libemg/_datasets/dataset.py +++ b/libemg/_datasets/dataset.py @@ -1,5 +1,6 @@ import os from libemg.data_handler import OfflineDataHandler +from onedrivedownloader import download as onedrive_download # this assumes you have git downloaded (not pygit, but the command line program git) class Dataset: @@ -18,6 +19,12 @@ def download(self, url, dataset_name): clone_command = "git clone " + url + " " + dataset_name os.system(clone_command) + def download_via_onedrive(self, url, dataset_name): + onedrive_download(url=url, + filename = dataset_name, + unzip=True, + clean=True) + def remove_dataset(self, dataset_folder): remove_command = "rm -rf " + dataset_folder os.system(remove_command) From b805527bfcc57e860f4020cb7e215e76c1436169 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Mon, 28 Oct 2024 14:17:49 -0300 Subject: [PATCH 108/129] added one drive downloader --- requirements.txt | 3 ++- setup.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 367075d1..3ded04a3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,5 @@ dearpygui opencv-python datetime websockets==8.1 -h5py \ No newline at end of file +h5py +onedrivedownloader \ No newline at end of file diff --git a/setup.py b/setup.py index 95cbfe00..42b6768a 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,8 @@ "pythonnet", "bleak", "dearpygui", - 
"h5py" + "h5py", + "onedrivedownloader" ], keywords=[ "emg", From 12a0b538ee5acfacc0e08701df7bbc798ccca0fc Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Mon, 28 Oct 2024 14:19:37 -0300 Subject: [PATCH 109/129] added arguments for unzip and clean --- libemg/_datasets/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libemg/_datasets/dataset.py b/libemg/_datasets/dataset.py index 1ca104c8..f43bef77 100644 --- a/libemg/_datasets/dataset.py +++ b/libemg/_datasets/dataset.py @@ -19,11 +19,11 @@ def download(self, url, dataset_name): clone_command = "git clone " + url + " " + dataset_name os.system(clone_command) - def download_via_onedrive(self, url, dataset_name): + def download_via_onedrive(self, url, dataset_name, unzip=True, clean=True): onedrive_download(url=url, filename = dataset_name, - unzip=True, - clean=True) + unzip=unzip, + clean=clean) def remove_dataset(self, dataset_folder): remove_command = "rm -rf " + dataset_folder From 4c322dedb5544f6d77dd1a0b240eccb982991741 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Mon, 28 Oct 2024 15:30:15 -0300 Subject: [PATCH 110/129] now downloads --- libemg/_datasets/emg_epn612.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 4881eef2..7412f25b 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -16,14 +16,13 @@ def __init__(self, dataset_file='EMGEPN612.pkl'): '50 Reps x 306 Users (Train), 25 Reps x 306 Users (Test)', "A large 612 user dataset for developing cross user models.", 'https://doi.org/10.5281/zenodo.4421500') - self.url = "https://github.com/libemg/OneSubjectMyoDataset" + self.url = "https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EWf3sEvRxg9HuAmGoBG2vYkBLyFv6UrPYGwAISPDW9dBXw?e=vjCA14" self.dataset_name = dataset_file def prepare_data(self, split = False): print('\nPlease cite: ' + self.citation+'\n') if (not 
self.check_exists(self.dataset_name)): - print("Please download the pickled dataset from: https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EWf3sEvRxg9HuAmGoBG2vYkBDXh4xNst3FAXV0lNoodrAA?e=t6HPaR") - return + self.download_via_onedrive(self.url, self.dataset_name, unzip=False, clean=False) file = open(self.dataset_name, 'rb') data = pickle.load(file) From ebf73e4d250560b105d13b8d0e7b2d90c1854425 Mon Sep 17 00:00:00 2001 From: ECEEvanCampbell Date: Thu, 31 Oct 2024 16:33:50 -0300 Subject: [PATCH 111/129] added onesiteBP --- libemg/_datasets/one_site_biopoint.py | 48 +++++++++++++++++++++++++++ libemg/datasets.py | 2 ++ 2 files changed, 50 insertions(+) create mode 100644 libemg/_datasets/one_site_biopoint.py diff --git a/libemg/_datasets/one_site_biopoint.py b/libemg/_datasets/one_site_biopoint.py new file mode 100644 index 00000000..c92fdf9e --- /dev/null +++ b/libemg/_datasets/one_site_biopoint.py @@ -0,0 +1,48 @@ + +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler, RegexFilter +from libemg.feature_extractor import FeatureExtractor +from libemg.utils import * + +class OneSiteBiopoint(Dataset): + def __init__(self, dataset_folder='CIIL_WeaklySupervised/'): + Dataset.__init__(self, + 2000, + 1, + 'SiFi-Labs BioPoint', + 8, + {0: 'Close', 1: 'Open', 2: 'Rest', 3: 'Flexion', 4: 'Extension'}, + 'Six reps', + "A single site, multimodal sensor for gesture recognition", + 'EMBC 2024 - Not Yet Published') + self.url = "https://unbcloud-my.sharepoint.com/:u:/g/personal/ecampbe2_unb_ca/EZG9zfWg_hdJl4De1Clnl34ByTjYqStTB90Nj6EaHkGSnA?e=JQLU7z" + self.dataset_folder = dataset_folder + + def prepare_data(self, split = False): + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)): + self.download_via_onedrive(self.url, self.dataset_folder) + + subjects = [str(i) for i in range(0, 8)] + classes_values = [str(i) for i in range(0,17)] + reps_values = [str(i) for i in 
range(0,6)] + regex_filters = [ + RegexFilter(left_bound = "/S", right_bound="/", values = subjects, description='subjects'), + RegexFilter(left_bound = "R_", right_bound="EMG-bio.csv", values = reps_values, description='reps'), + RegexFilter(left_bound = "C_", right_bound="_R", values = classes_values, description='classes') + ] + odh_s = OfflineDataHandler() + odh_s.get_data(folder_location=self.dataset_folder+"OneSiteBioPoint/", + regex_filters=regex_filters, + delimiter=",") + + + if split: + data = {'All': data, + 'Train': odh_s.isolate_data("reps", list(range(0,3)), fast=True), + 'Test': odh_s.isolate_data("reps", list(range(3,6)), fast=True)} + + return data + + + diff --git a/libemg/datasets.py b/libemg/datasets.py index d5e9f098..6829be2d 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -15,6 +15,7 @@ from libemg._datasets.hyser import Hyser1DOF, HyserNDOF, HyserRandom, HyserPR from libemg._datasets.kaufmann_md import KaufmannMD from libemg._datasets.tmr_shirleyryanabilitylab import TMRShirleyRyanAbilityLab +from libemg._datasets.one_site_biopoint import One_Site_Biopoint from libemg.feature_extractor import FeatureExtractor from libemg.emg_predictor import EMGClassifier from libemg.offline_metrics import OfflineMetrics @@ -56,6 +57,7 @@ def get_dataset_list(type='CLASSIFICATION'): 'KaufmannMD': KaufmannMD, 'TMRShirleyRyanAbilityLab' : TMRShirleyRyanAbilityLab, 'HyserPR': HyserPR, + 'OneSiteBioPoint': OneSiteBiopoint } regression = { From c8ab655eb0c2c58ebe1f2f46e32b0d34c498f5fa Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 31 Oct 2024 13:40:17 -0300 Subject: [PATCH 112/129] Fixed continuous transitions --- libemg/_datasets/continous_transitions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libemg/_datasets/continous_transitions.py b/libemg/_datasets/continous_transitions.py index 79e15857..0313af6a 100644 --- a/libemg/_datasets/continous_transitions.py +++ b/libemg/_datasets/continous_transitions.py @@ -34,7 +34,7 
@@ def prepare_data(self, split = False): odh_te.classes = [] odh_te.extra_attributes = ['subjects', 'classes'] - for s in [2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47]: + for s_i, s in enumerate([2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47]): data = h5py.File('ContinuousTransitions/P' + f"{s:02}" + '.hdf5', "r") cont_labels = data['continuous']['emg']['prompt'][()] cont_labels = np.hstack([np.ones((1000)) * cont_labels[0], cont_labels[0:len(cont_labels)-1000]]) # Rolling about 0.5s as per Shri's suggestion @@ -44,7 +44,7 @@ def prepare_data(self, split = False): for i in range(0, len(cont_chg_idxs)-1): odh_te.data.append(cont_emg[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]) odh_te.classes.append(np.expand_dims(cont_labels[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]-1, axis=1)) - odh_te.subjects.append(np.ones((len(odh_te.data[-1]), 1)) * s-2) + odh_te.subjects.append(np.ones((len(odh_te.data[-1]), 1)) * s_i) #TODO: update this I think! 
ramp_emg = data['ramp']['emg']['signal'][()] ramp_labels = data['ramp']['emg']['prompt'][()] @@ -53,7 +53,7 @@ def prepare_data(self, split = False): for i in range(0, len(r_chg_idxs)-1): odh_tr.data.append(ramp_emg[r_chg_idxs[i]+1:r_chg_idxs[i+1]]) odh_tr.classes.append(np.expand_dims(ramp_labels[r_chg_idxs[i]+1:r_chg_idxs[i+1]]-1, axis=1)) - odh_tr.subjects.append(np.ones((len(odh_tr.data[-1]), 1)) * s-2) + odh_tr.subjects.append(np.ones((len(odh_tr.data[-1]), 1)) * s_i) odh_all = odh_tr + odh_te data = odh_all From 5105af3c209f46beb2a16171bb365359c6c978ff Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 31 Oct 2024 13:43:04 -0300 Subject: [PATCH 113/129] Fixed one site bio --- libemg/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 6829be2d..1ec5403a 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -15,7 +15,7 @@ from libemg._datasets.hyser import Hyser1DOF, HyserNDOF, HyserRandom, HyserPR from libemg._datasets.kaufmann_md import KaufmannMD from libemg._datasets.tmr_shirleyryanabilitylab import TMRShirleyRyanAbilityLab -from libemg._datasets.one_site_biopoint import One_Site_Biopoint +from libemg._datasets.one_site_biopoint import OneSiteBiopoint from libemg.feature_extractor import FeatureExtractor from libemg.emg_predictor import EMGClassifier from libemg.offline_metrics import OfflineMetrics From d6afd0420f5332d567b8cfb3956055bf0e5049c3 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 13:46:24 -0300 Subject: [PATCH 114/129] Hyser labels fix --- libemg/_datasets/hyser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 851a27b0..1493ac29 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -213,7 +213,7 @@ def _get_labels(self, filename): def __call__(self, filename, file_data, all_files): labels = self._get_labels(filename) sample_idx = 
self.sample_regex.get_metadata(filename) - return labels[sample_idx] + return labels[sample_idx] - 1 # -1 to produce 0-indexed labels class _PRRepFetcher(_PRLabelsFetcher): @@ -222,7 +222,7 @@ def __init__(self): self.description = 'reps' def __call__(self, filename, file_data, all_files): - label = super().__call__(filename, file_data, all_files) + label = super().__call__(filename, file_data, all_files) + 1 # +1 b/c this returns 0-indexed labels, but the files are 1-indexed labels = self._get_labels(filename) same_label_mask = np.where(labels == label)[0] sample_idx = self.sample_regex.get_metadata(filename) From bae186c98e17040e53162773e600e25c16a286ff Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 13:47:27 -0300 Subject: [PATCH 115/129] Add subjects to Hyser classes --- libemg/_datasets/hyser.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 1493ac29..26c2ab31 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from copy import deepcopy from pathlib import Path +from typing import Sequence import numpy as np @@ -46,7 +47,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: class Hyser1DOF(_Hyser): - def __init__(self, dataset_folder = 'Hyser1DOF', analysis = 'baseline'): + def __init__(self, dataset_folder: str = 'Hyser1DOF', analysis: str = 'baseline', subjects: Sequence[str] | None = None): """1 degree of freedom (DOF) Hyser dataset. Parameters @@ -56,10 +57,12 @@ def __init__(self, dataset_folder = 'Hyser1DOF', analysis = 'baseline'): analysis: str, default='baseline' Determines which type of data will be extracted and considered train/test splits. If 'baseline', only grabs data from the first session and splits based on reps. 
If 'sessions', grabs data from both sessions and return the first session as train and the second session as test. + subjects: Sequence[str] or None, default=None + Subjects to parse (e.g., ['01', '03', '10']). If None, parses all participants. Defaults to None. """ gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser 1 DOF dataset. Includes within-DOF finger movements. Ground truth finger forces are recorded for use in finger force regression.' - super().__init__(gestures=gestures, num_reps=3, description=description, dataset_folder=dataset_folder, analysis=analysis) + super().__init__(gestures=gestures, num_reps=3, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) def _prepare_data_helper(self, split = False): filename_filters = deepcopy(self.common_regex_filters) @@ -87,7 +90,7 @@ def _prepare_data_helper(self, split = False): class HyserNDOF(_Hyser): - def __init__(self, dataset_folder = 'HyserNDOF', analysis = 'baseline'): + def __init__(self, dataset_folder: str = 'HyserNDOF', analysis: str = 'baseline', subjects: Sequence[str] | None = None): """N degree of freedom (DOF) Hyser dataset. Parameters @@ -97,11 +100,13 @@ def __init__(self, dataset_folder = 'HyserNDOF', analysis = 'baseline'): analysis: str, default='baseline' Determines which type of data will be extracted and considered train/test splits. If 'baseline', only grabs data from the first session and splits based on reps. If 'sessions', grabs data from both sessions and return the first session as train and the second session as test. + subjects: Sequence[str] or None, default=None + Subjects to parse (e.g., ['01', '03', '10']). If None, parses all participants. Defaults to None. """ # TODO: Add a 'regression' flag... maybe add a 'DOFs' parameter instead of just gestures? gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser N DOF dataset. Includes combined finger movements. 
Ground truth finger forces are recorded for use in finger force regression.' - super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis) + super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) self.finger_combinations = { 1: 'Thumb + Index', 2: 'Thumb + Middle', @@ -147,7 +152,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: class HyserRandom(_Hyser): - def __init__(self, dataset_folder = 'HyserRandom', analysis = 'baseline'): + def __init__(self, dataset_folder: str = 'HyserRandom', analysis: str = 'baseline', subjects: Sequence[str] | None = None): """Random task (DOF) Hyser dataset. Parameters @@ -157,10 +162,13 @@ def __init__(self, dataset_folder = 'HyserRandom', analysis = 'baseline'): analysis: str, default='baseline' Determines which type of data will be extracted and considered train/test splits. If 'baseline', only grabs data from the first session and splits based on reps. If 'sessions', grabs data from both sessions and return the first session as train and the second session as test. + subjects: Sequence[str] or None, default=None + Subjects to parse (e.g., ['01', '03', '10']). If None, parses all participants. Defaults to None. """ gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' 
- subjects = [str(idx + 1).zfill(2) for idx in range(20) if idx != 9] # subject 10 is missing the labels file for sample1 + if subjects is None: + subjects = [str(idx + 1).zfill(2) for idx in range(20) if idx != 9] # subject 10 is missing the labels file for sample1 super().__init__(gestures=gestures, num_reps=5, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: @@ -236,7 +244,7 @@ def __call__(self, filename, file_data, all_files): class HyserPR(_Hyser): - def __init__(self, dataset_folder = 'HyserPR', analysis = 'baseline'): + def __init__(self, dataset_folder: str = 'HyserPR', analysis: str = 'baseline', subjects: Sequence[str] | None = None): """Pattern recognition (PR) Hyser dataset. Parameters @@ -246,6 +254,8 @@ def __init__(self, dataset_folder = 'HyserPR', analysis = 'baseline'): analysis: str, default='baseline' Determines which type of data will be extracted and considered train/test splits. If 'baseline', only grabs data from the first session and splits based on reps. If 'sessions', grabs data from both sessions and return the first session as train and the second session as test. + subjects: Sequence[str] or None, default=None + Subjects to parse (e.g., ['01', '03', '10']). If None, parses all participants. Defaults to None. """ gestures = { 1: 'Thumb Extension', @@ -284,7 +294,7 @@ def __init__(self, dataset_folder = 'HyserPR', analysis = 'baseline'): 34: 'Thumb and Middle Fingers Pinch' } description = 'Hyser pattern recognition (PR) dataset. Includes dynamic and maintenance tasks for 34 hand gestures.' 
- super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis) # num_reps=2 b/c 2 trials + super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) # num_reps=2 b/c 2 trials def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) From 62a85da68842ab45fffb2d3c0a1bb1c03f57cb25 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 13:48:23 -0300 Subject: [PATCH 116/129] Continuous transitions debugging --- libemg/_datasets/continous_transitions.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libemg/_datasets/continous_transitions.py b/libemg/_datasets/continous_transitions.py index 0313af6a..c5efef76 100644 --- a/libemg/_datasets/continous_transitions.py +++ b/libemg/_datasets/continous_transitions.py @@ -34,13 +34,15 @@ def prepare_data(self, split = False): odh_te.classes = [] odh_te.extra_attributes = ['subjects', 'classes'] - for s_i, s in enumerate([2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47]): + for s in [18]: + # for s in [2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47]: data = h5py.File('ContinuousTransitions/P' + f"{s:02}" + '.hdf5', "r") cont_labels = data['continuous']['emg']['prompt'][()] cont_labels = np.hstack([np.ones((1000)) * cont_labels[0], cont_labels[0:len(cont_labels)-1000]]) # Rolling about 0.5s as per Shri's suggestion cont_emg = data['continuous']['emg']['signal'][()] cont_chg_idxs = np.insert(np.where(cont_labels[:-1] != cont_labels[1:])[0], 0, -1) cont_chg_idxs = np.insert(cont_chg_idxs, len(cont_chg_idxs), len(cont_emg)) + print(cont_emg) for i in range(0, len(cont_chg_idxs)-1): 
odh_te.data.append(cont_emg[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]) odh_te.classes.append(np.expand_dims(cont_labels[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]-1, axis=1)) From 36481e72e6273862add566fad2a5ee2079eac3da Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 13:49:00 -0300 Subject: [PATCH 117/129] Add subjects to OneSubjectEMaGerDataset --- libemg/_datasets/one_subject_emager.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libemg/_datasets/one_subject_emager.py b/libemg/_datasets/one_subject_emager.py index e37139b4..2f580437 100644 --- a/libemg/_datasets/one_subject_emager.py +++ b/libemg/_datasets/one_subject_emager.py @@ -1,5 +1,6 @@ from pathlib import Path +import numpy as np from libemg._datasets.dataset import Dataset from libemg.data_handler import OfflineDataHandler, RegexFilter, FilePackager @@ -30,6 +31,9 @@ def prepare_data(self, split = False): metadata_fetchers = [FilePackager(RegexFilter(left_bound='/', right_bound='.txt', values=['labels'], description='labels'), package_function)] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + odh.subjects = [] + odh.subjects = [np.zeros((len(d), 1)) for d in odh.data] + odh.extra_attributes.append('subjects') data = odh if split: data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2, 3], fast=True), 'Test': odh.isolate_data('reps', [4], fast=True)} From e6f7a0839a51d0e732391c414898c426770a376b Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 13:49:35 -0300 Subject: [PATCH 118/129] Evaluate method fixes --- libemg/datasets.py | 57 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 13 deletions(-) diff --git a/libemg/datasets.py b/libemg/datasets.py index 1ec5403a..ba499526 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -17,7 +17,7 @@ from libemg._datasets.tmr_shirleyryanabilitylab import 
TMRShirleyRyanAbilityLab from libemg._datasets.one_site_biopoint import OneSiteBiopoint from libemg.feature_extractor import FeatureExtractor -from libemg.emg_predictor import EMGClassifier +from libemg.emg_predictor import EMGClassifier, EMGRegressor from libemg.offline_metrics import OfflineMetrics import pickle import time @@ -98,7 +98,7 @@ def get_dataset_info(dataset): else: print("ERROR: Invalid dataset name") -def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC'], output_file='out.pkl'): +def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={}, included_datasets=['OneSubjectMyo', '3DC'], output_file='out.pkl', regression=False, metrics=['CA']): """Evaluates an algorithm against all included datasets. Parameters @@ -115,12 +115,29 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ The name of the datasets you want to evaluate your model on. Either pass in strings (e.g., '3DC') for names or the dataset objects (e.g., _3DCDataset()). output_file: string (default='out.pkl') The name of the directory you want to incrementally save the results to (it will be a pickle file). - + regression: boolean (default=False) + If True, will create an EMGRegressor object. Otherwise creates an EMGClassifier object. + metrics: list (default=['CA']/['MSE']) + The metrics to extract from each dataset. 
Returns ---------- dictionary A dictionary with a set of accuracies for different datasets """ + + # -------------- Setup ------------------- + if metrics == ['CA'] and regression: + metrics = ['MSE'] + + metadata_operations = None + label_val = 'classes' + if regression: + metadata_operations = {'labels': lambda x: x[-1]} + label_val = 'labels' + + om = OfflineMetrics() + + # --------------- Run ----------------- accuracies = {} for d in included_datasets: print('Evaluating ' + d + ' dataset...') @@ -128,6 +145,11 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ dataset = get_dataset_list('ALL')[d]() else: dataset = d + + if isinstance(dataset, EMGEPN612): + print('EMGEPN612 Dataset is meant for cross user modelling... Skipping.') + continue + data = dataset.prepare_data(split=True) train_data = data['Train'] @@ -138,25 +160,34 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ print(str(s) + '/' + str(dataset.num_subjects) + ' completed.') s_train_dh = train_data.isolate_data('subjects', [s]) s_test_dh = test_data.isolate_data('subjects', [s]) - train_windows, train_meta = s_train_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc)) - test_windows, test_meta = s_test_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc)) + + train_windows, train_meta = s_train_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc), metadata_operations=metadata_operations) + test_windows, test_meta = s_test_dh.parse_windows(int(dataset.sampling/1000 * window_size), int(dataset.sampling/1000 * window_inc), metadata_operations=metadata_operations) fe = FeatureExtractor() train_feats = fe.extract_features(feature_list, train_windows, feature_dic=feature_dic) test_feats = fe.extract_features(feature_list, test_windows, feature_dic=feature_dic) - clf = EMGClassifier(model) ds = { 
'training_features': train_feats, - 'training_labels': train_meta['classes'] + 'training_labels': train_meta[label_val] } + + if not regression: + clf = EMGClassifier(model) + else: + clf = EMGRegressor(model) clf.fit(ds) - - preds, _ = clf.run(test_feats) - om = OfflineMetrics() - ca = om.get_CA(test_meta['classes'], preds) - accs.append(ca) - print(ca) + + if regression: + preds = clf.run(test_feats) + else: + preds, _ = clf.run(test_feats) + + metrics = om.extract_offline_metrics(metrics, test_meta[label_val], preds) + accs.append(metrics) + + print(metrics) accuracies[d] = accs with open(output_file, 'wb') as handle: From 9fd078457f41f846d0dd04e09c5ec76a90d535cb Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 31 Oct 2024 13:51:52 -0300 Subject: [PATCH 119/129] Fixed continuous --- libemg/_datasets/continous_transitions.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/libemg/_datasets/continous_transitions.py b/libemg/_datasets/continous_transitions.py index c5efef76..316487b3 100644 --- a/libemg/_datasets/continous_transitions.py +++ b/libemg/_datasets/continous_transitions.py @@ -34,19 +34,17 @@ def prepare_data(self, split = False): odh_te.classes = [] odh_te.extra_attributes = ['subjects', 'classes'] - for s in [18]: - # for s in [2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47]: + for s_i, s in enumerate([2,3,4,5,6,7,8,9,10,11,12,13,14,15,17,18,19,20,21,22,23,25,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47]): data = h5py.File('ContinuousTransitions/P' + f"{s:02}" + '.hdf5', "r") cont_labels = data['continuous']['emg']['prompt'][()] cont_labels = np.hstack([np.ones((1000)) * cont_labels[0], cont_labels[0:len(cont_labels)-1000]]) # Rolling about 0.5s as per Shri's suggestion cont_emg = data['continuous']['emg']['signal'][()] cont_chg_idxs = np.insert(np.where(cont_labels[:-1] != cont_labels[1:])[0], 0, -1) cont_chg_idxs = 
np.insert(cont_chg_idxs, len(cont_chg_idxs), len(cont_emg)) - print(cont_emg) for i in range(0, len(cont_chg_idxs)-1): odh_te.data.append(cont_emg[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]) odh_te.classes.append(np.expand_dims(cont_labels[cont_chg_idxs[i]+1:cont_chg_idxs[i+1]]-1, axis=1)) - odh_te.subjects.append(np.ones((len(odh_te.data[-1]), 1)) * s_i) #TODO: update this I think! + odh_te.subjects.append(np.ones((len(odh_te.data[-1]), 1)) * s_i) ramp_emg = data['ramp']['emg']['signal'][()] ramp_labels = data['ramp']['emg']['prompt'][()] From 64335dfeefdc824b7b1a0e1bd7de1d49c661ff45 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 14:29:38 -0300 Subject: [PATCH 120/129] Fix subject indexing with Hyser --- libemg/_datasets/hyser.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 26c2ab31..d8f3bcd5 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -22,16 +22,15 @@ def __init__(self, gestures, num_reps, description, dataset_folder, analysis = ' citation='https://doi.org/10.13026/ym7v-bh53' ) - if subjects is None: - subjects = [str(idx + 1).zfill(2) for idx in range(self.num_subjects)] - self.url = 'https://www.physionet.org/content/hd-semg/1.0.0/' self.dataset_folder = dataset_folder self.analysis = analysis + self.subjects = subjects sessions_values = ['1', '2'] if self.analysis == 'sessions' else ['1'] # only grab first session unless both are desired + subjects_values = [str(idx + 1).zfill(2) for idx in range(self.num_subjects)] # +1 due to Python indexing self.common_regex_filters = [ - RegexFilter(left_bound='subject', right_bound='_session', values=subjects, description='subjects'), # +1 due to Python indexing + RegexFilter(left_bound='subject', right_bound='_session', values=subjects_values, description='subjects'), RegexFilter(left_bound='_session', right_bound='/', values=sessions_values, description='sessions') ] @@ 
-78,6 +77,8 @@ def _prepare_data_helper(self, split = False): ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + if self.subjects is not None: + odh = odh.isolate_data('subjects', self.subjects, fast=True) data = odh if split: if self.analysis == 'sessions': @@ -139,6 +140,8 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + if self.subjects is not None: + odh = odh.isolate_data('subjects', self.subjects, fast=True) data = odh if split: if self.analysis == 'sessions': @@ -167,9 +170,10 @@ def __init__(self, dataset_folder: str = 'HyserRandom', analysis: str = 'baselin """ gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' 
- if subjects is None: - subjects = [str(idx + 1).zfill(2) for idx in range(20) if idx != 9] # subject 10 is missing the labels file for sample1 + # if subjects is None: + # subjects = [str(idx + 1).zfill(2) for idx in range(20) if idx != 9] # subject 10 is missing the labels file for sample1 super().__init__(gestures=gestures, num_reps=5, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) + self.subjects = [s for s in self.subjects if s != '10'] # subject 10 is missing the labels file for sample1 def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) @@ -184,6 +188,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + odh = odh.isolate_data('subjects', self.subjects, fast=True) data = odh if split: if self.analysis == 'sessions': @@ -295,6 +300,7 @@ def __init__(self, dataset_folder: str = 'HyserPR', analysis: str = 'baseline', } description = 'Hyser pattern recognition (PR) dataset. Includes dynamic and maintenance tasks for 34 hand gestures.' 
super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) # num_reps=2 b/c 2 trials + self.subjects = [s for s in self.subjects if s not in ('03', '11')] # subjects 3 and 11 are missing classes def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) @@ -310,6 +316,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) + odh = odh.isolate_data('subjects', self.subjects, fast=True) data = odh if split: if self.analysis == 'sessions': From 279ffca6751ea5090de9a39d3564d8d76a2cbdb1 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 14:41:59 -0300 Subject: [PATCH 121/129] Handle default subject values for Hyser datasets --- libemg/_datasets/hyser.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index d8f3bcd5..e88afcda 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -28,9 +28,9 @@ def __init__(self, gestures, num_reps, description, dataset_folder, analysis = ' self.subjects = subjects sessions_values = ['1', '2'] if self.analysis == 'sessions' else ['1'] # only grab first session unless both are desired - subjects_values = [str(idx + 1).zfill(2) for idx in range(self.num_subjects)] # +1 due to Python indexing + self._subjects_values = [str(idx + 1).zfill(2) for idx in range(self.num_subjects)] # +1 due to Python indexing self.common_regex_filters = [ - RegexFilter(left_bound='subject', right_bound='_session', values=subjects_values, description='subjects'), + RegexFilter(left_bound='subject', right_bound='_session', values=self._subjects_values, description='subjects'), RegexFilter(left_bound='_session', 
right_bound='/', values=sessions_values, description='sessions') ] @@ -170,10 +170,11 @@ def __init__(self, dataset_folder: str = 'HyserRandom', analysis: str = 'baselin """ gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' - # if subjects is None: - # subjects = [str(idx + 1).zfill(2) for idx in range(20) if idx != 9] # subject 10 is missing the labels file for sample1 super().__init__(gestures=gestures, num_reps=5, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) - self.subjects = [s for s in self.subjects if s != '10'] # subject 10 is missing the labels file for sample1 + + if subjects is None: + subjects = deepcopy(self._subjects_values) + self.subjects = [s for s in subjects if s != '10'] # subject 10 is missing the labels file for sample1 def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) @@ -300,7 +301,9 @@ def __init__(self, dataset_folder: str = 'HyserPR', analysis: str = 'baseline', } description = 'Hyser pattern recognition (PR) dataset. Includes dynamic and maintenance tasks for 34 hand gestures.' 
super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) # num_reps=2 b/c 2 trials - self.subjects = [s for s in self.subjects if s not in ('03', '11')] # subjects 3 and 11 are missing classes + if subjects is None: + subjects = deepcopy(self._subjects_values) + self.subjects = [s for s in subjects if s not in ('03', '11')] # subjects 3 and 11 are missing classes def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) From 61ef9c007276b804a37a5f702f8d6f784440c756 Mon Sep 17 00:00:00 2001 From: eeddy Date: Thu, 31 Oct 2024 14:42:07 -0300 Subject: [PATCH 122/129] Fixed DB8 --- libemg/_datasets/nina_pro.py | 2 +- libemg/datasets.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/libemg/_datasets/nina_pro.py b/libemg/_datasets/nina_pro.py index c65df0cc..d7299376 100644 --- a/libemg/_datasets/nina_pro.py +++ b/libemg/_datasets/nina_pro.py @@ -212,7 +212,7 @@ def prepare_data(self, split = False, subjects_values = None, reps_values = None RegexFilter(left_bound="DB8_s", right_bound="/",values=subjects_values, description='subjects') ] metadata_fetchers = [ - ColumnFetcher('cyberglove', column_mask=[idx for idx in range(self.num_channels, self.num_channels + self.num_cyberglove_dofs)]) + ColumnFetcher('labels', column_mask=[idx for idx in range(self.num_channels, self.num_channels + self.num_cyberglove_dofs)]) ] emg_column_mask = [idx for idx in range(self.num_channels)] # first columns should be EMG odh = OfflineDataHandler() diff --git a/libemg/datasets.py b/libemg/datasets.py index ba499526..72161a04 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -132,7 +132,7 @@ def evaluate(model, window_size, window_inc, feature_list=['MAV'], feature_dic={ metadata_operations = None label_val = 'classes' if regression: - metadata_operations = {'labels': lambda x: x[-1]} + metadata_operations 
= {'labels': 'last_sample'} label_val = 'labels' om = OfflineMetrics() From 58896cebf5927a8110684c249743c00b35d46a3b Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 16:22:02 -0300 Subject: [PATCH 123/129] Hyser missing subject fixes --- libemg/_datasets/hyser.py | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index e88afcda..2692e19e 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -21,18 +21,22 @@ def __init__(self, gestures, num_reps, description, dataset_folder, analysis = ' description=description, citation='https://doi.org/10.13026/ym7v-bh53' ) + if subjects is None: + subjects = [str(idx + 1).zfill(2) for idx in range(self.num_subjects)] # +1 due to Python indexing self.url = 'https://www.physionet.org/content/hd-semg/1.0.0/' self.dataset_folder = dataset_folder self.analysis = analysis self.subjects = subjects + @property + def common_regex_filters(self): sessions_values = ['1', '2'] if self.analysis == 'sessions' else ['1'] # only grab first session unless both are desired - self._subjects_values = [str(idx + 1).zfill(2) for idx in range(self.num_subjects)] # +1 due to Python indexing - self.common_regex_filters = [ - RegexFilter(left_bound='subject', right_bound='_session', values=self._subjects_values, description='subjects'), + filters = [ + RegexFilter(left_bound='subject', right_bound='_session', values=self.subjects, description='subjects'), RegexFilter(left_bound='_session', right_bound='/', values=sessions_values, description='sessions') ] + return filters def prepare_data(self, split = False): if (not self.check_exists(self.dataset_folder)): @@ -77,8 +81,6 @@ def _prepare_data_helper(self, split = False): ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - if self.subjects is not None: - odh = 
odh.isolate_data('subjects', self.subjects, fast=True) data = odh if split: if self.analysis == 'sessions': @@ -140,8 +142,6 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - if self.subjects is not None: - odh = odh.isolate_data('subjects', self.subjects, fast=True) data = odh if split: if self.analysis == 'sessions': @@ -172,9 +172,8 @@ def __init__(self, dataset_folder: str = 'HyserRandom', analysis: str = 'baselin description = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' super().__init__(gestures=gestures, num_reps=5, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) - if subjects is None: - subjects = deepcopy(self._subjects_values) - self.subjects = [s for s in subjects if s != '10'] # subject 10 is missing the labels file for sample1 + self.subjects = [s for s in self.subjects if s != '10'] + def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) @@ -189,7 +188,11 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - odh = odh.isolate_data('subjects', self.subjects, fast=True) + for idx, subject in enumerate(odh.subjects): + if (len(self.subjects) == self.num_subjects) and (int(self.subjects[subject[0, 0]]) > 10): + # Add 1 to align with proper subject ID + odh.subjects[idx] += 1 + data = odh if split: if self.analysis == 'sessions': @@ -301,9 +304,6 @@ def __init__(self, dataset_folder: str = 'HyserPR', analysis: str = 'baseline', } description = 'Hyser pattern recognition (PR) dataset. 
Includes dynamic and maintenance tasks for 34 hand gestures.' super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) # num_reps=2 b/c 2 trials - if subjects is None: - subjects = deepcopy(self._subjects_values) - self.subjects = [s for s in subjects if s not in ('03', '11')] # subjects 3 and 11 are missing classes def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) @@ -319,7 +319,10 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - odh = odh.isolate_data('subjects', self.subjects, fast=True) + + # Need to remove subjects 3 and 11 b/c they're missing classes + subject_mask = [self.subjects.index(s) for s in self.subjects if s not in ('03', '11')] + odh = odh.isolate_data('subjects', subject_mask, fast=True) data = odh if split: if self.analysis == 'sessions': From fbf05810ca2d7924144e92aad4f103a4b469bc1d Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 17:15:31 -0300 Subject: [PATCH 124/129] Store metadata as values --- libemg/data_handler.py | 95 +++++++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 44 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 05e0290d..32f4be0a 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Callable, Sequence +from typing import Callable, Sequence, Any import numpy as np import numpy.typing as npt import pandas as pd @@ -29,7 +29,7 @@ from libemg.utils import get_windows, _get_fn_windows, _get_mode_windows, make_regex class RegexFilter: - def __init__(self, left_bound: str, right_bound: str, values: Sequence[str], description: str): + def 
__init__(self, left_bound: str, right_bound: str, values: Sequence[str] | None = None, description: str | None = None): """Filters files based on filenames that match the associated regex pattern and grabs metadata based on the regex pattern. Parameters @@ -43,8 +43,8 @@ def __init__(self, left_bound: str, right_bound: str, values: Sequence[str], des description: str Description of filter - used to name the metadata field. Pass in an empty string to filter files without storing the values as metadata. """ - if values is None: - raise ValueError('Expected a list of values for RegexFilter, but got None. Using regex wildcard is not supported with the RegexFilter.') + # if values is None: + # raise ValueError('Expected a list of values for RegexFilter, but got None. Using regex wildcard is not supported with the RegexFilter.') self.pattern = make_regex(left_bound, right_bound, values) self.values = values self.description = description @@ -80,8 +80,11 @@ def get_metadata(self, filename: str): """ # this is how it should work to be the same as the ODH, but we can maybe discuss redoing this so it saves the actual value instead of the indices. might be confusing to pass values to get data but indices to isolate it. also not sure if it needs to be arrays val = re.findall(self.pattern, filename)[0] - idx = self.values.index(val) - return idx + if self.values is None: + metadata = val + else: + metadata = self.values.index(val) + return metadata class MetadataFetcher(ABC): @@ -96,7 +99,7 @@ def __init__(self, description: str): self.description = description @abstractmethod - def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[str]) -> npt.NDArray: + def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[str]) -> Any: """Fetch metadata. Must return a (N x M) numpy.ndarray, where N is the number of samples in the EMG data and M is the number of columns in the metadata. 
If a single value array is returned (0D or 1D), it will be cast to a N x 1 array where all values are the original value. @@ -114,7 +117,7 @@ def __call__(self, filename: str, file_data: npt.NDArray, all_files: Sequence[st metadata: np.ndarray Array containing the metadata corresponding to the provided file. """ - raise NotImplementedError("Must implement __call__ method.") + ... class FilePackager(MetadataFetcher): @@ -141,6 +144,7 @@ def __init__(self, regex_filter: RegexFilter, package_function: Callable[[str, s column_mask: list or None, default=None List of integers corresponding to the indices of the columns that should be extracted from the raw file data. If None is passed, all columns are extracted. """ + assert regex_filter.description is not None, 'RegexFilter must have a description, otherwise metadata will not be stored.' super().__init__(regex_filter.description) self.regex_filter = regex_filter self.package_filters = None @@ -291,6 +295,17 @@ def __add__(self, other): setattr(new_odh, self_attribute, new_value) return new_odh + def _append_to_attribute(self, name, value): + if name is None: + # Don't want this data saved to data handler, so skip it + return + if not hasattr(self, name): + setattr(self, name, []) + self.extra_attributes.append(name) + current_value = getattr(self, name) + setattr(self, name, current_value + [value]) + + def get_data(self, folder_location: str, regex_filters: Sequence[RegexFilter], metadata_fetchers: Sequence[MetadataFetcher] | None = None, delimiter: str = ',', mrdf_key: str = 'p_signal', skiprows: int = 0, data_column: Sequence[int] | None = None, downsampling_factor: int | None = None): """Method to collect data from a folder into the OfflineDataHandler object. The relevant data files can be selected based on passing in @@ -324,15 +339,6 @@ def get_data(self, folder_location: str, regex_filters: Sequence[RegexFilter], m ValueError: Raises ValueError if folder_location is not a valid directory. 
""" - def append_to_attribute(name, value): - if name == '': - # Don't want this data saved to data handler, so skip it - return - if not hasattr(self, name): - setattr(self, name, []) - self.extra_attributes.append(name) - current_value = getattr(self, name) - setattr(self, name, current_value + [value]) if not os.path.isdir(folder_location): raise ValueError(f"Folder location {folder_location} is not a directory.") @@ -372,20 +378,16 @@ def append_to_attribute(name, value): # Fetch metadata from filename for regex_filter in regex_filters: - metadata_idx = regex_filter.get_metadata(file) - metadata = metadata_idx * np.ones((file_data.shape[0], 1), dtype=int) - append_to_attribute(regex_filter.description, metadata) + metadata = regex_filter.get_metadata(file) + self._append_to_attribute(regex_filter.description, metadata) # Fetch remaining metadata for metadata_fetcher in metadata_fetchers: metadata = metadata_fetcher(file, file_data, all_files) - if metadata.ndim == 0 or metadata.shape[0] == 1: - # Cast to array with the same # of samples as EMG data - metadata = np.full((file_data.shape[0], 1), fill_value=metadata) - if metadata.ndim == 1: + if isinstance(metadata, np.ndarray) and metadata.ndim == 1: # Ensure that output is always 2D array metadata = np.expand_dims(metadata, axis=1) - append_to_attribute(metadata_fetcher.description, metadata) + self._append_to_attribute(metadata_fetcher.description, metadata) def active_threshold(self, nm_windows, active_windows, active_labels, num_std=3, nm_label=0, silent=True): """Returns an update label list of the active labels for a ramp contraction. 
@@ -463,8 +465,9 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio window_data.append(get_windows(file,window_size,window_increment)) for k in self.extra_attributes: - if type(getattr(self,k)[i]) != np.ndarray: - file_metadata = np.ones((window_data[-1].shape[0])) * getattr(self, k)[i] + file_attribute = getattr(self, k)[i] + if not isinstance(file_attribute, np.ndarray): + file_metadata = np.full(window_data[-1].shape[0], fill_value=file_attribute) else: if metadata_operations is not None: if k in metadata_operations.keys(): @@ -476,11 +479,11 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio operation = common_metadata_operations[operation] except KeyError as e: raise KeyError(f"Unexpected metadata operation string. Please pass in a function or an accepted string {tuple(common_metadata_operations.keys())}. Got: {operation}.") - file_metadata = _get_fn_windows(getattr(self,k)[i], window_size, window_increment, operation) + file_metadata = _get_fn_windows(file_attribute, window_size, window_increment, operation) else: - file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) + file_metadata = _get_mode_windows(file_attribute, window_size, window_increment) else: - file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) + file_metadata = _get_mode_windows(file_attribute, window_size, window_increment) metadata[k].append(file_metadata) @@ -539,22 +542,26 @@ def _isolate_data_helper(self, key, values,fast): key_attr = getattr(self, key) for e in self.extra_attributes: setattr(new_odh, e, []) - - for f in range(len(key_attr)): - if fast: - if key_attr[f][0][0] in values: - keep_mask = [True] * len(key_attr[f]) - else: - keep_mask = [False] * len(key_attr[f]) + + for file_idx in range(len(key_attr)): + file_data = self.data[file_idx] + file_metadata = key_attr[file_idx] + if isinstance(file_metadata, np.ndarray): + # Maybe loop through then and 
each mask with previous mask to speed up + keep_mask = np.array([i in values for i in file_metadata]) + else: - keep_mask = list([i in values for i in key_attr[f]]) - - if self.data[f][keep_mask,:].shape[0]> 0: - new_odh.data.append(self.data[f][keep_mask,:]) + keep = file_metadata in values + keep_mask = np.full(file_data.shape[0], fill_value=keep) + + if file_data[keep_mask].shape[0] > 0: + new_odh.data.append(file_data[keep_mask]) for e in self.extra_attributes: - updated_arr = getattr(new_odh, e) - updated_arr.append(getattr(self, e)[f][keep_mask]) - setattr(new_odh, e, updated_arr) + new_metadata = getattr(self, e)[file_idx] + if isinstance(new_metadata, np.ndarray): + new_metadata = new_metadata[keep_mask] + new_odh._append_to_attribute(e, new_metadata) + return new_odh From 79d594cf95983496164a09c96880d02615f3dbc1 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 17:47:02 -0300 Subject: [PATCH 125/129] Add return_value parameter to RegexFilter --- libemg/data_handler.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 32f4be0a..27a8f24c 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -29,7 +29,7 @@ from libemg.utils import get_windows, _get_fn_windows, _get_mode_windows, make_regex class RegexFilter: - def __init__(self, left_bound: str, right_bound: str, values: Sequence[str] | None = None, description: str | None = None): + def __init__(self, left_bound: str, right_bound: str, values: Sequence[str] | None = None, description: str | None = None, return_value = False): """Filters files based on filenames that match the associated regex pattern and grabs metadata based on the regex pattern. Parameters @@ -43,11 +43,10 @@ def __init__(self, left_bound: str, right_bound: str, values: Sequence[str] | No description: str Description of filter - used to name the metadata field. 
Pass in an empty string to filter files without storing the values as metadata. """ - # if values is None: - # raise ValueError('Expected a list of values for RegexFilter, but got None. Using regex wildcard is not supported with the RegexFilter.') self.pattern = make_regex(left_bound, right_bound, values) self.values = values self.description = description + self.return_value = return_value def get_matching_files(self, files: Sequence[str]): """Filter out files that don't match the regex pattern and return the matching files. From bba969497139f76e26e38dd7497892bfd63d57de Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 17:47:26 -0300 Subject: [PATCH 126/129] Try to cast to number when grabbing metadata --- libemg/data_handler.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 27a8f24c..b44e37ea 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -79,11 +79,20 @@ def get_metadata(self, filename: str): """ # this is how it should work to be the same as the ODH, but we can maybe discuss redoing this so it saves the actual value instead of the indices. might be confusing to pass values to get data but indices to isolate it. also not sure if it needs to be arrays val = re.findall(self.pattern, filename)[0] - if self.values is None: - metadata = val + if (self.values is None) or self.return_value: + # We want to store as a number if at all possible to save on memory + try: + return int(val) + except ValueError: + ... 
+ + try: + return float(val) + except ValueError: + # Can't cast to a number, so we return a string + return val else: - metadata = self.values.index(val) - return metadata + return self.values.index(val) class MetadataFetcher(ABC): From 2307e12aa9b619eb6257311217ac5d37ed71d87d Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 18:06:08 -0300 Subject: [PATCH 127/129] Replace list comprehension with mask operation Did some quick testing and found that performing mask operation was quicker than a list comprehension. --- libemg/data_handler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index b44e37ea..5ae6b4db 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -555,9 +555,9 @@ def _isolate_data_helper(self, key, values,fast): file_data = self.data[file_idx] file_metadata = key_attr[file_idx] if isinstance(file_metadata, np.ndarray): - # Maybe loop through then and each mask with previous mask to speed up - keep_mask = np.array([i in values for i in file_metadata]) - + keep_mask = np.full(file_metadata.shape[0], fill_value=False) + for value in values: + keep_mask = keep_mask | (file_metadata == value) else: keep = file_metadata in values keep_mask = np.full(file_data.shape[0], fill_value=keep) From d891f28fd7be5d273b6cc0b66dbd9333689c6a77 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 18:41:13 -0300 Subject: [PATCH 128/129] Handle single element arrays --- libemg/data_handler.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/libemg/data_handler.py b/libemg/data_handler.py index 5ae6b4db..6b8bf3b2 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -392,9 +392,12 @@ def get_data(self, folder_location: str, regex_filters: Sequence[RegexFilter], m # Fetch remaining metadata for metadata_fetcher in metadata_fetchers: metadata = metadata_fetcher(file, file_data, all_files) - if 
isinstance(metadata, np.ndarray) and metadata.ndim == 1: - # Ensure that output is always 2D array - metadata = np.expand_dims(metadata, axis=1) + if isinstance(metadata, np.ndarray): + if metadata.ndim == 0 or metadata.shape[0] == 1: + metadata = metadata.item() + elif metadata.ndim == 1: + # Ensure that output is always 2D array + metadata = np.expand_dims(metadata, axis=1) self._append_to_attribute(metadata_fetcher.description, metadata) def active_threshold(self, nm_windows, active_windows, active_labels, num_std=3, nm_label=0, silent=True): From 500aab38bd40c0e8ad02ea3667f193063ee7c685 Mon Sep 17 00:00:00 2001 From: Christian Morrell Date: Thu, 31 Oct 2024 18:41:53 -0300 Subject: [PATCH 129/129] Fixed Hyser workarounds --- libemg/_datasets/hyser.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/libemg/_datasets/hyser.py b/libemg/_datasets/hyser.py index 2692e19e..ead3542f 100644 --- a/libemg/_datasets/hyser.py +++ b/libemg/_datasets/hyser.py @@ -33,7 +33,7 @@ def __init__(self, gestures, num_reps, description, dataset_folder, analysis = ' def common_regex_filters(self): sessions_values = ['1', '2'] if self.analysis == 'sessions' else ['1'] # only grab first session unless both are desired filters = [ - RegexFilter(left_bound='subject', right_bound='_session', values=self.subjects, description='subjects'), + RegexFilter(left_bound='subject', right_bound='_session', values=self.subjects, description='subjects', return_value=True), RegexFilter(left_bound='_session', right_bound='/', values=sessions_values, description='sessions') ] return filters @@ -88,7 +88,7 @@ def _prepare_data_helper(self, split = False): elif self.analysis == 'baseline': data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1], fast=True), 'Test': odh.isolate_data('reps', [2], fast=True)} else: - raise ValueError(f"Unexpected value for analysis. Suported values are sessions, baseline. 
Got: {self.analysis}.") + raise ValueError(f"Unexpected value for analysis. Supported values are sessions, baseline. Got: {self.analysis}.") return data @@ -149,7 +149,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: elif self.analysis == 'baseline': data = {'All': odh, 'Train': odh.isolate_data('reps', [0], fast=True), 'Test': odh.isolate_data('reps', [1], fast=True)} else: - raise ValueError(f"Unexpected value for analysis. Suported values are sessions, baseline. Got: {self.analysis}.") + raise ValueError(f"Unexpected value for analysis. Supported values are sessions, baseline. Got: {self.analysis}.") return data @@ -171,7 +171,6 @@ def __init__(self, dataset_folder: str = 'HyserRandom', analysis: str = 'baselin gestures = {1: 'Thumb', 2: 'Index', 3: 'Middle', 4: 'Ring', 5: 'Little'} description = 'Hyser random dataset. Includes random motions performed by users. Ground truth finger forces are recorded for use in finger force regression.' super().__init__(gestures=gestures, num_reps=5, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) - self.subjects = [s for s in self.subjects if s != '10'] @@ -188,11 +187,6 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: ] odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - for idx, subject in enumerate(odh.subjects): - if (len(self.subjects) == self.num_subjects) and (int(self.subjects[subject[0, 0]]) > 10): - # Add 1 to align with proper subject ID - odh.subjects[idx] += 1 - data = odh if split: if self.analysis == 'sessions': @@ -200,7 +194,7 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: elif self.analysis == 'baseline': data = {'All': odh, 'Train': odh.isolate_data('reps', [0, 1, 2], fast=True), 'Test': odh.isolate_data('reps', [3, 4], fast=True)} else: - raise ValueError(f"Unexpected value for 
analysis. Suported values are sessions, baseline. Got: {self.analysis}.") + raise ValueError(f"Unexpected value for analysis. Supported values are sessions, baseline. Got: {self.analysis}.") return data @@ -230,6 +224,7 @@ def _get_labels(self, filename): def __call__(self, filename, file_data, all_files): labels = self._get_labels(filename) sample_idx = self.sample_regex.get_metadata(filename) + assert isinstance(sample_idx, int), f"Expected index, but got value of type {type(sample_idx)}." return labels[sample_idx] - 1 # -1 to produce 0-indexed labels @@ -304,6 +299,7 @@ def __init__(self, dataset_folder: str = 'HyserPR', analysis: str = 'baseline', } description = 'Hyser pattern recognition (PR) dataset. Includes dynamic and maintenance tasks for 34 hand gestures.' super().__init__(gestures=gestures, num_reps=2, description=description, dataset_folder=dataset_folder, analysis=analysis, subjects=subjects) # num_reps=2 b/c 2 trials + self.subjects = [s for s in self.subjects if s not in ('03', '11')] # subjects 3 and 11 are missing classes def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: filename_filters = deepcopy(self.common_regex_filters) @@ -320,9 +316,6 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: odh = OfflineDataHandler() odh.get_data(folder_location=self.dataset_folder, regex_filters=regex_filters, metadata_fetchers=metadata_fetchers) - # Need to remove subjects 3 and 11 b/c they're missing classes - subject_mask = [self.subjects.index(s) for s in self.subjects if s not in ('03', '11')] - odh = odh.isolate_data('subjects', subject_mask, fast=True) data = odh if split: if self.analysis == 'sessions': @@ -330,6 +323,6 @@ def _prepare_data_helper(self, split = False) -> dict | OfflineDataHandler: elif self.analysis == 'baseline': data = {'All': odh, 'Train': odh.isolate_data('reps', [0], fast=True), 'Test': odh.isolate_data('reps', [1], fast=True)} else: - raise ValueError(f"Unexpected value for 
analysis. Suported values are sessions, baseline. Got: {self.analysis}.") + raise ValueError(f"Unexpected value for analysis. Supported values are sessions, baseline. Got: {self.analysis}.") return data