diff --git a/.gitignore b/.gitignore index 76b73536..c52f01f3 100644 --- a/.gitignore +++ b/.gitignore @@ -70,4 +70,7 @@ MLPR.py docs/Makefile sifibridge-* *.pyc -*.model \ No newline at end of file +*.model +TEST.ipynb +/DATASET_85 +/DATASET_85PROCESSED diff --git a/docs/source/documentation/data/data_doc.md b/docs/source/documentation/data/data_doc.md index e95b69bf..4858a3ae 100644 --- a/docs/source/documentation/data/data_doc.md +++ b/docs/source/documentation/data/data_doc.md @@ -73,6 +73,57 @@ https://github.com/LibEMG/OneSubjectEMaGerDataset
+ + +
+EMGEPN100 + +
+ +**Dataset Description:** +Multi-hardware EMG dataset for 12 different hand gesture categories using the Myo armband and the G-force armband. + +| Attribute | Description | +|-------------------|------------------------------------------------------------------------------------------------------| +| **Num Subjects:** | 85 | +| **Num Reps:** | 30 Reps x 12 Gestures x 43 Users (Train group), 15 Reps x 12 Gestures x 42 Users (Test group) --> Cross User Split | +| **Classes:** | 12 (relax, fist, wave in, wave out, open, pinch, up, down, left, right, forward, backward) | +| **Device:** | Myo, gForce | +| **Sampling Rates:** | Myo: 200Hz, gForce: 500Hz | +| **Auto Download:** | False | + +**Using the Dataset:** +```Python +import libemg +from libemg.datasets import get_dataset_list +emg_epn100 = libemg.datasets.EMGEPN100() +# or get_dataset_list(cross_user=True)['EMGEPN100']() +odh = emg_epn100.prepare_data(split=True, segment=True, relabel_seg=None, + channel_last=True, subjects=None)['All'] +``` + +**Dataset Location** +https://laboratorio-ia.epn.edu.ec/es/recursos/dataset/emg-imu-epn-100 + +**References:** +``` +@article{vasconez-2022, + author = {Vásconez, Juan Pablo and López, Lorena Isabel Barona and Caraguay, Ángel Leonardo Valdivieso and Benalcázar, Marco E.}, + journal = {Sensors}, + month = {12}, + number = {24}, + pages = {9613}, + title = {{Hand Gesture Recognition Using EMG-IMU Signals and Deep Q-Networks}}, + volume = {22}, + year = {2022}, + doi = {10.3390/s22249613}, + url = {https://doi.org/10.3390/s22249613}, +} +``` +
+
+ +
@@ -748,7 +799,6 @@ https://github.com/LibEMG/WS_CIIL

- ## Regression diff --git a/libemg/_datasets/__init__.py b/libemg/_datasets/__init__.py index 975fd02b..ef752a37 100644 --- a/libemg/_datasets/__init__.py +++ b/libemg/_datasets/__init__.py @@ -3,6 +3,7 @@ from libemg._datasets import continous_transitions from libemg._datasets import dataset from libemg._datasets import emg_epn612 +from libemg._datasets import emg_epn100 from libemg._datasets import fors_emg from libemg._datasets import fougner_lp from libemg._datasets import grab_myo diff --git a/libemg/_datasets/emg_epn100.py b/libemg/_datasets/emg_epn100.py new file mode 100644 index 00000000..8ed80431 --- /dev/null +++ b/libemg/_datasets/emg_epn100.py @@ -0,0 +1,335 @@ +from libemg._datasets.dataset import Dataset +from libemg.data_handler import OfflineDataHandler +import numpy as np +from libemg.utils import * +import os +import warnings +from typing import Any, Dict, Iterable +import h5py +import numpy as np +from scipy.io import loadmat + + +# FIXED GLOBAL GESTURE MAP +GESTURE_MAP = { # Matches EPN-612 Class IDs + "relax": 0, + "fist": 1, + "wave in": 2, + "wave out": 3, + "open": 4, + "pinch": 5, + "up": 6, + "down": 7, + "left": 8, + "right": 9, + "forward": 10, + "backward": 11, +} + +# MATLAB-derived fixed gesture order (rep 1..180) +GESTURE_ORDER_180 = ( + ["relax"] * 15 + + ["wave in"] * 15 + + ["wave out"] * 15 + + ["fist"] * 15 + + ["open"] * 15 + + ["pinch"] * 15 + + ["up"] * 15 + + ["down"] * 15 + + ["left"] * 15 + + ["right"] * 15 + + ["forward"] * 15 + + ["backward"] * 15 +) +assert len(GESTURE_ORDER_180) == 180 + +# Assigning integer labels to devices +DEVICE_MAP = { + "myo": 0, + "gForce": 1, +} + + +# ======== UTILS ======== +def to_scalar(x): + if isinstance(x, np.ndarray) and x.size == 1: + return x.item() + return x + +def write_h5_scalar(group: h5py.Group, name, value): + name = str(name) + value = to_scalar(value) + + if isinstance(value, (int, float, np.integer, np.floating)): + group.create_dataset(name, data=value) + else: + dt = 
h5py.string_dtype(encoding="utf-8") + group.create_dataset(name, data=np.array(str(value), dtype=dt)) + + +# ======== METADATA EXTRACTION ======== +def extract_metadata(userData) -> Dict[str, Any]: + meta = {} + + def extract_struct(section): + out = {} + for field in section._fieldnames: + out[field] = to_scalar(getattr(section, field)) + return out + + meta["userInfo"] = extract_struct(userData.userInfo) + meta["extraInfo"] = extract_struct(userData.extraInfo) + meta["deviceInfo"] = extract_struct(userData.deviceInfo) + meta["userGroup"] = to_scalar(userData.userGroup) + meta["gestureNameMap"] = {str(v): k for k, v in GESTURE_MAP.items()} + + return meta + + +# ======== CORE PROCESSING ======== +def process_user( + mat_path: str, + out_path: str, + subject_id: int, + is_training_group: bool): + userData = loadmat(mat_path, squeeze_me=True, + struct_as_record=False)["userData"] + + reps_written = 0 + + with h5py.File(out_path, "w") as h5: + # ---- META ---- + meta_grp = h5.create_group("meta") + meta = extract_metadata(userData) + + for section, values in meta.items(): + if isinstance(values, dict): + sec_grp = meta_grp.create_group(section) + for k, v in values.items(): + write_h5_scalar(sec_grp, k, v) + else: + write_h5_scalar(meta_grp, section, values) + + # ---- REPS ---- + reps_grp = h5.create_group("reps") + + def process_block(block, rep_offset: int, max_reps: int): + nonlocal reps_written + for i in range(max_reps): + rep_id = rep_offset + i + entry = block[i] + + if not hasattr(entry, "emg"): + warnings.warn( + f"Missing EMG (subject={subject_id}, rep={rep_id})" + ) + continue + + gesture = GESTURE_ORDER_180[i] + classe = GESTURE_MAP[gesture] + + emg = np.asarray(entry.emg, dtype=np.float32) + point_begins = np.asarray(entry.pointGestureBegins, dtype=np.int64) + + rep_grp = reps_grp.create_group(f"rep_{rep_id:03d}") + rep_grp.create_dataset("emg", data=emg) + rep_grp.create_dataset("gesture", data=classe) + rep_grp.create_dataset("subject", 
data=subject_id) + rep_grp.create_dataset("rep", data=rep_id) + rep_grp.create_dataset("point_begins", data=point_begins) + + reps_written += 1 + + # training block: reps 0..179 + process_block(userData.training, rep_offset=0, max_reps=180) + + # testing block only for training users: reps 180..359 + if is_training_group and hasattr(userData, "testing"): + process_block(userData.testing, rep_offset=180, max_reps=180) + + print(f"Finished user subject={subject_id} | " + f"reps extracted={reps_written} | " + f"output={out_path}") + + +# ======== DATASET WALKER ======== +def process_dataset(root_in: str, root_out: str): + for split in ["training", "testing"]: + in_split = os.path.join(root_in, split) + out_split = os.path.join(root_out, split) + os.makedirs(out_split, exist_ok=True) + + user_dirs = sorted(d for d in os.listdir(in_split) if d.startswith("user_")) + + print(f"\n=== Processing split: {split} ===") + + for idx, user_dir in enumerate(user_dirs): + subject_id = idx + mat_path = os.path.join(in_split, user_dir, "userData.mat") + out_path = os.path.join(out_split, f"{user_dir}.h5") + + print(f"Starting {user_dir} -> subject={subject_id}") + + process_user(mat_path=mat_path, out_path=out_path, + subject_id=subject_id, + is_training_group=(split == "training")) + +# ======== MAIN DATASET CLASS ======== +class EMGEPN100(Dataset): + def __init__(self, dataset_folder: str='DATASET_85'): + Dataset.__init__(self, + sampling={'myo': 200, 'gForce': 500}, + num_channels={'myo': 8, 'gForce': 8}, + recording_device=['myo', 'gForce'], + num_subjects=85, + gestures= GESTURE_MAP, # Matches EPN-612 static classes IDs + num_reps="30 Reps x 12 Gestures x 43 Users (Train group), 15 Reps x 12 Gestures x 42 Users (Test group) --> Cross User Split", + description="Multi-hardware EMG dataset for 12 different hand gesture categories using the myo armband and the G-force armband.", + citation="https://doi.org/10.3390/s22249613") + self.resolution_bit = {'myo': 8, 'gForce': 12} + 
self.dataset_folder = dataset_folder + self.url = "https://laboratorio-ia.epn.edu.ec/es/recursos/dataset/emg-imu-epn-100" + + def _get_odh(self, processed_root, subjects, + segment, relabel_seg, channel_last): + + splits = {"training", "testing"} + odhs = [] + + for split in splits: + split_dir = os.path.join(processed_root, split) + user_files = sorted(f for f in os.listdir(split_dir) if f.endswith(".h5")) + + odh = OfflineDataHandler() + odh.subjects = [] + odh.classes = [] + odh.reps = [] + odh.devices = [] + odh.sampling_rates = [] + odh.extra_attributes = ['subjects', 'classes', 'reps', + 'devices', 'sampling_rates'] + + for user_file in user_files: + path = os.path.join(split_dir, user_file) + + with h5py.File(path, "r") as f: + subject = int(f["reps"]["rep_000"]["subject"][()]) + subject += 43 if split == "testing" else 0 # 43 training group subjects and 42 testing + if subjects is not None: + if subject not in subjects: + continue + + reps = f["reps"] + device_str = f["meta/deviceInfo/DeviceType"][()].decode("utf-8") + device = DEVICE_MAP[device_str] + fs = float(f["meta/deviceInfo/emgSamplingRate"][()]) + + for rep_name in reps: + rep_grp = reps[rep_name] + + gst = int(rep_grp["gesture"][()]) + rep_id = int(rep_grp["rep"][()]) + + _emg = rep_grp["emg"][:].astype(np.float32, copy=False) # [T, CH] + if not channel_last: + _emg = np.transpose(_emg, (1, 0)) # [CH, T] + + if segment and gst != 0: + point_begins = rep_grp["point_begins"][()] + emg = _emg[point_begins:] + else: + emg = _emg + + # ---- Preparing ODH ---- + odh.data.append(emg) + odh.classes.append(np.ones((len(emg), 1)) * gst) + odh.subjects.append(np.ones((len(emg), 1)) * subject) + odh.reps.append(np.ones((len(emg), 1)) * rep_id) + odh.devices.append(np.ones((len(emg), 1)) * device) + odh.sampling_rates.append(np.ones((len(emg), 1)) * fs) + + if segment and gst != 0 and relabel_seg is not None: + assert type(relabel_seg) is int + gst = relabel_seg + + emg = _emg[:point_begins] + + 
odh.data.append(emg) + odh.classes.append(np.ones((len(emg), 1)) * gst) + odh.subjects.append(np.ones((len(emg), 1)) * subject) + odh.reps.append(np.ones((len(emg), 1)) * rep_id) + odh.devices.append(np.ones((len(emg), 1)) * device) + odh.sampling_rates.append(np.ones((len(emg), 1)) * fs) + + odhs.append(odh) + + return odhs + + + def prepare_data(self, + split: bool = False, + segment: bool = True, + relabel_seg: int | None = None, + channel_last: bool = True, + subjects: Iterable[int] | None = None) -> OfflineDataHandler: + """Return processed EPN100 dataset as LibEMG ODH. + + Parameters + ---------- + split: bool or None (optional), default=False + Whether to return separate training and testing ODHs. + window_ms: float or None (optional), default=None + Not a parameter of this method: window size in ms (for feature extraction) is instead passed later to OfflineDataHandler.parse_windows(multi_rate=True), since the two sensors in this dataset have different sampling rates. + stride_ms: float or None (optional), default=None + Not a parameter of this method: window stride (increment) size in ms is likewise passed later to OfflineDataHandler.parse_windows(multi_rate=True). + segment: bool, default=True + Whether to crop the segment before the 'pointGestureBegins' index in the dataset. + relabel_seg: int or None (optional), default=None + If not None, this arg will be used as the relabeling value for the cropped pre-gesture segment. + channel_last: bool, default=True, + Shape will be (, T, CH) if True otherwise (, CH, T) + subjects: Iterable[int] or None (optional), default=None + Subjects to be included in the processed dataset. + + Returns + ---------- + Dict or OfflineDataHandler + A dictionary of 'All', 'Train' and 'Test' ODHs of processed data or a single OfflineDataHandler if split is False. 
+ """ + print('\nPlease cite: ' + self.citation+'\n') + if (not self.check_exists(self.dataset_folder)) and \ + (not self.check_exists( self.dataset_folder + "PROCESSED")): + raise FileNotFoundError("Please download the EPN100+ dataset from: {} " + "and place 'testing' and 'training' folders inside: " + "'{}' folder.".format(self.url, self.dataset_folder)) + + if (not self.check_exists( self.dataset_folder + "PROCESSED")): + process_dataset(self.dataset_folder, self.dataset_folder + "PROCESSED") + + odh_tr, odh_te = self._get_odh(self.dataset_folder + "PROCESSED", + subjects, segment, relabel_seg, channel_last) + + return {'All': odh_tr + odh_te, 'Train': odh_tr, 'Test': odh_te} \ + if split else odh_tr + odh_te + + def get_device_ID(self, device_name: str): + """ + Get device label ID by name + + Parameters + ---------- + device_name: str + Name of the requested device. + + Returns + ---------- + int + Device's ID + """ + + return DEVICE_MAP[device_name] + + + + + \ No newline at end of file diff --git a/libemg/_datasets/emg_epn612.py b/libemg/_datasets/emg_epn612.py index 5e7a35e8..9c259ea9 100644 --- a/libemg/_datasets/emg_epn612.py +++ b/libemg/_datasets/emg_epn612.py @@ -102,6 +102,8 @@ def prepare_data(self, split = True, subjects = None): if split: data = {'All': odh, 'Train': odh_tr, 'Test': odh_te} + else: + data = odh_tr + odh_te return data class EMGEPN_UserIndependent(EMGEPN612): @@ -112,8 +114,11 @@ def prepare_data(self, split = True, subjects=None, feature_list = None, window_ odh = self.get_odh(subjects, feature_list, window_size, window_inc, feature_dic) odh_tr = odh.isolate_data('subjects', values=list(range(0,306))) odh_te = odh.isolate_data('subjects', values=list(range(306,612))) + if split: data = {'All': odh_tr + odh_te, 'Train': odh_tr, 'Test': odh_te} + else: + data = odh_tr + odh_te return data \ No newline at end of file diff --git a/libemg/_streamers/_OTB_MuoviPlus.py b/libemg/_streamers/_OTB_MuoviPlus.py index e779aaa4..50d697a5 100644 
--- a/libemg/_streamers/_OTB_MuoviPlus.py +++ b/libemg/_streamers/_OTB_MuoviPlus.py @@ -6,7 +6,7 @@ from multiprocessing import Event, Process from libemg.shared_memory_manager import SharedMemoryManager -from crc import Crc8, CrcCalculator +from crc import Crc8, Calculator """ OT Bioelettronica @@ -404,4 +404,4 @@ def cleanup(self): def _handle_exit_signal(self, signum, frame): print(f"[OTBStreamer] Received exit signal {signum}, cleaning up.") - self.cleanup() + self.cleanup() \ No newline at end of file diff --git a/libemg/data_handler.py b/libemg/data_handler.py index a148b974..5a89517d 100644 --- a/libemg/data_handler.py +++ b/libemg/data_handler.py @@ -423,7 +423,8 @@ def active_threshold(self, nm_windows, active_windows, active_labels, num_std=3, print(f"{num_relabeled} of {len(active_labels)} active class windows were relabelled to no motion.") return active_labels - def parse_windows(self, window_size, window_increment, metadata_operations=None, discrete=False): + def parse_windows(self, window_size, window_increment, metadata_operations=None, discrete=False, + multi_rate=False, sampling_rate_key='sampling_rates'): """Parses windows based on the acquired data from the get_data function. Parameters @@ -442,6 +443,10 @@ def parse_windows(self, window_size, window_increment, metadata_operations=None, discrete: bool (optional), default=False If True, keeps windows from each file/rep separate instead of concatenating them. Useful for discrete gesture recognition where each rep should be treated independently. + multi_rate: bool default=False + Should be True if the dataset contains sensors with different sampling rates, then window_size, window_increment must be in ms. + sampling_rate_key: str, default='sampling_rates' + the key in metadata where sampling frequency is stored. Used if multi_rate is True. 
Returns ---------- @@ -458,7 +463,20 @@ def parse_windows(self, window_size, window_increment, metadata_operations=None, dict A dictionary containing np.ndarrays for each metadata tag. Each template/rep will have one associated value for each metadata (the mode across windows). Dimensions are Tx1 where T is the number of templates. + + If multi_rate=True: + list + A list of size windows x channels x samples. Windows size will vary + since the window size will be based on time for multiple sampling rates. + dict + A dictionary containing np.ndarrays for each metadata tag of the dataset. Each window will + have an associated value for each metadata. Each key will contain a list of the same size as windows. """ + if multi_rate: + if discrete: + raise ValueError("Discrete mode is not supported for multi-rate datasets.") + return self._multi_rate_parse_windows_helper(window_size, window_increment, metadata_operations, sampling_rate_key) + return self._parse_windows_helper(window_size, window_increment, metadata_operations, discrete) def _parse_windows_helper(self, window_size, window_increment, metadata_operations, discrete=False): @@ -525,15 +543,63 @@ def _parse_windows_helper(self, window_size, window_increment, metadata_operatio else: return np.vstack(window_data), {k: np.concatenate(metadata[k], axis=0) for k in metadata.keys()} + def _multi_rate_parse_windows_helper(self, window_ms, stride_ms, metadata_operations, + sampling_rate_key='sampling_rates'): + common_metadata_operations = { + 'mean': np.mean, + 'median': np.median, + 'last_sample': lambda x: x[-1] + } + window_data = [] + metadata = {k: [] for k in self.extra_attributes} + for i, file in enumerate(self.data): + + # Calculating window size and increment based on given time in ms and sensor sampling rate + fs = getattr(self,sampling_rate_key)[i][0].item() + window_size = int(np.ceil(window_ms * fs / 1000.0)) + window_increment = int(np.ceil(stride_ms * fs / 1000.0)) + + if window_size <= 0 or 
window_increment <= 0: + raise ValueError("Window or stride length <= 0 samples for the given time in ms") + + # emg data windowing + window_data.append(get_windows(file,window_size,window_increment)) - def isolate_channels(self, channels): + for k in self.extra_attributes: + if type(getattr(self,k)[i]) != np.ndarray: + file_metadata = np.ones((window_data[-1].shape[0])) * getattr(self, k)[i] + else: + if metadata_operations is not None: + if k in metadata_operations.keys(): + # do the specified operation + operation = metadata_operations[k] + + if isinstance(operation, str): + try: + operation = common_metadata_operations[operation] + except KeyError as e: + raise KeyError(f"Unexpected metadata operation string. Please pass in a function or an accepted string {tuple(common_metadata_operations.keys())}. Got: {operation}.") + file_metadata = _get_fn_windows(getattr(self,k)[i], window_size, window_increment, operation) + else: + file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) + else: + file_metadata = _get_mode_windows(getattr(self,k)[i], window_size, window_increment) + + metadata[k].append(file_metadata) + + return window_data, metadata + + def isolate_channels(self, channels, channel_last=True): """Entry point for isolating a certain range of channels. Parameters ---------- channels: list A list of values (i.e., channels) that you want to isolate. (e.g., [0,1,2]). Indexing starts at 0. - + + channel_last: bool, default=True + If True (default), channels are indexed on the last axis of the data (T, CH); if False, on the first axis (CH, T). 
+ Returns ---------- OfflineDataHandler @@ -547,7 +613,10 @@ def isolate_channels(self, channels): new_odh = copy.deepcopy(self) # TODO: Optimize this for i in range(0, len(new_odh.data)): - new_odh.data[i] = new_odh.data[i][:,channels] + if channel_last: + new_odh.data[i] = new_odh.data[i][:,channels] + else: + new_odh.data[i] = new_odh.data[i][channels,:] return new_odh def isolate_data(self, key, values, fast=True): diff --git a/libemg/datasets.py b/libemg/datasets.py index fe6f95ce..da58e1ab 100644 --- a/libemg/datasets.py +++ b/libemg/datasets.py @@ -19,6 +19,7 @@ from libemg.offline_metrics import OfflineMetrics from libemg.filtering import Filter from libemg._datasets.emg2pose import EMG2POSEUD, EMG2POSECU +from libemg._datasets.emg_epn100 import EMGEPN100 from sklearn.preprocessing import StandardScaler import pickle import numpy as np @@ -45,6 +46,7 @@ def get_dataset_list(type='CLASSIFICATION', cross_user=False): cross_user_classification = { 'EMGEPN612': EMGEPN_UserIndependent, + 'EMGEPN100': EMGEPN100, } cross_user_regression = { @@ -69,6 +71,7 @@ def get_dataset_list(type='CLASSIFICATION', cross_user=False): 'TMR_Post' : TMR_Post, 'TMR_Pre': TMR_Pre, 'HyserPR': HyserPR, + 'EMGEPN100': EMGEPN100, } regression = { diff --git a/libemg/utils.py b/libemg/utils.py index 8422dd5a..510639a5 100644 --- a/libemg/utils.py +++ b/libemg/utils.py @@ -1,13 +1,6 @@ -import os - import numpy as np -from PIL import Image, UnidentifiedImageError -import matplotlib.pyplot as plt -from matplotlib.backends.backend_agg import FigureCanvasAgg -from matplotlib.patches import Circle - -def get_windows(data, window_size, window_increment): +def get_windows(data, window_size, window_increment, channel_last=False): """Extracts windows from a given set of data. Parameters @@ -18,30 +11,33 @@ def get_windows(data, window_size, window_increment): The number of samples in a window. window_increment: int The number of samples that advances before next window. 
+ channel_last: bool, default=False + Output will be NxLxC if True. By default the LibEMG feature extractor assumes default is False. Returns ---------- list The set of windows extracted from the data as a NxCxL where N is the number of windows, C is the number of channels - and L is the length of each window. + and L is the length of each window. Output will be NxLxC if channel_last is True. Examples --------- >>> data = np.loadtxt('data.csv', delimiter=',') >>> windows = get_windows(data, 100, 50) """ - num_windows = int((data.shape[0]-window_size)/window_increment) + 1 - windows = [] - st_id=0 - ed_id=st_id+window_size - for _ in range(num_windows): - if data.ndim == 1: - windows.append([data[st_id:ed_id].transpose()]) # One Channel EMG - else: - windows.append(data[st_id:ed_id,:].transpose()) - st_id += window_increment - ed_id += window_increment - return np.array(windows) + data = np.array(data) + if data.ndim == 1: + data = np.expand_dims(data, axis=-1) + + T = data.shape[0] + starts = np.arange(0, T - window_size + 1, window_increment) + idx = starts[:, None] + np.arange(window_size)[None, :] + + windows = data[idx] + if not channel_last: + windows = np.transpose(windows, (0, 2, 1)) + + return windows def _get_mode_windows(data, window_size, window_increment): windows = get_windows(data, window_size, window_increment) diff --git a/requirements.txt b/requirements.txt index e8d572e1..cfcad671 100644 --- a/requirements.txt +++ b/requirements.txt @@ -34,4 +34,4 @@ sifi-bridge-py mindrove crc torch -tslearn \ No newline at end of file +tslearn diff --git a/setup.py b/setup.py index ffc6592a..e2d74d11 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,7 @@ # python -m twine upload --repository testpypi dist/* --verbose <------ testpypi # -VERSION = "3.0.0b1" +VERSION = "3.0.0b2" DESCRIPTION = "LibEMG - Myoelectric Control Library" LONG_DESCRIPTION = "A library for designing and exploring real-time and offline myoelectric control systems." 
diff --git a/tests/multi_rate_dataset.ipynb b/tests/multi_rate_dataset.ipynb new file mode 100644 index 00000000..20ed01bf --- /dev/null +++ b/tests/multi_rate_dataset.ipynb @@ -0,0 +1,478 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ea407210", + "metadata": {}, + "source": [ + "# EMGEPN100\n", + "# An example for datasets with multiple hardware (sampling rates)." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "6e4d00eb", + "metadata": {}, + "outputs": [], + "source": [ + "import libemg\n", + "from libemg.datasets import get_dataset_list\n", + "import numpy as np " + ] + }, + { + "cell_type": "markdown", + "id": "58dba541", + "metadata": {}, + "source": [ + "# The dataset in 'get_dataset_list'" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a11473c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'EMGEPN100' in get_dataset_list(cross_user=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b69cec65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "'EMGEPN100' in get_dataset_list(cross_user=False)" + ] + }, + { + "cell_type": "markdown", + "id": "5e00b600", + "metadata": {}, + "source": [ + "# Prepare data" + ] + }, + { + "cell_type": "markdown", + "id": "3e6e576d", + "metadata": {}, + "source": [ + "##### Here we process the .mat files into .h5 format (done once and saved), and then prepare the data, with data being segmented based on the 'pointGestureBegins' index provided by the dataset (EMGEPN100)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "010f9ab2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Please cite: https://doi.org/10.3390/s22249613\n", + "\n", + "\n", + "=== Processing split: training ===\n", + "Starting user_001 -> subject=0\n", + "Finished user subject=0 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_001.h5\n", + "Starting user_002 -> subject=1\n", + "Finished user subject=1 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_002.h5\n", + "Starting user_003 -> subject=2\n", + "Finished user subject=2 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_003.h5\n", + "Starting user_004 -> subject=3\n", + "Finished user subject=3 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_004.h5\n", + "Starting user_005 -> subject=4\n", + "Finished user subject=4 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_005.h5\n", + "Starting user_006 -> subject=5\n", + "Finished user subject=5 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_006.h5\n", + "Starting user_007 -> subject=6\n", + "Finished user subject=6 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_007.h5\n", + "Starting user_008 -> subject=7\n", + "Finished user subject=7 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_008.h5\n", + "Starting user_009 -> subject=8\n", + "Finished user subject=8 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_009.h5\n", + "Starting user_010 -> subject=9\n", + "Finished user subject=9 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_010.h5\n", + "Starting user_011 -> subject=10\n", + "Finished user subject=10 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_011.h5\n", + "Starting user_012 -> subject=11\n", + "Finished user subject=11 | reps extracted=360 | 
output=DATASET_85PROCESSED\\training\\user_012.h5\n", + "Starting user_013 -> subject=12\n", + "Finished user subject=12 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_013.h5\n", + "Starting user_014 -> subject=13\n", + "Finished user subject=13 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_014.h5\n", + "Starting user_015 -> subject=14\n", + "Finished user subject=14 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_015.h5\n", + "Starting user_016 -> subject=15\n", + "Finished user subject=15 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_016.h5\n", + "Starting user_017 -> subject=16\n", + "Finished user subject=16 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_017.h5\n", + "Starting user_018 -> subject=17\n", + "Finished user subject=17 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_018.h5\n", + "Starting user_019 -> subject=18\n", + "Finished user subject=18 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_019.h5\n", + "Starting user_020 -> subject=19\n", + "Finished user subject=19 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_020.h5\n", + "Starting user_021 -> subject=20\n", + "Finished user subject=20 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_021.h5\n", + "Starting user_022 -> subject=21\n", + "Finished user subject=21 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_022.h5\n", + "Starting user_023 -> subject=22\n", + "Finished user subject=22 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_023.h5\n", + "Starting user_024 -> subject=23\n", + "Finished user subject=23 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_024.h5\n", + "Starting user_025 -> subject=24\n", + "Finished user subject=24 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_025.h5\n", + "Starting user_026 -> subject=25\n", + "Finished user subject=25 | reps 
extracted=360 | output=DATASET_85PROCESSED\\training\\user_026.h5\n", + "Starting user_027 -> subject=26\n", + "Finished user subject=26 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_027.h5\n", + "Starting user_028 -> subject=27\n", + "Finished user subject=27 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_028.h5\n", + "Starting user_029 -> subject=28\n", + "Finished user subject=28 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_029.h5\n", + "Starting user_030 -> subject=29\n", + "Finished user subject=29 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_030.h5\n", + "Starting user_031 -> subject=30\n", + "Finished user subject=30 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_031.h5\n", + "Starting user_032 -> subject=31\n", + "Finished user subject=31 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_032.h5\n", + "Starting user_033 -> subject=32\n", + "Finished user subject=32 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_033.h5\n", + "Starting user_034 -> subject=33\n", + "Finished user subject=33 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_034.h5\n", + "Starting user_035 -> subject=34\n", + "Finished user subject=34 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_035.h5\n", + "Starting user_036 -> subject=35\n", + "Finished user subject=35 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_036.h5\n", + "Starting user_037 -> subject=36\n", + "Finished user subject=36 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_037.h5\n", + "Starting user_038 -> subject=37\n", + "Finished user subject=37 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_038.h5\n", + "Starting user_039 -> subject=38\n", + "Finished user subject=38 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_039.h5\n", + "Starting user_040 -> subject=39\n", + "Finished user 
subject=39 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_040.h5\n", + "Starting user_041 -> subject=40\n", + "Finished user subject=40 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_041.h5\n", + "Starting user_042 -> subject=41\n", + "Finished user subject=41 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_042.h5\n", + "Starting user_043 -> subject=42\n", + "Finished user subject=42 | reps extracted=360 | output=DATASET_85PROCESSED\\training\\user_043.h5\n", + "\n", + "=== Processing split: testing ===\n", + "Starting user_001 -> subject=0\n", + "Finished user subject=0 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_001.h5\n", + "Starting user_002 -> subject=1\n", + "Finished user subject=1 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_002.h5\n", + "Starting user_003 -> subject=2\n", + "Finished user subject=2 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_003.h5\n", + "Starting user_004 -> subject=3\n", + "Finished user subject=3 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_004.h5\n", + "Starting user_005 -> subject=4\n", + "Finished user subject=4 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_005.h5\n", + "Starting user_006 -> subject=5\n", + "Finished user subject=5 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_006.h5\n", + "Starting user_007 -> subject=6\n", + "Finished user subject=6 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_007.h5\n", + "Starting user_008 -> subject=7\n", + "Finished user subject=7 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_008.h5\n", + "Starting user_009 -> subject=8\n", + "Finished user subject=8 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_009.h5\n", + "Starting user_010 -> subject=9\n", + "Finished user subject=9 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_010.h5\n", + "Starting user_011 
-> subject=10\n", + "Finished user subject=10 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_011.h5\n", + "Starting user_012 -> subject=11\n", + "Finished user subject=11 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_012.h5\n", + "Starting user_013 -> subject=12\n", + "Finished user subject=12 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_013.h5\n", + "Starting user_014 -> subject=13\n", + "Finished user subject=13 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_014.h5\n", + "Starting user_015 -> subject=14\n", + "Finished user subject=14 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_015.h5\n", + "Starting user_016 -> subject=15\n", + "Finished user subject=15 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_016.h5\n", + "Starting user_017 -> subject=16\n", + "Finished user subject=16 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_017.h5\n", + "Starting user_018 -> subject=17\n", + "Finished user subject=17 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_018.h5\n", + "Starting user_019 -> subject=18\n", + "Finished user subject=18 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_019.h5\n", + "Starting user_020 -> subject=19\n", + "Finished user subject=19 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_020.h5\n", + "Starting user_021 -> subject=20\n", + "Finished user subject=20 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_021.h5\n", + "Starting user_022 -> subject=21\n", + "Finished user subject=21 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_022.h5\n", + "Starting user_023 -> subject=22\n", + "Finished user subject=22 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_023.h5\n", + "Starting user_024 -> subject=23\n", + "Finished user subject=23 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_024.h5\n", + "Starting 
user_025 -> subject=24\n", + "Finished user subject=24 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_025.h5\n", + "Starting user_026 -> subject=25\n", + "Finished user subject=25 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_026.h5\n", + "Starting user_027 -> subject=26\n", + "Finished user subject=26 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_027.h5\n", + "Starting user_028 -> subject=27\n", + "Finished user subject=27 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_028.h5\n", + "Starting user_029 -> subject=28\n", + "Finished user subject=28 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_029.h5\n", + "Starting user_030 -> subject=29\n", + "Finished user subject=29 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_030.h5\n", + "Starting user_031 -> subject=30\n", + "Finished user subject=30 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_031.h5\n", + "Starting user_032 -> subject=31\n", + "Finished user subject=31 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_032.h5\n", + "Starting user_033 -> subject=32\n", + "Finished user subject=32 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_033.h5\n", + "Starting user_034 -> subject=33\n", + "Finished user subject=33 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_034.h5\n", + "Starting user_035 -> subject=34\n", + "Finished user subject=34 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_035.h5\n", + "Starting user_036 -> subject=35\n", + "Finished user subject=35 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_036.h5\n", + "Starting user_037 -> subject=36\n", + "Finished user subject=36 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_037.h5\n", + "Starting user_038 -> subject=37\n", + "Finished user subject=37 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_038.h5\n", + 
"Starting user_039 -> subject=38\n", + "Finished user subject=38 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_039.h5\n", + "Starting user_040 -> subject=39\n", + "Finished user subject=39 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_040.h5\n", + "Starting user_041 -> subject=40\n", + "Finished user subject=40 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_041.h5\n", + "Starting user_042 -> subject=41\n", + "Finished user subject=41 | reps extracted=180 | output=DATASET_85PROCESSED\\testing\\user_042.h5\n" + ] + } + ], + "source": [ + "emg_epn100 = libemg.datasets.EMGEPN100() # or get_dataset_list(cross_user=True)['EMGEPN100']()\n", + "dataset = emg_epn100.prepare_data(split=True, segment=True, relabel_seg=None, \n", + " channel_last=True, subjects=None)['All']" + ] + }, + { + "cell_type": "markdown", + "id": "01ad978e", + "metadata": {}, + "source": [ + "# Windowing" + ] + }, + { + "cell_type": "markdown", + "id": "d6d828b4", + "metadata": {}, + "source": [ + "##### Simple windowing while ignoring the sampling rate differences. All windows will have similar shapes but logically not recommended for datasets with multiple sampling rates." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "952c5b79", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(304244, 8, 20)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset = dataset.isolate_data(\"subjects\", list(range(10)), fast=True)\n", + "windows, meta = dataset.parse_windows(20, 20)\n", + "windows.shape" + ] + }, + { + "cell_type": "markdown", + "id": "bfe3698f", + "metadata": {}, + "source": [ + "##### Here we set multi_rate=True so the given window size and window increment will be time based (ms). 
The actual window size will be calculated based on frequency in the sampling_rate_key.\n", + "##### The output will be a list of reps for the windows, and is non rectangular due to different sampling rates and fixed time-based window size. The metadata file, will have a list of the same size as windows, for each of the keys." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "862839d5", + "metadata": {}, + "outputs": [], + "source": [ + "# Time based windowing\n", + "windows, meta = dataset.parse_windows(250, # ms\n", + " 20, # ms\n", + " multi_rate=True, \n", + " sampling_rate_key='sampling_rates')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "ceb3670c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3600, 3600, 3600)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(windows), len(meta['sampling_rates']), len(meta['classes'])" + ] + }, + { + "cell_type": "markdown", + "id": "19b69a65", + "metadata": {}, + "source": [ + "##### Here we see that there are two different Time axis shapes: 125 and 50, both 250ms for 500 Hz and 200 Hz sensors respectively." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2f667152", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 50, 125])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.unique([w.shape[-1] for w in windows])" + ] + }, + { + "cell_type": "markdown", + "id": "a99b7c53", + "metadata": {}, + "source": [ + "##### We can also easily isolate different sensors with different sampling rates so we can use the normal window parser for rectangular batch of windows with fixed time-based window size" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "e1529d1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(288025, 8, 40)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Window size of 40 @ 500 Hz sensor = 80 ms windows\n", + "\n", + "dataset_g = dataset.isolate_data(\"devices\", [emg_epn100.get_device_ID('gForce')], fast=True)\n", + "windows, meta = dataset_g.parse_windows(40, 20)\n", + "windows.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "65ec0717", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(12619, 8, 40)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Window size of 40 @ 200 Hz sensor = 200 ms windows\n", + "\n", + "dataset_m = dataset.isolate_data(\"devices\", [emg_epn100.get_device_ID('myo')], fast=True)\n", + "windows, meta = dataset_m.parse_windows(40, 20)\n", + "windows.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + 
"version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}