Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
"source": [
"## How to use locally\n",
"\n",
"This notebook reads the raw data from a given input directory and writes the transformed data to a given output directory.\n",
"\n",
"1. Import repo to your local folder.\n",
"2. Be sure that you have a valid Python environment with the necessary packages.\n",
"3. Run the notebook.\n"
"1. Download this repo locally.\n",
"1. Run this sample notebook which shows how to:\n",
" 1. Generate a mapping config file from a given conceptual mapping spreadsheet.\n",
" 1. Read raw data from a given input directory, and use the mapping config file to generate transformed data in the given output directory.\n"
]
},
{
Expand All @@ -24,14 +23,14 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "2",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import helpers.transform as transform\n",
"import helpers.create_config as create_config\n",
"import transform\n",
"import create_config\n",
"import pandas as pd"
]
},
Expand All @@ -40,41 +39,40 @@
"id": "3",
"metadata": {},
"source": [
"## Input and Output Directories"
"## Parameters"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"id": "4",
"metadata": {},
"outputs": [],
"source": [
"# INPUT DATA LOCATION\n",
"INPUT_DATA_FOLDER = \"data/raw\"\n",
"# Folder from which to read the raw data.\n",
"INPUT_DATA_FOLDER = \"../data/raw\"\n",
"\n",
"# DTDL FOLDER\n",
"ENTITY_PATH = \"../appendix/Entities/\"\n",
"# Folder from which to read the entity metadata.\n",
"ENTITY_PATH = \"../../appendix/Entities/\"\n",
"\n",
"# CONFIG LOCATION\n",
"CONFIG_FOLDER = \"config\"\n",
"# Folder in which to write the mapping config file.\n",
"CONFIG_FOLDER = \"../config\"\n",
"\n",
"# OUTPUT DATA LOCATION\n",
"OUTPUT_DATA_FOLDER = \"data/transformed\""
"# Folder in which to write the transformed data.\n",
"OUTPUT_DATA_FOLDER = \"../data/transformed\""
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 3,
"id": "5",
"metadata": {},
"outputs": [],
"source": [
"EXCEL_MAPPING_FILE = f\"{CONFIG_FOLDER}/conceptual_mapping.xlsx\"\n",
"EXCEL_MAPPING_FILE = f\"{CONFIG_FOLDER}/conceptual-mapping.xlsx\"\n",
"OUTPUT_MAPPING_FOLDER = f\"{OUTPUT_DATA_FOLDER}/mapping\"\n",
"CONFIG_FILE = f\"{CONFIG_FOLDER}/config.json\"\n",
"\n",
"\n",
"# CREATE THE OUTPUT FOLDER IF NOT EXISTING\n",
"if not os.path.exists(OUTPUT_MAPPING_FOLDER):\n",
" os.makedirs(OUTPUT_MAPPING_FOLDER)\n",
Expand Down Expand Up @@ -107,12 +105,12 @@
"id": "8",
"metadata": {},
"source": [
"### Extracting Variables from Config"
"## Extracting Variables from Config"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 5,
"id": "9",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -143,8 +141,22 @@
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
Expand Down