Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ you have [access to OpenTME on Hugging Face](#hugging-face-access), and a [molab
1. [Getting started](https://molab.marimo.io/github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/notebooks/tutorials/1_getting_started.py)
2. [Description of the OpenTME features](https://molab.marimo.io/github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/notebooks/tutorials/2_description_opentme_features.py)
3. [Including TCGA metadata](https://molab.marimo.io/github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/notebooks/tutorials/3_including_TCGA_metadata.py)
4. [Using custom data](https://molab.marimo.io/github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/notebooks/tutorials/4_using_custom_data.py)
* **Examples:** Concise notebooks demonstrating specific types of analysis, such as tumor immune phenotype classification and Kaplan–Meier survival plots. Note that these are examples that may be used as a starting point for your own analysis.
1. [Clustering](https://molab.marimo.io/github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/notebooks/examples/clustering.py)
2. [Outlier detection](https://molab.marimo.io/github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/notebooks/examples/outlier_detection.py)
Expand Down
155 changes: 155 additions & 0 deletions src/aignostics_tme_studio/notebooks/tutorials/4_using_custom_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import marimo

# Version stamp written into the notebook file (presumably the marimo version that generated it).
__generated_with = "0.23.0"
# Notebook application object; every cell in this file registers itself on it through @app.cell.
app = marimo.App(width="medium")


@app.cell(hide_code=True)
def _():
    # Display the logo at the top of the notebook.
    from aignostics_tme_studio.styling import styling_utils

    # The trailing bare expression is what the cell shows as its output.
    _logo = styling_utils.get_aignx_logo()
    _logo
    return (styling_utils,)


@app.cell(hide_code=True)
def _(styling_utils):
    # Load the notebook CSS; whatever load_css() returns becomes the cell output.
    _css = styling_utils.load_css()
    _css


@app.cell(hide_code=True)
def _():
    # Ask the user for a Hugging Face token
    import marimo as mo

    _intro = mo.md("""Enter your hugging face token in the below box to enable access to OpenTME.""")

    # Step-by-step guide, shown inside a collapsible accordion.
    _token_steps = """Create an access token by going to [hf.co/settings/tokens](https://hf.co/settings/tokens)
1. Go to "Repositories permissions".
2. Select "datasets/Aignostics/OpenTME" and check boxes for read and view access.
3. Click "create token". Enter your hugging face token in the below box to enable access to OpenTME.
"""
    _help = mo.accordion({"Click here for instructions to create a Hugging Face token": _token_steps})

    # Password-style text box; other cells read the entered token through `hf_token.value`.
    hf_token = mo.ui.text(label="Your HF Token from hf.co/settings/tokens", kind="password")

    # Stack prompt, instructions and input field vertically as the cell output.
    _layout = [_intro, _help, hf_token]
    mo.vstack(_layout)
    return hf_token, mo


# Markdown cell: introduces the tutorial on exploring your own TME feature data with TME Studio.
# Kept as a single mo.md(...) expression; the comment lives outside the cell body on purpose.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
# Using custom data

You might have your own set of TME features produced by Atlas H&E-TME analysis, for example obtained via the
[Research Access Program](https://www.aignostics.com/products/atlas-he-tme/for-academics#research-access-program).
You can use TME Studio to explore your features the same way you would explore OpenTME.


For some notebooks in this repository such as the Clustering example, the hugging face download code may simply be
replaced by loading a local `.csv` file, and you're good to go. For other notebooks, such as the IDE classification
and Kaplan-Meier analysis, you will also need to add your own survival data.
""")


# Markdown cell: lists the two differences between OpenTME files and custom data files
# (metadata columns and the "research use only" marker).
# Kept as a single mo.md(...) expression; the comment lives outside the cell body on purpose.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
## Differences between OpenTME and custom datasets
There are two changes to look out for when using custom data:
### Metadata.
**OpenTME**: Since OpenTME contains analysis results of TCGA, the first 5 columns are metadata specific to TCGA:
`['TCGA_FILE_NAME', 'TCGA_SLIDE_UUID', 'TCGA_CASE_ID', 'TCGA_PROJECT_ID', 'INDICATION']`.

**Custom Data:** will contain columns `SLIDE_UUID` and `SLIDE_NAME`. The `SLIDE_UUID` is an aignostics' internal
identifier created for your slide. The `SLIDE_NAME` is the filename you provided for each slide, and can be used to
identify your slide.

### Research use only header
**OpenTME**: file names are postfixed `_RUO` to indicate research use only.

**Custom Data:** Your data csv will have a comment header "`# For research use only`" on the first line. This must
be skipped when loading the csv! Make sure you load the csvs with the `skiprows` argument:
`pd.read_csv("path/to/your/file.csv", skiprows=1)`
""")


# Markdown cell: points to the example readouts file that the next cell loads.
# Kept as a single mo.md(...) expression; the comment lives outside the cell body on purpose.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
## Example
An example file with TME features is provided in this repository at
`src/aignostics_tme_studio/notebooks/tutorials/public/concatenated_slide_readouts.csv`, containing data for
10 slides from the TCGA bladder cohort. The data is loaded in the cell below.
""")


@app.cell
def _():
    # Read the example slide readouts into a dataframe
    import pandas as pd

    from aignostics_tme_studio.utils import config

    _source = config.EXAMPLE_CUSTOM_DATA_FILE_PATH

    # skiprows=1 drops the leading "# For research use only" comment line of custom data files.
    df = pd.read_csv(_source, skiprows=1)

    # Show the loaded table as the cell output.
    df
    return df, pd


# Markdown cell: explains that the OpenTME feature/output-class settings also apply to custom data
# and introduces the worked example in the following cell.
# Kept as a single mo.md(...) expression; the comment lives outside the cell body on purpose.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""
# Using TME Studio

The lists of available features and output classes in the OpenTME settings can be used for finding features in your
custom data in the same way that they can be used to find features in OpenTME. See for example [tutorial 2 -
description of all OpenTME features](
/?file=src/aignostics_tme_studio/notebooks/tutorials/2_description_opentme_features.py). A demonstrative
example is given below:
""")


@app.cell
def _(df, hf_token, pd):
    # Look up one feature for every cell class in the custom dataframe and show it as a box plot.
    import plotly.express as px
    from huggingface_hub import hf_hub_download

    # `config` is already defined by the cell that loads the example dataframe. marimo does not
    # allow the same name to be defined in two cells, so it is imported here under a cell-local
    # (underscore-prefixed) alias instead.
    from aignostics_tme_studio.utils import config as _config
    from aignostics_tme_studio.utils import utils

    # An empty token field is passed on as None rather than as an empty string.
    _token = hf_token.value or None

    # load model output class settings
    model_settings_path = hf_hub_download(
        repo_id=_config.REPO_ID,
        filename=_config.MODEL_SETTINGS_FILENAME,
        repo_type="dataset",
        token=_token,
    )
    model_variables = utils.load_munch(model_settings_path)

    # load available features
    features_path = hf_hub_download(
        repo_id=_config.REPO_ID,
        filename=_config.FEAT_SETTINGS_FILENAME,
        repo_type="dataset",
        token=_token,
    )
    features = utils.load_features(features_path)
    feat = features["cell_features"][3]

    # Find all CC columns by looping over the cell classes: the feature's formatter has a
    # `cell_cls` placeholder that is filled in per class. Columns are always uppercase.
    columns = [
        utils.to_allcaps(feat.formatter.format(cell_cls=_cls))
        for _cls in model_variables["cell_cls"]
    ]

    # Plotly expects dataframes in long form
    df_melt = pd.melt(df[columns])

    # Only the last expression of a cell is displayed, so the box plot is the cell output.
    px.box(df_melt, x="variable", y="value")


# Run the notebook app when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/aignostics_tme_studio/utils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

# Files need to be loaded over HTTP to allow loading in molab
# metadata.csv from the tutorials' public folder.
METADATA_FILE_PATH = "https://github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/notebooks/tutorials/public/metadata.csv?raw=true"
# Example slide readouts CSV read by the "Using custom data" tutorial notebook.
EXAMPLE_CUSTOM_DATA_FILE_PATH = "https://github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/notebooks/tutorials/public/concatenated_slide_readouts.csv?raw=true"
# Logo image from the styling package.
LOGO_FILE_PATH = "https://github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/styling/images/logo_lavender.png?raw=true"
CSS_FILE_PATH = (
"https://github.com/aignostics/tme-studio/blob/main/src/aignostics_tme_studio/styling/style.css?raw=true"
Expand Down
Loading