diff --git a/DESCRIPTION b/DESCRIPTION index 1959e65..76c4edd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,11 @@ Imports: tidyr, htmlwidgets, grDevices, - stats + stats, + text2vec, + stopwords, + xml2, + rentrez Suggests: data.table, BiocStyle, diff --git a/NAMESPACE b/NAMESPACE index c433f2b..28f35aa 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -4,6 +4,7 @@ export(annotateProteinInfoFromIndra) export(cytoscapeNetwork) export(cytoscapeNetworkOutput) export(exportNetworkToHTML) +export(filterSubnetworkByContext) export(getSubnetworkFromIndra) export(previewNetworkInBrowser) export(renderCytoscapeNetwork) @@ -16,13 +17,27 @@ importFrom(httr,GET) importFrom(httr,POST) importFrom(httr,add_headers) importFrom(httr,content) +importFrom(httr,content_type_json) importFrom(httr,status_code) importFrom(jsonlite,fromJSON) importFrom(jsonlite,toJSON) importFrom(r2r,hashmap) importFrom(r2r,keys) importFrom(r2r,query) +importFrom(rentrez,entrez_fetch) importFrom(stats,cor) importFrom(stats,setNames) +importFrom(stopwords,stopwords) +importFrom(text2vec,TfIdf) +importFrom(text2vec,create_dtm) +importFrom(text2vec,create_vocabulary) +importFrom(text2vec,fit_transform) +importFrom(text2vec,itoken) +importFrom(text2vec,prune_vocabulary) +importFrom(text2vec,vocab_vectorizer) +importFrom(text2vec,word_tokenizer) importFrom(tidyr,pivot_wider) importFrom(utils,browseURL) +importFrom(xml2,read_xml) +importFrom(xml2,xml_find_all) +importFrom(xml2,xml_text) diff --git a/R/filterSubnetworkByContext.R b/R/filterSubnetworkByContext.R new file mode 100644 index 0000000..2bb07b4 --- /dev/null +++ b/R/filterSubnetworkByContext.R @@ -0,0 +1,380 @@ +#' Filter a subnetwork by contextual relevance +#' +#' Fetches PubMed abstracts for evidence PMIDs, scores each abstract against a +#' user-supplied query, and returns only the nodes, edges, and evidence rows +#' whose abstracts meet the scoring cutoff. 
+#'
+#' Two scoring methods are available, controlled by the \code{method} argument:
+#'
+#' \describe{
+#'   \item{\code{"tag_count"} (default)}{
+#'     Counts how many tags from \code{query} appear as substrings in the
+#'     abstract (case-insensitive). The score for each abstract is an integer
+#'     in \code{[0, length(query)]}. Set \code{cutoff} to the minimum number of
+#'     tags that must appear - e.g. \code{cutoff = 2} keeps abstracts that
+#'     mention at least 2 of your tags. \code{query} must be a character
+#'     \emph{vector} of tags when using this method.
+#'   }
+#'   \item{\code{"cosine"}}{
+#'     Scores abstracts using TF-IDF cosine similarity against \code{query}.
+#'     Scores are in \code{[-1, 1]} (in practice \code{[0, 1]} for text).
+#'     Set \code{cutoff} to a decimal threshold - e.g. \code{cutoff = 0.10}.
+#'     \code{query} should be a single character string; expand it with
+#'     synonyms and related terms for better recall under exact token matching.
+#'   }
+#' }
+#'
+#' @param nodes A dataframe of network nodes.
+#' @param edges A dataframe of network edges with columns: source, target,
+#'   interaction, site, evidenceLink, stmt_hash.
+#' @param query For \code{method = "tag_count"}: a character vector of tags,
+#'   e.g. \code{c("CHEK1", "DNA damage", "DNA damage repair")}.
+#'   For \code{method = "cosine"}: a single character string.
+#' @param cutoff Numeric threshold applied to the chosen scoring method.
+#'   \itemize{
+#'     \item \code{"tag_count"}: integer >= 0; abstracts must
+#'       contain at least this many tags. Max possible value is
+#'       \code{length(query)}. Default \code{1}.
+#'     \item \code{"cosine"}: numeric in \code{[0, 1]}; abstracts
+#'       must score >= this value. Default \code{0.10}.
+#'   }
+#' @param method One of \code{"tag_count"} (default) or \code{"cosine"}. 
+#' +#' @return A named list with three elements: +#' \item{nodes}{Filtered nodes dataframe (only nodes present in kept edges)} +#' \item{edges}{Filtered edges dataframe} +#' \item{evidence}{Dataframe with columns: source, target, interaction, site, +#' evidenceLink, stmt_hash, text, pmid, score. The \code{score} column +#' contains tag counts (integer) or cosine similarities (numeric) depending +#' on the method used.} +#' +#' @importFrom text2vec itoken word_tokenizer create_vocabulary prune_vocabulary +#' vocab_vectorizer create_dtm TfIdf fit_transform +#' @importFrom stopwords stopwords +#' @export +filterSubnetworkByContext <- function(nodes, + edges, + query, + cutoff = NULL, + method = c("tag_count", "cosine")) { + + method <- match.arg(method) + + if (method == "tag_count") { + if (!is.character(query) || length(query) < 1 || + any(is.na(query)) || any(!nzchar(query))) { + stop("`query` must be a character vector of tags when method = 'tag_count'.") + } + if (is.null(cutoff)) cutoff <- 1L + if (!is.numeric(cutoff) || length(cutoff) != 1L || is.na(cutoff) || + cutoff < 0 || cutoff > length(query) || cutoff != as.integer(cutoff)) { + stop("`cutoff` must be a single integer in [0, length(query)] when method = 'tag_count'.") + } + cat(sprintf( + "Method: tag_count | Tags: %d | Cutoff: >= %d tag(s)\n", + length(query), cutoff + )) + } else { + if (!is.character(query) || length(query) != 1L || + is.na(query) || !nzchar(query)) { + stop("`query` must be a single character string when method = 'cosine'.") + } + if (is.null(cutoff)) cutoff <- 0.10 + if (!is.numeric(cutoff) || length(cutoff) != 1L || is.na(cutoff) || + !is.finite(cutoff) || cutoff < 0 || cutoff > 1) { + stop("`cutoff` must be a single numeric value in [0, 1] when method = 'cosine'.") + } + cat(sprintf( + "Method: cosine | Cutoff: >= %.2f\n", cutoff + )) + } + + evidence <- .extract_evidence_text(edges) + + if (nrow(evidence) == 0) { + evidence$score <- if (method == "tag_count") integer(0) else 
numeric(0) + warning("No evidence text found - returning unfiltered inputs.") + return(list(nodes = nodes, edges = edges, evidence = evidence)) + } + pmids <- unique(evidence$pmid[!is.na(evidence$pmid) & nchar(evidence$pmid) > 0]) + + if (length(pmids) == 0) { + evidence$score <- if (method == "tag_count") { + rep(NA_integer_, nrow(evidence)) + } else { + rep(NA_real_, nrow(evidence)) + } + warning("No PMIDs found in evidence - returning unfiltered inputs.") + return(list(nodes = nodes, edges = edges, evidence = evidence)) + } + + abstract_list <- .fetch_clean_abstracts_xml(pmids) + abstracts_df <- data.frame( + pmid = names(abstract_list), + abstract = unlist(abstract_list, use.names = FALSE), + stringsAsFactors = FALSE + ) + + if (method == "tag_count") { + abstracts_df$score <- .score_by_tag_count(abstracts_df$abstract, query) + } else { + abstracts_df$score <- .score_by_cosine(query, abstracts_df$abstract) + } + + passing_pmids <- abstracts_df$pmid[abstracts_df$score >= cutoff] + + cat(sprintf( + "\n%d / %d abstracts passed cutoff (score >= %s)\n", + length(passing_pmids), nrow(abstracts_df), cutoff + )) + + evidence_scored <- merge( + evidence, + abstracts_df[, c("pmid", "score")], + by = "pmid", + all.x = TRUE + ) + evidence_scored$score[is.na(evidence_scored$score)] <- 0 + + evidence_filtered <- evidence_scored[ + evidence_scored$pmid %in% passing_pmids, + c("source", "target", "interaction", "site", + "evidenceLink", "stmt_hash", "text", "pmid", "score") + ] + + surviving_hashes <- unique(evidence_filtered$stmt_hash) + edges_filtered <- edges[edges$stmt_hash %in% surviving_hashes, ] + + surviving_nodes <- union(edges_filtered$source, edges_filtered$target) + if (!"id" %in% names(nodes)) { + stop("`nodes` must contain an `id` column.") + } + nodes_filtered <- nodes[nodes$id %in% surviving_nodes, ] + + cat(sprintf( + "Retained: %d edges (of %d), %d nodes (of %d), %d evidence rows (of %d)\n", + nrow(edges_filtered), nrow(edges), + nrow(nodes_filtered), 
nrow(nodes), + nrow(evidence_filtered), nrow(evidence_scored) + )) + + return(list( + nodes = nodes_filtered, + edges = edges_filtered, + evidence = evidence_filtered + )) +} + + +#' Score abstracts by tag count +#' +#' For each abstract, counts how many tags appear as case-insensitive substrings. +#' +#' @param abstracts Character vector of abstract texts. +#' @param tags Character vector of tags to search for. +#' @return Integer vector of tag hit counts, same length as \code{abstracts}. +#' @keywords internal +#' @noRd +.score_by_tag_count <- function(abstracts, tags) { + abstracts_lower <- tolower(abstracts) + tags_lower <- tolower(tags) + + sapply(abstracts_lower, function(abstract) { + sum(sapply(tags_lower, function(tag) grepl(tag, abstract, fixed = TRUE))) + }, USE.NAMES = FALSE) +} + + +#' Score abstracts by TF-IDF cosine similarity +#' +#' Vectorises the query and all abstracts together with TF-IDF, then returns +#' the cosine similarity of each abstract against the query vector. +#' +#' @param query Single character string query. +#' @param abstracts Character vector of abstract texts. +#' @return Numeric vector of cosine similarities in \code{[0, 1]}, same length +#' as \code{abstracts}. 
+#' @keywords internal +#' @noRd +#' @importFrom text2vec itoken word_tokenizer create_vocabulary prune_vocabulary +#' vocab_vectorizer create_dtm TfIdf fit_transform +#' @importFrom stopwords stopwords +.score_by_cosine <- function(query, abstracts) { + all_texts <- c(query, abstracts) + + tokens <- itoken(all_texts, + preprocessor = tolower, + tokenizer = word_tokenizer) + vocab <- create_vocabulary(tokens, stopwords = stopwords("en")) + vocab <- prune_vocabulary(vocab, term_count_min = 1) + vectorizer <- vocab_vectorizer(vocab) + dtm <- create_dtm(tokens, vectorizer) + tfidf <- TfIdf$new() + dtm_tfidf <- fit_transform(dtm, tfidf) + + .cos_sim <- function(a, b) { + a <- as.numeric(a) + b <- as.numeric(b) + denom <- sqrt(sum(a^2)) * sqrt(sum(b^2)) + if (denom == 0) return(0) + sum(a * b) / denom + } + + query_vec <- dtm_tfidf[1, , drop = FALSE] + abstract_vecs <- dtm_tfidf[-1, , drop = FALSE] + + scores <- sapply(seq_len(nrow(abstract_vecs)), function(i) { + .cos_sim(query_vec, abstract_vecs[i, , drop = FALSE]) + }) + + round(scores, 4) +} + + + +#' Extract evidence text from edges dataframe via INDRA API +#' @param df Edges dataframe with columns: source, target, interaction, site, +#' evidenceLink, stmt_hash +#' @return Dataframe with additional columns: text, pmid +#' @keywords internal +#' @noRd +.extract_evidence_text <- function(df) { + + required_cols <- c("source", "target", "interaction", "site", "evidenceLink", "stmt_hash") + missing_cols <- setdiff(required_cols, names(df)) + if (length(missing_cols) > 0) { + stop(sprintf("Missing required columns: %s", paste(missing_cols, collapse = ", "))) + } + + results_list <- list() + result_count <- 0 + unique_hashes <- unique(df$stmt_hash) + n_hashes <- length(unique_hashes) + + cat(sprintf("Processing %d unique statement hashes...\n", n_hashes)) + + for (i in seq_along(unique_hashes)) { + stmt_hash <- unique_hashes[i] + + if (i %% 10 == 0) cat(sprintf("Progress: %d/%d\n", i, n_hashes)) + + evidence_list <- 
.query_indra_evidence(stmt_hash) + if (is.null(evidence_list) || length(evidence_list) == 0) next + + matching_indices <- which(df$stmt_hash == stmt_hash) + + for (evidence in evidence_list) { + if (!is.null(evidence[["text"]]) && nchar(evidence[["text"]]) > 0) { + for (idx in matching_indices) { + result_count <- result_count + 1 + results_list[[result_count]] <- data.frame( + source = df$source[idx], + target = df$target[idx], + interaction = df$interaction[idx], + site = df$site[idx], + evidenceLink = df$evidenceLink[idx], + stmt_hash = df$stmt_hash[idx], + text = evidence[["text"]], + pmid = if (is.null(evidence[["pmid"]])) "" else evidence[["pmid"]], + stringsAsFactors = FALSE + ) + } + } + } + } + + if (result_count == 0) { + warning("No evidence text found for any statement hash") + return(data.frame( + source = character(), target = character(), interaction = character(), + site = character(), evidenceLink = character(), stmt_hash = character(), + text = character(), pmid = character(), stringsAsFactors = FALSE + )) + } + + results_df <- do.call(rbind, results_list) + cat(sprintf("\nComplete! 
Found %d evidence text entries.\n", nrow(results_df))) + return(results_df) +} + + +#' Fetch and clean PubMed abstracts via rentrez +#' @param pmids Character vector of PubMed IDs +#' @return Named list: pmid -> abstract text +#' @keywords internal +#' @noRd +#' @importFrom rentrez entrez_fetch +#' @importFrom xml2 read_xml xml_find_all xml_text +.fetch_clean_abstracts_xml <- function(pmids) { + results <- list() + total <- length(pmids) + + cat(sprintf("Fetching %d abstracts...\n", total)) + + for (i in seq_along(pmids)) { + pmid <- pmids[i] + + record <- tryCatch( + entrez_fetch(db = "pubmed", id = pmid, rettype = "xml"), + error = function(e) { + cat(sprintf("Error fetching PMID %s at %d/%d: %s\n", pmid, i, total, e$message)) + NULL + } + ) + + if (is.null(record)) { + results[[pmid]] <- "" + next + } + + doc <- read_xml(record) + abstract_nodes <- xml_find_all(doc, ".//AbstractText") + + if (length(abstract_nodes) > 0) { + results[[pmid]] <- paste(trimws(xml_text(abstract_nodes)), collapse = " ") + } else { + results[[pmid]] <- "" + } + + if (i %% 10 == 0 || i == total) { + cat(sprintf("Progress: %d/%d (%.1f%%)\n", i, total, (i / total) * 100)) + } + + Sys.sleep(0.34) + } + + cat("Done fetching abstracts!\n") + return(results) +} + + +#' Query INDRA API for evidence text +#' @param stmt_hash A statement hash string +#' @return A list of evidence objects from the API, or NULL if error +#' @keywords internal +#' @noRd +#' @importFrom httr POST status_code content content_type_json +#' @importFrom jsonlite fromJSON +.query_indra_evidence <- function(stmt_hash) { + url <- "https://discovery.indra.bio/api/get_evidences_for_stmt_hash" + + tryCatch({ + response <- POST( + url, + body = list(stmt_hash = stmt_hash), + encode = "json", + content_type_json() + ) + + if (status_code(response) != 200) { + warning(sprintf("API returned status %d for stmt_hash: %s", + status_code(response), stmt_hash)) + return(NULL) + } + + content(response, as = "parsed") + }, error = 
function(e) { + warning(sprintf("Error querying stmt_hash %s: %s", stmt_hash, e$message)) + return(NULL) + }) +} \ No newline at end of file diff --git a/R/utils_getSubnetworkFromIndra.R b/R/utils_getSubnetworkFromIndra.R index 228ff02..ef14ef2 100644 --- a/R/utils_getSubnetworkFromIndra.R +++ b/R/utils_getSubnetworkFromIndra.R @@ -426,3 +426,5 @@ correlations <- cor(wide_data, use = "pairwise.complete.obs") return(correlations) } + + diff --git a/man/filterSubnetworkByContext.Rd b/man/filterSubnetworkByContext.Rd new file mode 100644 index 0000000..91d176c --- /dev/null +++ b/man/filterSubnetworkByContext.Rd @@ -0,0 +1,70 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/filterSubnetworkByContext.R +\name{filterSubnetworkByContext} +\alias{filterSubnetworkByContext} +\title{Filter a subnetwork by contextual relevance} +\usage{ +filterSubnetworkByContext( + nodes, + edges, + query, + cutoff = NULL, + method = c("tag_count", "cosine") +) +} +\arguments{ +\item{nodes}{A dataframe of network nodes.} + +\item{edges}{A dataframe of network edges with columns: source, target, +interaction, site, evidenceLink, stmt_hash.} + +\item{query}{For \code{method = "tag_count"}: a character vector of tags, +e.g. \code{c("CHEK1", "DNA damage", "DNA damage repair")}. +For \code{method = "cosine"}: a single character string.} + +\item{cutoff}{Numeric threshold applied to the chosen scoring method. +\itemize{ + \item \code{"tag_count"}: integer >= 0; abstracts must + contain at least this many tags. Max possible value is + \code{length(query)}. Default \code{1}. + \item \code{"cosine"}: numeric in \code{[-1, 1]}; abstracts + must score >= this value. Default \code{0.10}. 
+}} + +\item{method}{One of \code{"tag_count"} (default) or \code{"cosine"}.} +} +\value{ +A named list with three elements: + \item{nodes}{Filtered nodes dataframe (only nodes present in kept edges)} + \item{edges}{Filtered edges dataframe} + \item{evidence}{Dataframe with columns: source, target, interaction, site, + evidenceLink, stmt_hash, text, pmid, score. The \code{score} column + contains tag counts (integer) or cosine similarities (numeric) depending + on the method used.} +} +\description{ +Fetches PubMed abstracts for evidence PMIDs, scores each abstract against a +user-supplied query, and returns only the nodes, edges, and evidence rows +whose abstracts meet the scoring cutoff. +} +\details{ +Two scoring methods are available, controlled by the \code{method} argument: + +\describe{ + \item{\code{"tag_count"} (default)}{ + Counts how many tags from \code{query} appear as substrings in the + abstract (case-insensitive). The score for each abstract is an integer + in \code{[0, length(query)]}. Set \code{cutoff} to the minimum number of + tags that must appear - e.g. \code{cutoff = 2} keeps abstracts that + mention at least 2 of your tags. \code{query} must be a character + \emph{vector} of tags when using this method. + } + \item{\code{"cosine"}}{ + Scores abstracts using TF-IDF cosine similarity against \code{query}. + Scores are in \code{[-1, 1]} (in practice \code{[0, 1]} for text). + Set \code{cutoff} to a decimal threshold - e.g. \code{cutoff = 0.10}. + \code{query} should be a single character string; expand it with + synonyms and related terms for better recall under exact token matching. 
+ } +} +} diff --git a/tests/testthat/test-filterSubnetworkByContext.R b/tests/testthat/test-filterSubnetworkByContext.R new file mode 100644 index 0000000..4f77b03 --- /dev/null +++ b/tests/testthat/test-filterSubnetworkByContext.R @@ -0,0 +1,154 @@ +make_edges <- function() { + data.frame( + source = c("A", "B"), + target = c("B", "C"), + interaction = c("activates", "inhibits"), + site = c("T308", "S473"), + evidenceLink = c("https://example.com/1", "https://example.com/2"), + stmt_hash = c("hash1", "hash2"), + stringsAsFactors = FALSE + ) +} + +make_nodes <- function() { + data.frame( + id = c("A", "B", "C"), + label = c("GeneA", "GeneB", "GeneC"), + stringsAsFactors = FALSE + ) +} + +describe(".score_by_tag_count", { + + test_that("returns 0 for an abstract that contains none of the tags", { + scores <- .score_by_tag_count("nothing relevant here", c("CHEK1", "DNA damage")) + expect_equal(scores, 0L) + }) + + test_that("counts multiple matching tags correctly", { + abstract <- "CHEK1 is involved in DNA damage repair pathways." + scores <- .score_by_tag_count(abstract, c("chek1", "DNA damage", "apoptosis")) + expect_equal(scores, 2L) + }) + +}) + +describe(".score_by_cosine", { + + test_that("returns a numeric vector of the same length as abstracts", { + abstracts <- c("DNA damage repair involves CHEK1.", "Unrelated text about metabolism.") + scores <- .score_by_cosine("CHEK1 DNA damage", abstracts) + expect_true(is.numeric(scores)) + expect_length(scores, 2) + }) + + test_that("scores a highly relevant abstract higher than an irrelevant one", { + relevant <- "CHEK1 mediates the DNA damage checkpoint response." + irrelevant <- "Photosynthesis occurs in the chloroplast of plant cells." 
+ scores <- .score_by_cosine("CHEK1 DNA damage checkpoint", c(relevant, irrelevant)) + expect_gt(scores[1], scores[2]) + }) + +}) + +describe(".extract_evidence_text", { + + test_that("stops when required columns are missing from the edges dataframe", { + bad_df <- data.frame(source = "A", target = "B", stringsAsFactors = FALSE) + expect_error( + .extract_evidence_text(bad_df), + regexp = "Missing required columns" + ) + }) + + test_that("returns an empty dataframe with correct columns when the INDRA API returns nothing", { + edges <- make_edges() + + # Mock .query_indra_evidence to always return NULL + mockery::stub(.extract_evidence_text, ".query_indra_evidence", NULL) + + result <- suppressWarnings(.extract_evidence_text(edges)) + expect_s3_class(result, "data.frame") + expect_true(all(c("source", "target", "interaction", "site", + "evidenceLink", "stmt_hash", "text", "pmid") %in% names(result))) + expect_equal(nrow(result), 0) + }) + +}) + +describe(".fetch_clean_abstracts_xml", { + + test_that("returns an empty list when given an empty pmids vector", { + result <- .fetch_clean_abstracts_xml(character(0)) + expect_true(is.list(result)) + expect_length(result, 0) + }) + + test_that("stores an empty string for a PMID that triggers an API error", { + mockery::stub( + .fetch_clean_abstracts_xml, + "entrez_fetch", + function(...) 
stop("network error") + ) + result <- suppressMessages(.fetch_clean_abstracts_xml(c("99999999"))) + expect_true("99999999" %in% names(result)) + expect_equal(result[["99999999"]], "") + }) + +}) + +describe("filterSubnetworkByContext", { + + test_that("returns nodes, edges, and evidence that match the query tags (happy path)", { + nodes <- make_nodes() + edges <- make_edges() + + # --- mock .extract_evidence_text --- + mock_evidence <- data.frame( + source = c("A", "B"), + target = c("B", "C"), + interaction = c("activates", "inhibits"), + site = c("T308", "S473"), + evidenceLink = c("https://example.com/1", "https://example.com/2"), + stmt_hash = c("hash1", "hash2"), + text = c( + "CHEK1 phosphorylates CDC25A in response to DNA damage.", + "Unrelated text about lipid metabolism and glucose uptake." + ), + pmid = c("11111111", "22222222"), + stringsAsFactors = FALSE + ) + mockery::stub(filterSubnetworkByContext, ".extract_evidence_text", mock_evidence) + + # --- mock .fetch_clean_abstracts_xml --- + mock_abstracts <- list( + "11111111" = "CHEK1 phosphorylates CDC25A in response to DNA damage.", + "22222222" = "Unrelated text about lipid metabolism and glucose uptake." 
+ ) + mockery::stub(filterSubnetworkByContext, ".fetch_clean_abstracts_xml", mock_abstracts) + + result <- filterSubnetworkByContext( + nodes = nodes, + edges = edges, + query = c("CHEK1", "DNA damage"), + cutoff = 1, + method = "tag_count" + ) + + # Structure check + expect_named(result, c("nodes", "edges", "evidence")) + + # Only the CHEK1/DNA-damage abstract passed the cutoff + expect_equal(nrow(result$edges), 1) + expect_equal(result$edges$stmt_hash, "hash1") + + # Nodes should only contain those referenced by surviving edges + expect_true(all(result$nodes$id %in% c("A", "B", "C"))) + expect_false("C" %in% result$nodes$id) # C only appears in the filtered-out edge + + # Evidence rows carry a score column + expect_true("score" %in% names(result$evidence)) + expect_true(all(result$evidence$score >= 1)) + }) + +}) \ No newline at end of file diff --git a/vignettes/Filter-By-Context.Rmd b/vignettes/Filter-By-Context.Rmd new file mode 100644 index 0000000..fbc167a --- /dev/null +++ b/vignettes/Filter-By-Context.Rmd @@ -0,0 +1,410 @@ +--- +title: "Filtering Subnetworks by Biological Context" +author: "" +date: "`r Sys.Date()`" +output: + rmarkdown::html_vignette: + toc: true + toc_depth: 3 +vignette: > + %\VignetteIndexEntry{Filtering Subnetworks by Biological Context} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + eval = FALSE +) +``` + +## Overview + +This vignette demonstrates how to use `filterSubnetworkByContext()` to filter a +protein interaction subnetwork by the contextual relevance of its supporting +literature. The function: + +1. Retrieves evidence sentences from the INDRA database for each edge in the network +2. Fetches the corresponding PubMed abstracts +3. Scores each abstract against a user-supplied text query using TF-IDF cosine score +4. 
Returns only the nodes, edges, and evidence whose abstracts exceed a score cutoff + +This is useful when a subnetwork contains many edges supported by literature from +unrelated biological contexts, and you want to focus on edges relevant to a +specific research question — in this case, **DNA damage repair in cancer**. + +## Input Data + +`filterSubnetworkByContext()` expects a nodes and edges dataframe, typically +produced by `getSubnetworkFromIndra()`. For this example we construct a small +representative input table directly, mimicking the structure of a proteomics +experiment centred on the DNA damage response kinase **CHK1**. + +The input table contains one row per protein with columns for the UniProt +mnemonic identifier, the log2 fold-change, and the adjusted p-value from a +differential expression analysis. + +```{r input-data} +input <- data.frame( + Protein = c("CHK1_HUMAN", "RFA1_HUMAN", "CLH1_HUMAN", "CRTC3_HUMAN"), + log2FC = c(2.31, 1.87, 1.45, 1.12), + adj.pvalue = c(0.0021, 0.0089, 0.0310, 0.0490), + stringsAsFactors = FALSE +) + +input +``` + +``` +## Protein log2FC adj.pvalue +## 1 CHK1_HUMAN 2.31 0.0021 +## 2 RFA1_HUMAN 1.87 0.0089 +## 3 CLH1_HUMAN 1.45 0.0310 +## 4 CRTC3_HUMAN 1.12 0.0490 +``` + +All four proteins are up-regulated (positive log2FC) and statistically +significant (adj.pvalue < 0.05). + +--- + +## Building the Subnetwork + +### Step 1 — Annotate proteins with INDRA metadata + +`annotateProteinInfoFromIndra()` maps UniProt mnemonics to HGNC gene identifiers +and other metadata used downstream by the INDRA query engine. + +```{r annotate} +library(MSstatsBioNet) +annotated_df <- annotateProteinInfoFromIndra(input, "Uniprot_Mnemonic") +``` + +### Step 2 — Retrieve the interaction subnetwork + +`getSubnetworkFromIndra()` queries the INDRA database for curated causal +interactions among the annotated proteins and returns a list containing `$nodes` +and `$edges` dataframes. 
+ +Key parameters used here: + +- **`pvalueCutoff = 0.2`** — relaxed threshold to retain more candidate edges for + downstream context filtering +- **`evidence_count_cutoff = 1`** — keep edges supported by at least one + literature statement +- **`force_include_other = "HGNC:1925"`** — always include CHK1 (HGNC:1925) + regardless of significance, as it is the focal protein of interest +- **`filter_by_curation = FALSE`** — include both curated and automatically + extracted interactions + +```{r subnetwork} +subnetwork <- getSubnetworkFromIndra( + annotated_df, + pvalueCutoff = 0.2, + logfc_cutoff = NULL, + evidence_count_cutoff = 1, + sources_filter = NULL, + force_include_other = "HGNC:1925", + filter_by_curation = FALSE +) + +# Inspect the unfiltered network +nrow(subnetwork$nodes) +nrow(subnetwork$edges) +``` + +--- + +## Filtering by Context: Tag Count + +### Defining the Query + +The query string is compared against each PubMed abstract supporting the network +edges. A richer query — one that includes synonyms, abbreviations, and related +terms — improves recall under TF-IDF, which relies on exact token matching rather +than semantic understanding. + +The expanded query below was produced with the help of a chatbot and +covers the major vocabulary used in the DNA damage repair and cancer literature. 
+
+```{r tags}
+tags <- c(
+  "dna damage repair",
+  "cancer",
+  "oncology",
+  "dna repair",
+  "genome integrity",
+  "genomic instability",
+  "double strand break",
+  "dsb",
+  "single strand break",
+  "ssb",
+  "base excision repair",
+  "ber",
+  "nucleotide excision repair",
+  "ner",
+  "mismatch repair",
+  "mmr",
+  "homologous recombination",
+  "hr",
+  "non homologous end joining",
+  "nhej",
+  "brca1",
+  "brca2",
+  "atm",
+  "atr",
+  "p53",
+  "tp53",
+  "parp",
+  "tumor suppressor",
+  "oncogene",
+  "carcinogenesis",
+  "tumorigenesis",
+  "chemotherapy resistance",
+  "radiation resistance",
+  "genotoxic stress",
+  "replication stress",
+  "oxidative dna damage",
+  "somatic mutation",
+  "tumor mutational burden",
+  "tmb"
+)
+```
+
+> **Tip:** You can iteratively refine `tags` by inspecting the
+> scores in `filtered_network$evidence` and adding terms that appear frequently
+> in high-scoring abstracts but are absent from your query.
+
+`filterSubnetworkByContext()` ties everything together. The
+`cutoff` parameter controls stringency — only edges whose supporting
+abstracts score at or above this value are retained.
+
+```{r filter}
+filtered_network <- filterSubnetworkByContext(
+  nodes = subnetwork$nodes,
+  edges = subnetwork$edges,
+  method = "tag_count",
+  cutoff = 3,
+  query = tags
+)
+```
+
+The function prints a progress summary to the console:
+
+```
+Processing N unique statement hashes...
+Fetching M abstracts...
+Progress: M/M (100.0%)
+Done fetching abstracts!
+
+X / M abstracts passed cutoff (score >= 3)
+Retained: A edges (of B), C nodes (of D), E evidence rows (of F)
+```
+
+
+### Filtered nodes
+
+```{r nodes}
+filtered_network$nodes
+```
+
+Only proteins connected by at least one contextually relevant edge are retained.
+
+### Filtered edges
+
+```{r edges}
+filtered_network$edges
+```
+
+Each row represents a causal interaction (e.g. phosphorylation, activation)
+supported by literature that passed the score threshold. 
+ +### Evidence with scores + +```{r evidence} +filtered_network$evidence +``` + +The evidence dataframe contains the following columns: + +| Column | Description | +|---|---| +| `source` | Source protein / gene | +| `target` | Target protein / gene | +| `interaction` | Interaction type (e.g. Phosphorylation) | +| `site` | Modification site if applicable | +| `evidenceLink` | URL to the INDRA evidence viewer | +| `stmt_hash` | Unique INDRA statement identifier | +| `text` | Sentence extracted from the supporting paper | +| `pmid` | PubMed ID of the source article | +| `score` | Cosine score of the abstract vs. query | + +You can sort by score to identify the most on-topic supporting evidence: + +```{r sort-evidence} +filtered_network$evidence[ + order(filtered_network$evidence$score, decreasing = TRUE), +] +``` + +### Defining a cutoff + +```{r explore-cutoff} +# Run with permissive cutoff to see full score distribution +exploratory <- filterSubnetworkByContext( + nodes = subnetwork$nodes, + edges = subnetwork$edges, + cutoff = 0.0, + query = tags +) + +summary(exploratory$evidence$score) +hist(exploratory$evidence$score, + breaks = 30, + main = "Distribution of abstract scores", + xlab = "Number of tags matched", + col = "steelblue") +``` + +--- + +## Filtering by Context: Cosine score + +### Defining the Query + +The query string is compared against each PubMed abstract supporting the network +edges. A richer query — one that includes synonyms, abbreviations, and related +terms — improves recall under TF-IDF, which relies on exact token matching rather +than semantic understanding. + +The expanded query below was produced with the help of a chatbot and +covers the major vocabulary used in the DNA damage repair and cancer literature. 
+ +```{r query} +my_query <- "DNA damage repair cancer oncology DNA repair genome integrity + genomic instability double strand break DSB single strand break SSB + base excision repair BER nucleotide excision repair NER mismatch repair MMR + homologous recombination HR non-homologous end joining NHEJ BRCA1 BRCA2 + ATM ATR p53 TP53 PARP tumor suppressor oncogene carcinogenesis tumorigenesis + chemotherapy resistance radiation resistance genotoxic stress replication stress + oxidative DNA damage somatic mutation tumor mutational burden TMB" +``` + +> **Tip:** You can iteratively refine `my_query` by inspecting the +> scores in `filtered_network$evidence` and adding terms that appear frequently +> in high-scoring abstracts but are absent from your query. + +`filterSubnetworkByContext()` ties everything together. The +`cutoff` parameter controls stringency — only edges whose supporting +abstracts score at or above this value are retained. + +```{r} +filtered_network <- filterSubnetworkByContext( + nodes = subnetwork$nodes, + edges = subnetwork$edges, + method = "cosine", + cutoff = 0.10, + query = my_query +) +``` + +The function prints a progress summary to the console: + +``` +Processing N unique statement hashes... +Fetching M abstracts... +Progress: M/M (100.0%) +Done fetching abstracts! + +X / M abstracts passed score cutoff (>= 0.10) +Retained: A edges (of B), C nodes (of D), E evidence rows (of F) +``` + + +### Filtered nodes + +```{r} +filtered_network$nodes +``` + +Only proteins connected by at least one contextually relevant edge are retained. + +### Filtered edges + +```{r} +filtered_network$edges +``` + +Each row represents a causal interaction (e.g. phosphorylation, activation) +supported by literature that passed the score threshold. 
+ +### Evidence with scores + +```{r} +filtered_network$evidence +``` + +The evidence dataframe contains the following columns: + +| Column | Description | +|---|---| +| `source` | Source protein / gene | +| `target` | Target protein / gene | +| `interaction` | Interaction type (e.g. Phosphorylation) | +| `site` | Modification site if applicable | +| `evidenceLink` | URL to the INDRA evidence viewer | +| `stmt_hash` | Unique INDRA statement identifier | +| `text` | Sentence extracted from the supporting paper | +| `pmid` | PubMed ID of the source article | +| `score` | Relevance score (tag count or cosine similarity) | + +You can sort by score to identify the most on-topic supporting evidence: + +```{r} +filtered_network$evidence[ + order(filtered_network$evidence$score, decreasing = TRUE), +] +``` + +### Choosing a score Cutoff + +The right cutoff depends on how broadly the query overlaps with the literature +in your network. As a rough guide: + +| Cutoff | Effect | +|---|---| +| `0.05` | Permissive — removes only completely off-topic abstracts | +| `0.10` | Recommended default for domain-specific queries | +| `0.20` | Stringent — retains only highly on-topic edges | +| `> 0.30` | Very stringent — use only with highly specific queries | + +To explore the score distribution before committing to a cutoff, run the +function at a low threshold and inspect the scores: + +```{r} +# Run with permissive cutoff to see full score distribution +exploratory <- filterSubnetworkByContext( + nodes = subnetwork$nodes, + edges = subnetwork$edges, + cutoff = 0.0, + method = "cosine", + query = my_query +) + +summary(exploratory$evidence$score) +hist(exploratory$evidence$score, + breaks = 30, + main = "Distribution of abstract scores", + xlab = "Cosine score to query", + col = "steelblue") +``` + +--- + +## Session Info + +```{r session-info, eval=TRUE} +sessionInfo() +```