diff --git a/api/__init__.py b/api/__init__.py
index 43164b5..9ba5405 100644
--- a/api/__init__.py
+++ b/api/__init__.py
@@ -52,6 +52,7 @@ def create_app():
 
     # Now add routes
     from api.resources.gene_information import gene_information
+    from api.resources.gaia import gaia
     from api.resources.rnaseq_gene_expression import rnaseq_gene_expression
     from api.resources.microarray_gene_expression import microarray_gene_expression
     from api.resources.proxy import bar_proxy
@@ -66,6 +67,7 @@ def create_app():
     from api.resources.llama3 import llama3
 
     bar_api.add_namespace(gene_information)
+    bar_api.add_namespace(gaia)
     bar_api.add_namespace(rnaseq_gene_expression)
     bar_api.add_namespace(microarray_gene_expression)
     bar_api.add_namespace(bar_proxy)
diff --git a/api/models/gaia.py b/api/models/gaia.py
new file mode 100644
index 0000000..ec27635
--- /dev/null
+++ b/api/models/gaia.py
@@ -0,0 +1,76 @@
+from typing import List, Optional
+from sqlalchemy import ForeignKey
+from sqlalchemy.orm import relationship
+from api import db
+
+
+class Genes(db.Model):
+    """Row of the "genes" table on the "gaia" bind."""
+
+    __bind_key__ = "gaia"
+    __tablename__ = "genes"
+
+    id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True)
+    species: db.Mapped[str] = db.mapped_column(db.String(64), nullable=False)
+    locus: db.Mapped[Optional[str]] = db.mapped_column(db.String(64), nullable=True)
+    geneid: db.Mapped[Optional[str]] = db.mapped_column(db.String(32), nullable=True)
+    # Aliases rows whose genes_id references this gene
+    children: db.Mapped[List["Aliases"]] = relationship()
+
+
+class Aliases(db.Model):
+    """Row of the "aliases" table: one alias string attached to a gene."""
+
+    __bind_key__ = "gaia"
+    __tablename__ = "aliases"
+
+    id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True)
+    genes_id: db.Mapped[int] = db.mapped_column(ForeignKey("genes.id", ondelete="CASCADE"), nullable=False)
+    alias: db.Mapped[str] = db.mapped_column(db.String(256), nullable=False)
+
+
+class PublicationFigures(db.Model):
+    """Row of the "publication_figures" table, parent of PubIds and Figures rows."""
+
+    __bind_key__ = "gaia"
+    __tablename__ = "publication_figures"
+
+    id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True)
+    title: db.Mapped[Optional[str]] = db.mapped_column(db.String(512), nullable=True)
+    abstract: db.Mapped[Optional[str]] = db.mapped_column(db.Text, nullable=True)
+    # Two relationships need two attribute names: when both were called
+    # "children" the second assignment silently replaced the PubIds one.
+    # "children" keeps its effective meaning (Figures) for existing callers.
+    pub_ids: db.Mapped[List["PubIds"]] = relationship()
+    children: db.Mapped[List["Figures"]] = relationship()
+
+
+class PubIds(db.Model):
+    """Row of the "pub_ids" table: PubMed / PMC identifiers of a publication."""
+
+    __bind_key__ = "gaia"
+    __tablename__ = "pub_ids"
+
+    id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True)
+    # Declared once, with its foreign key (an earlier plain declaration was overwritten by this one)
+    publication_figures_id: db.Mapped[int] = db.mapped_column(
+        ForeignKey("publication_figures.id", ondelete="CASCADE"), nullable=False
+    )
+    pubmed: db.Mapped[Optional[str]] = db.mapped_column(db.String(16), nullable=True)
+    pmc: db.Mapped[Optional[str]] = db.mapped_column(db.String(16), nullable=True)
+
+
+class Figures(db.Model):
+    """Row of the "figures" table: one figure belonging to a publication."""
+
+    __bind_key__ = "gaia"
+    __tablename__ = "figures"
+
+    id: db.Mapped[int] = db.mapped_column(db.Integer, nullable=False, primary_key=True)
+    # Declared once, with its foreign key (an earlier plain declaration was overwritten by this one)
+    publication_figures_id: db.Mapped[int] = db.mapped_column(
+        ForeignKey("publication_figures.id", ondelete="CASCADE"), nullable=False
+    )
+    img_name: db.Mapped[str] = db.mapped_column(db.String(64), nullable=False)
+    caption: db.Mapped[Optional[str]] = db.mapped_column(db.Text, nullable=True)
+    img_url: db.Mapped[Optional[str]] = db.mapped_column(db.String(256), nullable=True)
diff --git a/api/resources/gaia.py b/api/resources/gaia.py
new file mode 100644
index 0000000..8c7818e
--- /dev/null
+++ b/api/resources/gaia.py
@@ -0,0 +1,141 @@
+from flask import request
+from flask_restx import Namespace, Resource, fields
+from markupsafe import escape
+from api import db
+from api.utils.bar_utils import BARUtils
+from api.models.gaia import Genes, Aliases, PubIds, Figures
+from sqlalchemy import func, or_
+from marshmallow import Schema, ValidationError, fields as marshmallow_fields
+import json
+
+gaia = Namespace("Gaia", description="Gaia", path="/gaia")
+
+parser = gaia.parser()
+parser.add_argument(
+    "terms",
+    type=list,
+    action="append",
+    required=True,
+    help="Publication IDs",
+    default=["32492426", "32550561"],
+)
+
+publication_request_fields = gaia.model(
+    "Publications",
+    {
+        "pubmeds": fields.List(
+            required=True,
+            example=["32492426", "32550561"],
+            cls_or_instance=fields.String,
+        ),
+    },
+)
+
+
+# Validation is done in a different way to keep things simple
+class PublicationSchema(Schema):
+    """Marshmallow schema for the publication_figures request body."""
+
+    # required=True: a body without "pubmeds" must fail validation (400)
+    # instead of reaching json_data["pubmeds"] and raising KeyError.
+    pubmeds = marshmallow_fields.List(cls_or_instance=marshmallow_fields.String, required=True)
+
+
+# The identifier is a path parameter, so the route has to declare it
+@gaia.route("/aliases/<string:identifier>")
+class GaiaAliases(Resource):
+    @gaia.param("identifier", _in="path", default="ABI3")
+    def get(self, identifier=""):
+        """Return species, locus, gene id and aliases for an alias, locus or NCBI gene id."""
+
+        # Escape input; str() hands the database driver a plain string
+        identifier = str(escape(identifier))
+
+        # Is it valid
+        if not BARUtils.is_gaia_alias(identifier):
+            return BARUtils.error_exit("Invalid identifier"), 400
+
+        # Check if alias exists
+        # Note: This check can be done in one query, but optimizer is not using indexes for some reason
+        query = db.select(Aliases.genes_id).filter(Aliases.alias == identifier)
+        query_ids = [row.genes_id for row in db.session.execute(query).fetchall()]
+
+        if not query_ids:
+            # Alias doesn't exist. Get the ids if it's locus or ncbi id
+            query = db.select(Genes.id).filter(or_(Genes.locus == identifier, Genes.geneid == identifier))
+            query_ids = [row.id for row in db.session.execute(query).fetchall()]
+
+        if not query_ids:
+            return BARUtils.error_exit("Nothing found"), 404
+
+        # Left join is important in case aliases do not exist for the given locus / geneid
+        query = (
+            db.select(Genes.species, Genes.locus, Genes.geneid, func.json_arrayagg(Aliases.alias).label("aliases"))
+            .select_from(Genes)
+            .outerjoin(Aliases, Aliases.genes_id == Genes.id)
+            .filter(Genes.id.in_(query_ids))
+            .group_by(Genes.species, Genes.locus, Genes.geneid)
+        )
+
+        data = []
+        for row in db.session.execute(query).fetchall():
+            # JSONify aliases; the driver may hand back text or an already decoded list
+            aliases = row.aliases or []
+            if isinstance(aliases, (str, bytes, bytearray)):
+                aliases = json.loads(aliases)
+
+            data.append(
+                {
+                    "species": row.species,
+                    "locus": row.locus,
+                    "geneid": row.geneid,
+                    # A gene without aliases comes out of the left join as [null]
+                    "aliases": [alias for alias in aliases if alias is not None],
+                }
+            )
+
+        # Return final data
+        return BARUtils.success_exit(data)
+
+
+@gaia.route("/publication_figures")
+class GaiaPublicationFigures(Resource):
+    @gaia.expect(publication_request_fields)
+    def post(self):
+        """Return the figures of the requested PubMed ids, grouped per publication."""
+        json_data = request.get_json()
+
+        # Validate json
+        try:
+            json_data = PublicationSchema().load(json_data)
+        except ValidationError as err:
+            return BARUtils.error_exit(err.messages), 400
+
+        pubmeds = json_data["pubmeds"]
+
+        # Check if pubmed ids are valid
+        if not all(BARUtils.is_integer(pubmed) for pubmed in pubmeds):
+            return BARUtils.error_exit("Invalid Pubmed ID"), 400
+
+        # Inner join: only publications that have figures are returned
+        query = (
+            db.select(Figures.img_name, Figures.caption, Figures.img_url, PubIds.pubmed, PubIds.pmc)
+            .select_from(Figures)
+            .join(PubIds, PubIds.publication_figures_id == Figures.publication_figures_id)
+            .filter(PubIds.pubmed.in_(pubmeds))
+            .order_by(PubIds.pubmed.desc())
+        )
+
+        # Rows arrive ordered by pubmed, so a change of pubmed starts a new record.
+        # Records are appended when they are created, so an empty result stays [].
+        data = []
+        record = None
+        for row in db.session.execute(query).fetchall():
+            if record is None or record["id"]["pubmed"] != row.pubmed:
+                record = {"figures": [], "id": {"pubmed": row.pubmed, "pmc": row.pmc}}
+                data.append(record)
+
+            record["figures"].append({"img_name": row.img_name, "caption": row.caption, "img_url": row.img_url})
+
+        # Return final data
+        return BARUtils.success_exit(data)
diff --git a/api/utils/bar_utils.py b/api/utils/bar_utils.py
index 12e5b7c..9cff5fd 100644
--- a/api/utils/bar_utils.py
+++ b/api/utils/bar_utils.py
@@ -252,6 +252,19 @@ def is_integer(data):
         else:
             return False
 
+    @staticmethod
+    def is_gaia_alias(data):
+        """Check if the input is a valid gaia alias.
+        :param data: candidate alias, locus or gene id (string)
+        :return: True if valid gaia alias
+        """
+        # fullmatch + explicit ASCII class: "$" accepts a trailing newline and
+        # re.I lets a few non-ASCII letters (e.g. U+212A) match [a-z].
+        if isinstance(data, str) and re.fullmatch(r"[A-Za-z0-9_]{1,50}", data):
+            return True
+        else:
+            return False
+
     @staticmethod
     def format_poplar(poplar_gene):
         """Format Poplar gene ID to be Potri.016G107900, i.e.
capitalized P and G diff --git a/docs/requirements.txt b/docs/requirements.txt index c4cb67e..adc542f 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,24 +1,25 @@ accessible-pygments==0.0.5 alabaster==1.0.0 babel==2.17.0 -beautifulsoup4==4.13.4 -certifi==2025.7.14 -charset-normalizer==3.4.2 -docutils==0.21.2 -furo==2025.7.19 -idna==3.10 +beautifulsoup4==4.14.3 +certifi==2026.1.4 +charset-normalizer==3.4.4 +docutils==0.22.4 +furo==2025.12.19 +idna==3.11 imagesize==1.4.1 Jinja2==3.1.6 -MarkupSafe==3.0.2 -packaging==25.0 +MarkupSafe==3.0.3 +packaging==26.0 Pygments==2.19.2 pytz==2025.2 -requests==2.32.4 +requests==2.32.5 +roman-numerals==4.1.0 roman-numerals-py==3.1.0 -setuptools==80.9.0 +setuptools==80.10.1 snowballstemmer==3.0.1 -soupsieve==2.7 -Sphinx==8.2.3 +soupsieve==2.8.3 +Sphinx==9.1.0 sphinx-basic-ng==1.0.0b2 sphinx-copybutton==0.5.2 sphinxcontrib-applehelp==2.0.0 @@ -27,6 +28,6 @@ sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jsmath==1.0.1 sphinxcontrib-qthelp==2.0.0 sphinxcontrib-serializinghtml==2.0.0 -typing_extensions==4.14.1 -urllib3==2.5.0 +typing_extensions==4.15.0 +urllib3==2.6.3 wheel==0.45.1 diff --git a/requirements.txt b/requirements.txt index 7794b99..c85fc34 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,61 +1,61 @@ aniso8601==10.0.1 async-timeout==5.0.1 attrs==25.4.0 -black==25.9.0 +black==26.1.0 blinker==1.9.0 cachelib==0.13.0 -certifi==2025.10.5 +certifi==2026.1.4 charset-normalizer==3.4.4 -click==8.3.0 -coverage==7.11.0 -Deprecated==1.2.18 +click==8.3.1 +coverage==7.13.1 +Deprecated==1.3.1 flake8==7.3.0 Flask==3.1.2 Flask-Caching==2.3.1 -flask-cors==6.0.1 -Flask-Limiter==4.0.0 +flask-cors==6.0.2 +Flask-Limiter==4.1.1 flask-marshmallow==1.3.0 flask-restx==1.3.2 Flask-SQLAlchemy==3.1.1 -greenlet==3.2.4 +greenlet==3.3.0 idna==3.11 importlib_resources==6.5.2 iniconfig==2.3.0 itsdangerous==2.2.0 Jinja2==3.1.6 -jsonschema==4.25.1 +jsonschema==4.26.0 jsonschema-specifications==2025.9.1 limits==5.6.0 
markdown-it-py==4.0.0 MarkupSafe==3.0.3 -marshmallow==4.0.1 +marshmallow==4.2.0 mccabe==0.7.0 mdurl==0.1.2 mypy_extensions==1.1.0 mysqlclient==2.2.7 ordered-set==4.1.0 -packaging==25.0 -pathspec==0.12.1 -platformdirs==4.5.0 +packaging==26.0 +pathspec==1.0.3 +platformdirs==4.5.1 pluggy==1.6.0 pycodestyle==2.14.0 pyflakes==3.4.0 Pygments==2.19.2 pyrsistent==0.20.0 -pytest==8.4.2 +pytest==9.0.2 python-dateutil==2.9.0.post0 -pytokens==0.2.0 +pytokens==0.4.0 pytz==2025.2 -redis==7.0.1 +redis==7.1.0 referencing==0.37.0 requests==2.32.5 rich==14.2.0 -rpds-py==0.28.0 -setuptools==80.9.0 +rpds-py==0.30.0 +setuptools==80.10.1 six==1.17.0 -SQLAlchemy==2.0.44 +SQLAlchemy==2.0.46 typing_extensions==4.15.0 -urllib3==2.5.0 -Werkzeug==3.1.3 +urllib3==2.6.3 +Werkzeug==3.1.5 wheel==0.45.1 -wrapt==1.17.3 +wrapt==2.0.1