diff --git a/.gitignore b/.gitignore index 238f632..97a8b8f 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,6 @@ MANIFEST *.env !example.env !docker.env -data/ *.bin # custom shortcut files diff --git a/pyproject.toml b/pyproject.toml index 72bd15c..d9740ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,8 +29,24 @@ repository = "https://github.com/zerospeech/vocolab" voco = "vocolab.admin.main:run_cli" [project.optional-dependencies] + +extras = [ + # todo migrate towards normal install when package is published + "vocolab-ext @ https://github.com/zerospeech/vocolab-ext/archive/master.zip" +] + +eval = [ + "vocolab[extras]", + "celery", +] + +zerospeech = [ + "vocolab[extras]", + # todo migrate this to pip version + "zerospeech-benchmarks @ https://github.com/zerospeech/vocolab-ext/archive/develop.zip" +] + dev = [ - "zerospeech-benchmarks[all]", "ipython", "jupyterlab", "pytest", @@ -40,7 +56,6 @@ dev = [ ] - [build-system] requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index c2c9f62..991aeea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ requests==2.28.2 Jinja2==3.1.2 gunicorn==20.1.0 # todo: breaking update to 4.0.0 -filesplit==3.0.2 +filesplit>=4.0.1 toml;python_version<'3.11' python-dateutil numpy diff --git a/samples/benchmark.json b/samples/benchmark.json new file mode 100644 index 0000000..462a4ed --- /dev/null +++ b/samples/benchmark.json @@ -0,0 +1,11 @@ +[ + { + "label": "test-challenge", + "start_date": "2022-06-30", + "end_date": null, + "url": "https://vocolab.com/challenge/test", + "active": false, + "evaluator": null, + "auto_eval": false + } +] \ No newline at end of file diff --git a/samples/benchmark_list.json b/samples/benchmark_list.json new file mode 100644 index 0000000..24c3be5 --- /dev/null +++ b/samples/benchmark_list.json @@ -0,0 +1,82 @@ +[ + { + "label": "test-challenge", + "start_date": "2022-02-21", 
+ "end_date": null, + "url": "https://zerospeech.com/track/test", + "active": true, + "evaluator": null, + "auto_eval": false + }, + { + "label": "abx15", + "start_date": "2015-01-20", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#zr2015-and-abx15", + "active": false, + "evaluator": null + }, + { + "label": "abx17", + "start_date": "2017-01-20", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#zrc2017-and-abx17", + "active": false, + "evaluator": null, + "auto_eval": false + }, + { + "label": "abxLS", + "start_date": "2021-12-12", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#abxls-dataset-and-benchmark", + "active": false, + "evaluator": null, + "auto_eval": false + }, + { + "label": "tde15", + "start_date": "2015-01-20", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", + "active": false, + "evaluator": null, + "auto_eval": false + }, + { + "label": "tde17", + "start_date": "2017-01-20", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", + "active": false, + "evaluator": null, + "auto_eval": false + }, + { + "label": "sLM21", + "start_date": "2021-12-12", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_4/tasks_goals/", + "active": false, + "evaluator": null, + "auto_eval": false + }, + { + "label": "prosAudit", + "start_date": "2023-02-01", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_4/tasks_goals/", + "active": true, + "evaluator": null, + "auto_eval": false + }, + { + "label": "ttso19", + "start_date": "2019-01-20", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_3/tasks_goals/", + "active": false, + "evaluator": null, + "auto_eval": false + } +] \ No newline at end of file diff --git a/samples/challenge.json b/samples/challenge.json deleted file mode 100644 index 5e8727a..0000000 --- a/samples/challenge.json +++ /dev/null @@ -1,8 
+0,0 @@ -[{ -"label": "test-challenge", -"start_date": "2022-06-30", -"end_date": null, -"url": "https://vocolab.com/challenge/test", -"active": false, -"evaluator": null -}] \ No newline at end of file diff --git a/samples/challenges.json b/samples/challenges.json deleted file mode 100644 index 2ad57d0..0000000 --- a/samples/challenges.json +++ /dev/null @@ -1,56 +0,0 @@ -[ - { - "id": 1, - "label": "test-challenge", - "start_date": "2022-02-21", - "end_date": null, - "url": "https://zerospeech.com/track/test", - "active": true, - "evaluator": null - }, - { - "id": 2, - "label": "zr2015", - "start_date": "2015-01-20", - "end_date": "2015-04-30", - "url": "https://zerospeech.com/track/2015", - "active": false, - "evaluator": null - }, - { - "id": 3, - "label": "zr2017", - "start_date": "2017-01-20", - "end_date": "2017-04-30", - "url": "https://zerospeech.com/track/2017", - "active": false, - "evaluator": null - }, - { - "id": 4, - "label": "zr2019", - "start_date": "2019-01-20", - "end_date": "2019-04-30", - "url": "https://zerospeech.com/track/2019", - "active": false, - "evaluator": null - }, - { - "id": 5, - "label": "zr2020", - "start_date": "2020-01-20", - "end_date": "2020-04-30", - "url": "https://zerospeech.com/track/2020", - "active": false, - "evaluator": null - }, - { - "id": 6, - "label": "zr2021", - "start_date": "2020-12-12", - "end_date": null, - "url": "https://zerospeech.com/track/2021", - "active": true, - "evaluator": null - } -] \ No newline at end of file diff --git a/samples/leaderboards_list.json b/samples/leaderboards_list.json new file mode 100644 index 0000000..4d3e3f9 --- /dev/null +++ b/samples/leaderboards_list.json @@ -0,0 +1,58 @@ +[ + { + "label": "test-leaderboard", + "benchmark_id": 1, + "archived": false, + "static_files": false, + "sorting_key": null + }, + { + "label": "abx-15-leaderboard", + "benchmark_id": 2, + "archived": false, + "static_files": false, + "sorting_key": null + }, + { + "label": "abx-17-leaderboard", + 
"benchmark_id": 3, + "archived": false, + "static_files": false, + "sorting_key": null + }, + { + "label": "abx-LS-leaderboard", + "benchmark_id": 4, + "archived": false, + "static_files": false, + "sorting_key": null + }, + { + "label": "sLM21-leaderboard", + "benchmark_id": 7, + "archived": false, + "static_files": false, + "sorting_key": null + }, + { + "label": "tde-15-leaderboard", + "benchmark_id": 5, + "archived": false, + "static_files": false, + "sorting_key": null + }, + { + "label": "tde-17-leaderboard", + "benchmark_id": 6, + "archived": false, + "static_files": false, + "sorting_key": null + }, + { + "label": "tts0-leaderboard", + "benchmark_id": 8, + "archived": false, + "static_files": false, + "sorting_key": null + } +] \ No newline at end of file diff --git a/tests/fixtures/db.py b/tests/fixtures/db.py index c750e77..b657a0b 100644 --- a/tests/fixtures/db.py +++ b/tests/fixtures/db.py @@ -1,11 +1,11 @@ import pytest -from vocolab.db import zrDB, create_db +from vocolab.data.db import zrDB, build_database_from_schema @pytest.fixture(scope="session") async def db(): - create_db() + build_database_from_schema() # connect to Database await zrDB.connect() diff --git a/vocolab/admin/commands/api.py b/vocolab/admin/commands/api.py index cb9ace8..85c2792 100644 --- a/vocolab/admin/commands/api.py +++ b/vocolab/admin/commands/api.py @@ -10,15 +10,15 @@ from jinja2 import Environment, FileSystemLoader from vocolab import get_settings, out -from vocolab.admin import cmd_lib -from vocolab.db.base import create_db +from vocolab.core import cmd_lib +from vocolab.data import db _settings = get_settings() class APICMD(cmd_lib.CMD): """ Command for api instance administration """ - + def __init__(self, root, name, cmd_path): super(APICMD, self).__init__(root, name, cmd_path) @@ -27,6 +27,30 @@ def run(self, argv): self.parser.print_help() +class APILockCMD(cmd_lib.CMD): + """ Command to check API Lock status """ + + def __init__(self, root, name, cmd_path): + 
super(APILockCMD, self).__init__(root, name, cmd_path) + self.parser.add_argument( + "action", choices=['lock', 'unlock', 'status'], default='status', + nargs="?", help="Action to perform (default status)" + ) + + def run(self, argv): + args = self.parser.parse_args(argv) + + if args.action == "lock": + _settings.data_lock.touch() + elif args.action == "unlock": + _settings.data_lock.unlink() + else: + if _settings.is_locked(): + out.cli.print(f"API is locked") + else: + out.cli.print(f"API is not locked") + + class RunAPICMD(cmd_lib.CMD): """ Commands to run the api daemon """ @@ -99,11 +123,11 @@ def run(self, argv): exec_args.extend(['zerospeech.api:app', '--reload', '--debug', '--no-access-log']) execv(executable, exec_args) - - + + class APInitEnvironmentCMD(cmd_lib.CMD): """ Initialise components needed for the API """ - + def __init__(self, root, name, cmd_path): super(APInitEnvironmentCMD, self).__init__(root, name, cmd_path) @@ -121,7 +145,7 @@ def run(self, argv): _settings.static_files_directory.mkdir(exist_ok=True, parents=True) # create tables out.cli.info(f"creating : tables in database ...") - create_db() + db.build_database_from_schema() class ConfigFiles(cmd_lib.CMD): @@ -137,11 +161,11 @@ def run(self, argv): class GunicornConfigGeneration(cmd_lib.CMD): """ Generate a template gunicorn config file """ - + def __init__(self, root, name, cmd_path): super(GunicornConfigGeneration, self).__init__(root, name, cmd_path) self.parser.add_argument('-o', '--out-file', type=str, help="File to output result config") - self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir))\ + self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir)) \ .get_template("gunicorn_app.wsgi") def run(self, argv): @@ -169,7 +193,7 @@ class SystemDSocketFileGeneration(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(SystemDSocketFileGeneration, self).__init__(root, name, cmd_path) self.parser.add_argument('-o', 
'--out-file', type=str, help="File to output result config") - self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir))\ + self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir)) \ .get_template("gunicorn.socket") def run(self, argv): @@ -201,7 +225,7 @@ def __init__(self, root, name, cmd_path): super(SystemDUnitGeneration, self).__init__(root, name, cmd_path) self.parser.add_argument('-o', '--out-file', type=str, help="File to output result config") self.parser.add_argument('gunicorn_config_file', type=str, help="File to configure gunicorn with") - self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir), trim_blocks=True)\ + self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir), trim_blocks=True) \ .get_template("api.service") def run(self, argv): @@ -238,12 +262,12 @@ class NginxConfigGeneration(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(NginxConfigGeneration, self).__init__(root, name, cmd_path) self.parser.add_argument('-o', '--out-file', type=str, help="File to output result config") - self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir), trim_blocks=True)\ + self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir), trim_blocks=True) \ .get_template("nginx.conf") def run(self, argv): args = self.parser.parse_args(argv) - default_url = urlparse(_settings.API_BASE_URL) + default_url = urlparse(_settings.api_options.API_BASE_URL) data = dict( url=f"{default_url.netloc}{default_url.path}", bind_url=_settings.server_options.SERVER_BIND, diff --git a/vocolab/admin/commands/challenges.py b/vocolab/admin/commands/challenges.py index 0f66577..a2217d5 100644 --- a/vocolab/admin/commands/challenges.py +++ b/vocolab/admin/commands/challenges.py @@ -7,16 +7,15 @@ from rich.table import Table from vocolab import out -from vocolab.admin import cmd_lib -from vocolab.db import schema, models 
-from vocolab.db.q import challengesQ +from vocolab.core import cmd_lib +from vocolab.data import models, model_queries -class ChallengesCMD(cmd_lib.CMD): +class BenchmarksCMD(cmd_lib.CMD): """ Command for challenge administration (default: list)""" def __init__(self, root, name, cmd_path): - super(ChallengesCMD, self).__init__(root, name, cmd_path) + super(BenchmarksCMD, self).__init__(root, name, cmd_path) # custom arguments self.parser.add_argument('-a', '--include-all', @@ -28,28 +27,27 @@ def run(self, argv): # fetch data loop = asyncio.get_event_loop() - challenge_lst = loop.run_until_complete( - challengesQ.list_challenges(include_all=args.include_all) + challenge_lst: model_queries.BenchmarkList = loop.run_until_complete( + model_queries.BenchmarkList.get(include_all=args.include_all) ) # Prepare output table = Table(show_header=True, header_style="bold magenta") - table.add_column("ID") - table.add_column("label") + table.add_column("Label") table.add_column("active") table.add_column("url") table.add_column("start_date") table.add_column("end_date") table.add_column("evaluator") - for ch in challenge_lst: + for ch in challenge_lst.items: if ch.end_date: end_date_str = ch.end_date.strftime('%d/%m/%Y') else: end_date_str = None table.add_row( - f"{ch.id}", f"{ch.label}", f"{ch.active}", f"{ch.url}", + f"{ch.label}", f"{ch.active}", f"{ch.url}", f"{ch.start_date.strftime('%d/%m/%Y')}", f"{end_date_str}", f"{ch.evaluator}" ) @@ -57,11 +55,11 @@ def run(self, argv): out.cli.print(table) -class AddChallengeCMD(cmd_lib.CMD): +class AddBenchmarkCMD(cmd_lib.CMD): """ Command to create new challenges """ def __init__(self, root, name, cmd_path): - super(AddChallengeCMD, self).__init__(root, name, cmd_path) + super(AddBenchmarkCMD, self).__init__(root, name, cmd_path) self.parser.add_argument('--dry-run', dest='dry_run', action='store_true', @@ -76,7 +74,7 @@ def run(self, argv): if args.from_file: file_path = Path(args.from_file) if not (file_path.is_file() and 
file_path.suffix == '.json'): - raise ValueError(f"Input file needs to exist and be a Valid JSON file.") + raise ValueError("Input file needs to exist and be a Valid JSON file.") obj = json.load(file_path.open()) obj_list = [models.cli.NewChallenge(**item) for item in obj] @@ -105,7 +103,7 @@ def run(self, argv): if not args.dry_run: for item in obj_list: - asyncio.run(challengesQ.create_new_challenge(item)) + asyncio.run(model_queries.Benchmark.create(item)) out.cli.print(f"insertion of {item.label} was successful:white_check_mark:", style="bold green") else: @@ -119,26 +117,31 @@ def run(self, argv): out.cli.error(f":x:\t{e}") -class SetChallenge(cmd_lib.CMD): +class SetBenchmarkCMD(cmd_lib.CMD): """ Command to alter properties of Challenges""" def __init__(self, root, name, cmd_path): - super(SetChallenge, self).__init__(root, name, cmd_path) - self.challenge_fields = schema.Challenge.get_field_names() - self.challenge_fields.remove('id') + super(SetBenchmarkCMD, self).__init__(root, name, cmd_path) + self.challenge_fields = model_queries.Benchmark.get_field_names() # arguments - self.parser.add_argument('id', help='ID of the challenge to update') + self.parser.add_argument('label', help='Name of the challenge to update') self.parser.add_argument('field_name', type=str, choices=self.challenge_fields, help='The name of the field') self.parser.add_argument('value', help='The new value of the field') + @staticmethod + async def update_property(benchmark_id: str, field_name: str, value: str): + ch = await model_queries.Benchmark.get(benchmark_id=benchmark_id) + return await ch.update_property( + variable_name=field_name, + value=value, + allow_parsing=True + ) + def run(self, argv): args = self.parser.parse_args(argv) res = asyncio.run( - challengesQ.update_challenge_property( - challenge_id=args.id, variable_name=args.field_name, value=args.value, - allow_parsing=True - ) + self.update_property(args.id, args.field_name, args.value) ) out.cli.info(f"Field 
{args.field_name}={res} :white_check_mark:") diff --git a/vocolab/admin/commands/evaluators.py b/vocolab/admin/commands/evaluators.py index 55c68cb..bde192e 100644 --- a/vocolab/admin/commands/evaluators.py +++ b/vocolab/admin/commands/evaluators.py @@ -1,13 +1,13 @@ import asyncio import sys +from typing import List from rich.prompt import Confirm from rich.table import Table from vocolab import get_settings, out -from vocolab.admin import cmd_lib -from vocolab.db.q import challenges as ch_queries -from vocolab.lib import evaluators_lib +from vocolab.data import model_queries +from vocolab.core import evaluators_lib, cmd_lib _settings = get_settings() @@ -20,7 +20,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): _ = self.parser.parse_args(argv) - evaluators = asyncio.run(ch_queries.get_evaluators()) + evaluators: model_queries.EvaluatorList = asyncio.run(model_queries.EvaluatorList.get()) # Prepare output table = Table(show_header=True, header_style="bold magenta") @@ -31,12 +31,11 @@ def run(self, argv): table.add_column("script_path") table.add_column("executor_arguments") - for ev in evaluators: + for ev in evaluators.items: table.add_row( f"{ev.id}", f"{ev.label}", f"{ev.host}", f"{ev.executor}", f"{ev.script_path}", f"{ev.executor_arguments}" ) - # print out.cli.print(table) @@ -83,6 +82,11 @@ def __init__(self, root, name, cmd_path): super(DiscoverEvaluatorsCMD, self).__init__(root, name, cmd_path) self.parser.add_argument('host') + @staticmethod + async def add_evaluators_list(eval_list): + for item in eval_list: + await model_queries.EvaluatorItem.add_or_update(evl_item=item) + def run(self, argv): args = self.parser.parse_args(argv) @@ -100,7 +104,7 @@ def run(self, argv): out.cli.print(f"Found evaluators : {[ev.label for ev in evaluators]}") response = Confirm.ask("Do want to import them into the database?") if response: - asyncio.run(ch_queries.add_evaluator(lst_eval=evaluators)) + asyncio.run(self.add_evaluators_list(evaluators)) 
out.cli.print(":heavy_check_mark: successfully inserted evaluators") @@ -113,14 +117,19 @@ def __init__(self, root, name, cmd_path): # arguments self.parser.add_argument("evaluator_id", type=int, help='The id of the entry') + @staticmethod + async def update_eval_args(evaluator_id: int, arg_list: List[str]): + evaluator = await model_queries.EvaluatorItem.get(evaluator_id) + await evaluator.update_args(arg_list) + def run(self, argv): """ Update base arguments of an evaluator Pass a list of arguments to give to the evaluator """ - args, rest = self.parser.parse_known_args(argv) + args, rest = self.parser.parse_known_args() asyncio.run( - ch_queries.edit_evaluator_args(eval_id=args.evaluator_id, arg_list=rest) + self.update_eval_args(args.evaluator_id, rest) ) out.cli.info(":heavy_check_mark: successfully updated evaluator") diff --git a/vocolab/admin/commands/leaderboards.py b/vocolab/admin/commands/leaderboards.py index f11d0cc..46d35f8 100644 --- a/vocolab/admin/commands/leaderboards.py +++ b/vocolab/admin/commands/leaderboards.py @@ -7,10 +7,8 @@ from rich.table import Table from vocolab import out -from vocolab.admin import cmd_lib -from vocolab.db import schema -from vocolab.db.q import leaderboardQ -from vocolab.lib import leaderboards_lib +from vocolab.core import leaderboards_lib, cmd_lib +from vocolab.data import model_queries class LeaderboardCMD(cmd_lib.CMD): @@ -22,28 +20,22 @@ def __init__(self, root, name, cmd_path): def run(self, argv): _ = self.parser.parse_args(argv) try: - leaderboards = asyncio.run(leaderboardQ.list_leaderboards()) + leaderboards: model_queries.LeaderboardList = asyncio.run(model_queries.LeaderboardList.get_all()) except ValueError: - leaderboards = [] + leaderboards = model_queries.LeaderboardList(items=[]) table = Table(show_header=True, header_style="bold magenta") - table.add_column('ID') table.add_column('Label') table.add_column('Archived') - table.add_column('External Entries', no_wrap=False, overflow='fold') 
table.add_column('Static Files') - table.add_column('Challenge ID') - table.add_column('EntryFile', no_wrap=False, overflow='fold') - table.add_column('LeaderboardFile', no_wrap=False, overflow='fold') + table.add_column('Benchmark ID') table.add_column('Key', no_wrap=False, overflow='fold') - for entry in leaderboards: table.add_row( - f"{entry.id}", f"{entry.label}", f"{entry.archived}", - f"{entry.external_entries}", f"{entry.static_files}", f"{entry.challenge_id}", - f"{entry.entry_file}", f"{entry.path_to}", f"{entry.sorting_key}" + f"{entry.label}", f"{entry.archived}", + f"{entry.static_files}", f"{entry.benchmark_id}", + f"{entry.sorting_key}" ) - # print table out.cli.print(table, no_wrap=False) @@ -57,34 +49,16 @@ def __init__(self, root, name, cmd_path): @staticmethod def ask_input(): label = Prompt.ask("Label: ") - challenge_id = IntPrompt.ask("Challenge ID") - - path_to = Prompt.ask(f"Leaderboard Compiled filename (default: {label}.json)") - if not path_to: - path_to = f"{label}.json" - - entry_file = out.cli.raw.input( - f"Leaderboard individual entry filename (default: {label}-entry.json ): ") - if not entry_file: - entry_file = f"{label}-entry.json" - - while True: - external_entries = out.cli.raw.input("Location of external entries: ") - external_entries = Path(external_entries) - if external_entries.is_dir(): - break - else: - out.cli.error(f"External entries must be a valid directory") - + benchmark_id = IntPrompt.ask("Benchmark ID") add_static_files = Confirm.ask("Does this leaderboard include static files", default=True) + archived = not Confirm.ask("Does this leaderboard accept new entries", default=True) return dict( label=label, - challenge_id=challenge_id, - path_to=path_to, - entry_file=entry_file, - external_entries=external_entries, + benchmark_id=benchmark_id, + archived=archived, static_files=add_static_files, + sorting_key=None ) def run(self, argv): @@ -107,14 +81,14 @@ def run(self, argv): lds = [self.ask_input()] for item in lds: - 
asyncio.run(leaderboards_lib.create( - challenge_id=item.get("challenge_id"), - label=item.get("label"), - entry_file=item.get("entry_file"), - external_entries=item.get("external_entries"), - static_files=item.get("static_files", False), - archived=item.get("archived", False), - path_to=item.get("path_to") + asyncio.run(model_queries.Leaderboard.create( + model_queries.Leaderboard( + label=item.get("label"), + benchmark_id=item.get("benchmark_id"), + static_files=item.get("static_files", False), + archived=item.get("archived", False), + sorting_key=item.get("sorting_key", None), + ) )) out.cli.info(f"Successfully created leaderboard : {item.get('label')}") @@ -124,52 +98,94 @@ class EditLeaderboardCMD(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(EditLeaderboardCMD, self).__init__(root, name, cmd_path) - self.leaderboard_fields = schema.LeaderBoard.get_field_names() - self.leaderboard_fields.remove('id') + self.leaderboard_fields = model_queries.Leaderboard.get_field_names() # arguments - self.parser.add_argument("leaderboard_id", type=int, help='The id of the entry') + self.parser.add_argument("leaderboard_id", type=str, help='The id of the entry') self.parser.add_argument("field_name", type=str, choices=self.leaderboard_fields, help="The name of the field") self.parser.add_argument('field_value', help="The new value of the field") + @staticmethod + async def update_value(leaderboard_id: str, field_name: str, value: str): + leaderboard = await model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) + return await leaderboard.update_property(variable_name=field_name, value=value, allow_parsing=True) + def run(self, argv): args = self.parser.parse_args(argv) - res = asyncio.run(leaderboardQ.update_leaderboard_value( + res = asyncio.run(self.update_value( leaderboard_id=args.leaderboard_id, - variable_name=args.field_name, - value=args.field_value, - allow_parsing=True + field_name=args.field_name, + value=args.field_value )) 
out.cli.info(f"Field {args.field_name}={res} :white_check_mark:") class ShowLeaderboardCMD(cmd_lib.CMD): """ Print final leaderboard object """ - + def __init__(self, root, name, cmd_path): super(ShowLeaderboardCMD, self).__init__(root, name, cmd_path) - self.parser.add_argument('leaderboard_id', type=int) + self.parser.add_argument('label', type=str) self.parser.add_argument('--raw-output', action="store_true", help="Print in raw json without formatting") + @staticmethod + async def get_leaderboard(label: str): + return await model_queries.Leaderboard.get(label) + def run(self, argv): args = self.parser.parse_args(argv) - leaderboard = asyncio.run(leaderboards_lib.get_leaderboard(leaderboard_id=args.leaderboard_id)) + ld = asyncio.run(self.get_leaderboard(label=args.label)) + leaderboard_obj = ld.get_dir().load_object(from_cache=True, raw=True) + if args.raw_output: - out.cli.raw.out(json.dumps(leaderboard)) + out.cli.raw.out(json.dumps(leaderboard_obj, indent=4)) else: - out.cli.print(leaderboard) - + out.cli.print(leaderboard_obj) + class BuildLeaderboardCMD(cmd_lib.CMD): """ Compile entries into the leaderboard """ - + def __init__(self, root, name, cmd_path): super(BuildLeaderboardCMD, self).__init__(root, name, cmd_path) self.parser.add_argument('leaderboard_id', type=int, help='The id of the leaderboard') + @staticmethod + async def get_leaderboard(label: str): + return await model_queries.Leaderboard.get(label) + + def run(self, argv): + args = self.parser.parse_args(argv) + ld = asyncio.run(self.get_leaderboard(label=args.label)) + ld.get_dir().mkcache() + out.cli.info(f"Successfully build {ld}") + + +class LeaderboardEntries(cmd_lib.CMD): + """ Leaderboard entries """ + + def __init__(self, root, name, cmd_path): + super(LeaderboardEntries, self).__init__(root, name, cmd_path) + self.parser.add_argument('--by-leaderboard', type="str") + self.parser.add_argument('--by-model', type="str") + self.parser.add_argument('--by-benchmark', type="str") + + +class 
ImportLeaderboardEntries(cmd_lib.CMD): + """ Compile entries into the leaderboard """ + + def __init__(self, root, name, cmd_path): + super(BuildLeaderboardCMD, self).__init__(root, name, cmd_path) + self.parser.add_argument('leaderboard_id', type=int, help='The id of the leaderboard') + + @staticmethod + async def get_leaderboard(label: str): + return await model_queries.Leaderboard.get(label) + def run(self, argv): args = self.parser.parse_args(argv) - ld_file = asyncio.run(leaderboards_lib.build_leaderboard(leaderboard_id=args.leaderboard_id)) - out.cli.info(f"Successfully build {ld_file}") + ld = asyncio.run(self.get_leaderboard(label=args.label)) + ld.get_dir().mkcache() + out.cli.info(f"Successfully build {ld}") diff --git a/vocolab/admin/commands/messaging.py b/vocolab/admin/commands/messaging.py index f820d56..59b6f58 100644 --- a/vocolab/admin/commands/messaging.py +++ b/vocolab/admin/commands/messaging.py @@ -1,10 +1,10 @@ import sys from vocolab import out, get_settings -from vocolab.admin import cmd_lib +from vocolab.core import cmd_lib # api settings -from vocolab.db.models.tasks import SimpleLogMessage, SubmissionUpdateMessage, UpdateType +from vocolab.data.models.tasks import SimpleLogMessage, SubmissionUpdateMessage, UpdateType from vocolab.worker import server as message_server _settings = get_settings() diff --git a/vocolab/admin/commands/settings.py b/vocolab/admin/commands/settings.py index ffbeb09..43bd760 100644 --- a/vocolab/admin/commands/settings.py +++ b/vocolab/admin/commands/settings.py @@ -5,7 +5,7 @@ from rich.markdown import Markdown from vocolab import get_settings, out -from vocolab.admin import cmd_lib +from vocolab.core import cmd_lib _settings = get_settings() diff --git a/vocolab/admin/commands/submissions.py b/vocolab/admin/commands/submissions.py index 271c47f..6796066 100644 --- a/vocolab/admin/commands/submissions.py +++ b/vocolab/admin/commands/submissions.py @@ -1,16 +1,12 @@ +import argparse import asyncio -import shutil 
import sys -from pathlib import Path from rich.table import Table from vocolab import out, get_settings -from vocolab.admin import cmd_lib -from vocolab.db.models.api import NewSubmissionRequest, NewSubmission -from vocolab.db.q import challengesQ, userQ -from vocolab.db import schema as db_challenges -from vocolab.lib import submissions_lib +from vocolab.core import submission_lib, cmd_lib +from vocolab.data import model_queries # api settings _settings = get_settings() @@ -24,25 +20,27 @@ def __init__(self, root, name, cmd_path): # custom arguments self.parser.add_argument('-u', '--user', type=int, help='Filter by user ID') - self.parser.add_argument('-t', '--track', type=int, help='Filter by track ID') + self.parser.add_argument('-b', '--benchmark', type=int, help='Filter by track ID') self.parser.add_argument('-s', '--status', - choices=db_challenges.SubmissionStatus.get_values(), + choices=model_queries.SubmissionStatus.get_values(), help='Filter by status') - def run(self, argv): - args = self.parser.parse_args(argv) - fn_args = {} - + @staticmethod + async def fetch_by(args: argparse.Namespace) -> model_queries.ChallengeSubmissionList: if args.user: - fn_args['by_user'] = args.user + return await model_queries.ChallengeSubmissionList.get_from_user(user_id=args.user) - if args.track: - fn_args['by_track'] = args.track + elif args.benchmark: + return await model_queries.ChallengeSubmissionList.get_from_challenge(benchmark_id=args.benchmark) - if args.status: - fn_args['by_status'] = args.status + elif args.status: + return await model_queries.ChallengeSubmissionList.get_by_status(status=args.status) - items = asyncio.run(challengesQ.list_submission(**fn_args)) + return await model_queries.ChallengeSubmissionList.get_all() + + def run(self, argv): + args = self.parser.parse_args(argv) + items: model_queries.ChallengeSubmissionList = asyncio.run(self.fetch_by(args)) # Prepare output table = Table(show_header=True, header_style="bold magenta") @@ -56,7 +54,7 @@ 
def run(self, argv): for i in items: table.add_row( - f"{i.id}", f"{i.user_id}", f"{i.track_id}", f"{i.submit_date.strftime('%d/%m/%Y')}", + f"{i.id}", f"{i.user_id}", f"{i.benchmark_id}", f"{i.submit_date.strftime('%d/%m/%Y')}", f"{i.status}", f"{i.evaluator_id}", f"{i.author_label}" ) # print @@ -72,16 +70,18 @@ def __init__(self, root, name, cmd_path): # custom arguments self.parser.add_argument("submission_id") self.parser.add_argument( - 'status', choices=[str(el.value) for el in db_challenges.SubmissionStatus] # noqa: enum has value attribute + 'status', choices=[str(el.value) for el in model_queries.SubmissionStatus] # noqa: enum has value attribute ) + @staticmethod + async def set_status(submission_id: str, status: model_queries.SubmissionStatus): + submission = await model_queries.ChallengeSubmission.get(submission_id=submission_id) + await submission.update_status(status=status) + def run(self, argv): args = self.parser.parse_args(argv) - submission_fs = submissions_lib.get_submission_dir(args.submission_id, as_obj=True) - submission_fs.clean_all_locks() - asyncio.run(challengesQ.update_submission_status( - by_id=args.submission_id, status=args.status - )) + status = model_queries.SubmissionStatus(args.status) + asyncio.run(self.set_status(args.submission_id, status)) class CreateSubmissionCMD(cmd_lib.CMD): @@ -89,62 +89,75 @@ class CreateSubmissionCMD(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(CreateSubmissionCMD, self).__init__(root, name, cmd_path) + self.parser.add_argument("model_id", type=str) self.parser.add_argument("challenge_id", type=int) self.parser.add_argument("user_id", type=int) self.parser.add_argument("archive") def run(self, argv): args = self.parser.parse_args(argv) - archive = Path(args.archive) - - if not archive.is_file(): - out.cli.error(f'Requested file {archive} does not exist') - - async def create_submission(ch_id, user_id): - try: - _challenge = await challengesQ.get_challenge(challenge_id=ch_id) - _user = 
await userQ.get_user(by_uid=user_id) - - if not _user.enabled: - out.cli.error(f'User {_user.username} is not allowed to perform this action') - sys.exit(1) - - _submission_id = await challengesQ.add_submission(new_submission=NewSubmission( - user_id=_user.id, - track_id=_challenge.id - ), evaluator_id=_challenge.evaluator) - return _challenge, _user, _submission_id - except ValueError: - out.cli.exception() - sys.exit(1) - - # fetch db items - challenge, user, submission_id = asyncio.run(create_submission(args.challenge_id, args.user_id)) - - # create entry on disk - submissions_lib.make_submission_on_disk( - submission_id, user.username, challenge.label, - NewSubmissionRequest( - filename=archive.name, hash=submissions_lib.md5sum(archive), - multipart=False - ) - ) - # fetch folder - folder = submissions_lib.get_submission_dir(submission_id) - # copy file - shutil.copy(archive, folder / 'archive.zip') - submissions_lib.unzip(folder / 'archive.zip', folder / 'input') - - # set status - (folder / 'upload.lock').unlink() - asyncio.run( - challengesQ.update_submission_status(by_id=submission_id, status=db_challenges.SubmissionStatus.uploaded) - ) + # todo use new method + + # def run(self, argv): + # args = self.parser.parse_args(argv) + # archive = Path(args.archive) + # + # if not archive.is_file(): + # out.cli.error(f'Requested file {archive} does not exist') + + # async def create_submission(ch_id, user_id): + # try: + # _challenge = await model_queries.Challenge.get(challenge_id=ch_id) + # _user = await model_queries.User.get(by_uid=user_id) + # + # _model_id = await model_queries.ModelID.get(model_id=args.model_id) + # if _model_id is None: + # out.cli.error(f"Model: {args.model_id} does not exist please create it !!") + # sys.exit(1) + # + # if not _user.enabled: + # out.cli.error(f'User {_user.username} is not allowed to perform this action') + # sys.exit(1) + # + # _submission_id = await model_queries.ChallengeSubmission.create( + # username=_user.username, 
+ # + # new_submission=NewSubmission( + # user_id=_user.id, + # track_id=_challenge.id + # ), evaluator_id=_challenge.evaluator) + # return _challenge, _user, _submission_id + # except ValueError: + # out.cli.exception() + # sys.exit(1) + # + # # fetch db items + # challenge, user, submission_id = asyncio.run(create_submission(args.challenge_id, args.user_id)) + # + # # create entry on disk + # submission_lib.make_submission_on_disk( + # submission_id, user.username, challenge.label, + # NewSubmissionRequest( + # filename=archive.name, hash=submission_lib.md5sum(archive), + # multipart=False + # ) + # ) + # # fetch folder + # folder = submission_lib.get_submission_dir(submission_id) + # # copy file + # shutil.copy(archive, folder / 'archive.zip') + # submission_lib.unzip(folder / 'archive.zip', folder / 'input') + # + # # set status + # (folder / 'upload.lock').unlink() + # asyncio.run( + # challengesQ.update_submission_status(by_id=submission_id, status=db_challenges.SubmissionStatus.uploaded) + # ) class EvalSubmissionCMD(cmd_lib.CMD): """ Launches the evaluation of a submission """ - sub_status = db_challenges.SubmissionStatus + sub_status = model_queries.SubmissionStatus no_eval = { sub_status.uploading, sub_status.on_queue, sub_status.invalid, sub_status.uploading, sub_status.validating, sub_status.evaluating, @@ -164,8 +177,8 @@ def run(self, argv): else: extra_arguments = [] - submission: db_challenges.ChallengeSubmission = asyncio.run( - challengesQ.get_submission(by_id=args.submission_id)) + submission: model_queries.ChallengeSubmission = asyncio.run( + model_queries.ChallengeSubmission.get(submission_id=args.submission_id)) if submission.status in self.no_eval: out.cli.print(f"Cannot evaluate a submission that has status : {submission.status}") @@ -173,7 +186,7 @@ def run(self, argv): asyncio.run( # todo check if status is correctly set. 
- submissions_lib.evaluate(submission_id=submission.id, extra_args=extra_arguments) + submission_lib.evaluate(submission_id=submission.id, extra_args=extra_arguments) ) @@ -186,6 +199,8 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument("submission_id") def run(self, argv): + # todo recheck this + args = self.parser.parse_args(argv) if args.hostname not in list(_settings.task_queue_options.REMOTE_STORAGE.keys()): out.cli.warning(f"Host {args.hostname} is not a valid remote storage host!\n") @@ -200,7 +215,7 @@ def run(self, argv): sys.exit(1) # transferring - submissions_lib.fetch_submission_from_remote(host=args.hostname, submission_id=args.submission_id) + submission_lib.fetch_submission_from_remote(host=args.hostname, submission_id=args.submission_id) class UploadSubmissionToRemote(cmd_lib.CMD): @@ -213,6 +228,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this if args.hostname not in list(_settings.task_queue_options.REMOTE_STORAGE.keys()): out.cli.warning(f"Host {args.hostname} is not a valid remote storage host!\n") @@ -227,7 +243,7 @@ def run(self, argv): sys.exit(1) # transferring - submissions_lib.transfer_submission_to_remote(host=args.hostname, submission_id=args.submission_id) + submission_lib.transfer_submission_to_remote(host=args.hostname, submission_id=args.submission_id) class DeleteSubmissionCMD(cmd_lib.CMD): @@ -242,23 +258,24 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this if args.delete_by == 'by_id': - del_id = asyncio.run(submissions_lib.delete_submission(by_id=args.selector)) - submissions_lib.delete_submission_files(del_id[0]) + del_id = asyncio.run(submission_lib.delete_submission(by_id=args.selector)) + submission_lib.delete_submission_files(del_id[0]) out.cli.info(f"Successfully deleted: {args.selector}") elif args.delete_by == 'by_user': - deleted = 
asyncio.run(submissions_lib.delete_submission(by_user=int(args.selector))) + deleted = asyncio.run(submission_lib.delete_submission(by_user=int(args.selector))) for d in deleted: - submissions_lib.delete_submission_files(d) + submission_lib.delete_submission_files(d) out.cli.info(f"Successfully deleted: {d}") elif args.delete_by == 'by_track': - deleted = asyncio.run(submissions_lib.delete_submission(by_track=int(args.selector))) + deleted = asyncio.run(submission_lib.delete_submission(by_track=int(args.selector))) for d in deleted: - submissions_lib.delete_submission_files(d) + submission_lib.delete_submission_files(d) out.cli.info(f"Successfully deleted: {d}") else: out.cli.error("Error type of deletion unknown") @@ -275,6 +292,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this asyncio.run(challengesQ.update_submission_evaluator( args.evaluator_id, by_id=args.submission_id @@ -292,6 +310,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this asyncio.run(challengesQ.update_submission_author_label( args.author_label, by_id=args.submission_id )) @@ -311,16 +330,17 @@ def __init__(self, root, name, cmd_path): async def archive_submission(*args): for submission_id in args: # archive leaderboard entry - await submissions_lib.archive_leaderboard_entries(submission_id) + await submission_lib.archive_leaderboard_entries(submission_id) # remove submission from db - await submissions_lib.delete_submission(by_id=submission_id) + await submission_lib.delete_submission(by_id=submission_id) # zip & archive files - submissions_lib.archive_submission_files(submission_id) + submission_lib.archive_submission_files(submission_id) out.cli.info(f"Successfully archived: {submission_id}") def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this if args.type == 'by_id': asyncio.run(self.archive_submission(args.selector)) 
diff --git a/vocolab/admin/commands/task_worker.py b/vocolab/admin/commands/task_worker.py index 3d47d12..6fae2b0 100644 --- a/vocolab/admin/commands/task_worker.py +++ b/vocolab/admin/commands/task_worker.py @@ -6,8 +6,8 @@ from jinja2 import Environment, FileSystemLoader from vocolab import out, get_settings -from vocolab.admin import cmd_lib -from vocolab.db.models import tasks +from vocolab.core import cmd_lib +from vocolab.data import models from vocolab.worker import server _settings = get_settings() @@ -54,7 +54,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) server.echo().delay( - tasks.SimpleLogMessage( + models.tasks.SimpleLogMessage( label="cli-echo-testing", message=f"{args.message}" ).dict() diff --git a/vocolab/admin/commands/test.py b/vocolab/admin/commands/test.py index 01b86e4..8c4ec04 100644 --- a/vocolab/admin/commands/test.py +++ b/vocolab/admin/commands/test.py @@ -6,9 +6,7 @@ from pydantic import EmailStr from vocolab import get_settings, out -from vocolab.admin import cmd_lib -from vocolab.db.models.misc import UserCreate -from vocolab.lib import notify +from vocolab.core import notify, cmd_lib _settings = get_settings() @@ -60,25 +58,4 @@ def __init__(self, root, name, cmd_path): def run(self, argv): _ = self.parser.parse_args(argv) - out.cli.print("-- New User Info --", style="bold") - first_name = out.cli.raw.input("First Name: ") - last_name = out.cli.raw.input("Last Name: ") - email = out.cli.raw.input("Email: ") - affiliation = out.cli.raw.input("Affiliation: ") - - clean_last_name = ''.join([i if i in string.ascii_letters else ' ' for i in last_name]) - def_username = f"{first_name[0]}{clean_last_name.replace(' ', '')}".lower() - username = out.cli.raw.input(f"Username(default {def_username}): ") - username = username if username else def_username - - password = out.cli.raw.input("Password: ", password=True) - - user = UserCreate( - username=username, - email=EmailStr(email), - 
pwd=password, - first_name=first_name, - last_name=last_name, - affiliation=affiliation - ) - out.cli.print(user) + out.cli.info("Nothing to see here move along") diff --git a/vocolab/admin/commands/user.py b/vocolab/admin/commands/user.py index 83453c7..73b7bb0 100644 --- a/vocolab/admin/commands/user.py +++ b/vocolab/admin/commands/user.py @@ -3,17 +3,15 @@ import string import sys from pathlib import Path +from typing import Tuple, Optional from pydantic import EmailStr from rich.prompt import Prompt from rich.table import Table from vocolab import out, get_settings -from vocolab.admin import cmd_lib -from vocolab.db.models.misc import UserCreate -from vocolab.db.q import userQ, challengesQ -from vocolab.lib import notify -from vocolab.lib.misc import CustomTypesJsonEncoder +from vocolab.core import notify, cmd_lib, users_lib +from vocolab.data import models, model_queries _settings = get_settings() @@ -30,7 +28,7 @@ def run(self, argv): args = self.parser.parse_args(argv) # fetch data - user_lst = asyncio.run(userQ.get_user_list()) + user_lst: model_queries.UserList = asyncio.run(model_queries.UserList.get()) if args.mail_list: for u in user_lst: @@ -57,72 +55,27 @@ def run(self, argv): out.cli.print(table) -class UserSessionsCMD(cmd_lib.CMD): - """ List logged users """ - - def __init__(self, root, name, cmd_path): - super(UserSessionsCMD, self).__init__(root, name, cmd_path) - - @staticmethod - def just_print(): - """ Prints a list of logged users """ - user_lst = asyncio.run(userQ.get_logged_user_list()) - - # Prepare output - table = Table(show_header=True, header_style="bold magenta") - table.add_column("ID") - table.add_column("Username") - table.add_column("Email") - table.add_column("Active") - table.add_column("Verified") - - for usr in user_lst: - table.add_row( - f"{usr.id}", usr.username, usr.email, f"{usr.active}", f"{usr.verified}" - ) - - out.cli.print(table) - - def run(self, argv): - _ = self.parser.parse_args(argv) - self.just_print() - - 
-class CloseUserSessionsCMD(cmd_lib.CMD): - """ Close user sessions """ - - def __init__(self, root, name, cmd_path): - super(CloseUserSessionsCMD, self).__init__(root, name, cmd_path) - self.parser.add_argument("-u", "--user-id") - self.parser.add_argument("-a", "--close-all", action='store_true') - - def run(self, argv): - args = self.parser.parse_args(argv) - - if args.user_id: - asyncio.run(userQ.delete_session(by_uid=args.user_id)) - out.cli.print(f"All sessions of user {args.user_id} were closed", style="bold") - elif args.close_all: - asyncio.run(userQ.delete_session(clear_all=True)) - out.cli.print(f"All sessions were closed", style="bold") - else: - self.parser.print_help() - - sys.exit(0) - - -class CreateUserSessionsCMD(cmd_lib.CMD): +class CreateUserSessionCMD(cmd_lib.CMD): """ Create a session for a user """ def __init__(self, root, name, cmd_path): - super(CreateUserSessionsCMD, self).__init__(root, name, cmd_path) + super(CreateUserSessionCMD, self).__init__(root, name, cmd_path) self.parser.add_argument("user_id", type=int) + @staticmethod + async def login(user_id: int) -> Tuple[str, str, str]: + user = await model_queries.User.get(by_uid=user_id) + token = model_queries.Token( + user_email=user.email + ) + return token.encode(), user.username, user.email + def run(self, argv): args = self.parser.parse_args(argv) - - usr, token = asyncio.run(userQ.admin_login(by_uid=args.user_id)) - out.cli.print(f"{usr.username}, {usr.email}, {token}") + token, username, email = asyncio.run( + self.login(args.user_id) + ) + out.cli.print(f"{username}, {email}, {token}") sys.exit(0) @@ -134,15 +87,26 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument('-f', '--from-file', type=str, help="Load users from a json file") @staticmethod - def _make_usr(user: UserCreate): - _ = asyncio.run(userQ.create_user(usr=user)) + async def _make_usr(user: models.api.UserCreateRequest): + verify_code = await model_queries.User.create(new_usr=user) + # notify user 
for verification + await notify.email.template_email( + emails=[user.email], + subject='[Zerospeech] Account Verification', + data=dict( + username=user.username, + admin_email=_settings.app_options.admin_email, + url=f"{_settings.api_options.API_BASE_URL}{_settings.email_verif_path}?v={verify_code}&username={user.username}" + ), + template_name='email_validation.jinja2' + ) def _create_from_file(self, file: Path): with file.open() as fp: user_list = json.load(fp) for data in user_list: - user = UserCreate( + user = models.api.UserCreateRequest( username=data.get("username"), email=EmailStr(data.get('email')), pwd=data.get("password"), @@ -150,7 +114,7 @@ def _create_from_file(self, file: Path): last_name=data.get('last_name'), affiliation=data.get('affiliation') ) - self._make_usr(user) + asyncio.run(self._make_usr(user)) def _create_form_input(self): @@ -167,7 +131,7 @@ def _create_form_input(self): password = out.cli.raw.input("Password: ", password=True) - user = UserCreate( + user = models.api.UserCreateRequest( username=username, email=EmailStr(email), pwd=password, @@ -175,7 +139,7 @@ def _create_form_input(self): last_name=last_name, affiliation=affiliation ) - self._make_usr(user) + asyncio.run(self._make_usr(user)) def run(self, argv): args = self.parser.parse_args(argv) @@ -204,29 +168,28 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument("--send-all", action='store_true', help="resend verification email to all unverified users") + @staticmethod + async def verify_single(user_id: int): + user = await model_queries.User.get(by_uid=user_id) + await user.verify(verification_code=user.verified, force=True) + + @staticmethod + async def verify_all(): + await model_queries.UserList.verify() + def run(self, argv): args = self.parser.parse_args(argv) if args.verify: # verify user - asyncio.run(userQ.admin_verification(user_id=args.verify)) + asyncio.run(self.verify_single(user_id=args.verify)) elif args.verify_all: # verify all users - users = 
asyncio.run(userQ.get_user_list()) - for u in users: - if u.verified != 'True': - asyncio.run(userQ.admin_verification(user_id=u.id)) + asyncio.run(self.verify_all()) elif args.send: # send verification email try: - with (_settings.DATA_FOLDER / 'email_verification.path').open() as fp: - verification_path = fp.read() - except FileNotFoundError: - out.cli.error("Path file not found in settings") - sys.exit(1) - - try: - user = asyncio.run(userQ.get_user(by_uid=args.send)) + user = asyncio.run(model_queries.User.get(by_uid=args.send)) except ValueError: out.cli.error(f"User with id: {args.send} does not exist !!") sys.exit(1) @@ -238,7 +201,7 @@ def run(self, argv): data=dict( username=user.username, admin_email=_settings.app_options.admin_email, - url=f"{_settings.api_options.API_BASE_URL}{verification_path}?v={user.verified}&username={user.username}" + url=f"{_settings.api_options.API_BASE_URL}{_settings.email_verif_path}?v={user.verified}&username={user.username}" ), template_name='email_validation.jinja2' )) @@ -255,7 +218,7 @@ def run(self, argv): out.cli.error("Path file not found in settings") sys.exit(1) - users = asyncio.run(userQ.get_user_list()) + users = asyncio.run(model_queries.UserList.get()) for u in users: if u.verified != 'True': asyncio.run(notify.email.template_email( @@ -264,7 +227,7 @@ def run(self, argv): data=dict( username=u.username, admin_email=_settings.app_options.admin_email, - url=f"{_settings.API_BASE_URL}{verification_path}?v={u.verified}&username={u.username}" + url=f"{_settings.api_options.API_BASE_URL}{verification_path}?v={u.verified}&username={u.username}" ), template_name='email_validation.jinja2' )) @@ -284,24 +247,33 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument("--activate-all", action='store_true', help="activate all users") self.parser.add_argument("--deactivate-all", action='store_true', help="deactivate all users") + @staticmethod + async def toggle_status(user_id: int, activate: bool): + user = 
await model_queries.User.get(by_uid=user_id) + await user.toggle_status(active=activate) + + @staticmethod + async def toggle_all(activate: bool): + await model_queries.UserList.toggle_status(active=activate) + def run(self, argv): args = self.parser.parse_args(argv) if args.activate: # activate user - asyncio.run(userQ.toggle_user_status(user_id=args.activate, active=True)) + asyncio.run(self.toggle_status(user_id=args.activate, activate=True)) out.cli.info("User activated successfully") elif args.deactivate: # deactivate user - asyncio.run(userQ.toggle_user_status(user_id=args.deactivate, active=False)) + asyncio.run(self.toggle_status(user_id=args.activate, activate=False)) out.cli.info("User deactivated successfully") elif args.activate_all: # activate all users - asyncio.run(userQ.toggle_all_users_status(active=True)) + asyncio.run(self.toggle_all(activate=True)) out.cli.info("Users activated successfully") elif args.deactivate_all: # deactivate all users - asyncio.run(userQ.toggle_all_users_status(active=False)) + asyncio.run(self.toggle_all(activate=False)) out.cli.info("Users deactivated successfully") else: self.parser.print_help() @@ -315,6 +287,15 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument("-r", "--reset", metavar="UID", help="reset & send a new password session to user") + @staticmethod + async def reset_password_session(user_id): + user = await model_queries.User.get(by_uid=user_id) + token = model_queries.Token( + allow_password_reset=True, + user_email=user.email + ) + return token.encode(), user.username, user.email + def run(self, argv): args = self.parser.parse_args(argv) @@ -326,15 +307,13 @@ def run(self, argv): out.cli.error("Path file not found in settings") sys.exit(1) - user = asyncio.run(userQ.get_user(by_uid=args.reset)) - out.cli.ic(user) - session = asyncio.run(userQ.create_password_reset_session(username=user.username, email=user.email)) + token, username, email = 
asyncio.run(self.reset_password_session(args.reset)) asyncio.run(notify.email.template_email( - emails=[user.email], + emails=[email], subject='[Zerospeech] Password Reset', data=dict( - username=user.username, - url=f"{_settings.API_BASE_URL}{password_reset_path}?v={session.token}", + username=username, + url=f"{_settings.api_options.API_BASE_URL}{password_reset_path}?v={token}", admin_email=_settings.app_options.admin_email ), template_name='password_reset.jinja2' @@ -350,12 +329,15 @@ def __init__(self, root, name, cmd_path): super(CheckPasswordCMD, self).__init__(root, name, cmd_path) self.parser.add_argument('user_id', type=int) + @staticmethod + async def check_password(user_id: int, password: str): + user = await model_queries.User.get(by_uid=user_id) + return user.password_matches(password) + def run(self, argv): args = self.parser.parse_args(argv) pwd = Prompt.ask('password', password=True) - - user = asyncio.run(userQ.get_user(by_uid=args.user_id)) - if userQ.check_users_password(password=pwd, user=user): + if asyncio.run(self.check_password(args.user_id, pwd)): out.cli.info("--> Passwords match !!") sys.exit(0) else: @@ -363,37 +345,6 @@ def run(self, argv): sys.exit(1) -class ResetSessionsCMD(cmd_lib.CMD): - """ Check the list of reset sessions """ - - def __init__(self, root, name, cmd_path): - super(ResetSessionsCMD, self).__init__(root, name, cmd_path) - self.parser.add_argument('--all', action='store_true', help="Show all sessions (even expired ones)") - self.parser.add_argument('--clean', action='store_true', help="Clean expired sessions") - - def run(self, argv): - args = self.parser.parse_args(argv) - - if args.clean: - # clean sessions - asyncio.run(userQ.clear_expired_password_reset_sessions()) - out.cli.info('removed all expired password reset sessions :heavy_check_mark:') - else: - sessions = asyncio.run(userQ.get_password_reset_sessions(args.all)) - # print - table = Table(show_header=True, header_style="bold magenta") - 
table.add_column("user_id") - table.add_column("token") - table.add_column("expiration_date") - - for item in sessions: - table.add_row( - f"{item.user_id}", item.token, f"{item.expiration_date.isoformat()}" - ) - - out.cli.print(table) - - class NotifyCMD(cmd_lib.CMD): """ Notify all users """ @@ -405,7 +356,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) - user_list = asyncio.run(userQ.get_user_list()) + user_list = asyncio.run(model_queries.UserList.get()) email_list = [user.email for user in user_list] with args.body.open() as fp: body = fp.read() @@ -426,27 +377,21 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument('--save', help="path to save user details as info") @staticmethod - async def delete_user(user_id: int): - user_submissions = await challengesQ.list_submission(by_user=user_id) - if len(user_submissions) > 0: - out.cli.print(f"User {user_id} has {len(user_submissions)} unarchived submissions !!\n" - f"Cannot delete, archive submissions and try again !!") - sys.exit(1) + async def delete_user(user_id: int, save_to_file: Optional[Path] = None): + user = await model_queries.User.get(by_uid=user_id) + profile_data = users_lib.UserProfileData.load(username=user.username) + profile_data.delete() - user = await userQ.get_user(by_uid=user_id) - user_dict = user.dict() + # todo: check if user has any assets and skip deletion + # user-assets: leaderboard-entries, models, submissions + await user.delete() - await userQ.delete_session(by_uid=user_id) - await userQ.clear_password_reset_sessions(by_uid=user_id) - await userQ.delete_user(uid=user_id) - return user_dict def run(self, argv): args = self.parser.parse_args(argv) - user_dict = asyncio.run(self.delete_user(args.user_id)) - out.cli.info(f'User {args.user_id} deleted successfully !!') - + save_to = None if args.save: - out.cli.info(f"backing up user @ {args.save}") - with Path(args.save).with_suffix('.json').open('w') as fp: - 
json.dump(user_dict, fp, cls=CustomTypesJsonEncoder) + save_to = Path(args.save) + + # Delete user + asyncio.run(self.delete_user(args.user_id, save_to)) diff --git a/vocolab/admin/main.py b/vocolab/admin/main.py index 55f8a04..bb69927 100644 --- a/vocolab/admin/main.py +++ b/vocolab/admin/main.py @@ -1,11 +1,12 @@ import sys from vocolab import get_settings, out -from vocolab.admin import cmd_lib, commands +from vocolab.core import cmd_lib +from vocolab.admin import commands # settings _settings = get_settings() -has_db = (_settings.DATA_FOLDER / _settings.database_options.db_file).is_file() +has_db = _settings.database_file.is_file() has_users = has_db and _settings.user_data_dir.is_dir() has_challenges = has_db has_submissions = _settings.submission_dir.is_dir() @@ -21,15 +22,15 @@ def build_cli(): # user functions tree.add_cmd_tree( commands.user.UsersCMD(CMD_NAME, 'users', ''), - commands.user.UserSessionsCMD(CMD_NAME, 'sessions', 'users'), - commands.user.CloseUserSessionsCMD(CMD_NAME, 'close', 'users:sessions'), - commands.user.CreateUserSessionsCMD(CMD_NAME, 'create', 'users:sessions'), + # commands.user.UserSessionsCMD(CMD_NAME, 'sessions', 'users'), + # commands.user.CloseUserSessionsCMD(CMD_NAME, 'close', 'users:sessions'), + commands.user.CreateUserSessionCMD(CMD_NAME, 'create', 'users:sessions'), commands.user.CreateUserCMD(CMD_NAME, 'create', 'users'), commands.user.VerifyUserCMD(CMD_NAME, 'verify', 'users'), commands.user.UserActivationCMD(CMD_NAME, 'activate', 'users'), commands.user.PasswordUserCMD(CMD_NAME, 'password', 'users'), commands.user.CheckPasswordCMD(CMD_NAME, 'check', 'users:password'), - commands.user.ResetSessionsCMD(CMD_NAME, 'reset', 'users:password'), + # commands.user.ResetSessionsCMD(CMD_NAME, 'reset', 'users:password'), commands.user.NotifyCMD(CMD_NAME, 'notify', 'users'), commands.user.DeleteUser(CMD_NAME, 'delete', 'users') ) @@ -37,9 +38,9 @@ def build_cli(): if has_challenges: # challenge functions tree.add_cmd_tree( - 
commands.challenges.ChallengesCMD(CMD_NAME, 'challenges', ''), - commands.challenges.AddChallengeCMD(CMD_NAME, 'add', 'challenges'), - commands.challenges.SetChallenge(CMD_NAME, 'set', 'challenges') + commands.challenges.BenchmarksCMD(CMD_NAME, 'benchmarks', ''), + commands.challenges.AddBenchmarkCMD(CMD_NAME, 'add', 'benchmarks'), + commands.challenges.SetBenchmarkCMD(CMD_NAME, 'set', 'benchmarks') ) if has_db: @@ -81,6 +82,7 @@ def build_cli(): commands.settings.GenerateEnvFileCMD(CMD_NAME, 'template', 'settings'), commands.api.APICMD(CMD_NAME, 'api', ''), commands.api.DebugAPICMD(CMD_NAME, 'serve', 'api'), + commands.api.APILockCMD(CMD_NAME, 'lock', 'api'), commands.api.APInitEnvironmentCMD(CMD_NAME, 'init', 'api'), commands.api.ConfigFiles(CMD_NAME, 'config', 'api'), commands.api.GunicornConfigGeneration(CMD_NAME, 'gunicorn', 'api:config'), diff --git a/vocolab/api/endpoints/auth.py b/vocolab/api/endpoints/auth.py index ae06cac..7c93d0c 100644 --- a/vocolab/api/endpoints/auth.py +++ b/vocolab/api/endpoints/auth.py @@ -12,9 +12,8 @@ from pydantic import EmailStr from vocolab import exc, out -from vocolab.db import schema, models -from vocolab.db.q import userQ -from vocolab.lib import api_lib, notify +from vocolab.data import models, model_queries +from vocolab.core import api_lib, notify from vocolab.settings import get_settings router = APIRouter() @@ -26,14 +25,12 @@ async def login(form_data: OAuth2PasswordRequestForm = Depends()) -> models.api.LoggedItem: """ Authenticate a user """ try: - out.console.print(f"{form_data.username=}, {form_data.password=}") - user = await userQ.get_user_for_login(login_id=form_data.username, password=form_data.password) - out.console.print(f'login {user=}') + user = await model_queries.User.login(login_id=form_data.username, password=form_data.password) if user is None: raise ValueError('Bad login') - token = schema.Token(user_email=user.email) - return models.api.LoggedItem(access_token=token.encode(), token_type="bearer") + 
token = model_queries.Token(user_email=user.email) + return models.api.LoggedItem(username=user.username, access_token=token.encode(), token_type="bearer") except ValueError: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -47,7 +44,10 @@ async def post_signup(request: Request, affiliation: str = Form(...), email: EmailStr = Form(...), username: str = Form(...), password: str = Form(...)) -> str: """ Create a new user via the HTML form (returns a html page) """ - user = models.misc.UserCreate( + if _settings.is_locked(): + raise exc.APILockedException() + + user = models.api.UserCreateRequest( username=username, email=email, pwd=password, @@ -82,12 +82,15 @@ async def password_reset_request( html_response: bool = False, username: str = Form(...), email: EmailStr = Form(...)): """ Request a users password to be reset """ - user = await userQ.get_user(by_username=username) - if user.username != username: + if _settings.is_locked(): + raise exc.APILockedException() + + user = await model_queries.User.get(by_username=username) + if user.email != email: raise ValueError('Bad request, no such user') # session = await userQ.create_password_reset_session(username=username, email=email) - token = schema.Token(user_email=user.email, allow_password_reset=True) + token = model_queries.Token(user_email=user.email, allow_password_reset=True) data = { 'username': username, 'url': f"{api_lib.url_for(request, 'password_update_page')}?v={token.encode()}", @@ -105,7 +108,7 @@ async def password_reset_request( if html_response: data = dict( image_dir=f"{request.base_url}static/img", - title=f"Password Change Request Received !", + title="Password Change Request Received !", body=f"A verification email will be sent to {email}", success=True ) @@ -117,16 +120,19 @@ async def password_reset_request( async def post_password_update(v: str, request: Request, html_response: bool = False, password: str = Form(...), password_validation: str = Form(...), session_code: str = 
Form(...)): """Update a users password (requires a reset session)""" + if _settings.is_locked(): + raise exc.APILockedException() + try: if v != session_code: raise ValueError('session validation not passed !!!') - token = schema.Token.decode(v) + token = model_queries.Token.decode(v) if not token.allow_password_reset: raise ValueError('bad session') - user = await userQ.get_user(by_email=token.user_email) - await userQ.update_users_password(user=user, password=password, password_validation=password_validation) + user = await model_queries.User.get(by_email=token.user_email) + await user.change_password(new_password=password, password_validation=password_validation) except ValueError as e: out.log.error( f'{request.client.host}:{request.client.port} requested bad password reset session as {v} - [{e}]') diff --git a/vocolab/api/endpoints/benchmarks.py b/vocolab/api/endpoints/benchmarks.py new file mode 100644 index 0000000..b9b01a0 --- /dev/null +++ b/vocolab/api/endpoints/benchmarks.py @@ -0,0 +1,40 @@ +""" Routing for /challenges section of the API +This section handles challenge data +""" +from typing import List + +from fastapi import ( + APIRouter +) + +from vocolab.data import models, model_queries +from vocolab.settings import get_settings + +router = APIRouter() +_settings = get_settings() + + +@router.get('/list') +async def get_challenge_list(include_inactive: bool = False): + """ Return a list of all active benchmarks """ + return await model_queries.BenchmarkList.get(include_all=include_inactive) + + +@router.get('/{benchmark_id}/info') +async def get_challenge_info(benchmark_id: str): + """ Return information of a specific benchmark """ + # todo add leaderboards to challenge info + return await model_queries.Benchmark.get(benchmark_id=benchmark_id, allow_inactive=True) + + +@router.get('/{benchmark_id}/submissions/list', + responses={404: {"model": models.api.Message}}) +async def get_sub_list(benchmark_id: str) -> model_queries.ChallengeSubmissionList: 
+ """ Return information of a specific benchmark """ + return await model_queries.ChallengeSubmissionList.get_from_challenge(benchmark_id) + + +@router.get('/{benchmark_id}/leaderboards/list', responses={404: {"model": models.api.Message}}) +async def get_all_leaderboards(benchmark_id: str) -> model_queries.LeaderboardList: + """ Return information of a specific challenge """ + return await model_queries.LeaderboardList.get_by_challenge(benchmark_id=benchmark_id) diff --git a/vocolab/api/endpoints/challenges.py b/vocolab/api/endpoints/challenges.py deleted file mode 100644 index 2ccd5d0..0000000 --- a/vocolab/api/endpoints/challenges.py +++ /dev/null @@ -1,83 +0,0 @@ -""" Routing for /challenges section of the API -This section handles challenge data -""" -from typing import List - -from fastapi import ( - APIRouter, Depends, UploadFile, File, BackgroundTasks -) - -from vocolab import out, exc -from vocolab.db import schema, models -from vocolab.db.q import challengesQ -from vocolab.lib import api_lib, submissions_lib -from vocolab.settings import get_settings - -router = APIRouter() -_settings = get_settings() - - -@router.get('/', response_model=List[models.api.ChallengePreview]) -async def get_challenge_list(include_inactive: bool = False): - """ Return a list of all active challenges """ - challenge_lst = await challengesQ.list_challenges(include_all=include_inactive) - return [models.api.ChallengePreview(id=ch.id, label=ch.label, active=ch.active) for ch in challenge_lst] - - -@router.get('/{challenge_id}', response_model=models.api.ChallengesResponse, - responses={404: {"model": models.api.Message}}) -async def get_challenge_info(challenge_id: int): - """ Return information of a specific challenge """ - # todo add leaderboards to challenge info - return await challengesQ.get_challenge(challenge_id=challenge_id, allow_inactive=True) - - -# todo test submit creation -@router.post('/{challenge_id}/submission/create', responses={404: {"model": 
models.api.Message}}) -async def create_submission( - challenge_id: int, data: models.api.NewSubmissionRequest, - current_user: schema.User = Depends(api_lib.get_current_active_user) -): - """ Create a new submission """ - challenge = await challengesQ.get_challenge(challenge_id=challenge_id) - if challenge is None: - return ValueError(f'challenge {challenge_id} not found or inactive') - - # create db entry - submission_id = await challengesQ.add_submission(new_submission=models.api.NewSubmission( - user_id=current_user.id, - track_id=challenge.id, - ), evaluator_id=challenge.evaluator) - # create disk entry - submissions_lib.make_submission_on_disk( - submission_id, current_user.username, challenge.label, meta=data - ) - return submission_id - - -@router.put("/{challenge_id}/submission/upload", response_model=models.api.UploadSubmissionPartResponse) -async def upload_submission( - challenge_id: int, - submission_id: str, - part_name: str, - background_tasks: BackgroundTasks, - file_data: UploadFile = File(...), - current_user: schema.User = Depends(api_lib.get_current_active_user), -): - out.console.info(f"user: {current_user.username}") - challenge = await challengesQ.get_challenge(challenge_id=challenge_id) - if challenge is None: - return ValueError(f'challenge {challenge_id} not found or inactive') - try: - is_completed, remaining = submissions_lib.add_part(submission_id, part_name, file_data) - - if is_completed: - # run the completion of the submission on the background - background_tasks.add_task(submissions_lib.complete_submission, submission_id, with_eval=True) - - return models.api.UploadSubmissionPartResponse( - completed=is_completed, remaining=[n.file_name for n in remaining] - ) - except exc.VocoLabException as e: - out.log.exception() - raise e diff --git a/vocolab/api/endpoints/leaderboards.py b/vocolab/api/endpoints/leaderboards.py index 9a3ddd5..0d74d26 100644 --- a/vocolab/api/endpoints/leaderboards.py +++ b/vocolab/api/endpoints/leaderboards.py 
@@ -1,53 +1,65 @@ """ Routing for /leaderboards section of the API This section handles leaderboard data """ -from datetime import datetime -from typing import List +import tempfile +from pathlib import Path from fastapi import ( - APIRouter + APIRouter, BackgroundTasks ) -from vocolab import exc -from vocolab.db import models -from vocolab.db.q import leaderboardQ -from vocolab.lib import api_lib +from fastapi.responses import FileResponse +from starlette.background import BackgroundTask + +from vocolab_ext import leaderboards as leaderboard_ext + +from vocolab.data import model_queries from vocolab.settings import get_settings router = APIRouter() _settings = get_settings() -@router.get('/', response_model=List[models.api.LeaderboardPublicView], responses={404: {"model": models.api.Message}}) -async def get_leaderboards_list(): - """ Returns the list of leaderboards """ - lst = await leaderboardQ.list_leaderboards() - - # strip non public values from entries +@router.get("/list") +async def get_list() -> list[str]: + ld_list = await model_queries.LeaderboardList.get_all() return [ - models.api.LeaderboardPublicView( - id=ld.id, - challenge_id=ld.challenge_id, - label=ld.label, - entry_file=ld.entry_file, - archived=ld.archived, - static_files=ld.static_files - ) - for ld in lst + ld.label for ld in ld_list ] -@router.get('/{leaderboard_id}/json', responses={404: {"model": models.api.Message}}) -async def get_leaderboard_data(leaderboard_id: int): - """ Return leaderboard of a specific challenge """ - try: - leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - except ValueError: - raise exc.ResourceRequestedNotFound(f'No leaderboard with id {leaderboard_id}') - - if leaderboard.path_to.is_file(): - return api_lib.file2dict(leaderboard.path_to) - else: - return dict( - updatedOn=datetime.now().isoformat(), - data=[] - ) +@router.get('{leaderboard}/info') +async def get_leaderboard_info(leaderboard: str): + """ Return information of a 
specific challenge """ + return await model_queries.Leaderboard.get(leaderboard_id=leaderboard) + + +@router.get("{leaderboard}/json") +async def get_leaderboard_entries_as_json(leaderboard: str): + """ Return a leaderboard into a json format """ + entry_list = await model_queries.LeaderboardEntryList.get_from_leaderboard(leaderboard) + return entry_list.as_leaderboard() + + +@router.get("{leaderboard}/csv") +async def get_leaderboard_entries_as_csv(leaderboard: str): + def clean(file: tempfile.NamedTemporaryFile): + """ clean temp file """ + Path(file.name).unlink(missing_ok=True) + + # load objects + entry_list = await model_queries.LeaderboardEntryList.get_from_leaderboard(leaderboard) + ld_mngr = leaderboard_ext.LeaderboardManager.load_leaderboard_from_obj(leaderboard, entry_list.as_leaderboard()) + + # Write csv into tmp file + tmp_file = tempfile.NamedTemporaryFile(suffix='.csv', delete=False) + ld_mngr.export_as_csv(file=Path(tmp_file.name)) + + # return file w/ clean-up bg-task + return FileResponse(tmp_file.name, background=BackgroundTask(clean, file=tmp_file)) + + +@router.get("{leaderboard}/entry/{entry_id}") +async def get_leaderboard_entry(leaderboard: str, entry_id: str): + entry = await model_queries.LeaderboardEntry.get(entry_id) + assert entry.leaderboard_id == leaderboard + return entry.data diff --git a/vocolab/api/endpoints/models.py b/vocolab/api/endpoints/models.py new file mode 100644 index 0000000..309314e --- /dev/null +++ b/vocolab/api/endpoints/models.py @@ -0,0 +1,38 @@ +""" Routing for /challenges section of the API +This section handles challenge data +""" + +from fastapi import ( + APIRouter +) + +from vocolab.data import model_queries +from vocolab.settings import get_settings + +router = APIRouter() +_settings = get_settings() + + +@router.get('/list') +async def get_model_list(): + """ Request the full model list """ + # todo check if extra formatting is needed + return await model_queries.ModelIDList.get() + + 
+@router.get('/{model_id}/info') +async def get_model_info(model_id: str): + return await model_queries.ModelID.get(model_id) + + +@router.get('/{model_id}/submissions/list') +async def get_model_submissions(model_id: str): + """ Get all submissions corresponding to a model_id """ + model = await model_queries.ModelID.get(model_id) + # todo load submissions + + +@router.get('/{model_id}/challenges/list') +async def get_model_submission_info(model_id: str): + # todo: check + pass diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py new file mode 100644 index 0000000..7f27b68 --- /dev/null +++ b/vocolab/api/endpoints/submissions.py @@ -0,0 +1,98 @@ +""" Routing for /challenges section of the API +This section handles challenge data +""" + +from fastapi import ( + APIRouter, Depends, UploadFile, File, BackgroundTasks, + HTTPException +) + +from vocolab import out, exc +from vocolab.core import api_lib, submission_lib +from vocolab.data import models, model_queries +from vocolab.settings import get_settings + +router = APIRouter() +_settings = get_settings() + + +@router.get("/list") +async def get_sub_list(): + # todo implement this + pass + + +@router.get("/{submission_id}/info") +async def get_sub_info(submission_id: str): + """ Returns entry of submission """ + return await model_queries.ChallengeSubmission.get(submission_id) + + +@router.get("/{submission_id}/scores") +async def get_submission_scores(submission_id: str): + # todo implement this + pass + + +@router.get("/{submission_id}/content/status") +async def submission_mode(submission_id: str): + """ Returns the status of a submission """ + sub = await model_queries.ChallengeSubmission.get(submission_id) + return dict( + submission_id=sub.id, + status=sub.status + ) + + +@router.post("/{submission_id}/content/add") +async def upload_submission( + submission_id: str, + part_name: str, + background_tasks: BackgroundTasks, + file: UploadFile = File(...), + current_user: 
model_queries.User = Depends(api_lib.get_current_active_user), +): + if _settings.is_locked(): + raise exc.APILockedException() + + out.console.info(f"user: {current_user.username} is uploading {file.filename}") + submission = await model_queries.ChallengeSubmission.get(submission_id) + if submission is None: + raise HTTPException(status_code=404, detail="submission not found") + + if submission.user_id != current_user.id: + raise HTTPException(status_code=403, detail="Operation not allowed") + + try: + sub_dir = submission_lib.SubmissionDir.load(model_id=submission.model_id, submission_id=submission.id) + except FileNotFoundError: + raise HTTPException(status_code=417, detail="Expected submission directory to exist") + + try: + is_completed, remaining = sub_dir.add_content(file_name=part_name, data=file) + + if is_completed: + # run the completion of the submission on the background + async def bg_task(): + sub_dir.complete_upload() + await submission.update_status(model_queries.SubmissionStatus.uploaded) + + background_tasks.add_task(bg_task) + + return models.api.UploadSubmissionPartResponse( + completed=is_completed, remaining=[n.file_name for n in remaining] + ) + except exc.VocoLabException as e: + out.log.exception() + raise e + + +@router.delete("/{submission_id}/remove") +async def remove_submission(submission_id: str, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + if _settings.is_locked(): + raise exc.APILockedException() + + out.log.info(f"user {current_user.username} requested that the submission {submission_id} gets deleted !") + # todo implement delete operation + pass diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index a476ff6..013f7d9 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -1,169 +1,136 @@ """ Routing for /users section of the API This section handles user data """ -from typing import Dict, List +import functools import pydantic from 
fastapi import ( - APIRouter, Depends, Response + APIRouter, Depends, Response, HTTPException ) -from vocolab import exc, out -from vocolab.lib import api_lib, users_lib, submissions_lib -from vocolab.db import schema, models -from vocolab.db.q import challengesQ, leaderboardQ +from vocolab import out, exc +from vocolab.core import api_lib, users_lib, submission_lib +from vocolab.data import model_queries, models from vocolab.settings import get_settings - router = APIRouter() _settings = get_settings() +NonAllowedOperation = functools.partial(HTTPException, status_code=401, detail="Operation not allowed") -def drop_keys(data: Dict, keys: List[str]): - for k in keys: - try: - del data[k] - except KeyError: - pass +@router.get("/{username}/profile") +def get_profile(username: str, + current_user: model_queries.User = Depends( + api_lib.get_current_active_user)) -> users_lib.UserProfileData: + if current_user.username != username: + raise NonAllowedOperation() -@router.get("/profile") -def get_profile(current_user: schema.User = Depends(api_lib.get_current_active_user)) -> models.api.UserProfileResponse: try: - user_data = users_lib.get_user_data(current_user.username).dict() - drop_keys(user_data, ['verified', 'email', 'created']) - return models.api.UserProfileResponse( - verified=current_user.verified == "True", - email=current_user.email, - created=current_user.created_at, - **user_data - ) + user_data = current_user.get_profile_data() + # re-update verification + user_data.verified = current_user.is_verified() + return user_data except pydantic.ValidationError: out.log.error("Failed to validate profile data") out.console.exception() -@router.post("/profile") +@router.post("/{username}/profile") def update_profile( - user_data: models.api.UserData, current_user: schema.User = Depends(api_lib.get_current_active_user)): - users_lib.update_user_data(current_user.username, data=user_data) + username: str, + user_data: users_lib.UserProfileData, + current_user: 
model_queries.User = Depends(api_lib.get_current_active_user)): + if _settings.is_locked(): + raise exc.APILockedException() + + if current_user.username != username: + raise NonAllowedOperation() + + if user_data.username != current_user.username: + raise NonAllowedOperation() + + user_data.verified = current_user.is_verified() + user_data.save() return Response(status_code=200) -@router.get('/submissions') -async def submissions_list(current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return a list of all user submissions """ - submissions = await challengesQ.get_user_submissions(user_id=current_user.id) - submissions = [ - models.api.SubmissionPreview( - submission_id=s.id, - track_id=s.track_id, - track_label=(await challengesQ.get_challenge(challenge_id=s.track_id)).label, - status=s.status - ) - for s in submissions - ] - - data = {} - for sub in submissions: - if sub.track_label in data.keys(): - data[sub.track_label].append(sub) - else: - data[sub.track_label] = [sub] - - return data - - -@router.get('/submissions/tracks/{track_id}') -async def submissions_list_by_track( - track_id: int, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return a list of all user submissions """ - track = await challengesQ.get_challenge(challenge_id=track_id) - submissions = await challengesQ.get_user_submissions(user_id=current_user.id) - - return [ - models.api.SubmissionPreview( - submission_id=s.id, - track_id=s.track_id, - track_label=track.label, - status=s.status - ) - for s in submissions if s.track_id == track.id - ] - - -@router.get('/submissions/{submissions_id}') -async def get_submission(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return information on a submission """ - submission = await challengesQ.get_submission(by_id=submissions_id) - if submission.user_id != current_user.id: - raise exc.AccessError("current user is not allowed to preview this 
submission !", - status=exc.http_status.HTTP_403_FORBIDDEN) - - track = await challengesQ.get_challenge(challenge_id=submission.track_id) - leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) - - if submission.evaluator_id is not None: - evaluator = await challengesQ.get_evaluator(by_id=submission.evaluator_id) - evaluator_cmd = f"{evaluator.executor} {evaluator.script_path} {evaluator.executor_arguments.replace(';', ' ')}" - evaluator_label = evaluator.label - else: - evaluator_cmd = "" - evaluator_label = "" - - return models.api.SubmissionView( - submission_id=submission.id, +@router.get("/{username}/models/list") +async def list_users_models(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + """ Returning list of models of current user """ + if current_user.username != username: + raise NonAllowedOperation() + return await model_queries.ModelIDList.get_by_user(current_user.id) + + +@router.post("/{username}/models/create") +async def create_new_model(username: str, author_name: str, data: models.api.NewModelIdRequest, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + """ Create a new model id""" + if _settings.is_locked(): + raise exc.APILockedException() + + if current_user.username != username: + raise NonAllowedOperation() + + try: + # create in DB + model_id = await model_queries.ModelID.create(user_id=current_user.id, first_author_name=author_name, data=data) + # create on disk + submission_lib.ModelDir.make(model_id) + except Exception as e: + out.console.print(e) + raise e + + return dict( + model_id=model_id, user=current_user.username, + ) + + +@router.get("/{username}/submissions/list") +async def list_users_submissions(username: str, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + """ List submissions created by the user """ + if current_user.username != username: + raise NonAllowedOperation() + + return 
await model_queries.ChallengeSubmissionList.get_from_user(user_id=current_user.id) + + +@router.post("/{username}/submissions/create") +async def create_new_submission(username: str, data: models.api.NewSubmissionRequest, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + """ Create a new empty submission with the given information """ + if _settings.is_locked(): + raise exc.APILockedException() + + if current_user.username != username: + raise NonAllowedOperation() + + new_submission = await model_queries.ChallengeSubmission.create( user_id=current_user.id, username=current_user.username, - track_label=track.label, - track_id=track.id, - status=submission.status, - date=submission.submit_date, - evaluator_cmd=evaluator_cmd, - evaluator_label=evaluator_label, - leaderboards=[(ld.label, ld.id) for ld in leaderboards] + model_id=data.model_id, + benchmark_id=data.benchmark_id, + has_scores=data.has_scores, + author_label=data.author_label ) + # create model_id & submission dir + model_dir = submission_lib.ModelDir.load(data.model_id) + model_dir.make_submission( + submission_id=new_submission.id, + benchmark_label=new_submission.benchmark_id, + auto_eval=new_submission.auto_eval, + username=current_user.username, + leaderboard_file=data.leaderboard, + filehash=data.hash, + multipart=data.multipart, + has_scores=data.has_scores, + index=data.index + ) -@router.get('/submissions/{submissions_id}/status') -async def get_submission_status( - submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return status of a submission """ - submission = await challengesQ.get_submission(by_id=submissions_id) - if submission.user_id != current_user.id: - raise exc.AccessError("current user is not allowed to preview this submission !", - status=exc.http_status.HTTP_403_FORBIDDEN) - - return submission.status - - -@router.get('/submissions/{submissions_id}/log') -async def get_submission_status( - 
submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return status of a submission """ - submission = await challengesQ.get_submission(by_id=submissions_id) - if submission.user_id != current_user.id: - raise exc.AccessError("current user is not allowed to preview this submission !", - status=exc.http_status.HTTP_403_FORBIDDEN) - - log = submissions_lib.SubmissionLogger(submissions_id) - return log.get_text() - - -@router.get('/submissions/{submissions_id}/scores') -async def get_user_results(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return status of a submission """ - submission = await challengesQ.get_submission(by_id=submissions_id) - if submission.user_id != current_user.id: - raise exc.AccessError("current user is not allowed to preview this submission !", - status=exc.http_status.HTTP_403_FORBIDDEN) - sub_location = submissions_lib.get_submission_dir(submission_id=submission.id) - - leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) - result = {} - for ld in leaderboards: - ld_file = sub_location / ld.entry_file - if ld_file.is_file(): - result[ld.label] = api_lib.file2dict(ld_file) - - return result + return dict( + status=new_submission.status, benchmark=new_submission.benchmark_id, + user=current_user.username, submission_id=new_submission.id, auto_eval=new_submission.auto_eval + ) diff --git a/vocolab/api/main.py b/vocolab/api/main.py index 79da1df..2fcfc83 100644 --- a/vocolab/api/main.py +++ b/vocolab/api/main.py @@ -9,10 +9,11 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from fastapi.responses import JSONResponse +from pydantic import ValidationError from vocolab import settings, out from vocolab.api import router as v1_router -from vocolab.db import zrDB, create_db +from vocolab.data import db from vocolab.exc import VocoLabException _settings = 
settings.get_settings() @@ -30,15 +31,6 @@ middleware=middleware ) -# app.add_middleware( -# CORSMiddleware, -# allow_origins=["*"], -# # allow_origin_regex=_settings.origin_regex, -# allow_credentials=True, -# allow_methods=["*"], -# allow_headers=["*"], -# ) - @app.middleware("http") async def log_requests(request: Request, call_next): @@ -49,7 +41,11 @@ async def log_requests(request: Request, call_next): start_time = time.time() - response = await call_next(request) + try: + response = await call_next(request) + except Exception as e: + print(e) + raise e process_time = (time.time() - start_time) * 1000 formatted_process_time = '{0:.2f}'.format(process_time) @@ -77,6 +73,19 @@ async def value_error_reformatting(request: Request, exc: ValueError): @app.exception_handler(VocoLabException) +async def zerospeech_error_formatting(request: Request, exc: VocoLabException): + if hasattr(exc, 'data'): + content = dict(message=f"{str(exc)}", data=str(exc.data)) + else: + content = dict(message=f"{str(exc)}") + + return JSONResponse( + status_code=exc.status, + content=content, + ) + + +@app.exception_handler(ValidationError) async def zerospeech_error_formatting(request: Request, exc: VocoLabException): if exc.data: content = dict(message=f"{str(exc)}", data=str(exc.data)) @@ -92,9 +101,9 @@ async def zerospeech_error_formatting(request: Request, exc: VocoLabException): @app.on_event("startup") async def startup(): # conditional creation of the necessary files - create_db() + db.build_database_from_schema() # pool connection to databases - await zrDB.connect() + await db.zrDB.connect() # create data_folders _settings.user_data_dir.mkdir(exist_ok=True, parents=True) _settings.leaderboard_dir.mkdir(exist_ok=True) @@ -112,7 +121,7 @@ async def startup(): async def shutdown(): # clean up db connection pool out.log.info("shutdown of api server") - await zrDB.disconnect() + await db.zrDB.disconnect() # sub applications diff --git a/vocolab/api/pages/users.py 
b/vocolab/api/pages/users.py index b9223ac..0c3b3c8 100644 --- a/vocolab/api/pages/users.py +++ b/vocolab/api/pages/users.py @@ -8,8 +8,8 @@ from fastapi.responses import HTMLResponse from vocolab import exc, out -from vocolab.lib import api_lib -from vocolab.db.q import userQ +from vocolab.core import api_lib +from vocolab.data import model_queries from vocolab.settings import get_settings router = APIRouter() @@ -32,9 +32,9 @@ async def email_verification(v: str, username: str, request: Request): """ Verify a new users email address """ msg = 'Success' res = False - try: - res = await userQ.verify_user(username=username, verification_code=v) + usr = await model_queries.User.get(by_username=username) + res = await usr.verify(verification_code=v) except ValueError: msg = 'Username does not exist' except exc.ActionNotValid as e: @@ -59,7 +59,11 @@ async def email_verification(v: str, username: str, request: Request): async def password_update_page(v: str, request: Request): """ An HTML page-form that allows a user to change their password """ try: - user = await userQ.get_user(by_password_reset_session=v) + token = model_queries.Token.decode(v) + if not token.allow_password_reset and not token.is_expired(): + raise ValueError('bad session') + + user = await model_queries.User.get(by_email=token.user_email) except ValueError as e: out.log.error( f'{request.client.host}:{request.client.port} requested bad password reset session as {v} - [{e}]') diff --git a/vocolab/api/router.py b/vocolab/api/router.py index 14244e7..9dba4da 100644 --- a/vocolab/api/router.py +++ b/vocolab/api/router.py @@ -1,9 +1,11 @@ +from datetime import datetime from pathlib import Path -from fastapi import APIRouter +from fastapi import APIRouter, HTTPException, status +from pydantic import BaseModel, EmailStr from vocolab.api.endpoints import ( - users, auth, challenges, leaderboards + users, auth, benchmarks, leaderboards, models, submissions ) from vocolab.api.pages import users as user_pages 
from vocolab.settings import get_settings @@ -13,28 +15,47 @@ api_router = APIRouter() +class APIIndex(BaseModel): + app: str + version: str + maintainers: str + contact: EmailStr + installation_datetime: datetime + + @api_router.get("/") -def index(): +def index() -> APIIndex: """ API Index """ install_time = (Path.home() / '.voco-installation') if install_time.is_file(): with install_time.open() as fp: installation_datetime = fp.read() else: - installation_datetime = '' + installation_datetime = datetime.now().isoformat() - return { + return APIIndex.parse_obj({ "app": _settings.app_options.app_name, "version": _settings.app_options.version, "maintainers": _settings.app_options.maintainers, "contact": _settings.app_options.admin_email, "installation_datetime": installation_datetime - } + }) + + +@api_router.get("/error") +def get_error(): + """ This route throws an error (used for testing)""" + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password" + ) api_router.include_router(auth.router, prefix="/auth", tags=["auth"]) api_router.include_router(users.router, prefix="/users", tags=["user-data"]) -api_router.include_router(challenges.router, prefix="/challenges", tags=["challenges"]) +api_router.include_router(benchmarks.router, prefix="/benchmarks", tags=["benchmarks"]) +api_router.include_router(models.router, prefix="/models", tags=["model"]) +api_router.include_router(submissions.router, prefix="/submissions", tags=["submissions"]) api_router.include_router(leaderboards.router, prefix="/leaderboards", tags=["leaderboards"]) api_router.include_router(user_pages.router, prefix="/page", tags=["pages"]) diff --git a/vocolab/lib/__init__.py b/vocolab/core/__init__.py similarity index 100% rename from vocolab/lib/__init__.py rename to vocolab/core/__init__.py diff --git a/vocolab/lib/api_lib.py b/vocolab/core/api_lib.py similarity index 72% rename from vocolab/lib/api_lib.py rename to vocolab/core/api_lib.py index 
11da18f..4cd7620 100644 --- a/vocolab/lib/api_lib.py +++ b/vocolab/core/api_lib.py @@ -5,23 +5,22 @@ from fastapi.security import OAuth2PasswordBearer from jinja2 import FileSystemLoader, Environment -from vocolab import settings -from vocolab.db import schema, models -from vocolab.db.q import userQ -from vocolab.lib import notify, _fs +from vocolab import settings, out +from vocolab.data import model_queries, models +from vocolab.core import notify, commons _settings = settings.get_settings() oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login") # export -file2dict = _fs.commons.load_dict_file +file2dict = commons.load_dict_file -def validate_token(token: str = Depends(oauth2_scheme)) -> schema.Token: +def validate_token(token: str = Depends(oauth2_scheme)) -> model_queries.Token: """ Dependency for validating the current users session via the token""" try: - token = schema.Token.decode(token) + token = model_queries.Token.decode(token) if token.is_expired(): raise ValueError('Token has expired') @@ -34,22 +33,28 @@ def validate_token(token: str = Depends(oauth2_scheme)) -> schema.Token: status_code=status.HTTP_401_UNAUTHORIZED, detail="Token is invalid or has expired !", ) + except Exception as e: + out.console.exception() + raise e -async def get_user(token: schema.Token = Depends(validate_token)) -> schema.User: +async def get_user(token: model_queries.Token = Depends(validate_token)) -> model_queries.User: """ Dependency for fetching current user from database using token entry """ try: - return await userQ.get_user(by_email=token.user_email) + return await model_queries.User.get(by_email=token.user_email) except ValueError: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, detail="User is not in database !" 
) + except Exception as e: + out.console.exception() + raise e -async def get_current_active_user(current_user: schema.User = Depends(get_user)) -> schema.User: +async def get_current_active_user(current_user: model_queries.User = Depends(get_user)) -> model_queries.User: """ Dependency for validating current user """ - if current_user.verified == 'True': + if current_user.is_verified(): if current_user.active: return current_user else: @@ -71,9 +76,9 @@ def generate_html_response(data: Dict[str, Any], template_name: str) -> str: return template.render(**data) -async def signup(request: Request, user: models.misc.UserCreate): +async def signup(request: Request, user: models.api.UserCreateRequest): """ Creates a new user and schedules the registration email """ - verification_code = await userQ.create_user(usr=user) + verification_code = await model_queries.User.create(new_usr=user) data = { 'username': user.username, # todo check if url needs update @@ -84,13 +89,14 @@ async def signup(request: Request, user: models.misc.UserCreate): loop = asyncio.get_running_loop() loop.create_task(notify.email.template_email( emails=[user.email], - subject='[Zerospeech] Account Verification', + subject=f'[{_settings.app_options.platform_name}] Account Verification', data=data, template_name='email_validation.jinja2') ) def get_base_url(request: Request) -> str: + """ Get base url taking into account http -> https redirection """ base_url = f"{request.base_url}" headers = request.headers @@ -101,6 +107,7 @@ def get_base_url(request: Request) -> str: def url_for(request: Request, path_requested: str) -> str: + """ Query API path url taking into account http -> https redirections """ url = request.url_for(path_requested) headers = request.headers diff --git a/vocolab/admin/cmd_lib.py b/vocolab/core/cmd_lib.py similarity index 100% rename from vocolab/admin/cmd_lib.py rename to vocolab/core/cmd_lib.py diff --git a/vocolab/lib/_fs/commons.py b/vocolab/core/commons.py similarity index 
96% rename from vocolab/lib/_fs/commons.py rename to vocolab/core/commons.py index 046db04..4070e5b 100644 --- a/vocolab/lib/_fs/commons.py +++ b/vocolab/core/commons.py @@ -13,6 +13,15 @@ from vocolab import out +def drop_keys(data: Dict, keys: List[str]): + """ Filter keys from a dictionary """ + for k in keys: + try: + del data[k] + except KeyError: + pass + + def load_dict_file(location: Path) -> Union[Dict, List]: """ Load a dict type file (json, yaml, toml)""" with location.open() as fp: diff --git a/vocolab/lib/evaluators_lib.py b/vocolab/core/evaluators_lib.py similarity index 83% rename from vocolab/lib/evaluators_lib.py rename to vocolab/core/evaluators_lib.py index 7e52799..e8b42aa 100644 --- a/vocolab/lib/evaluators_lib.py +++ b/vocolab/core/evaluators_lib.py @@ -4,15 +4,13 @@ import yaml from vocolab import get_settings -from vocolab.db import models -from vocolab.lib import ( - _fs -) +from vocolab.data import models +from vocolab.core import commons _settings = get_settings() # export -check_host = _fs.commons.check_host +check_host = commons.check_host def discover_evaluators(hostname: str, bin_location) -> List[models.cli.NewEvaluatorItem]: @@ -20,9 +18,9 @@ def discover_evaluators(hostname: str, bin_location) -> List[models.cli.NewEvalu cmd = shlex.split(f'cat {bin_location}/index.yml') if hostname not in ('localhost', '127.0.0.1', _settings.app_options.hostname): - code, res = _fs.commons.ssh_exec(hostname, cmd) + code, res = commons.ssh_exec(hostname, cmd) else: - code, res = _fs.commons.execute(cmd) + code, res = commons.execute(cmd) if code != 0: raise FileNotFoundError(f"Host {hostname} has not evaluators at this location: {bin_location}") diff --git a/vocolab/core/leaderboards_lib/__init__.py b/vocolab/core/leaderboards_lib/__init__.py new file mode 100644 index 0000000..5b25c77 --- /dev/null +++ b/vocolab/core/leaderboards_lib/__init__.py @@ -0,0 +1 @@ +from .leaderboards import * diff --git a/vocolab/core/leaderboards_lib/leaderboards.py 
b/vocolab/core/leaderboards_lib/leaderboards.py new file mode 100644 index 0000000..b133886 --- /dev/null +++ b/vocolab/core/leaderboards_lib/leaderboards.py @@ -0,0 +1,106 @@ +import json +import shutil +from pathlib import Path +from typing import Generator, Optional, Any + +from pydantic import BaseModel +from vocolab_ext.leaderboards import LeaderboardRegistry, LeaderboardManager + +from vocolab import get_settings + +_settings = get_settings() + +# Load leaderboard manager from extensions +leaderboard_manager: LeaderboardManager = LeaderboardRegistry().load(_settings.extensions.leaderboards_extension) + + +class LeaderboardDir(BaseModel): + """ Handler class for disk storage of Leaderboards """ + location: Path + sorting_key: Optional[str] + leaderboard_type: str + + @property + def label(self) -> str: + """ Leaderboard label """ + return self.location.name + + @property + def cached_store(self): + """ Object used to cache build leaderboard (for faster serving) """ + return self.location / 'leaderboard.json' + + @property + def entry_dir(self) -> Path: + """ Location where all leaderboard entries are stored """ + return self.location / 'entries' + + @property + def entries(self) -> Generator[Any, None, None]: + """ Generator containing entry objects """ + for item in self.entry_dir.glob("*.json"): + with item.open() as fp: + yield leaderboard_manager.load_entry_from_obj(self.leaderboard_type, json.load(fp)) + + @property + def static_dir(self): + """ Location containing static items of leaderboard """ + return self.location / 'static' + + def has_static(self): + """ Boolean checking whether this leaderboard has static files """ + return self.static_dir.is_dir() + + def load_object(self, from_cache: bool = True, raw: bool = False): + """ Loads leaderboard object (cached or from entries)""" + if self.cached_store.is_file(): + if raw: + with self.cached_store.open() as fp: + return json.load(fp) + if from_cache: + with self.cached_store.open() as fp: + data = 
json.load(fp) + return leaderboard_manager.load_leaderboard_from_obj(name=self.leaderboard_type, obj=data) + + # leaderboard file not found, build it + self.mkcache() + # recall function + return self.load_object(from_cache=True, raw=raw) + + def mkcache(self): + """ Create cached version of final leaderboard """ + # load entries into object + ld_m: LeaderboardManager = leaderboard_manager.create_from_entry_folder(self.leaderboard_type, self.entry_dir) + # export as json + ld_m.export_as_csv(self.cached_store) + + @classmethod + def load(cls, label: str, sorting_key: Optional[str] = None): + """ Load leaderboard dir """ + loc = _settings.leaderboard_dir / label + if not loc.is_dir(): + raise ValueError(f'Leaderboard named {label} does not exist') + return cls( + location=loc, + sorting_key=sorting_key + ) + + @classmethod + def create(cls, label, sorting_key: str, static_files: bool = False) -> "LeaderboardDir": + """ Creates necessary files/architecture to store a leaderboard on disk """ + loc = _settings.leaderboard_dir / label + if loc.is_dir(): + raise ValueError(f'Leaderboard with {label} already exists') + + lead = cls(location=loc, sorting_key=sorting_key) + + lead.location.mkdir(parents=True) + lead.entry_dir.mkdir(parents=True) + if static_files: + lead.static_dir.mkdir(parents=True) + + return lead + + def delete(self): + """ Remove all files relative to this leaderboard """ + shutil.rmtree(self.location) diff --git a/vocolab/lib/misc/__init__.py b/vocolab/core/misc/__init__.py similarity index 100% rename from vocolab/lib/misc/__init__.py rename to vocolab/core/misc/__init__.py diff --git a/vocolab/lib/misc/various_definitions.py b/vocolab/core/misc/various_definitions.py similarity index 100% rename from vocolab/lib/misc/various_definitions.py rename to vocolab/core/misc/various_definitions.py diff --git a/vocolab/lib/misc/various_functions.py b/vocolab/core/misc/various_functions.py similarity index 97% rename from vocolab/lib/misc/various_functions.py 
rename to vocolab/core/misc/various_functions.py index b9c140a..3c07e64 100644 --- a/vocolab/lib/misc/various_functions.py +++ b/vocolab/core/misc/various_functions.py @@ -3,11 +3,10 @@ import json import shutil import subprocess -from collections import Callable from contextlib import contextmanager from datetime import datetime, date, time from pathlib import Path -from typing import List, Tuple, Dict +from typing import List, Tuple, Dict, Callable from dateutil import parser diff --git a/vocolab/lib/notify/__init__.py b/vocolab/core/notify/__init__.py similarity index 100% rename from vocolab/lib/notify/__init__.py rename to vocolab/core/notify/__init__.py diff --git a/vocolab/lib/notify/email.py b/vocolab/core/notify/email.py similarity index 100% rename from vocolab/lib/notify/email.py rename to vocolab/core/notify/email.py diff --git a/vocolab/lib/notify/mattermost.py b/vocolab/core/notify/mattermost.py similarity index 100% rename from vocolab/lib/notify/mattermost.py rename to vocolab/core/notify/mattermost.py diff --git a/samples/example.env b/vocolab/core/old_implementations/__init__.py similarity index 100% rename from samples/example.env rename to vocolab/core/old_implementations/__init__.py diff --git a/vocolab/lib/_fs/__init__.py b/vocolab/core/old_implementations/commons/__init__.py similarity index 81% rename from vocolab/lib/_fs/__init__.py rename to vocolab/core/old_implementations/commons/__init__.py index ae4f24b..f86e3db 100644 --- a/vocolab/lib/_fs/__init__.py +++ b/vocolab/core/old_implementations/commons/__init__.py @@ -3,7 +3,6 @@ These functions are split into the following submodules dependent on their use-case """ from . import users -from . import submissions from . import leaderboards -from . import commons +from .core import * from . 
import file_spilt diff --git a/vocolab/lib/_fs/file_spilt.py b/vocolab/core/old_implementations/commons/file_spilt.py similarity index 100% rename from vocolab/lib/_fs/file_spilt.py rename to vocolab/core/old_implementations/commons/file_spilt.py diff --git a/vocolab/lib/_fs/leaderboards.py b/vocolab/core/old_implementations/commons/leaderboards.py similarity index 100% rename from vocolab/lib/_fs/leaderboards.py rename to vocolab/core/old_implementations/commons/leaderboards.py diff --git a/vocolab/lib/_fs/users.py b/vocolab/core/old_implementations/commons/users.py similarity index 100% rename from vocolab/lib/_fs/users.py rename to vocolab/core/old_implementations/commons/users.py diff --git a/vocolab/lib/_fs/submissions.py b/vocolab/core/old_implementations/submissions.py similarity index 99% rename from vocolab/lib/_fs/submissions.py rename to vocolab/core/old_implementations/submissions.py index 87f8a3b..cad7ca7 100644 --- a/vocolab/lib/_fs/submissions.py +++ b/vocolab/core/old_implementations/submissions.py @@ -11,7 +11,7 @@ from vocolab import get_settings, exc from vocolab.db import models -from .commons import md5sum, rsync, ssh_exec, zip_folder +from .._fs.commons import md5sum, rsync, ssh_exec, zip_folder _settings = get_settings() diff --git a/vocolab/lib/submissions_lib.py b/vocolab/core/old_implementations/submissions_lib.py similarity index 100% rename from vocolab/lib/submissions_lib.py rename to vocolab/core/old_implementations/submissions_lib.py diff --git a/vocolab/core/submission_lib/__init__.py b/vocolab/core/submission_lib/__init__.py new file mode 100644 index 0000000..ecee522 --- /dev/null +++ b/vocolab/core/submission_lib/__init__.py @@ -0,0 +1 @@ +from .submission import * diff --git a/vocolab/core/submission_lib/logs.py b/vocolab/core/submission_lib/logs.py new file mode 100644 index 0000000..657c4e8 --- /dev/null +++ b/vocolab/core/submission_lib/logs.py @@ -0,0 +1,112 @@ +from datetime import datetime +from pathlib import Path +from 
typing import Optional, TextIO + +from pydantic import BaseModel + +from ..commons import ssh_exec +from ...settings import get_settings + +_settings = get_settings() + + +class SubmissionLogger(BaseModel, arbitrary_types_allowed=True): + """ Class managing individual logging of submission life-cycle """ + root_dir: Path + fp_write: Optional[TextIO] = None + + @property + def submission_id(self) -> str: + return self.root_dir.name + + @property + def submission_log(self) -> Path: + """ File storing generic submission_logs""" + return self.root_dir / 'submission.log' + + @property + def eval_log_file(self) -> Path: + """ Logfile storing latest evaluation process """ + return self.root_dir / 'evaluation.log' + + @property + def slurm_log_file(self) -> Path: + """ Logfile storing latest slurm output (used during eval process)""" + return self.root_dir / "slurm.log" + + def __enter__(self): + """ Logging context open """ + self.fp_write = self.submission_log.open('a') + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ Logging context close """ + if self.fp_write is not None: + self.fp_write.close() + self.fp_write = None + + @staticmethod + def when(): + return f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + + def header(self, who: str, task: str, + multipart: bool = False, has_scores: bool = True, auto_eval: bool = False): + """ + who: user that did the submission (should be owner or admin) + task: the benchmark/task that the submission correspongs + has_scores: whether the submission has scores + multipart: whether the submission was uploaded as multipart + auto_eval: whether an auto-evaluation pipeline is set up for this submission + """ + with self.submission_log.open('w') as fp: + fp.write(f"[{self.when()}]: Submission {self.submission_id} was created\n") + fp.write(f"--> user: {who}\n") + fp.write(f"--> challenge: {task}\n") + fp.write(f"--> has_scores: {has_scores}") + fp.write(f"--> is_multipart: {multipart}\n") + fp.write(f"--> 
auto_eval: {auto_eval}\n") + + @property + def slurm_logs(self): + """ """ + lines = [] + if self.slurm_log_file.is_file(): + with self.slurm_log_file.open() as fp: + lines = fp.readlines() + return lines + + def append_eval(self, eval_output): + with self.eval_log_file.open('a') as fp: + fp.write(f"-------- start of evaluation output --------\n") + fp.write(f"---> {datetime.now().isoformat()}") + fp.write(f"{eval_output.rstrip()}\n") + for line in self.slurm_logs: + fp.write(f"{line.strip()}\n") + fp.write(f"-------- end of evaluation output ----------\n") + + def log(self, msg, date: bool = True): + """ Create a new log entry """ + if date: + msg = f"[{self.when()}] {msg}" + + if self.fp_write: + self.fp_write.write(f"{msg}\n") + else: + with self.submission_log.open('a') as fp: + fp.write(f"{msg}\n") + + def get_text(self): + """ Get full submission log """ + if self.submission_log.is_file(): + with self.submission_log.open('r') as fp: + return fp.readlines() + return [] + + def fetch_remote(self, host, remote_submission_location): + """ Fetch eval & append log from remote """ + return_code, result = ssh_exec(host, [f'cat', f'{remote_submission_location}/{self.eval_log_file}']) + if return_code == 0: + self.log(result, date=False) + else: + self.log(f"Failed to fetch {host}:{remote_submission_location}/{self.submission_log} !!") + diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py new file mode 100644 index 0000000..143c2e4 --- /dev/null +++ b/vocolab/core/submission_lib/submission.py @@ -0,0 +1,384 @@ +import functools +import json +import shutil +from datetime import datetime +from pathlib import Path +from typing import List, Optional + +from fastapi import UploadFile +from pydantic import BaseModel + +from vocolab import get_settings, exc +from .logs import SubmissionLogger +from .upload import MultiPartUploadHandler, SinglePartUploadHandler, ManifestIndexItem +from ..commons import unzip, ssh_exec, rsync, 
zip_folder, scp +from ...data.models.api import SubmissionRequestFileIndexItem + +_settings = get_settings() + + +class SubmissionInfo(BaseModel): + model_id: str + username: str + benchmark_label: str + submission_id: str + created_at: datetime + leaderboard_entry: Optional[str] + + +class SubmissionDir(BaseModel, arbitrary_types_allowed=True): + """ Handler interfacing a submission directory stored on disk """ + root_dir: Path + + @classmethod + def load(cls, model_id: str, submission_id: str): + """ Load item from model-id & submission-id""" + root = _settings.submission_dir / model_id / submission_id + if not root.is_dir(): + raise FileNotFoundError(f'Submission {model_id}/{submission_id} does not exist') + return cls(root_dir=root) + + @property + def submission_id(self) -> str: + """ Returns the submission id """ + return self.root_dir.name + + @property + def content_location(self) -> Path: + return self.root_dir / 'content' + + def has_content(self) -> bool: + """ Check if submission has content """ + return self.content_location.is_dir() and any(Path(self.content_location).iterdir()) + + @property + def scores(self) -> Path: + """ the scores folders contains all the output files created by the evaluation process """ + return self.content_location / 'scores' + + def has_scores(self) -> bool: + return self.scores.is_dir() + + @property + def info_file(self) -> Path: + """ info file contains meta data relative to the submission """ + return self.root_dir / 'info.json' + + def has_info(self) -> bool: + """ Check whether info file is present""" + return self.info_file.is_file() + + @functools.lru_cache() + def info(self) -> SubmissionInfo: + """ Load submission information """ + with self.info_file.open() as fp: + return SubmissionInfo.parse_obj(json.load(fp)) + + @property + def multipart_dir(self) -> Path: + """ multipart dir contains the chunks & index for multipart uploads """ + return self.root_dir / '.parts' + + @property + def multipart_index_file(self) 
-> Path: + """ multipart index file contains info pertaining to multipart upload + - split & merge manifest (order to merge the files) + - checksums to verify upload & merge + """ + return self.multipart_dir / 'upload.json' + + def is_multipart(self) -> bool: + """ Check whether file was uploaded as multipart """ + return self.multipart_dir.is_dir() and self.multipart_index_file.is_file() + + @property + def upload_lock(self) -> Path: + """ a lockfile locking the submission while upload has not completed """ + return self.root_dir / 'upload.lock' + + @property + def content_archive_hash_file(self) -> Path: + return self.root_dir / 'archive.hash' + + @property + def eval_lock(self) -> Path: + """ a lockfile locking the submission while evaluation is ongoing """ + return self.root_dir / 'eval.lock' + + @property + def error_lock(self) -> Path: + """ a lockfile locking the submission while evaluation is ongoing """ + return self.root_dir / 'error.lock' + + @property + def clean_lock(self) -> Path: + """ a lockfile marking the submission for deletion """ + return self.root_dir / 'clean.lock' + + @property + def interrupted_lock(self) -> Path: + """ a lockfile to signify that a process was running and was interrupted """ + return self.root_dir / 'interrupted.lock' + + def clean_all_locks(self): + """ Remove all lock files in submission""" + self.upload_lock.unlink(missing_ok=True) + self.eval_lock.unlink(missing_ok=True) + self.error_lock.unlink(missing_ok=True) + self.interrupted_lock.unlink(missing_ok=True) + self.clean_lock.unlink(missing_ok=True) + + @property + def log_handler(self) -> SubmissionLogger: + """ build the SubmissionLogger class that allows to log submission relative events """ + return SubmissionLogger(root_dir=self.root_dir) + + def get_leaderboard_items(self): + if not self.has_info(): + raise ValueError('Submission has no info index') + return self.info.leaderboard_entries + + def add_content(self, file_name: str, data: UploadFile): + """ Add 
content to the submission + *) multipart: + - add part to the tmp folder + - check if completed + - if completed merge parts + *) singlepart: + - add uploaded data to the submission + + Multipart is completed when all the parts have been successfully uploaded + Singlepart is completed when the target archive has been successfully uploaded + + If upload is completed --> unzip content into the content folder. + """ + if self.is_multipart(): + """ Multipart upload """ + handler = MultiPartUploadHandler.load_from_index(self.multipart_index_file) + handler.add_part( + logger=self.log_handler, + file_name=file_name, + data=data + ) + handler.dump_to_index(self.multipart_index_file) + else: + """ Single part upload """ + handler = SinglePartUploadHandler(root_dir=self.root_dir) + handler.write_data( + logger=self.log_handler, + file_name=file_name, + data=data + ) + + if handler.completed(): + return True, [] + return False, handler.remaining_items + + def complete_upload(self): + """ Actions to perform after upload has completed on a submission (extract files, update metadata, etc)""" + logger = self.log_handler + if self.is_multipart(): + handler = MultiPartUploadHandler.load_from_index(self.multipart_index_file) + if not handler.completed(): + raise exc.FailedOperation(f'Cannot Complete incomplete submission {self.submission_id} !!!') + + # merge parts to target archive + logger.log(f"upload of parts for {self.submission_id} completed, merging parts...") + handler.merge_parts() + logger.log("parts merged successfully") + else: + handler = SinglePartUploadHandler(root_dir=self.root_dir) + if not handler.completed(): + raise exc.FailedOperation(f'Cannot Complete incomplete submission {self.submission_id} !!!') + + logger.log(f"upload for {self.submission_id} completed") + + # unzip archive to content location + logger.log(f"unzipping archive {handler.target_file} into {self.content_location}") + unzip(handler.target_file, self.content_location) + # clean-up download 
artifacts + # handler.clean() + + def send_content(self, hostname: str) -> Path: + """ Send content to a remote host for evaluation (return target location) """ + is_remote = hostname != _settings.app_options.hostname + transfer_root_dir = _settings.task_queue_options.REMOTE_STORAGE.get(hostname) + model_id = self.info.model_id + remote_submission_dir = transfer_root_dir / model_id / self.submission_id + logger = self.log_handler + + # if host is local & submission dir is current, do nothing + if (not is_remote) and (transfer_root_dir == _settings.submission_dir): + return self.root_dir + + code, _ = ssh_exec(hostname, ['mkdir', '-p', f'{remote_submission_dir}']) + if code != 0: + logger.log(f"failed to write on {hostname}") + raise ValueError(f"No write permissions on {hostname}") + + # sync files + res = rsync(src=self.root_dir, dest_host=hostname, dest=remote_submission_dir) + if res.returncode == 0: + logger.log(f"copied files from {self.root_dir} to {hostname} for processing.") + return remote_submission_dir + else: + logger.log(f"failed to copy {self.root_dir} to {hostname} for processing.") + logger.log(res.stderr.decode()) + raise ValueError(f"Failed to copy files to host {hostname}") + + def fetch_content(self, hostname: str): + """ Fetch updated content from remote (after evaluation) """ + is_remote = hostname != _settings.app_options.hostname + transfer_root_dir = _settings.task_queue_options.REMOTE_STORAGE.get(hostname) + model_id = self.info.model_id + remote_submission_dir = transfer_root_dir / model_id / self.submission_id + logger = self.log_handler + + # if host is local & submission dir is current, do nothing + if (not is_remote) and (transfer_root_dir == _settings.submission_dir): + return self.root_dir + + # fetch log files + logger.fetch_remote(hostname, remote_submission_dir) + + # sync files + res = rsync(src_host=hostname, src=remote_submission_dir, dest=self.root_dir) + + if res.returncode == 0: + logger.log(f"fetched result files from 
{hostname} to {self.root_dir}") + return self.root_dir + else: + logger.log(f"failed to fetch results from {hostname} to {self.root_dir}.") + logger.log(res.stderr.decode()) + raise ValueError(f"Failed to copy files from host {hostname}") + + def __archive_zip(self): + """ Archive submission as zip """ + if _settings.remote_archive: + host = _settings.ARCHIVE_HOST + with _settings.get_temp_dir() as tmp: + archive_file = tmp / f'{self.info.model_id}_{self.info.submission_id}' + zip_folder(archive_file=archive_file, location=self.root_dir) + res = scp(src=archive_file, host=host, dest=_settings.submission_archive_dir) + if res.returncode != 0: + raise ValueError(f"Failed to transfer to {host}") + else: + """ Archive files to local archive as a zip file""" + zip_folder( + archive_file=self.root_dir / f'{self.info.model_id}_{self.info.submission_id}', + location=self.root_dir + ) + + def __archive_raw(self): + if _settings.remote_archive: + host = _settings.ARCHIVE_HOST + code, _ = ssh_exec(host, ['mkdir', '-p', f"{self.root_dir}"]) + if code != 0: + raise ValueError(f"No write permissions on {host}") + + res = rsync(src=self.root_dir, dest_host=host, dest=self.root_dir) + if res.returncode != 0: + raise ValueError(f"Failed to copy files to host {host}") + + else: + _res = rsync(src=self.root_dir, dest=self.root_dir) + if _res.returncode != 0: + raise ValueError("Failed to copy files to archive") + + def archive(self, zip_files: bool = False): + """Transfer submission to archive """ + if zip_files: + self.__archive_zip() + else: + self.__archive_raw() + + def remove_all(self): + """ Remove all files related to this submission """ + shutil.rmtree(self.root_dir) + + +class ModelDir(BaseModel): + root_dir: Path + + @property + def label(self): + return self.root_dir.name + + @classmethod + def make(cls, model_id: str): + root = _settings.submission_dir / model_id + root.mkdir(exist_ok=True, parents=True) + + @classmethod + def load(cls, model_id: str): + root = 
_settings.submission_dir / model_id + + if not root.is_dir(): + raise FileNotFoundError('Model Folder not found') + + return cls(root_dir=root) + + def make_submission( + self, submission_id: str, benchmark_label: str, auto_eval: bool, + username: str, filehash: str, has_scores: bool, multipart: bool, + index: Optional[List[SubmissionRequestFileIndexItem]], + leaderboard_file: Optional[str] = None + ) -> SubmissionDir: + root_dir = self.root_dir / submission_id + if root_dir.is_dir(): + raise FileExistsError(f'Submission {submission_id} cannot be created as it already exists') + # create the dir + root_dir.mkdir() + submission_dir = SubmissionDir(root_dir=root_dir) + submission_dir.content_location.mkdir() + + # Submission generic info + sub_info = SubmissionInfo( + model_id=self.label, + username=username, + benchmark_label=benchmark_label, + submission_id=submission_id, + created_at=datetime.now(), + leaderboard_entry=leaderboard_file + ) + # save info to file + with submission_dir.info_file.open('w') as fp: + fp.write(sub_info.json(indent=4)) + + if multipart: + if len(index) <= 0: + raise ValueError('Parts list is empty') + submission_dir.multipart_dir.mkdir(exist_ok=True) + upload_handler = MultiPartUploadHandler( + store_location=submission_dir.multipart_dir, + target_location=submission_dir.root_dir, + merge_hash=filehash, + index=[ManifestIndexItem.from_api(i) for i in index] + ) + with submission_dir.multipart_index_file.open('w') as fp: + fp.write( + upload_handler.json(indent=4) + ) + else: + with submission_dir.content_archive_hash_file.open('w') as fp: + fp.write(filehash) + + submission_dir.log_handler.header( + who=username, + task=benchmark_label, + multipart=multipart, + has_scores=has_scores, + auto_eval=auto_eval + ) + return submission_dir + + @property + def submissions(self) -> List[SubmissionDir]: + return [ + SubmissionDir.load(self.label, sub_id.name) + for sub_id in self.root_dir.iterdir() + if sub_id.is_dir() + ] + + def 
get_submission(self, submission_id: str): + return SubmissionDir.load(self.label, submission_id) diff --git a/vocolab/core/submission_lib/upload.py b/vocolab/core/submission_lib/upload.py new file mode 100644 index 0000000..41ea21b --- /dev/null +++ b/vocolab/core/submission_lib/upload.py @@ -0,0 +1,194 @@ +import json +import shutil +from pathlib import Path +from typing import List, Optional + +import numpy as np +import pandas as pd +from fastapi import UploadFile +from filesplit.merge import Merge +from pydantic import BaseModel, Field + +from vocolab import exc +from .logs import SubmissionLogger +from ..commons import md5sum +from ...data.models.api import SubmissionRequestFileIndexItem + +""" +####### File Splitting Note ####### +Splitting & Merging of archives uses the protocol defined by the filesplit package. +This protocol requires the split to use the same method as a manifest is created which +then allows to merge the parts into the original file. + +For more information see documentation : https://pypi.org/project/filesplit/ + +NOTE: v3.0.2 is currently used, an update to v4 needs to be implemented. 
+""" + + +class SinglePartUploadHandler(BaseModel): + root_dir: Path + + @property + def target_file(self): + return self.root_dir / 'content_archive.zip' + + @property + def hash_file_location(self) -> Path: + """ singlepart upload can be verified by the checksum inside this file """ + return self.root_dir / 'archive.hash' + + @property + def file_hash(self): + """ Load promised md5sum of content archive """ + with self.hash_file_location.open() as fp: + return fp.read().replace('\n', '') + + def completed(self) -> bool: + return self.target_file.is_file() + + def write_data(self, logger: SubmissionLogger, file_name: str, data: UploadFile): + logger.log(f"adding a new part to upload: {file_name}") + + # Add the part + with self.target_file.open('wb') as fp: + for d in data.file: + fp.write(d) + + calc_hash = md5sum(self.target_file) + + if not self.file_hash == calc_hash: + # todo: more stuff see multipart fail + self.target_file.unlink() + raise exc.ValueNotValid("Hash does not match expected!") + + logger.log(f" --> file was uploaded successfully", date=False) + + def clean(self): + """ Delete download artifacts """ + pass + + +class ManifestIndexItem(BaseModel): + """ Model representing a file item in the SplitManifest """ + file_name: str + file_size: int + file_hash: str + + def __eq__(self, other: 'ManifestIndexItem'): + return self.file_hash == other.file_hash + + def __hash__(self): + return int(self.file_hash, 16) + + @classmethod + def from_api(cls, item: SubmissionRequestFileIndexItem): + return cls( + file_name=item.filename, + file_size=item.filesize, + file_hash=item.filehash + ) + + +class MultiPartUploadHandler(BaseModel): + """ Data Model used for the binary split function as a manifest to allow merging """ + store_location: Path + merge_hash: str + index: List[ManifestIndexItem] + received: Optional[List[ManifestIndexItem]] = Field(default_factory=list) + target_location: Path + + @property + def target_file(self): + return self.target_location / 
'submission.zip' + + @property + def success(self): + return self.target_file.is_file() + + @property + def remaining_items(self) -> set[ManifestIndexItem]: + """ Return a set with remaining items """ + return set(self.index) - set(self.received) + + @property + def remaining_nb(self) -> int: + return len(self.remaining_items) + + def completed(self) -> bool: + return len(self.received) == len(self.index) + + @classmethod + def load_from_index(cls, file: Path): + """ Load """ + with file.open() as fp: + return cls.parse_obj(json.load(fp)) + + def dump_to_index(self, file: Path): + with file.open("w") as fp: + fp.write(self.json(indent=4)) + + def dump_manifest(self): + # todo: implement + pass + + def add_part(self, logger: SubmissionLogger, file_name: str, data: UploadFile): + """ Add a part to a multipart upload type submission. + + - Write the data into a file inside the submission folder. + + :raises + - JSONError, ValidationError: If manifest is not properly formatted + - ResourceRequestedNotFound: if file not present in the manifest + - ValueNotValid if md5 hash of file does not match md5 recorded in the manifest + """ + logger.log(f"adding a new part to upload: {self.store_location / file_name}") + + # write data on disk + file_part = self.store_location / file_name + with file_part.open('wb') as fp: + for d in data.file: + fp.write(d) + + calc_hash = md5sum(file_part) + new_item_mf = ManifestIndexItem( + file_name=file_name, + file_hash=calc_hash, + file_size=file_part.stat().st_size + ) + + if new_item_mf not in self.index: + logger.log(f"(ERROR) file {file_name} was not found in manifest, upload canceled!!") + file_part.unlink() + logger.log(f"(ERROR) {data}, upload canceled!!") + raise exc.ResourceRequestedNotFound(f"Part {file_name} is not part of submission {logger.submission_id}!!") + + # up count of received parts + self.received.append(new_item_mf) + logger.log(" --> part was added successfully", date=False) + + def merge_parts(self): + """ Merge 
parts into the target file using filesplit protocol """ + for item in self.index: + assert md5sum(self.store_location / item.file_name) == item.file_hash, \ + f"file {item.file_name} does not match md5" + + df = pd.DataFrame([ + (i.file_name, i.file_size, False) + for i in self.index + ]) + df.columns = ['filename', 'filesize', 'header'] + df.to_csv((self.store_location / 'manifest')) + + merge = Merge( + inputdir=str(self.store_location), + outputdir=str(self.target_location), + outputfilename=self.target_file.name + ) + merge.merge() + # Check + assert md5sum(self.target_file) == self.merge_hash, "output file does not match original md5" + + def clean(self): + """ Delete index & parts used for multipart upload """ + shutil.rmtree(self.store_location) diff --git a/vocolab/core/users_lib.py b/vocolab/core/users_lib.py new file mode 100644 index 0000000..43348ff --- /dev/null +++ b/vocolab/core/users_lib.py @@ -0,0 +1,66 @@ +import hashlib +import json +import os +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, Extra, EmailStr + +from vocolab import get_settings, exc + +_settings = get_settings() + + +class UserProfileData(BaseModel): + username: str + affiliation: str + first_name: Optional[str] + last_name: Optional[str] + verified: bool + email: EmailStr + created: Optional[datetime] + + class Config: + extra = Extra.allow + + @classmethod + def load(cls, username: str): + db_file = (_settings.user_data_dir / f"{username}.json") + if not db_file.is_file(): + raise exc.UserNotFound('user requested has no data entry') + + with db_file.open() as fp: + return cls.parse_obj(json.load(fp)) + + def save(self): + if not _settings.user_data_dir.is_dir(): + _settings.user_data_dir.mkdir(parents=True) + + with (_settings.user_data_dir / f"{self.username}.json").open('w') as fp: + fp.write(self.json(indent=4)) + + def delete(self): + """ Delete profile data from disk""" + file = (_settings.user_data_dir / f"{self.username}.json") 
+ file.unlink(missing_ok=True) + + +def hash_pwd(*, password: str, salt=None): + """ Creates a hash of the given password. + If salt is None generates a random salt. + + :arg password the password to hash + :arg salt a value to salt the hashing + :returns hashed_password, salt + """ + + if salt is None: + salt = os.urandom(32) # make random salt + + hash_pass = hashlib.pbkdf2_hmac( + 'sha256', # The hash digest algorithm for HMAC + password.encode('utf-8'), # Convert the password to bytes + salt, # Provide the salt + 100000 # It is recommended to use at least 100,000 iterations of SHA-256 + ) + return hash_pass, salt diff --git a/vocolab/lib/worker_lib/__init__.py b/vocolab/core/worker_lib/__init__.py similarity index 100% rename from vocolab/lib/worker_lib/__init__.py rename to vocolab/core/worker_lib/__init__.py diff --git a/vocolab/lib/worker_lib/tasks/__init__.py b/vocolab/core/worker_lib/tasks/__init__.py similarity index 100% rename from vocolab/lib/worker_lib/tasks/__init__.py rename to vocolab/core/worker_lib/tasks/__init__.py diff --git a/vocolab/lib/worker_lib/tasks/echo.py b/vocolab/core/worker_lib/tasks/echo.py similarity index 73% rename from vocolab/lib/worker_lib/tasks/echo.py rename to vocolab/core/worker_lib/tasks/echo.py index 4e378f6..cb11e98 100644 --- a/vocolab/lib/worker_lib/tasks/echo.py +++ b/vocolab/core/worker_lib/tasks/echo.py @@ -1,11 +1,11 @@ import os from vocolab import out, get_settings -from vocolab.db.models import tasks +from vocolab.data import models _settings = get_settings() -def echo_fn(slm: tasks.SimpleLogMessage): +def echo_fn(slm: models.tasks.SimpleLogMessage): """ Simple task that echoes a message into the log""" out.log.info(f"{os.getpid()} | \[{slm.timestamp.isoformat()}\] {slm.message}") diff --git a/vocolab/lib/worker_lib/tasks/eval.py b/vocolab/core/worker_lib/tasks/eval.py similarity index 67% rename from vocolab/lib/worker_lib/tasks/eval.py rename to vocolab/core/worker_lib/tasks/eval.py index d20c86e..0528f0d 
100644 --- a/vocolab/lib/worker_lib/tasks/eval.py +++ b/vocolab/core/worker_lib/tasks/eval.py @@ -4,8 +4,8 @@ from typing import List from vocolab import out, get_settings, exc -from vocolab.db.models import tasks -from vocolab.lib import submissions_lib +from vocolab.data import models +from vocolab.core import submission_lib _settings = get_settings() @@ -25,26 +25,26 @@ def verify_host_bin(): raise exc.ServerError(f"No bin directory configured for current host {_settings.app_options.hostname}") -def build_cmd(_cmd: tasks.SubmissionEvaluationMessage) -> List[str]: +def build_cmd(_cmd: models.tasks.SubmissionEvaluationMessage) -> List[str]: """ Build a subprocess command from an evaluation message """ executor = _cmd.executor.to_exec() if executor is None: raise ValueError(f'{_cmd.executor} is not present in system') - sub_dir = submissions_lib.get_submission_dir(_cmd.submission_id) + sub_dir = submission_lib.get_submission_dir(_cmd.submission_id) bin_path = Path(_cmd.bin_path).resolve() verify_bin(bin_path) script = bin_path / _cmd.script_name cmd_list = [executor] - if _cmd.executor == tasks.ExecutorsType.sbatch: + if _cmd.executor == models.tasks.ExecutorsType.sbatch: cmd_list.extend([ f"--job-name='{_cmd.label}'", # name the job on slurmDB f"--output={sub_dir}/slurm.log", "--wait", # wait for the process to complete ]) - elif _cmd.executor == tasks.ExecutorsType.docker: + elif _cmd.executor == models.tasks.ExecutorsType.docker: raise NotImplementedError("should add some verification for docker-run support") # custom executor args from DB @@ -57,7 +57,7 @@ def build_cmd(_cmd: tasks.SubmissionEvaluationMessage) -> List[str]: return cmd_list -def eval_subprocess(_cmd: tasks.SubmissionEvaluationMessage): +def eval_subprocess(_cmd: models.tasks.SubmissionEvaluationMessage): """ Evaluate a subprocess type BrokerCMD """ cmd_array = build_cmd(_cmd) out.log.debug(f"$> {shlex.join(cmd_array)}") @@ -71,27 +71,29 @@ def eval_subprocess(_cmd: 
tasks.SubmissionEvaluationMessage): return result.returncode, output -def post_eval_update(status: int, sem: tasks.SubmissionEvaluationMessage): +def post_eval_update(status: int, sem: models.tasks.SubmissionEvaluationMessage): """ Send message to update queue that evaluation is completed. """ - from vocolab.worker.server import update - from vocolab.db.models.tasks import SubmissionUpdateMessage, UpdateType - - sum_ = SubmissionUpdateMessage( - label=f"{_settings.app_options.hostname}-completed-{sem.submission_id}", - submission_id=sem.submission_id, - updateType=UpdateType.evaluation_undefined, - hostname=f"{_settings.app_options.hostname}" - ) - if status == 0: - sum_.updateType = UpdateType.evaluation_complete - else: - sum_.updateType = UpdateType.evaluation_failed - - # send update to channel - update.delay(sum_=sum_.dict()) - - -def evaluate_submission_fn(sem: tasks.SubmissionEvaluationMessage): + pass + # todo recheck + # from vocolab.worker.server import update + # from vocolab.db.models.tasks import SubmissionUpdateMessage, UpdateType + # + # sum_ = SubmissionUpdateMessage( + # label=f"{_settings.app_options.hostname}-completed-{sem.submission_id}", + # submission_id=sem.submission_id, + # updateType=UpdateType.evaluation_undefined, + # hostname=f"{_settings.app_options.hostname}" + # ) + # if status == 0: + # sum_.updateType = UpdateType.evaluation_complete + # else: + # sum_.updateType = UpdateType.evaluation_failed + # + # # send update to channel + # update.delay(sum_=sum_.dict()) + + +def evaluate_submission_fn(sem: models.tasks.SubmissionEvaluationMessage): status, eval_output = eval_subprocess(sem) if status == 0: out.log.info(f"Evaluation of {sem.submission_id} was completed successfully") @@ -100,7 +102,7 @@ def evaluate_submission_fn(sem: tasks.SubmissionEvaluationMessage): f"with a non zero return code. 
see logs for details!!") # write output in log - with submissions_lib.SubmissionLogger(sem.submission_id) as lg: + with submission_lib.SubmissionLogger(sem.submission_id) as lg: lg.append_eval(eval_output) # send submission evaluation result diff --git a/vocolab/core/worker_lib/tasks/update.py b/vocolab/core/worker_lib/tasks/update.py new file mode 100644 index 0000000..30244ff --- /dev/null +++ b/vocolab/core/worker_lib/tasks/update.py @@ -0,0 +1,31 @@ +import asyncio + +from vocolab import out, get_settings +from vocolab.data import models +from vocolab.core import submission_lib + +_settings = get_settings() + + +def update_task_fn(sum_: models.tasks.SubmissionUpdateMessage): + async def eval_function(msg: models.tasks.SubmissionUpdateMessage): + """ Evaluate a function type BrokerCMD """ + with submission_lib.SubmissionLogger(msg.submission_id) as lg: + out.log.debug(msg.dict()) + + if msg.updateType == models.tasks.UpdateType.evaluation_complete: + await submission_lib.complete_evaluation( + submission_id=msg.submission_id, hostname=msg.hostname, + logger=lg) + elif msg.updateType == models.tasks.UpdateType.evaluation_failed: + await submission_lib.fail_evaluation( + submission_id=msg.submission_id, hostname=msg.hostname, + logger=lg) + elif msg.updateType == models.tasks.UpdateType.evaluation_canceled: + await submission_lib.cancel_evaluation( + submission_id=msg.submission_id, hostname=msg.hostname, + logger=lg) + else: + raise ValueError("Unknown update task !!!") + + asyncio.run(eval_function(sum_)) diff --git a/vocolab/lib/worker_lib/utils.py b/vocolab/core/worker_lib/utils.py similarity index 100% rename from vocolab/lib/worker_lib/utils.py rename to vocolab/core/worker_lib/utils.py diff --git a/vocolab/data/__init__.py b/vocolab/data/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vocolab/db/admin.py b/vocolab/data/admin.py similarity index 100% rename from vocolab/db/admin.py rename to vocolab/data/admin.py diff --git 
a/vocolab/data/db.py b/vocolab/data/db.py new file mode 100644 index 0000000..6c0ef2c --- /dev/null +++ b/vocolab/data/db.py @@ -0,0 +1,21 @@ +import databases +import sqlalchemy + +from vocolab import get_settings +from vocolab.data import exc as db_exc +from .tables import tables_metadata + + +_settings = get_settings() + +# Database Connection +zrDB = databases.Database(_settings.database_connection_url) + +def build_database_from_schema(): + if not _settings.database_file.is_file(): + _settings.database_file.touch() + + engine = sqlalchemy.create_engine( + _settings.database_connection_url, connect_args={"check_same_thread": False} + ) + tables_metadata.create_all(engine) diff --git a/vocolab/db/exc.py b/vocolab/data/exc.py similarity index 100% rename from vocolab/db/exc.py rename to vocolab/data/exc.py diff --git a/vocolab/data/model_queries/__init__.py b/vocolab/data/model_queries/__init__.py new file mode 100644 index 0000000..0944ac8 --- /dev/null +++ b/vocolab/data/model_queries/__init__.py @@ -0,0 +1,4 @@ +from .auth import * +from .challenges import * +from .models import * +from .leaderboard import * diff --git a/vocolab/data/model_queries/auth.py b/vocolab/data/model_queries/auth.py new file mode 100644 index 0000000..8229d6f --- /dev/null +++ b/vocolab/data/model_queries/auth.py @@ -0,0 +1,232 @@ +import json +import secrets +from datetime import datetime +from typing import Optional, List, Iterable + +from email_validator import validate_email, EmailNotValidError +from jose import jwt, JWTError # noqa: false flags from requirements https://youtrack.jetbrains.com/issue/PY-27985 +from pydantic import BaseModel, EmailStr, Field, ValidationError + +from vocolab.data import models, tables, exc as db_exc +from ..db import zrDB +from ...core import users_lib +from ...settings import get_settings + +_settings = get_settings() + + +class User(BaseModel): + id: int + username: str + email: EmailStr + active: bool + verified: str + hashed_pswd: bytes + salt: 
bytes + created_at: Optional[datetime] + + @property + def enabled(self) -> bool: + """ Check if a user is enabled (active & verified)""" + return self.active and self.is_verified() + + class Config: + orm_mode = True + + def is_verified(self) -> bool: + """ Check whether a user has been verified""" + return self.verified == 'True' + + def password_matches(self, password: str) -> bool: + """ Check if given password matches users """ + hashed_pwd, _ = users_lib.hash_pwd(password=password, salt=self.salt) + return hashed_pwd == self.hashed_pswd + + async def change_password(self, new_password: str, password_validation: str): + """ Modify a users password """ + if new_password != password_validation: + raise ValueError('passwords do not match') + + hashed_pswd, salt = users_lib.hash_pwd(password=new_password) + query = tables.users_table.update().where( + tables.users_table.c.id == self.id + ).values(hashed_pswd=hashed_pswd, salt=salt) + await zrDB.execute(query) + + async def delete(self): + query = tables.users_table.delete().where( + tables.users_table.c.id == self.id + ) + await zrDB.execute(query) + + async def verify(self, verification_code: str, force: bool = False) -> bool: + """ Verify a user using a verification code, (can be forced) """ + if self.is_verified(): + return True + + query = tables.users_table.update().where( + tables.users_table.c.id == self.id + ).values(verified='True') + + if self.verified == verification_code or force: + await zrDB.execute(query) + return True + return False + + async def toggle_status(self, active: bool = True): + """ Toggles a users status from active to inactive """ + query = tables.users_table.update().where( + tables.users_table.c.id == self.id + ).values( + active=active + ) + await zrDB.execute(query) + + def get_profile_data(self) -> users_lib.UserProfileData: + """ Load users profile data """ + return users_lib.UserProfileData.load(self.username) + + @classmethod + async def get(cls, *, by_uid: Optional[int] = 
None, by_username: Optional[str] = None, + by_email: Optional[str] = None) -> "User": + """ Get a user from the database """ + if by_uid: + query = tables.users_table.select().where( + tables.users_table.c.id == by_uid + ) + elif by_username: + query = tables.users_table.select().where( + tables.users_table.c.username == by_username + ) + elif by_email: + query = tables.users_table.select().where( + tables.users_table.c.email == by_email + ) + else: + raise ValueError('a value must be provided : uid, username, email') + + user_data = await zrDB.fetch_one(query) + if user_data is None: + raise ValueError(f'database does not contain a user for given description') + + return cls(**user_data) + + @classmethod + async def login(cls, login_id: str, password: str) -> Optional["User"]: + try: + validate_email(login_id) # check if email is valid + query = tables.users_table.select().where( + tables.users_table.c.email == login_id + ) + except EmailNotValidError: + query = tables.users_table.select().where( + tables.users_table.c.username == login_id + ) + + user_data = await zrDB.fetch_one(query) + if user_data is None: + return None + + current_user = cls(**user_data) + # check password + hashed_pswd, _ = users_lib.hash_pwd(password=password, salt=current_user.salt) + if current_user.enabled and hashed_pswd == current_user.hashed_pswd: + return current_user + return None + + @classmethod + async def create(cls, *, new_usr: models.api.UserCreateRequest): + """ Create a new user entry in the users database """ + hashed_pswd, salt = users_lib.hash_pwd(password=new_usr.pwd) + verification_code = secrets.token_urlsafe(8) + try: + # insert user entry into the database + query = tables.users_table.insert().values( + username=new_usr.username, + email=new_usr.email, + active=True, + verified=verification_code, + hashed_pswd=hashed_pswd, + salt=salt, + created_at=datetime.now() + ) + await zrDB.execute(query) + except Exception as e: + db_exc.parse_user_insertion(e) + + # create 
user profile data + profile_data = users_lib.UserProfileData( + username=new_usr.username, + email=new_usr.email, + affiliation=new_usr.affiliation, + first_name=new_usr.first_name, + last_name=new_usr.last_name, + verified=False, + created=datetime.now() + ) + profile_data.save() + return verification_code + + +class UserList(BaseModel): + items: List[User] + + + def __iter__(self) -> Iterable[User]: + return iter(self.items) + + @classmethod + async def get(cls, active_only: bool = False) -> "UserList": + """ Get all existing users, flag allows to filter non-active users """ + query = tables.users_table.select() + if active_only: + query = tables.users_table.select().where( + tables.users_table.c.active == True + ) + user_list = await zrDB.fetch_all(query) + if user_list is None: + raise ValueError(f'database does not contain any user') + return cls(items=user_list) + + @classmethod + async def toggle_status(cls, active: bool = True): + """ Toggles all users status from active to inactive """ + query = tables.users_table.update().values( + active=active + ) + return await zrDB.execute(query) + + @classmethod + async def verify(cls): + query = tables.users_table.update().values( + verified="True" + ) + await zrDB.execute(query) + + +class Token(BaseModel): + """ API Session Token """ + expires_at: datetime = Field(default_factory=lambda: datetime.now() + _settings.user_options.session_expiry_delay) + created_at: datetime = Field(default_factory=lambda: datetime.now()) + allow_password_reset: bool = False # used for password reset sessions + user_email: EmailStr + + def is_expired(self) -> bool: + """ Check if Token has expired """ + return self.expires_at < datetime.now() + + def encode(self) -> str: + """ Encode into a token string """ + # passing by json allows to convert datetimes to strings using pydantic serializer + as_dict = json.loads(self.json()) + return jwt.encode(claims=as_dict, key=_settings.secret, algorithm=_settings.api_options.token_encryption) + + 
@classmethod + def decode(cls, encoded_token: str): + """ Decode token from encoded string """ + try: + payload = jwt.decode(token=encoded_token, key=_settings.secret, + algorithms=[_settings.api_options.token_encryption]) + return Token(**payload) + except (JWTError, ValidationError) as e: + raise ValueError("Invalid token") from e diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py new file mode 100644 index 0000000..75b38cf --- /dev/null +++ b/vocolab/data/model_queries/challenges.py @@ -0,0 +1,184 @@ +import shlex +from datetime import date +from typing import Optional, List, Any, Iterable + +from pydantic import BaseModel, HttpUrl + +from vocolab import get_settings +from vocolab.core import misc +from vocolab.data import models, tables +from ..db import zrDB, db_exc + +st = get_settings() + + +class EvaluatorItem(BaseModel): + """ Data representation of an evaluator """ + id: int + label: str + executor: models.tasks.ExecutorsType + host: Optional[str] + script_path: str + executor_arguments: str + + class Config: + orm_mode = True + + async def update_args(self, arg_list: List[str]): + query = tables.evaluators_table.update().where( + tables.evaluators_table.c.id == self.id + ).values(executor_arguments=shlex.join(arg_list)) + await zrDB.execute(query) + + @classmethod + async def add_or_update(cls, *, evl_item: models.cli.NewEvaluatorItem): + query = tables.evaluators_table.select().where( + tables.evaluators_table.c.label == evl_item.label + ).where( + tables.evaluators_table.c.host == evl_item.host + ) + res = await zrDB.fetch_one(query) + + if res is None: + await zrDB.execute(tables.evaluators_table.insert(), evl_item.dict()) + else: + update_query = tables.evaluators_table.update().where( + tables.evaluators_table.c.id == res.id + ).values( + executor=evl_item.executor, script_path=evl_item.script_path, + executor_arguments=evl_item.executor_arguments + ) + await zrDB.execute(update_query) + + @classmethod + 
async def get(cls, by_id: int) -> Optional["EvaluatorItem"]: + query = tables.evaluators_table.select().where( + tables.evaluators_table.c.id == by_id + ) + result = await zrDB.fetch_one(query) + if not result: + return None + return cls.parse_obj(result) + + +class EvaluatorList(BaseModel): + items: List[EvaluatorItem] + + def __iter__(self) -> Iterable[EvaluatorItem]: + return iter(self.items) + + @classmethod + async def get(cls) -> "EvaluatorList": + query = tables.evaluators_table.select() + results = await zrDB.fetch_all(query) + if not results: + return cls(items=[]) + return cls(items=results) + + +class Benchmark(BaseModel): + """ Data representation of a challenge """ + label: str + start_date: date + end_date: Optional[date] + active: bool + url: HttpUrl + evaluator: Optional[int] + auto_eval: bool = st.task_queue_options.AUTO_EVAL + + class Config: + orm_mode = True + + def is_active(self) -> bool: + """ Checks if challenge is active """ + present = date.today() + if self.end_date: + return self.start_date <= present <= self.end_date and self.active + else: + return self.start_date <= present and self.active + + @classmethod + def get_field_names(cls): + return list(cls.__fields__.keys()) + + @classmethod + async def create(cls, item: models.cli.NewChallenge): + try: + query = tables.benchmarks_table.insert().values( + **item.dict() + ) + await zrDB.execute(query) + except Exception as e: + db_exc.parse_user_insertion(e) + + @classmethod + async def get(cls, *, benchmark_id: str, allow_inactive: bool = False) -> "Benchmark": + query = tables.benchmarks_table.select().where( + tables.benchmarks_table.c.label == benchmark_id + ) + ch_data = await zrDB.fetch_one(query) + if ch_data is None: + raise ValueError(f'There is no challenge with the following id: {benchmark_id}') + ch = cls.parse_obj(ch_data) + if allow_inactive: + return ch + else: + if not ch.is_active(): + raise ValueError(f"The Challenge {ch.label} is not active") + return ch + + async def 
update_property(self, *, variable_name: str, value: Any, allow_parsing: bool = False): + """ Update a property """ + if not hasattr(self, variable_name): + raise ValueError(f'Class Challenge does not have a member called ! {variable_name}') + + variable_type = type(getattr(self, variable_name)) + + if allow_parsing: + value = misc.str2type(value, variable_type) + + if value is not None and not isinstance(value, variable_type): + raise ValueError(f"Challenge.{variable_name} should be of type {variable_type}") + + setattr(self, variable_name, value) + + # update database + query = tables.benchmarks_table.update().where( + tables.benchmarks_table.c.label == self.label + ).values({f"{variable_name}": value}) + + try: + await zrDB.execute(query) + except Exception as e: + db_exc.parse_user_insertion(e) + + return value + + async def delete(self): + """ Remove from database """ + query = tables.benchmarks_table.delete().where( + tables.benchmarks_table.c.label == self.label + ) + await zrDB.execute(query) + + +class BenchmarkList(BaseModel): + items: List[Benchmark] + + def __iter__(self) -> Iterable[Benchmark]: + yield from self.items + + def filter_active(self) -> "BenchmarkList": + self.items = [i for i in self.items if i.is_active()] + return self + + @classmethod + async def get(cls, include_all: bool = False) -> "BenchmarkList": + query = tables.benchmarks_table.select() + challenges = await zrDB.fetch_all(query) + if challenges is None: + raise ValueError('No challenges were found') + + if include_all: + return cls(items=challenges) + return cls(items=challenges).filter_active() diff --git a/vocolab/data/model_queries/leaderboard.py b/vocolab/data/model_queries/leaderboard.py new file mode 100644 index 0000000..ccbf43d --- /dev/null +++ b/vocolab/data/model_queries/leaderboard.py @@ -0,0 +1,217 @@ +from dataclasses import asdict +from datetime import datetime +from pathlib import Path +from typing import Optional, Any, Iterable + +from pydantic import BaseModel, 
Json +from vocolab_ext.leaderboards import LeaderboardEntryBase + +from vocolab import get_settings +from vocolab.core import misc, leaderboards_lib +from vocolab.data import tables +from .auth import User +from ..db import zrDB, db_exc + +st = get_settings() + + +class Leaderboard(BaseModel): + """ Data representation of a Leaderboard """ + label: str # Name of leaderboard + benchmark_id: str # Label of the Benchmark + archived: bool # is_archived + static_files: bool # has static files + sorting_key: Optional[str] # path to the item to use as sorting key + + @classmethod + def get_field_names(cls): + return list(cls.__fields__.keys()) + + class Config: + orm_mode = True + + def get_dir(self) -> leaderboards_lib.LeaderboardDir: + return leaderboards_lib.LeaderboardDir.load( + label=self.label, + sorting_key=self.sorting_key + ) + + @classmethod + async def create(cls, ld_data: 'Leaderboard'): + query = tables.leaderboards_table.insert().values( + label=ld_data.label, + benchmark_id=ld_data.benchmark_id, + archived=ld_data.archived, + static_files=ld_data.static_files, + sorting_key=ld_data.sorting_key + ) + try: + result = await zrDB.execute(query) + + # make necessary folders in storage + _ = leaderboards_lib.LeaderboardDir.create( + label=ld_data.label, + sorting_key=ld_data.sorting_key, + static_files=ld_data.static_files + ) + + return result + except Exception as e: + db_exc.parse_user_insertion(e) + + async def update_property(self, *, variable_name: str, value: Any, allow_parsing: bool = False): + """ Update a named property """ + if not hasattr(self, variable_name): + raise ValueError(f'Class Leaderboard does not have a member called ! 
{variable_name}') + + variable_type = type(getattr(self, variable_name)) + + if allow_parsing: + value = misc.str2type(value, variable_type) + + if value is not None and not isinstance(value, variable_type): + raise ValueError(f"Leaderboard.{variable_name} should be of type {variable_type}") + + if value is None: + if not self.__fields__.get(variable_name).allow_none: + raise ValueError(f'LeaderBoard.{variable_name} cannot be None/Null') + else: + if not isinstance(value, variable_type): + raise ValueError(f"Leaderboard.{variable_name} should be of type {variable_type}") + + # set value + setattr(self, variable_name, value) + + # Path is not supported by sqlite as a raw type + if variable_type == Path: + value = str(value) + + query = tables.leaderboards_table.update().where( + tables.leaderboards_table.c.label == self.label + ).values({f"{variable_name}": str(value)}) + try: + await zrDB.execute(query) + except Exception as e: + db_exc.parse_user_insertion(e) + + return value + + @classmethod + async def get(cls, leaderboard_id: str) -> Optional["Leaderboard"]: + query = tables.leaderboards_table.select().where( + tables.leaderboards_table.c.label == leaderboard_id + ) + ld = await zrDB.fetch_one(query) + if ld is None: + return None + return cls.parse_obj(ld) + + +class LeaderboardList(BaseModel): + items: list[Leaderboard] + + def __iter__(self) -> Iterable[Leaderboard]: + return iter(self.items) + + @classmethod + async def get_all(cls) -> "LeaderboardList": + query = tables.leaderboards_table.select() + ld_list = await zrDB.fetch_all(query) + if not ld_list: + return cls(items=[]) + return cls(items=ld_list) + + @classmethod + async def get_by_challenge(cls, benchmark_id: str) -> "LeaderboardList": + query = tables.leaderboards_table.select().where( + tables.leaderboards_table.c.benchmark_id == benchmark_id + ) + ld_list = await zrDB.fetch_all(query) + if not ld_list: + return cls(items=[]) + return cls(items=ld_list) + + +class LeaderboardEntry(BaseModel): + 
""" Data representation of a leaderboard entry """ + id: Optional[int] + data: Json + entry_path: Path + submission_id: str + leaderboard_id: str + model_id: str + user_id: int + authors: str + author_label: str + description: str + submitted_at: datetime + + async def base(self) -> LeaderboardEntryBase: + user = await User.get(by_uid=self.user_id) + return LeaderboardEntryBase( + submission_id=self.submission_id, + model_id=self.model_id, + description=self.description, + authors=self.authors, + author_label=self.author_label, + submission_date=self.submitted_at, + submitted_by=user.username + ) + + async def update(self, base: LeaderboardEntryBase): + self.submission_id = base.submission_id + self.model_id = base.model_id + self.description = base.description + self.authors = base.authors + self.author_label = base.author_label + self.submitted_at = base.submission_date + + base_dict = asdict(base) + del base["submitted_by"] + query = tables.leaderboards_table.update().where( + tables.leaderboard_entry_table.c.id == self.id + ).values( + **base_dict + ) + await zrDB.execute(query) + # todo: check how this would work ??? 
+ (await self.leaderboard()).get_dir().update_entry(await self.base()) + + async def leaderboard(self) -> Leaderboard: + return await Leaderboard.get(self.leaderboard_id) + + @classmethod + async def get(cls, by_id) -> Optional["LeaderboardEntry"]: + query = tables.leaderboard_entry_table.select().where( + tables.leaderboard_entry_table.c.id == by_id + ) + ld = await zrDB.fetch_one(query) + if ld is None: + return None + return cls.parse_obj(ld) + + +class LeaderboardEntryList(BaseModel): + items: list[LeaderboardEntry] + + def __iter__(self) -> Iterable[LeaderboardEntry]: + yield from self.items + + def as_leaderboard(self) -> dict: + # todo: check data format + return dict( + last_modified=datetime.now().isoformat(), + data=[ + entry.data + for entry in self + ] + ) + + @classmethod + async def get_from_leaderboard(cls, leaderboard_label: str): + """ Get all entries of leaderboard""" + query = tables.leaderboard_entry_table.select().where( + tables.leaderboard_entry_table.c.leaderboard_id == leaderboard_label + ) + entries = await zrDB.fetch_all(query) + return cls(items=entries) diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py new file mode 100644 index 0000000..20c5548 --- /dev/null +++ b/vocolab/data/model_queries/models.py @@ -0,0 +1,310 @@ +import math +from datetime import datetime +from enum import Enum +from itertools import chain, product +from typing import Optional, List, Iterable + +from pydantic import BaseModel, AnyHttpUrl + +from vocolab import get_settings +from vocolab.data import db, tables, models +from .challenges import Benchmark + +_settings = get_settings() + + +class ModelID(BaseModel): + """ Data representation of a Model id & its metadata""" + id: str + user_id: int + created_at: datetime + description: str + gpu_budget: Optional[str] + train_set: str + authors: str + institution: str + team: Optional[str] + paper_url: Optional[AnyHttpUrl] + code_url: Optional[AnyHttpUrl] + + @staticmethod + def 
nth_word(n: int) -> str: + """ Calculate the nth word of the english lower alphabet + + This function returns a string that counts using the lower english alphabet. + 0 -> '' + 1 -> a + ... + 26 -> z + 27 -> aa + 28 -> ab + ... + 53 -> ba + ... etc + + Note: This methods becomes kind of slow for numbers larger than 10_000_000 + + """ + nb_letters = 26 # use english alphabet [1, 26] lowercase letters + letters = [chr(97 + i) for i in range(nb_letters)] + # calculate word_length + word_length = math.ceil(math.log((n + 1) * (nb_letters - 1) + 1) / math.log(nb_letters)) + # Build all possible combinations for the given word length + it = chain.from_iterable( + (product(letters, repeat=i) for i in range(word_length)) + ) + # find word in iterable + word = next(w for i, w in enumerate(it) if i == n) + return ''.join(word) + + @classmethod + async def create(cls, user_id: int, first_author_name: str, data: models.api.NewModelIdRequest): + """ Create a new ModelID entry in the database + + ids are created using the 3 first letters of first name of first author, + the last 2 digits of the current year and are padded with extra letters to + avoid duplicates. 
+ """ + new_model_id = f"{first_author_name[:3]}{str(datetime.now().year)[2:]}" + + counter = 1 + new_model_id_extended = f"{new_model_id}{cls.nth_word(counter)}" + while await cls.exists(new_model_id_extended): + counter += 1 + new_model_id_extended = f"{new_model_id}{cls.nth_word(counter)}" + + # create db entry + query = tables.models_table.insert().values( + id=new_model_id_extended, user_id=user_id, **data.dict()) + await db.zrDB.execute(query) + return new_model_id_extended + + @classmethod + async def exists(cls, model_id: str) -> bool: + item = await db.zrDB.fetch_one( + tables.models_table.select().where( + tables.models_table.c.id == model_id + ) + ) + return item is not None + + @classmethod + async def get(cls, model_id: str) -> Optional["ModelID"]: + """ Fetch a model_id entry from the database """ + item_data = await db.zrDB.fetch_one( + tables.models_table.select().where( + tables.models_table.c.id == model_id + ) + ) + if item_data is None: + return None + return cls.parse_obj(item_data) + + +class ModelIDList(BaseModel): + items: List[ModelID] + + def __iter__(self) -> Iterable[ModelID]: + return iter(self.items) + + @classmethod + async def get(cls) -> "ModelIDList": + items = db.zrDB.fetch_all(tables.models_table.select()) + return cls(items=items) + + @classmethod + async def get_by_user(cls, user_id: int) -> "ModelIDList": + """ Load models by user """ + query = tables.models_table.select().where( + tables.models_table.c.user_id == user_id + ) + items = await db.zrDB.fetch_all(query) + print(items, f"{type(items)=}") + if not items: + return cls(items=[]) + return cls.parse_obj(dict(items=items)) + + + +class SubmissionStatus(str, Enum): + """ Definition of different states of submissions """ + # TODO: maybe add submission type (with scores...) 
+ uploading = 'uploading' + uploaded = 'uploaded' + on_queue = 'on_queue' + validating = 'validating' # todo verify usage + invalid = 'invalid' + evaluating = 'evaluating' + completed = 'completed' + canceled = 'canceled' + failed = 'failed' + no_eval = 'no_eval' + no_auto_eval = 'no_auto_eval' + excluded = 'excluded' + + @classmethod + def get_values(cls): + return [el.value for el in cls] # noqa: enum is not typed correctly + + +class ChallengeSubmission(BaseModel): + """ Data representation of a submission to a challenge """ + id: str + user_id: int + benchmark_id: str + model_id: str + submit_date: datetime + status: SubmissionStatus + auto_eval: bool + has_scores: bool + evaluator_id: Optional[int] + author_label: Optional[str] = None + + class Config: + orm_mode = True + + @classmethod + async def create( + cls, user_id: int, username: str, + model_id: str, benchmark_id: str, + has_scores: bool, author_label: str + ) -> "ChallengeSubmission": + """ Creates a database entry for the new submission """ + benchmark = await Benchmark.get(benchmark_id=benchmark_id) + + submission_id = f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{username}" + entry = cls.parse_obj(dict( + id=submission_id, + model_id=model_id, + benchmark_id=benchmark_id, + user_id=user_id, + has_scores=has_scores, + submit_date=datetime.now(), + status=SubmissionStatus.uploading, + evaluator_id=benchmark.evaluator, + auto_eval=benchmark.auto_eval, + author_label=author_label + )) + + await db.zrDB.execute( + query=tables.submissions_table.insert(), + values=entry.dict() + ) + return entry + + @classmethod + async def get(cls, submission_id: str) -> Optional["ChallengeSubmission"]: + item_data = await db.zrDB.fetch_one( + tables.submissions_table.select().where( + tables.submissions_table.c.id == submission_id + ) + ) + if item_data is None: + return None + return cls.parse_obj(item_data) + + async def update_status(self, status: SubmissionStatus): + self.status = status + await db.zrDB.execute( + 
tables.submissions_table.update().where( + tables.submissions_table.c.id == self.id + ).values(status=status.value) + ) + + async def update_evaluator(self, evaluator_id: int): + self.evaluator_id = evaluator_id + await db.zrDB.execute( + tables.submissions_table.update().where( + tables.submissions_table.c.id == self.id + ).values(evaluator_id=evaluator_id) + ) + + async def delete(self): + await db.zrDB.execute( + tables.submissions_table.delete().where( + tables.submissions_table.c.id == self.id + ) + ) + + +class ChallengeSubmissionList(BaseModel): + """ Data representation of a list of Submissions """ + items: List[ChallengeSubmission] + + def __iter__(self) -> Iterable[ChallengeSubmission]: + return iter(self.items) + + @classmethod + async def get_from_challenge(cls, benchmark_id: str): + """ Get submissions filtered by benchmark """ + items = await db.zrDB.fetch_all( + tables.submissions_table.select().where( + tables.submissions_table.c.benchmark_id == benchmark_id + ) + ) + if items is None: + items = [] + + return cls(items=items) + + @classmethod + async def get_from_model(cls, model_id: str): + """ Get submissions filtered by model """ + items = await db.zrDB.fetch_all( + tables.submissions_table.select().where( + tables.submissions_table.c.model_id == model_id + ) + ) + if items is None: + items = [] + + return cls(items=items) + + @classmethod + async def get_from_user(cls, user_id: int): + """ Get submissions filtered by user """ + items = await db.zrDB.fetch_all( + tables.submissions_table.select().where( + tables.submissions_table.c.user_id == user_id + ) + ) + if items is None: + items = [] + + return cls.parse_obj(dict(items=items)) + + @classmethod + async def get_by_status(cls, status: SubmissionStatus): + """ Get submissions filtered by status """ + items = await db.zrDB.fetch_all( + tables.submissions_table.select().where( + tables.submissions_table.c.status == status.value + ) + ) + if items is None: + items = [] + + return cls(items=items) 
+ + @classmethod + async def get_all(cls): + """ Get all submissions """ + items = await db.zrDB.fetch_all( + tables.submissions_table.select() + ) + if items is None: + items = [] + + return cls(items=items) + + async def update_evaluators(self, evaluator_id: int): + for e in self.items: + e.evaluator_id = evaluator_id + + # todo check if query works successfully + items_id = set([e.id for e in self.items]) + await db.zrDB.execute( + tables.submissions_table.update().where( + tables.submissions_table.c.id.in_(items_id) + ).values(evaluator_id=evaluator_id) + ) diff --git a/vocolab/db/models/__init__.py b/vocolab/data/models/__init__.py similarity index 75% rename from vocolab/db/models/__init__.py rename to vocolab/data/models/__init__.py index 4897053..7673ca1 100644 --- a/vocolab/db/models/__init__.py +++ b/vocolab/data/models/__init__.py @@ -2,4 +2,3 @@ from . import cli from . import misc from . import tasks -from . import file_split diff --git a/vocolab/db/models/api/__init__.py b/vocolab/data/models/api/__init__.py similarity index 58% rename from vocolab/db/models/api/__init__.py rename to vocolab/data/models/api/__init__.py index 4d32e84..046617b 100644 --- a/vocolab/db/models/api/__init__.py +++ b/vocolab/data/models/api/__init__.py @@ -1,5 +1,4 @@ -from .users import * from .auth import * from .challenges import * from .commons import * -from .leaerboards import * +from .models import * diff --git a/vocolab/data/models/api/auth.py b/vocolab/data/models/api/auth.py new file mode 100644 index 0000000..ca1afb5 --- /dev/null +++ b/vocolab/data/models/api/auth.py @@ -0,0 +1,36 @@ +""" Dataclasses representing API/auth input output data types """ +from pydantic import BaseModel, EmailStr, validator + + +class UserCreateRequest(BaseModel): + """ Dataclass for user creation """ + username: str + email: EmailStr + pwd: str + first_name: str + last_name: str + affiliation: str + + @validator('username', 'pwd', 'first_name', 'last_name', 'affiliation') + def 
non_empty_string(cls, v): + assert v, "UserCreate does not accept empty fields" + return v + + +class LoggedItem(BaseModel): + """ Return type of the /login function """ + username: str + access_token: str + token_type: str + + +class CurrentUser(BaseModel): + """ Basic userinfo Model """ + username: str + email: EmailStr + + +class PasswordResetRequest(BaseModel): + """ Input Schema for /password/reset request """ + username: str + email: EmailStr diff --git a/vocolab/db/models/api/challenges.py b/vocolab/data/models/api/challenges.py similarity index 70% rename from vocolab/db/models/api/challenges.py rename to vocolab/data/models/api/challenges.py index 259e88d..6f5eb16 100644 --- a/vocolab/db/models/api/challenges.py +++ b/vocolab/data/models/api/challenges.py @@ -1,26 +1,9 @@ """ Dataclasses representing API/challenge input output data types """ from datetime import date -from typing import Optional, List, Tuple +from pathlib import Path +from typing import Optional, List, Tuple, Dict -from pydantic import BaseModel, HttpUrl - - -class ChallengePreview(BaseModel): - """ Used as response type for root challenge list request""" - id: int - label: str - active: bool - - -class ChallengesResponse(BaseModel): - """ Used as response type for preview of a challenge """ - id: int - label: str - start_date: date - end_date: Optional[date] - active: bool - url: HttpUrl - evaluator: Optional[int] +from pydantic import BaseModel class SubmissionRequestFileIndexItem(BaseModel): @@ -29,17 +12,22 @@ class SubmissionRequestFileIndexItem(BaseModel): File index is used to verify correct number of files/parts have been uploaded """ - file_name: str - file_size: int - file_hash: Optional[str] = None + filename: str + filesize: int + filehash: Optional[str] = None class NewSubmissionRequest(BaseModel): """ Dataclass used for input in the creation of a new submission to a challenge """ + model_id: str + benchmark_id: str + author_label: str filename: str hash: str + has_scores: 
bool multipart: bool index: Optional[List[SubmissionRequestFileIndexItem]] + leaderboard: Optional[str] class NewSubmission(BaseModel): diff --git a/vocolab/db/models/api/commons.py b/vocolab/data/models/api/commons.py similarity index 100% rename from vocolab/db/models/api/commons.py rename to vocolab/data/models/api/commons.py diff --git a/vocolab/data/models/api/models.py b/vocolab/data/models/api/models.py new file mode 100644 index 0000000..0728734 --- /dev/null +++ b/vocolab/data/models/api/models.py @@ -0,0 +1,16 @@ +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, AnyHttpUrl, Field + + +class NewModelIdRequest(BaseModel): + description: str + gpu_budget: str + train_set: str + authors: str + institution: str + team: str + paper_url: Optional[AnyHttpUrl] + code_url: Optional[AnyHttpUrl] + created_at: datetime = Field(default_factory=lambda: datetime.now()) diff --git a/vocolab/db/models/cli.py b/vocolab/data/models/cli.py similarity index 85% rename from vocolab/db/models/cli.py rename to vocolab/data/models/cli.py index e0f6b60..1220b22 100644 --- a/vocolab/db/models/cli.py +++ b/vocolab/data/models/cli.py @@ -4,18 +4,21 @@ from pydantic import BaseModel, AnyHttpUrl +from vocolab import get_settings from .tasks import ExecutorsType +st = get_settings() + class NewChallenge(BaseModel): """ Dataclass for challenge creation """ - id: Optional[int] label: str active: bool url: AnyHttpUrl evaluator: Optional[int] start_date: date end_date: Optional[date] + auto_eval: bool = st.task_queue_options.AUTO_EVAL class NewEvaluatorItem(BaseModel): diff --git a/vocolab/data/models/misc.py b/vocolab/data/models/misc.py new file mode 100644 index 0000000..b53f9e5 --- /dev/null +++ b/vocolab/data/models/misc.py @@ -0,0 +1,3 @@ +from pydantic import BaseModel, EmailStr, validator + + diff --git a/vocolab/db/models/tasks.py b/vocolab/data/models/tasks.py similarity index 100% rename from vocolab/db/models/tasks.py rename to 
vocolab/data/models/tasks.py diff --git a/vocolab/data/tables.py b/vocolab/data/tables.py new file mode 100644 index 0000000..a308f97 --- /dev/null +++ b/vocolab/data/tables.py @@ -0,0 +1,114 @@ +import sqlalchemy + +tables_metadata = sqlalchemy.MetaData() + +_user_id = "users_credentials.id" +_benchmark_id = "benchmarks.label" + + +""" Table Representing Users""" +users_table = sqlalchemy.Table( + "users_credentials", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True), + sqlalchemy.Column("username", sqlalchemy.String, unique=True), + sqlalchemy.Column("email", sqlalchemy.String, unique=True), + sqlalchemy.Column("active", sqlalchemy.Boolean), + sqlalchemy.Column("verified", sqlalchemy.String), + sqlalchemy.Column("hashed_pswd", sqlalchemy.BLOB), + sqlalchemy.Column("salt", sqlalchemy.BLOB), + sqlalchemy.Column("created_at", sqlalchemy.DATETIME) +) + +""" +Table indexing of model ids +""" +models_table = sqlalchemy.Table( + "models", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey(_user_id)), + sqlalchemy.Column("created_at", sqlalchemy.DateTime), + sqlalchemy.Column("description", sqlalchemy.String), + sqlalchemy.Column("gpu_budget", sqlalchemy.String), + sqlalchemy.Column("train_set", sqlalchemy.String), + sqlalchemy.Column("authors", sqlalchemy.String), + sqlalchemy.Column("institution", sqlalchemy.String), + sqlalchemy.Column("team", sqlalchemy.String), + sqlalchemy.Column("paper_url", sqlalchemy.String), + sqlalchemy.Column("code_url", sqlalchemy.String), +) + + +""" +Table indexing the existing evaluators +""" +evaluators_table = sqlalchemy.Table( + "evaluators", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), + sqlalchemy.Column("label", sqlalchemy.String, unique=True), + sqlalchemy.Column("host", sqlalchemy.String), + 
sqlalchemy.Column("executor", sqlalchemy.String), + sqlalchemy.Column("script_path", sqlalchemy.String), + sqlalchemy.Column("executor_arguments", sqlalchemy.String) +) + +""" +Table used to index the existing challenges & their metadata +""" +benchmarks_table = sqlalchemy.Table( + "benchmarks", + tables_metadata, + sqlalchemy.Column("label", sqlalchemy.String, unique=True, primary_key=True), + sqlalchemy.Column("start_date", sqlalchemy.Date), + sqlalchemy.Column("end_date", sqlalchemy.Date), + sqlalchemy.Column("active", sqlalchemy.Boolean), + sqlalchemy.Column("url", sqlalchemy.String), + sqlalchemy.Column("evaluator", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")), + sqlalchemy.Column("auto_eval", sqlalchemy.Boolean) +) + +""" +Table indexing the existing leaderboards and their metadata +""" +leaderboards_table = sqlalchemy.Table( + "leaderboards", + tables_metadata, + sqlalchemy.Column('label', sqlalchemy.String, unique=True, primary_key=True), + sqlalchemy.Column('benchmark_id', sqlalchemy.String, sqlalchemy.ForeignKey(_benchmark_id)), + sqlalchemy.Column('archived', sqlalchemy.Boolean), + sqlalchemy.Column('static_files', sqlalchemy.Boolean), + sqlalchemy.Column('sorting_key', sqlalchemy.String), +) + +""" +Table entry indexing submissions to challenges +""" +submissions_table = sqlalchemy.Table( + "benchmark_submissions", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey(_user_id)), + sqlalchemy.Column("benchmark_id", sqlalchemy.Integer, sqlalchemy.ForeignKey(_benchmark_id)), + sqlalchemy.Column("model_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("models.id")), + sqlalchemy.Column("submit_date", sqlalchemy.DateTime), + sqlalchemy.Column("status", sqlalchemy.String), + sqlalchemy.Column("has_scores", sqlalchemy.Boolean), + sqlalchemy.Column("auto_eval", sqlalchemy.Boolean), + sqlalchemy.Column("evaluator_id", 
sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")), + sqlalchemy.Column("author_label", sqlalchemy.String) +) + +""" Table indexing all leaderboard entries and their location (as stores json files)""" +leaderboard_entry_table = sqlalchemy.Table( + "leaderboard_entries", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), + sqlalchemy.Column("data", sqlalchemy.JSON), + sqlalchemy.Column("entry_path", sqlalchemy.String), + sqlalchemy.Column("submission_id", sqlalchemy.String, sqlalchemy.ForeignKey("benchmark_submissions.id")), + sqlalchemy.Column("leaderboard_id", sqlalchemy.String, sqlalchemy.ForeignKey("leaderboards.label")), + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey(_user_id)), + sqlalchemy.Column("submitted_at", sqlalchemy.DATETIME) +) \ No newline at end of file diff --git a/vocolab/db/__init__.py b/vocolab/db/__init__.py deleted file mode 100644 index 1d9ac5b..0000000 --- a/vocolab/db/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from vocolab.db.base import ( - create_db, zrDB, users_metadata -) diff --git a/vocolab/db/base.py b/vocolab/db/base.py deleted file mode 100644 index 817e858..0000000 --- a/vocolab/db/base.py +++ /dev/null @@ -1,22 +0,0 @@ -import databases -import sqlalchemy - -from vocolab.db.schema import users_metadata, challenge_metadata -from vocolab.settings import get_settings - -_settings = get_settings() - -_USERS_CONN = f"sqlite:///{_settings.DATA_FOLDER}/{_settings.database_options.db_file}" - -zrDB = databases.Database(_USERS_CONN) - - -def create_db(): - if not (_settings.DATA_FOLDER / _settings.database_options.db_file).is_file(): - (_settings.DATA_FOLDER / _settings.database_options.db_file).touch() - - engine = sqlalchemy.create_engine( - _USERS_CONN, connect_args={"check_same_thread": False} - ) - users_metadata.create_all(engine) - challenge_metadata.create_all(engine) diff --git a/vocolab/db/models/api/auth.py 
b/vocolab/db/models/api/auth.py deleted file mode 100644 index a6452b7..0000000 --- a/vocolab/db/models/api/auth.py +++ /dev/null @@ -1,22 +0,0 @@ -""" Dataclasses representing API/auth input output data types """ -from pydantic import BaseModel, EmailStr - - -class LoggedItem(BaseModel): - """ Return type of the /login function """ - access_token: str - token_type: str - - -class CurrentUser(BaseModel): - """ Basic userinfo Model """ - username: str - email: EmailStr - - -class PasswordResetRequest(BaseModel): - """ Input Schema for /password/reset request """ - username: str - email: EmailStr - - diff --git a/vocolab/db/models/api/leaerboards.py b/vocolab/db/models/api/leaerboards.py deleted file mode 100644 index 0ba4103..0000000 --- a/vocolab/db/models/api/leaerboards.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel - - -class LeaderboardPublicView(BaseModel): - id: int - challenge_id: int - label: str - entry_file: str - archived: bool - static_files: bool diff --git a/vocolab/db/models/api/users.py b/vocolab/db/models/api/users.py deleted file mode 100644 index c00583f..0000000 --- a/vocolab/db/models/api/users.py +++ /dev/null @@ -1,21 +0,0 @@ -""" Input/Output Dataclass types for the /users section of the API """ -from datetime import datetime -from typing import Optional - -from pydantic import BaseModel, Extra, EmailStr - - -class UserData(BaseModel): - username: str - affiliation: str - first_name: Optional[str] - last_name: Optional[str] - - class Config: - extra = Extra.allow - - -class UserProfileResponse(UserData): - verified: bool - email: EmailStr - created: Optional[datetime] diff --git a/vocolab/db/models/file_split.py b/vocolab/db/models/file_split.py deleted file mode 100644 index 1078e45..0000000 --- a/vocolab/db/models/file_split.py +++ /dev/null @@ -1,28 +0,0 @@ -from pathlib import Path -from typing import List, Optional - -from pydantic import BaseModel - - -class ManifestIndexItem(BaseModel): - """ Model representing a 
file item in the SplitManifest """ - file_name: str - file_size: int - file_hash: str - - def __eq__(self, other: 'ManifestIndexItem'): - return self.file_hash == other.file_hash - - def __hash__(self): - return int(self.file_hash, 16) - - -class SplitManifest(BaseModel): - """ Data Model used for the binary split function as a manifest to allow merging """ - filename: str - tmp_location: Path - hash: str - index: Optional[List[ManifestIndexItem]] - received: Optional[List[ManifestIndexItem]] = [] - multipart: bool = True - hashed_parts: bool = True diff --git a/vocolab/db/models/misc.py b/vocolab/db/models/misc.py deleted file mode 100644 index 4122b3f..0000000 --- a/vocolab/db/models/misc.py +++ /dev/null @@ -1,17 +0,0 @@ -from pydantic import BaseModel, EmailStr, validator - - -class UserCreate(BaseModel): - """ Dataclass for user creation """ - username: str - email: EmailStr - pwd: str - first_name: str - last_name: str - affiliation: str - - @validator('username', 'pwd', 'first_name', 'last_name', 'affiliation') - def non_empty_string(cls, v): - assert v, "UserCreate does not accept empty fields" - return v - diff --git a/vocolab/db/q/__init__.py b/vocolab/db/q/__init__.py deleted file mode 100644 index efd3979..0000000 --- a/vocolab/db/q/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from vocolab.db.q import users as userQ # noqa: allow non standard names -from vocolab.db.q import challenges as challengesQ # noqa: allow non standard names -from vocolab.db.q import leaderboards as leaderboardQ # noqa: allow non standard names diff --git a/vocolab/db/q/challenges.py b/vocolab/db/q/challenges.py deleted file mode 100644 index 7a7f393..0000000 --- a/vocolab/db/q/challenges.py +++ /dev/null @@ -1,275 +0,0 @@ -from datetime import datetime -from typing import List, Any, Optional -from uuid import uuid4 - -from vocolab import get_settings -from vocolab.db import models, zrDB, schema, exc as db_exc -from vocolab.lib import misc - -_settings = get_settings() - - -async 
def create_new_challenge(item: models.cli.NewChallenge): - """ Creates a new challenge entry in the database """ - try: - query = schema.challenges_table.insert().values( - **item.dict() - ) - await zrDB.execute(query) - except Exception as e: - db_exc.parse_user_insertion(e) - - -async def list_challenges(*, include_all: bool = False) -> List[schema.Challenge]: - """ Returns a list of all the challenges - - flag include_all allows to filter out inactive challenges - """ - query = schema.challenges_table.select() - challenges = await zrDB.fetch_all(query) - if challenges is None: - raise ValueError('No challenges were found') - - challenges = [schema.Challenge(**c) for c in challenges] - if include_all: - return challenges - else: - return [c for c in challenges if c.is_active()] - - -async def get_challenge(*, - challenge_id: int, allow_inactive=False, - ) -> schema.Challenge: - """ Fetches the Challenge object from the database - - :note: in strict mode (allow_inactive = False) the function raises a ValueError - if the challenge has expired or is inactive. - """ - query = schema.challenges_table.select().where( - schema.challenges_table.c.id == challenge_id - ) - ch = await zrDB.fetch_one(query) - if ch is None: - raise ValueError(f'There is no challenge with the following id: {challenge_id}') - ch = schema.Challenge(**ch) - if allow_inactive: - return ch - else: - if not ch.is_active(): - raise ValueError(f"The Challenge {ch.label}[{ch.id}] is not active") - return ch - - -async def update_challenge_property(*, challenge_id: int, variable_name: str, value: Any, - allow_parsing: bool = False): - """ Update the property of a challenge """ - field = schema.Challenge.__fields__.get(variable_name, None) - if field is None: - raise ValueError(f'Class Challenge does not have a member called ! 
{variable_name}') - - if allow_parsing: - value = misc.str2type(value, field.type_) - - if value is not None and not isinstance(value, field.type_): - raise ValueError(f"Challenge.{variable_name} should be of type {field.type_}") - - query = schema.challenges_table.update().where( - schema.challenges_table.c.id == challenge_id - ).values({f"{variable_name}": value}) - - try: - await zrDB.execute(query) - except Exception as e: - db_exc.parse_user_insertion(e) - - return value - - -async def delete_challenge(*, ch_id: int): - """ Delete the database entry of a challenge """ - query = schema.challenges_table.delete().where( - schema.challenges_table.c.id == ch_id - ) - return await zrDB.execute(query) - - -async def add_submission(*, new_submission: models.api.NewSubmission, evaluator_id: int): - """ Creates a database entry to a new submission """ - submission_id = datetime.now().strftime('%Y%m-%d%H-%M%S-') + str(uuid4()) - query = schema.submissions_table.insert() - values = new_submission.dict() - values["id"] = submission_id - values["submit_date"] = datetime.now() - values["status"] = schema.SubmissionStatus.uploading - values["evaluator_id"] = evaluator_id - values["author_label"] = None # default value for author_label is None - # todo: check if this should be fetched from challenge entry ? 
- values["auto_eval"] = _settings.task_queue_options.AUTO_EVAL # auto eval default from settings - await zrDB.execute(query=query, values=values) - return submission_id - - -async def list_submission(*, by_track: int = None, by_user: int = None, by_status=None): - """ Fetches a list of submission from the database """ - query = schema.submissions_table.select() - - if by_track: - query = query.where( - schema.submissions_table.c.track_id == by_track - ) - - if by_user: - query = query.where( - schema.submissions_table.c.user_id == by_user - ) - - if by_status: - query = query.where( - schema.submissions_table.c.status == by_status - ) - - sub_list = await zrDB.fetch_all(query) - - # map & return - return [schema.ChallengeSubmission(**sub) for sub in sub_list] - - -async def get_submission(*, by_id: str) -> schema.ChallengeSubmission: - """ Fetches a submission from the database """ - query = schema.submissions_table.select().where( - schema.submissions_table.c.id == by_id - ) - sub = await zrDB.fetch_one(query) - if sub is None: - raise ValueError(f'There is no challenge with the following id: {by_id}') - # map & return - return schema.ChallengeSubmission(**sub) - - -async def get_user_submissions(*, user_id: int) -> List[schema.ChallengeSubmission]: - """ Fetch all the submissions of a specific user """ - query = schema.submissions_table.select().where( - schema.submissions_table.c.user_id == user_id - ) - subs = await zrDB.fetch_all(query) - if subs is None: - return [] - return [schema.ChallengeSubmission(**it) for it in subs] - - -async def update_submission_status(*, by_id: str, status: schema.SubmissionStatus): - """ Update the status of a submission """ - query = schema.submissions_table.update().where( - schema.submissions_table.c.id == by_id - ).values(status=status) - return await zrDB.execute(query) - - -async def update_submission_evaluator(evaluator_id: int, *, by_id: Optional[str] = None, by_track: Optional[int] = None, - by_user: Optional[int] = 
None): - """ Update the set evaluator for a specific submission. """ - - if by_id: - query = schema.submissions_table.update().where( - schema.submissions_table.c.id == by_id - ) - elif by_track: - query = schema.submissions_table.update().where( - schema.submissions_table.c.track_id == by_track - ) - elif by_user: - query = schema.submissions_table.update().where( - schema.submissions_table.c.user_id == by_user - ) - else: - raise ValueError(f'Selector not specified') - - # execute query and update values on db - query = query.values(evaluator_id=evaluator_id) - return await zrDB.execute(query) - - -async def update_submission_author_label(label: str, *, by_id: Optional[str] = None, by_user: Optional[int] = None): - """ Update or set """ - if by_id: - query = schema.submissions_table.update().where( - schema.submissions_table.c.id == by_id - ) - elif by_user: - query = schema.submissions_table.update().where( - schema.submissions_table.c.user_id == by_user - ) - else: - raise ValueError(f'Selector not specified') - - # execute query and update values on db - query = query.values(author_label=label) - return await zrDB.execute(query) - - -async def drop_submission(*, by_id: str): - """ Delete db entry of a submission """ - query = schema.submissions_table.delete().where( - schema.submissions_table.c.id == by_id - ) - await zrDB.execute(query) - - -async def submission_status(*, by_id: str) -> schema.SubmissionStatus: - """ Returns the status of a submission """ - query = schema.submissions_table.select().where( - schema.submissions_table.c.id == by_id - ) - sub = await zrDB.fetch_one(query) - if sub is None: - raise ValueError(f'There is no challenge with the following id: {by_id}') - # map & return - return schema.ChallengeSubmission(**sub).status - - -async def get_evaluators(): - """ Returns a list of the evaluators """ - query = schema.evaluators_table.select() - results = await zrDB.fetch_all(query) - if not results: - return [] - return 
[schema.EvaluatorItem(**i) for i in results] - - -async def get_evaluator(*, by_id: int) -> Optional[schema.EvaluatorItem]: - """ Returns a specific evaluator """ - - query = schema.evaluators_table.select().where( - schema.evaluators_table.c.id == by_id - ) - result = await zrDB.fetch_one(query) - if not result: - return None - return schema.EvaluatorItem(**result) - - -async def add_evaluator(*, lst_eval: List[models.cli.NewEvaluatorItem]): - """ Insert a list of evaluators into the database """ - for i in lst_eval: - query = schema.evaluators_table.select().where( - schema.evaluators_table.c.label == i.label - ).where( - schema.evaluators_table.c.host == i.host - ) - res = await zrDB.fetch_one(query) - - if res is None: - await zrDB.execute(schema.evaluators_table.insert(), i.dict()) - else: - update_query = schema.evaluators_table.update().where( - schema.evaluators_table.c.id == res.id - ).values(executor=i.executor, script_path=i.script_path, executor_arguments=i.executor_arguments) - await zrDB.execute(update_query) - - -async def edit_evaluator_args(*, eval_id: int, arg_list: List[str]): - """ update evaluator base arguments """ - query = schema.evaluators_table.update().where( - schema.evaluators_table.c.id == eval_id - ).values(executor_arguments=";".join(arg_list)) - await zrDB.execute(query) diff --git a/vocolab/db/q/leaderboards.py b/vocolab/db/q/leaderboards.py deleted file mode 100644 index 4248748..0000000 --- a/vocolab/db/q/leaderboards.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -Database functions that manipulate the leaderboard table -""" -from pathlib import Path -from typing import Any, List, Optional -from vocolab.db import schema, zrDB, exc as db_exc -from vocolab.lib import misc - - -async def get_leaderboard(*, leaderboard_id: int) -> schema.LeaderBoard: - """ Fetches the leaderboard object with the corresponding id - - :raise ValueError if the item is not is the database - :raise SQLAlchemy exceptions if database connection or condition fails 
- """ - query = schema.leaderboards_table.select().where( - schema.leaderboards_table.c.id == leaderboard_id - ) - ld = await zrDB.fetch_one(query) - if ld is None: - raise ValueError(f'Leaderboard: {leaderboard_id} not found in database !!!') - - return schema.LeaderBoard(**ld) - - -async def get_leaderboards(*, by_challenge_id: Optional[int] = None) -> List[schema.LeaderBoard]: - """ A list of leaderboards - - :param by_challenge_id: filter leaderboards by challenge id - :raise ValueError if the item is not is the database - :raise SQLAlchemy exceptions if database connection or condition fails - """ - if by_challenge_id: - query = schema.leaderboards_table.select().where( - schema.leaderboards_table.c.challenge_id == by_challenge_id - ) - else: - raise ValueError("No parameter given") - - lst_ld = await zrDB.fetch_all(query) - return [schema.LeaderBoard(**ld) for ld in lst_ld] - - -async def list_leaderboards() -> List[schema.LeaderBoard]: - """ Fetch a list of all the leaderboards present in the database - - :raise ValueError if the leaderboard database is empty - :raise SQLAlchemy exceptions if database connection or condition fails - """ - query = schema.leaderboards_table.select() - leaderboards = await zrDB.fetch_all(query) - if not leaderboards: - raise ValueError('No leaderboards found') - - return [schema.LeaderBoard(**ld) for ld in leaderboards] - - -async def create_leaderboard(*, lead_data: schema.LeaderBoard) -> int: - """ Create a new leaderboard entry in database from item object - - :returns the id of the leaderboard created - """ - query = schema.leaderboards_table.insert().values( - label=lead_data.label, - challenge_id=lead_data.challenge_id, - path_to=f"{lead_data.path_to}", - entry_file=lead_data.entry_file, - archived=lead_data.archived, - external_entries=f"{lead_data.external_entries}", - static_files=lead_data.static_files - ) - try: - result = await zrDB.execute(query) - return result - except Exception as e: - 
db_exc.parse_user_insertion(e) - - -async def update_leaderboard_value(*, leaderboard_id, variable_name: str, value: Any, allow_parsing: bool = False): - """ Update a value in the leaderboard corresponding to the given id - - :raise ValueError if given variable does not exist or does not match corresponding type - """ - field = schema.LeaderBoard.__fields__.get(variable_name, None) - if field is None: - raise ValueError(f'Class Leaderboard does not have a member called ! {variable_name}') - - if allow_parsing: - value = misc.str2type(value, field.type_) - - if value is None: - if not field.allow_none: - raise ValueError(f'LeaderBoard.{variable_name} cannot be None/Null') - else: - if not isinstance(value, field.type_): - raise ValueError(f"Leaderboard.{variable_name} should be of type {field.type_}") - - # Path is not supported by sqlite as a raw type - if field.type_ == Path: - value = str(value) - - query = schema.leaderboards_table.update().where( - schema.leaderboards_table.c.id == leaderboard_id - ).values({f"{variable_name}": str(value)}) - try: - await zrDB.execute(query) - except Exception as e: - db_exc.parse_user_insertion(e) - - return value diff --git a/vocolab/db/q/users.py b/vocolab/db/q/users.py deleted file mode 100644 index 1171dde..0000000 --- a/vocolab/db/q/users.py +++ /dev/null @@ -1,203 +0,0 @@ -import secrets -from datetime import datetime -from typing import Optional, List - - -from email_validator import validate_email, EmailNotValidError - -from vocolab import exc, out -from vocolab.db import zrDB, models, schema, exc as db_exc -from vocolab.lib import users_lib -from vocolab.settings import get_settings - -_settings = get_settings() - - -async def create_user(*, usr: models.misc.UserCreate): - """ Create a new user entry in the users' database.""" - - hashed_pswd, salt = users_lib.hash_pwd(password=usr.pwd) - verification_code = secrets.token_urlsafe(8) - try: - # insert user entry into the database - query = 
schema.users_table.insert().values( - username=usr.username, - email=usr.email, - active=True, - verified=verification_code, - hashed_pswd=hashed_pswd, - salt=salt, - created_at=datetime.now() - ) - await zrDB.execute(query) - - except Exception as e: - db_exc.parse_user_insertion(e) - - # create user profile data - data = models.api.UserData( - username=usr.username, - affiliation=usr.affiliation, - first_name=usr.first_name, - last_name=usr.last_name - ) - users_lib.update_user_data(usr.username, data) - - return verification_code - - -async def verify_user(*, username: str, verification_code: str): - """ User verification using a specific code. - If the code is correct verification succeeds - If not the function raises a ValueNotValid Exception - If user is already verified we raise an ActionNotValid Exception - """ - user = await get_user(by_username=username) - if secrets.compare_digest(user.verified, verification_code): - query = schema.users_table.update().where( - schema.users_table.c.id == user.id - ).values( - verified='True' - ) - await zrDB.execute(query) - return True - elif secrets.compare_digest(user.verified, 'True'): - raise exc.ActionNotValid("Email already verified") - else: - raise exc.ValueNotValid("validation code was not correct") - - -async def admin_verification(*, user_id: int): - """ Verify a user, raises an ValueError if user does not exist. - To only be used for administration. - Users need to validate their accounts. 
- - - bypasses code verification - - no exception is raised if user already active - """ - query = schema.users_table.update().where( - schema.users_table.c.id == user_id - ).values( - verified='True' - ) - res = await zrDB.execute(query) - - if res == 0: - raise ValueError(f'user {user_id} was not found') - - -def check_users_password(*, password: str, user: schema.User): - """ Verify that a given password matches the users """ - hashed_pwd, _ = users_lib.hash_pwd(password=password, salt=user.salt) - return hashed_pwd == user.hashed_pswd - - -async def get_user_for_login(login_id: str, password: str) -> Optional[schema.User]: - """ - :params login_id: the login id can be username or email - :params password: the user's password - """ - try: - validate_email(login_id) # check if email is valid - query = schema.users_table.select().where( - schema.users_table.c.email == login_id - ) - except EmailNotValidError: - query = schema.users_table.select().where( - schema.users_table.c.username == login_id - ) - - user = await zrDB.fetch_one(query) - if user is None: - return None - user = schema.User(**user) - out.console.print(f"===> {user=}") - - hashed_pswd, _ = users_lib.hash_pwd(password=password, salt=user.salt) - if user.enabled and hashed_pswd == user.hashed_pswd: - return user - return None - - -async def get_user(*, by_uid: Optional[int] = None, by_username: Optional[str] = None, - by_email: Optional[str] = None) -> schema.User: - """ Get a user from the database using uid, username or email as a search parameter. 
- - :rtype: schema.User - :returns the user object - :raises ValueError if the user does not exist or no search value was provided - """ - - if by_uid: - query = schema.users_table.select().where( - schema.users_table.c.id == by_uid - ) - elif by_username: - query = schema.users_table.select().where( - schema.users_table.c.username == by_username - ) - elif by_email: - query = schema.users_table.select().where( - schema.users_table.c.email == by_email - ) - else: - raise ValueError('a value must be provided : uid, username, email') - - user = await zrDB.fetch_one(query) - if user is None: - raise ValueError(f'database does not contain a user for given credentials') - - return schema.User(**user) - - -async def get_user_list() -> List[schema.User]: - """ Return a list of all users """ - query = schema.users_table.select() - user_list = await zrDB.fetch_all(query) - if user_list is None: - raise ValueError(f'database does not contain any user') - return [schema.User(**usr) for usr in user_list] - - -async def delete_user(*, uid: int): - """ Deletes all password reset sessions from the password_reset_users table """ - query = schema.users_table.delete().where( - schema.users_table.c.id == uid - ) - # returns number of deleted entries - return await zrDB.execute(query) - - -async def update_users_password(*, user: schema.User, password: str, password_validation: str): - """ Change a users password """ - - if password != password_validation: - raise ValueError('passwords do not match') - - hashed_pswd, salt = users_lib.hash_pwd(password=password) - query = schema.users_table.update().where( - schema.users_table.c.id == user.id - ).values(hashed_pswd=hashed_pswd, salt=salt) - - await zrDB.execute(query) - - -async def toggle_user_status(*, user_id: int, active: bool = True): - """ Toggles a users status for active to inactive """ - query = schema.users_table.update().where( - schema.users_table.c.id == user_id - ).values( - active=active - ) - res = await 
zrDB.execute(query) - - if res == 0: - raise ValueError(f'user {user_id} was not found') - - -async def toggle_all_users_status(*, active: bool = True): - """ Toggles a users status for active to inactive """ - query = schema.users_table.update().values( - active=active - ) - return await zrDB.execute(query) diff --git a/vocolab/db/schema/__init__.py b/vocolab/db/schema/__init__.py deleted file mode 100644 index a861fb9..0000000 --- a/vocolab/db/schema/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .auth import * -from .challenges import * - diff --git a/vocolab/db/schema/auth.py b/vocolab/db/schema/auth.py deleted file mode 100644 index 59257ad..0000000 --- a/vocolab/db/schema/auth.py +++ /dev/null @@ -1,71 +0,0 @@ -import json -from datetime import datetime -from typing import Optional - -import sqlalchemy -from pydantic import BaseModel, EmailStr, Field, ValidationError -from jose import jwt, JWTError # noqa: false flags from requirements https://youtrack.jetbrains.com/issue/PY-27985 - -from ...settings import get_settings - - -_settings = get_settings() -users_metadata = sqlalchemy.MetaData() - - -class User(BaseModel): - id: int - username: str - email: EmailStr - active: bool - verified: str - hashed_pswd: bytes - salt: bytes - created_at: Optional[datetime] - - @property - def enabled(self): - return self.active and self.verified == 'True' - - class Config: - orm_mode = True - - -users_table = sqlalchemy.Table( - "users_credentials", - users_metadata, - sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True), - sqlalchemy.Column("username", sqlalchemy.String, unique=True), - sqlalchemy.Column("email", sqlalchemy.String, unique=True), - sqlalchemy.Column("active", sqlalchemy.Boolean), - sqlalchemy.Column("verified", sqlalchemy.String), - sqlalchemy.Column("hashed_pswd", sqlalchemy.BLOB), - sqlalchemy.Column("salt", sqlalchemy.BLOB), - sqlalchemy.Column("created_at", sqlalchemy.DATETIME) -) - -class Token(BaseModel): - """ API 
Session Token """ - expires_at: datetime = Field(default_factory=lambda: datetime.now() + _settings.user_options.session_expiry_delay) - created_at: datetime = Field(default_factory=lambda: datetime.now()) - allow_password_reset: bool = False # used for password reset sessions - user_email: EmailStr - - def is_expired(self) -> bool: - """ Check if Token has expired """ - return self.expires_at < datetime.now() - - def encode(self) -> str: - """ Encode into a token string """ - # passing by json allows to convert datetimes to strings using pydantic serializer - as_dict = json.loads(self.json()) - return jwt.encode(claims=as_dict, key=_settings.secret, algorithm=_settings.api_options.token_encryption) - - @classmethod - def decode(cls, encoded_token: str): - """ Decode token from encoded string """ - try: - payload = jwt.decode(token=encoded_token, key=_settings.secret, algorithms=[_settings.api_options.token_encryption]) - return Token(**payload) - except (JWTError, ValidationError) as e: - raise ValueError("Invalid token") from e diff --git a/vocolab/db/schema/challenges.py b/vocolab/db/schema/challenges.py deleted file mode 100644 index 40c0c4d..0000000 --- a/vocolab/db/schema/challenges.py +++ /dev/null @@ -1,233 +0,0 @@ -from datetime import datetime, date -from pathlib import Path -from typing import Optional - -import sqlalchemy -from pydantic import BaseModel, HttpUrl - -from vocolab.db.models.tasks import ExecutorsType -from datetime import datetime -from enum import Enum -from typing import Optional - -import sqlalchemy -from pydantic import BaseModel, AnyHttpUrl - -challenge_metadata = sqlalchemy.MetaData() - - -class ModelID(BaseModel): - """ Data representation of a Model id & its metadata""" - id: str - user_id: int - created_at: datetime - description: str - gpu_budget: str - train_set: str - authors: str - institution: str - team: str - paper_url: AnyHttpUrl - code_url: AnyHttpUrl - -""" -Table indexing of model ids -""" -models_table = 
sqlalchemy.Table( - "models", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), - sqlalchemy.Column("user_id", sqlalchemy.Integer), - sqlalchemy.Column("created_at", sqlalchemy.DateTime), - sqlalchemy.Column("description", sqlalchemy.String), - sqlalchemy.Column("gpu_budget", sqlalchemy.String), - sqlalchemy.Column("train_set", sqlalchemy.String), - sqlalchemy.Column("authors", sqlalchemy.String), - sqlalchemy.Column("institution", sqlalchemy.String), - sqlalchemy.Column("team", sqlalchemy.String), - sqlalchemy.Column("paper_url", sqlalchemy.String), - sqlalchemy.Column("code_url", sqlalchemy.String), -) - -class EvaluatorItem(BaseModel): - """ Data representation of an evaluator """ - id: int - label: str - executor: ExecutorsType - host: Optional[str] - script_path: str - executor_arguments: str - - class Config: - orm_mode = True - -""" -Table indexing the existing evaluators -""" -evaluators_table = sqlalchemy.Table( - "evaluators", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), - sqlalchemy.Column("label", sqlalchemy.String, unique=True), - sqlalchemy.Column("host", sqlalchemy.String), - sqlalchemy.Column("executor", sqlalchemy.String), - sqlalchemy.Column("script_path", sqlalchemy.String), - sqlalchemy.Column("executor_arguments", sqlalchemy.String) -) - - -class Challenge(BaseModel): - """ Data representation of a challenge """ - id: int - label: str - start_date: date - end_date: Optional[date] - active: bool - url: HttpUrl - evaluator: Optional[int] - - class Config: - orm_mode = True - - def is_active(self) -> bool: - """ Checks if challenge is active """ - present = date.today() - if self.end_date: - return self.start_date <= present <= self.end_date and self.active - else: - return self.start_date <= present and self.active - - @classmethod - def get_field_names(cls): - return list(cls.__fields__.keys()) - -""" -Table used to index 
the existing challenges & their metadata -""" -challenges_table = sqlalchemy.Table( - "challenges", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True), - sqlalchemy.Column("label", sqlalchemy.String, unique=True), - sqlalchemy.Column("start_date", sqlalchemy.Date), - sqlalchemy.Column("end_date", sqlalchemy.Date), - sqlalchemy.Column("active", sqlalchemy.Boolean), - sqlalchemy.Column("url", sqlalchemy.String), - sqlalchemy.Column("evaluator", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")) -) - - -class LeaderBoard(BaseModel): - """ Data representation of a Leaderboard """ - id: Optional[int] - challenge_id: int # Id to linked challenge - label: str # Name of leaderboard - path_to: Path # Path to build result - entry_file: str # filename in submission results - archived: bool # is_archived - external_entries: Optional[Path] # Location of external entries (baselines, toplines, archived) - static_files: bool # has static files - sorting_key: Optional[str] # path to the item to use as sorting key - - @classmethod - def get_field_names(cls): - return list(cls.__fields__.keys()) - - class Config: - orm_mode = True - - -""" -Table indexing the existing leaderboards and their metadata -""" -leaderboards_table = sqlalchemy.Table( - "leaderboards", - challenge_metadata, - sqlalchemy.Column('id', sqlalchemy.Integer, primary_key=True, autoincrement=True), - sqlalchemy.Column('challenge_id', sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), - sqlalchemy.Column('label', sqlalchemy.String, unique=True), - sqlalchemy.Column('path_to', sqlalchemy.String), - sqlalchemy.Column('entry_file', sqlalchemy.String), - sqlalchemy.Column('archived', sqlalchemy.Boolean), - sqlalchemy.Column('external_entries', sqlalchemy.String), - sqlalchemy.Column('static_files', sqlalchemy.Boolean), - sqlalchemy.Column('sorting_key', sqlalchemy.String), -) - - - -class SubmissionStatus(str, Enum): - """ Definition of different 
states of submissions """ - # TODO: maybe add submission type (with scores...) - uploading = 'uploading' - uploaded = 'uploaded' - on_queue = 'on_queue' - validating = 'validating' # todo verify usage - invalid = 'invalid' - evaluating = 'evaluating' - completed = 'completed' - canceled = 'canceled' - failed = 'failed' - no_eval = 'no_eval' - no_auto_eval = 'no_auto_eval' - excluded = 'excluded' - - @classmethod - def get_values(cls): - return [el.value for el in cls] # noqa enum has attr values - - - -class ChallengeSubmission(BaseModel): - """ Data representation of a submission to a challenge """ - id: str - user_id: int - track_id: int - submit_date: datetime - status: SubmissionStatus - auto_eval: bool - evaluator_id: Optional[int] - author_label: Optional[str] = None - - class Config: - orm_mode = True - - -""" -Table entry indexing submissions to challenges -""" -submissions_table = sqlalchemy.Table( - "challenge_submissions", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), - sqlalchemy.Column("user_id", sqlalchemy.Integer), - sqlalchemy.Column("track_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), - sqlalchemy.Column("submit_date", sqlalchemy.DateTime), - sqlalchemy.Column("status", sqlalchemy.String), - sqlalchemy.Column("auto_eval", sqlalchemy.Boolean), - sqlalchemy.Column("evaluator_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")), - sqlalchemy.Column("author_label", sqlalchemy.String) -) - - -class LeaderboardEntry: - """ Data representation of a leaderboard entry """ - id: Optional[int] - entry_path: Path - model_id: str - submission_id: str - leaderboard_id: int - submitted_at: datetime - - -""" Table indexing all leaderboard entries and their location (as stores json files)""" -leaderboard_entry_table = sqlalchemy.Table( - "leaderboard_entries", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), - 
sqlalchemy.Column("entry_path", sqlalchemy.String), - sqlalchemy.Column("model_id", sqlalchemy.String, sqlalchemy.ForeignKey("leaderboards.id")), - sqlalchemy.Column("submission_id", sqlalchemy.String, sqlalchemy.ForeignKey("challenge_submissions.id")), - sqlalchemy.Column("leaderboard_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("models.id")), - sqlalchemy.Column("submitted_at", sqlalchemy.String) -) diff --git a/vocolab/exc.py b/vocolab/exc.py index c84b259..5280157 100644 --- a/vocolab/exc.py +++ b/vocolab/exc.py @@ -1,7 +1,11 @@ """ A File containing Exceptions definitions """ from typing import Any, Optional -from starlette import status as http_status +from fastapi import status as http_status + +from vocolab.settings import get_settings + +_settings = get_settings() class VocoLabException(Exception): @@ -29,6 +33,16 @@ def __str__(self): return f"{self.__class__.__name__}: {self.message}" +class APILockedException(VocoLabException): + """ Error to return when write operations are not permitted""" + + def __init__(self): + super(APILockedException, self).__init__( + msg=f"The {_settings.app_options.app_name} is in LOCKED mode, write operations are not allowed", + status=http_status.HTTP_423_LOCKED + ) + + class OptionMissing(VocoLabException): """ Generic Exception used when a function was called with incorrect or missing arguments """ pass @@ -77,3 +91,8 @@ class SecurityError(VocoLabException): class ServerError(VocoLabException): """ Error with the starting of a server/service """ pass + + +class FailedOperation(VocoLabException): + """ Could not complete the requested operation """ + pass diff --git a/vocolab/lib/leaderboards_lib.py b/vocolab/lib/leaderboards_lib.py deleted file mode 100644 index b49b82f..0000000 --- a/vocolab/lib/leaderboards_lib.py +++ /dev/null @@ -1,120 +0,0 @@ -import json -from datetime import datetime -from typing import Dict - -from vocolab import out, get_settings -from vocolab.db import schema -from vocolab.db.q import 
leaderboardQ, challengesQ -from vocolab.lib import _fs, misc - -_settings = get_settings() - - -def get_static_location(label: str): - # todo: check why this is in static files ? - return _settings.static_files_directory / 'leaderboards' / label - - -def rebuild_leaderboard_index(leaderboard_entries, *, key): - """ sort entries by using a specific key and re-write the index with the new ordering """ - - leaderboard_entries = sorted(leaderboard_entries, key=lambda x: misc.key_to_value(x, key=key)) - - for i, entry in enumerate(leaderboard_entries, 1): - entry['index'] = i - - return leaderboard_entries - - -async def build_leaderboard(*, leaderboard_id: int): - leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - leaderboard_entries = [] - static_location = get_static_location(leaderboard.label) - - # create static dir - if leaderboard.static_files: - static_location.mkdir(exist_ok=True, parents=True) - - # load external entries - external_entries = [ - *leaderboard.external_entries.rglob('*.json'), - *leaderboard.external_entries.rglob('*.yaml'), - *leaderboard.external_entries.rglob('*.yml') - ] - for item in external_entries: - leaderboard_entries.append(_fs.commons.load_dict_file(item)) - - # copy external static files - if leaderboard.static_files and (leaderboard.external_entries / 'static').is_dir(): - _fs.commons.copy_all_contents(leaderboard.external_entries / 'static', static_location) - - if not leaderboard.archived: - submission_list = await challengesQ.list_submission(by_track=leaderboard.challenge_id) - for sub in submission_list: - # skip not completed submissions - if sub.status != schema.SubmissionStatus.completed: - continue - - # append submission to leaderboard - sub_location = _fs.submissions.get_submission_dir(sub.id) - leaderboard_entry = _fs.leaderboards.load_entry_from_sub(sub.id, leaderboard.entry_file) - - # if author_label is set use database value over local - if sub.author_label and len(leaderboard_entry) > 
0: - leaderboard_entry['author_label'] = sub.author_label - - # append to leaderboard - leaderboard_entries.append(leaderboard_entry) - - # grab all static files - # todo: check is static file section is obsolete ? - if leaderboard.static_files and (sub_location / 'static').is_dir(): - _fs.commons.copy_all_contents(sub_location / 'static', static_location) - - if leaderboard.sorting_key: - try: - leaderboard_entries = rebuild_leaderboard_index(leaderboard_entries, key=leaderboard.sorting_key) - except KeyError: - out.log.error(f"Failed to build index for leaderboard={leaderboard.label} " - f"with sorting_key: {leaderboard.sorting_key}") - # Export to file - with (_settings.leaderboard_dir / leaderboard.path_to).open('w') as fp: - json.dump(dict( - updatedOn=datetime.now().isoformat(), - data=leaderboard_entries - ), fp) - - return _settings.leaderboard_dir / leaderboard.path_to - - -async def get_leaderboard(*, leaderboard_id) -> Dict: - """ Load leaderboard object file """ - leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - return _fs.commons.load_dict_file(_settings.leaderboard_dir / leaderboard.path_to) - - -async def create(*, challenge_id, label, entry_file, external_entries, static_files, path_to, archived): - """ Create a new leaderboard """ - if external_entries is not None: - external_entries = (_fs.leaderboards.get_leaderboard_archive_location() / external_entries) - - ld = schema.LeaderBoard( - challenge_id=challenge_id, - label=label, - entry_file=entry_file, - archived=archived, - external_entries=external_entries, - path_to=(_fs.leaderboards.get_leaderboard_location() / path_to), - static_files=static_files - ) - lead_id = await leaderboardQ.create_leaderboard(lead_data=ld) - # issue: do we want auto-build on creation ? 
- await build_leaderboard(leaderboard_id=lead_id) - return lead_id - - -async def build_all_challenge(challenge_id: int): - leaderboard_list = await leaderboardQ.get_leaderboards(by_challenge_id=challenge_id) - - for ld in leaderboard_list: - await build_leaderboard(leaderboard_id=ld.id) diff --git a/vocolab/lib/testing/__init__.py b/vocolab/lib/testing/__init__.py deleted file mode 100644 index 54ebb0c..0000000 --- a/vocolab/lib/testing/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .submissions import * diff --git a/vocolab/lib/testing/submissions.py b/vocolab/lib/testing/submissions.py deleted file mode 100644 index 4e3de0c..0000000 --- a/vocolab/lib/testing/submissions.py +++ /dev/null @@ -1,32 +0,0 @@ -import uuid -from pathlib import Path - -import numpy as np -import yaml - -from vocolab.settings import get_settings - -_settings = get_settings() - - -def create_fake_submission(username: str, challenge_label: str) -> Path: - """ Creates some fake files for testing submissions """ - submission_id = str(uuid.uuid4()) - location = (_settings.user_data_dir / username / 'submissions' / challenge_label / submission_id) - location.mkdir(parents=True, exist_ok=True) - for i in range(100): - np.savetxt(str(location / f'fx_{i}.txt'), np.random.rand(8, 8)) # noqa: numpy sucks at typing - - with (location / 'meta.yml').open('w') as fp: - v = dict( - author='Test Guy et al.', - description='This is data for tests', - ) - yaml.dump(v, fp) - - return location - - - - - diff --git a/vocolab/lib/users_lib.py b/vocolab/lib/users_lib.py deleted file mode 100644 index 43cb17d..0000000 --- a/vocolab/lib/users_lib.py +++ /dev/null @@ -1,31 +0,0 @@ -import hashlib -import os -from typing import Callable - -from vocolab.db import models -from vocolab.lib import _fs - -# export functions -update_user_data = _fs.users.update_user_data -get_user_data: Callable[[str], models.api.UserData] = _fs.users.get_user_data - - -def hash_pwd(*, password: str, salt=None): - """ Creates a hash of 
the given password. - If salt is None generates a random salt. - - :arg password the password to hash - :arg salt a value to salt the hashing - :returns hashed_password, salt - """ - - if salt is None: - salt = os.urandom(32) # make random salt - - hash_pass = hashlib.pbkdf2_hmac( - 'sha256', # The hash digest algorithm for HMAC - password.encode('utf-8'), # Convert the password to bytes - salt, # Provide the salt - 100000 # It is recommended to use at least 100,000 iterations of SHA-256 - ) - return hash_pass, salt diff --git a/vocolab/lib/worker_lib/tasks/update.py b/vocolab/lib/worker_lib/tasks/update.py deleted file mode 100644 index fd87250..0000000 --- a/vocolab/lib/worker_lib/tasks/update.py +++ /dev/null @@ -1,31 +0,0 @@ -import asyncio - -from vocolab import out, get_settings -from vocolab.db.models import tasks -from vocolab.lib import submissions_lib - -_settings = get_settings() - - -def update_task_fn(sum_: tasks.SubmissionUpdateMessage): - async def eval_function(msg: tasks.SubmissionUpdateMessage): - """ Evaluate a function type BrokerCMD """ - with submissions_lib.SubmissionLogger(msg.submission_id) as lg: - out.log.debug(msg.dict()) - - if msg.updateType == tasks.UpdateType.evaluation_complete: - await submissions_lib.complete_evaluation( - submission_id=msg.submission_id, hostname=msg.hostname, - logger=lg) - elif msg.updateType == tasks.UpdateType.evaluation_failed: - await submissions_lib.fail_evaluation( - submission_id=msg.submission_id, hostname=msg.hostname, - logger=lg) - elif msg.updateType == tasks.UpdateType.evaluation_canceled: - await submissions_lib.cancel_evaluation( - submission_id=msg.submission_id, hostname=msg.hostname, - logger=lg) - else: - raise ValueError("Unknown update task !!!") - - asyncio.run(eval_function(sum_)) diff --git a/vocolab/settings.py b/vocolab/settings.py index 069c518..1a92372 100644 --- a/vocolab/settings.py +++ b/vocolab/settings.py @@ -1,22 +1,22 @@ -import secrets -import shutil - import os import 
platform +import secrets +import shutil +import tempfile +from contextlib import contextmanager from datetime import timedelta from functools import lru_cache -from pathlib import Path -from typing import List, Union, Set, Dict, Optional, Literal from importlib.metadata import version, PackageNotFoundError +from pathlib import Path +from typing import List, Union, Set, Dict, Optional, Generator try: from tomllib import load as toml_load except ImportError: from toml import load as toml_load - from pydantic import ( - BaseSettings, EmailStr, DirectoryPath, HttpUrl, IPvAnyNetwork, BaseModel, Field + BaseSettings, EmailStr, DirectoryPath, HttpUrl, IPvAnyNetwork, BaseModel ) @@ -38,10 +38,6 @@ class ConsoleOutputSettings(BaseModel): ERROR_LOG_FILE: Optional[Path] = None -class DatabaseSettings(BaseModel): - db_file: str = 'vocolab.db' - - class CeleryWorkerOptions(BaseModel): celery_bin: Path = Path(shutil.which('celery')) celery_nodes: Dict[str, str] = { @@ -73,10 +69,11 @@ class TaskQueueSettings(BaseModel): HOSTS: Set[str] = set() REMOTE_STORAGE: Dict[str, Path] = dict() REMOTE_BIN: Dict[str, Path] = dict() - AUTO_EVAL: bool = True + AUTO_EVAL: bool = False class AppSettings(BaseModel): + platform_name: str = "VOCOLAB" app_name: str = "VocoLab Challenge API" maintainers: str = "Organisation Name" admin_email: EmailStr = EmailStr("contact@email.com") @@ -150,10 +147,18 @@ class UserSettings(BaseModel): submission_interval: timedelta = timedelta(days=1) +class VocolabExtensions(BaseModel): + leaderboards_extension: Optional[str] = None + submission_extension: Optional[str] = None + + class _VocoLabSettings(BaseSettings): """ Base Settings for module """ app_home: DirectoryPath = Path(__file__).parent - DATA_FOLDER: DirectoryPath = Path('data/') + DATA_FOLDER: DirectoryPath = Path('/data') + TMP_ROOT: DirectoryPath = Path('/tmp') + ARCHIVE_FOLDER: Path = Path('/archive') + ARCHIVE_HOST: str = "localhost" # Settings Categories app_options: AppSettings = AppSettings() @@ 
-165,10 +170,17 @@ class _VocoLabSettings(BaseSettings): notify_options: NotifySettings = NotifySettings() server_options: ServerSettings = ServerSettings() user_options: UserSettings = UserSettings() - database_options: DatabaseSettings = DatabaseSettings() + extensions: VocolabExtensions = VocolabExtensions() CUSTOM_TEMPLATES_DIR: Optional[Path] = None + @property + def data_lock(self) -> Path: + return self.DATA_FOLDER / 'readonly.lock' + + def is_locked(self) -> bool: + return self.data_lock.is_file() + @property def static_files_directory(self) -> Path: """ Directory containing static files served by the API """ @@ -192,7 +204,13 @@ def leaderboard_dir(self) -> Path: @property def submission_archive_dir(self) -> Path: """directory pointing to archived submissions """ - return self.DATA_FOLDER / 'submissions/archive' + return self.ARCHIVE_FOLDER / 'submissions' + + @property + def remote_archive(self) -> bool: + return self.ARCHIVE_HOST not in ( + 'localhost', '127.0.0.1', self.app_options.hostname + ) @property def templates_dir(self) -> Path: @@ -231,7 +249,36 @@ def secret(self): with (self.DATA_FOLDER / '.secret').open('rb') as fp: return fp.read().decode() + @property + def database_file(self): + """ Path to the database file """ + return self.DATA_FOLDER / 'vocolab.db' + + @property + def database_connection_url(self): + """ Database connection url """ + return f"sqlite:///{self.database_file}" + + @property + def email_verif_path(self) -> str: + """ Load API path for verifying emails """ + with (self.DATA_FOLDER / 'email_verification.path').open() as fp: + return fp.read().strip() + @property + def password_reset_path(self) -> str: + """ Load API path for resetting passwords """ + with (self.DATA_FOLDER / 'password_reset.path').open() as fp: + return fp.read().strip() + + @contextmanager + def get_temp_dir(self) -> Generator[Path, None, None]: + """ Create a temporary directory """ + temp_dir = tempfile.TemporaryDirectory(prefix="voco-", 
dir=str(self.TMP_ROOT)) + try: + yield Path(temp_dir.name) + finally: + temp_dir.cleanup() class Config: env_prefix = 'VC_' diff --git a/vocolab/worker/echo_test.py b/vocolab/worker/echo_test.py index 2a4cc32..9b74411 100644 --- a/vocolab/worker/echo_test.py +++ b/vocolab/worker/echo_test.py @@ -1,11 +1,11 @@ -from vocolab.db.models import tasks as model_task +from vocolab.data import models from vocolab.worker.server import echo while True: msg = input("msg1: ") if msg == "quit": break - slm = model_task.SimpleLogMessage(label="test-client", message=msg) + slm = models.tasks.SimpleLogMessage(label="test-client", message=msg) echo.delay(slm.dict()) print("submitted\nNext") diff --git a/vocolab/worker/server.py b/vocolab/worker/server.py index f8bb290..c24b869 100644 --- a/vocolab/worker/server.py +++ b/vocolab/worker/server.py @@ -5,8 +5,8 @@ from celery import Celery from vocolab import out, get_settings -from vocolab.db.models import tasks -from vocolab.lib import worker_lib +from vocolab.data import models +from vocolab.core import worker_lib # """"""""""""""""""""""""""""""""""""" # todo: read up on what is the best pool/supervisor @@ -28,19 +28,19 @@ @app.task(name='echo-task', ignore_result=True) def echo(slm: Dict): - slm = tasks.SimpleLogMessage(**slm) + slm = models.tasks.SimpleLogMessage(**slm) worker_lib.tasks.echo_fn(slm) @app.task(name='update-task', ignore_result=True) def update(sum_: Dict): - sum_ = tasks.SubmissionUpdateMessage(**sum_) + sum_ = models.tasks.SubmissionUpdateMessage(**sum_) out.log.log(f'updating {sum_.submission_id}') worker_lib.tasks.update_task_fn(sum_) @app.task(name='eval-task', ignore_result=True) def evaluate(sem: Dict): - sem = tasks.SubmissionEvaluationMessage(**sem) + sem = models.tasks.SubmissionEvaluationMessage(**sem) out.log.log(f'evaluating {sem.submission_id}') worker_lib.tasks.evaluate_submission_fn(sem)