From c181a60888cc32f54efabd8ed55367ad26e73e1e Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Fri, 27 Jan 2023 19:39:28 +0100 Subject: [PATCH 01/28] WIP: update Submission procedure --- vocolab/api/endpoints/challenges.py | 10 +- vocolab/db/models/api/challenges.py | 9 +- vocolab/db/models/file_split.py | 28 --- vocolab/lib/old_submission_lib/__init__.py | 0 .../submissions.py | 2 +- .../submissions_lib.py | 0 vocolab/lib/submission_lib/__init__.py | 0 vocolab/lib/submission_lib/logs.py | 112 +++++++++ vocolab/lib/submission_lib/submission.py | 220 ++++++++++++++++++ vocolab/lib/submission_lib/upload.py | 187 +++++++++++++++ 10 files changed, 537 insertions(+), 31 deletions(-) delete mode 100644 vocolab/db/models/file_split.py create mode 100644 vocolab/lib/old_submission_lib/__init__.py rename vocolab/lib/{_fs => old_submission_lib}/submissions.py (99%) rename vocolab/lib/{ => old_submission_lib}/submissions_lib.py (100%) create mode 100644 vocolab/lib/submission_lib/__init__.py create mode 100644 vocolab/lib/submission_lib/logs.py create mode 100644 vocolab/lib/submission_lib/submission.py create mode 100644 vocolab/lib/submission_lib/upload.py diff --git a/vocolab/api/endpoints/challenges.py b/vocolab/api/endpoints/challenges.py index 2ccd5d0..772acb1 100644 --- a/vocolab/api/endpoints/challenges.py +++ b/vocolab/api/endpoints/challenges.py @@ -1,6 +1,7 @@ """ Routing for /challenges section of the API This section handles challenge data """ +from datetime import datetime from typing import List from fastapi import ( @@ -32,7 +33,14 @@ async def get_challenge_info(challenge_id: int): return await challengesQ.get_challenge(challenge_id=challenge_id, allow_inactive=True) -# todo test submit creation +@router.get('/model/create') +async def get_model_id(first_author_name: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): + new_model_id = f"{first_author_name[:3]}{str(datetime.now().year)[2:]}" + # todo: check + return new_model_id + 
+ +# todo: update submission process @router.post('/{challenge_id}/submission/create', responses={404: {"model": models.api.Message}}) async def create_submission( challenge_id: int, data: models.api.NewSubmissionRequest, diff --git a/vocolab/db/models/api/challenges.py b/vocolab/db/models/api/challenges.py index 259e88d..b7bae1b 100644 --- a/vocolab/db/models/api/challenges.py +++ b/vocolab/db/models/api/challenges.py @@ -1,6 +1,7 @@ """ Dataclasses representing API/challenge input output data types """ from datetime import date -from typing import Optional, List, Tuple +from pathlib import Path +from typing import Optional, List, Tuple, Dict from pydantic import BaseModel, HttpUrl @@ -36,9 +37,15 @@ class SubmissionRequestFileIndexItem(BaseModel): class NewSubmissionRequest(BaseModel): """ Dataclass used for input in the creation of a new submission to a challenge """ + username: str + track_label: str + track_id: int + model_id: str filename: str hash: str multipart: bool + has_scores: bool + leaderboards: Dict[str, Path] index: Optional[List[SubmissionRequestFileIndexItem]] diff --git a/vocolab/db/models/file_split.py b/vocolab/db/models/file_split.py deleted file mode 100644 index 1078e45..0000000 --- a/vocolab/db/models/file_split.py +++ /dev/null @@ -1,28 +0,0 @@ -from pathlib import Path -from typing import List, Optional - -from pydantic import BaseModel - - -class ManifestIndexItem(BaseModel): - """ Model representing a file item in the SplitManifest """ - file_name: str - file_size: int - file_hash: str - - def __eq__(self, other: 'ManifestIndexItem'): - return self.file_hash == other.file_hash - - def __hash__(self): - return int(self.file_hash, 16) - - -class SplitManifest(BaseModel): - """ Data Model used for the binary split function as a manifest to allow merging """ - filename: str - tmp_location: Path - hash: str - index: Optional[List[ManifestIndexItem]] - received: Optional[List[ManifestIndexItem]] = [] - multipart: bool = True - hashed_parts: 
bool = True diff --git a/vocolab/lib/old_submission_lib/__init__.py b/vocolab/lib/old_submission_lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vocolab/lib/_fs/submissions.py b/vocolab/lib/old_submission_lib/submissions.py similarity index 99% rename from vocolab/lib/_fs/submissions.py rename to vocolab/lib/old_submission_lib/submissions.py index 87f8a3b..cad7ca7 100644 --- a/vocolab/lib/_fs/submissions.py +++ b/vocolab/lib/old_submission_lib/submissions.py @@ -11,7 +11,7 @@ from vocolab import get_settings, exc from vocolab.db import models -from .commons import md5sum, rsync, ssh_exec, zip_folder +from .._fs.commons import md5sum, rsync, ssh_exec, zip_folder _settings = get_settings() diff --git a/vocolab/lib/submissions_lib.py b/vocolab/lib/old_submission_lib/submissions_lib.py similarity index 100% rename from vocolab/lib/submissions_lib.py rename to vocolab/lib/old_submission_lib/submissions_lib.py diff --git a/vocolab/lib/submission_lib/__init__.py b/vocolab/lib/submission_lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vocolab/lib/submission_lib/logs.py b/vocolab/lib/submission_lib/logs.py new file mode 100644 index 0000000..73305b3 --- /dev/null +++ b/vocolab/lib/submission_lib/logs.py @@ -0,0 +1,112 @@ +from datetime import datetime +from pathlib import Path +from typing import Optional, TextIO + +from pydantic import BaseModel + +from .._fs.commons import ssh_exec +from ...settings import get_settings + +_settings = get_settings() + + +class SubmissionLogger(BaseModel): + """ Class managing individual logging of submission life-cycle """ + root_dir: Path + fp_write: Optional[TextIO] = None + + @property + def submission_id(self) -> str: + return self.root_dir.name + + @property + def submission_log(self) -> Path: + """ File storing generic submission_logs""" + return self.root_dir / 'submission.log' + + @property + def eval_log_file(self) -> Path: + """ Logfile storing latest evaluation process """ + 
return self.root_dir / 'evaluation.log' + + @property + def slurm_log_file(self) -> Path: + """ Logfile storing latest slurm output (used during eval process)""" + return self.root_dir / "slurm.log" + + def __enter__(self): + """ Logging context open """ + self.fp_write = self.submission_log.open('a') + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """ Logging context close """ + if self.fp_write is not None: + self.fp_write.close() + self.fp_write = None + + @staticmethod + def when(): + return f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}" + + def header(self, who: str, task: str, + multipart: bool = False, has_scores: bool = True, auto_eval: bool = False): + """ + who: user that did the submission (should be owner or admin) + task: the benchmark/task that the submission correspongs + has_scores: whether the submission has scores + multipart: whether the submission was uploaded as multipart + auto_eval: whether an auto-evaluation pipeline is set up for this submission + """ + with self.submission_log.open('w') as fp: + fp.write(f"[{self.when()}]: Submission {self.submission_id} was created\n") + fp.write(f"--> user: {who}\n") + fp.write(f"--> challenge: {task}\n") + fp.write(f"--> has_scores: {has_scores}") + fp.write(f"--> is_multipart: {multipart}\n") + fp.write(f"--> auto_eval: {auto_eval}\n") + + @property + def slurm_logs(self): + """ """ + lines = [] + if self.slurm_log_file.is_file(): + with self.slurm_log_file.open() as fp: + lines = fp.readlines() + return lines + + def append_eval(self, eval_output): + with self.eval_log_file.open('a') as fp: + fp.write(f"-------- start of evaluation output --------\n") + fp.write(f"---> {datetime.now().isoformat()}") + fp.write(f"{eval_output.rstrip()}\n") + for line in self.slurm_logs: + fp.write(f"{line.strip()}\n") + fp.write(f"-------- end of evaluation output ----------\n") + + def log(self, msg, date: bool = True): + """ Create a new log entry """ + if date: + msg = f"[{self.when()}] {msg}" 
+ + if self.fp_write: + self.fp_write.write(f"{msg}\n") + else: + with self.submission_log.open('a') as fp: + fp.write(f"{msg}\n") + + def get_text(self): + """ Get full submission log """ + if self.submission_log.is_file(): + with self.submission_log.open('r') as fp: + return fp.readlines() + return [] + + def fetch_remote(self, host, remote_submission_location): + """ Fetch eval & append log from remote """ + return_code, result = ssh_exec(host, [f'cat', f'{remote_submission_location}/{self.eval_log_file}']) + if return_code == 0: + self.log(result, date=False) + else: + self.log(f"Failed to fetch {host}:{remote_submission_location}/{self.submission_log} !!") + diff --git a/vocolab/lib/submission_lib/submission.py b/vocolab/lib/submission_lib/submission.py new file mode 100644 index 0000000..b4ccee9 --- /dev/null +++ b/vocolab/lib/submission_lib/submission.py @@ -0,0 +1,220 @@ +import json +from datetime import datetime +from pathlib import Path +from typing import Dict, List + +from fastapi import UploadFile +from pydantic import BaseModel + +from ...db import models +from ...settings import get_settings +from .logs import SubmissionLogger +from .upload import MultipartUploadHandler, SinglepartUploadHandler + +_settings = get_settings() + + +class SubmissionInfo(BaseModel): + model_id: str + username: str + track_id: int + track_label: str + submission_id: str + created_at: datetime + leaderboard_entries: Dict[str, Path] + + +class SubmissionDir(BaseModel): + """ Handler interfacing a submission directory stored on disk """ + root_dir: Path + + @classmethod + def load(cls, model_id: str, submission_id: str): + """ Load item from model-id & submission-id""" + root = _settings.submission_dir / model_id / submission_id + if not root.is_dir(): + raise FileNotFoundError(f'Submission {model_id}/{submission_id} does not exist') + return cls(root_dir=root) + + @property + def submission_id(self) -> str: + """ Returns the submission id """ + return self.root_dir.name + + 
@property + def content(self) -> Path: + return self.root_dir / 'content' + + def has_input(self) -> bool: + return self.content.is_dir() + + @property + def scores(self) -> Path: + """ the scores folders contains all the output files created by the evaluation process """ + return self.content / 'scores' + + def has_scores(self) -> bool: + return self.scores.is_dir() + + @property + def info_file(self) -> Path: + """ info file contains meta data relative to the submission """ + return self.root_dir / 'info.json' + + def has_info(self) -> bool: + """ Check whether info file is present""" + return self.info_file.is_file() + + @property + def info(self) -> SubmissionInfo: + """ Load submission information """ + with self.info_file.open() as fp: + return SubmissionInfo.parse_obj(json.load(fp)) + + @property + def multipart_dir(self) -> Path: + """ multipart dir contains the chunks & index for multipart uploads """ + return self.root_dir / '.parts' + + @property + def multipart_index_file(self) -> Path: + """ multipart index file contains info pertaining to multipart upload + - split & merge manifest (order to merge the files) + - checksums to verify upload & merge + """ + return self.multipart_dir / 'upload.json' + + def is_multipart(self) -> bool: + """ Check whether file was uploaded as multipart """ + return self.multipart_dir.is_dir() and self.multipart_index_file.is_file() + + @property + def upload_lock(self) -> Path: + """ a lockfile locking the submission while upload has not completed """ + return self.root_dir / 'upload.lock' + + @property + def eval_lock(self) -> Path: + """ a lockfile locking the submission while evaluation is ongoing """ + return self.root_dir / 'eval.lock' + + @property + def error_lock(self) -> Path: + """ a lockfile locking the submission while evaluation is ongoing """ + return self.root_dir / 'error.lock' + + @property + def clean_lock(self) -> Path: + """ a lockfile marking the submission for deletion """ + return self.root_dir / 
'clean.lock' + + @property + def interrupted_lock(self) -> Path: + """ a lockfile to signify that a process was running and was interrupted """ + return self.root_dir / 'interrupted.lock' + + def clean_all_locks(self): + """ Remove all lock files in submission""" + self.upload_lock.unlink(missing_ok=True) + self.eval_lock.unlink(missing_ok=True) + self.error_lock.unlink(missing_ok=True) + self.interrupted_lock.unlink(missing_ok=True) + self.clean_lock.unlink(missing_ok=True) + + def get_log_handler(self) -> SubmissionLogger: + """ build the SubmissionLogger class that allows to log submission relative events """ + return SubmissionLogger(root_dir=self.root_dir) + + + def add_content(self, file_name: str, file_size: int, file_hash: str, data: UploadFile): + """ todo: write method description """ + if self.is_multipart(): + # Multipart content + handler = MultipartUploadHandler.load_from_index(self.multipart_index_file) + handler.add_part( + logger=self.get_log_handler(), + file_name=file_name, + file_size=file_size, + file_hash=file_hash, + data=data + ) + handler.dump_to_index(self.multipart_index_file) + + if handler.completed(): + handler.merge_parts() + else: + # todo return missing? remaining ? something + pass + else: + # singlepart content + # TODO: continue this section here ...... 
+ handler = SinglepartUploadHandler(root_dir=self.root_dir) + handler.write_data(file_name=file_name, ) + + + + + +class ModelDir(BaseModel): + root_dir: Path + + @property + def label(self): + return self.root_dir.name + + @classmethod + def load(cls, model_id: str): + root = _settings.submission_dir / model_id + + if not root.is_dir(): + raise FileNotFoundError(f'Model {model_id} does not exist') + return cls(root_dir=root) + + def make_submission(self, submission_id: str, auto_eval: bool, request_meta: models.api.NewSubmissionRequest): + root_dir = self.root_dir / submission_id + if root_dir.is_dir(): + raise FileExistsError(f'Submission {submission_id} cannot be created as it already exists') + # create the dir + root_dir.mkdir() + submission_dir = SubmissionDir(root_dir=root_dir) + submission_dir.content.mkdir() + + sub_info = SubmissionInfo( + username=request_meta.username, + track_id=request_meta.track_id, + track_label=request_meta.track_label, + submission_id=submission_id, + created_at=datetime.now(), + leaderboard_entries=request_meta.leaderboards + ) + + # todo save info as file + + if request_meta.multipart: + submission_dir.multipart_dir.mkdir(exist_ok=True) + # todo build class for multipart index + else: + with submission_dir.singlepart_hash_file.open('w') as fp: + fp.write(request_meta.hash) + + submission_dir.get_log_handler().header( + who=request_meta.username, + task=request_meta.track_label, + multipart=request_meta.multipart, + has_scores=request_meta.has_scores, + auto_eval=auto_eval + ) + + # create upload lockfile + submission_dir.upload_lock.touch() + + @property + def submissions(self) -> List[SubmissionDir]: + return [ + SubmissionDir.load(self.label, sub_id.name) + for sub_id in self.root_dir.iterdir() + if sub_id.is_dir() + ] + + def get_submission(self, submission_id: str): + return SubmissionDir.load(self.label, submission_id) diff --git a/vocolab/lib/submission_lib/upload.py b/vocolab/lib/submission_lib/upload.py new file mode 
100644 index 0000000..7d15f28 --- /dev/null +++ b/vocolab/lib/submission_lib/upload.py @@ -0,0 +1,187 @@ +import json +import shutil +from hmac import compare_digest +from pathlib import Path +from typing import List, Optional + +from fastapi import UploadFile +from fsplit.filesplit import Filesplit +from pydantic import BaseModel +import pandas as pd +import numpy as np + +from vocolab import exc +from .._fs.commons import md5sum +from .logs import SubmissionLogger + +""" +####### File Splitting Note ####### +Splitting & Merging of archives uses the protocol defined by the filesplit package. +This protocol requires the split to use the same method as a manifest is created which +then allows to merge the parts into the original file. + +For more information see documentation : https://pypi.org/project/filesplit/ + +NOTE: v3.0.2 is currently used, an update to v4 needs to be implemented. +""" + + +class SinglepartUploadHandler(BaseModel): + root_dir: Path + + @property + def target_file(self): + return self.root_dir / 'content_archive.zip' + + @property + def hash_file(self) -> Path: + """ singlepart upload can be verified by the checksum inside this file """ + return self.root_dir / 'archive.hash' + + @property + def file_hash(self): + """ Load promised md5sum of content archive """ + with self.hash_file.open() as fp: + return fp.read().replace('\n', '') + + @property + def success(self): + return self.target_file.is_file() + + def write_data(self, logger: SubmissionLogger, file_name: str, file_hash: str, data: UploadFile): + logger.log(f"adding a new part to upload: {file_name}") + + # Add the part + with self.target_file.open('wb') as fp: + for d in data.file: + fp.write(d) + + if not md5sum(self.target_file) == file_hash: + # todo: more stuff see multipart fail + self.target_file.unlink() + raise exc.ValueNotValid("Hash does not match expected!") + + logger.log(f" --> file was uploaded successfully", date=False) + + +class ManifestIndexItem(BaseModel): + """ 
Model representing a file item in the SplitManifest """ + file_name: str + file_size: int + file_hash: str + + def __eq__(self, other: 'ManifestIndexItem'): + return self.file_hash == other.file_hash + + def __hash__(self): + return int(self.file_hash, 16) + + +class MultipartUploadHandler(BaseModel): + """ Data Model used for the binary split function as a manifest to allow merging """ + store_location: Path + merge_hash: str + index: Optional[List[ManifestIndexItem]] + received: Optional[List[ManifestIndexItem]] = [] + multipart: bool = True + hashed_parts: bool = True + target_location: Path + + + @property + def target_file(self): + return self.target_location / 'submission.zip' + + @property + def success(self): + return self.target_file.is_file() + + @property + def remaining_items(self) -> set[ManifestIndexItem]: + """ Return a set with remaining items """ + return set(self.index) - set(self.received) + + @property + def remaining_nb(self) -> int: + return len(self.remaining_items) + + def completed(self) -> bool: + return len(self.received) == len(self.index) + + @classmethod + def load_from_index(cls, file: Path): + """ Load """ + with file.open() as fp: + return cls.parse_obj(json.load(fp)) + + def dump_to_index(self, file: Path): + with file.open("w") as fp: + fp.write(self.json(indent=4)) + + def add_part(self, logger: SubmissionLogger, file_name: str, file_size: int, file_hash: str, data: UploadFile): + """ Add a part to a multipart upload type submission. + + - Write the data into a file inside the submission folder. 
+ + :raises + - JSONError, ValidationError: If manifest is not properly formatted + - ResourceRequestedNotFound: if file not present in the manifest + - ValueNotValid if md5 hash of file does not match md5 recorded in the manifest + """ + logger.log(f"adding a new part to upload: {self.store_location / file_name}") + new_item_mf = ManifestIndexItem( + file_name=file_name, + file_size=file_size, + file_hash=file_hash + ) + + if new_item_mf not in self.index: + logger.log(f"(ERROR) file {file_name} was not found in manifest, upload canceled!!") + raise exc.ResourceRequestedNotFound(f"Part {file_name} is not part of submission {logger.submission_id}!!") + + # write data on disk + file_part = self.store_location / file_name + with file_part.open('wb') as fp: + for d in data.file: + fp.write(d) + + calc_hash = md5sum(file_part) + if not compare_digest(calc_hash, file_hash): + # remove file and throw exception + file_part.unlink() + data = f"failed hash comparison" \ + f"file: {file_part} with hash {calc_hash}" \ + f"on record found : {file_name} with hash {file_hash}" + logger.log(f"(ERROR) {data}, upload canceled!!") + raise exc.ValueNotValid("Hash of part does not match given hash", data=data) + + # up count of received parts + self.received.append(new_item_mf) + + logger.log(f" --> part was added successfully", date=False) + + def merge_parts(self): + """ Merge parts into the target file using filesplit protocol """ + # TODO: update filesplit==3.0.2 to 4.0.0 (breaking upgrade) + # for update see https://pypi.org/project/filesplit/ + if self.hashed_parts: + for item in self.index: + assert md5sum(self.store_location / item.file_name) == item.file_hash, \ + f"file {item.file_name} does not match md5" + + df = pd.DataFrame([ + (i.file_name, i.file_size) + for i in self.index + ]) + df.columns = ['filename', 'filesize'] + df['encoding'] = np.nan + df['header'] = np.nan + df.to_csv((self.store_location / 'fs_manifest.csv')) + fs = Filesplit() + 
fs.merge(input_dir=f"{self.store_location}", output_file=str(self.target_file)) + assert md5sum(self.target_file) == self.merge_hash, "output file does not match original md5" + + + def clean(self): + """ Delete index & parts used for multipart upload """ + shutil.rmtree(self.store_location) From 4d52f807a1896b4e33b578b254e4689b75319623 Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Mon, 30 Jan 2023 19:39:13 +0100 Subject: [PATCH 02/28] WIP: update Submission file structure --- vocolab/admin/commands/api.py | 2 +- vocolab/admin/commands/challenges.py | 2 +- vocolab/admin/commands/evaluators.py | 3 +- vocolab/admin/commands/leaderboards.py | 3 +- vocolab/admin/commands/messaging.py | 2 +- vocolab/admin/commands/settings.py | 2 +- vocolab/admin/commands/submissions.py | 37 +- vocolab/admin/commands/task_worker.py | 2 +- vocolab/admin/commands/test.py | 3 +- vocolab/admin/commands/user.py | 5 +- vocolab/admin/main.py | 3 +- vocolab/api/endpoints/auth.py | 2 +- vocolab/api/endpoints/challenges.py | 8 +- vocolab/api/endpoints/leaderboards.py | 2 +- vocolab/api/endpoints/users.py | 6 +- vocolab/api/pages/users.py | 2 +- vocolab/{lib => core}/__init__.py | 0 vocolab/{lib => core}/api_lib.py | 4 +- vocolab/{admin => core}/cmd_lib.py | 0 vocolab/{lib/_fs => core}/commons.py | 0 vocolab/{lib => core}/evaluators_lib.py | 10 +- vocolab/{lib => core}/leaderboards_lib.py | 6 +- vocolab/{lib => core}/misc/__init__.py | 0 .../{lib => core}/misc/various_definitions.py | 0 .../{lib => core}/misc/various_functions.py | 0 vocolab/{lib => core}/notify/__init__.py | 0 vocolab/{lib => core}/notify/email.py | 0 vocolab/{lib => core}/notify/mattermost.py | 0 .../old_implementations}/__init__.py | 0 .../old_implementations/commons}/__init__.py | 3 +- .../commons}/file_spilt.py | 0 .../commons}/leaderboards.py | 0 .../old_implementations/commons}/users.py | 0 .../old_implementations}/submissions.py | 0 .../old_implementations}/submissions_lib.py | 0 .../{lib => 
core}/submission_lib/__init__.py | 0 vocolab/{lib => core}/submission_lib/logs.py | 2 +- vocolab/core/submission_lib/submission.py | 347 ++++++++++++++++++ .../{lib => core}/submission_lib/upload.py | 13 +- vocolab/{lib => core}/testing/__init__.py | 0 vocolab/{lib => core}/testing/submissions.py | 0 vocolab/{lib => core}/users_lib.py | 0 vocolab/{lib => core}/worker_lib/__init__.py | 0 .../worker_lib/tasks/__init__.py | 0 .../{lib => core}/worker_lib/tasks/echo.py | 0 .../{lib => core}/worker_lib/tasks/eval.py | 6 +- .../{lib => core}/worker_lib/tasks/update.py | 10 +- vocolab/{lib => core}/worker_lib/utils.py | 0 vocolab/db/models/__init__.py | 1 - vocolab/db/q/challenges.py | 2 +- vocolab/db/q/leaderboards.py | 2 +- vocolab/db/q/users.py | 2 +- vocolab/db/schema/auth.py | 3 +- vocolab/db/schema/challenges.py | 13 +- vocolab/exc.py | 2 +- vocolab/lib/submission_lib/submission.py | 220 ----------- vocolab/settings.py | 24 +- vocolab/worker/server.py | 2 +- 58 files changed, 445 insertions(+), 311 deletions(-) rename vocolab/{lib => core}/__init__.py (100%) rename vocolab/{lib => core}/api_lib.py (97%) rename vocolab/{admin => core}/cmd_lib.py (100%) rename vocolab/{lib/_fs => core}/commons.py (100%) rename vocolab/{lib => core}/evaluators_lib.py (86%) rename vocolab/{lib => core}/leaderboards_lib.py (95%) rename vocolab/{lib => core}/misc/__init__.py (100%) rename vocolab/{lib => core}/misc/various_definitions.py (100%) rename vocolab/{lib => core}/misc/various_functions.py (100%) rename vocolab/{lib => core}/notify/__init__.py (100%) rename vocolab/{lib => core}/notify/email.py (100%) rename vocolab/{lib => core}/notify/mattermost.py (100%) rename vocolab/{lib/old_submission_lib => core/old_implementations}/__init__.py (100%) rename vocolab/{lib/_fs => core/old_implementations/commons}/__init__.py (81%) rename vocolab/{lib/_fs => core/old_implementations/commons}/file_spilt.py (100%) rename vocolab/{lib/_fs => core/old_implementations/commons}/leaderboards.py 
(100%) rename vocolab/{lib/_fs => core/old_implementations/commons}/users.py (100%) rename vocolab/{lib/old_submission_lib => core/old_implementations}/submissions.py (100%) rename vocolab/{lib/old_submission_lib => core/old_implementations}/submissions_lib.py (100%) rename vocolab/{lib => core}/submission_lib/__init__.py (100%) rename vocolab/{lib => core}/submission_lib/logs.py (99%) create mode 100644 vocolab/core/submission_lib/submission.py rename vocolab/{lib => core}/submission_lib/upload.py (95%) rename vocolab/{lib => core}/testing/__init__.py (100%) rename vocolab/{lib => core}/testing/submissions.py (100%) rename vocolab/{lib => core}/users_lib.py (100%) rename vocolab/{lib => core}/worker_lib/__init__.py (100%) rename vocolab/{lib => core}/worker_lib/tasks/__init__.py (100%) rename vocolab/{lib => core}/worker_lib/tasks/echo.py (100%) rename vocolab/{lib => core}/worker_lib/tasks/eval.py (95%) rename vocolab/{lib => core}/worker_lib/tasks/update.py (77%) rename vocolab/{lib => core}/worker_lib/utils.py (100%) delete mode 100644 vocolab/lib/submission_lib/submission.py diff --git a/vocolab/admin/commands/api.py b/vocolab/admin/commands/api.py index cb9ace8..061a7cd 100644 --- a/vocolab/admin/commands/api.py +++ b/vocolab/admin/commands/api.py @@ -10,7 +10,7 @@ from jinja2 import Environment, FileSystemLoader from vocolab import get_settings, out -from vocolab.admin import cmd_lib +from vocolab.core import cmd_lib from vocolab.db.base import create_db _settings = get_settings() diff --git a/vocolab/admin/commands/challenges.py b/vocolab/admin/commands/challenges.py index 0f66577..6533d86 100644 --- a/vocolab/admin/commands/challenges.py +++ b/vocolab/admin/commands/challenges.py @@ -7,7 +7,7 @@ from rich.table import Table from vocolab import out -from vocolab.admin import cmd_lib +from vocolab.core import cmd_lib from vocolab.db import schema, models from vocolab.db.q import challengesQ diff --git a/vocolab/admin/commands/evaluators.py 
b/vocolab/admin/commands/evaluators.py index 55c68cb..83c9674 100644 --- a/vocolab/admin/commands/evaluators.py +++ b/vocolab/admin/commands/evaluators.py @@ -5,9 +5,8 @@ from rich.table import Table from vocolab import get_settings, out -from vocolab.admin import cmd_lib from vocolab.db.q import challenges as ch_queries -from vocolab.lib import evaluators_lib +from vocolab.core import evaluators_lib, cmd_lib _settings = get_settings() diff --git a/vocolab/admin/commands/leaderboards.py b/vocolab/admin/commands/leaderboards.py index f11d0cc..03d8a5e 100644 --- a/vocolab/admin/commands/leaderboards.py +++ b/vocolab/admin/commands/leaderboards.py @@ -7,10 +7,9 @@ from rich.table import Table from vocolab import out -from vocolab.admin import cmd_lib from vocolab.db import schema from vocolab.db.q import leaderboardQ -from vocolab.lib import leaderboards_lib +from vocolab.core import leaderboards_lib, cmd_lib class LeaderboardCMD(cmd_lib.CMD): diff --git a/vocolab/admin/commands/messaging.py b/vocolab/admin/commands/messaging.py index f820d56..55697b7 100644 --- a/vocolab/admin/commands/messaging.py +++ b/vocolab/admin/commands/messaging.py @@ -1,7 +1,7 @@ import sys from vocolab import out, get_settings -from vocolab.admin import cmd_lib +from vocolab.core import cmd_lib # api settings from vocolab.db.models.tasks import SimpleLogMessage, SubmissionUpdateMessage, UpdateType diff --git a/vocolab/admin/commands/settings.py b/vocolab/admin/commands/settings.py index ffbeb09..43bd760 100644 --- a/vocolab/admin/commands/settings.py +++ b/vocolab/admin/commands/settings.py @@ -5,7 +5,7 @@ from rich.markdown import Markdown from vocolab import get_settings, out -from vocolab.admin import cmd_lib +from vocolab.core import cmd_lib _settings = get_settings() diff --git a/vocolab/admin/commands/submissions.py b/vocolab/admin/commands/submissions.py index 271c47f..9b66031 100644 --- a/vocolab/admin/commands/submissions.py +++ b/vocolab/admin/commands/submissions.py @@ -6,11 
+6,10 @@ from rich.table import Table from vocolab import out, get_settings -from vocolab.admin import cmd_lib from vocolab.db.models.api import NewSubmissionRequest, NewSubmission from vocolab.db.q import challengesQ, userQ from vocolab.db import schema as db_challenges -from vocolab.lib import submissions_lib +from vocolab.core import submission_lib, cmd_lib # api settings _settings = get_settings() @@ -77,7 +76,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) - submission_fs = submissions_lib.get_submission_dir(args.submission_id, as_obj=True) + submission_fs = submission_lib.get_submission_dir(args.submission_id, as_obj=True) submission_fs.clean_all_locks() asyncio.run(challengesQ.update_submission_status( by_id=args.submission_id, status=args.status @@ -122,18 +121,18 @@ async def create_submission(ch_id, user_id): challenge, user, submission_id = asyncio.run(create_submission(args.challenge_id, args.user_id)) # create entry on disk - submissions_lib.make_submission_on_disk( + submission_lib.make_submission_on_disk( submission_id, user.username, challenge.label, NewSubmissionRequest( - filename=archive.name, hash=submissions_lib.md5sum(archive), + filename=archive.name, hash=submission_lib.md5sum(archive), multipart=False ) ) # fetch folder - folder = submissions_lib.get_submission_dir(submission_id) + folder = submission_lib.get_submission_dir(submission_id) # copy file shutil.copy(archive, folder / 'archive.zip') - submissions_lib.unzip(folder / 'archive.zip', folder / 'input') + submission_lib.unzip(folder / 'archive.zip', folder / 'input') # set status (folder / 'upload.lock').unlink() @@ -173,7 +172,7 @@ def run(self, argv): asyncio.run( # todo check if status is correctly set. 
- submissions_lib.evaluate(submission_id=submission.id, extra_args=extra_arguments) + submission_lib.evaluate(submission_id=submission.id, extra_args=extra_arguments) ) @@ -200,7 +199,7 @@ def run(self, argv): sys.exit(1) # transferring - submissions_lib.fetch_submission_from_remote(host=args.hostname, submission_id=args.submission_id) + submission_lib.fetch_submission_from_remote(host=args.hostname, submission_id=args.submission_id) class UploadSubmissionToRemote(cmd_lib.CMD): @@ -227,7 +226,7 @@ def run(self, argv): sys.exit(1) # transferring - submissions_lib.transfer_submission_to_remote(host=args.hostname, submission_id=args.submission_id) + submission_lib.transfer_submission_to_remote(host=args.hostname, submission_id=args.submission_id) class DeleteSubmissionCMD(cmd_lib.CMD): @@ -244,21 +243,21 @@ def run(self, argv): args = self.parser.parse_args(argv) if args.delete_by == 'by_id': - del_id = asyncio.run(submissions_lib.delete_submission(by_id=args.selector)) - submissions_lib.delete_submission_files(del_id[0]) + del_id = asyncio.run(submission_lib.delete_submission(by_id=args.selector)) + submission_lib.delete_submission_files(del_id[0]) out.cli.info(f"Successfully deleted: {args.selector}") elif args.delete_by == 'by_user': - deleted = asyncio.run(submissions_lib.delete_submission(by_user=int(args.selector))) + deleted = asyncio.run(submission_lib.delete_submission(by_user=int(args.selector))) for d in deleted: - submissions_lib.delete_submission_files(d) + submission_lib.delete_submission_files(d) out.cli.info(f"Successfully deleted: {d}") elif args.delete_by == 'by_track': - deleted = asyncio.run(submissions_lib.delete_submission(by_track=int(args.selector))) + deleted = asyncio.run(submission_lib.delete_submission(by_track=int(args.selector))) for d in deleted: - submissions_lib.delete_submission_files(d) + submission_lib.delete_submission_files(d) out.cli.info(f"Successfully deleted: {d}") else: out.cli.error("Error type of deletion unknown") @@ 
-311,11 +310,11 @@ def __init__(self, root, name, cmd_path): async def archive_submission(*args): for submission_id in args: # archive leaderboard entry - await submissions_lib.archive_leaderboard_entries(submission_id) + await submission_lib.archive_leaderboard_entries(submission_id) # remove submission from db - await submissions_lib.delete_submission(by_id=submission_id) + await submission_lib.delete_submission(by_id=submission_id) # zip & archive files - submissions_lib.archive_submission_files(submission_id) + submission_lib.archive_submission_files(submission_id) out.cli.info(f"Successfully archived: {submission_id}") diff --git a/vocolab/admin/commands/task_worker.py b/vocolab/admin/commands/task_worker.py index 3d47d12..70c823b 100644 --- a/vocolab/admin/commands/task_worker.py +++ b/vocolab/admin/commands/task_worker.py @@ -6,7 +6,7 @@ from jinja2 import Environment, FileSystemLoader from vocolab import out, get_settings -from vocolab.admin import cmd_lib +from vocolab.core import cmd_lib from vocolab.db.models import tasks from vocolab.worker import server diff --git a/vocolab/admin/commands/test.py b/vocolab/admin/commands/test.py index 01b86e4..c016c2f 100644 --- a/vocolab/admin/commands/test.py +++ b/vocolab/admin/commands/test.py @@ -6,9 +6,8 @@ from pydantic import EmailStr from vocolab import get_settings, out -from vocolab.admin import cmd_lib from vocolab.db.models.misc import UserCreate -from vocolab.lib import notify +from vocolab.core import notify, cmd_lib _settings = get_settings() diff --git a/vocolab/admin/commands/user.py b/vocolab/admin/commands/user.py index 83453c7..08664de 100644 --- a/vocolab/admin/commands/user.py +++ b/vocolab/admin/commands/user.py @@ -9,11 +9,10 @@ from rich.table import Table from vocolab import out, get_settings -from vocolab.admin import cmd_lib from vocolab.db.models.misc import UserCreate from vocolab.db.q import userQ, challengesQ -from vocolab.lib import notify -from vocolab.lib.misc import 
CustomTypesJsonEncoder +from vocolab.core import notify, cmd_lib +from vocolab.core.misc import CustomTypesJsonEncoder _settings = get_settings() diff --git a/vocolab/admin/main.py b/vocolab/admin/main.py index 55f8a04..2805f1c 100644 --- a/vocolab/admin/main.py +++ b/vocolab/admin/main.py @@ -1,7 +1,8 @@ import sys from vocolab import get_settings, out -from vocolab.admin import cmd_lib, commands +from vocolab.core import cmd_lib +from vocolab.admin import commands # settings _settings = get_settings() diff --git a/vocolab/api/endpoints/auth.py b/vocolab/api/endpoints/auth.py index ae06cac..aa0311b 100644 --- a/vocolab/api/endpoints/auth.py +++ b/vocolab/api/endpoints/auth.py @@ -14,7 +14,7 @@ from vocolab import exc, out from vocolab.db import schema, models from vocolab.db.q import userQ -from vocolab.lib import api_lib, notify +from vocolab.core import api_lib, notify from vocolab.settings import get_settings router = APIRouter() diff --git a/vocolab/api/endpoints/challenges.py b/vocolab/api/endpoints/challenges.py index 772acb1..42e67b8 100644 --- a/vocolab/api/endpoints/challenges.py +++ b/vocolab/api/endpoints/challenges.py @@ -11,7 +11,7 @@ from vocolab import out, exc from vocolab.db import schema, models from vocolab.db.q import challengesQ -from vocolab.lib import api_lib, submissions_lib +from vocolab.core import api_lib, submission_lib from vocolab.settings import get_settings router = APIRouter() @@ -57,7 +57,7 @@ async def create_submission( track_id=challenge.id, ), evaluator_id=challenge.evaluator) # create disk entry - submissions_lib.make_submission_on_disk( + submission_lib.make_submission_on_disk( submission_id, current_user.username, challenge.label, meta=data ) return submission_id @@ -77,11 +77,11 @@ async def upload_submission( if challenge is None: return ValueError(f'challenge {challenge_id} not found or inactive') try: - is_completed, remaining = submissions_lib.add_part(submission_id, part_name, file_data) + is_completed, remaining = 
submission_lib.add_part(submission_id, part_name, file_data) if is_completed: # run the completion of the submission on the background - background_tasks.add_task(submissions_lib.complete_submission, submission_id, with_eval=True) + background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) return models.api.UploadSubmissionPartResponse( completed=is_completed, remaining=[n.file_name for n in remaining] diff --git a/vocolab/api/endpoints/leaderboards.py b/vocolab/api/endpoints/leaderboards.py index 9a3ddd5..a1b83fb 100644 --- a/vocolab/api/endpoints/leaderboards.py +++ b/vocolab/api/endpoints/leaderboards.py @@ -10,7 +10,7 @@ from vocolab import exc from vocolab.db import models from vocolab.db.q import leaderboardQ -from vocolab.lib import api_lib +from vocolab.core import api_lib from vocolab.settings import get_settings router = APIRouter() diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index a476ff6..292749c 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -9,7 +9,7 @@ ) from vocolab import exc, out -from vocolab.lib import api_lib, users_lib, submissions_lib +from vocolab.core import api_lib, users_lib, submission_lib from vocolab.db import schema, models from vocolab.db.q import challengesQ, leaderboardQ from vocolab.settings import get_settings @@ -146,7 +146,7 @@ async def get_submission_status( raise exc.AccessError("current user is not allowed to preview this submission !", status=exc.http_status.HTTP_403_FORBIDDEN) - log = submissions_lib.SubmissionLogger(submissions_id) + log = submission_lib.SubmissionLogger(submissions_id) return log.get_text() @@ -157,7 +157,7 @@ async def get_user_results(submissions_id: str, current_user: schema.User = Depe if submission.user_id != current_user.id: raise exc.AccessError("current user is not allowed to preview this submission !", status=exc.http_status.HTTP_403_FORBIDDEN) - sub_location = 
submissions_lib.get_submission_dir(submission_id=submission.id) + sub_location = submission_lib.get_submission_dir(submission_id=submission.id) leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) result = {} diff --git a/vocolab/api/pages/users.py b/vocolab/api/pages/users.py index b9223ac..9d430c9 100644 --- a/vocolab/api/pages/users.py +++ b/vocolab/api/pages/users.py @@ -8,7 +8,7 @@ from fastapi.responses import HTMLResponse from vocolab import exc, out -from vocolab.lib import api_lib +from vocolab.core import api_lib from vocolab.db.q import userQ from vocolab.settings import get_settings diff --git a/vocolab/lib/__init__.py b/vocolab/core/__init__.py similarity index 100% rename from vocolab/lib/__init__.py rename to vocolab/core/__init__.py diff --git a/vocolab/lib/api_lib.py b/vocolab/core/api_lib.py similarity index 97% rename from vocolab/lib/api_lib.py rename to vocolab/core/api_lib.py index 11da18f..0624898 100644 --- a/vocolab/lib/api_lib.py +++ b/vocolab/core/api_lib.py @@ -8,14 +8,14 @@ from vocolab import settings from vocolab.db import schema, models from vocolab.db.q import userQ -from vocolab.lib import notify, _fs +from vocolab.core import notify, commons _settings = settings.get_settings() oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/auth/login") # export -file2dict = _fs.commons.load_dict_file +file2dict = commons.load_dict_file def validate_token(token: str = Depends(oauth2_scheme)) -> schema.Token: diff --git a/vocolab/admin/cmd_lib.py b/vocolab/core/cmd_lib.py similarity index 100% rename from vocolab/admin/cmd_lib.py rename to vocolab/core/cmd_lib.py diff --git a/vocolab/lib/_fs/commons.py b/vocolab/core/commons.py similarity index 100% rename from vocolab/lib/_fs/commons.py rename to vocolab/core/commons.py diff --git a/vocolab/lib/evaluators_lib.py b/vocolab/core/evaluators_lib.py similarity index 86% rename from vocolab/lib/evaluators_lib.py rename to vocolab/core/evaluators_lib.py index 
7e52799..1ed7108 100644 --- a/vocolab/lib/evaluators_lib.py +++ b/vocolab/core/evaluators_lib.py @@ -5,14 +5,12 @@ from vocolab import get_settings from vocolab.db import models -from vocolab.lib import ( - _fs -) +from vocolab.core import commons _settings = get_settings() # export -check_host = _fs.commons.check_host +check_host = commons.check_host def discover_evaluators(hostname: str, bin_location) -> List[models.cli.NewEvaluatorItem]: @@ -20,9 +18,9 @@ def discover_evaluators(hostname: str, bin_location) -> List[models.cli.NewEvalu cmd = shlex.split(f'cat {bin_location}/index.yml') if hostname not in ('localhost', '127.0.0.1', _settings.app_options.hostname): - code, res = _fs.commons.ssh_exec(hostname, cmd) + code, res = commons.ssh_exec(hostname, cmd) else: - code, res = _fs.commons.execute(cmd) + code, res = commons.execute(cmd) if code != 0: raise FileNotFoundError(f"Host {hostname} has not evaluators at this location: {bin_location}") diff --git a/vocolab/lib/leaderboards_lib.py b/vocolab/core/leaderboards_lib.py similarity index 95% rename from vocolab/lib/leaderboards_lib.py rename to vocolab/core/leaderboards_lib.py index b49b82f..b1b56e3 100644 --- a/vocolab/lib/leaderboards_lib.py +++ b/vocolab/core/leaderboards_lib.py @@ -5,7 +5,7 @@ from vocolab import out, get_settings from vocolab.db import schema from vocolab.db.q import leaderboardQ, challengesQ -from vocolab.lib import _fs, misc +from vocolab.core import commons, misc _settings = get_settings() @@ -42,11 +42,11 @@ async def build_leaderboard(*, leaderboard_id: int): *leaderboard.external_entries.rglob('*.yml') ] for item in external_entries: - leaderboard_entries.append(_fs.commons.load_dict_file(item)) + leaderboard_entries.append(commons.load_dict_file(item)) # copy external static files if leaderboard.static_files and (leaderboard.external_entries / 'static').is_dir(): - _fs.commons.copy_all_contents(leaderboard.external_entries / 'static', static_location) + 
commons.copy_all_contents(leaderboard.external_entries / 'static', static_location) if not leaderboard.archived: submission_list = await challengesQ.list_submission(by_track=leaderboard.challenge_id) diff --git a/vocolab/lib/misc/__init__.py b/vocolab/core/misc/__init__.py similarity index 100% rename from vocolab/lib/misc/__init__.py rename to vocolab/core/misc/__init__.py diff --git a/vocolab/lib/misc/various_definitions.py b/vocolab/core/misc/various_definitions.py similarity index 100% rename from vocolab/lib/misc/various_definitions.py rename to vocolab/core/misc/various_definitions.py diff --git a/vocolab/lib/misc/various_functions.py b/vocolab/core/misc/various_functions.py similarity index 100% rename from vocolab/lib/misc/various_functions.py rename to vocolab/core/misc/various_functions.py diff --git a/vocolab/lib/notify/__init__.py b/vocolab/core/notify/__init__.py similarity index 100% rename from vocolab/lib/notify/__init__.py rename to vocolab/core/notify/__init__.py diff --git a/vocolab/lib/notify/email.py b/vocolab/core/notify/email.py similarity index 100% rename from vocolab/lib/notify/email.py rename to vocolab/core/notify/email.py diff --git a/vocolab/lib/notify/mattermost.py b/vocolab/core/notify/mattermost.py similarity index 100% rename from vocolab/lib/notify/mattermost.py rename to vocolab/core/notify/mattermost.py diff --git a/vocolab/lib/old_submission_lib/__init__.py b/vocolab/core/old_implementations/__init__.py similarity index 100% rename from vocolab/lib/old_submission_lib/__init__.py rename to vocolab/core/old_implementations/__init__.py diff --git a/vocolab/lib/_fs/__init__.py b/vocolab/core/old_implementations/commons/__init__.py similarity index 81% rename from vocolab/lib/_fs/__init__.py rename to vocolab/core/old_implementations/commons/__init__.py index ae4f24b..f86e3db 100644 --- a/vocolab/lib/_fs/__init__.py +++ b/vocolab/core/old_implementations/commons/__init__.py @@ -3,7 +3,6 @@ These functions are split into the 
following submodules dependent on their use-case """ from . import users -from . import submissions from . import leaderboards -from . import commons +from .core import * from . import file_spilt diff --git a/vocolab/lib/_fs/file_spilt.py b/vocolab/core/old_implementations/commons/file_spilt.py similarity index 100% rename from vocolab/lib/_fs/file_spilt.py rename to vocolab/core/old_implementations/commons/file_spilt.py diff --git a/vocolab/lib/_fs/leaderboards.py b/vocolab/core/old_implementations/commons/leaderboards.py similarity index 100% rename from vocolab/lib/_fs/leaderboards.py rename to vocolab/core/old_implementations/commons/leaderboards.py diff --git a/vocolab/lib/_fs/users.py b/vocolab/core/old_implementations/commons/users.py similarity index 100% rename from vocolab/lib/_fs/users.py rename to vocolab/core/old_implementations/commons/users.py diff --git a/vocolab/lib/old_submission_lib/submissions.py b/vocolab/core/old_implementations/submissions.py similarity index 100% rename from vocolab/lib/old_submission_lib/submissions.py rename to vocolab/core/old_implementations/submissions.py diff --git a/vocolab/lib/old_submission_lib/submissions_lib.py b/vocolab/core/old_implementations/submissions_lib.py similarity index 100% rename from vocolab/lib/old_submission_lib/submissions_lib.py rename to vocolab/core/old_implementations/submissions_lib.py diff --git a/vocolab/lib/submission_lib/__init__.py b/vocolab/core/submission_lib/__init__.py similarity index 100% rename from vocolab/lib/submission_lib/__init__.py rename to vocolab/core/submission_lib/__init__.py diff --git a/vocolab/lib/submission_lib/logs.py b/vocolab/core/submission_lib/logs.py similarity index 99% rename from vocolab/lib/submission_lib/logs.py rename to vocolab/core/submission_lib/logs.py index 73305b3..afa8d45 100644 --- a/vocolab/lib/submission_lib/logs.py +++ b/vocolab/core/submission_lib/logs.py @@ -4,7 +4,7 @@ from pydantic import BaseModel -from .._fs.commons import ssh_exec +from 
..commons import ssh_exec from ...settings import get_settings _settings = get_settings() diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py new file mode 100644 index 0000000..314e7a7 --- /dev/null +++ b/vocolab/core/submission_lib/submission.py @@ -0,0 +1,347 @@ +import functools +import json +import shutil +from datetime import datetime +from pathlib import Path +from typing import Dict, List + +from fastapi import UploadFile +from pydantic import BaseModel + +from ...db import models +from ...settings import get_settings +from ..commons import unzip, ssh_exec, rsync, zip_folder, scp +from .logs import SubmissionLogger +from .upload import MultipartUploadHandler, SinglepartUploadHandler + +_settings = get_settings() + + +class SubmissionInfo(BaseModel): + model_id: str + username: str + track_id: int + track_label: str + submission_id: str + created_at: datetime + leaderboard_entries: Dict[str, Path] + + +class SubmissionDir(BaseModel): + """ Handler interfacing a submission directory stored on disk """ + root_dir: Path + + @classmethod + def load(cls, model_id: str, submission_id: str): + """ Load item from model-id & submission-id""" + root = _settings.submission_dir / model_id / submission_id + if not root.is_dir(): + raise FileNotFoundError(f'Submission {model_id}/{submission_id} does not exist') + return cls(root_dir=root) + + @property + def submission_id(self) -> str: + """ Returns the submission id """ + return self.root_dir.name + + @property + def content_location(self) -> Path: + return self.root_dir / 'content' + + def has_content(self) -> bool: + """ Check if submission has content """ + return self.content_location.is_dir() and any(Path(self.content_location).iterdir()) + + @property + def scores(self) -> Path: + """ the scores folders contains all the output files created by the evaluation process """ + return self.content_location / 'scores' + + def has_scores(self) -> bool: + return 
self.scores.is_dir() + + @property + def info_file(self) -> Path: + """ info file contains meta data relative to the submission """ + return self.root_dir / 'info.json' + + def has_info(self) -> bool: + """ Check whether info file is present""" + return self.info_file.is_file() + + @functools.lru_cache + @property + def info(self) -> SubmissionInfo: + """ Load submission information """ + with self.info_file.open() as fp: + return SubmissionInfo.parse_obj(json.load(fp)) + + @property + def multipart_dir(self) -> Path: + """ multipart dir contains the chunks & index for multipart uploads """ + return self.root_dir / '.parts' + + @property + def multipart_index_file(self) -> Path: + """ multipart index file contains info pertaining to multipart upload + - split & merge manifest (order to merge the files) + - checksums to verify upload & merge + """ + return self.multipart_dir / 'upload.json' + + def is_multipart(self) -> bool: + """ Check whether file was uploaded as multipart """ + return self.multipart_dir.is_dir() and self.multipart_index_file.is_file() + + @property + def upload_lock(self) -> Path: + """ a lockfile locking the submission while upload has not completed """ + return self.root_dir / 'upload.lock' + + @property + def content_archive_hash_file(self) -> Path: + return self.root_dir / 'archive.hash' + + @property + def eval_lock(self) -> Path: + """ a lockfile locking the submission while evaluation is ongoing """ + return self.root_dir / 'eval.lock' + + @property + def error_lock(self) -> Path: + """ a lockfile locking the submission while evaluation is ongoing """ + return self.root_dir / 'error.lock' + + @property + def clean_lock(self) -> Path: + """ a lockfile marking the submission for deletion """ + return self.root_dir / 'clean.lock' + + @property + def interrupted_lock(self) -> Path: + """ a lockfile to signify that a process was running and was interrupted """ + return self.root_dir / 'interrupted.lock' + + def clean_all_locks(self): + """ 
Remove all lock files in submission""" + self.upload_lock.unlink(missing_ok=True) + self.eval_lock.unlink(missing_ok=True) + self.error_lock.unlink(missing_ok=True) + self.interrupted_lock.unlink(missing_ok=True) + self.clean_lock.unlink(missing_ok=True) + + def get_log_handler(self) -> SubmissionLogger: + """ build the SubmissionLogger class that allows to log submission relative events """ + return SubmissionLogger(root_dir=self.root_dir) + + def get_leaderboard_items(self): + if not self.has_info(): + raise ValueError('Submission has no info index') + return self.info.leaderboard_entries + + def add_content(self, file_name: str, file_size: int, file_hash: str, data: UploadFile): + """ Add content to the submission + *) multipart: + - add part to the tmp folder + - check if completed + - if completed merge parts + *) singlepart: + - add uploaded data to the submission + + Multipart is completed when all the parts have been successfully uploaded + Singlepart is completed when the target archive has been successfully uploaded + + If upload is completed --> unzip content into the content folder. 
+ """ + if self.is_multipart(): + """ Multipart upload """ + handler = MultipartUploadHandler.load_from_index(self.multipart_index_file) + handler.add_part( + logger=self.get_log_handler(), + file_name=file_name, + file_size=file_size, + file_hash=file_hash, + data=data + ) + handler.dump_to_index(self.multipart_index_file) + + if handler.completed(): + handler.merge_parts() + else: + """ Single part upload """ + handler = SinglepartUploadHandler(root_dir=self.root_dir) + handler.write_data( + logger=self.get_log_handler(), + file_name=file_name, + file_hash=file_hash, + data=data + ) + + if handler.completed(): + """ Upload completed """ + unzip(handler.target_file, self.content_location) + # todo notify who what when + + def send_content(self, hostname: str) -> Path: + """ Send content to a remote host for evaluation (return target location) """ + is_remote = hostname != _settings.app_options.hostname + transfer_root_dir = _settings.task_queue_options.REMOTE_STORAGE.get(hostname) + model_id = self.info.model_id + remote_submission_dir = transfer_root_dir / model_id / self.submission_id + logger = self.get_log_handler() + + # if host is local & submission dir is current, do nothing + if (not is_remote) and (transfer_root_dir == _settings.submission_dir): + return self.root_dir + + code, _ = ssh_exec(hostname, ['mkdir', '-p', f'{remote_submission_dir}']) + if code != 0: + logger.log(f"failed to write on {hostname}") + raise ValueError(f"No write permissions on {hostname}") + + # sync files + res = rsync(src=self.root_dir, dest_host=hostname, dest=remote_submission_dir) + if res.returncode == 0: + logger.log(f"copied files from {self.root_dir} to {hostname} for processing.") + return remote_submission_dir + else: + logger.log(f"failed to copy {self.root_dir} to {hostname} for processing.") + logger.log(res.stderr.decode()) + raise ValueError(f"Failed to copy files to host {hostname}") + + def fetch_content(self, hostname: str): + """ Fetch updated content from 
remote (after evaluation) """ + is_remote = hostname != _settings.app_options.hostname + transfer_root_dir = _settings.task_queue_options.REMOTE_STORAGE.get(hostname) + model_id = self.info.model_id + remote_submission_dir = transfer_root_dir / model_id / self.submission_id + logger = self.get_log_handler() + + # if host is local & submission dir is current, do nothing + if (not is_remote) and (transfer_root_dir == _settings.submission_dir): + return self.root_dir + + # fetch log files + logger.fetch_remote(hostname, remote_submission_dir) + + # sync files + res = rsync(src_host=hostname, src=remote_submission_dir, dest=self.root_dir) + + if res.returncode == 0: + logger.log(f"fetched result files from {hostname} to {self.root_dir}") + return self.root_dir + else: + logger.log(f"failed to fetch results from {hostname} to {self.root_dir}.") + logger.log(res.stderr.decode()) + raise ValueError(f"Failed to copy files from host {hostname}") + + def archive(self, zip_files: bool = False): + """Transfer submission to archive """ + location = _settings.submission_archive_dir / self.info.model_id / self.info.submission_id + logger = self.get_log_handler() + host = _settings.ARCHIVE_HOST + + if _settings.remote_archive and zip_files: + """ Archive file to remote host as a zip file""" + host = _settings.ARCHIVE_HOST + with _settings.get_temp_dir() as tmp: + archive_file = tmp / f'{self.info.model_id}_{self.info.submission_id}' + zip_folder(archive_file=archive_file, location=self.root_dir) + res = scp(src=archive_file, host=host, dest=_settings.submission_archive_dir) + if res.returncode != 0: + raise ValueError(f"Failed to transfer to {host}") + + elif _settings.remote_archive and not zip_files: + """ Archive file to remote host """ + code, _ = ssh_exec(host, ['mkdir', '-p', f"{location}"]) + if code != 0: + raise ValueError(f"No write permissions on {host}") + + res = rsync(src=self.root_dir, dest_host=host, dest=location) + if res.returncode != 0: + raise 
ValueError(f"Failed to copy files to host {host}") + + elif not _settings.remote_archive and not zip_files: + """ Archive files to local archive """ + _res = rsync(src=self.root_dir, dest=location) + if _res.returncode != 0: + raise ValueError(f"Failed to copy files to archive") + + elif not _settings.remote_archive and zip_files: + """ Archive files to local archive as a zip file""" + zip_folder( + archive_file=location / f'{self.info.model_id}_{self.info.submission_id}', + location=self.root_dir + ) + + def remove_all(self): + """ Remove all files related to this submission """ + shutil.rmtree(self.root_dir) + + +class ModelDir(BaseModel): + root_dir: Path + + @property + def label(self): + return self.root_dir.name + + @classmethod + def load(cls, model_id: str): + root = _settings.submission_dir / model_id + + if not root.is_dir(): + raise FileNotFoundError(f'Model {model_id} does not exist') + return cls(root_dir=root) + + def make_submission(self, submission_id: str, auto_eval: bool, request_meta: models.api.NewSubmissionRequest): + root_dir = self.root_dir / submission_id + if root_dir.is_dir(): + raise FileExistsError(f'Submission {submission_id} cannot be created as it already exists') + # create the dir + root_dir.mkdir() + submission_dir = SubmissionDir(root_dir=root_dir) + submission_dir.content_location.mkdir() + + # Submission generic info + sub_info = SubmissionInfo( + model_id=self.label, + username=request_meta.username, + track_id=request_meta.track_id, + track_label=request_meta.track_label, + submission_id=submission_id, + created_at=datetime.now(), + leaderboard_entries=request_meta.leaderboards + ) + # save info to file + with submission_dir.info_file.open('w') as fp: + fp.write(sub_info.json(indent=4)) + + if request_meta.multipart: + submission_dir.multipart_dir.mkdir(exist_ok=True) + with submission_dir.multipart_index_file.open('w') as fp: + fp.write( + request_meta.json(include={'index'}, indent=4) + ) + else: + with 
submission_dir.content_archive_hash_file.open('w') as fp: + fp.write(request_meta.hash) + + submission_dir.get_log_handler().header( + who=request_meta.username, + task=request_meta.track_label, + multipart=request_meta.multipart, + has_scores=request_meta.has_scores, + auto_eval=auto_eval + ) + + @property + def submissions(self) -> List[SubmissionDir]: + return [ + SubmissionDir.load(self.label, sub_id.name) + for sub_id in self.root_dir.iterdir() + if sub_id.is_dir() + ] + + def get_submission(self, submission_id: str): + return SubmissionDir.load(self.label, submission_id) diff --git a/vocolab/lib/submission_lib/upload.py b/vocolab/core/submission_lib/upload.py similarity index 95% rename from vocolab/lib/submission_lib/upload.py rename to vocolab/core/submission_lib/upload.py index 7d15f28..95cc32d 100644 --- a/vocolab/lib/submission_lib/upload.py +++ b/vocolab/core/submission_lib/upload.py @@ -11,7 +11,7 @@ import numpy as np from vocolab import exc -from .._fs.commons import md5sum +from ..commons import md5sum from .logs import SubmissionLogger """ @@ -34,22 +34,23 @@ def target_file(self): return self.root_dir / 'content_archive.zip' @property - def hash_file(self) -> Path: + def hash_file_location(self) -> Path: """ singlepart upload can be verified by the checksum inside this file """ return self.root_dir / 'archive.hash' @property def file_hash(self): """ Load promised md5sum of content archive """ - with self.hash_file.open() as fp: + with self.hash_file_location.open() as fp: return fp.read().replace('\n', '') - @property - def success(self): + def completed(self) -> bool: return self.target_file.is_file() - def write_data(self, logger: SubmissionLogger, file_name: str, file_hash: str, data: UploadFile): + def write_data(self, logger: SubmissionLogger, file_name: str, + file_hash: str, data: UploadFile): logger.log(f"adding a new part to upload: {file_name}") + assert file_hash == self.file_hash, "Given hash & expected hash should be the same !!" 
# Add the part with self.target_file.open('wb') as fp: diff --git a/vocolab/lib/testing/__init__.py b/vocolab/core/testing/__init__.py similarity index 100% rename from vocolab/lib/testing/__init__.py rename to vocolab/core/testing/__init__.py diff --git a/vocolab/lib/testing/submissions.py b/vocolab/core/testing/submissions.py similarity index 100% rename from vocolab/lib/testing/submissions.py rename to vocolab/core/testing/submissions.py diff --git a/vocolab/lib/users_lib.py b/vocolab/core/users_lib.py similarity index 100% rename from vocolab/lib/users_lib.py rename to vocolab/core/users_lib.py diff --git a/vocolab/lib/worker_lib/__init__.py b/vocolab/core/worker_lib/__init__.py similarity index 100% rename from vocolab/lib/worker_lib/__init__.py rename to vocolab/core/worker_lib/__init__.py diff --git a/vocolab/lib/worker_lib/tasks/__init__.py b/vocolab/core/worker_lib/tasks/__init__.py similarity index 100% rename from vocolab/lib/worker_lib/tasks/__init__.py rename to vocolab/core/worker_lib/tasks/__init__.py diff --git a/vocolab/lib/worker_lib/tasks/echo.py b/vocolab/core/worker_lib/tasks/echo.py similarity index 100% rename from vocolab/lib/worker_lib/tasks/echo.py rename to vocolab/core/worker_lib/tasks/echo.py diff --git a/vocolab/lib/worker_lib/tasks/eval.py b/vocolab/core/worker_lib/tasks/eval.py similarity index 95% rename from vocolab/lib/worker_lib/tasks/eval.py rename to vocolab/core/worker_lib/tasks/eval.py index d20c86e..90220f2 100644 --- a/vocolab/lib/worker_lib/tasks/eval.py +++ b/vocolab/core/worker_lib/tasks/eval.py @@ -5,7 +5,7 @@ from vocolab import out, get_settings, exc from vocolab.db.models import tasks -from vocolab.lib import submissions_lib +from vocolab.core import submission_lib _settings = get_settings() @@ -32,7 +32,7 @@ def build_cmd(_cmd: tasks.SubmissionEvaluationMessage) -> List[str]: if executor is None: raise ValueError(f'{_cmd.executor} is not present in system') - sub_dir = 
submissions_lib.get_submission_dir(_cmd.submission_id) + sub_dir = submission_lib.get_submission_dir(_cmd.submission_id) bin_path = Path(_cmd.bin_path).resolve() verify_bin(bin_path) script = bin_path / _cmd.script_name @@ -100,7 +100,7 @@ def evaluate_submission_fn(sem: tasks.SubmissionEvaluationMessage): f"with a non zero return code. see logs for details!!") # write output in log - with submissions_lib.SubmissionLogger(sem.submission_id) as lg: + with submission_lib.SubmissionLogger(sem.submission_id) as lg: lg.append_eval(eval_output) # send submission evaluation result diff --git a/vocolab/lib/worker_lib/tasks/update.py b/vocolab/core/worker_lib/tasks/update.py similarity index 77% rename from vocolab/lib/worker_lib/tasks/update.py rename to vocolab/core/worker_lib/tasks/update.py index fd87250..5396a5a 100644 --- a/vocolab/lib/worker_lib/tasks/update.py +++ b/vocolab/core/worker_lib/tasks/update.py @@ -2,7 +2,7 @@ from vocolab import out, get_settings from vocolab.db.models import tasks -from vocolab.lib import submissions_lib +from vocolab.core import submission_lib _settings = get_settings() @@ -10,19 +10,19 @@ def update_task_fn(sum_: tasks.SubmissionUpdateMessage): async def eval_function(msg: tasks.SubmissionUpdateMessage): """ Evaluate a function type BrokerCMD """ - with submissions_lib.SubmissionLogger(msg.submission_id) as lg: + with submission_lib.SubmissionLogger(msg.submission_id) as lg: out.log.debug(msg.dict()) if msg.updateType == tasks.UpdateType.evaluation_complete: - await submissions_lib.complete_evaluation( + await submission_lib.complete_evaluation( submission_id=msg.submission_id, hostname=msg.hostname, logger=lg) elif msg.updateType == tasks.UpdateType.evaluation_failed: - await submissions_lib.fail_evaluation( + await submission_lib.fail_evaluation( submission_id=msg.submission_id, hostname=msg.hostname, logger=lg) elif msg.updateType == tasks.UpdateType.evaluation_canceled: - await submissions_lib.cancel_evaluation( + await 
submission_lib.cancel_evaluation( submission_id=msg.submission_id, hostname=msg.hostname, logger=lg) else: diff --git a/vocolab/lib/worker_lib/utils.py b/vocolab/core/worker_lib/utils.py similarity index 100% rename from vocolab/lib/worker_lib/utils.py rename to vocolab/core/worker_lib/utils.py diff --git a/vocolab/db/models/__init__.py b/vocolab/db/models/__init__.py index 4897053..7673ca1 100644 --- a/vocolab/db/models/__init__.py +++ b/vocolab/db/models/__init__.py @@ -2,4 +2,3 @@ from . import cli from . import misc from . import tasks -from . import file_split diff --git a/vocolab/db/q/challenges.py b/vocolab/db/q/challenges.py index 7a7f393..bc0c466 100644 --- a/vocolab/db/q/challenges.py +++ b/vocolab/db/q/challenges.py @@ -4,7 +4,7 @@ from vocolab import get_settings from vocolab.db import models, zrDB, schema, exc as db_exc -from vocolab.lib import misc +from vocolab.core import misc _settings = get_settings() diff --git a/vocolab/db/q/leaderboards.py b/vocolab/db/q/leaderboards.py index 4248748..e1f1e6d 100644 --- a/vocolab/db/q/leaderboards.py +++ b/vocolab/db/q/leaderboards.py @@ -4,7 +4,7 @@ from pathlib import Path from typing import Any, List, Optional from vocolab.db import schema, zrDB, exc as db_exc -from vocolab.lib import misc +from vocolab.core import misc async def get_leaderboard(*, leaderboard_id: int) -> schema.LeaderBoard: diff --git a/vocolab/db/q/users.py b/vocolab/db/q/users.py index 1171dde..6cd835f 100644 --- a/vocolab/db/q/users.py +++ b/vocolab/db/q/users.py @@ -7,7 +7,7 @@ from vocolab import exc, out from vocolab.db import zrDB, models, schema, exc as db_exc -from vocolab.lib import users_lib +from vocolab.core import users_lib from vocolab.settings import get_settings _settings = get_settings() diff --git a/vocolab/db/schema/auth.py b/vocolab/db/schema/auth.py index 59257ad..087cec7 100644 --- a/vocolab/db/schema/auth.py +++ b/vocolab/db/schema/auth.py @@ -3,12 +3,11 @@ from typing import Optional import sqlalchemy -from pydantic 
import BaseModel, EmailStr, Field, ValidationError from jose import jwt, JWTError # noqa: false flags from requirements https://youtrack.jetbrains.com/issue/PY-27985 +from pydantic import BaseModel, EmailStr, Field, ValidationError from ...settings import get_settings - _settings = get_settings() users_metadata = sqlalchemy.MetaData() diff --git a/vocolab/db/schema/challenges.py b/vocolab/db/schema/challenges.py index 40c0c4d..be59ff1 100644 --- a/vocolab/db/schema/challenges.py +++ b/vocolab/db/schema/challenges.py @@ -1,17 +1,14 @@ -from datetime import datetime, date -from pathlib import Path -from typing import Optional - -import sqlalchemy -from pydantic import BaseModel, HttpUrl - -from vocolab.db.models.tasks import ExecutorsType +from datetime import date from datetime import datetime from enum import Enum +from pathlib import Path from typing import Optional import sqlalchemy from pydantic import BaseModel, AnyHttpUrl +from pydantic import HttpUrl + +from vocolab.db.models.tasks import ExecutorsType challenge_metadata = sqlalchemy.MetaData() diff --git a/vocolab/exc.py b/vocolab/exc.py index c84b259..1863c52 100644 --- a/vocolab/exc.py +++ b/vocolab/exc.py @@ -1,7 +1,7 @@ """ A File containing Exceptions definitions """ from typing import Any, Optional -from starlette import status as http_status +from fastapi import status as http_status class VocoLabException(Exception): diff --git a/vocolab/lib/submission_lib/submission.py b/vocolab/lib/submission_lib/submission.py deleted file mode 100644 index b4ccee9..0000000 --- a/vocolab/lib/submission_lib/submission.py +++ /dev/null @@ -1,220 +0,0 @@ -import json -from datetime import datetime -from pathlib import Path -from typing import Dict, List - -from fastapi import UploadFile -from pydantic import BaseModel - -from ...db import models -from ...settings import get_settings -from .logs import SubmissionLogger -from .upload import MultipartUploadHandler, SinglepartUploadHandler - -_settings = get_settings() - 
- -class SubmissionInfo(BaseModel): - model_id: str - username: str - track_id: int - track_label: str - submission_id: str - created_at: datetime - leaderboard_entries: Dict[str, Path] - - -class SubmissionDir(BaseModel): - """ Handler interfacing a submission directory stored on disk """ - root_dir: Path - - @classmethod - def load(cls, model_id: str, submission_id: str): - """ Load item from model-id & submission-id""" - root = _settings.submission_dir / model_id / submission_id - if not root.is_dir(): - raise FileNotFoundError(f'Submission {model_id}/{submission_id} does not exist') - return cls(root_dir=root) - - @property - def submission_id(self) -> str: - """ Returns the submission id """ - return self.root_dir.name - - @property - def content(self) -> Path: - return self.root_dir / 'content' - - def has_input(self) -> bool: - return self.content.is_dir() - - @property - def scores(self) -> Path: - """ the scores folders contains all the output files created by the evaluation process """ - return self.content / 'scores' - - def has_scores(self) -> bool: - return self.scores.is_dir() - - @property - def info_file(self) -> Path: - """ info file contains meta data relative to the submission """ - return self.root_dir / 'info.json' - - def has_info(self) -> bool: - """ Check whether info file is present""" - return self.info_file.is_file() - - @property - def info(self) -> SubmissionInfo: - """ Load submission information """ - with self.info_file.open() as fp: - return SubmissionInfo.parse_obj(json.load(fp)) - - @property - def multipart_dir(self) -> Path: - """ multipart dir contains the chunks & index for multipart uploads """ - return self.root_dir / '.parts' - - @property - def multipart_index_file(self) -> Path: - """ multipart index file contains info pertaining to multipart upload - - split & merge manifest (order to merge the files) - - checksums to verify upload & merge - """ - return self.multipart_dir / 'upload.json' - - def is_multipart(self) -> 
bool: - """ Check whether file was uploaded as multipart """ - return self.multipart_dir.is_dir() and self.multipart_index_file.is_file() - - @property - def upload_lock(self) -> Path: - """ a lockfile locking the submission while upload has not completed """ - return self.root_dir / 'upload.lock' - - @property - def eval_lock(self) -> Path: - """ a lockfile locking the submission while evaluation is ongoing """ - return self.root_dir / 'eval.lock' - - @property - def error_lock(self) -> Path: - """ a lockfile locking the submission while evaluation is ongoing """ - return self.root_dir / 'error.lock' - - @property - def clean_lock(self) -> Path: - """ a lockfile marking the submission for deletion """ - return self.root_dir / 'clean.lock' - - @property - def interrupted_lock(self) -> Path: - """ a lockfile to signify that a process was running and was interrupted """ - return self.root_dir / 'interrupted.lock' - - def clean_all_locks(self): - """ Remove all lock files in submission""" - self.upload_lock.unlink(missing_ok=True) - self.eval_lock.unlink(missing_ok=True) - self.error_lock.unlink(missing_ok=True) - self.interrupted_lock.unlink(missing_ok=True) - self.clean_lock.unlink(missing_ok=True) - - def get_log_handler(self) -> SubmissionLogger: - """ build the SubmissionLogger class that allows to log submission relative events """ - return SubmissionLogger(root_dir=self.root_dir) - - - def add_content(self, file_name: str, file_size: int, file_hash: str, data: UploadFile): - """ todo: write method description """ - if self.is_multipart(): - # Multipart content - handler = MultipartUploadHandler.load_from_index(self.multipart_index_file) - handler.add_part( - logger=self.get_log_handler(), - file_name=file_name, - file_size=file_size, - file_hash=file_hash, - data=data - ) - handler.dump_to_index(self.multipart_index_file) - - if handler.completed(): - handler.merge_parts() - else: - # todo return missing? remaining ? 
something - pass - else: - # singlepart content - # TODO: continue this section here ...... - handler = SinglepartUploadHandler(root_dir=self.root_dir) - handler.write_data(file_name=file_name, ) - - - - - -class ModelDir(BaseModel): - root_dir: Path - - @property - def label(self): - return self.root_dir.name - - @classmethod - def load(cls, model_id: str): - root = _settings.submission_dir / model_id - - if not root.is_dir(): - raise FileNotFoundError(f'Model {model_id} does not exist') - return cls(root_dir=root) - - def make_submission(self, submission_id: str, auto_eval: bool, request_meta: models.api.NewSubmissionRequest): - root_dir = self.root_dir / submission_id - if root_dir.is_dir(): - raise FileExistsError(f'Submission {submission_id} cannot be created as it already exists') - # create the dir - root_dir.mkdir() - submission_dir = SubmissionDir(root_dir=root_dir) - submission_dir.content.mkdir() - - sub_info = SubmissionInfo( - username=request_meta.username, - track_id=request_meta.track_id, - track_label=request_meta.track_label, - submission_id=submission_id, - created_at=datetime.now(), - leaderboard_entries=request_meta.leaderboards - ) - - # todo save info as file - - if request_meta.multipart: - submission_dir.multipart_dir.mkdir(exist_ok=True) - # todo build class for multipart index - else: - with submission_dir.singlepart_hash_file.open('w') as fp: - fp.write(request_meta.hash) - - submission_dir.get_log_handler().header( - who=request_meta.username, - task=request_meta.track_label, - multipart=request_meta.multipart, - has_scores=request_meta.has_scores, - auto_eval=auto_eval - ) - - # create upload lockfile - submission_dir.upload_lock.touch() - - @property - def submissions(self) -> List[SubmissionDir]: - return [ - SubmissionDir.load(self.label, sub_id.name) - for sub_id in self.root_dir.iterdir() - if sub_id.is_dir() - ] - - def get_submission(self, submission_id: str): - return SubmissionDir.load(self.label, submission_id) diff --git 
a/vocolab/settings.py b/vocolab/settings.py index 069c518..c02950f 100644 --- a/vocolab/settings.py +++ b/vocolab/settings.py @@ -3,10 +3,12 @@ import os import platform +import tempfile +from contextlib import contextmanager from datetime import timedelta from functools import lru_cache from pathlib import Path -from typing import List, Union, Set, Dict, Optional, Literal +from typing import List, Union, Set, Dict, Optional, Literal, Generator from importlib.metadata import version, PackageNotFoundError try: @@ -154,6 +156,9 @@ class _VocoLabSettings(BaseSettings): """ Base Settings for module """ app_home: DirectoryPath = Path(__file__).parent DATA_FOLDER: DirectoryPath = Path('data/') + TMP_ROOT: DirectoryPath = Path('/tmp') + ARCHIVE_FOLDER: Path + ARCHIVE_HOST: str = "localhost" # Settings Categories app_options: AppSettings = AppSettings() @@ -192,7 +197,13 @@ def leaderboard_dir(self) -> Path: @property def submission_archive_dir(self) -> Path: """directory pointing to archived submissions """ - return self.DATA_FOLDER / 'submissions/archive' + return self.ARCHIVE_FOLDER / 'submissions' + + @property + def remote_archive(self) -> bool: + return self.ARCHIVE_HOST not in ( + 'localhost', '127.0.0.1', self.app_options.hostname + ) @property def templates_dir(self) -> Path: @@ -231,7 +242,14 @@ def secret(self): with (self.DATA_FOLDER / '.secret').open('rb') as fp: return fp.read().decode() - + @contextmanager + def get_temp_dir(self) -> Generator[Path, None, None]: + """ Create a temporary directory """ + temp_dir = tempfile.TemporaryDirectory(prefix="voco-", dir=str(self.TMP_ROOT)) + try: + yield Path(temp_dir.name) + finally: + temp_dir.cleanup() class Config: env_prefix = 'VC_' diff --git a/vocolab/worker/server.py b/vocolab/worker/server.py index f8bb290..6b2b5c0 100644 --- a/vocolab/worker/server.py +++ b/vocolab/worker/server.py @@ -6,7 +6,7 @@ from vocolab import out, get_settings from vocolab.db.models import tasks -from vocolab.lib import worker_lib 
+from vocolab.core import worker_lib # """"""""""""""""""""""""""""""""""""" # todo: read up on what is the best pool/supervisor From 148321fcb9750c3886c3377b41fb60a8c21300b9 Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Wed, 1 Feb 2023 16:28:57 +0100 Subject: [PATCH 03/28] WIP: db clean-up for users/auth --- vocolab/api/endpoints/auth.py | 23 +- vocolab/api/endpoints/challenges.py | 72 ++---- vocolab/api/endpoints/models.py | 118 +++++++++ vocolab/api/endpoints/users.py | 278 +++++++++++----------- vocolab/core/api_lib.py | 23 +- vocolab/core/commons.py | 9 + vocolab/core/submission_lib/__init__.py | 1 + vocolab/core/submission_lib/submission.py | 9 +- vocolab/core/users_lib.py | 41 +++- vocolab/db/__init__.py | 3 - vocolab/db/admin.py | 4 - vocolab/db/base.py | 22 -- vocolab/db/exc.py | 28 --- vocolab/db/models/__init__.py | 4 - vocolab/db/models/api/__init__.py | 5 - vocolab/db/models/api/auth.py | 22 -- vocolab/db/models/api/challenges.py | 81 ------- vocolab/db/models/api/commons.py | 7 - vocolab/db/models/api/leaerboards.py | 10 - vocolab/db/models/api/users.py | 21 -- vocolab/db/models/cli.py | 27 --- vocolab/db/models/misc.py | 17 -- vocolab/db/models/tasks.py | 123 ---------- vocolab/db/q/__init__.py | 3 - vocolab/db/q/challenges.py | 275 --------------------- vocolab/db/q/leaderboards.py | 110 --------- vocolab/db/q/users.py | 203 ---------------- vocolab/db/schema/__init__.py | 3 - vocolab/db/schema/auth.py | 70 ------ vocolab/db/schema/challenges.py | 230 ------------------ vocolab/settings.py | 1 + 31 files changed, 347 insertions(+), 1496 deletions(-) create mode 100644 vocolab/api/endpoints/models.py delete mode 100644 vocolab/db/__init__.py delete mode 100644 vocolab/db/admin.py delete mode 100644 vocolab/db/base.py delete mode 100644 vocolab/db/exc.py delete mode 100644 vocolab/db/models/__init__.py delete mode 100644 vocolab/db/models/api/__init__.py delete mode 100644 vocolab/db/models/api/auth.py delete mode 100644 
vocolab/db/models/api/challenges.py delete mode 100644 vocolab/db/models/api/commons.py delete mode 100644 vocolab/db/models/api/leaerboards.py delete mode 100644 vocolab/db/models/api/users.py delete mode 100644 vocolab/db/models/cli.py delete mode 100644 vocolab/db/models/misc.py delete mode 100644 vocolab/db/models/tasks.py delete mode 100644 vocolab/db/q/__init__.py delete mode 100644 vocolab/db/q/challenges.py delete mode 100644 vocolab/db/q/leaderboards.py delete mode 100644 vocolab/db/q/users.py delete mode 100644 vocolab/db/schema/__init__.py delete mode 100644 vocolab/db/schema/auth.py delete mode 100644 vocolab/db/schema/challenges.py diff --git a/vocolab/api/endpoints/auth.py b/vocolab/api/endpoints/auth.py index aa0311b..32aa563 100644 --- a/vocolab/api/endpoints/auth.py +++ b/vocolab/api/endpoints/auth.py @@ -12,8 +12,7 @@ from pydantic import EmailStr from vocolab import exc, out -from vocolab.db import schema, models -from vocolab.db.q import userQ +from vocolab.data import models, model_queries from vocolab.core import api_lib, notify from vocolab.settings import get_settings @@ -26,13 +25,11 @@ async def login(form_data: OAuth2PasswordRequestForm = Depends()) -> models.api.LoggedItem: """ Authenticate a user """ try: - out.console.print(f"{form_data.username=}, {form_data.password=}") - user = await userQ.get_user_for_login(login_id=form_data.username, password=form_data.password) - out.console.print(f'login {user=}') + user = await model_queries.User.login(login_id=form_data.username, password=form_data.password) if user is None: raise ValueError('Bad login') - token = schema.Token(user_email=user.email) + token = model_queries.Token(user_email=user.email) return models.api.LoggedItem(access_token=token.encode(), token_type="bearer") except ValueError: raise HTTPException( @@ -47,7 +44,7 @@ async def post_signup(request: Request, affiliation: str = Form(...), email: EmailStr = Form(...), username: str = Form(...), password: str = Form(...)) -> 
str: """ Create a new user via the HTML form (returns a html page) """ - user = models.misc.UserCreate( + user = models.api.UserCreateRequest( username=username, email=email, pwd=password, @@ -82,12 +79,12 @@ async def password_reset_request( html_response: bool = False, username: str = Form(...), email: EmailStr = Form(...)): """ Request a users password to be reset """ - user = await userQ.get_user(by_username=username) - if user.username != username: + user = await model_queries.User.get(by_username=username) + if user.email != email: raise ValueError('Bad request, no such user') # session = await userQ.create_password_reset_session(username=username, email=email) - token = schema.Token(user_email=user.email, allow_password_reset=True) + token = model_queries.Token(user_email=user.email, allow_password_reset=True) data = { 'username': username, 'url': f"{api_lib.url_for(request, 'password_update_page')}?v={token.encode()}", @@ -121,12 +118,12 @@ async def post_password_update(v: str, request: Request, html_response: bool = F if v != session_code: raise ValueError('session validation not passed !!!') - token = schema.Token.decode(v) + token = model_queries.Token.decode(v) if not token.allow_password_reset: raise ValueError('bad session') - user = await userQ.get_user(by_email=token.user_email) - await userQ.update_users_password(user=user, password=password, password_validation=password_validation) + user = await model_queries.User.get(by_email=token.user_email) + await user.change_password(new_password=password, password_validation=password_validation) except ValueError as e: out.log.error( f'{request.client.host}:{request.client.port} requested bad password reset session as {v} - [{e}]') diff --git a/vocolab/api/endpoints/challenges.py b/vocolab/api/endpoints/challenges.py index 42e67b8..e8bf0ce 100644 --- a/vocolab/api/endpoints/challenges.py +++ b/vocolab/api/endpoints/challenges.py @@ -18,74 +18,42 @@ _settings = get_settings() -@router.get('/', 
response_model=List[models.api.ChallengePreview]) +@router.get('/list', response_model=List[models.api.ChallengePreview]) async def get_challenge_list(include_inactive: bool = False): """ Return a list of all active challenges """ challenge_lst = await challengesQ.list_challenges(include_all=include_inactive) return [models.api.ChallengePreview(id=ch.id, label=ch.label, active=ch.active) for ch in challenge_lst] -@router.get('/{challenge_id}', response_model=models.api.ChallengesResponse, +@router.get('/{challenge_id}/info', response_model=models.api.ChallengesResponse, responses={404: {"model": models.api.Message}}) async def get_challenge_info(challenge_id: int): """ Return information of a specific challenge """ # todo add leaderboards to challenge info return await challengesQ.get_challenge(challenge_id=challenge_id, allow_inactive=True) - -@router.get('/model/create') -async def get_model_id(first_author_name: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - new_model_id = f"{first_author_name[:3]}{str(datetime.now().year)[2:]}" - # todo: check - return new_model_id +@router.get('/{challenge_id}/submissions', response_model=models.api.ChallengesResponse, + responses={404: {"model": models.api.Message}}) +async def get_sub_list(challenge_id: int): + """ Return information of a specific challenge """ + # todo add leaderboards to challenge info + pass -# todo: update submission process -@router.post('/{challenge_id}/submission/create', responses={404: {"model": models.api.Message}}) -async def create_submission( - challenge_id: int, data: models.api.NewSubmissionRequest, - current_user: schema.User = Depends(api_lib.get_current_active_user) -): - """ Create a new submission """ - challenge = await challengesQ.get_challenge(challenge_id=challenge_id) - if challenge is None: - return ValueError(f'challenge {challenge_id} not found or inactive') - # create db entry - submission_id = await 
challengesQ.add_submission(new_submission=models.api.NewSubmission( - user_id=current_user.id, - track_id=challenge.id, - ), evaluator_id=challenge.evaluator) - # create disk entry - submission_lib.make_submission_on_disk( - submission_id, current_user.username, challenge.label, meta=data - ) - return submission_id +@router.get('/{challenge_id}/leaderboards', response_model=models.api.ChallengesResponse, + responses={404: {"model": models.api.Message}}) +async def get_all_leaderboards(challenge_id: int): + """ Return information of a specific challenge """ + # todo add leaderboards to challenge info + pass -@router.put("/{challenge_id}/submission/upload", response_model=models.api.UploadSubmissionPartResponse) -async def upload_submission( - challenge_id: int, - submission_id: str, - part_name: str, - background_tasks: BackgroundTasks, - file_data: UploadFile = File(...), - current_user: schema.User = Depends(api_lib.get_current_active_user), -): - out.console.info(f"user: {current_user.username}") - challenge = await challengesQ.get_challenge(challenge_id=challenge_id) - if challenge is None: - return ValueError(f'challenge {challenge_id} not found or inactive') - try: - is_completed, remaining = submission_lib.add_part(submission_id, part_name, file_data) - if is_completed: - # run the completion of the submission on the background - background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) +@router.get('/{challenge_id}/leaderboards/{leaderboard_id}', response_model=models.api.ChallengesResponse, + responses={404: {"model": models.api.Message}}) +async def get_leaderboard(challenge_id: int, leaderboard_id): + """ Return information of a specific challenge """ + # todo add leaderboards to challenge info + pass - return models.api.UploadSubmissionPartResponse( - completed=is_completed, remaining=[n.file_name for n in remaining] - ) - except exc.VocoLabException as e: - out.log.exception() - raise e diff --git 
a/vocolab/api/endpoints/models.py b/vocolab/api/endpoints/models.py new file mode 100644 index 0000000..55bb182 --- /dev/null +++ b/vocolab/api/endpoints/models.py @@ -0,0 +1,118 @@ +""" Routing for /challenges section of the API +This section handles challenge data +""" +from datetime import datetime + +from fastapi import ( + APIRouter, Depends, UploadFile, File, BackgroundTasks +) + +from vocolab import out, exc +from vocolab.core import api_lib, submission_lib +from vocolab.db import schema, models +from vocolab.db.q import challengesQ +from vocolab.settings import get_settings + +router = APIRouter() +_settings = get_settings() + + +@router.get('/create') +async def create_new_model(first_author_name: str, + current_user: schema.User = Depends(api_lib.get_current_active_user)): + new_model_id = f"{first_author_name[:3]}{str(datetime.now().year)[2:]}" + # todo: check + return new_model_id + + +@router.get('/list') +async def get_model_list(): + pass + + +@router.get('/{model_id}/info') +async def get_model_info(): + # todo: check + pass + + +@router.get('/{model_id}/submissions/list') +async def get_model_submissions(): + # todo: check + pass + + +@router.get('/{model_id}/submissions/{submission_id}/info') +async def get_model_submission_info(): + # todo: check + pass + + +@router.get('/{model_id}/submissions/{submission_id}/leaderboard-entries') +async def get_model_submission_leaderboard_entries(): + # todo: check + pass + + +# todo: update submission process +@router.post('/{model_id}/submissions/create/', responses={404: {"model": models.api.Message}}) +async def create_submission( + model_id: str, challenge_id: int, + data: models.api.NewSubmissionRequest, + current_user: schema.User = Depends(api_lib.get_current_active_user) +): + """ Create a new submission """ + # todo fetch model_id + + challenge = await challengesQ.get_challenge(challenge_id=challenge_id) + if challenge is None: + return ValueError(f'challenge {challenge_id} not found or inactive') + 
+ # create db entry + # todo check submission table data + submission_id = await challengesQ.add_submission(new_submission=models.api.NewSubmission( + user_id=current_user.id, + track_id=challenge.id, + ), evaluator_id=challenge.evaluator) + + # create disk entry + model_dir = submission_lib.ModelDir.load(data.model_id) + model_dir.make_submission( + submission_id=submission_id, + challenge_id=challenge_id, + challenge_label=challenge.label, + auto_eval=..., + request_meta=data + ) + + return submission_id + + # todo update +@router.put("/{model_id}/submission/{submission_id}/upload", response_model=models.api.UploadSubmissionPartResponse) +async def upload_submission( + model_id: str, + submission_id: str, + challenge_id: int, + part_name: str, + background_tasks: BackgroundTasks, + file_data: UploadFile = File(...), + current_user: schema.User = Depends(api_lib.get_current_active_user), +): + out.console.info(f"user: {current_user.username}") + challenge = await challengesQ.get_challenge(challenge_id=challenge_id) + if challenge is None: + return ValueError(f'challenge {challenge_id} not found or inactive') + try: + is_completed, remaining = submission_lib.add_part(submission_id, part_name, file_data) + + if is_completed: + # run the completion of the submission on the background + background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) + + return models.api.UploadSubmissionPartResponse( + completed=is_completed, remaining=[n.file_name for n in remaining] + ) + except exc.VocoLabException as e: + out.log.exception() + raise e + diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 292749c..2543a28 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -1,43 +1,29 @@ """ Routing for /users section of the API This section handles user data """ -from typing import Dict, List import pydantic from fastapi import ( APIRouter, Depends, Response ) -from vocolab import exc, out 
-from vocolab.core import api_lib, users_lib, submission_lib -from vocolab.db import schema, models -from vocolab.db.q import challengesQ, leaderboardQ +from vocolab import out +from vocolab.core import api_lib, users_lib +from vocolab.data import model_queries from vocolab.settings import get_settings - router = APIRouter() _settings = get_settings() -def drop_keys(data: Dict, keys: List[str]): - for k in keys: - try: - del data[k] - except KeyError: - pass - - @router.get("/profile") -def get_profile(current_user: schema.User = Depends(api_lib.get_current_active_user)) -> models.api.UserProfileResponse: +def get_profile( + current_user: model_queries.User = Depends(api_lib.get_current_active_user)) -> users_lib.UserProfileData: try: - user_data = users_lib.get_user_data(current_user.username).dict() - drop_keys(user_data, ['verified', 'email', 'created']) - return models.api.UserProfileResponse( - verified=current_user.verified == "True", - email=current_user.email, - created=current_user.created_at, - **user_data - ) + user_data = current_user.get_profile_data() + # re-update verification + user_data.verified = current_user.is_verified() + return user_data except pydantic.ValidationError: out.log.error("Failed to validate profile data") out.console.exception() @@ -45,125 +31,133 @@ def get_profile(current_user: schema.User = Depends(api_lib.get_current_active_u @router.post("/profile") def update_profile( - user_data: models.api.UserData, current_user: schema.User = Depends(api_lib.get_current_active_user)): - users_lib.update_user_data(current_user.username, data=user_data) + user_data: users_lib.UserProfileData, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + if user_data.username != current_user.username: + raise ValueError('Bad username specified') + + user_data.verified = current_user.is_verified() + + user_data.update() return Response(status_code=200) -@router.get('/submissions') -async def submissions_list(current_user: 
schema.User = Depends(api_lib.get_current_active_user)): - """ Return a list of all user submissions """ - submissions = await challengesQ.get_user_submissions(user_id=current_user.id) - submissions = [ - models.api.SubmissionPreview( - submission_id=s.id, - track_id=s.track_id, - track_label=(await challengesQ.get_challenge(challenge_id=s.track_id)).label, - status=s.status - ) - for s in submissions - ] - - data = {} - for sub in submissions: - if sub.track_label in data.keys(): - data[sub.track_label].append(sub) - else: - data[sub.track_label] = [sub] - - return data - - -@router.get('/submissions/tracks/{track_id}') -async def submissions_list_by_track( - track_id: int, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return a list of all user submissions """ - track = await challengesQ.get_challenge(challenge_id=track_id) - submissions = await challengesQ.get_user_submissions(user_id=current_user.id) - - return [ - models.api.SubmissionPreview( - submission_id=s.id, - track_id=s.track_id, - track_label=track.label, - status=s.status - ) - for s in submissions if s.track_id == track.id - ] - - -@router.get('/submissions/{submissions_id}') -async def get_submission(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return information on a submission """ - submission = await challengesQ.get_submission(by_id=submissions_id) - if submission.user_id != current_user.id: - raise exc.AccessError("current user is not allowed to preview this submission !", - status=exc.http_status.HTTP_403_FORBIDDEN) - - track = await challengesQ.get_challenge(challenge_id=submission.track_id) - leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) - - if submission.evaluator_id is not None: - evaluator = await challengesQ.get_evaluator(by_id=submission.evaluator_id) - evaluator_cmd = f"{evaluator.executor} {evaluator.script_path} {evaluator.executor_arguments.replace(';', ' ')}" 
- evaluator_label = evaluator.label - else: - evaluator_cmd = "" - evaluator_label = "" - - return models.api.SubmissionView( - submission_id=submission.id, - user_id=current_user.id, - username=current_user.username, - track_label=track.label, - track_id=track.id, - status=submission.status, - date=submission.submit_date, - evaluator_cmd=evaluator_cmd, - evaluator_label=evaluator_label, - leaderboards=[(ld.label, ld.id) for ld in leaderboards] - ) - - -@router.get('/submissions/{submissions_id}/status') -async def get_submission_status( - submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return status of a submission """ - submission = await challengesQ.get_submission(by_id=submissions_id) - if submission.user_id != current_user.id: - raise exc.AccessError("current user is not allowed to preview this submission !", - status=exc.http_status.HTTP_403_FORBIDDEN) - - return submission.status - - -@router.get('/submissions/{submissions_id}/log') -async def get_submission_status( - submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return status of a submission """ - submission = await challengesQ.get_submission(by_id=submissions_id) - if submission.user_id != current_user.id: - raise exc.AccessError("current user is not allowed to preview this submission !", - status=exc.http_status.HTTP_403_FORBIDDEN) - - log = submission_lib.SubmissionLogger(submissions_id) - return log.get_text() - - -@router.get('/submissions/{submissions_id}/scores') -async def get_user_results(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): - """ Return status of a submission """ - submission = await challengesQ.get_submission(by_id=submissions_id) - if submission.user_id != current_user.id: - raise exc.AccessError("current user is not allowed to preview this submission !", - status=exc.http_status.HTTP_403_FORBIDDEN) - sub_location = 
submission_lib.get_submission_dir(submission_id=submission.id) - - leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) - result = {} - for ld in leaderboards: - ld_file = sub_location / ld.entry_file - if ld_file.is_file(): - result[ld.label] = api_lib.file2dict(ld_file) - - return result +# @router.get('{username}/submissions') +# async def submissions_list(username: str): +# """ Return a list of all user submissions """ +# user = model_queries.User.get(by_username=username) +# # todo fix later +# submissions = await challengesQ.get_user_submissions(user_id=current_user.id) +# submissions = [ +# models.api.SubmissionPreview( +# submission_id=s.id, +# track_id=s.track_id, +# track_label=(await challengesQ.get_challenge(challenge_id=s.track_id)).label, +# status=s.status +# ) +# for s in submissions +# ] +# +# data = {} +# for sub in submissions: +# if sub.track_label in data.keys(): +# data[sub.track_label].append(sub) +# else: +# data[sub.track_label] = [sub] +# +# return data + + +# @router.get('{username}//submissions/tracks/{track_id}') +# async def submissions_list_by_track( +# track_id: int, current_user: schema.User = Depends(api_lib.get_current_active_user)): +# """ Return a list of all user submissions """ +# track = await challengesQ.get_challenge(challenge_id=track_id) +# submissions = await challengesQ.get_user_submissions(user_id=current_user.id) +# +# return [ +# models.api.SubmissionPreview( +# submission_id=s.id, +# track_id=s.track_id, +# track_label=track.label, +# status=s.status +# ) +# for s in submissions if s.track_id == track.id +# ] + + +# @router.get('/submissions/{submissions_id}') +# async def get_submission(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): +# """ Return information on a submission """ +# submission = await challengesQ.get_submission(by_id=submissions_id) +# if submission.user_id != current_user.id: +# raise exc.AccessError("current user is not 
allowed to preview this submission !", +# status=exc.http_status.HTTP_403_FORBIDDEN) +# +# track = await challengesQ.get_challenge(challenge_id=submission.track_id) +# leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) +# +# if submission.evaluator_id is not None: +# evaluator = await challengesQ.get_evaluator(by_id=submission.evaluator_id) +# evaluator_cmd = f"{evaluator.executor} {evaluator.script_path} {evaluator.executor_arguments.replace(';', ' ')}" +# evaluator_label = evaluator.label +# else: +# evaluator_cmd = "" +# evaluator_label = "" +# +# return models.api.SubmissionView( +# submission_id=submission.id, +# user_id=current_user.id, +# username=current_user.username, +# track_label=track.label, +# track_id=track.id, +# status=submission.status, +# date=submission.submit_date, +# evaluator_cmd=evaluator_cmd, +# evaluator_label=evaluator_label, +# leaderboards=[(ld.label, ld.id) for ld in leaderboards] +# ) + + +# @router.get('/submissions/{submissions_id}/status') +# async def get_submission_status( +# submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): +# """ Return status of a submission """ +# submission = await challengesQ.get_submission(by_id=submissions_id) +# if submission.user_id != current_user.id: +# raise exc.AccessError("current user is not allowed to preview this submission !", +# status=exc.http_status.HTTP_403_FORBIDDEN) +# +# return submission.status + + +# @router.get('/submissions/{submissions_id}/log') +# async def get_submission_status( +# submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): +# """ Return status of a submission """ +# submission = await challengesQ.get_submission(by_id=submissions_id) +# if submission.user_id != current_user.id: +# raise exc.AccessError("current user is not allowed to preview this submission !", +# status=exc.http_status.HTTP_403_FORBIDDEN) +# +# log = 
submission_lib.SubmissionLogger(submissions_id) +# return log.get_text() + + +# @router.get('/submissions/{submissions_id}/scores') +# async def get_user_results(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): +# """ Return status of a submission """ +# submission = await challengesQ.get_submission(by_id=submissions_id) +# if submission.user_id != current_user.id: +# raise exc.AccessError("current user is not allowed to preview this submission !", +# status=exc.http_status.HTTP_403_FORBIDDEN) +# sub_location = submission_lib.get_submission_dir(submission_id=submission.id) +# +# leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) +# result = {} +# for ld in leaderboards: +# ld_file = sub_location / ld.entry_file +# if ld_file.is_file(): +# result[ld.label] = api_lib.file2dict(ld_file) +# +# return result diff --git a/vocolab/core/api_lib.py b/vocolab/core/api_lib.py index 0624898..ca1d341 100644 --- a/vocolab/core/api_lib.py +++ b/vocolab/core/api_lib.py @@ -6,8 +6,7 @@ from jinja2 import FileSystemLoader, Environment from vocolab import settings -from vocolab.db import schema, models -from vocolab.db.q import userQ +from vocolab.data import model_queries, models from vocolab.core import notify, commons _settings = settings.get_settings() @@ -18,10 +17,10 @@ file2dict = commons.load_dict_file -def validate_token(token: str = Depends(oauth2_scheme)) -> schema.Token: +def validate_token(token: str = Depends(oauth2_scheme)) -> model_queries.Token: """ Dependency for validating the current users session via the token""" try: - token = schema.Token.decode(token) + token = model_queries.Token.decode(token) if token.is_expired(): raise ValueError('Token has expired') @@ -36,10 +35,10 @@ def validate_token(token: str = Depends(oauth2_scheme)) -> schema.Token: ) -async def get_user(token: schema.Token = Depends(validate_token)) -> schema.User: +async def get_user(token: model_queries.Token = 
Depends(validate_token)) -> model_queries.User: """ Dependency for fetching current user from database using token entry """ try: - return await userQ.get_user(by_email=token.user_email) + return await model_queries.User.get(by_email=token.user_email) except ValueError: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, @@ -47,9 +46,9 @@ async def get_user(token: schema.Token = Depends(validate_token)) -> schema.User ) -async def get_current_active_user(current_user: schema.User = Depends(get_user)) -> schema.User: +async def get_current_active_user(current_user: model_queries.User = Depends(get_user)) -> model_queries.User: """ Dependency for validating current user """ - if current_user.verified == 'True': + if current_user.is_verified(): if current_user.active: return current_user else: @@ -71,9 +70,9 @@ def generate_html_response(data: Dict[str, Any], template_name: str) -> str: return template.render(**data) -async def signup(request: Request, user: models.misc.UserCreate): +async def signup(request: Request, user: models.api.UserCreateRequest): """ Creates a new user and schedules the registration email """ - verification_code = await userQ.create_user(usr=user) + verification_code = await model_queries.User.create(new_usr=user) data = { 'username': user.username, # todo check if url needs update @@ -84,13 +83,14 @@ async def signup(request: Request, user: models.misc.UserCreate): loop = asyncio.get_running_loop() loop.create_task(notify.email.template_email( emails=[user.email], - subject='[Zerospeech] Account Verification', + subject=f'[{_settings.app_options.platform_name}] Account Verification', data=data, template_name='email_validation.jinja2') ) def get_base_url(request: Request) -> str: + """ Get base url taking into account http -> https redirection """ base_url = f"{request.base_url}" headers = request.headers @@ -101,6 +101,7 @@ def get_base_url(request: Request) -> str: def url_for(request: Request, path_requested: str) -> str: + """ 
Query API path url taking into account http -> https redirections """ url = request.url_for(path_requested) headers = request.headers diff --git a/vocolab/core/commons.py b/vocolab/core/commons.py index 046db04..4070e5b 100644 --- a/vocolab/core/commons.py +++ b/vocolab/core/commons.py @@ -13,6 +13,15 @@ from vocolab import out +def drop_keys(data: Dict, keys: List[str]): + """ Filter keys from a dictionary """ + for k in keys: + try: + del data[k] + except KeyError: + pass + + def load_dict_file(location: Path) -> Union[Dict, List]: """ Load a dict type file (json, yaml, toml)""" with location.open() as fp: diff --git a/vocolab/core/submission_lib/__init__.py b/vocolab/core/submission_lib/__init__.py index e69de29..ecee522 100644 --- a/vocolab/core/submission_lib/__init__.py +++ b/vocolab/core/submission_lib/__init__.py @@ -0,0 +1 @@ +from .submission import * diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py index 314e7a7..4d251ad 100644 --- a/vocolab/core/submission_lib/submission.py +++ b/vocolab/core/submission_lib/submission.py @@ -294,7 +294,8 @@ def load(cls, model_id: str): raise FileNotFoundError(f'Model {model_id} does not exist') return cls(root_dir=root) - def make_submission(self, submission_id: str, auto_eval: bool, request_meta: models.api.NewSubmissionRequest): + def make_submission(self, submission_id: str, challenge_id: int, challenge_label: str, + auto_eval: bool, request_meta: models.api.NewSubmissionRequest): root_dir = self.root_dir / submission_id if root_dir.is_dir(): raise FileExistsError(f'Submission {submission_id} cannot be created as it already exists') @@ -307,8 +308,8 @@ def make_submission(self, submission_id: str, auto_eval: bool, request_meta: mod sub_info = SubmissionInfo( model_id=self.label, username=request_meta.username, - track_id=request_meta.track_id, - track_label=request_meta.track_label, + track_id=challenge_id, + track_label=challenge_label, submission_id=submission_id, 
created_at=datetime.now(), leaderboard_entries=request_meta.leaderboards @@ -329,7 +330,7 @@ def make_submission(self, submission_id: str, auto_eval: bool, request_meta: mod submission_dir.get_log_handler().header( who=request_meta.username, - task=request_meta.track_label, + task=challenge_label, multipart=request_meta.multipart, has_scores=request_meta.has_scores, auto_eval=auto_eval diff --git a/vocolab/core/users_lib.py b/vocolab/core/users_lib.py index 43cb17d..abfe5d3 100644 --- a/vocolab/core/users_lib.py +++ b/vocolab/core/users_lib.py @@ -1,13 +1,42 @@ import hashlib +import json import os -from typing import Callable +from datetime import datetime +from typing import Callable, Optional -from vocolab.db import models -from vocolab.lib import _fs +from pydantic import BaseModel, Extra, EmailStr +from vocolab import get_settings, exc + +_settings = get_settings() + +class UserProfileData(BaseModel): + username: str + affiliation: str + first_name: Optional[str] + last_name: Optional[str] + verified: bool + email: EmailStr + created: Optional[datetime] + + class Config: + extra = Extra.allow + + @classmethod + def load(cls, username: str): + db_file = (_settings.user_data_dir / f"{username}.json") + if not db_file.is_file(): + raise exc.UserNotFound('user requested has no data entry') + + with db_file.open() as fp: + return cls.parse_obj(json.load(fp)) + + def update(self): + if not _settings.user_data_dir.is_dir(): + _settings.user_data_dir.mkdir(parents=True) + + with (_settings.user_data_dir / f"{self.username}.json").open('w') as fp: + fp.write(self.json(indent=4)) -# export functions -update_user_data = _fs.users.update_user_data -get_user_data: Callable[[str], models.api.UserData] = _fs.users.get_user_data def hash_pwd(*, password: str, salt=None): diff --git a/vocolab/db/__init__.py b/vocolab/db/__init__.py deleted file mode 100644 index 1d9ac5b..0000000 --- a/vocolab/db/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from vocolab.db.base import ( - 
create_db, zrDB, users_metadata -) diff --git a/vocolab/db/admin.py b/vocolab/db/admin.py deleted file mode 100644 index 24ed1ed..0000000 --- a/vocolab/db/admin.py +++ /dev/null @@ -1,4 +0,0 @@ -from vocolab.settings import get_settings - -_settings = get_settings() - diff --git a/vocolab/db/base.py b/vocolab/db/base.py deleted file mode 100644 index 817e858..0000000 --- a/vocolab/db/base.py +++ /dev/null @@ -1,22 +0,0 @@ -import databases -import sqlalchemy - -from vocolab.db.schema import users_metadata, challenge_metadata -from vocolab.settings import get_settings - -_settings = get_settings() - -_USERS_CONN = f"sqlite:///{_settings.DATA_FOLDER}/{_settings.database_options.db_file}" - -zrDB = databases.Database(_USERS_CONN) - - -def create_db(): - if not (_settings.DATA_FOLDER / _settings.database_options.db_file).is_file(): - (_settings.DATA_FOLDER / _settings.database_options.db_file).touch() - - engine = sqlalchemy.create_engine( - _USERS_CONN, connect_args={"check_same_thread": False} - ) - users_metadata.create_all(engine) - challenge_metadata.create_all(engine) diff --git a/vocolab/db/exc.py b/vocolab/db/exc.py deleted file mode 100644 index 62c8f69..0000000 --- a/vocolab/db/exc.py +++ /dev/null @@ -1,28 +0,0 @@ -import sqlite3 -from vocolab import exc - - -class IntegrityError(sqlite3.IntegrityError): - pass - - -def parse_user_insertion(e: Exception): - """ Wrapper to uniform exception while inserting new users """ - - if issubclass(IntegrityError, e.__class__): - error_message = e.__str__() - if "UNIQUE" in error_message and "email" in error_message: - raise exc.ValueNotValid('email already exists', data='email') - elif "UNIQUE" in error_message and "username" in error_message: - raise exc.ValueNotValid('username already exists', data='username') - raise e - - -def parse_challenge_insertion(e: Exception): - """ Wrapper to uniform exception while inserting new challenges """ - if issubclass(IntegrityError, e.__class__): - error_message = e.__str__() - if 
"UNIQUE" in error_message and "label" in error_message: - raise exc.ValueNotValid('a challenge with the same label exists', data='label') - - raise e diff --git a/vocolab/db/models/__init__.py b/vocolab/db/models/__init__.py deleted file mode 100644 index 7673ca1..0000000 --- a/vocolab/db/models/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from . import api -from . import cli -from . import misc -from . import tasks diff --git a/vocolab/db/models/api/__init__.py b/vocolab/db/models/api/__init__.py deleted file mode 100644 index 4d32e84..0000000 --- a/vocolab/db/models/api/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .users import * -from .auth import * -from .challenges import * -from .commons import * -from .leaerboards import * diff --git a/vocolab/db/models/api/auth.py b/vocolab/db/models/api/auth.py deleted file mode 100644 index a6452b7..0000000 --- a/vocolab/db/models/api/auth.py +++ /dev/null @@ -1,22 +0,0 @@ -""" Dataclasses representing API/auth input output data types """ -from pydantic import BaseModel, EmailStr - - -class LoggedItem(BaseModel): - """ Return type of the /login function """ - access_token: str - token_type: str - - -class CurrentUser(BaseModel): - """ Basic userinfo Model """ - username: str - email: EmailStr - - -class PasswordResetRequest(BaseModel): - """ Input Schema for /password/reset request """ - username: str - email: EmailStr - - diff --git a/vocolab/db/models/api/challenges.py b/vocolab/db/models/api/challenges.py deleted file mode 100644 index b7bae1b..0000000 --- a/vocolab/db/models/api/challenges.py +++ /dev/null @@ -1,81 +0,0 @@ -""" Dataclasses representing API/challenge input output data types """ -from datetime import date -from pathlib import Path -from typing import Optional, List, Tuple, Dict - -from pydantic import BaseModel, HttpUrl - - -class ChallengePreview(BaseModel): - """ Used as response type for root challenge list request""" - id: int - label: str - active: bool - - -class ChallengesResponse(BaseModel): - 
""" Used as response type for preview of a challenge """ - id: int - label: str - start_date: date - end_date: Optional[date] - active: bool - url: HttpUrl - evaluator: Optional[int] - - -class SubmissionRequestFileIndexItem(BaseModel): - """ Item used to represent a file in the file index - used in the NewSubmissionRequest object. - - File index is used to verify correct number of files/parts have been uploaded - """ - file_name: str - file_size: int - file_hash: Optional[str] = None - - -class NewSubmissionRequest(BaseModel): - """ Dataclass used for input in the creation of a new submission to a challenge """ - username: str - track_label: str - track_id: int - model_id: str - filename: str - hash: str - multipart: bool - has_scores: bool - leaderboards: Dict[str, Path] - index: Optional[List[SubmissionRequestFileIndexItem]] - - -class NewSubmission(BaseModel): - """ Item used in the database to create a new submission entry """ - user_id: int - track_id: int - - -class SubmissionPreview(BaseModel): - submission_id: str - track_label: str - track_id: int - status: str - - -class SubmissionView(BaseModel): - submission_id: str - user_id: int - username: str - track_label: str - track_id: int - status: str - date: date - evaluator_label: str - evaluator_cmd: str - leaderboards: List[Tuple[str, int]] - - -class UploadSubmissionPartResponse(BaseModel): - """ Response type of the upload submission part method in /challenges """ - completed: bool - remaining: List[str] diff --git a/vocolab/db/models/api/commons.py b/vocolab/db/models/api/commons.py deleted file mode 100644 index 640dfb4..0000000 --- a/vocolab/db/models/api/commons.py +++ /dev/null @@ -1,7 +0,0 @@ -""" Common types used in multiple api requests """ -from pydantic import BaseModel - - -class Message(BaseModel): - """ Generic message response""" - message: str diff --git a/vocolab/db/models/api/leaerboards.py b/vocolab/db/models/api/leaerboards.py deleted file mode 100644 index 0ba4103..0000000 --- 
a/vocolab/db/models/api/leaerboards.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel - - -class LeaderboardPublicView(BaseModel): - id: int - challenge_id: int - label: str - entry_file: str - archived: bool - static_files: bool diff --git a/vocolab/db/models/api/users.py b/vocolab/db/models/api/users.py deleted file mode 100644 index c00583f..0000000 --- a/vocolab/db/models/api/users.py +++ /dev/null @@ -1,21 +0,0 @@ -""" Input/Output Dataclass types for the /users section of the API """ -from datetime import datetime -from typing import Optional - -from pydantic import BaseModel, Extra, EmailStr - - -class UserData(BaseModel): - username: str - affiliation: str - first_name: Optional[str] - last_name: Optional[str] - - class Config: - extra = Extra.allow - - -class UserProfileResponse(UserData): - verified: bool - email: EmailStr - created: Optional[datetime] diff --git a/vocolab/db/models/cli.py b/vocolab/db/models/cli.py deleted file mode 100644 index e0f6b60..0000000 --- a/vocolab/db/models/cli.py +++ /dev/null @@ -1,27 +0,0 @@ -""" Data Models used in the admin/cli functions """ -from datetime import date -from typing import Optional - -from pydantic import BaseModel, AnyHttpUrl - -from .tasks import ExecutorsType - - -class NewChallenge(BaseModel): - """ Dataclass for challenge creation """ - id: Optional[int] - label: str - active: bool - url: AnyHttpUrl - evaluator: Optional[int] - start_date: date - end_date: Optional[date] - - -class NewEvaluatorItem(BaseModel): - """ Data Model used by evaluator creation process """ - label: str - executor: ExecutorsType - host: Optional[str] - script_path: str - executor_arguments: Optional[str] diff --git a/vocolab/db/models/misc.py b/vocolab/db/models/misc.py deleted file mode 100644 index 4122b3f..0000000 --- a/vocolab/db/models/misc.py +++ /dev/null @@ -1,17 +0,0 @@ -from pydantic import BaseModel, EmailStr, validator - - -class UserCreate(BaseModel): - """ Dataclass for user creation """ - 
username: str - email: EmailStr - pwd: str - first_name: str - last_name: str - affiliation: str - - @validator('username', 'pwd', 'first_name', 'last_name', 'affiliation') - def non_empty_string(cls, v): - assert v, "UserCreate does not accept empty fields" - return v - diff --git a/vocolab/db/models/tasks.py b/vocolab/db/models/tasks.py deleted file mode 100644 index 8f10149..0000000 --- a/vocolab/db/models/tasks.py +++ /dev/null @@ -1,123 +0,0 @@ -from datetime import datetime - -import json -import uuid -from enum import Enum -from shutil import which -from typing import List, Union, Optional - -from pydantic import BaseModel, ValidationError, Field, root_validator - -from vocolab import out - - -class QueuesNames(str, Enum): - eval_queue = "eval_queue" - update_queue = "update_queue" - - -class BrokerMessage(BaseModel): - """ A Generic description of a Broker Message Object """ - message_type: Optional[str] - label: str - job_id: str = str(uuid.uuid4()) - timestamp: datetime = Field(default_factory=datetime.now) - - @root_validator(pre=True) - def set_message_type(cls, values): - values["message_type"] = str(cls.__name__) - return values - - def __repr__(self): - """ Stringify the message for logging""" - return f"{self.job_id} >> {self.label}" - - -class ExecutorsType(str, Enum): - python = "python" - bash = "bash" - sbatch = "sbatch" - docker = "docker" - - def to_exec(self): - """ Returns absolute path to executable or None""" - return which(self) - - -class SubmissionEvaluationMessage(BrokerMessage): - """ A Broker Message that contains a subprocess task to be run""" - executor: ExecutorsType = ExecutorsType.bash - submission_id: str - bin_path: str - script_name: str - executor_args: List[str] - cmd_args: List[str] - - def __repr__(self): - """ Stringify the message for logging""" - return f"{self.job_id} >> " \ - f"{self.submission_id}@{self.label}:: " \ - f"{self.executor} {self.bin_path}/{self.script_name} {self.cmd_args} --" - - -class UpdateType(str, 
Enum): - evaluation_complete = "evaluation_complete" - evaluation_failed = "evaluation_failed" - evaluation_canceled = "evaluation_canceled" - evaluation_undefined = "evaluation_undefined" - - -class SubmissionUpdateMessage(BrokerMessage): - """ A Broker Message that contains a python function to execute """ - submission_id: str - updateType: UpdateType - hostname: str - - def __repr__(self): - """ Stringify the message for logging""" - return f"{self.job_id} >> " \ - f"{self.submission_id}@{self.label}:: " \ - f"{self.updateType}@{self.hostname}--" - - -class SimpleLogMessage(BrokerMessage): - """ A Broker Message that contains a simple string message """ - message: str - - def __repr__(self): - """ Stringify the message for logging""" - return f"{self.job_id} >> {self.label}:: {self.message}" - - -def message_from_bytes(byte_msg: bytes) -> Union[BrokerMessage, - SubmissionEvaluationMessage, - SubmissionUpdateMessage, SimpleLogMessage]: - """ Convert a bytes object to the correct corresponding Message object """ - - try: - url_obj = json.loads(str(byte_msg.decode("utf-8"))) - - message_type = url_obj.get('message_type', None) - - # if type is not specified raise error - if message_type is None: - out.log.error(f"Message does not specify type: {str(byte_msg.decode('utf-8'))}") - raise ValueError(f"Message does not specify type: {str(byte_msg.decode('utf-8'))}") - - # try and match type with known types - if message_type == "SubmissionEvaluationMessage": - return SubmissionEvaluationMessage(**url_obj) - elif message_type == "SubmissionUpdateMessage": - return SubmissionUpdateMessage(**url_obj) - elif message_type == "SimpleLogMessage": - return SimpleLogMessage(**url_obj) - elif message_type == "BrokerMessage": - return BrokerMessage(**url_obj) - - # raise error if matching failed - out.log.error(f"Unknown message type: {str(byte_msg.decode('utf-8'))}") - raise ValueError(f"Unknown message type {str(byte_msg.decode('utf-8'))}") - - except (json.JSONDecodeError, 
ValidationError): - out.log.error(f"error while parsing command: {str(byte_msg.decode('utf-8'))}") - raise ValueError(f"command {str(byte_msg.decode('utf-8'))} not valid!!") diff --git a/vocolab/db/q/__init__.py b/vocolab/db/q/__init__.py deleted file mode 100644 index efd3979..0000000 --- a/vocolab/db/q/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from vocolab.db.q import users as userQ # noqa: allow non standard names -from vocolab.db.q import challenges as challengesQ # noqa: allow non standard names -from vocolab.db.q import leaderboards as leaderboardQ # noqa: allow non standard names diff --git a/vocolab/db/q/challenges.py b/vocolab/db/q/challenges.py deleted file mode 100644 index bc0c466..0000000 --- a/vocolab/db/q/challenges.py +++ /dev/null @@ -1,275 +0,0 @@ -from datetime import datetime -from typing import List, Any, Optional -from uuid import uuid4 - -from vocolab import get_settings -from vocolab.db import models, zrDB, schema, exc as db_exc -from vocolab.core import misc - -_settings = get_settings() - - -async def create_new_challenge(item: models.cli.NewChallenge): - """ Creates a new challenge entry in the database """ - try: - query = schema.challenges_table.insert().values( - **item.dict() - ) - await zrDB.execute(query) - except Exception as e: - db_exc.parse_user_insertion(e) - - -async def list_challenges(*, include_all: bool = False) -> List[schema.Challenge]: - """ Returns a list of all the challenges - - flag include_all allows to filter out inactive challenges - """ - query = schema.challenges_table.select() - challenges = await zrDB.fetch_all(query) - if challenges is None: - raise ValueError('No challenges were found') - - challenges = [schema.Challenge(**c) for c in challenges] - if include_all: - return challenges - else: - return [c for c in challenges if c.is_active()] - - -async def get_challenge(*, - challenge_id: int, allow_inactive=False, - ) -> schema.Challenge: - """ Fetches the Challenge object from the database - - :note: in 
strict mode (allow_inactive = False) the function raises a ValueError - if the challenge has expired or is inactive. - """ - query = schema.challenges_table.select().where( - schema.challenges_table.c.id == challenge_id - ) - ch = await zrDB.fetch_one(query) - if ch is None: - raise ValueError(f'There is no challenge with the following id: {challenge_id}') - ch = schema.Challenge(**ch) - if allow_inactive: - return ch - else: - if not ch.is_active(): - raise ValueError(f"The Challenge {ch.label}[{ch.id}] is not active") - return ch - - -async def update_challenge_property(*, challenge_id: int, variable_name: str, value: Any, - allow_parsing: bool = False): - """ Update the property of a challenge """ - field = schema.Challenge.__fields__.get(variable_name, None) - if field is None: - raise ValueError(f'Class Challenge does not have a member called ! {variable_name}') - - if allow_parsing: - value = misc.str2type(value, field.type_) - - if value is not None and not isinstance(value, field.type_): - raise ValueError(f"Challenge.{variable_name} should be of type {field.type_}") - - query = schema.challenges_table.update().where( - schema.challenges_table.c.id == challenge_id - ).values({f"{variable_name}": value}) - - try: - await zrDB.execute(query) - except Exception as e: - db_exc.parse_user_insertion(e) - - return value - - -async def delete_challenge(*, ch_id: int): - """ Delete the database entry of a challenge """ - query = schema.challenges_table.delete().where( - schema.challenges_table.c.id == ch_id - ) - return await zrDB.execute(query) - - -async def add_submission(*, new_submission: models.api.NewSubmission, evaluator_id: int): - """ Creates a database entry to a new submission """ - submission_id = datetime.now().strftime('%Y%m-%d%H-%M%S-') + str(uuid4()) - query = schema.submissions_table.insert() - values = new_submission.dict() - values["id"] = submission_id - values["submit_date"] = datetime.now() - values["status"] = 
schema.SubmissionStatus.uploading - values["evaluator_id"] = evaluator_id - values["author_label"] = None # default value for author_label is None - # todo: check if this should be fetched from challenge entry ? - values["auto_eval"] = _settings.task_queue_options.AUTO_EVAL # auto eval default from settings - await zrDB.execute(query=query, values=values) - return submission_id - - -async def list_submission(*, by_track: int = None, by_user: int = None, by_status=None): - """ Fetches a list of submission from the database """ - query = schema.submissions_table.select() - - if by_track: - query = query.where( - schema.submissions_table.c.track_id == by_track - ) - - if by_user: - query = query.where( - schema.submissions_table.c.user_id == by_user - ) - - if by_status: - query = query.where( - schema.submissions_table.c.status == by_status - ) - - sub_list = await zrDB.fetch_all(query) - - # map & return - return [schema.ChallengeSubmission(**sub) for sub in sub_list] - - -async def get_submission(*, by_id: str) -> schema.ChallengeSubmission: - """ Fetches a submission from the database """ - query = schema.submissions_table.select().where( - schema.submissions_table.c.id == by_id - ) - sub = await zrDB.fetch_one(query) - if sub is None: - raise ValueError(f'There is no challenge with the following id: {by_id}') - # map & return - return schema.ChallengeSubmission(**sub) - - -async def get_user_submissions(*, user_id: int) -> List[schema.ChallengeSubmission]: - """ Fetch all the submissions of a specific user """ - query = schema.submissions_table.select().where( - schema.submissions_table.c.user_id == user_id - ) - subs = await zrDB.fetch_all(query) - if subs is None: - return [] - return [schema.ChallengeSubmission(**it) for it in subs] - - -async def update_submission_status(*, by_id: str, status: schema.SubmissionStatus): - """ Update the status of a submission """ - query = schema.submissions_table.update().where( - schema.submissions_table.c.id == by_id - 
).values(status=status) - return await zrDB.execute(query) - - -async def update_submission_evaluator(evaluator_id: int, *, by_id: Optional[str] = None, by_track: Optional[int] = None, - by_user: Optional[int] = None): - """ Update the set evaluator for a specific submission. """ - - if by_id: - query = schema.submissions_table.update().where( - schema.submissions_table.c.id == by_id - ) - elif by_track: - query = schema.submissions_table.update().where( - schema.submissions_table.c.track_id == by_track - ) - elif by_user: - query = schema.submissions_table.update().where( - schema.submissions_table.c.user_id == by_user - ) - else: - raise ValueError(f'Selector not specified') - - # execute query and update values on db - query = query.values(evaluator_id=evaluator_id) - return await zrDB.execute(query) - - -async def update_submission_author_label(label: str, *, by_id: Optional[str] = None, by_user: Optional[int] = None): - """ Update or set """ - if by_id: - query = schema.submissions_table.update().where( - schema.submissions_table.c.id == by_id - ) - elif by_user: - query = schema.submissions_table.update().where( - schema.submissions_table.c.user_id == by_user - ) - else: - raise ValueError(f'Selector not specified') - - # execute query and update values on db - query = query.values(author_label=label) - return await zrDB.execute(query) - - -async def drop_submission(*, by_id: str): - """ Delete db entry of a submission """ - query = schema.submissions_table.delete().where( - schema.submissions_table.c.id == by_id - ) - await zrDB.execute(query) - - -async def submission_status(*, by_id: str) -> schema.SubmissionStatus: - """ Returns the status of a submission """ - query = schema.submissions_table.select().where( - schema.submissions_table.c.id == by_id - ) - sub = await zrDB.fetch_one(query) - if sub is None: - raise ValueError(f'There is no challenge with the following id: {by_id}') - # map & return - return schema.ChallengeSubmission(**sub).status - - 
-async def get_evaluators(): - """ Returns a list of the evaluators """ - query = schema.evaluators_table.select() - results = await zrDB.fetch_all(query) - if not results: - return [] - return [schema.EvaluatorItem(**i) for i in results] - - -async def get_evaluator(*, by_id: int) -> Optional[schema.EvaluatorItem]: - """ Returns a specific evaluator """ - - query = schema.evaluators_table.select().where( - schema.evaluators_table.c.id == by_id - ) - result = await zrDB.fetch_one(query) - if not result: - return None - return schema.EvaluatorItem(**result) - - -async def add_evaluator(*, lst_eval: List[models.cli.NewEvaluatorItem]): - """ Insert a list of evaluators into the database """ - for i in lst_eval: - query = schema.evaluators_table.select().where( - schema.evaluators_table.c.label == i.label - ).where( - schema.evaluators_table.c.host == i.host - ) - res = await zrDB.fetch_one(query) - - if res is None: - await zrDB.execute(schema.evaluators_table.insert(), i.dict()) - else: - update_query = schema.evaluators_table.update().where( - schema.evaluators_table.c.id == res.id - ).values(executor=i.executor, script_path=i.script_path, executor_arguments=i.executor_arguments) - await zrDB.execute(update_query) - - -async def edit_evaluator_args(*, eval_id: int, arg_list: List[str]): - """ update evaluator base arguments """ - query = schema.evaluators_table.update().where( - schema.evaluators_table.c.id == eval_id - ).values(executor_arguments=";".join(arg_list)) - await zrDB.execute(query) diff --git a/vocolab/db/q/leaderboards.py b/vocolab/db/q/leaderboards.py deleted file mode 100644 index e1f1e6d..0000000 --- a/vocolab/db/q/leaderboards.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -Database functions that manipulate the leaderboard table -""" -from pathlib import Path -from typing import Any, List, Optional -from vocolab.db import schema, zrDB, exc as db_exc -from vocolab.core import misc - - -async def get_leaderboard(*, leaderboard_id: int) -> 
schema.LeaderBoard: - """ Fetches the leaderboard object with the corresponding id - - :raise ValueError if the item is not is the database - :raise SQLAlchemy exceptions if database connection or condition fails - """ - query = schema.leaderboards_table.select().where( - schema.leaderboards_table.c.id == leaderboard_id - ) - ld = await zrDB.fetch_one(query) - if ld is None: - raise ValueError(f'Leaderboard: {leaderboard_id} not found in database !!!') - - return schema.LeaderBoard(**ld) - - -async def get_leaderboards(*, by_challenge_id: Optional[int] = None) -> List[schema.LeaderBoard]: - """ A list of leaderboards - - :param by_challenge_id: filter leaderboards by challenge id - :raise ValueError if the item is not is the database - :raise SQLAlchemy exceptions if database connection or condition fails - """ - if by_challenge_id: - query = schema.leaderboards_table.select().where( - schema.leaderboards_table.c.challenge_id == by_challenge_id - ) - else: - raise ValueError("No parameter given") - - lst_ld = await zrDB.fetch_all(query) - return [schema.LeaderBoard(**ld) for ld in lst_ld] - - -async def list_leaderboards() -> List[schema.LeaderBoard]: - """ Fetch a list of all the leaderboards present in the database - - :raise ValueError if the leaderboard database is empty - :raise SQLAlchemy exceptions if database connection or condition fails - """ - query = schema.leaderboards_table.select() - leaderboards = await zrDB.fetch_all(query) - if not leaderboards: - raise ValueError('No leaderboards found') - - return [schema.LeaderBoard(**ld) for ld in leaderboards] - - -async def create_leaderboard(*, lead_data: schema.LeaderBoard) -> int: - """ Create a new leaderboard entry in database from item object - - :returns the id of the leaderboard created - """ - query = schema.leaderboards_table.insert().values( - label=lead_data.label, - challenge_id=lead_data.challenge_id, - path_to=f"{lead_data.path_to}", - entry_file=lead_data.entry_file, - 
archived=lead_data.archived, - external_entries=f"{lead_data.external_entries}", - static_files=lead_data.static_files - ) - try: - result = await zrDB.execute(query) - return result - except Exception as e: - db_exc.parse_user_insertion(e) - - -async def update_leaderboard_value(*, leaderboard_id, variable_name: str, value: Any, allow_parsing: bool = False): - """ Update a value in the leaderboard corresponding to the given id - - :raise ValueError if given variable does not exist or does not match corresponding type - """ - field = schema.LeaderBoard.__fields__.get(variable_name, None) - if field is None: - raise ValueError(f'Class Leaderboard does not have a member called ! {variable_name}') - - if allow_parsing: - value = misc.str2type(value, field.type_) - - if value is None: - if not field.allow_none: - raise ValueError(f'LeaderBoard.{variable_name} cannot be None/Null') - else: - if not isinstance(value, field.type_): - raise ValueError(f"Leaderboard.{variable_name} should be of type {field.type_}") - - # Path is not supported by sqlite as a raw type - if field.type_ == Path: - value = str(value) - - query = schema.leaderboards_table.update().where( - schema.leaderboards_table.c.id == leaderboard_id - ).values({f"{variable_name}": str(value)}) - try: - await zrDB.execute(query) - except Exception as e: - db_exc.parse_user_insertion(e) - - return value diff --git a/vocolab/db/q/users.py b/vocolab/db/q/users.py deleted file mode 100644 index 6cd835f..0000000 --- a/vocolab/db/q/users.py +++ /dev/null @@ -1,203 +0,0 @@ -import secrets -from datetime import datetime -from typing import Optional, List - - -from email_validator import validate_email, EmailNotValidError - -from vocolab import exc, out -from vocolab.db import zrDB, models, schema, exc as db_exc -from vocolab.core import users_lib -from vocolab.settings import get_settings - -_settings = get_settings() - - -async def create_user(*, usr: models.misc.UserCreate): - """ Create a new user entry in the 
users' database.""" - - hashed_pswd, salt = users_lib.hash_pwd(password=usr.pwd) - verification_code = secrets.token_urlsafe(8) - try: - # insert user entry into the database - query = schema.users_table.insert().values( - username=usr.username, - email=usr.email, - active=True, - verified=verification_code, - hashed_pswd=hashed_pswd, - salt=salt, - created_at=datetime.now() - ) - await zrDB.execute(query) - - except Exception as e: - db_exc.parse_user_insertion(e) - - # create user profile data - data = models.api.UserData( - username=usr.username, - affiliation=usr.affiliation, - first_name=usr.first_name, - last_name=usr.last_name - ) - users_lib.update_user_data(usr.username, data) - - return verification_code - - -async def verify_user(*, username: str, verification_code: str): - """ User verification using a specific code. - If the code is correct verification succeeds - If not the function raises a ValueNotValid Exception - If user is already verified we raise an ActionNotValid Exception - """ - user = await get_user(by_username=username) - if secrets.compare_digest(user.verified, verification_code): - query = schema.users_table.update().where( - schema.users_table.c.id == user.id - ).values( - verified='True' - ) - await zrDB.execute(query) - return True - elif secrets.compare_digest(user.verified, 'True'): - raise exc.ActionNotValid("Email already verified") - else: - raise exc.ValueNotValid("validation code was not correct") - - -async def admin_verification(*, user_id: int): - """ Verify a user, raises an ValueError if user does not exist. - To only be used for administration. - Users need to validate their accounts. 
- - - bypasses code verification - - no exception is raised if user already active - """ - query = schema.users_table.update().where( - schema.users_table.c.id == user_id - ).values( - verified='True' - ) - res = await zrDB.execute(query) - - if res == 0: - raise ValueError(f'user {user_id} was not found') - - -def check_users_password(*, password: str, user: schema.User): - """ Verify that a given password matches the users """ - hashed_pwd, _ = users_lib.hash_pwd(password=password, salt=user.salt) - return hashed_pwd == user.hashed_pswd - - -async def get_user_for_login(login_id: str, password: str) -> Optional[schema.User]: - """ - :params login_id: the login id can be username or email - :params password: the user's password - """ - try: - validate_email(login_id) # check if email is valid - query = schema.users_table.select().where( - schema.users_table.c.email == login_id - ) - except EmailNotValidError: - query = schema.users_table.select().where( - schema.users_table.c.username == login_id - ) - - user = await zrDB.fetch_one(query) - if user is None: - return None - user = schema.User(**user) - out.console.print(f"===> {user=}") - - hashed_pswd, _ = users_lib.hash_pwd(password=password, salt=user.salt) - if user.enabled and hashed_pswd == user.hashed_pswd: - return user - return None - - -async def get_user(*, by_uid: Optional[int] = None, by_username: Optional[str] = None, - by_email: Optional[str] = None) -> schema.User: - """ Get a user from the database using uid, username or email as a search parameter. 
- - :rtype: schema.User - :returns the user object - :raises ValueError if the user does not exist or no search value was provided - """ - - if by_uid: - query = schema.users_table.select().where( - schema.users_table.c.id == by_uid - ) - elif by_username: - query = schema.users_table.select().where( - schema.users_table.c.username == by_username - ) - elif by_email: - query = schema.users_table.select().where( - schema.users_table.c.email == by_email - ) - else: - raise ValueError('a value must be provided : uid, username, email') - - user = await zrDB.fetch_one(query) - if user is None: - raise ValueError(f'database does not contain a user for given credentials') - - return schema.User(**user) - - -async def get_user_list() -> List[schema.User]: - """ Return a list of all users """ - query = schema.users_table.select() - user_list = await zrDB.fetch_all(query) - if user_list is None: - raise ValueError(f'database does not contain any user') - return [schema.User(**usr) for usr in user_list] - - -async def delete_user(*, uid: int): - """ Deletes all password reset sessions from the password_reset_users table """ - query = schema.users_table.delete().where( - schema.users_table.c.id == uid - ) - # returns number of deleted entries - return await zrDB.execute(query) - - -async def update_users_password(*, user: schema.User, password: str, password_validation: str): - """ Change a users password """ - - if password != password_validation: - raise ValueError('passwords do not match') - - hashed_pswd, salt = users_lib.hash_pwd(password=password) - query = schema.users_table.update().where( - schema.users_table.c.id == user.id - ).values(hashed_pswd=hashed_pswd, salt=salt) - - await zrDB.execute(query) - - -async def toggle_user_status(*, user_id: int, active: bool = True): - """ Toggles a users status for active to inactive """ - query = schema.users_table.update().where( - schema.users_table.c.id == user_id - ).values( - active=active - ) - res = await 
zrDB.execute(query) - - if res == 0: - raise ValueError(f'user {user_id} was not found') - - -async def toggle_all_users_status(*, active: bool = True): - """ Toggles a users status for active to inactive """ - query = schema.users_table.update().values( - active=active - ) - return await zrDB.execute(query) diff --git a/vocolab/db/schema/__init__.py b/vocolab/db/schema/__init__.py deleted file mode 100644 index a861fb9..0000000 --- a/vocolab/db/schema/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from .auth import * -from .challenges import * - diff --git a/vocolab/db/schema/auth.py b/vocolab/db/schema/auth.py deleted file mode 100644 index 087cec7..0000000 --- a/vocolab/db/schema/auth.py +++ /dev/null @@ -1,70 +0,0 @@ -import json -from datetime import datetime -from typing import Optional - -import sqlalchemy -from jose import jwt, JWTError # noqa: false flags from requirements https://youtrack.jetbrains.com/issue/PY-27985 -from pydantic import BaseModel, EmailStr, Field, ValidationError - -from ...settings import get_settings - -_settings = get_settings() -users_metadata = sqlalchemy.MetaData() - - -class User(BaseModel): - id: int - username: str - email: EmailStr - active: bool - verified: str - hashed_pswd: bytes - salt: bytes - created_at: Optional[datetime] - - @property - def enabled(self): - return self.active and self.verified == 'True' - - class Config: - orm_mode = True - - -users_table = sqlalchemy.Table( - "users_credentials", - users_metadata, - sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True), - sqlalchemy.Column("username", sqlalchemy.String, unique=True), - sqlalchemy.Column("email", sqlalchemy.String, unique=True), - sqlalchemy.Column("active", sqlalchemy.Boolean), - sqlalchemy.Column("verified", sqlalchemy.String), - sqlalchemy.Column("hashed_pswd", sqlalchemy.BLOB), - sqlalchemy.Column("salt", sqlalchemy.BLOB), - sqlalchemy.Column("created_at", sqlalchemy.DATETIME) -) - -class Token(BaseModel): - """ API Session 
Token """ - expires_at: datetime = Field(default_factory=lambda: datetime.now() + _settings.user_options.session_expiry_delay) - created_at: datetime = Field(default_factory=lambda: datetime.now()) - allow_password_reset: bool = False # used for password reset sessions - user_email: EmailStr - - def is_expired(self) -> bool: - """ Check if Token has expired """ - return self.expires_at < datetime.now() - - def encode(self) -> str: - """ Encode into a token string """ - # passing by json allows to convert datetimes to strings using pydantic serializer - as_dict = json.loads(self.json()) - return jwt.encode(claims=as_dict, key=_settings.secret, algorithm=_settings.api_options.token_encryption) - - @classmethod - def decode(cls, encoded_token: str): - """ Decode token from encoded string """ - try: - payload = jwt.decode(token=encoded_token, key=_settings.secret, algorithms=[_settings.api_options.token_encryption]) - return Token(**payload) - except (JWTError, ValidationError) as e: - raise ValueError("Invalid token") from e diff --git a/vocolab/db/schema/challenges.py b/vocolab/db/schema/challenges.py deleted file mode 100644 index be59ff1..0000000 --- a/vocolab/db/schema/challenges.py +++ /dev/null @@ -1,230 +0,0 @@ -from datetime import date -from datetime import datetime -from enum import Enum -from pathlib import Path -from typing import Optional - -import sqlalchemy -from pydantic import BaseModel, AnyHttpUrl -from pydantic import HttpUrl - -from vocolab.db.models.tasks import ExecutorsType - -challenge_metadata = sqlalchemy.MetaData() - - -class ModelID(BaseModel): - """ Data representation of a Model id & its metadata""" - id: str - user_id: int - created_at: datetime - description: str - gpu_budget: str - train_set: str - authors: str - institution: str - team: str - paper_url: AnyHttpUrl - code_url: AnyHttpUrl - -""" -Table indexing of model ids -""" -models_table = sqlalchemy.Table( - "models", - challenge_metadata, - sqlalchemy.Column("id", 
sqlalchemy.String, primary_key=True, unique=True), - sqlalchemy.Column("user_id", sqlalchemy.Integer), - sqlalchemy.Column("created_at", sqlalchemy.DateTime), - sqlalchemy.Column("description", sqlalchemy.String), - sqlalchemy.Column("gpu_budget", sqlalchemy.String), - sqlalchemy.Column("train_set", sqlalchemy.String), - sqlalchemy.Column("authors", sqlalchemy.String), - sqlalchemy.Column("institution", sqlalchemy.String), - sqlalchemy.Column("team", sqlalchemy.String), - sqlalchemy.Column("paper_url", sqlalchemy.String), - sqlalchemy.Column("code_url", sqlalchemy.String), -) - -class EvaluatorItem(BaseModel): - """ Data representation of an evaluator """ - id: int - label: str - executor: ExecutorsType - host: Optional[str] - script_path: str - executor_arguments: str - - class Config: - orm_mode = True - -""" -Table indexing the existing evaluators -""" -evaluators_table = sqlalchemy.Table( - "evaluators", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), - sqlalchemy.Column("label", sqlalchemy.String, unique=True), - sqlalchemy.Column("host", sqlalchemy.String), - sqlalchemy.Column("executor", sqlalchemy.String), - sqlalchemy.Column("script_path", sqlalchemy.String), - sqlalchemy.Column("executor_arguments", sqlalchemy.String) -) - - -class Challenge(BaseModel): - """ Data representation of a challenge """ - id: int - label: str - start_date: date - end_date: Optional[date] - active: bool - url: HttpUrl - evaluator: Optional[int] - - class Config: - orm_mode = True - - def is_active(self) -> bool: - """ Checks if challenge is active """ - present = date.today() - if self.end_date: - return self.start_date <= present <= self.end_date and self.active - else: - return self.start_date <= present and self.active - - @classmethod - def get_field_names(cls): - return list(cls.__fields__.keys()) - -""" -Table used to index the existing challenges & their metadata -""" -challenges_table = 
sqlalchemy.Table( - "challenges", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True), - sqlalchemy.Column("label", sqlalchemy.String, unique=True), - sqlalchemy.Column("start_date", sqlalchemy.Date), - sqlalchemy.Column("end_date", sqlalchemy.Date), - sqlalchemy.Column("active", sqlalchemy.Boolean), - sqlalchemy.Column("url", sqlalchemy.String), - sqlalchemy.Column("evaluator", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")) -) - - -class LeaderBoard(BaseModel): - """ Data representation of a Leaderboard """ - id: Optional[int] - challenge_id: int # Id to linked challenge - label: str # Name of leaderboard - path_to: Path # Path to build result - entry_file: str # filename in submission results - archived: bool # is_archived - external_entries: Optional[Path] # Location of external entries (baselines, toplines, archived) - static_files: bool # has static files - sorting_key: Optional[str] # path to the item to use as sorting key - - @classmethod - def get_field_names(cls): - return list(cls.__fields__.keys()) - - class Config: - orm_mode = True - - -""" -Table indexing the existing leaderboards and their metadata -""" -leaderboards_table = sqlalchemy.Table( - "leaderboards", - challenge_metadata, - sqlalchemy.Column('id', sqlalchemy.Integer, primary_key=True, autoincrement=True), - sqlalchemy.Column('challenge_id', sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), - sqlalchemy.Column('label', sqlalchemy.String, unique=True), - sqlalchemy.Column('path_to', sqlalchemy.String), - sqlalchemy.Column('entry_file', sqlalchemy.String), - sqlalchemy.Column('archived', sqlalchemy.Boolean), - sqlalchemy.Column('external_entries', sqlalchemy.String), - sqlalchemy.Column('static_files', sqlalchemy.Boolean), - sqlalchemy.Column('sorting_key', sqlalchemy.String), -) - - - -class SubmissionStatus(str, Enum): - """ Definition of different states of submissions """ - # TODO: maybe add submission type (with 
scores...) - uploading = 'uploading' - uploaded = 'uploaded' - on_queue = 'on_queue' - validating = 'validating' # todo verify usage - invalid = 'invalid' - evaluating = 'evaluating' - completed = 'completed' - canceled = 'canceled' - failed = 'failed' - no_eval = 'no_eval' - no_auto_eval = 'no_auto_eval' - excluded = 'excluded' - - @classmethod - def get_values(cls): - return [el.value for el in cls] # noqa enum has attr values - - - -class ChallengeSubmission(BaseModel): - """ Data representation of a submission to a challenge """ - id: str - user_id: int - track_id: int - submit_date: datetime - status: SubmissionStatus - auto_eval: bool - evaluator_id: Optional[int] - author_label: Optional[str] = None - - class Config: - orm_mode = True - - -""" -Table entry indexing submissions to challenges -""" -submissions_table = sqlalchemy.Table( - "challenge_submissions", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), - sqlalchemy.Column("user_id", sqlalchemy.Integer), - sqlalchemy.Column("track_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), - sqlalchemy.Column("submit_date", sqlalchemy.DateTime), - sqlalchemy.Column("status", sqlalchemy.String), - sqlalchemy.Column("auto_eval", sqlalchemy.Boolean), - sqlalchemy.Column("evaluator_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")), - sqlalchemy.Column("author_label", sqlalchemy.String) -) - - -class LeaderboardEntry: - """ Data representation of a leaderboard entry """ - id: Optional[int] - entry_path: Path - model_id: str - submission_id: str - leaderboard_id: int - submitted_at: datetime - - -""" Table indexing all leaderboard entries and their location (as stores json files)""" -leaderboard_entry_table = sqlalchemy.Table( - "leaderboard_entries", - challenge_metadata, - sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), - sqlalchemy.Column("entry_path", sqlalchemy.String), - 
sqlalchemy.Column("model_id", sqlalchemy.String, sqlalchemy.ForeignKey("leaderboards.id")), - sqlalchemy.Column("submission_id", sqlalchemy.String, sqlalchemy.ForeignKey("challenge_submissions.id")), - sqlalchemy.Column("leaderboard_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("models.id")), - sqlalchemy.Column("submitted_at", sqlalchemy.String) -) diff --git a/vocolab/settings.py b/vocolab/settings.py index c02950f..35435ef 100644 --- a/vocolab/settings.py +++ b/vocolab/settings.py @@ -79,6 +79,7 @@ class TaskQueueSettings(BaseModel): class AppSettings(BaseModel): + platform_name: str = "VOCOLAB" app_name: str = "VocoLab Challenge API" maintainers: str = "Organisation Name" admin_email: EmailStr = EmailStr("contact@email.com") From 1ea6e504eeb5cc2f2fefa59e2e4da85f3a37113e Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Wed, 1 Feb 2023 20:03:59 +0100 Subject: [PATCH 04/28] WIP: db clean-up for challenges/models/submissions (part 1) --- vocolab/api/endpoints/models.py | 37 +++++++++++++++++---------------- vocolab/settings.py | 15 ++++++++----- 2 files changed, 29 insertions(+), 23 deletions(-) diff --git a/vocolab/api/endpoints/models.py b/vocolab/api/endpoints/models.py index 55bb182..2ee6e87 100644 --- a/vocolab/api/endpoints/models.py +++ b/vocolab/api/endpoints/models.py @@ -1,7 +1,6 @@ """ Routing for /challenges section of the API This section handles challenge data """ -from datetime import datetime from fastapi import ( APIRouter, Depends, UploadFile, File, BackgroundTasks @@ -9,37 +8,39 @@ from vocolab import out, exc from vocolab.core import api_lib, submission_lib -from vocolab.db import schema, models -from vocolab.db.q import challengesQ +from vocolab.data import models, model_queries from vocolab.settings import get_settings router = APIRouter() _settings = get_settings() -@router.get('/create') -async def create_new_model(first_author_name: str, - current_user: schema.User = Depends(api_lib.get_current_active_user)): - new_model_id = 
f"{first_author_name[:3]}{str(datetime.now().year)[2:]}" - # todo: check - return new_model_id +@router.post('/create') +async def create_new_model( + first_author_name: str, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + """ Route to create a new model entry """ + # todo: add data in request body & check if it works correctly + return await model_queries.ModelID.create(first_author_name, ...) @router.get('/list') async def get_model_list(): - pass + """ Request the full model list """ + # todo check if extra formatting is needed + return await model_queries.ModelIDList.get() @router.get('/{model_id}/info') -async def get_model_info(): - # todo: check - pass +async def get_model_info(model_id: str): + return await model_queries.ModelID.get(model_id) @router.get('/{model_id}/submissions/list') -async def get_model_submissions(): - # todo: check - pass +async def get_model_submissions(model_id: str): + """ Get all submissions corresponding to a model_id """ + model = await model_queries.ModelID.get(model_id) + return await model.get_submissions() @router.get('/{model_id}/submissions/{submission_id}/info') @@ -87,7 +88,8 @@ async def create_submission( return submission_id - # todo update + +# todo update @router.put("/{model_id}/submission/{submission_id}/upload", response_model=models.api.UploadSubmissionPartResponse) async def upload_submission( model_id: str, @@ -115,4 +117,3 @@ async def upload_submission( except exc.VocoLabException as e: out.log.exception() raise e - diff --git a/vocolab/settings.py b/vocolab/settings.py index 35435ef..946afc0 100644 --- a/vocolab/settings.py +++ b/vocolab/settings.py @@ -16,7 +16,6 @@ except ImportError: from toml import load as toml_load - from pydantic import ( BaseSettings, EmailStr, DirectoryPath, HttpUrl, IPvAnyNetwork, BaseModel, Field ) @@ -40,10 +39,6 @@ class ConsoleOutputSettings(BaseModel): ERROR_LOG_FILE: Optional[Path] = None -class DatabaseSettings(BaseModel): - db_file: str = 
'vocolab.db' - - class CeleryWorkerOptions(BaseModel): celery_bin: Path = Path(shutil.which('celery')) celery_nodes: Dict[str, str] = { @@ -243,6 +238,16 @@ def secret(self): with (self.DATA_FOLDER / '.secret').open('rb') as fp: return fp.read().decode() + @property + def database_file(self): + """ Path to the database file """ + return self.DATA_FOLDER / 'vocolab.db' + + @property + def database_connection_url(self): + """ Database connection url """ + return f"sqlite:///{self.database_file}" + @contextmanager def get_temp_dir(self) -> Generator[Path, None, None]: """ Create a temporary directory """ From 93352705f4efd6256cd799095ccd850bb8fcc566 Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Fri, 3 Feb 2023 10:40:11 +0100 Subject: [PATCH 05/28] data dir was ignored by older .gitignore rule --- .gitignore | 1 - vocolab/data/__init__.py | 3 + vocolab/data/admin.py | 4 + vocolab/data/db.py | 21 ++ vocolab/data/exc.py | 28 +++ vocolab/data/model_queries/__init__.py | 3 + vocolab/data/model_queries/auth.py | 219 +++++++++++++++++++ vocolab/data/model_queries/challenges.py | 161 ++++++++++++++ vocolab/data/model_queries/models.py | 265 +++++++++++++++++++++++ vocolab/data/models/__init__.py | 4 + vocolab/data/models/api/__init__.py | 5 + vocolab/data/models/api/auth.py | 35 +++ vocolab/data/models/api/challenges.py | 79 +++++++ vocolab/data/models/api/commons.py | 7 + vocolab/data/models/api/leaerboards.py | 10 + vocolab/data/models/api/models.py | 17 ++ vocolab/data/models/cli.py | 27 +++ vocolab/data/models/misc.py | 3 + vocolab/data/models/tasks.py | 123 +++++++++++ vocolab/data/q/__init__.py | 3 + vocolab/data/q/challenges.py | 55 +++++ vocolab/data/q/leaderboards.py | 110 ++++++++++ vocolab/data/q/users.py | 0 vocolab/data/tables.py | 113 ++++++++++ 24 files changed, 1295 insertions(+), 1 deletion(-) create mode 100644 vocolab/data/__init__.py create mode 100644 vocolab/data/admin.py create mode 100644 vocolab/data/db.py create mode 100644 
vocolab/data/exc.py create mode 100644 vocolab/data/model_queries/__init__.py create mode 100644 vocolab/data/model_queries/auth.py create mode 100644 vocolab/data/model_queries/challenges.py create mode 100644 vocolab/data/model_queries/models.py create mode 100644 vocolab/data/models/__init__.py create mode 100644 vocolab/data/models/api/__init__.py create mode 100644 vocolab/data/models/api/auth.py create mode 100644 vocolab/data/models/api/challenges.py create mode 100644 vocolab/data/models/api/commons.py create mode 100644 vocolab/data/models/api/leaerboards.py create mode 100644 vocolab/data/models/api/models.py create mode 100644 vocolab/data/models/cli.py create mode 100644 vocolab/data/models/misc.py create mode 100644 vocolab/data/models/tasks.py create mode 100644 vocolab/data/q/__init__.py create mode 100644 vocolab/data/q/challenges.py create mode 100644 vocolab/data/q/leaderboards.py create mode 100644 vocolab/data/q/users.py create mode 100644 vocolab/data/tables.py diff --git a/.gitignore b/.gitignore index 238f632..97a8b8f 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,6 @@ MANIFEST *.env !example.env !docker.env -data/ *.bin # custom shortcut files diff --git a/vocolab/data/__init__.py b/vocolab/data/__init__.py new file mode 100644 index 0000000..1d9ac5b --- /dev/null +++ b/vocolab/data/__init__.py @@ -0,0 +1,3 @@ +from vocolab.db.base import ( + create_db, zrDB, users_metadata +) diff --git a/vocolab/data/admin.py b/vocolab/data/admin.py new file mode 100644 index 0000000..24ed1ed --- /dev/null +++ b/vocolab/data/admin.py @@ -0,0 +1,4 @@ +from vocolab.settings import get_settings + +_settings = get_settings() + diff --git a/vocolab/data/db.py b/vocolab/data/db.py new file mode 100644 index 0000000..2f094c7 --- /dev/null +++ b/vocolab/data/db.py @@ -0,0 +1,21 @@ +import databases +import sqlalchemy + +from vocolab import get_settings +from vocolab.data import exc as db_exc +from .tables import tables_metadata + + +_settings = 
get_settings()
+
+# Database Connection
+zrDB = databases.Database(_settings.database_connection_url)
+
+def build_database_from_schema():
+    if not _settings.database_file.is_file():
+        _settings.database_file.touch()
+
+    engine = sqlalchemy.create_engine(
+        _settings.database_connection_url, connect_args={"check_same_thread": False}
+    )
+    tables_metadata.create_all(engine)
diff --git a/vocolab/data/exc.py b/vocolab/data/exc.py
new file mode 100644
index 0000000..62c8f69
--- /dev/null
+++ b/vocolab/data/exc.py
@@ -0,0 +1,28 @@
+import sqlite3
+from vocolab import exc
+
+
+class IntegrityError(sqlite3.IntegrityError):
+    pass
+
+
+def parse_user_insertion(e: Exception):
+    """ Wrapper to uniform exception while inserting new users """
+
+    if issubclass(IntegrityError, e.__class__):
+        error_message = e.__str__()
+        if "UNIQUE" in error_message and "email" in error_message:
+            raise exc.ValueNotValid('email already exists', data='email')
+        elif "UNIQUE" in error_message and "username" in error_message:
+            raise exc.ValueNotValid('username already exists', data='username')
+    raise e
+
+
+def parse_challenge_insertion(e: Exception):
+    """ Wrapper to uniform exception while inserting new challenges """
+    if issubclass(IntegrityError, e.__class__):
+        error_message = e.__str__()
+        if "UNIQUE" in error_message and "label" in error_message:
+            raise exc.ValueNotValid('a challenge with the same label exists', data='label')
+
+    raise e
diff --git a/vocolab/data/model_queries/__init__.py b/vocolab/data/model_queries/__init__.py
new file mode 100644
index 0000000..a320c7d
--- /dev/null
+++ b/vocolab/data/model_queries/__init__.py
@@ -0,0 +1,3 @@
+from .auth import *
+from .challenges import *
+from .models import *
diff --git a/vocolab/data/model_queries/auth.py b/vocolab/data/model_queries/auth.py
new file mode 100644
index 0000000..d8a6374
--- /dev/null
+++ b/vocolab/data/model_queries/auth.py
@@ -0,0 +1,219 @@
+import json +import secrets +from datetime import datetime +from typing import Optional, List + +from email_validator import validate_email, EmailNotValidError +from jose import jwt, JWTError # noqa: false flags from requirements https://youtrack.jetbrains.com/issue/PY-27985 +from pydantic import BaseModel, EmailStr, Field, ValidationError + +from vocolab.data import models, tables, exc as db_exc +from ..base import zrDB +from ...core import users_lib +from ...settings import get_settings + +_settings = get_settings() + + +class User(BaseModel): + id: int + username: str + email: EmailStr + active: bool + verified: str + hashed_pswd: bytes + salt: bytes + created_at: Optional[datetime] + + @property + def enabled(self) -> bool: + """ Check if a user is enabled (active & verified)""" + return self.active and self.is_verified() + + class Config: + orm_mode = True + + def is_verified(self) -> bool: + """ Check whether a user has been verified""" + return self.verified == 'True' + + def password_matches(self, password: str) -> bool: + """ Check if given password matches users """ + hashed_pwd, _ = users_lib.hash_pwd(password=password, salt=self.salt) + return hashed_pwd == self.hashed_pswd + + async def change_password(self, new_password: str, password_validation: str): + """ Modify a users password """ + if new_password != password_validation: + raise ValueError('passwords do not match') + + hashed_pswd, salt = users_lib.hash_pwd(password=new_password) + query = tables.users_table.update().where( + tables.users_table.c.id == self.id + ).values(hashed_pswd=hashed_pswd, salt=salt) + await zrDB.execute(query) + + async def delete(self): + query = tables.users_table.delete().where( + tables.users_table.c.id == self.id + ) + await zrDB.execute(query) + + async def verify(self, verification_code: str, force: bool = False) -> bool: + """ Verify a user using a verification code, (can be forced) """ + if self.is_verified(): + return True + + query = 
tables.users_table.update().where( + tables.users_table.c.id == self.id + ).values(verified='True') + + if secrets.compare_digest(self.verified, verification_code) or force: + await zrDB.execute(query) + return True + + return False + + async def toggle_status(self, active: bool = True): + """ Toggles a users status from active to inactive """ + query = tables.users_table.update().where( + tables.users_table.c.id == self.id + ).values( + active=active + ) + await zrDB.execute(query) + + def get_profile_data(self) -> users_lib.UserProfileData: + """ Load users profile data """ + return users_lib.UserProfileData.load(self.username) + + @classmethod + async def get(cls, *, by_uid: Optional[int] = None, by_username: Optional[str] = None, + by_email: Optional[str] = None) -> "User": + """ Get a user from the database """ + if by_uid: + query = tables.users_table.select().where( + tables.users_table.c.id == by_uid + ) + elif by_username: + query = tables.users_table.select().where( + tables.users_table.c.username == by_username + ) + elif by_email: + query = tables.users_table.select().where( + tables.users_table.c.email == by_email + ) + else: + raise ValueError('a value must be provided : uid, username, email') + + user_data = await zrDB.fetch_one(query) + if user_data is None: + raise ValueError(f'database does not contain a user for given description') + + return cls(**user_data) + + @classmethod + async def login(cls, login_id: str, password: str) -> Optional["User"]: + try: + validate_email(login_id) # check if email is valid + query = tables.users_table.select().where( + tables.users_table.c.email == login_id + ) + except EmailNotValidError: + query = tables.users_table.select().where( + tables.users_table.c.username == login_id + ) + + user_data = await zrDB.fetch_one(query) + if user_data is None: + return None + + current_user = cls(**user_data) + # check password + hashed_pswd, _ = users_lib.hash_pwd(password=password, salt=current_user.salt) + if 
current_user.enabled and hashed_pswd == current_user.hashed_pswd:
+            return current_user
+        return None
+
+    @classmethod
+    async def create(cls, *, new_usr: models.api.UserCreateRequest):
+        """ Create a new user entry in the users database """
+        hashed_pswd, salt = users_lib.hash_pwd(password=new_usr.pwd)
+        verification_code = secrets.token_urlsafe(8)
+        try:
+            # insert user entry into the database
+            query = tables.users_table.insert().values(
+                username=new_usr.username,
+                email=new_usr.email,
+                active=True,
+                verified=verification_code,
+                hashed_pswd=hashed_pswd,
+                salt=salt,
+                created_at=datetime.now()
+            )
+            await zrDB.execute(query)
+        except Exception as e:
+            db_exc.parse_user_insertion(e)
+
+        # create user profile data
+        data = models.api.UserData(
+            username=new_usr.username,
+            affiliation=new_usr.affiliation,
+            first_name=new_usr.first_name,
+            last_name=new_usr.last_name
+        )
+        users_lib.update_user_data(new_usr.username, data)
+        return verification_code
+
+
+class UserList(BaseModel):
+    items: List[User]
+
+    @classmethod
+    async def get(cls, active_only: bool = False) -> "UserList":
+        """ Get all existing users, flag allows to filter non-active users """
+        query = tables.users_table.select()
+        if active_only:
+            query = tables.users_table.select().where(
+                tables.users_table.c.active == True
+            )
+        user_list = await zrDB.fetch_all(query)
+        if user_list is None:
+            raise ValueError(f'database does not contain any user')
+        return cls(items=user_list)
+
+    @classmethod
+    async def toggle_status(cls, active: bool = True):
+        """ Toggles all users status from active to inactive """
+        query = tables.users_table.update().values(
+            active=active
+        )
+        return await zrDB.execute(query)
+
+
+class Token(BaseModel):
+    """ API Session Token """
+    expires_at: datetime = Field(default_factory=lambda: datetime.now() + _settings.user_options.session_expiry_delay)
+    created_at: datetime = Field(default_factory=lambda: datetime.now())
+    allow_password_reset: bool = False # used for password reset 
sessions + user_email: EmailStr + + def is_expired(self) -> bool: + """ Check if Token has expired """ + return self.expires_at < datetime.now() + + def encode(self) -> str: + """ Encode into a token string """ + # passing by json allows to convert datetimes to strings using pydantic serializer + as_dict = json.loads(self.json()) + return jwt.encode(claims=as_dict, key=_settings.secret, algorithm=_settings.api_options.token_encryption) + + @classmethod + def decode(cls, encoded_token: str): + """ Decode token from encoded string """ + try: + payload = jwt.decode(token=encoded_token, key=_settings.secret, + algorithms=[_settings.api_options.token_encryption]) + return Token(**payload) + except (JWTError, ValidationError) as e: + raise ValueError("Invalid token") from e diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py new file mode 100644 index 0000000..b8ba3a8 --- /dev/null +++ b/vocolab/data/model_queries/challenges.py @@ -0,0 +1,161 @@ +from datetime import date +from datetime import datetime +from pathlib import Path +from typing import Optional, List, Any + +from pydantic import BaseModel +from pydantic import HttpUrl + +from vocolab.data import models, tables +from vocolab.core import misc +from ..base import zrDB, db_exc + + +class EvaluatorItem(BaseModel): + """ Data representation of an evaluator """ + id: int + label: str + executor: models.tasks.ExecutorsType + host: Optional[str] + script_path: str + executor_arguments: str + + class Config: + orm_mode = True + + +class Challenge(BaseModel): + """ Data representation of a challenge """ + id: int + label: str + start_date: date + end_date: Optional[date] + active: bool + url: HttpUrl + evaluator: Optional[int] + + class Config: + orm_mode = True + + def is_active(self) -> bool: + """ Checks if challenge is active """ + present = date.today() + if self.end_date: + return self.start_date <= present <= self.end_date and self.active + else: + return 
self.start_date <= present and self.active + + @classmethod + def get_field_names(cls): + return list(cls.__fields__.keys()) + + @classmethod + async def create(cls, item: models.cli.NewChallenge): + try: + query = tables.challenges_table.insert().values( + **item.dict() + ) + await zrDB.execute(query) + except Exception as e: + db_exc.parse_user_insertion(e) + + @classmethod + async def get(cls, *, challenge_id: int, allow_inactive: bool = False) -> "Challenge": + query = tables.challenges_table.select().where( + tables.challenges_table.c.id == challenge_id + ) + ch_data = await zrDB.fetch_one(query) + if ch_data is None: + raise ValueError(f'There is no challenge with the following id: {challenge_id}') + ch = cls.parse_obj(ch_data) + if allow_inactive: + return ch + else: + if not ch.is_active(): + raise ValueError(f"The Challenge {ch.label}[{ch.id}] is not active") + return ch + + async def update_property(self, *, variable_name: str, value: Any, allow_parsing: bool = False): + """ Update a property """ + if not hasattr(self, variable_name): + raise ValueError(f'Class Challenge does not have a member called ! 
{variable_name}') + + variable_type = type(getattr(self, variable_name)) + + if allow_parsing: + value = misc.str2type(value, variable_type) + + if value is not None and not isinstance(value, variable_type): + raise ValueError(f"Challenge.{variable_name} should be of type {variable_type}") + + setattr(self, variable_name, value) + + # update database + query = tables.challenges_table.update().where( + tables.challenges_table.c.id == self.id + ).values({f"{variable_name}": value}) + + try: + await zrDB.execute(query) + except Exception as e: + db_exc.parse_user_insertion(e) + + return value + + async def delete(self): + """ Remove from database """ + query = tables.challenges_table.delete().where( + tables.challenges_table.c.id == self.id + ) + await zrDB.execute(query) + + + + +class ChallengeList(BaseModel): + items: List[Challenge] + + def filter_active(self) -> "ChallengeList": + self.items = [i for i in self.items if i.is_active()] + return self + + @classmethod + async def get(cls, include_all: bool = False) -> "ChallengeList": + query = tables.challenges_table.select() + challenges = await zrDB.fetch_all(query) + if challenges is None: + raise ValueError('No challenges were found') + + if include_all: + return cls(items=challenges) + return cls(items=challenges).filter_active() + + +class Leaderboard(BaseModel): + """ Data representation of a Leaderboard """ + id: Optional[int] + challenge_id: int # Id to linked challenge + label: str # Name of leaderboard + path_to: Path # Path to build result + entry_file: str # filename in submission results + archived: bool # is_archived + external_entries: Optional[Path] # Location of external entries (baselines, toplines, archived) + static_files: bool # has static files + sorting_key: Optional[str] # path to the item to use as sorting key + + @classmethod + def get_field_names(cls): + return list(cls.__fields__.keys()) + + class Config: + orm_mode = True + + +class LeaderboardEntry: + """ Data representation of a 
leaderboard entry """ + id: Optional[int] + entry_path: Path + model_id: str + submission_id: str + leaderboard_id: int + submitted_at: datetime diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py new file mode 100644 index 0000000..c6c332c --- /dev/null +++ b/vocolab/data/model_queries/models.py @@ -0,0 +1,265 @@ +import math +from datetime import datetime +from enum import Enum +from itertools import chain, product +from typing import Optional, List + +from pydantic import BaseModel, AnyHttpUrl + +from vocolab import get_settings +from vocolab.data import db, tables, models + +_settings = get_settings() + + +# TODO: add method for easy author_label editing + + +class ModelID(BaseModel): + """ Data representation of a Model id & its metadata""" + id: str + user_id: int + created_at: datetime + description: str + gpu_budget: str + train_set: str + authors: str + institution: str + team: str + paper_url: AnyHttpUrl + code_url: AnyHttpUrl + + @staticmethod + def nth_word(n: int) -> str: + """ Calculate the nth word of the english lower alphabet + + This function returns a string that counts using the lower english alphabet. + 0 -> '' + 1 -> a + ... + 26 -> z + 27 -> aa + 28 -> ab + ... + 53 -> ba + ... 
etc
+
+        Note: This methods becomes kind of slow for numbers larger than 10_000_000
+
+        """
+        nb_letters = 26  # use english alphabet [1, 26] lowercase letters
+        letters = [chr(97 + i) for i in range(nb_letters)]
+        # calculate word_length
+        word_length = math.ceil(math.log((n + 1) * (nb_letters - 1) + 1) / math.log(nb_letters))
+        # Build all possible combinations for the given word length
+        it = chain.from_iterable(
+            (product(letters, repeat=i) for i in range(word_length))
+        )
+        # find word in iterable
+        word = next(w for i, w in enumerate(it) if i == n)
+        return ''.join(word)
+
+    @classmethod
+    async def create(cls, first_author_name: str, data: models.api.NewModelIdRequest):
+        """ Create a new ModelID entry in the database
+
+        ids are created using the 3 first letters of first name of first author,
+        the last 2 digits of the current year and are padded with extra letters to
+        avoid duplicates.
+        """
+        new_model_id = f"{first_author_name[:3]}{str(datetime.now().year)[2:]}"
+
+        counter = 1
+        new_model_id_extended = f"{new_model_id}{cls.nth_word(counter)}"
+        while await cls.exists(new_model_id_extended):
+            counter += 1
+            new_model_id_extended = f"{new_model_id}{cls.nth_word(counter)}"
+
+        # create db entry
+        query = tables.models_table.insert().values(id=new_model_id_extended, **data.dict())
+        await db.zrDB.execute(query)
+        return new_model_id_extended
+
+    @classmethod
+    async def exists(cls, model_id: str) -> bool:
+        item = await db.zrDB.fetch_one(
+            tables.models_table.select().where(
+                tables.models_table.c.id == model_id
+            )
+        )
+        return item is not None
+
+    @classmethod
+    async def get(cls, model_id: str) -> Optional["ModelID"]:
+        """ Fetch a model_id entry from the database """
+        item_data = await db.zrDB.fetch_one(
+            tables.models_table.select().where(
+                tables.models_table.c.id == model_id
+            )
+        )
+        if item_data is None:
+            return None
+        return cls.parse_obj(item_data)
+
+
+class ModelIDList(BaseModel):
+    items: List[ModelID]
+
+    @classmethod
+    async def get(cls) -> 
"ModelIDList": + items = db.zrDB.fetch_all(tables.models_table.select()) + return cls(items=items) + + +class SubmissionStatus(str, Enum): + """ Definition of different states of submissions """ + # TODO: maybe add submission type (with scores...) + uploading = 'uploading' + uploaded = 'uploaded' + on_queue = 'on_queue' + validating = 'validating' # todo verify usage + invalid = 'invalid' + evaluating = 'evaluating' + completed = 'completed' + canceled = 'canceled' + failed = 'failed' + no_eval = 'no_eval' + no_auto_eval = 'no_auto_eval' + excluded = 'excluded' + + @classmethod + def get_values(cls): + return [el.value for el in cls] # noqa: enum is not typed correctly + + +class ChallengeSubmission(BaseModel): + """ Data representation of a submission to a challenge """ + id: str + user_id: int + track_id: int + submit_date: datetime + status: SubmissionStatus + auto_eval: bool + evaluator_id: Optional[int] + author_label: Optional[str] = None + + class Config: + orm_mode = True + + @classmethod + async def create(cls, username: str, new_submission: models.api.NewSubmissionRequest, evaluator_id: int) -> str: + """ Creates a database entry for the new submission """ + submission_id = f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{username}" + values = new_submission.dict() + values["id"] = submission_id + values["submit_date"] = datetime.now() + values["status"] = SubmissionStatus.uploading + values["evaluator_id"] = evaluator_id + # todo: auto-eval should maybe work differently ? 
+ values["auto_eval"] = _settings.task_queue_options.AUTO_EVAL + + await db.zrDB.execute( + query=tables.submissions_table.insert(), + values=values + ) + return submission_id + + @classmethod + async def get(cls, submission_id: str) -> Optional["ChallengeSubmission"]: + item_data = await db.zrDB.fetch_one( + tables.submissions_table.select().where( + tables.submissions_table.c.id == submission_id + ) + ) + if item_data is None: + return None + return cls.parse_obj(item_data) + + async def update_status(self, status: SubmissionStatus): + self.status = status + await db.zrDB.execute( + tables.submissions_table.update().where( + tables.submissions_table.c.id == self.id + ).values(status=status.value) + ) + + async def update_evaluator(self, evaluator_id: int): + self.evaluator_id = evaluator_id + await db.zrDB.execute( + tables.submissions_table.update().where( + tables.submissions_table.c.id == self.id + ).values(evaluator_id=evaluator_id) + ) + + async def delete(self): + await db.zrDB.execute( + tables.submissions_table.delete().where( + tables.submissions_table.c.id == self.id + ) + ) + + + +class ChallengeSubmissionList(BaseModel): + items: List[ChallengeSubmission] + + @classmethod + async def get_from_challenge(cls, challenge_id: int): + items = await db.zrDB.fetch_all( + tables.submissions_table.select().where( + tables.submissions_table.c.track_id == challenge_id + ) + ) + if items is None: + items = [] + + return cls(items=items) + + @classmethod + async def get_from_model(cls, model_id: str): + items = await db.zrDB.fetch_all( + tables.submissions_table.select().where( + tables.submissions_table.c.model_id == model_id + ) + ) + if items is None: + items = [] + + return cls(items=items) + + @classmethod + async def get_from_user(cls, user_id: int): + items = await db.zrDB.fetch_all( + tables.submissions_table.select().where( + tables.submissions_table.c.user_id == user_id + ) + ) + if items is None: + items = [] + + return cls(items=items) + + @classmethod 
+ async def get_by_status(cls, status: SubmissionStatus): + items = await db.zrDB.fetch_all( + tables.submissions_table.select().where( + tables.submissions_table.c.status == status.value + ) + ) + if items is None: + items = [] + + return cls(items=items) + + + async def update_evaluators(self, evaluator_id: int): + for e in self.items: + e.evaluator_id = evaluator_id + + # todo check if query works successfully + items_id = set([e.id for e in self.items]) + await db.zrDB.execute( + tables.submissions_table.update().where( + tables.submissions_table.c.id.in_(items_id) + ).values(evaluator_id=evaluator_id) + ) diff --git a/vocolab/data/models/__init__.py b/vocolab/data/models/__init__.py new file mode 100644 index 0000000..7673ca1 --- /dev/null +++ b/vocolab/data/models/__init__.py @@ -0,0 +1,4 @@ +from . import api +from . import cli +from . import misc +from . import tasks diff --git a/vocolab/data/models/api/__init__.py b/vocolab/data/models/api/__init__.py new file mode 100644 index 0000000..3891e6e --- /dev/null +++ b/vocolab/data/models/api/__init__.py @@ -0,0 +1,5 @@ +from .auth import * +from .challenges import * +from .commons import * +from .leaerboards import * +from .models import * diff --git a/vocolab/data/models/api/auth.py b/vocolab/data/models/api/auth.py new file mode 100644 index 0000000..255adf8 --- /dev/null +++ b/vocolab/data/models/api/auth.py @@ -0,0 +1,35 @@ +""" Dataclasses representing API/auth input output data types """ +from pydantic import BaseModel, EmailStr, validator + + +class UserCreateRequest(BaseModel): + """ Dataclass for user creation """ + username: str + email: EmailStr + pwd: str + first_name: str + last_name: str + affiliation: str + + @validator('username', 'pwd', 'first_name', 'last_name', 'affiliation') + def non_empty_string(cls, v): + assert v, "UserCreate does not accept empty fields" + return v + + +class LoggedItem(BaseModel): + """ Return type of the /login function """ + access_token: str + token_type: str + + 
+class CurrentUser(BaseModel): + """ Basic userinfo Model """ + username: str + email: EmailStr + + +class PasswordResetRequest(BaseModel): + """ Input Schema for /password/reset request """ + username: str + email: EmailStr diff --git a/vocolab/data/models/api/challenges.py b/vocolab/data/models/api/challenges.py new file mode 100644 index 0000000..44606e5 --- /dev/null +++ b/vocolab/data/models/api/challenges.py @@ -0,0 +1,79 @@ +""" Dataclasses representing API/challenge input output data types """ +from datetime import date +from pathlib import Path +from typing import Optional, List, Tuple, Dict + +from pydantic import BaseModel, HttpUrl + + +class ChallengePreview(BaseModel): + """ Used as response type for root challenge list request""" + id: int + label: str + active: bool + + +class ChallengesResponse(BaseModel): + """ Used as response type for preview of a challenge """ + id: int + label: str + start_date: date + end_date: Optional[date] + active: bool + url: HttpUrl + evaluator: Optional[int] + + +class SubmissionRequestFileIndexItem(BaseModel): + """ Item used to represent a file in the file index + used in the NewSubmissionRequest object. 
+ + File index is used to verify correct number of files/parts have been uploaded + """ + file_name: str + file_size: int + file_hash: Optional[str] = None + + +class NewSubmissionRequest(BaseModel): + """ Dataclass used for input in the creation of a new submission to a challenge """ + username: str + model_id: str + filename: str + hash: str + multipart: bool + has_scores: bool + leaderboards: Dict[str, Path] + index: Optional[List[SubmissionRequestFileIndexItem]] + + +class NewSubmission(BaseModel): + """ Item used in the database to create a new submission entry """ + user_id: int + track_id: int + + +class SubmissionPreview(BaseModel): + submission_id: str + track_label: str + track_id: int + status: str + + +class SubmissionView(BaseModel): + submission_id: str + user_id: int + username: str + track_label: str + track_id: int + status: str + date: date + evaluator_label: str + evaluator_cmd: str + leaderboards: List[Tuple[str, int]] + + +class UploadSubmissionPartResponse(BaseModel): + """ Response type of the upload submission part method in /challenges """ + completed: bool + remaining: List[str] diff --git a/vocolab/data/models/api/commons.py b/vocolab/data/models/api/commons.py new file mode 100644 index 0000000..640dfb4 --- /dev/null +++ b/vocolab/data/models/api/commons.py @@ -0,0 +1,7 @@ +""" Common types used in multiple api requests """ +from pydantic import BaseModel + + +class Message(BaseModel): + """ Generic message response""" + message: str diff --git a/vocolab/data/models/api/leaerboards.py b/vocolab/data/models/api/leaerboards.py new file mode 100644 index 0000000..0ba4103 --- /dev/null +++ b/vocolab/data/models/api/leaerboards.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel + + +class LeaderboardPublicView(BaseModel): + id: int + challenge_id: int + label: str + entry_file: str + archived: bool + static_files: bool diff --git a/vocolab/data/models/api/models.py b/vocolab/data/models/api/models.py new file mode 100644 index 
0000000..bf84b6e --- /dev/null +++ b/vocolab/data/models/api/models.py @@ -0,0 +1,17 @@ +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, AnyHttpUrl, Field + + +class NewModelIdRequest(BaseModel): + user_id: int + description: str + gpu_budget: str + train_set: str + authors: str + institution: str + team: str + paper_url: Optional[AnyHttpUrl] + code_url: Optional[AnyHttpUrl] + created_at: datetime = Field(default_factory=lambda: datetime.now()) \ No newline at end of file diff --git a/vocolab/data/models/cli.py b/vocolab/data/models/cli.py new file mode 100644 index 0000000..e0f6b60 --- /dev/null +++ b/vocolab/data/models/cli.py @@ -0,0 +1,27 @@ +""" Data Models used in the admin/cli functions """ +from datetime import date +from typing import Optional + +from pydantic import BaseModel, AnyHttpUrl + +from .tasks import ExecutorsType + + +class NewChallenge(BaseModel): + """ Dataclass for challenge creation """ + id: Optional[int] + label: str + active: bool + url: AnyHttpUrl + evaluator: Optional[int] + start_date: date + end_date: Optional[date] + + +class NewEvaluatorItem(BaseModel): + """ Data Model used by evaluator creation process """ + label: str + executor: ExecutorsType + host: Optional[str] + script_path: str + executor_arguments: Optional[str] diff --git a/vocolab/data/models/misc.py b/vocolab/data/models/misc.py new file mode 100644 index 0000000..b53f9e5 --- /dev/null +++ b/vocolab/data/models/misc.py @@ -0,0 +1,3 @@ +from pydantic import BaseModel, EmailStr, validator + + diff --git a/vocolab/data/models/tasks.py b/vocolab/data/models/tasks.py new file mode 100644 index 0000000..8f10149 --- /dev/null +++ b/vocolab/data/models/tasks.py @@ -0,0 +1,123 @@ +from datetime import datetime + +import json +import uuid +from enum import Enum +from shutil import which +from typing import List, Union, Optional + +from pydantic import BaseModel, ValidationError, Field, root_validator + +from vocolab import out + + 
+class QueuesNames(str, Enum): + eval_queue = "eval_queue" + update_queue = "update_queue" + + +class BrokerMessage(BaseModel): + """ A Generic description of a Broker Message Object """ + message_type: Optional[str] + label: str + job_id: str = str(uuid.uuid4()) + timestamp: datetime = Field(default_factory=datetime.now) + + @root_validator(pre=True) + def set_message_type(cls, values): + values["message_type"] = str(cls.__name__) + return values + + def __repr__(self): + """ Stringify the message for logging""" + return f"{self.job_id} >> {self.label}" + + +class ExecutorsType(str, Enum): + python = "python" + bash = "bash" + sbatch = "sbatch" + docker = "docker" + + def to_exec(self): + """ Returns absolute path to executable or None""" + return which(self) + + +class SubmissionEvaluationMessage(BrokerMessage): + """ A Broker Message that contains a subprocess task to be run""" + executor: ExecutorsType = ExecutorsType.bash + submission_id: str + bin_path: str + script_name: str + executor_args: List[str] + cmd_args: List[str] + + def __repr__(self): + """ Stringify the message for logging""" + return f"{self.job_id} >> " \ + f"{self.submission_id}@{self.label}:: " \ + f"{self.executor} {self.bin_path}/{self.script_name} {self.cmd_args} --" + + +class UpdateType(str, Enum): + evaluation_complete = "evaluation_complete" + evaluation_failed = "evaluation_failed" + evaluation_canceled = "evaluation_canceled" + evaluation_undefined = "evaluation_undefined" + + +class SubmissionUpdateMessage(BrokerMessage): + """ A Broker Message that contains a python function to execute """ + submission_id: str + updateType: UpdateType + hostname: str + + def __repr__(self): + """ Stringify the message for logging""" + return f"{self.job_id} >> " \ + f"{self.submission_id}@{self.label}:: " \ + f"{self.updateType}@{self.hostname}--" + + +class SimpleLogMessage(BrokerMessage): + """ A Broker Message that contains a simple string message """ + message: str + + def __repr__(self): + 
""" Stringify the message for logging""" + return f"{self.job_id} >> {self.label}:: {self.message}" + + +def message_from_bytes(byte_msg: bytes) -> Union[BrokerMessage, + SubmissionEvaluationMessage, + SubmissionUpdateMessage, SimpleLogMessage]: + """ Convert a bytes object to the correct corresponding Message object """ + + try: + url_obj = json.loads(str(byte_msg.decode("utf-8"))) + + message_type = url_obj.get('message_type', None) + + # if type is not specified raise error + if message_type is None: + out.log.error(f"Message does not specify type: {str(byte_msg.decode('utf-8'))}") + raise ValueError(f"Message does not specify type: {str(byte_msg.decode('utf-8'))}") + + # try and match type with known types + if message_type == "SubmissionEvaluationMessage": + return SubmissionEvaluationMessage(**url_obj) + elif message_type == "SubmissionUpdateMessage": + return SubmissionUpdateMessage(**url_obj) + elif message_type == "SimpleLogMessage": + return SimpleLogMessage(**url_obj) + elif message_type == "BrokerMessage": + return BrokerMessage(**url_obj) + + # raise error if matching failed + out.log.error(f"Unknown message type: {str(byte_msg.decode('utf-8'))}") + raise ValueError(f"Unknown message type {str(byte_msg.decode('utf-8'))}") + + except (json.JSONDecodeError, ValidationError): + out.log.error(f"error while parsing command: {str(byte_msg.decode('utf-8'))}") + raise ValueError(f"command {str(byte_msg.decode('utf-8'))} not valid!!") diff --git a/vocolab/data/q/__init__.py b/vocolab/data/q/__init__.py new file mode 100644 index 0000000..efd3979 --- /dev/null +++ b/vocolab/data/q/__init__.py @@ -0,0 +1,3 @@ +from vocolab.db.q import users as userQ # noqa: allow non standard names +from vocolab.db.q import challenges as challengesQ # noqa: allow non standard names +from vocolab.db.q import leaderboards as leaderboardQ # noqa: allow non standard names diff --git a/vocolab/data/q/challenges.py b/vocolab/data/q/challenges.py new file mode 100644 index 
0000000..faa744b --- /dev/null +++ b/vocolab/data/q/challenges.py @@ -0,0 +1,55 @@ +from datetime import datetime +from typing import List, Optional +from uuid import uuid4 + +from vocolab import get_settings + +_settings = get_settings() + + +async def get_evaluators(): + """ Returns a list of the evaluators """ + query = schema.evaluators_table.select() + results = await zrDB.fetch_all(query) + if not results: + return [] + return [schema.EvaluatorItem(**i) for i in results] + + +async def get_evaluator(*, by_id: int) -> Optional[schema.EvaluatorItem]: + """ Returns a specific evaluator """ + + query = schema.evaluators_table.select().where( + schema.evaluators_table.c.id == by_id + ) + result = await zrDB.fetch_one(query) + if not result: + return None + return schema.EvaluatorItem(**result) + + +async def add_evaluator(*, lst_eval: List[models.cli.NewEvaluatorItem]): + """ Insert a list of evaluators into the database """ + for i in lst_eval: + query = schema.evaluators_table.select().where( + schema.evaluators_table.c.label == i.label + ).where( + schema.evaluators_table.c.host == i.host + ) + res = await zrDB.fetch_one(query) + + if res is None: + await zrDB.execute(schema.evaluators_table.insert(), i.dict()) + else: + update_query = schema.evaluators_table.update().where( + schema.evaluators_table.c.id == res.id + ).values(executor=i.executor, script_path=i.script_path, executor_arguments=i.executor_arguments) + await zrDB.execute(update_query) + + +async def edit_evaluator_args(*, eval_id: int, arg_list: List[str]): + """ update evaluator base arguments """ + query = schema.evaluators_table.update().where( + schema.evaluators_table.c.id == eval_id + ).values(executor_arguments=";".join(arg_list)) + await zrDB.execute(query) diff --git a/vocolab/data/q/leaderboards.py b/vocolab/data/q/leaderboards.py new file mode 100644 index 0000000..e1f1e6d --- /dev/null +++ b/vocolab/data/q/leaderboards.py @@ -0,0 +1,110 @@ +""" +Database functions that manipulate the 
leaderboard table +""" +from pathlib import Path +from typing import Any, List, Optional +from vocolab.db import schema, zrDB, exc as db_exc +from vocolab.core import misc + + +async def get_leaderboard(*, leaderboard_id: int) -> schema.LeaderBoard: + """ Fetches the leaderboard object with the corresponding id + + :raise ValueError if the item is not is the database + :raise SQLAlchemy exceptions if database connection or condition fails + """ + query = schema.leaderboards_table.select().where( + schema.leaderboards_table.c.id == leaderboard_id + ) + ld = await zrDB.fetch_one(query) + if ld is None: + raise ValueError(f'Leaderboard: {leaderboard_id} not found in database !!!') + + return schema.LeaderBoard(**ld) + + +async def get_leaderboards(*, by_challenge_id: Optional[int] = None) -> List[schema.LeaderBoard]: + """ A list of leaderboards + + :param by_challenge_id: filter leaderboards by challenge id + :raise ValueError if the item is not is the database + :raise SQLAlchemy exceptions if database connection or condition fails + """ + if by_challenge_id: + query = schema.leaderboards_table.select().where( + schema.leaderboards_table.c.challenge_id == by_challenge_id + ) + else: + raise ValueError("No parameter given") + + lst_ld = await zrDB.fetch_all(query) + return [schema.LeaderBoard(**ld) for ld in lst_ld] + + +async def list_leaderboards() -> List[schema.LeaderBoard]: + """ Fetch a list of all the leaderboards present in the database + + :raise ValueError if the leaderboard database is empty + :raise SQLAlchemy exceptions if database connection or condition fails + """ + query = schema.leaderboards_table.select() + leaderboards = await zrDB.fetch_all(query) + if not leaderboards: + raise ValueError('No leaderboards found') + + return [schema.LeaderBoard(**ld) for ld in leaderboards] + + +async def create_leaderboard(*, lead_data: schema.LeaderBoard) -> int: + """ Create a new leaderboard entry in database from item object + + :returns the id of the 
leaderboard created + """ + query = schema.leaderboards_table.insert().values( + label=lead_data.label, + challenge_id=lead_data.challenge_id, + path_to=f"{lead_data.path_to}", + entry_file=lead_data.entry_file, + archived=lead_data.archived, + external_entries=f"{lead_data.external_entries}", + static_files=lead_data.static_files + ) + try: + result = await zrDB.execute(query) + return result + except Exception as e: + db_exc.parse_user_insertion(e) + + +async def update_leaderboard_value(*, leaderboard_id, variable_name: str, value: Any, allow_parsing: bool = False): + """ Update a value in the leaderboard corresponding to the given id + + :raise ValueError if given variable does not exist or does not match corresponding type + """ + field = schema.LeaderBoard.__fields__.get(variable_name, None) + if field is None: + raise ValueError(f'Class Leaderboard does not have a member called ! {variable_name}') + + if allow_parsing: + value = misc.str2type(value, field.type_) + + if value is None: + if not field.allow_none: + raise ValueError(f'LeaderBoard.{variable_name} cannot be None/Null') + else: + if not isinstance(value, field.type_): + raise ValueError(f"Leaderboard.{variable_name} should be of type {field.type_}") + + # Path is not supported by sqlite as a raw type + if field.type_ == Path: + value = str(value) + + query = schema.leaderboards_table.update().where( + schema.leaderboards_table.c.id == leaderboard_id + ).values({f"{variable_name}": str(value)}) + try: + await zrDB.execute(query) + except Exception as e: + db_exc.parse_user_insertion(e) + + return value diff --git a/vocolab/data/q/users.py b/vocolab/data/q/users.py new file mode 100644 index 0000000..e69de29 diff --git a/vocolab/data/tables.py b/vocolab/data/tables.py new file mode 100644 index 0000000..d0f7f13 --- /dev/null +++ b/vocolab/data/tables.py @@ -0,0 +1,113 @@ +import sqlalchemy + +tables_metadata = sqlalchemy.MetaData() + + +""" Table Representing Users""" +users_table = sqlalchemy.Table( 
+ "users_credentials", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True), + sqlalchemy.Column("username", sqlalchemy.String, unique=True), + sqlalchemy.Column("email", sqlalchemy.String, unique=True), + sqlalchemy.Column("active", sqlalchemy.Boolean), + sqlalchemy.Column("verified", sqlalchemy.String), + sqlalchemy.Column("hashed_pswd", sqlalchemy.BLOB), + sqlalchemy.Column("salt", sqlalchemy.BLOB), + sqlalchemy.Column("created_at", sqlalchemy.DATETIME) +) + +""" +Table indexing of model ids +""" +models_table = sqlalchemy.Table( + "models", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("users_credentials.id")), + sqlalchemy.Column("created_at", sqlalchemy.DateTime), + sqlalchemy.Column("description", sqlalchemy.String), + sqlalchemy.Column("gpu_budget", sqlalchemy.String), + sqlalchemy.Column("train_set", sqlalchemy.String), + sqlalchemy.Column("authors", sqlalchemy.String), + sqlalchemy.Column("institution", sqlalchemy.String), + sqlalchemy.Column("team", sqlalchemy.String), + sqlalchemy.Column("paper_url", sqlalchemy.String), + sqlalchemy.Column("code_url", sqlalchemy.String), +) + + +""" +Table indexing the existing evaluators +""" +evaluators_table = sqlalchemy.Table( + "evaluators", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), + sqlalchemy.Column("label", sqlalchemy.String, unique=True), + sqlalchemy.Column("host", sqlalchemy.String), + sqlalchemy.Column("executor", sqlalchemy.String), + sqlalchemy.Column("script_path", sqlalchemy.String), + sqlalchemy.Column("executor_arguments", sqlalchemy.String) +) + +""" +Table used to index the existing challenges & their metadata +""" +challenges_table = sqlalchemy.Table( + "challenges", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, 
autoincrement=True), + sqlalchemy.Column("label", sqlalchemy.String, unique=True), + sqlalchemy.Column("start_date", sqlalchemy.Date), + sqlalchemy.Column("end_date", sqlalchemy.Date), + sqlalchemy.Column("active", sqlalchemy.Boolean), + sqlalchemy.Column("url", sqlalchemy.String), + sqlalchemy.Column("evaluator", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")) +) + +""" +Table indexing the existing leaderboards and their metadata +""" +leaderboards_table = sqlalchemy.Table( + "leaderboards", + tables_metadata, + sqlalchemy.Column('id', sqlalchemy.Integer, primary_key=True, autoincrement=True), + sqlalchemy.Column('challenge_id', sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), + sqlalchemy.Column('label', sqlalchemy.String, unique=True), + sqlalchemy.Column('path_to', sqlalchemy.String), + sqlalchemy.Column('entry_file', sqlalchemy.String), + sqlalchemy.Column('archived', sqlalchemy.Boolean), + sqlalchemy.Column('external_entries', sqlalchemy.String), + sqlalchemy.Column('static_files', sqlalchemy.Boolean), + sqlalchemy.Column('sorting_key', sqlalchemy.String), +) + +""" +Table entry indexing submissions to challenges +""" +submissions_table = sqlalchemy.Table( + "challenge_submissions", + tables_metadata, + sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("users_credentials.id")), + sqlalchemy.Column("track_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), + sqlalchemy.Column("model_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("models.id")), + sqlalchemy.Column("submit_date", sqlalchemy.DateTime), + sqlalchemy.Column("status", sqlalchemy.String), + sqlalchemy.Column("auto_eval", sqlalchemy.Boolean), + sqlalchemy.Column("evaluator_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")), + sqlalchemy.Column("author_label", sqlalchemy.String) +) + +""" Table indexing all leaderboard entries and their location (as 
stores json files)"""
+leaderboard_entry_table = sqlalchemy.Table(
+    "leaderboard_entries",
+    tables_metadata,
+    sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True),
+    sqlalchemy.Column("entry_path", sqlalchemy.String),
+    sqlalchemy.Column("model_id", sqlalchemy.String, sqlalchemy.ForeignKey("models.id")),
+    sqlalchemy.Column("submission_id", sqlalchemy.String, sqlalchemy.ForeignKey("challenge_submissions.id")),
+    sqlalchemy.Column("leaderboard_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("leaderboards.id")),
+    sqlalchemy.Column("submitted_at", sqlalchemy.String)
+)
\ No newline at end of file

From 0e70b252e9da1ea744d43cf1428e8d7790ebe67f Mon Sep 17 00:00:00 2001
From: Hamilakis Nicolas
Date: Fri, 3 Feb 2023 13:48:25 +0100
Subject: [PATCH 06/28] WIP: db clean-up for challenges/models/submissions
 (part 2)

---
 vocolab/data/model_queries/challenges.py | 138 ++++++++++++++++++++++-
 vocolab/data/q/__init__.py               |   3 -
 vocolab/data/q/challenges.py             |  55 ---------
 vocolab/data/q/leaderboards.py           | 110 ------------------
 vocolab/data/q/users.py                  |   0
 5 files changed, 137 insertions(+), 169 deletions(-)
 delete mode 100644 vocolab/data/q/__init__.py
 delete mode 100644 vocolab/data/q/challenges.py
 delete mode 100644 vocolab/data/q/leaderboards.py
 delete mode 100644 vocolab/data/q/users.py

diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py
index b8ba3a8..c0503b4 100644
--- a/vocolab/data/model_queries/challenges.py
+++ b/vocolab/data/model_queries/challenges.py
@@ -1,3 +1,4 @@
+import shlex
 from datetime import date
 from datetime import datetime
 from pathlib import Path
@@ -8,7 +9,7 @@
 
 from vocolab.data import models, tables
 from vocolab.core import misc
-from ..base import zrDB, db_exc
+from ..db import zrDB, db_exc
 
 
 class EvaluatorItem(BaseModel):
@@ -23,6 +24,53 @@ class EvaluatorItem(BaseModel):
     class Config:
         orm_mode = True
 
+    @classmethod
+    async def add_or_update(cls, *, evl_item: 
models.cli.NewEvaluatorItem): + query = tables.evaluators_table.select().where( + tables.evaluators_table.c.label == evl_item.label + ).where( + tables.evaluators_table.c.host == evl_item.host + ) + res = await zrDB.fetch_one(query) + + if res is None: + await zrDB.execute(tables.evaluators_table.insert(), evl_item.dict()) + else: + update_query = tables.evaluators_table.update().where( + tables.evaluators_table.c.id == res.id + ).values( + executor=evl_item.executor, script_path=evl_item.script_path, + executor_arguments=evl_item.executor_arguments + ) + await zrDB.execute(update_query) + + @classmethod + async def get(cls, by_id: str) -> Optional["EvaluatorItem"]: + query = tables.evaluators_table.select().where( + tables.evaluators_table.c.id == by_id + ) + result = await zrDB.fetch_one(query) + if not result: + return None + return cls.parse_obj(result) + async def update_args(self, arg_list: List[str]): + query = tables.evaluators_table.update().where( + tables.evaluators_table.c.id == self.id + ).values(executor_arguments=shlex.join(arg_list)) + + +class EvaluatorList(BaseModel): + items: List[EvaluatorItem] + + @classmethod + async def get(cls) -> "EvaluatorList": + query = tables.evaluators_table.select() + results = await zrDB.fetch_all(query) + if not results: + return cls(items=[]) + return cls(items=results) + + class Challenge(BaseModel): """ Data representation of a challenge """ @@ -150,6 +198,94 @@ def get_field_names(cls): class Config: orm_mode = True + @classmethod + async def create(cls, ld_data: 'Leaderboard'): + query = tables.leaderboards_table.insert().values( + label=ld_data.label, + challenge_id=ld_data.challenge_id, + path_to=f"{ld_data.path_to}", + entry_file=ld_data.entry_file, + archived=ld_data.archived, + external_entries=f"{ld_data.external_entries}", + static_files=ld_data.static_files + ) + try: + result = await zrDB.execute(query) + return result + except Exception as e: + db_exc.parse_user_insertion(e) + + async def 
update_property(self, *, variable_name: str, value: Any, allow_parsing: bool = False): + """ Update a named property """ + if not hasattr(self, variable_name): + raise ValueError(f'Class Leaderboard does not have a member called ! {variable_name}') + + variable_type = type(getattr(self, variable_name)) + + if allow_parsing: + value = misc.str2type(value, variable_type) + + if value is not None and not isinstance(value, variable_type): + raise ValueError(f"Leaderboard.{variable_name} should be of type {variable_type}") + + if value is None: + if not self.__fields__.get(variable_name).allow_none: + raise ValueError(f'LeaderBoard.{variable_name} cannot be None/Null') + else: + if not isinstance(value, variable_type): + raise ValueError(f"Leaderboard.{variable_name} should be of type {variable_type}") + + # set value + setattr(self, variable_name, value) + + # Path is not supported by sqlite as a raw type + if variable_type == Path: + value = str(value) + + query = tables.leaderboards_table.update().where( + tables.leaderboards_table.c.id == self.id + ).values({f"{variable_name}": str(value)}) + try: + await zrDB.execute(query) + except Exception as e: + db_exc.parse_user_insertion(e) + + return value + + + @classmethod + async def get(cls, leaderboard_id: int) -> Optional["Leaderboard"]: + query = tables.leaderboards_table.select().where( + tables.leaderboards_table.c.id == leaderboard_id + ) + ld = await zrDB.fetch_one(query) + if ld is None: + return None + return cls.parse_obj(ld) + + +class LeaderboardList(BaseModel): + items: List[Leaderboard] + + @classmethod + async def get_all(cls) -> "LeaderboardList": + query = tables.leaderboards_table.select() + ld_list = await zrDB.fetch_all(query) + if not ld_list: + return cls(items=[]) + return cls(items=ld_list) + + @classmethod + async def get_by_challenge(cls, challenge_id: int) -> "LeaderboardList": + query = tables.leaderboards_table.select().where( + tables.leaderboards_table.c.challenge_id == challenge_id + ) + 
ld_list = await zrDB.fetch_all(query) + if not ld_list: + return cls(items=[]) + return cls(items=ld_list) + + class LeaderboardEntry: """ Data representation of a leaderboard entry """ diff --git a/vocolab/data/q/__init__.py b/vocolab/data/q/__init__.py deleted file mode 100644 index efd3979..0000000 --- a/vocolab/data/q/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from vocolab.db.q import users as userQ # noqa: allow non standard names -from vocolab.db.q import challenges as challengesQ # noqa: allow non standard names -from vocolab.db.q import leaderboards as leaderboardQ # noqa: allow non standard names diff --git a/vocolab/data/q/challenges.py b/vocolab/data/q/challenges.py deleted file mode 100644 index faa744b..0000000 --- a/vocolab/data/q/challenges.py +++ /dev/null @@ -1,55 +0,0 @@ -from datetime import datetime -from typing import List, Optional -from uuid import uuid4 - -from vocolab import get_settings - -_settings = get_settings() - - -async def get_evaluators(): - """ Returns a list of the evaluators """ - query = schema.evaluators_table.select() - results = await zrDB.fetch_all(query) - if not results: - return [] - return [schema.EvaluatorItem(**i) for i in results] - - -async def get_evaluator(*, by_id: int) -> Optional[schema.EvaluatorItem]: - """ Returns a specific evaluator """ - - query = schema.evaluators_table.select().where( - schema.evaluators_table.c.id == by_id - ) - result = await zrDB.fetch_one(query) - if not result: - return None - return schema.EvaluatorItem(**result) - - -async def add_evaluator(*, lst_eval: List[models.cli.NewEvaluatorItem]): - """ Insert a list of evaluators into the database """ - for i in lst_eval: - query = schema.evaluators_table.select().where( - schema.evaluators_table.c.label == i.label - ).where( - schema.evaluators_table.c.host == i.host - ) - res = await zrDB.fetch_one(query) - - if res is None: - await zrDB.execute(schema.evaluators_table.insert(), i.dict()) - else: - update_query = 
schema.evaluators_table.update().where( - schema.evaluators_table.c.id == res.id - ).values(executor=i.executor, script_path=i.script_path, executor_arguments=i.executor_arguments) - await zrDB.execute(update_query) - - -async def edit_evaluator_args(*, eval_id: int, arg_list: List[str]): - """ update evaluator base arguments """ - query = schema.evaluators_table.update().where( - schema.evaluators_table.c.id == eval_id - ).values(executor_arguments=";".join(arg_list)) - await zrDB.execute(query) diff --git a/vocolab/data/q/leaderboards.py b/vocolab/data/q/leaderboards.py deleted file mode 100644 index e1f1e6d..0000000 --- a/vocolab/data/q/leaderboards.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -Database functions that manipulate the leaderboard table -""" -from pathlib import Path -from typing import Any, List, Optional -from vocolab.db import schema, zrDB, exc as db_exc -from vocolab.core import misc - - -async def get_leaderboard(*, leaderboard_id: int) -> schema.LeaderBoard: - """ Fetches the leaderboard object with the corresponding id - - :raise ValueError if the item is not is the database - :raise SQLAlchemy exceptions if database connection or condition fails - """ - query = schema.leaderboards_table.select().where( - schema.leaderboards_table.c.id == leaderboard_id - ) - ld = await zrDB.fetch_one(query) - if ld is None: - raise ValueError(f'Leaderboard: {leaderboard_id} not found in database !!!') - - return schema.LeaderBoard(**ld) - - -async def get_leaderboards(*, by_challenge_id: Optional[int] = None) -> List[schema.LeaderBoard]: - """ A list of leaderboards - - :param by_challenge_id: filter leaderboards by challenge id - :raise ValueError if the item is not is the database - :raise SQLAlchemy exceptions if database connection or condition fails - """ - if by_challenge_id: - query = schema.leaderboards_table.select().where( - schema.leaderboards_table.c.challenge_id == by_challenge_id - ) - else: - raise ValueError("No parameter given") - - lst_ld = 
await zrDB.fetch_all(query) - return [schema.LeaderBoard(**ld) for ld in lst_ld] - - -async def list_leaderboards() -> List[schema.LeaderBoard]: - """ Fetch a list of all the leaderboards present in the database - - :raise ValueError if the leaderboard database is empty - :raise SQLAlchemy exceptions if database connection or condition fails - """ - query = schema.leaderboards_table.select() - leaderboards = await zrDB.fetch_all(query) - if not leaderboards: - raise ValueError('No leaderboards found') - - return [schema.LeaderBoard(**ld) for ld in leaderboards] - - -async def create_leaderboard(*, lead_data: schema.LeaderBoard) -> int: - """ Create a new leaderboard entry in database from item object - - :returns the id of the leaderboard created - """ - query = schema.leaderboards_table.insert().values( - label=lead_data.label, - challenge_id=lead_data.challenge_id, - path_to=f"{lead_data.path_to}", - entry_file=lead_data.entry_file, - archived=lead_data.archived, - external_entries=f"{lead_data.external_entries}", - static_files=lead_data.static_files - ) - try: - result = await zrDB.execute(query) - return result - except Exception as e: - db_exc.parse_user_insertion(e) - - -async def update_leaderboard_value(*, leaderboard_id, variable_name: str, value: Any, allow_parsing: bool = False): - """ Update a value in the leaderboard corresponding to the given id - - :raise ValueError if given variable does not exist or does not match corresponding type - """ - field = schema.LeaderBoard.__fields__.get(variable_name, None) - if field is None: - raise ValueError(f'Class Leaderboard does not have a member called ! 
{variable_name}') - - if allow_parsing: - value = misc.str2type(value, field.type_) - - if value is None: - if not field.allow_none: - raise ValueError(f'LeaderBoard.{variable_name} cannot be None/Null') - else: - if not isinstance(value, field.type_): - raise ValueError(f"Leaderboard.{variable_name} should be of type {field.type_}") - - # Path is not supported by sqlite as a raw type - if field.type_ == Path: - value = str(value) - - query = schema.leaderboards_table.update().where( - schema.leaderboards_table.c.id == leaderboard_id - ).values({f"{variable_name}": str(value)}) - try: - await zrDB.execute(query) - except Exception as e: - db_exc.parse_user_insertion(e) - - return value diff --git a/vocolab/data/q/users.py b/vocolab/data/q/users.py deleted file mode 100644 index e69de29..0000000 From 5e9097f1fbe8ac80d772f4c72d6fcd6808fbac48 Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Fri, 3 Feb 2023 21:30:50 +0100 Subject: [PATCH 07/28] WIP: update routing (part 1) --- vocolab/api/endpoints/challenges.py | 39 +++++++++++---------------- vocolab/api/endpoints/leaderboards.py | 3 +-- 2 files changed, 17 insertions(+), 25 deletions(-) diff --git a/vocolab/api/endpoints/challenges.py b/vocolab/api/endpoints/challenges.py index e8bf0ce..10f61b0 100644 --- a/vocolab/api/endpoints/challenges.py +++ b/vocolab/api/endpoints/challenges.py @@ -1,17 +1,13 @@ """ Routing for /challenges section of the API This section handles challenge data """ -from datetime import datetime from typing import List from fastapi import ( - APIRouter, Depends, UploadFile, File, BackgroundTasks + APIRouter ) -from vocolab import out, exc -from vocolab.db import schema, models -from vocolab.db.q import challengesQ -from vocolab.core import api_lib, submission_lib +from vocolab.data import models, model_queries from vocolab.settings import get_settings router = APIRouter() @@ -21,8 +17,8 @@ @router.get('/list', response_model=List[models.api.ChallengePreview]) async def 
get_challenge_list(include_inactive: bool = False): """ Return a list of all active challenges """ - challenge_lst = await challengesQ.list_challenges(include_all=include_inactive) - return [models.api.ChallengePreview(id=ch.id, label=ch.label, active=ch.active) for ch in challenge_lst] + challenge_lst = await model_queries.ChallengeList.get(include_all=include_inactive) + return [models.api.ChallengePreview(id=ch.id, label=ch.label, active=ch.active) for ch in challenge_lst.items] @router.get('/{challenge_id}/info', response_model=models.api.ChallengesResponse, @@ -30,30 +26,27 @@ async def get_challenge_list(include_inactive: bool = False): async def get_challenge_info(challenge_id: int): """ Return information of a specific challenge """ # todo add leaderboards to challenge info - return await challengesQ.get_challenge(challenge_id=challenge_id, allow_inactive=True) + return await model_queries.Challenge.get(challenge_id=challenge_id, allow_inactive=True) -@router.get('/{challenge_id}/submissions', response_model=models.api.ChallengesResponse, + +@router.get('/{challenge_id}/submissions', responses={404: {"model": models.api.Message}}) -async def get_sub_list(challenge_id: int): +async def get_sub_list(challenge_id: int) -> model_queries.ChallengeSubmissionList: """ Return information of a specific challenge """ - # todo add leaderboards to challenge info - pass + return await model_queries.ChallengeSubmissionList.get_from_challenge(challenge_id) - -@router.get('/{challenge_id}/leaderboards', response_model=models.api.ChallengesResponse, - responses={404: {"model": models.api.Message}}) -async def get_all_leaderboards(challenge_id: int): +@router.get('/{challenge_id}/leaderboards', responses={404: {"model": models.api.Message}}) +async def get_all_leaderboards(challenge_id: int) -> model_queries.LeaderboardList: """ Return information of a specific challenge """ - # todo add leaderboards to challenge info - pass - + return await 
model_queries.LeaderboardList.get_by_challenge(challenge_id=challenge_id) @router.get('/{challenge_id}/leaderboards/{leaderboard_id}', response_model=models.api.ChallengesResponse, responses={404: {"model": models.api.Message}}) async def get_leaderboard(challenge_id: int, leaderboard_id): """ Return information of a specific challenge """ - # todo add leaderboards to challenge info - pass - + leaderboard = await model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) + if leaderboard is not None and challenge_id != leaderboard.challenge_id: + raise ValueError(f'No such leaderboard in challenge {challenge_id}') + return leaderboard diff --git a/vocolab/api/endpoints/leaderboards.py b/vocolab/api/endpoints/leaderboards.py index a1b83fb..68f36c8 100644 --- a/vocolab/api/endpoints/leaderboards.py +++ b/vocolab/api/endpoints/leaderboards.py @@ -8,8 +8,7 @@ APIRouter ) from vocolab import exc -from vocolab.db import models -from vocolab.db.q import leaderboardQ +from vocolab.data import models, model_queries from vocolab.core import api_lib from vocolab.settings import get_settings From dc90642b321747efb54f8ee21626da702a2b5bdf Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Mon, 6 Feb 2023 15:50:49 +0100 Subject: [PATCH 08/28] updated admin-commands to new database schema --- vocolab/admin/commands/api.py | 24 +-- vocolab/admin/commands/challenges.py | 27 +-- vocolab/admin/commands/evaluators.py | 24 ++- vocolab/admin/commands/leaderboards.py | 22 ++- vocolab/admin/commands/messaging.py | 2 +- vocolab/admin/commands/submissions.py | 156 +++++++++-------- vocolab/admin/commands/task_worker.py | 4 +- vocolab/admin/commands/test.py | 24 +-- vocolab/admin/commands/user.py | 232 ++++++++++--------------- 9 files changed, 241 insertions(+), 274 deletions(-) diff --git a/vocolab/admin/commands/api.py b/vocolab/admin/commands/api.py index 061a7cd..e06ae05 100644 --- a/vocolab/admin/commands/api.py +++ b/vocolab/admin/commands/api.py @@ -11,14 +11,14 @@ from vocolab 
import get_settings, out from vocolab.core import cmd_lib -from vocolab.db.base import create_db +from vocolab.data import db _settings = get_settings() class APICMD(cmd_lib.CMD): """ Command for api instance administration """ - + def __init__(self, root, name, cmd_path): super(APICMD, self).__init__(root, name, cmd_path) @@ -99,11 +99,11 @@ def run(self, argv): exec_args.extend(['zerospeech.api:app', '--reload', '--debug', '--no-access-log']) execv(executable, exec_args) - - + + class APInitEnvironmentCMD(cmd_lib.CMD): """ Initialise components needed for the API """ - + def __init__(self, root, name, cmd_path): super(APInitEnvironmentCMD, self).__init__(root, name, cmd_path) @@ -121,7 +121,7 @@ def run(self, argv): _settings.static_files_directory.mkdir(exist_ok=True, parents=True) # create tables out.cli.info(f"creating : tables in database ...") - create_db() + db.build_database_from_schema() class ConfigFiles(cmd_lib.CMD): @@ -137,11 +137,11 @@ def run(self, argv): class GunicornConfigGeneration(cmd_lib.CMD): """ Generate a template gunicorn config file """ - + def __init__(self, root, name, cmd_path): super(GunicornConfigGeneration, self).__init__(root, name, cmd_path) self.parser.add_argument('-o', '--out-file', type=str, help="File to output result config") - self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir))\ + self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir)) \ .get_template("gunicorn_app.wsgi") def run(self, argv): @@ -169,7 +169,7 @@ class SystemDSocketFileGeneration(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(SystemDSocketFileGeneration, self).__init__(root, name, cmd_path) self.parser.add_argument('-o', '--out-file', type=str, help="File to output result config") - self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir))\ + self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir)) \ .get_template("gunicorn.socket") 
def run(self, argv): @@ -201,7 +201,7 @@ def __init__(self, root, name, cmd_path): super(SystemDUnitGeneration, self).__init__(root, name, cmd_path) self.parser.add_argument('-o', '--out-file', type=str, help="File to output result config") self.parser.add_argument('gunicorn_config_file', type=str, help="File to configure gunicorn with") - self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir), trim_blocks=True)\ + self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir), trim_blocks=True) \ .get_template("api.service") def run(self, argv): @@ -238,12 +238,12 @@ class NginxConfigGeneration(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(NginxConfigGeneration, self).__init__(root, name, cmd_path) self.parser.add_argument('-o', '--out-file', type=str, help="File to output result config") - self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir), trim_blocks=True)\ + self.template = Environment(loader=FileSystemLoader(_settings.config_template_dir), trim_blocks=True) \ .get_template("nginx.conf") def run(self, argv): args = self.parser.parse_args(argv) - default_url = urlparse(_settings.API_BASE_URL) + default_url = urlparse(_settings.api_options.API_BASE_URL) data = dict( url=f"{default_url.netloc}{default_url.path}", bind_url=_settings.server_options.SERVER_BIND, diff --git a/vocolab/admin/commands/challenges.py b/vocolab/admin/commands/challenges.py index 6533d86..1486f4a 100644 --- a/vocolab/admin/commands/challenges.py +++ b/vocolab/admin/commands/challenges.py @@ -8,8 +8,7 @@ from vocolab import out from vocolab.core import cmd_lib -from vocolab.db import schema, models -from vocolab.db.q import challengesQ +from vocolab.data import models, model_queries class ChallengesCMD(cmd_lib.CMD): @@ -28,8 +27,8 @@ def run(self, argv): # fetch data loop = asyncio.get_event_loop() - challenge_lst = loop.run_until_complete( - 
challengesQ.list_challenges(include_all=args.include_all) + challenge_lst: model_queries.ChallengeList = loop.run_until_complete( + model_queries.ChallengeList.get(include_all=args.include_all) ) # Prepare output @@ -42,7 +41,7 @@ def run(self, argv): table.add_column("end_date") table.add_column("evaluator") - for ch in challenge_lst: + for ch in challenge_lst.items: if ch.end_date: end_date_str = ch.end_date.strftime('%d/%m/%Y') else: @@ -105,7 +104,7 @@ def run(self, argv): if not args.dry_run: for item in obj_list: - asyncio.run(challengesQ.create_new_challenge(item)) + asyncio.run(model_queries.Challenge.create(item)) out.cli.print(f"insertion of {item.label} was successful:white_check_mark:", style="bold green") else: @@ -124,7 +123,7 @@ class SetChallenge(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(SetChallenge, self).__init__(root, name, cmd_path) - self.challenge_fields = schema.Challenge.get_field_names() + self.challenge_fields = model_queries.Challenge.get_field_names() self.challenge_fields.remove('id') # arguments @@ -133,12 +132,18 @@ def __init__(self, root, name, cmd_path): help='The name of the field') self.parser.add_argument('value', help='The new value of the field') + @staticmethod + async def update_property(challenge_id: int, field_name: str, value: str): + ch = await model_queries.Challenge.get(challenge_id=challenge_id) + return await ch.update_property( + variable_name=field_name, + value=value, + allow_parsing=True + ) + def run(self, argv): args = self.parser.parse_args(argv) res = asyncio.run( - challengesQ.update_challenge_property( - challenge_id=args.id, variable_name=args.field_name, value=args.value, - allow_parsing=True - ) + self.update_property(args.id, args.field_name, args.value) ) out.cli.info(f"Field {args.field_name}={res} :white_check_mark:") diff --git a/vocolab/admin/commands/evaluators.py b/vocolab/admin/commands/evaluators.py index 83c9674..bde192e 100644 --- a/vocolab/admin/commands/evaluators.py 
+++ b/vocolab/admin/commands/evaluators.py @@ -1,11 +1,12 @@ import asyncio import sys +from typing import List from rich.prompt import Confirm from rich.table import Table from vocolab import get_settings, out -from vocolab.db.q import challenges as ch_queries +from vocolab.data import model_queries from vocolab.core import evaluators_lib, cmd_lib _settings = get_settings() @@ -19,7 +20,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): _ = self.parser.parse_args(argv) - evaluators = asyncio.run(ch_queries.get_evaluators()) + evaluators: model_queries.EvaluatorList = asyncio.run(model_queries.EvaluatorList.get()) # Prepare output table = Table(show_header=True, header_style="bold magenta") @@ -30,12 +31,11 @@ def run(self, argv): table.add_column("script_path") table.add_column("executor_arguments") - for ev in evaluators: + for ev in evaluators.items: table.add_row( f"{ev.id}", f"{ev.label}", f"{ev.host}", f"{ev.executor}", f"{ev.script_path}", f"{ev.executor_arguments}" ) - # print out.cli.print(table) @@ -82,6 +82,11 @@ def __init__(self, root, name, cmd_path): super(DiscoverEvaluatorsCMD, self).__init__(root, name, cmd_path) self.parser.add_argument('host') + @staticmethod + async def add_evaluators_list(eval_list): + for item in eval_list: + await model_queries.EvaluatorItem.add_or_update(evl_item=item) + def run(self, argv): args = self.parser.parse_args(argv) @@ -99,7 +104,7 @@ def run(self, argv): out.cli.print(f"Found evaluators : {[ev.label for ev in evaluators]}") response = Confirm.ask("Do want to import them into the database?") if response: - asyncio.run(ch_queries.add_evaluator(lst_eval=evaluators)) + asyncio.run(self.add_evaluators_list(evaluators)) out.cli.print(":heavy_check_mark: successfully inserted evaluators") @@ -112,14 +117,19 @@ def __init__(self, root, name, cmd_path): # arguments self.parser.add_argument("evaluator_id", type=int, help='The id of the entry') + @staticmethod + async def update_eval_args(evaluator_id: int, 
arg_list: List[str]): + evaluator = await model_queries.EvaluatorItem.get(evaluator_id) + await evaluator.update_args(arg_list) + def run(self, argv): """ Update base arguments of an evaluator Pass a list of arguments to give to the evaluator """ - args, rest = self.parser.parse_known_args(argv) + args, rest = self.parser.parse_known_args() asyncio.run( - ch_queries.edit_evaluator_args(eval_id=args.evaluator_id, arg_list=rest) + self.update_eval_args(args.evaluator_id, rest) ) out.cli.info(":heavy_check_mark: successfully updated evaluator") diff --git a/vocolab/admin/commands/leaderboards.py b/vocolab/admin/commands/leaderboards.py index 03d8a5e..228d748 100644 --- a/vocolab/admin/commands/leaderboards.py +++ b/vocolab/admin/commands/leaderboards.py @@ -7,9 +7,8 @@ from rich.table import Table from vocolab import out -from vocolab.db import schema -from vocolab.db.q import leaderboardQ from vocolab.core import leaderboards_lib, cmd_lib +from vocolab.data import model_queries class LeaderboardCMD(cmd_lib.CMD): @@ -21,9 +20,9 @@ def __init__(self, root, name, cmd_path): def run(self, argv): _ = self.parser.parse_args(argv) try: - leaderboards = asyncio.run(leaderboardQ.list_leaderboards()) + leaderboards: model_queries.LeaderboardList = asyncio.run(model_queries.LeaderboardList.get_all()) except ValueError: - leaderboards = [] + leaderboards = model_queries.LeaderboardList(items=[]) table = Table(show_header=True, header_style="bold magenta") table.add_column('ID') @@ -123,7 +122,7 @@ class EditLeaderboardCMD(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(EditLeaderboardCMD, self).__init__(root, name, cmd_path) - self.leaderboard_fields = schema.LeaderBoard.get_field_names() + self.leaderboard_fields = model_queries.Leaderboard.get_field_names() self.leaderboard_fields.remove('id') # arguments @@ -132,13 +131,18 @@ def __init__(self, root, name, cmd_path): help="The name of the field") self.parser.add_argument('field_value', help="The new value of the 
field") + @staticmethod + async def update_value(leaderboard_id: int, field_name: str, value: str): + leaderboard = await model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) + return await leaderboard.update_property(variable_name=field_name, value=value, allow_parsing=True) + + def run(self, argv): args = self.parser.parse_args(argv) - res = asyncio.run(leaderboardQ.update_leaderboard_value( + res = asyncio.run(self.update_value( leaderboard_id=args.leaderboard_id, - variable_name=args.field_name, - value=args.field_value, - allow_parsing=True + field_name=args.field_name, + value=args.field_value )) out.cli.info(f"Field {args.field_name}={res} :white_check_mark:") diff --git a/vocolab/admin/commands/messaging.py b/vocolab/admin/commands/messaging.py index 55697b7..59b6f58 100644 --- a/vocolab/admin/commands/messaging.py +++ b/vocolab/admin/commands/messaging.py @@ -4,7 +4,7 @@ from vocolab.core import cmd_lib # api settings -from vocolab.db.models.tasks import SimpleLogMessage, SubmissionUpdateMessage, UpdateType +from vocolab.data.models.tasks import SimpleLogMessage, SubmissionUpdateMessage, UpdateType from vocolab.worker import server as message_server _settings = get_settings() diff --git a/vocolab/admin/commands/submissions.py b/vocolab/admin/commands/submissions.py index 9b66031..3e01e68 100644 --- a/vocolab/admin/commands/submissions.py +++ b/vocolab/admin/commands/submissions.py @@ -6,9 +6,7 @@ from rich.table import Table from vocolab import out, get_settings -from vocolab.db.models.api import NewSubmissionRequest, NewSubmission -from vocolab.db.q import challengesQ, userQ -from vocolab.db import schema as db_challenges +from vocolab.data import models, model_queries from vocolab.core import submission_lib, cmd_lib # api settings @@ -25,23 +23,23 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument('-u', '--user', type=int, help='Filter by user ID') self.parser.add_argument('-t', '--track', type=int, help='Filter by track ID') 
self.parser.add_argument('-s', '--status', - choices=db_challenges.SubmissionStatus.get_values(), + choices=model_queries.SubmissionStatus.get_values(), help='Filter by status') - def run(self, argv): - args = self.parser.parse_args(argv) - fn_args = {} - + @staticmethod + async def fetch_by(args) -> model_queries.ChallengeSubmissionList: if args.user: - fn_args['by_user'] = args.user + return await model_queries.ChallengeSubmissionList.get_from_user(user_id=args.user) - if args.track: - fn_args['by_track'] = args.track + elif args.track: + return await model_queries.ChallengeSubmissionList.get_from_challenge(challenge_id=args.track) - if args.status: - fn_args['by_status'] = args.status + elif args.status: + return await model_queries.ChallengeSubmissionList.get_by_status(status=args.status) - items = asyncio.run(challengesQ.list_submission(**fn_args)) + def run(self, argv): + args = self.parser.parse_args(argv) + items: model_queries.ChallengeSubmissionList = asyncio.run(self.fetch_by(args)) # Prepare output table = Table(show_header=True, header_style="bold magenta") @@ -71,16 +69,18 @@ def __init__(self, root, name, cmd_path): # custom arguments self.parser.add_argument("submission_id") self.parser.add_argument( - 'status', choices=[str(el.value) for el in db_challenges.SubmissionStatus] # noqa: enum has value attribute + 'status', choices=[str(el.value) for el in model_queries.SubmissionStatus] # noqa: enum has value attribute ) + @staticmethod + async def set_status(submission_id: str, status: model_queries.SubmissionStatus): + submission = await model_queries.ChallengeSubmission.get(submission_id=submission_id) + await submission.update_status(status=status) + def run(self, argv): args = self.parser.parse_args(argv) - submission_fs = submission_lib.get_submission_dir(args.submission_id, as_obj=True) - submission_fs.clean_all_locks() - asyncio.run(challengesQ.update_submission_status( - by_id=args.submission_id, status=args.status - )) + status = 
model_queries.SubmissionStatus(args.status) + asyncio.run(self.set_status(args.submission_id, status)) class CreateSubmissionCMD(cmd_lib.CMD): @@ -88,62 +88,75 @@ class CreateSubmissionCMD(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(CreateSubmissionCMD, self).__init__(root, name, cmd_path) + self.parser.add_argument("model_id", type=str) self.parser.add_argument("challenge_id", type=int) self.parser.add_argument("user_id", type=int) self.parser.add_argument("archive") def run(self, argv): args = self.parser.parse_args(argv) - archive = Path(args.archive) - - if not archive.is_file(): - out.cli.error(f'Requested file {archive} does not exist') - - async def create_submission(ch_id, user_id): - try: - _challenge = await challengesQ.get_challenge(challenge_id=ch_id) - _user = await userQ.get_user(by_uid=user_id) - - if not _user.enabled: - out.cli.error(f'User {_user.username} is not allowed to perform this action') - sys.exit(1) - - _submission_id = await challengesQ.add_submission(new_submission=NewSubmission( - user_id=_user.id, - track_id=_challenge.id - ), evaluator_id=_challenge.evaluator) - return _challenge, _user, _submission_id - except ValueError: - out.cli.exception() - sys.exit(1) - - # fetch db items - challenge, user, submission_id = asyncio.run(create_submission(args.challenge_id, args.user_id)) - - # create entry on disk - submission_lib.make_submission_on_disk( - submission_id, user.username, challenge.label, - NewSubmissionRequest( - filename=archive.name, hash=submission_lib.md5sum(archive), - multipart=False - ) - ) - # fetch folder - folder = submission_lib.get_submission_dir(submission_id) - # copy file - shutil.copy(archive, folder / 'archive.zip') - submission_lib.unzip(folder / 'archive.zip', folder / 'input') - - # set status - (folder / 'upload.lock').unlink() - asyncio.run( - challengesQ.update_submission_status(by_id=submission_id, status=db_challenges.SubmissionStatus.uploaded) - ) + # todo use new method + + # def 
run(self, argv): + # args = self.parser.parse_args(argv) + # archive = Path(args.archive) + # + # if not archive.is_file(): + # out.cli.error(f'Requested file {archive} does not exist') + + # async def create_submission(ch_id, user_id): + # try: + # _challenge = await model_queries.Challenge.get(challenge_id=ch_id) + # _user = await model_queries.User.get(by_uid=user_id) + # + # _model_id = await model_queries.ModelID.get(model_id=args.model_id) + # if _model_id is None: + # out.cli.error(f"Model: {args.model_id} does not exist please create it !!") + # sys.exit(1) + # + # if not _user.enabled: + # out.cli.error(f'User {_user.username} is not allowed to perform this action') + # sys.exit(1) + # + # _submission_id = await model_queries.ChallengeSubmission.create( + # username=_user.username, + # + # new_submission=NewSubmission( + # user_id=_user.id, + # track_id=_challenge.id + # ), evaluator_id=_challenge.evaluator) + # return _challenge, _user, _submission_id + # except ValueError: + # out.cli.exception() + # sys.exit(1) + # + # # fetch db items + # challenge, user, submission_id = asyncio.run(create_submission(args.challenge_id, args.user_id)) + # + # # create entry on disk + # submission_lib.make_submission_on_disk( + # submission_id, user.username, challenge.label, + # NewSubmissionRequest( + # filename=archive.name, hash=submission_lib.md5sum(archive), + # multipart=False + # ) + # ) + # # fetch folder + # folder = submission_lib.get_submission_dir(submission_id) + # # copy file + # shutil.copy(archive, folder / 'archive.zip') + # submission_lib.unzip(folder / 'archive.zip', folder / 'input') + # + # # set status + # (folder / 'upload.lock').unlink() + # asyncio.run( + # challengesQ.update_submission_status(by_id=submission_id, status=db_challenges.SubmissionStatus.uploaded) + # ) class EvalSubmissionCMD(cmd_lib.CMD): """ Launches the evaluation of a submission """ - sub_status = db_challenges.SubmissionStatus + sub_status = model_queries.SubmissionStatus 
no_eval = { sub_status.uploading, sub_status.on_queue, sub_status.invalid, sub_status.uploading, sub_status.validating, sub_status.evaluating, @@ -163,8 +176,8 @@ def run(self, argv): else: extra_arguments = [] - submission: db_challenges.ChallengeSubmission = asyncio.run( - challengesQ.get_submission(by_id=args.submission_id)) + submission: model_queries.ChallengeSubmission = asyncio.run( + model_queries.ChallengeSubmission.get(submission_id=args.submission_id)) if submission.status in self.no_eval: out.cli.print(f"Cannot evaluate a submission that has status : {submission.status}") @@ -185,6 +198,8 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument("submission_id") def run(self, argv): + # todo recheck this + args = self.parser.parse_args(argv) if args.hostname not in list(_settings.task_queue_options.REMOTE_STORAGE.keys()): out.cli.warning(f"Host {args.hostname} is not a valid remote storage host!\n") @@ -212,6 +227,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this if args.hostname not in list(_settings.task_queue_options.REMOTE_STORAGE.keys()): out.cli.warning(f"Host {args.hostname} is not a valid remote storage host!\n") @@ -241,6 +257,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this if args.delete_by == 'by_id': del_id = asyncio.run(submission_lib.delete_submission(by_id=args.selector)) @@ -274,6 +291,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this asyncio.run(challengesQ.update_submission_evaluator( args.evaluator_id, by_id=args.submission_id @@ -291,6 +309,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this asyncio.run(challengesQ.update_submission_author_label( args.author_label, by_id=args.submission_id )) @@ -320,6 +339,7 @@ async def 
archive_submission(*args): def run(self, argv): args = self.parser.parse_args(argv) + # todo recheck this if args.type == 'by_id': asyncio.run(self.archive_submission(args.selector)) diff --git a/vocolab/admin/commands/task_worker.py b/vocolab/admin/commands/task_worker.py index 70c823b..6fae2b0 100644 --- a/vocolab/admin/commands/task_worker.py +++ b/vocolab/admin/commands/task_worker.py @@ -7,7 +7,7 @@ from vocolab import out, get_settings from vocolab.core import cmd_lib -from vocolab.db.models import tasks +from vocolab.data import models from vocolab.worker import server _settings = get_settings() @@ -54,7 +54,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) server.echo().delay( - tasks.SimpleLogMessage( + models.tasks.SimpleLogMessage( label="cli-echo-testing", message=f"{args.message}" ).dict() diff --git a/vocolab/admin/commands/test.py b/vocolab/admin/commands/test.py index c016c2f..8c4ec04 100644 --- a/vocolab/admin/commands/test.py +++ b/vocolab/admin/commands/test.py @@ -6,7 +6,6 @@ from pydantic import EmailStr from vocolab import get_settings, out -from vocolab.db.models.misc import UserCreate from vocolab.core import notify, cmd_lib _settings = get_settings() @@ -59,25 +58,4 @@ def __init__(self, root, name, cmd_path): def run(self, argv): _ = self.parser.parse_args(argv) - out.cli.print("-- New User Info --", style="bold") - first_name = out.cli.raw.input("First Name: ") - last_name = out.cli.raw.input("Last Name: ") - email = out.cli.raw.input("Email: ") - affiliation = out.cli.raw.input("Affiliation: ") - - clean_last_name = ''.join([i if i in string.ascii_letters else ' ' for i in last_name]) - def_username = f"{first_name[0]}{clean_last_name.replace(' ', '')}".lower() - username = out.cli.raw.input(f"Username(default {def_username}): ") - username = username if username else def_username - - password = out.cli.raw.input("Password: ", password=True) - - user = UserCreate( - username=username, 
- email=EmailStr(email), - pwd=password, - first_name=first_name, - last_name=last_name, - affiliation=affiliation - ) - out.cli.print(user) + out.cli.info("Nothing to see here move along") diff --git a/vocolab/admin/commands/user.py b/vocolab/admin/commands/user.py index 08664de..a06df42 100644 --- a/vocolab/admin/commands/user.py +++ b/vocolab/admin/commands/user.py @@ -3,16 +3,15 @@ import string import sys from pathlib import Path +from typing import Tuple from pydantic import EmailStr from rich.prompt import Prompt from rich.table import Table from vocolab import out, get_settings -from vocolab.db.models.misc import UserCreate -from vocolab.db.q import userQ, challengesQ from vocolab.core import notify, cmd_lib -from vocolab.core.misc import CustomTypesJsonEncoder +from vocolab.data import models, model_queries _settings = get_settings() @@ -29,7 +28,7 @@ def run(self, argv): args = self.parser.parse_args(argv) # fetch data - user_lst = asyncio.run(userQ.get_user_list()) + user_lst: model_queries.UserList = asyncio.run(model_queries.UserList.get()) if args.mail_list: for u in user_lst: @@ -56,72 +55,27 @@ def run(self, argv): out.cli.print(table) -class UserSessionsCMD(cmd_lib.CMD): - """ List logged users """ - - def __init__(self, root, name, cmd_path): - super(UserSessionsCMD, self).__init__(root, name, cmd_path) - - @staticmethod - def just_print(): - """ Prints a list of logged users """ - user_lst = asyncio.run(userQ.get_logged_user_list()) - - # Prepare output - table = Table(show_header=True, header_style="bold magenta") - table.add_column("ID") - table.add_column("Username") - table.add_column("Email") - table.add_column("Active") - table.add_column("Verified") - - for usr in user_lst: - table.add_row( - f"{usr.id}", usr.username, usr.email, f"{usr.active}", f"{usr.verified}" - ) - - out.cli.print(table) - - def run(self, argv): - _ = self.parser.parse_args(argv) - self.just_print() - - -class CloseUserSessionsCMD(cmd_lib.CMD): - """ Close user 
sessions """ - - def __init__(self, root, name, cmd_path): - super(CloseUserSessionsCMD, self).__init__(root, name, cmd_path) - self.parser.add_argument("-u", "--user-id") - self.parser.add_argument("-a", "--close-all", action='store_true') - - def run(self, argv): - args = self.parser.parse_args(argv) - - if args.user_id: - asyncio.run(userQ.delete_session(by_uid=args.user_id)) - out.cli.print(f"All sessions of user {args.user_id} were closed", style="bold") - elif args.close_all: - asyncio.run(userQ.delete_session(clear_all=True)) - out.cli.print(f"All sessions were closed", style="bold") - else: - self.parser.print_help() - - sys.exit(0) - - -class CreateUserSessionsCMD(cmd_lib.CMD): +class CreateUserSessionCMD(cmd_lib.CMD): """ Create a session for a user """ def __init__(self, root, name, cmd_path): - super(CreateUserSessionsCMD, self).__init__(root, name, cmd_path) + super(CreateUserSessionCMD, self).__init__(root, name, cmd_path) self.parser.add_argument("user_id", type=int) + @staticmethod + async def login(user_id: int) -> Tuple[str, str, str]: + user = await model_queries.User.get(by_uid=user_id) + token = model_queries.Token( + user_email=user.email + ) + return token.encode(), user.username, user.email + def run(self, argv): args = self.parser.parse_args(argv) - - usr, token = asyncio.run(userQ.admin_login(by_uid=args.user_id)) - out.cli.print(f"{usr.username}, {usr.email}, {token}") + token, username, email = asyncio.run( + self.login(args.user_id) + ) + out.cli.print(f"{username}, {email}, {token}") sys.exit(0) @@ -133,15 +87,15 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument('-f', '--from-file', type=str, help="Load users from a json file") @staticmethod - def _make_usr(user: UserCreate): - _ = asyncio.run(userQ.create_user(usr=user)) + def _make_usr(user: models.api.UserCreateRequest): + _ = asyncio.run(model_queries.User.create(new_usr=user)) def _create_from_file(self, file: Path): with file.open() as fp: user_list = 
json.load(fp) for data in user_list: - user = UserCreate( + user = models.api.UserCreateRequest( username=data.get("username"), email=EmailStr(data.get('email')), pwd=data.get("password"), @@ -166,7 +120,7 @@ def _create_form_input(self): password = out.cli.raw.input("Password: ", password=True) - user = UserCreate( + user = models.api.UserCreateRequest( username=username, email=EmailStr(email), pwd=password, @@ -203,18 +157,24 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument("--send-all", action='store_true', help="resend verification email to all unverified users") + @staticmethod + async def verify_single(user_id: int): + user = await model_queries.User.get(by_uid=user_id) + await user.verify(verification_code=user.verified, force=True) + + @staticmethod + async def verify_all(): + await model_queries.UserList.verify() + def run(self, argv): args = self.parser.parse_args(argv) if args.verify: # verify user - asyncio.run(userQ.admin_verification(user_id=args.verify)) + asyncio.run(self.verify_single(user_id=args.verify)) elif args.verify_all: # verify all users - users = asyncio.run(userQ.get_user_list()) - for u in users: - if u.verified != 'True': - asyncio.run(userQ.admin_verification(user_id=u.id)) + asyncio.run(self.verify_all()) elif args.send: # send verification email try: @@ -225,7 +185,7 @@ def run(self, argv): sys.exit(1) try: - user = asyncio.run(userQ.get_user(by_uid=args.send)) + user = asyncio.run(model_queries.User.get(by_uid=args.send)) except ValueError: out.cli.error(f"User with id: {args.send} does not exist !!") sys.exit(1) @@ -254,7 +214,7 @@ def run(self, argv): out.cli.error("Path file not found in settings") sys.exit(1) - users = asyncio.run(userQ.get_user_list()) + users = asyncio.run(model_queries.UserList.get()) for u in users: if u.verified != 'True': asyncio.run(notify.email.template_email( @@ -263,7 +223,7 @@ def run(self, argv): data=dict( username=u.username, admin_email=_settings.app_options.admin_email, - 
url=f"{_settings.API_BASE_URL}{verification_path}?v={u.verified}&username={u.username}" + url=f"{_settings.api_options.API_BASE_URL}{verification_path}?v={u.verified}&username={u.username}" ), template_name='email_validation.jinja2' )) @@ -283,24 +243,33 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument("--activate-all", action='store_true', help="activate all users") self.parser.add_argument("--deactivate-all", action='store_true', help="deactivate all users") + @staticmethod + async def toggle_status(user_id: int, activate: bool): + user = await model_queries.User.get(by_uid=user_id) + await user.toggle_status(active=activate) + + @staticmethod + async def toggle_all(activate: bool): + await model_queries.UserList.toggle_status(active=activate) + def run(self, argv): args = self.parser.parse_args(argv) if args.activate: # activate user - asyncio.run(userQ.toggle_user_status(user_id=args.activate, active=True)) + asyncio.run(self.toggle_status(user_id=args.activate, activate=True)) out.cli.info("User activated successfully") elif args.deactivate: # deactivate user - asyncio.run(userQ.toggle_user_status(user_id=args.deactivate, active=False)) + asyncio.run(self.toggle_status(user_id=args.activate, activate=False)) out.cli.info("User deactivated successfully") elif args.activate_all: # activate all users - asyncio.run(userQ.toggle_all_users_status(active=True)) + asyncio.run(self.toggle_all(activate=True)) out.cli.info("Users activated successfully") elif args.deactivate_all: # deactivate all users - asyncio.run(userQ.toggle_all_users_status(active=False)) + asyncio.run(self.toggle_all(activate=False)) out.cli.info("Users deactivated successfully") else: self.parser.print_help() @@ -314,6 +283,15 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument("-r", "--reset", metavar="UID", help="reset & send a new password session to user") + @staticmethod + async def reset_password_session(user_id): + user = await 
model_queries.User.get(by_uid=user_id) + token = model_queries.Token( + allow_password_reset=True, + user_email=user.email + ) + return token.encode(), user.username, user.email + def run(self, argv): args = self.parser.parse_args(argv) @@ -325,15 +303,13 @@ def run(self, argv): out.cli.error("Path file not found in settings") sys.exit(1) - user = asyncio.run(userQ.get_user(by_uid=args.reset)) - out.cli.ic(user) - session = asyncio.run(userQ.create_password_reset_session(username=user.username, email=user.email)) + token, username, email = asyncio.run(self.reset_password_session(args.reset)) asyncio.run(notify.email.template_email( - emails=[user.email], + emails=[email], subject='[Zerospeech] Password Reset', data=dict( - username=user.username, - url=f"{_settings.API_BASE_URL}{password_reset_path}?v={session.token}", + username=username, + url=f"{_settings.api_options.API_BASE_URL}{password_reset_path}?v={token}", admin_email=_settings.app_options.admin_email ), template_name='password_reset.jinja2' @@ -349,12 +325,15 @@ def __init__(self, root, name, cmd_path): super(CheckPasswordCMD, self).__init__(root, name, cmd_path) self.parser.add_argument('user_id', type=int) + @staticmethod + async def check_password(user_id: int, password: str): + user = await model_queries.User.get(by_uid=user_id) + return user.password_matches(password) + def run(self, argv): args = self.parser.parse_args(argv) pwd = Prompt.ask('password', password=True) - - user = asyncio.run(userQ.get_user(by_uid=args.user_id)) - if userQ.check_users_password(password=pwd, user=user): + if asyncio.run(self.check_password(args.user_id, pwd)): out.cli.info("--> Passwords match !!") sys.exit(0) else: @@ -362,37 +341,6 @@ def run(self, argv): sys.exit(1) -class ResetSessionsCMD(cmd_lib.CMD): - """ Check the list of reset sessions """ - - def __init__(self, root, name, cmd_path): - super(ResetSessionsCMD, self).__init__(root, name, cmd_path) - self.parser.add_argument('--all', action='store_true', 
help="Show all sessions (even expired ones)") - self.parser.add_argument('--clean', action='store_true', help="Clean expired sessions") - - def run(self, argv): - args = self.parser.parse_args(argv) - - if args.clean: - # clean sessions - asyncio.run(userQ.clear_expired_password_reset_sessions()) - out.cli.info('removed all expired password reset sessions :heavy_check_mark:') - else: - sessions = asyncio.run(userQ.get_password_reset_sessions(args.all)) - # print - table = Table(show_header=True, header_style="bold magenta") - table.add_column("user_id") - table.add_column("token") - table.add_column("expiration_date") - - for item in sessions: - table.add_row( - f"{item.user_id}", item.token, f"{item.expiration_date.isoformat()}" - ) - - out.cli.print(table) - - class NotifyCMD(cmd_lib.CMD): """ Notify all users """ @@ -404,7 +352,7 @@ def __init__(self, root, name, cmd_path): def run(self, argv): args = self.parser.parse_args(argv) - user_list = asyncio.run(userQ.get_user_list()) + user_list = asyncio.run(model_queries.UserList.get()) email_list = [user.email for user in user_list] with args.body.open() as fp: body = fp.read() @@ -426,26 +374,28 @@ def __init__(self, root, name, cmd_path): @staticmethod async def delete_user(user_id: int): - user_submissions = await challengesQ.list_submission(by_user=user_id) - if len(user_submissions) > 0: - out.cli.print(f"User {user_id} has {len(user_submissions)} unarchived submissions !!\n" - f"Cannot delete, archive submissions and try again !!") - sys.exit(1) - - user = await userQ.get_user(by_uid=user_id) - user_dict = user.dict() - - await userQ.delete_session(by_uid=user_id) - await userQ.clear_password_reset_sessions(by_uid=user_id) - await userQ.delete_user(uid=user_id) - return user_dict + pass + # user_submissions = await challengesQ.list_submission(by_user=user_id) + # if len(user_submissions) > 0: + # out.cli.print(f"User {user_id} has {len(user_submissions)} unarchived submissions !!\n" + # f"Cannot delete, 
archive submissions and try again !!") + # sys.exit(1) + # + # user = await userQ.get_user(by_uid=user_id) + # user_dict = user.dict() + # + # await userQ.delete_session(by_uid=user_id) + # await userQ.clear_password_reset_sessions(by_uid=user_id) + # await userQ.delete_user(uid=user_id) + # return user_dict def run(self, argv): - args = self.parser.parse_args(argv) - user_dict = asyncio.run(self.delete_user(args.user_id)) - out.cli.info(f'User {args.user_id} deleted successfully !!') - - if args.save: - out.cli.info(f"backing up user @ {args.save}") - with Path(args.save).with_suffix('.json').open('w') as fp: - json.dump(user_dict, fp, cls=CustomTypesJsonEncoder) + pass + # args = self.parser.parse_args(argv) + # user_dict = asyncio.run(self.delete_user(args.user_id)) + # out.cli.info(f'User {args.user_id} deleted successfully !!') + # + # if args.save: + # out.cli.info(f"backing up user @ {args.save}") + # with Path(args.save).with_suffix('.json').open('w') as fp: + # json.dump(user_dict, fp, cls=CustomTypesJsonEncoder) From 05f30c883b47425156ec4e6592a14df9b9578cbe Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Mon, 6 Feb 2023 15:51:14 +0100 Subject: [PATCH 09/28] updated api to new database schema --- vocolab/api/endpoints/auth.py | 6 ++ vocolab/api/endpoints/challenges.py | 19 ++--- vocolab/api/endpoints/leaderboards.py | 76 ++++++++++---------- vocolab/api/endpoints/models.py | 91 ++---------------------- vocolab/api/endpoints/submissions.py | 64 +++++++++++++++++ vocolab/api/endpoints/users.py | 64 ++++++++++++++++- vocolab/data/model_queries/auth.py | 19 +++-- vocolab/data/model_queries/challenges.py | 28 +++++--- vocolab/data/model_queries/models.py | 10 ++- 9 files changed, 221 insertions(+), 156 deletions(-) create mode 100644 vocolab/api/endpoints/submissions.py diff --git a/vocolab/api/endpoints/auth.py b/vocolab/api/endpoints/auth.py index 32aa563..51ac8ac 100644 --- a/vocolab/api/endpoints/auth.py +++ b/vocolab/api/endpoints/auth.py @@ -148,3 
+148,9 @@ async def post_password_update(v: str, request: Request, html_response: bool = F if html_response: return HTMLResponse(api_lib.generate_html_response(data, template_name='response.html.jinja2')) return JSONResponse(data) + + +@router.post('/email/validate') +async def validate_email(code: str): + # todo + pass \ No newline at end of file diff --git a/vocolab/api/endpoints/challenges.py b/vocolab/api/endpoints/challenges.py index 10f61b0..6b98d30 100644 --- a/vocolab/api/endpoints/challenges.py +++ b/vocolab/api/endpoints/challenges.py @@ -29,24 +29,19 @@ async def get_challenge_info(challenge_id: int): return await model_queries.Challenge.get(challenge_id=challenge_id, allow_inactive=True) -@router.get('/{challenge_id}/submissions', +@router.get('/{challenge_id}/submissions/list', responses={404: {"model": models.api.Message}}) async def get_sub_list(challenge_id: int) -> model_queries.ChallengeSubmissionList: """ Return information of a specific challenge """ return await model_queries.ChallengeSubmissionList.get_from_challenge(challenge_id) -@router.get('/{challenge_id}/leaderboards', responses={404: {"model": models.api.Message}}) -async def get_all_leaderboards(challenge_id: int) -> model_queries.LeaderboardList: - """ Return information of a specific challenge """ - return await model_queries.LeaderboardList.get_by_challenge(challenge_id=challenge_id) +@router.get("/{challenge_id}/models/list") +async def get_models_list(challenge_id: int): + pass -@router.get('/{challenge_id}/leaderboards/{leaderboard_id}', response_model=models.api.ChallengesResponse, - responses={404: {"model": models.api.Message}}) -async def get_leaderboard(challenge_id: int, leaderboard_id): +@router.get('/{challenge_id}/leaderboards/list', responses={404: {"model": models.api.Message}}) +async def get_all_leaderboards(challenge_id: int) -> model_queries.LeaderboardList: """ Return information of a specific challenge """ - leaderboard = await 
model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) - if leaderboard is not None and challenge_id != leaderboard.challenge_id: - raise ValueError(f'No such leaderboard in challenge {challenge_id}') - return leaderboard + return await model_queries.LeaderboardList.get_by_challenge(challenge_id=challenge_id) diff --git a/vocolab/api/endpoints/leaderboards.py b/vocolab/api/endpoints/leaderboards.py index 68f36c8..4eacb6d 100644 --- a/vocolab/api/endpoints/leaderboards.py +++ b/vocolab/api/endpoints/leaderboards.py @@ -1,52 +1,52 @@ """ Routing for /leaderboards section of the API This section handles leaderboard data """ -from datetime import datetime -from typing import List from fastapi import ( APIRouter ) -from vocolab import exc + from vocolab.data import models, model_queries -from vocolab.core import api_lib from vocolab.settings import get_settings router = APIRouter() _settings = get_settings() -@router.get('/', response_model=List[models.api.LeaderboardPublicView], responses={404: {"model": models.api.Message}}) -async def get_leaderboards_list(): - """ Returns the list of leaderboards """ - lst = await leaderboardQ.list_leaderboards() - - # strip non public values from entries - return [ - models.api.LeaderboardPublicView( - id=ld.id, - challenge_id=ld.challenge_id, - label=ld.label, - entry_file=ld.entry_file, - archived=ld.archived, - static_files=ld.static_files - ) - for ld in lst - ] - - -@router.get('/{leaderboard_id}/json', responses={404: {"model": models.api.Message}}) -async def get_leaderboard_data(leaderboard_id: int): - """ Return leaderboard of a specific challenge """ - try: - leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - except ValueError: - raise exc.ResourceRequestedNotFound(f'No leaderboard with id {leaderboard_id}') - - if leaderboard.path_to.is_file(): - return api_lib.file2dict(leaderboard.path_to) - else: - return dict( - updatedOn=datetime.now().isoformat(), - data=[] - ) + 
+@router.get("/list") +async def get_list(): + pass + +@router.get('{leaderboard_id}/info', response_model=models.api.ChallengesResponse, + responses={404: {"model": models.api.Message}}) +async def get_leaderboard_info(leaderboard_id: int): + """ Return information of a specific challenge """ + return await model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) + + +@router.get("{leaderboard_id}/json") +async def get_leaderboard_entries_as_json(leaderboard_id: int): + pass + # try: + # leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) + # except ValueError: + # raise exc.ResourceRequestedNotFound(f'No leaderboard with id {leaderboard_id}') + # + # if leaderboard.path_to.is_file(): + # return api_lib.file2dict(leaderboard.path_to) + # else: + # return dict( + # updatedOn=datetime.now().isoformat(), + # data=[] + # ) + + +@router.get("{leaderboard_id}/csv") +async def get_leaderboard_entries_as_csv(leaderboard_id: int): + pass + + +@router.get("{leaderboard_id}/entry/{entry_id}") +async def get_leaderboard_entry(leaderboard_id: int, entry_id: str): + pass diff --git a/vocolab/api/endpoints/models.py b/vocolab/api/endpoints/models.py index 2ee6e87..309314e 100644 --- a/vocolab/api/endpoints/models.py +++ b/vocolab/api/endpoints/models.py @@ -3,27 +3,16 @@ """ from fastapi import ( - APIRouter, Depends, UploadFile, File, BackgroundTasks + APIRouter ) -from vocolab import out, exc -from vocolab.core import api_lib, submission_lib -from vocolab.data import models, model_queries +from vocolab.data import model_queries from vocolab.settings import get_settings router = APIRouter() _settings = get_settings() -@router.post('/create') -async def create_new_model( - first_author_name: str, - current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - """ Route to create a new model entry """ - # todo: add data in request body & check if it works correctly - return await model_queries.ModelID.create(first_author_name, 
...) - - @router.get('/list') async def get_model_list(): """ Request the full model list """ @@ -40,80 +29,10 @@ async def get_model_info(model_id: str): async def get_model_submissions(model_id: str): """ Get all submissions corresponding to a model_id """ model = await model_queries.ModelID.get(model_id) - return await model.get_submissions() - - -@router.get('/{model_id}/submissions/{submission_id}/info') -async def get_model_submission_info(): - # todo: check - pass + # todo load submissions -@router.get('/{model_id}/submissions/{submission_id}/leaderboard-entries') -async def get_model_submission_leaderboard_entries(): +@router.get('/{model_id}/challenges/list') +async def get_model_submission_info(model_id: str): # todo: check pass - - -# todo: update submission process -@router.post('/{model_id}/submissions/create/', responses={404: {"model": models.api.Message}}) -async def create_submission( - model_id: str, challenge_id: int, - data: models.api.NewSubmissionRequest, - current_user: schema.User = Depends(api_lib.get_current_active_user) -): - """ Create a new submission """ - # todo fetch model_id - - challenge = await challengesQ.get_challenge(challenge_id=challenge_id) - if challenge is None: - return ValueError(f'challenge {challenge_id} not found or inactive') - - # create db entry - # todo check submission table data - submission_id = await challengesQ.add_submission(new_submission=models.api.NewSubmission( - user_id=current_user.id, - track_id=challenge.id, - ), evaluator_id=challenge.evaluator) - - # create disk entry - model_dir = submission_lib.ModelDir.load(data.model_id) - model_dir.make_submission( - submission_id=submission_id, - challenge_id=challenge_id, - challenge_label=challenge.label, - auto_eval=..., - request_meta=data - ) - - return submission_id - - -# todo update -@router.put("/{model_id}/submission/{submission_id}/upload", response_model=models.api.UploadSubmissionPartResponse) -async def upload_submission( - model_id: str, - 
submission_id: str, - challenge_id: int, - part_name: str, - background_tasks: BackgroundTasks, - file_data: UploadFile = File(...), - current_user: schema.User = Depends(api_lib.get_current_active_user), -): - out.console.info(f"user: {current_user.username}") - challenge = await challengesQ.get_challenge(challenge_id=challenge_id) - if challenge is None: - return ValueError(f'challenge {challenge_id} not found or inactive') - try: - is_completed, remaining = submission_lib.add_part(submission_id, part_name, file_data) - - if is_completed: - # run the completion of the submission on the background - background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) - - return models.api.UploadSubmissionPartResponse( - completed=is_completed, remaining=[n.file_name for n in remaining] - ) - except exc.VocoLabException as e: - out.log.exception() - raise e diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py new file mode 100644 index 0000000..ef70959 --- /dev/null +++ b/vocolab/api/endpoints/submissions.py @@ -0,0 +1,64 @@ +""" Routing for /challenges section of the API +This section handles challenge data +""" + +from fastapi import ( + APIRouter, Depends, UploadFile, File, BackgroundTasks +) + +from vocolab import out, exc +from vocolab.core import api_lib, submission_lib +from vocolab.data import models, model_queries +from vocolab.settings import get_settings + +router = APIRouter() +_settings = get_settings() + + +@router.get("/list") +async def get_sub_list(): + pass + + +@router.get("/{submission_id}/info") +async def get_sub_info(submission_id: str): + pass + + +@router.get("/{submission_id}/scores") +async def get_submission_scores(submission_id: str): + pass + + +@router.put("/{submission_id}/content/add", response_model=models.api.UploadSubmissionPartResponse) +async def upload_submission( + model_id: str, + submission_id: str, + challenge_id: int, + part_name: str, + background_tasks: 
BackgroundTasks, + file_data: UploadFile = File(...), + current_user: schema.User = Depends(api_lib.get_current_active_user), +): + out.console.info(f"user: {current_user.username}") + challenge = await challengesQ.get_challenge(challenge_id=challenge_id) + if challenge is None: + return ValueError(f'challenge {challenge_id} not found or inactive') + try: + is_completed, remaining = submission_lib.add_part(submission_id, part_name, file_data) + + if is_completed: + # run the completion of the submission on the background + background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) + + return models.api.UploadSubmissionPartResponse( + completed=is_completed, remaining=[n.file_name for n in remaining] + ) + except exc.VocoLabException as e: + out.log.exception() + raise e + + +@router.delete("/{submission_id}/remove") +async def remove_submission(submission_id: str): + pass \ No newline at end of file diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 2543a28..3dd42e8 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -16,8 +16,9 @@ _settings = get_settings() -@router.get("/profile") +@router.get("/{username}/profile") def get_profile( + username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)) -> users_lib.UserProfileData: try: user_data = current_user.get_profile_data() @@ -29,8 +30,9 @@ def get_profile( out.console.exception() -@router.post("/profile") +@router.post("/{username}/profile") def update_profile( + username: str, user_data: users_lib.UserProfileData, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): if user_data.username != current_user.username: @@ -42,6 +44,64 @@ def update_profile( return Response(status_code=200) +@router.get("/{username}/models/list") +async def list_users_models(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + # todo + pass + 
+ +@router.post("/{username}/models/create") +async def create_new_model(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + # todo + pass + + +@router.get("/{username}/submissions/list") +async def list_users_submissions(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + # todo + pass + + +@router.post("/{username}/submissions/create") +async def create_new_submission(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + pass + + +# todo: update submission process +# @router.post('/{model_id}/submissions/create/', responses={404: {"model": models.api.Message}}) +# async def create_submission( +# model_id: str, challenge_id: int, +# data: models.api.NewSubmissionRequest, +# current_user: schema.User = Depends(api_lib.get_current_active_user) +# ): +# """ Create a new submission """ +# # todo fetch model_id +# +# challenge = await challengesQ.get_challenge(challenge_id=challenge_id) +# if challenge is None: +# return ValueError(f'challenge {challenge_id} not found or inactive') +# +# # create db entry +# # todo check submission table data +# submission_id = await challengesQ.add_submission(new_submission=models.api.NewSubmission( +# user_id=current_user.id, +# track_id=challenge.id, +# ), evaluator_id=challenge.evaluator) +# +# # create disk entry +# model_dir = submission_lib.ModelDir.load(data.model_id) +# model_dir.make_submission( +# submission_id=submission_id, +# challenge_id=challenge_id, +# challenge_label=challenge.label, +# auto_eval=..., +# request_meta=data +# ) +# +# return submission_id + + + # @router.get('{username}/submissions') # async def submissions_list(username: str): # """ Return a list of all user submissions """ diff --git a/vocolab/data/model_queries/auth.py b/vocolab/data/model_queries/auth.py index d8a6374..6557d8a 100644 --- a/vocolab/data/model_queries/auth.py +++ b/vocolab/data/model_queries/auth.py @@ -1,14 
+1,14 @@ import json import secrets from datetime import datetime -from typing import Optional, List +from typing import Optional, List, Iterable from email_validator import validate_email, EmailNotValidError from jose import jwt, JWTError # noqa: false flags from requirements https://youtrack.jetbrains.com/issue/PY-27985 from pydantic import BaseModel, EmailStr, Field, ValidationError from vocolab.data import models, tables, exc as db_exc -from ..base import zrDB +from ..db import zrDB from ...core import users_lib from ...settings import get_settings @@ -169,8 +169,12 @@ async def create(cls, *, new_usr: models.api.UserCreateRequest): class UserList(BaseModel): items: List[User] + + def __iter__(self) -> Iterable[User]: + return iter(self.items) + @classmethod - def get(cls, active_only: bool = False) -> "UserList": + async def get(cls, active_only: bool = False) -> "UserList": """ Get all existing users, flag allows to filter non-active users """ query = tables.users_table.select() if active_only: @@ -183,13 +187,20 @@ def get(cls, active_only: bool = False) -> "UserList": return cls(items=user_list) @classmethod - def toggle_status(cls, active: bool = True): + async def toggle_status(cls, active: bool = True): """ Toggles all users status from active to inactive """ query = tables.users_table.update().values( active=active ) return await zrDB.execute(query) + @classmethod + async def verify(cls): + query = tables.users_table.update().values( + verify="True" + ) + await zrDB.execute(query) + class Token(BaseModel): """ API Session Token """ diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py index c0503b4..ca7548e 100644 --- a/vocolab/data/model_queries/challenges.py +++ b/vocolab/data/model_queries/challenges.py @@ -2,7 +2,7 @@ from datetime import date from datetime import datetime from pathlib import Path -from typing import Optional, List, Any +from typing import Optional, List, Any, Iterable from pydantic import 
BaseModel from pydantic import HttpUrl @@ -24,6 +24,12 @@ class EvaluatorItem(BaseModel): class Config: orm_mode = True + async def update_args(self, arg_list: List[str]): + query = tables.evaluators_table.update().where( + tables.evaluators_table.c.id == self.id + ).values(executor_arguments=shlex.join(arg_list)) + await zrDB.execute(query) + @classmethod async def add_or_update(cls, *, evl_item: models.cli.NewEvaluatorItem): query = tables.evaluators_table.select().where( @@ -45,7 +51,7 @@ async def add_or_update(cls, *, evl_item: models.cli.NewEvaluatorItem): await zrDB.execute(update_query) @classmethod - async def get(cls, by_id: str) -> Optional["EvaluatorItem"]: + async def get(cls, by_id: int) -> Optional["EvaluatorItem"]: query = tables.evaluators_table.select().where( tables.evaluators_table.c.id == by_id ) @@ -53,15 +59,14 @@ async def get(cls, by_id: str) -> Optional["EvaluatorItem"]: if not result: return None return cls.parse_obj(result) - async def update_args(self, arg_list: List[str]): - query = tables.evaluators_table.update().where( - tables.evaluators_table.c.id == self.id - ).values(executor_arguments=shlex.join(arg_list)) class EvaluatorList(BaseModel): items: List[EvaluatorItem] + def __iter__(self) -> Iterable[EvaluatorItem]: + return iter(self.items) + @classmethod async def get(cls) -> "EvaluatorList": query = tables.evaluators_table.select() @@ -71,7 +76,6 @@ async def get(cls) -> "EvaluatorList": return cls(items=results) - class Challenge(BaseModel): """ Data representation of a challenge """ id: int @@ -158,11 +162,12 @@ async def delete(self): await zrDB.execute(query) - - class ChallengeList(BaseModel): items: List[Challenge] + def __iter__(self) -> Iterable[Challenge]: + return iter(self.items) + def filter_active(self) -> "ChallengeList": self.items = [i for i in self.items if i.is_active()] return self @@ -252,7 +257,6 @@ async def update_property(self, *, variable_name: str, value: Any, allow_parsing return value - @classmethod 
async def get(cls, leaderboard_id: int) -> Optional["Leaderboard"]: query = tables.leaderboards_table.select().where( @@ -267,6 +271,9 @@ async def get(cls, leaderboard_id: int) -> Optional["Leaderboard"]: class LeaderboardList(BaseModel): items: List[Leaderboard] + def __iter__(self) -> Iterable[Leaderboard]: + return iter(self.items) + @classmethod async def get_all(cls) -> "LeaderboardList": query = tables.leaderboards_table.select() @@ -286,7 +293,6 @@ async def get_by_challenge(cls, challenge_id: int) -> "LeaderboardList": return cls(items=ld_list) - class LeaderboardEntry: """ Data representation of a leaderboard entry """ id: Optional[int] diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index c6c332c..e518cf5 100644 --- a/vocolab/data/model_queries/models.py +++ b/vocolab/data/model_queries/models.py @@ -2,7 +2,7 @@ from datetime import datetime from enum import Enum from itertools import chain, product -from typing import Optional, List +from typing import Optional, List, Iterable from pydantic import BaseModel, AnyHttpUrl @@ -105,6 +105,9 @@ async def get(cls, model_id: str) -> Optional["ModelID"]: class ModelIDList(BaseModel): items: List[ModelID] + def __iter__(self) -> Iterable[ModelID]: + return iter(self.items) + @classmethod async def get(cls) -> "ModelIDList": items = db.zrDB.fetch_all(tables.models_table.select()) @@ -199,10 +202,12 @@ async def delete(self): ) - class ChallengeSubmissionList(BaseModel): items: List[ChallengeSubmission] + def __iter__(self) -> Iterable[ChallengeSubmission]: + return iter(self.items) + @classmethod async def get_from_challenge(cls, challenge_id: int): items = await db.zrDB.fetch_all( @@ -251,7 +256,6 @@ async def get_by_status(cls, status: SubmissionStatus): return cls(items=items) - async def update_evaluators(self, evaluator_id: int): for e in self.items: e.evaluator_id = evaluator_id From 7dceb90ddf65a8b696a8ab1d627e543b0c7a0fc0 Mon Sep 17 00:00:00 2001 From: Hamilakis 
Nicolas Date: Mon, 6 Feb 2023 16:11:34 +0100 Subject: [PATCH 10/28] bugfix: fix imports after refactoring --- vocolab/admin/main.py | 10 +- vocolab/core/evaluators_lib.py | 2 +- vocolab/core/leaderboards_lib.py | 170 +++++++++++----------- vocolab/core/submission_lib/logs.py | 2 +- vocolab/core/submission_lib/submission.py | 9 +- vocolab/core/users_lib.py | 3 +- vocolab/core/worker_lib/tasks/echo.py | 4 +- vocolab/core/worker_lib/tasks/eval.py | 52 +++---- vocolab/core/worker_lib/tasks/update.py | 12 +- vocolab/data/__init__.py | 3 - vocolab/settings.py | 16 +- vocolab/worker/echo_test.py | 4 +- vocolab/worker/server.py | 8 +- 13 files changed, 149 insertions(+), 146 deletions(-) diff --git a/vocolab/admin/main.py b/vocolab/admin/main.py index 2805f1c..bef81ef 100644 --- a/vocolab/admin/main.py +++ b/vocolab/admin/main.py @@ -6,7 +6,7 @@ # settings _settings = get_settings() -has_db = (_settings.DATA_FOLDER / _settings.database_options.db_file).is_file() +has_db = _settings.database_file.is_file() has_users = has_db and _settings.user_data_dir.is_dir() has_challenges = has_db has_submissions = _settings.submission_dir.is_dir() @@ -22,15 +22,15 @@ def build_cli(): # user functions tree.add_cmd_tree( commands.user.UsersCMD(CMD_NAME, 'users', ''), - commands.user.UserSessionsCMD(CMD_NAME, 'sessions', 'users'), - commands.user.CloseUserSessionsCMD(CMD_NAME, 'close', 'users:sessions'), - commands.user.CreateUserSessionsCMD(CMD_NAME, 'create', 'users:sessions'), + # commands.user.UserSessionsCMD(CMD_NAME, 'sessions', 'users'), + # commands.user.CloseUserSessionsCMD(CMD_NAME, 'close', 'users:sessions'), + commands.user.CreateUserSessionCMD(CMD_NAME, 'create', 'users:sessions'), commands.user.CreateUserCMD(CMD_NAME, 'create', 'users'), commands.user.VerifyUserCMD(CMD_NAME, 'verify', 'users'), commands.user.UserActivationCMD(CMD_NAME, 'activate', 'users'), commands.user.PasswordUserCMD(CMD_NAME, 'password', 'users'), commands.user.CheckPasswordCMD(CMD_NAME, 'check', 
'users:password'), - commands.user.ResetSessionsCMD(CMD_NAME, 'reset', 'users:password'), + # commands.user.ResetSessionsCMD(CMD_NAME, 'reset', 'users:password'), commands.user.NotifyCMD(CMD_NAME, 'notify', 'users'), commands.user.DeleteUser(CMD_NAME, 'delete', 'users') ) diff --git a/vocolab/core/evaluators_lib.py b/vocolab/core/evaluators_lib.py index 1ed7108..e8b42aa 100644 --- a/vocolab/core/evaluators_lib.py +++ b/vocolab/core/evaluators_lib.py @@ -4,7 +4,7 @@ import yaml from vocolab import get_settings -from vocolab.db import models +from vocolab.data import models from vocolab.core import commons _settings = get_settings() diff --git a/vocolab/core/leaderboards_lib.py b/vocolab/core/leaderboards_lib.py index b1b56e3..4724156 100644 --- a/vocolab/core/leaderboards_lib.py +++ b/vocolab/core/leaderboards_lib.py @@ -3,8 +3,7 @@ from typing import Dict from vocolab import out, get_settings -from vocolab.db import schema -from vocolab.db.q import leaderboardQ, challengesQ +from vocolab.data import models, model_queries from vocolab.core import commons, misc _settings = get_settings() @@ -27,94 +26,101 @@ def rebuild_leaderboard_index(leaderboard_entries, *, key): async def build_leaderboard(*, leaderboard_id: int): - leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - leaderboard_entries = [] - static_location = get_static_location(leaderboard.label) - - # create static dir - if leaderboard.static_files: - static_location.mkdir(exist_ok=True, parents=True) - - # load external entries - external_entries = [ - *leaderboard.external_entries.rglob('*.json'), - *leaderboard.external_entries.rglob('*.yaml'), - *leaderboard.external_entries.rglob('*.yml') - ] - for item in external_entries: - leaderboard_entries.append(commons.load_dict_file(item)) - - # copy external static files - if leaderboard.static_files and (leaderboard.external_entries / 'static').is_dir(): - commons.copy_all_contents(leaderboard.external_entries / 'static', 
static_location) - - if not leaderboard.archived: - submission_list = await challengesQ.list_submission(by_track=leaderboard.challenge_id) - for sub in submission_list: - # skip not completed submissions - if sub.status != schema.SubmissionStatus.completed: - continue - - # append submission to leaderboard - sub_location = _fs.submissions.get_submission_dir(sub.id) - leaderboard_entry = _fs.leaderboards.load_entry_from_sub(sub.id, leaderboard.entry_file) - - # if author_label is set use database value over local - if sub.author_label and len(leaderboard_entry) > 0: - leaderboard_entry['author_label'] = sub.author_label - - # append to leaderboard - leaderboard_entries.append(leaderboard_entry) - - # grab all static files - # todo: check is static file section is obsolete ? - if leaderboard.static_files and (sub_location / 'static').is_dir(): - _fs.commons.copy_all_contents(sub_location / 'static', static_location) - - if leaderboard.sorting_key: - try: - leaderboard_entries = rebuild_leaderboard_index(leaderboard_entries, key=leaderboard.sorting_key) - except KeyError: - out.log.error(f"Failed to build index for leaderboard={leaderboard.label} " - f"with sorting_key: {leaderboard.sorting_key}") - # Export to file - with (_settings.leaderboard_dir / leaderboard.path_to).open('w') as fp: - json.dump(dict( - updatedOn=datetime.now().isoformat(), - data=leaderboard_entries - ), fp) - - return _settings.leaderboard_dir / leaderboard.path_to + pass + # todo recheck + # leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) + # leaderboard_entries = [] + # static_location = get_static_location(leaderboard.label) + # + # # create static dir + # if leaderboard.static_files: + # static_location.mkdir(exist_ok=True, parents=True) + # + # # load external entries + # external_entries = [ + # *leaderboard.external_entries.rglob('*.json'), + # *leaderboard.external_entries.rglob('*.yaml'), + # *leaderboard.external_entries.rglob('*.yml') + # ] + # for 
item in external_entries: + # leaderboard_entries.append(commons.load_dict_file(item)) + # + # # copy external static files + # if leaderboard.static_files and (leaderboard.external_entries / 'static').is_dir(): + # commons.copy_all_contents(leaderboard.external_entries / 'static', static_location) + # + # if not leaderboard.archived: + # submission_list = await challengesQ.list_submission(by_track=leaderboard.challenge_id) + # for sub in submission_list: + # # skip not completed submissions + # if sub.status != schema.SubmissionStatus.completed: + # continue + # + # # append submission to leaderboard + # sub_location = _fs.submissions.get_submission_dir(sub.id) + # leaderboard_entry = _fs.leaderboards.load_entry_from_sub(sub.id, leaderboard.entry_file) + # + # # if author_label is set use database value over local + # if sub.author_label and len(leaderboard_entry) > 0: + # leaderboard_entry['author_label'] = sub.author_label + # + # # append to leaderboard + # leaderboard_entries.append(leaderboard_entry) + # + # # grab all static files + # # todo: check is static file section is obsolete ? 
+ # if leaderboard.static_files and (sub_location / 'static').is_dir(): + # _fs.commons.copy_all_contents(sub_location / 'static', static_location) + # + # if leaderboard.sorting_key: + # try: + # leaderboard_entries = rebuild_leaderboard_index(leaderboard_entries, key=leaderboard.sorting_key) + # except KeyError: + # out.log.error(f"Failed to build index for leaderboard={leaderboard.label} " + # f"with sorting_key: {leaderboard.sorting_key}") + # # Export to file + # with (_settings.leaderboard_dir / leaderboard.path_to).open('w') as fp: + # json.dump(dict( + # updatedOn=datetime.now().isoformat(), + # data=leaderboard_entries + # ), fp) + # + # return _settings.leaderboard_dir / leaderboard.path_to async def get_leaderboard(*, leaderboard_id) -> Dict: """ Load leaderboard object file """ - leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - return _fs.commons.load_dict_file(_settings.leaderboard_dir / leaderboard.path_to) + pass + # todo recheck + # leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) + # return _fs.commons.load_dict_file(_settings.leaderboard_dir / leaderboard.path_to) async def create(*, challenge_id, label, entry_file, external_entries, static_files, path_to, archived): """ Create a new leaderboard """ - if external_entries is not None: - external_entries = (_fs.leaderboards.get_leaderboard_archive_location() / external_entries) - - ld = schema.LeaderBoard( - challenge_id=challenge_id, - label=label, - entry_file=entry_file, - archived=archived, - external_entries=external_entries, - path_to=(_fs.leaderboards.get_leaderboard_location() / path_to), - static_files=static_files - ) - lead_id = await leaderboardQ.create_leaderboard(lead_data=ld) - # issue: do we want auto-build on creation ? 
- await build_leaderboard(leaderboard_id=lead_id) - return lead_id + # todo recheck + # if external_entries is not None: + # external_entries = (_fs.leaderboards.get_leaderboard_archive_location() / external_entries) + # + # ld = schema.LeaderBoard( + # challenge_id=challenge_id, + # label=label, + # entry_file=entry_file, + # archived=archived, + # external_entries=external_entries, + # path_to=(_fs.leaderboards.get_leaderboard_location() / path_to), + # static_files=static_files + # ) + # lead_id = await leaderboardQ.create_leaderboard(lead_data=ld) + # # issue: do we want auto-build on creation ? + # await build_leaderboard(leaderboard_id=lead_id) + # return lead_id async def build_all_challenge(challenge_id: int): - leaderboard_list = await leaderboardQ.get_leaderboards(by_challenge_id=challenge_id) - - for ld in leaderboard_list: - await build_leaderboard(leaderboard_id=ld.id) + pass + # todo recheck + # leaderboard_list = await leaderboardQ.get_leaderboards(by_challenge_id=challenge_id) + # + # for ld in leaderboard_list: + # await build_leaderboard(leaderboard_id=ld.id) diff --git a/vocolab/core/submission_lib/logs.py b/vocolab/core/submission_lib/logs.py index afa8d45..657c4e8 100644 --- a/vocolab/core/submission_lib/logs.py +++ b/vocolab/core/submission_lib/logs.py @@ -10,7 +10,7 @@ _settings = get_settings() -class SubmissionLogger(BaseModel): +class SubmissionLogger(BaseModel, arbitrary_types_allowed=True): """ Class managing individual logging of submission life-cycle """ root_dir: Path fp_write: Optional[TextIO] = None diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py index 4d251ad..9e8a51e 100644 --- a/vocolab/core/submission_lib/submission.py +++ b/vocolab/core/submission_lib/submission.py @@ -8,8 +8,8 @@ from fastapi import UploadFile from pydantic import BaseModel -from ...db import models -from ...settings import get_settings +from vocolab.data import models +from vocolab import get_settings from 
..commons import unzip, ssh_exec, rsync, zip_folder, scp from .logs import SubmissionLogger from .upload import MultipartUploadHandler, SinglepartUploadHandler @@ -27,7 +27,7 @@ class SubmissionInfo(BaseModel): leaderboard_entries: Dict[str, Path] -class SubmissionDir(BaseModel): +class SubmissionDir(BaseModel, arbitrary_types_allowed=True): """ Handler interfacing a submission directory stored on disk """ root_dir: Path @@ -69,8 +69,7 @@ def has_info(self) -> bool: """ Check whether info file is present""" return self.info_file.is_file() - @functools.lru_cache - @property + @functools.lru_cache() def info(self) -> SubmissionInfo: """ Load submission information """ with self.info_file.open() as fp: diff --git a/vocolab/core/users_lib.py b/vocolab/core/users_lib.py index abfe5d3..7f31005 100644 --- a/vocolab/core/users_lib.py +++ b/vocolab/core/users_lib.py @@ -2,9 +2,10 @@ import json import os from datetime import datetime -from typing import Callable, Optional +from typing import Optional from pydantic import BaseModel, Extra, EmailStr + from vocolab import get_settings, exc _settings = get_settings() diff --git a/vocolab/core/worker_lib/tasks/echo.py b/vocolab/core/worker_lib/tasks/echo.py index 4e378f6..cb11e98 100644 --- a/vocolab/core/worker_lib/tasks/echo.py +++ b/vocolab/core/worker_lib/tasks/echo.py @@ -1,11 +1,11 @@ import os from vocolab import out, get_settings -from vocolab.db.models import tasks +from vocolab.data import models _settings = get_settings() -def echo_fn(slm: tasks.SimpleLogMessage): +def echo_fn(slm: models.tasks.SimpleLogMessage): """ Simple task that echoes a message into the log""" out.log.info(f"{os.getpid()} | \[{slm.timestamp.isoformat()}\] {slm.message}") diff --git a/vocolab/core/worker_lib/tasks/eval.py b/vocolab/core/worker_lib/tasks/eval.py index 90220f2..0528f0d 100644 --- a/vocolab/core/worker_lib/tasks/eval.py +++ b/vocolab/core/worker_lib/tasks/eval.py @@ -4,7 +4,7 @@ from typing import List from vocolab import out, 
get_settings, exc -from vocolab.db.models import tasks +from vocolab.data import models from vocolab.core import submission_lib _settings = get_settings() @@ -25,7 +25,7 @@ def verify_host_bin(): raise exc.ServerError(f"No bin directory configured for current host {_settings.app_options.hostname}") -def build_cmd(_cmd: tasks.SubmissionEvaluationMessage) -> List[str]: +def build_cmd(_cmd: models.tasks.SubmissionEvaluationMessage) -> List[str]: """ Build a subprocess command from an evaluation message """ executor = _cmd.executor.to_exec() @@ -38,13 +38,13 @@ def build_cmd(_cmd: tasks.SubmissionEvaluationMessage) -> List[str]: script = bin_path / _cmd.script_name cmd_list = [executor] - if _cmd.executor == tasks.ExecutorsType.sbatch: + if _cmd.executor == models.tasks.ExecutorsType.sbatch: cmd_list.extend([ f"--job-name='{_cmd.label}'", # name the job on slurmDB f"--output={sub_dir}/slurm.log", "--wait", # wait for the process to complete ]) - elif _cmd.executor == tasks.ExecutorsType.docker: + elif _cmd.executor == models.tasks.ExecutorsType.docker: raise NotImplementedError("should add some verification for docker-run support") # custom executor args from DB @@ -57,7 +57,7 @@ def build_cmd(_cmd: tasks.SubmissionEvaluationMessage) -> List[str]: return cmd_list -def eval_subprocess(_cmd: tasks.SubmissionEvaluationMessage): +def eval_subprocess(_cmd: models.tasks.SubmissionEvaluationMessage): """ Evaluate a subprocess type BrokerCMD """ cmd_array = build_cmd(_cmd) out.log.debug(f"$> {shlex.join(cmd_array)}") @@ -71,27 +71,29 @@ def eval_subprocess(_cmd: tasks.SubmissionEvaluationMessage): return result.returncode, output -def post_eval_update(status: int, sem: tasks.SubmissionEvaluationMessage): +def post_eval_update(status: int, sem: models.tasks.SubmissionEvaluationMessage): """ Send message to update queue that evaluation is completed. 
""" - from vocolab.worker.server import update - from vocolab.db.models.tasks import SubmissionUpdateMessage, UpdateType - - sum_ = SubmissionUpdateMessage( - label=f"{_settings.app_options.hostname}-completed-{sem.submission_id}", - submission_id=sem.submission_id, - updateType=UpdateType.evaluation_undefined, - hostname=f"{_settings.app_options.hostname}" - ) - if status == 0: - sum_.updateType = UpdateType.evaluation_complete - else: - sum_.updateType = UpdateType.evaluation_failed - - # send update to channel - update.delay(sum_=sum_.dict()) - - -def evaluate_submission_fn(sem: tasks.SubmissionEvaluationMessage): + pass + # todo recheck + # from vocolab.worker.server import update + # from vocolab.db.models.tasks import SubmissionUpdateMessage, UpdateType + # + # sum_ = SubmissionUpdateMessage( + # label=f"{_settings.app_options.hostname}-completed-{sem.submission_id}", + # submission_id=sem.submission_id, + # updateType=UpdateType.evaluation_undefined, + # hostname=f"{_settings.app_options.hostname}" + # ) + # if status == 0: + # sum_.updateType = UpdateType.evaluation_complete + # else: + # sum_.updateType = UpdateType.evaluation_failed + # + # # send update to channel + # update.delay(sum_=sum_.dict()) + + +def evaluate_submission_fn(sem: models.tasks.SubmissionEvaluationMessage): status, eval_output = eval_subprocess(sem) if status == 0: out.log.info(f"Evaluation of {sem.submission_id} was completed successfully") diff --git a/vocolab/core/worker_lib/tasks/update.py b/vocolab/core/worker_lib/tasks/update.py index 5396a5a..30244ff 100644 --- a/vocolab/core/worker_lib/tasks/update.py +++ b/vocolab/core/worker_lib/tasks/update.py @@ -1,27 +1,27 @@ import asyncio from vocolab import out, get_settings -from vocolab.db.models import tasks +from vocolab.data import models from vocolab.core import submission_lib _settings = get_settings() -def update_task_fn(sum_: tasks.SubmissionUpdateMessage): - async def eval_function(msg: tasks.SubmissionUpdateMessage): +def 
update_task_fn(sum_: models.tasks.SubmissionUpdateMessage): + async def eval_function(msg: models.tasks.SubmissionUpdateMessage): """ Evaluate a function type BrokerCMD """ with submission_lib.SubmissionLogger(msg.submission_id) as lg: out.log.debug(msg.dict()) - if msg.updateType == tasks.UpdateType.evaluation_complete: + if msg.updateType == models.tasks.UpdateType.evaluation_complete: await submission_lib.complete_evaluation( submission_id=msg.submission_id, hostname=msg.hostname, logger=lg) - elif msg.updateType == tasks.UpdateType.evaluation_failed: + elif msg.updateType == models.tasks.UpdateType.evaluation_failed: await submission_lib.fail_evaluation( submission_id=msg.submission_id, hostname=msg.hostname, logger=lg) - elif msg.updateType == tasks.UpdateType.evaluation_canceled: + elif msg.updateType == models.tasks.UpdateType.evaluation_canceled: await submission_lib.cancel_evaluation( submission_id=msg.submission_id, hostname=msg.hostname, logger=lg) diff --git a/vocolab/data/__init__.py b/vocolab/data/__init__.py index 1d9ac5b..e69de29 100644 --- a/vocolab/data/__init__.py +++ b/vocolab/data/__init__.py @@ -1,3 +0,0 @@ -from vocolab.db.base import ( - create_db, zrDB, users_metadata -) diff --git a/vocolab/settings.py b/vocolab/settings.py index 946afc0..5d2032f 100644 --- a/vocolab/settings.py +++ b/vocolab/settings.py @@ -1,15 +1,14 @@ -import secrets -import shutil - import os import platform +import secrets +import shutil import tempfile from contextlib import contextmanager from datetime import timedelta from functools import lru_cache -from pathlib import Path -from typing import List, Union, Set, Dict, Optional, Literal, Generator from importlib.metadata import version, PackageNotFoundError +from pathlib import Path +from typing import List, Union, Set, Dict, Optional, Generator try: from tomllib import load as toml_load @@ -17,7 +16,7 @@ from toml import load as toml_load from pydantic import ( - BaseSettings, EmailStr, DirectoryPath, HttpUrl, 
IPvAnyNetwork, BaseModel, Field + BaseSettings, EmailStr, DirectoryPath, HttpUrl, IPvAnyNetwork, BaseModel ) @@ -151,9 +150,9 @@ class UserSettings(BaseModel): class _VocoLabSettings(BaseSettings): """ Base Settings for module """ app_home: DirectoryPath = Path(__file__).parent - DATA_FOLDER: DirectoryPath = Path('data/') + DATA_FOLDER: DirectoryPath = Path('/data') TMP_ROOT: DirectoryPath = Path('/tmp') - ARCHIVE_FOLDER: Path + ARCHIVE_FOLDER: Path = Path('/archive') ARCHIVE_HOST: str = "localhost" # Settings Categories @@ -166,7 +165,6 @@ class _VocoLabSettings(BaseSettings): notify_options: NotifySettings = NotifySettings() server_options: ServerSettings = ServerSettings() user_options: UserSettings = UserSettings() - database_options: DatabaseSettings = DatabaseSettings() CUSTOM_TEMPLATES_DIR: Optional[Path] = None diff --git a/vocolab/worker/echo_test.py b/vocolab/worker/echo_test.py index 2a4cc32..9b74411 100644 --- a/vocolab/worker/echo_test.py +++ b/vocolab/worker/echo_test.py @@ -1,11 +1,11 @@ -from vocolab.db.models import tasks as model_task +from vocolab.data import models from vocolab.worker.server import echo while True: msg = input("msg1: ") if msg == "quit": break - slm = model_task.SimpleLogMessage(label="test-client", message=msg) + slm = models.tasks.SimpleLogMessage(label="test-client", message=msg) echo.delay(slm.dict()) print("submitted\nNext") diff --git a/vocolab/worker/server.py b/vocolab/worker/server.py index 6b2b5c0..c24b869 100644 --- a/vocolab/worker/server.py +++ b/vocolab/worker/server.py @@ -5,7 +5,7 @@ from celery import Celery from vocolab import out, get_settings -from vocolab.db.models import tasks +from vocolab.data import models from vocolab.core import worker_lib # """"""""""""""""""""""""""""""""""""" @@ -28,19 +28,19 @@ @app.task(name='echo-task', ignore_result=True) def echo(slm: Dict): - slm = tasks.SimpleLogMessage(**slm) + slm = models.tasks.SimpleLogMessage(**slm) worker_lib.tasks.echo_fn(slm) 
@app.task(name='update-task', ignore_result=True) def update(sum_: Dict): - sum_ = tasks.SubmissionUpdateMessage(**sum_) + sum_ = models.tasks.SubmissionUpdateMessage(**sum_) out.log.log(f'updating {sum_.submission_id}') worker_lib.tasks.update_task_fn(sum_) @app.task(name='eval-task', ignore_result=True) def evaluate(sem: Dict): - sem = tasks.SubmissionEvaluationMessage(**sem) + sem = models.tasks.SubmissionEvaluationMessage(**sem) out.log.log(f'evaluating {sem.submission_id}') worker_lib.tasks.evaluate_submission_fn(sem) From 406a4399f34a7c30d20c6a8af08984040bd2cad2 Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Mon, 6 Feb 2023 16:20:50 +0100 Subject: [PATCH 11/28] new challenge list --- samples/challenges.json | 56 --------------------------- samples/challenges_list.json | 74 ++++++++++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 56 deletions(-) delete mode 100644 samples/challenges.json create mode 100644 samples/challenges_list.json diff --git a/samples/challenges.json b/samples/challenges.json deleted file mode 100644 index 2ad57d0..0000000 --- a/samples/challenges.json +++ /dev/null @@ -1,56 +0,0 @@ -[ - { - "id": 1, - "label": "test-challenge", - "start_date": "2022-02-21", - "end_date": null, - "url": "https://zerospeech.com/track/test", - "active": true, - "evaluator": null - }, - { - "id": 2, - "label": "zr2015", - "start_date": "2015-01-20", - "end_date": "2015-04-30", - "url": "https://zerospeech.com/track/2015", - "active": false, - "evaluator": null - }, - { - "id": 3, - "label": "zr2017", - "start_date": "2017-01-20", - "end_date": "2017-04-30", - "url": "https://zerospeech.com/track/2017", - "active": false, - "evaluator": null - }, - { - "id": 4, - "label": "zr2019", - "start_date": "2019-01-20", - "end_date": "2019-04-30", - "url": "https://zerospeech.com/track/2019", - "active": false, - "evaluator": null - }, - { - "id": 5, - "label": "zr2020", - "start_date": "2020-01-20", - "end_date": "2020-04-30", - "url": 
"https://zerospeech.com/track/2020", - "active": false, - "evaluator": null - }, - { - "id": 6, - "label": "zr2021", - "start_date": "2020-12-12", - "end_date": null, - "url": "https://zerospeech.com/track/2021", - "active": true, - "evaluator": null - } -] \ No newline at end of file diff --git a/samples/challenges_list.json b/samples/challenges_list.json new file mode 100644 index 0000000..6ac46ce --- /dev/null +++ b/samples/challenges_list.json @@ -0,0 +1,74 @@ +[ + { + "id": 1, + "label": "test-challenge", + "start_date": "2022-02-21", + "end_date": null, + "url": "https://zerospeech.com/track/test", + "active": true, + "evaluator": null + }, + { + "id": 2, + "label": "abx-15", + "start_date": "2015-01-20", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#zr2015-and-abx15", + "active": false, + "evaluator": null + }, + { + "id": 3, + "label": "abx-17", + "start_date": "2017-01-20", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#zrc2017-and-abx17", + "active": false, + "evaluator": null + }, + { + "id": 4, + "label": "abx-LS", + "start_date": "2021-12-12", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#abxls-dataset-and-benchmark", + "active": false, + "evaluator": null + }, + { + "id": 5, + "label": "tde-15", + "start_date": "2020-01-20", + "end_date": "2020-04-30", + "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", + "active": false, + "evaluator": null + }, + { + "id": 6, + "label": "tde-17", + "start_date": "2017-01-20", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", + "active": false, + "evaluator": null + }, + { + "id": 7, + "label": "slm-21", + "start_date": "2021-12-12", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_4/tasks_goals/", + "active": false, + "evaluator": null + }, + { + "id": 8, + "label": "ttso-19", + "start_date": "2019-01-20", + "end_date": null, + "url": 
"https://zerospeech.com/tasks/task_3/tasks_goals/", + "active": false, + "evaluator": null + } +] \ No newline at end of file From 3e33376300e308f7aba0099394c1721b290fd10c Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Wed, 8 Feb 2023 18:58:07 +0100 Subject: [PATCH 12/28] leaderboards data --- samples/challenges_list.json | 4 +- samples/example.env | 0 samples/leaderboards_list.json | 50 ++++++ vocolab/admin/commands/leaderboards.py | 26 ++- vocolab/admin/commands/user.py | 72 ++++---- vocolab/core/leaderboards_lib.py | 209 ++++++++++------------- vocolab/core/users_lib.py | 7 +- vocolab/data/db.py | 4 +- vocolab/data/model_queries/auth.py | 8 +- vocolab/data/model_queries/challenges.py | 25 ++- vocolab/data/models/api/leaerboards.py | 51 +++++- vocolab/data/tables.py | 3 - vocolab/settings.py | 19 +++ 13 files changed, 279 insertions(+), 199 deletions(-) delete mode 100644 samples/example.env create mode 100644 samples/leaderboards_list.json diff --git a/samples/challenges_list.json b/samples/challenges_list.json index 6ac46ce..c926717 100644 --- a/samples/challenges_list.json +++ b/samples/challenges_list.json @@ -38,8 +38,8 @@ { "id": 5, "label": "tde-15", - "start_date": "2020-01-20", - "end_date": "2020-04-30", + "start_date": "2015-01-20", + "end_date": null, "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", "active": false, "evaluator": null diff --git a/samples/example.env b/samples/example.env deleted file mode 100644 index e69de29..0000000 diff --git a/samples/leaderboards_list.json b/samples/leaderboards_list.json new file mode 100644 index 0000000..d187045 --- /dev/null +++ b/samples/leaderboards_list.json @@ -0,0 +1,50 @@ +[ + { + "label": "test-leaderboard", + "challenge_id": 1, + "archived": false, + "static_files": false + }, + { + "label": "abx-15-leaderboard", + "challenge_id": 2, + "archived": false, + "static_files": false + }, + { + "label": "abx-17-leaderboard", + "challenge_id": 3, + "archived": false, + "static_files": false 
+ }, + { + "label": "abx-LS-leaderboard", + "challenge_id": 4, + "archived": false, + "static_files": false + }, + { + "label": "sLM21-leaderboard", + "challenge_id": 7, + "archived": false, + "static_files": false + }, + { + "label": "tde-15-leaderboard", + "challenge_id": 5, + "archived": false, + "static_files": false + }, + { + "label": "tde-17-leaderboard", + "challenge_id": 6, + "archived": false, + "static_files": false + }, + { + "label": "tts0-leaderboard", + "challenge_id": 8, + "archived": false, + "static_files": false + } +] \ No newline at end of file diff --git a/vocolab/admin/commands/leaderboards.py b/vocolab/admin/commands/leaderboards.py index 228d748..1f9741f 100644 --- a/vocolab/admin/commands/leaderboards.py +++ b/vocolab/admin/commands/leaderboards.py @@ -8,7 +8,7 @@ from vocolab import out from vocolab.core import leaderboards_lib, cmd_lib -from vocolab.data import model_queries +from vocolab.data import model_queries, models class LeaderboardCMD(cmd_lib.CMD): @@ -28,18 +28,15 @@ def run(self, argv): table.add_column('ID') table.add_column('Label') table.add_column('Archived') - table.add_column('External Entries', no_wrap=False, overflow='fold') table.add_column('Static Files') table.add_column('Challenge ID') - table.add_column('EntryFile', no_wrap=False, overflow='fold') - table.add_column('LeaderboardFile', no_wrap=False, overflow='fold') table.add_column('Key', no_wrap=False, overflow='fold') - for entry in leaderboards: + table.add_row( f"{entry.id}", f"{entry.label}", f"{entry.archived}", - f"{entry.external_entries}", f"{entry.static_files}", f"{entry.challenge_id}", - f"{entry.entry_file}", f"{entry.path_to}", f"{entry.sorting_key}" + f"{entry.static_files}", f"{entry.challenge_id}", + f"{entry.sorting_key}" ) # print table out.cli.print(table, no_wrap=False) @@ -105,14 +102,13 @@ def run(self, argv): lds = [self.ask_input()] for item in lds: - asyncio.run(leaderboards_lib.create( - challenge_id=item.get("challenge_id"), - 
label=item.get("label"), - entry_file=item.get("entry_file"), - external_entries=item.get("external_entries"), - static_files=item.get("static_files", False), - archived=item.get("archived", False), - path_to=item.get("path_to") + asyncio.run(model_queries.Leaderboard.create( + model_queries.Leaderboard( + challenge_id=item.get("challenge_id"), + label=item.get("label"), + static_files=item.get("static_files", False), + archived=item.get("archived", False), + ) )) out.cli.info(f"Successfully created leaderboard : {item.get('label')}") diff --git a/vocolab/admin/commands/user.py b/vocolab/admin/commands/user.py index a06df42..73b7bb0 100644 --- a/vocolab/admin/commands/user.py +++ b/vocolab/admin/commands/user.py @@ -3,14 +3,14 @@ import string import sys from pathlib import Path -from typing import Tuple +from typing import Tuple, Optional from pydantic import EmailStr from rich.prompt import Prompt from rich.table import Table from vocolab import out, get_settings -from vocolab.core import notify, cmd_lib +from vocolab.core import notify, cmd_lib, users_lib from vocolab.data import models, model_queries _settings = get_settings() @@ -87,8 +87,19 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument('-f', '--from-file', type=str, help="Load users from a json file") @staticmethod - def _make_usr(user: models.api.UserCreateRequest): - _ = asyncio.run(model_queries.User.create(new_usr=user)) + async def _make_usr(user: models.api.UserCreateRequest): + verify_code = await model_queries.User.create(new_usr=user) + # notify user for verification + await notify.email.template_email( + emails=[user.email], + subject='[Zerospeech] Account Verification', + data=dict( + username=user.username, + admin_email=_settings.app_options.admin_email, + url=f"{_settings.api_options.API_BASE_URL}{_settings.email_verif_path}?v={verify_code}&username={user.username}" + ), + template_name='email_validation.jinja2' + ) def _create_from_file(self, file: Path): with 
file.open() as fp: @@ -103,7 +114,7 @@ def _create_from_file(self, file: Path): last_name=data.get('last_name'), affiliation=data.get('affiliation') ) - self._make_usr(user) + asyncio.run(self._make_usr(user)) def _create_form_input(self): @@ -128,7 +139,7 @@ def _create_form_input(self): last_name=last_name, affiliation=affiliation ) - self._make_usr(user) + asyncio.run(self._make_usr(user)) def run(self, argv): args = self.parser.parse_args(argv) @@ -177,13 +188,6 @@ def run(self, argv): asyncio.run(self.verify_all()) elif args.send: # send verification email - try: - with (_settings.DATA_FOLDER / 'email_verification.path').open() as fp: - verification_path = fp.read() - except FileNotFoundError: - out.cli.error("Path file not found in settings") - sys.exit(1) - try: user = asyncio.run(model_queries.User.get(by_uid=args.send)) except ValueError: @@ -197,7 +201,7 @@ def run(self, argv): data=dict( username=user.username, admin_email=_settings.app_options.admin_email, - url=f"{_settings.api_options.API_BASE_URL}{verification_path}?v={user.verified}&username={user.username}" + url=f"{_settings.api_options.API_BASE_URL}{_settings.email_verif_path}?v={user.verified}&username={user.username}" ), template_name='email_validation.jinja2' )) @@ -373,29 +377,21 @@ def __init__(self, root, name, cmd_path): self.parser.add_argument('--save', help="path to save user details as info") @staticmethod - async def delete_user(user_id: int): - pass - # user_submissions = await challengesQ.list_submission(by_user=user_id) - # if len(user_submissions) > 0: - # out.cli.print(f"User {user_id} has {len(user_submissions)} unarchived submissions !!\n" - # f"Cannot delete, archive submissions and try again !!") - # sys.exit(1) - # - # user = await userQ.get_user(by_uid=user_id) - # user_dict = user.dict() - # - # await userQ.delete_session(by_uid=user_id) - # await userQ.clear_password_reset_sessions(by_uid=user_id) - # await userQ.delete_user(uid=user_id) - # return user_dict + async def 
delete_user(user_id: int, save_to_file: Optional[Path] = None): + user = await model_queries.User.get(by_uid=user_id) + profile_data = users_lib.UserProfileData.load(username=user.username) + profile_data.delete() + + # todo: check if user has any assets and skip deletion + # user-assets: leaderboard-entries, models, submissions + await user.delete() + def run(self, argv): - pass - # args = self.parser.parse_args(argv) - # user_dict = asyncio.run(self.delete_user(args.user_id)) - # out.cli.info(f'User {args.user_id} deleted successfully !!') - # - # if args.save: - # out.cli.info(f"backing up user @ {args.save}") - # with Path(args.save).with_suffix('.json').open('w') as fp: - # json.dump(user_dict, fp, cls=CustomTypesJsonEncoder) + args = self.parser.parse_args(argv) + save_to = None + if args.save: + save_to = Path(args.save) + + # Delete user + asyncio.run(self.delete_user(args.user_id, save_to)) diff --git a/vocolab/core/leaderboards_lib.py b/vocolab/core/leaderboards_lib.py index 4724156..3568cd5 100644 --- a/vocolab/core/leaderboards_lib.py +++ b/vocolab/core/leaderboards_lib.py @@ -1,126 +1,97 @@ import json +import shutil from datetime import datetime -from typing import Dict +from pathlib import Path +from typing import Generator, Optional -from vocolab import out, get_settings -from vocolab.data import models, model_queries -from vocolab.core import commons, misc +from pydantic import BaseModel + +from vocolab import get_settings +from vocolab.data import models _settings = get_settings() -def get_static_location(label: str): - # todo: check why this is in static files ? 
- return _settings.static_files_directory / 'leaderboards' / label - - -def rebuild_leaderboard_index(leaderboard_entries, *, key): - """ sort entries by using a specific key and re-write the index with the new ordering """ - - leaderboard_entries = sorted(leaderboard_entries, key=lambda x: misc.key_to_value(x, key=key)) - - for i, entry in enumerate(leaderboard_entries, 1): - entry['index'] = i - - return leaderboard_entries - - -async def build_leaderboard(*, leaderboard_id: int): - pass - # todo recheck - # leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - # leaderboard_entries = [] - # static_location = get_static_location(leaderboard.label) - # - # # create static dir - # if leaderboard.static_files: - # static_location.mkdir(exist_ok=True, parents=True) - # - # # load external entries - # external_entries = [ - # *leaderboard.external_entries.rglob('*.json'), - # *leaderboard.external_entries.rglob('*.yaml'), - # *leaderboard.external_entries.rglob('*.yml') - # ] - # for item in external_entries: - # leaderboard_entries.append(commons.load_dict_file(item)) - # - # # copy external static files - # if leaderboard.static_files and (leaderboard.external_entries / 'static').is_dir(): - # commons.copy_all_contents(leaderboard.external_entries / 'static', static_location) - # - # if not leaderboard.archived: - # submission_list = await challengesQ.list_submission(by_track=leaderboard.challenge_id) - # for sub in submission_list: - # # skip not completed submissions - # if sub.status != schema.SubmissionStatus.completed: - # continue - # - # # append submission to leaderboard - # sub_location = _fs.submissions.get_submission_dir(sub.id) - # leaderboard_entry = _fs.leaderboards.load_entry_from_sub(sub.id, leaderboard.entry_file) - # - # # if author_label is set use database value over local - # if sub.author_label and len(leaderboard_entry) > 0: - # leaderboard_entry['author_label'] = sub.author_label - # - # # append to leaderboard - # 
leaderboard_entries.append(leaderboard_entry) - # - # # grab all static files - # # todo: check is static file section is obsolete ? - # if leaderboard.static_files and (sub_location / 'static').is_dir(): - # _fs.commons.copy_all_contents(sub_location / 'static', static_location) - # - # if leaderboard.sorting_key: - # try: - # leaderboard_entries = rebuild_leaderboard_index(leaderboard_entries, key=leaderboard.sorting_key) - # except KeyError: - # out.log.error(f"Failed to build index for leaderboard={leaderboard.label} " - # f"with sorting_key: {leaderboard.sorting_key}") - # # Export to file - # with (_settings.leaderboard_dir / leaderboard.path_to).open('w') as fp: - # json.dump(dict( - # updatedOn=datetime.now().isoformat(), - # data=leaderboard_entries - # ), fp) - # - # return _settings.leaderboard_dir / leaderboard.path_to - - -async def get_leaderboard(*, leaderboard_id) -> Dict: - """ Load leaderboard object file """ - pass - # todo recheck - # leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - # return _fs.commons.load_dict_file(_settings.leaderboard_dir / leaderboard.path_to) - - -async def create(*, challenge_id, label, entry_file, external_entries, static_files, path_to, archived): - """ Create a new leaderboard """ - # todo recheck - # if external_entries is not None: - # external_entries = (_fs.leaderboards.get_leaderboard_archive_location() / external_entries) - # - # ld = schema.LeaderBoard( - # challenge_id=challenge_id, - # label=label, - # entry_file=entry_file, - # archived=archived, - # external_entries=external_entries, - # path_to=(_fs.leaderboards.get_leaderboard_location() / path_to), - # static_files=static_files - # ) - # lead_id = await leaderboardQ.create_leaderboard(lead_data=ld) - # # issue: do we want auto-build on creation ? 
- # await build_leaderboard(leaderboard_id=lead_id) - # return lead_id - - -async def build_all_challenge(challenge_id: int): - pass - # todo recheck - # leaderboard_list = await leaderboardQ.get_leaderboards(by_challenge_id=challenge_id) - # - # for ld in leaderboard_list: - # await build_leaderboard(leaderboard_id=ld.id) +class LeaderboardDir(BaseModel): + """ Handler class for disk storage of Leaderboards """ + location: Path + sorting_key: Optional[str] + + @property + def label(self) -> str: + """ Leaderboard label """ + return self.location.name + + @property + def cached_store(self): + """ Object used to cache build leaderboard (for faster serving) """ + return self.location / 'leaderboard.json' + + @property + def entry_dir(self) -> Path: + """ Location where all leaderboard entries are stored """ + return self.location / 'entries' + + @property + def entries(self) -> Generator[models.api.LeaderboardEntryItem, None, None]: + """ Generator containing entry objects """ + for item in self.entry_dir.glob("*.json"): + with item.open() as fp: + yield models.api.LeaderboardEntryItem.parse_obj(json.load(fp)) + + @property + def static_dir(self): + """ Location containing static items of leaderboard """ + return self.location / 'static' + + def has_static(self): + """ Boolean checking whether this leaderboard has static files """ + return self.static_dir.is_dir() + + def load_object(self, from_cache: bool) -> models.api.LeaderboardObj: + """ Loads leaderboard object (cached or from entries)""" + if from_cache and self.cached_store.is_file(): + with self.cached_store.open() as fp: + return models.api.LeaderboardObj.parse_obj(json.load(fp)) + return models.api.LeaderboardObj( + updatedOn=datetime.now(), + data=[item for item in self.entries], + sorting_key=self.sorting_key + ) + + def mkcache(self): + """ Create cached version of final leaderboard """ + data = self.load_object(from_cache=False) + with self.cached_store.open('w') as fp: + fp.write(data.json(indent=4)) 
+ + @classmethod + def load(cls, label: str, sorting_key: str): + """ Load leaderboard dir """ + loc = _settings.leaderboard_dir / label + if not loc.is_file(): + raise ValueError(f'Leaderboard named {label} does not exist') + return cls( + location=loc, + sorting_key=sorting_key + ) + + @classmethod + def create(cls, label, sorting_key: str, static_files: bool = False) -> "LeaderboardDir": + """ Creates necessary files/architecture to store a leaderboard on disk """ + loc = _settings.leaderboard_dir / label + if loc.is_dir(): + raise ValueError(f'Leaderboard with {label} already exists') + + lead = cls(location=loc, sorting_key=sorting_key) + + lead.location.mkdir(parents=True) + lead.entry_dir.mkdir(parents=True) + if static_files: + lead.static_dir.mkdir(parents=True) + + return lead + + def delete(self): + """ Remove all files relative to this leaderboard """ + shutil.rmtree(self.location) diff --git a/vocolab/core/users_lib.py b/vocolab/core/users_lib.py index 7f31005..43348ff 100644 --- a/vocolab/core/users_lib.py +++ b/vocolab/core/users_lib.py @@ -10,6 +10,7 @@ _settings = get_settings() + class UserProfileData(BaseModel): username: str affiliation: str @@ -31,13 +32,17 @@ def load(cls, username: str): with db_file.open() as fp: return cls.parse_obj(json.load(fp)) - def update(self): + def save(self): if not _settings.user_data_dir.is_dir(): _settings.user_data_dir.mkdir(parents=True) with (_settings.user_data_dir / f"{self.username}.json").open('w') as fp: fp.write(self.json(indent=4)) + def delete(self): + """ Delete profile data from disk""" + file = (_settings.user_data_dir / f"{self.username}.json") + file.unlink(missing_ok=True) def hash_pwd(*, password: str, salt=None): diff --git a/vocolab/data/db.py b/vocolab/data/db.py index 2f094c7..6c0ef2c 100644 --- a/vocolab/data/db.py +++ b/vocolab/data/db.py @@ -12,8 +12,8 @@ zrDB = databases.Database(_settings.database_connection_url) def build_database_from_schema(): - if not (_settings.DATA_FOLDER / 
_settings.database_options.db_file).is_file(): - (_settings.DATA_FOLDER / _settings.database_options.db_file).touch() + if not _settings.database_file.is_file(): + _settings.database_file.touch() engine = sqlalchemy.create_engine( _settings.database_connection_url, connect_args={"check_same_thread": False} diff --git a/vocolab/data/model_queries/auth.py b/vocolab/data/model_queries/auth.py index 6557d8a..48675c0 100644 --- a/vocolab/data/model_queries/auth.py +++ b/vocolab/data/model_queries/auth.py @@ -156,13 +156,15 @@ async def create(cls, *, new_usr: models.api.UserCreateRequest): db_exc.parse_user_insertion(e) # create user profile data - data = models.api.UserData( + profile_data = users_lib.UserProfileData( username=new_usr.username, + email=new_usr.email, affiliation=new_usr.affiliation, first_name=new_usr.first_name, - last_name=new_usr.last_name + last_name=new_usr.last_name, + verified=False ) - users_lib.update_user_data(new_usr.username, data) + profile_data.save() return verification_code diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py index ca7548e..250c84e 100644 --- a/vocolab/data/model_queries/challenges.py +++ b/vocolab/data/model_queries/challenges.py @@ -8,7 +8,7 @@ from pydantic import HttpUrl from vocolab.data import models, tables -from vocolab.core import misc +from vocolab.core import misc, leaderboards_lib from ..db import zrDB, db_exc @@ -189,10 +189,7 @@ class Leaderboard(BaseModel): id: Optional[int] challenge_id: int # Id to linked challenge label: str # Name of leaderboard - path_to: Path # Path to build result - entry_file: str # filename in submission results archived: bool # is_archived - external_entries: Optional[Path] # Location of external entries (baselines, toplines, archived) static_files: bool # has static files sorting_key: Optional[str] # path to the item to use as sorting key @@ -203,19 +200,31 @@ def get_field_names(cls): class Config: orm_mode = True + def 
get_dir(self): + leaderboards_lib.LeaderboardDir.load( + label=self.label, + sorting_key=self.sorting_key + ) + @classmethod async def create(cls, ld_data: 'Leaderboard'): query = tables.leaderboards_table.insert().values( label=ld_data.label, challenge_id=ld_data.challenge_id, - path_to=f"{ld_data.path_to}", - entry_file=ld_data.entry_file, archived=ld_data.archived, - external_entries=f"{ld_data.external_entries}", - static_files=ld_data.static_files + static_files=ld_data.static_files, + sorting_key=ld_data.sorting_key ) try: result = await zrDB.execute(query) + + # make necessary folders in storage + _ = leaderboards_lib.LeaderboardDir.create( + label=ld_data.label, + sorting_key=ld_data.sorting_key, + static_files=ld_data.static_files + ) + return result except Exception as e: db_exc.parse_user_insertion(e) diff --git a/vocolab/data/models/api/leaerboards.py b/vocolab/data/models/api/leaerboards.py index 0ba4103..6ac3d76 100644 --- a/vocolab/data/models/api/leaerboards.py +++ b/vocolab/data/models/api/leaerboards.py @@ -1,10 +1,45 @@ -from pydantic import BaseModel +from datetime import datetime +from typing import Optional, List, Dict, Any, Union +from pydantic import BaseModel, Field, AnyHttpUrl + + +class EntryDetails(BaseModel): + train_set: Optional[str] + benchmarks: List[str] + gpu_budget: Optional[str] + parameters: Dict[str, Any] = Field(default_factory=dict) + +class PublicationEntry(BaseModel): + author_short: Optional[str] + authors: Optional[str] + paper_title: Optional[str] + paper_ref: Optional[str] + bib_ref: Optional[str] + paper_url: Optional[Union[AnyHttpUrl, str]] + pub_year: Optional[int] + team_name: Optional[str] + institution: str + code: Optional[Union[AnyHttpUrl, str]] + DOI: Optional[str] + open_science: bool = False + +class LeaderboardEntryItem(BaseModel): + model_id: Optional[str] + submission_id: str = "" + index: Optional[int] + submission_date: Optional[datetime] + submitted_by: Optional[str] + description: str + publication: 
PublicationEntry + details: EntryDetails + scores: Any + extras: Optional[Dict[str, Any]] + + + +class LeaderboardObj(BaseModel): + updatedOn: datetime + data: List[LeaderboardEntryItem] + sorting_key: Optional[str] -class LeaderboardPublicView(BaseModel): - id: int - challenge_id: int - label: str - entry_file: str - archived: bool - static_files: bool diff --git a/vocolab/data/tables.py b/vocolab/data/tables.py index d0f7f13..74d99f4 100644 --- a/vocolab/data/tables.py +++ b/vocolab/data/tables.py @@ -75,10 +75,7 @@ sqlalchemy.Column('id', sqlalchemy.Integer, primary_key=True, autoincrement=True), sqlalchemy.Column('challenge_id', sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), sqlalchemy.Column('label', sqlalchemy.String, unique=True), - sqlalchemy.Column('path_to', sqlalchemy.String), - sqlalchemy.Column('entry_file', sqlalchemy.String), sqlalchemy.Column('archived', sqlalchemy.Boolean), - sqlalchemy.Column('external_entries', sqlalchemy.String), sqlalchemy.Column('static_files', sqlalchemy.Boolean), sqlalchemy.Column('sorting_key', sqlalchemy.String), ) diff --git a/vocolab/settings.py b/vocolab/settings.py index 5d2032f..bf619dc 100644 --- a/vocolab/settings.py +++ b/vocolab/settings.py @@ -168,6 +168,13 @@ class _VocoLabSettings(BaseSettings): CUSTOM_TEMPLATES_DIR: Optional[Path] = None + @property + def data_lock(self) -> Path: + return self.DATA_FOLDER / 'readonly.lock' + + def is_locked(self) -> bool: + return self.data_lock.is_file() + @property def static_files_directory(self) -> Path: """ Directory containing static files served by the API """ @@ -246,6 +253,18 @@ def database_connection_url(self): """ Database connection url """ return f"sqlite:///{self.database_file}" + @property + def email_verif_path(self) -> str: + """ Load API path for verifying emails """ + with (self.DATA_FOLDER / 'email_verification.path').open() as fp: + return fp.read().strip() + + @property + def password_reset_path(self) -> str: + """ Load API path for 
resetting passwords """ + with (self.DATA_FOLDER / 'password_reset.path').open() as fp: + return fp.read().strip() + @contextmanager def get_temp_dir(self) -> Generator[Path, None, None]: """ Create a temporary directory """ From 7bf299771ee5439beccde86daddfe93f74132fa6 Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Wed, 1 Mar 2023 16:59:43 +0100 Subject: [PATCH 13/28] various updates of api --- vocolab/api/endpoints/auth.py | 6 -- vocolab/api/endpoints/submissions.py | 23 +++++++- vocolab/api/endpoints/users.py | 70 +++++++++++++++------- vocolab/api/main.py | 9 +-- vocolab/api/pages/users.py | 12 ++-- vocolab/api/router.py | 4 +- vocolab/data/model_queries/auth.py | 3 +- vocolab/data/model_queries/leaderboars.py | 72 +++++++++++++++++++++++ vocolab/data/model_queries/models.py | 11 ++++ vocolab/data/tables.py | 6 +- 10 files changed, 174 insertions(+), 42 deletions(-) create mode 100644 vocolab/data/model_queries/leaderboars.py diff --git a/vocolab/api/endpoints/auth.py b/vocolab/api/endpoints/auth.py index 51ac8ac..32aa563 100644 --- a/vocolab/api/endpoints/auth.py +++ b/vocolab/api/endpoints/auth.py @@ -148,9 +148,3 @@ async def post_password_update(v: str, request: Request, html_response: bool = F if html_response: return HTMLResponse(api_lib.generate_html_response(data, template_name='response.html.jinja2')) return JSONResponse(data) - - -@router.post('/email/validate') -async def validate_email(code: str): - # todo - pass \ No newline at end of file diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index ef70959..e8d210b 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -30,6 +30,23 @@ async def get_submission_scores(submission_id: str): pass +@router.get("/{submission_id}/content/mode") +async def submission_mode(submission_id: str): + """ + Should return the submission mode + open: allows adding content + closed: content has completed being add + """ + pass + + 
+@router.get("/{submission_id}/content/reset") +async def reset_submission(submission_id: str): + """ + remove content of submission & allow new content to be added + """ + pass + @router.put("/{submission_id}/content/add", response_model=models.api.UploadSubmissionPartResponse) async def upload_submission( model_id: str, @@ -38,10 +55,10 @@ async def upload_submission( part_name: str, background_tasks: BackgroundTasks, file_data: UploadFile = File(...), - current_user: schema.User = Depends(api_lib.get_current_active_user), + current_user: model_queries.User = Depends(api_lib.get_current_active_user), ): out.console.info(f"user: {current_user.username}") - challenge = await challengesQ.get_challenge(challenge_id=challenge_id) + challenge = ... # await challengesQ.get_challenge(challenge_id=challenge_id) if challenge is None: return ValueError(f'challenge {challenge_id} not found or inactive') try: @@ -60,5 +77,5 @@ async def upload_submission( @router.delete("/{submission_id}/remove") -async def remove_submission(submission_id: str): +async def remove_submission(submission_id: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): pass \ No newline at end of file diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 3dd42e8..65fdcd2 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -4,12 +4,12 @@ import pydantic from fastapi import ( - APIRouter, Depends, Response + APIRouter, Depends, Response, HTTPException ) from vocolab import out from vocolab.core import api_lib, users_lib -from vocolab.data import model_queries +from vocolab.data import model_queries, models from vocolab.settings import get_settings router = APIRouter() @@ -17,9 +17,12 @@ @router.get("/{username}/profile") -def get_profile( - username: str, - current_user: model_queries.User = Depends(api_lib.get_current_active_user)) -> users_lib.UserProfileData: +def get_profile(username: str, + current_user: 
model_queries.User = Depends( + api_lib.get_current_active_user)) -> users_lib.UserProfileData: + if current_user.username != username: + raise HTTPException(status_code=401, detail="Operation not allowed") + try: user_data = current_user.get_profile_data() # re-update verification @@ -35,36 +38,64 @@ def update_profile( username: str, user_data: users_lib.UserProfileData, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - if user_data.username != current_user.username: - raise ValueError('Bad username specified') + if current_user.username != username: + raise HTTPException(status_code=401, detail="Operation not allowed") - user_data.verified = current_user.is_verified() + if not (user_data.username == current_user.username): + raise HTTPException(status_code=401, detail="Operation not allowed") - user_data.update() + user_data.verified = current_user.is_verified() + user_data.save() return Response(status_code=200) @router.get("/{username}/models/list") async def list_users_models(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - # todo - pass + """ Returning list of models of current user """ + if current_user.username != username: + raise HTTPException(status_code=401, detail="Operation not allowed") + return await model_queries.ModelIDList.get_by_user(current_user.id) @router.post("/{username}/models/create") -async def create_new_model(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - # todo - pass +async def create_new_model(username: str, autor_name: str, data: models.api.NewModelIdRequest, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + """ Create a new model id""" + if current_user.username != username: + raise HTTPException(status_code=401, detail="Operation not allowed") + + if current_user.id != data.user_id: + raise HTTPException(status_code=401, detail="Operation not allowed") + + # create & return 
the new model_id + model_id = await model_queries.ModelID.create(first_author_name=autor_name, data=data) + return model_id @router.get("/{username}/submissions/list") -async def list_users_submissions(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - # todo - pass +async def list_users_submissions(username: str, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + if current_user.username != username: + raise HTTPException(status_code=401, detail="Operation not allowed") + + items = await model_queries.ChallengeSubmissionList.get_from_user(user_id=current_user.id) + return items @router.post("/{username}/submissions/create") -async def create_new_submission(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - pass +async def create_new_submission(username: str, data: models.api.NewSubmissionRequest, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + if current_user.username != username: + raise HTTPException(status_code=401, detail="Operation not allowed") + + # todo check evaluator & other details + new_submission_id = await model_queries.ChallengeSubmission.create( + username=current_user.username, + new_submission=data, + evaluator_id=..., + ) + + return new_submission_id # todo: update submission process @@ -101,7 +132,6 @@ async def create_new_submission(username: str, current_user: model_queries.User # return submission_id - # @router.get('{username}/submissions') # async def submissions_list(username: str): # """ Return a list of all user submissions """ diff --git a/vocolab/api/main.py b/vocolab/api/main.py index 79da1df..5753813 100644 --- a/vocolab/api/main.py +++ b/vocolab/api/main.py @@ -12,7 +12,8 @@ from vocolab import settings, out from vocolab.api import router as v1_router -from vocolab.db import zrDB, create_db +# from vocolab.db import zrDB, create_db +from vocolab.data import db from vocolab.exc 
import VocoLabException _settings = settings.get_settings() @@ -92,9 +93,9 @@ async def zerospeech_error_formatting(request: Request, exc: VocoLabException): @app.on_event("startup") async def startup(): # conditional creation of the necessary files - create_db() + db.build_database_from_schema() # pool connection to databases - await zrDB.connect() + await db.zrDB.connect() # create data_folders _settings.user_data_dir.mkdir(exist_ok=True, parents=True) _settings.leaderboard_dir.mkdir(exist_ok=True) @@ -112,7 +113,7 @@ async def startup(): async def shutdown(): # clean up db connection pool out.log.info("shutdown of api server") - await zrDB.disconnect() + await db.zrDB.disconnect() # sub applications diff --git a/vocolab/api/pages/users.py b/vocolab/api/pages/users.py index 9d430c9..0c3b3c8 100644 --- a/vocolab/api/pages/users.py +++ b/vocolab/api/pages/users.py @@ -9,7 +9,7 @@ from vocolab import exc, out from vocolab.core import api_lib -from vocolab.db.q import userQ +from vocolab.data import model_queries from vocolab.settings import get_settings router = APIRouter() @@ -32,9 +32,9 @@ async def email_verification(v: str, username: str, request: Request): """ Verify a new users email address """ msg = 'Success' res = False - try: - res = await userQ.verify_user(username=username, verification_code=v) + usr = await model_queries.User.get(by_username=username) + res = await usr.verify(verification_code=v) except ValueError: msg = 'Username does not exist' except exc.ActionNotValid as e: @@ -59,7 +59,11 @@ async def email_verification(v: str, username: str, request: Request): async def password_update_page(v: str, request: Request): """ An HTML page-form that allows a user to change their password """ try: - user = await userQ.get_user(by_password_reset_session=v) + token = model_queries.Token.decode(v) + if not token.allow_password_reset and not token.is_expired(): + raise ValueError('bad session') + + user = await 
model_queries.User.get(by_email=token.user_email) except ValueError as e: out.log.error( f'{request.client.host}:{request.client.port} requested bad password reset session as {v} - [{e}]') diff --git a/vocolab/api/router.py b/vocolab/api/router.py index 14244e7..2d11943 100644 --- a/vocolab/api/router.py +++ b/vocolab/api/router.py @@ -3,7 +3,7 @@ from fastapi import APIRouter from vocolab.api.endpoints import ( - users, auth, challenges, leaderboards + users, auth, challenges, leaderboards, models, submissions ) from vocolab.api.pages import users as user_pages from vocolab.settings import get_settings @@ -35,6 +35,8 @@ def index(): api_router.include_router(auth.router, prefix="/auth", tags=["auth"]) api_router.include_router(users.router, prefix="/users", tags=["user-data"]) api_router.include_router(challenges.router, prefix="/challenges", tags=["challenges"]) +api_router.include_router(models.router, prefix="/models", tags=["model"]) +api_router.include_router(submissions.router, prefix="/submissions", tags=["submissions"]) api_router.include_router(leaderboards.router, prefix="/leaderboards", tags=["leaderboards"]) api_router.include_router(user_pages.router, prefix="/page", tags=["pages"]) diff --git a/vocolab/data/model_queries/auth.py b/vocolab/data/model_queries/auth.py index 48675c0..7ba274a 100644 --- a/vocolab/data/model_queries/auth.py +++ b/vocolab/data/model_queries/auth.py @@ -68,10 +68,9 @@ async def verify(self, verification_code: str, force: bool = False) -> bool: tables.users_table.c.id == self.id ).values(verified='True') - if secrets.compare_digest(self.verified, verification_code) or force: + if self.verified == verification_code or force: await zrDB.execute(query) return True - return False async def toggle_status(self, active: bool = True): diff --git a/vocolab/data/model_queries/leaderboars.py b/vocolab/data/model_queries/leaderboars.py new file mode 100644 index 0000000..c4f3f76 --- /dev/null +++ 
b/vocolab/data/model_queries/leaderboars.py @@ -0,0 +1,72 @@ +from datetime import datetime +from pathlib import Path +from typing import List + +from pydantic import BaseModel, Json + +from vocolab.data import tables +from ..db import zrDB + + +class LeaderboardEntry(BaseModel): + """ Data Representation of a Leaderboard Entry """ + id: int + data: Json + src: Path + model_id: str + submission_id: str + leaderboard_id: int + user_id: int + submitted_at: datetime + + class Config: + orm_mode = True + + + +class LeaderboardEntryList(BaseModel): + """ Data representation of a leaderboard entry list""" + items: List[LeaderboardEntry] + + +class Leaderboard(BaseModel): + """ Data representation of a Leaderboard """ + id: int + challenge_id: int + label: str + archived: bool + static_files: bool + sorting_key: bool + + class Config: + orm_mode = True + + @classmethod + async def get_by_id(cls, _id: int) -> "Leaderboard": + """ Load leaderboard from id """ + query = tables.leaderboards_table.select().where( + tables.leaderboards_table.c.id == _id + ) + + ld_data = await zrDB.fetch_one(query) + if ld_data is None: + raise ValueError('Leaderboard not found') + + return cls.parse_obj(ld_data) + + async def get_entries(self) -> LeaderboardEntryList: + """ Load leaderboard entries """ + query = tables.leaderboard_entry_table.select().where( + tables.leaderboard_entry_table.c.leaderboard_id == self.id + ) + ld_entries = await zrDB.fetch_all(query) + if not ld_entries: + return LeaderboardEntryList(items=[]) + return LeaderboardEntryList.parse_obj(dict(items=ld_entries)) + + + + + + + diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index e518cf5..7094435 100644 --- a/vocolab/data/model_queries/models.py +++ b/vocolab/data/model_queries/models.py @@ -113,6 +113,17 @@ async def get(cls) -> "ModelIDList": items = db.zrDB.fetch_all(tables.models_table.select()) return cls(items=items) + @classmethod + async def get_by_user(cls, user_id: int) 
-> "ModelIDList": + """ Load models by user """ + query = tables.models_table.select().where( + tables.models_table.c.user_id == user_id + ) + items = db.zrDB.fetch_all(query) + if not items: + return cls(items=[]) + return cls.parse_obj(dict(items=items)) + class SubmissionStatus(str, Enum): """ Definition of different states of submissions """ diff --git a/vocolab/data/tables.py b/vocolab/data/tables.py index 74d99f4..9dfd101 100644 --- a/vocolab/data/tables.py +++ b/vocolab/data/tables.py @@ -102,9 +102,11 @@ "leaderboard_entries", tables_metadata, sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), - sqlalchemy.Column("entry_path", sqlalchemy.String), + sqlalchemy.Column("data", sqlalchemy.JSON), + sqlalchemy.Column("src", sqlalchemy.String), sqlalchemy.Column("model_id", sqlalchemy.String, sqlalchemy.ForeignKey("leaderboards.id")), sqlalchemy.Column("submission_id", sqlalchemy.String, sqlalchemy.ForeignKey("challenge_submissions.id")), sqlalchemy.Column("leaderboard_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("models.id")), - sqlalchemy.Column("submitted_at", sqlalchemy.String) + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("users_credentials.id")), + sqlalchemy.Column("submitted_at", sqlalchemy.DATETIME) ) \ No newline at end of file From 147cd18e040cb2fbcdcc40d4170ff2f9091a1ced Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Wed, 1 Mar 2023 17:50:40 +0100 Subject: [PATCH 14/28] submissions upload --- vocolab/api/endpoints/submissions.py | 31 ++++++++++++++----- vocolab/api/endpoints/users.py | 2 +- vocolab/core/submission_lib/submission.py | 8 ++--- vocolab/core/submission_lib/upload.py | 37 ++++++++++------------- vocolab/data/model_queries/models.py | 3 +- 5 files changed, 46 insertions(+), 35 deletions(-) diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index e8d210b..f8226f9 100644 --- a/vocolab/api/endpoints/submissions.py +++ 
b/vocolab/api/endpoints/submissions.py @@ -3,7 +3,8 @@ """ from fastapi import ( - APIRouter, Depends, UploadFile, File, BackgroundTasks + APIRouter, Depends, UploadFile, File, BackgroundTasks, + HTTPException ) from vocolab import out, exc @@ -47,24 +48,40 @@ async def reset_submission(submission_id: str): """ pass +@router.post('{submission_id}/content/init') +async def upload_manifest(submission_id:str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)) + # todo: initialise manifest before upload + # create submission dir + # add manifest and promise of files + pass + @router.put("/{submission_id}/content/add", response_model=models.api.UploadSubmissionPartResponse) async def upload_submission( - model_id: str, submission_id: str, - challenge_id: int, part_name: str, background_tasks: BackgroundTasks, file_data: UploadFile = File(...), current_user: model_queries.User = Depends(api_lib.get_current_active_user), ): out.console.info(f"user: {current_user.username}") - challenge = ... 
# await challengesQ.get_challenge(challenge_id=challenge_id) - if challenge is None: - return ValueError(f'challenge {challenge_id} not found or inactive') + submission = await model_queries.ChallengeSubmission.get(submission_id) + if submission is None: + raise HTTPException(status_code=404, detail="submission not found") + + if submission.user_id != current_user.id: + raise HTTPException(status_code=403, detail="Operation not allowed") + + try: + sub_dir = submission_lib.SubmissionDir.load(model_id=submission.model_id, submission_id=submission.id) + except FileNotFoundError: + raise HTTPException(status_code=417, detail="Expected submission directory to exist") + + try: - is_completed, remaining = submission_lib.add_part(submission_id, part_name, file_data) + is_completed, remaining = sub_dir.add_content(file_name=part_name, data=file_data) if is_completed: + # todo: fix completed actions # run the completion of the submission on the background background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 65fdcd2..900a7b2 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -92,7 +92,7 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq new_submission_id = await model_queries.ChallengeSubmission.create( username=current_user.username, new_submission=data, - evaluator_id=..., + evaluator_id=None, ) return new_submission_id diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py index 9e8a51e..4aeeb59 100644 --- a/vocolab/core/submission_lib/submission.py +++ b/vocolab/core/submission_lib/submission.py @@ -138,7 +138,7 @@ def get_leaderboard_items(self): raise ValueError('Submission has no info index') return self.info.leaderboard_entries - def add_content(self, file_name: str, file_size: int, file_hash: str, data: UploadFile): + def 
add_content(self, file_name: str, data: UploadFile): """ Add content to the submission *) multipart: - add part to the tmp folder @@ -158,8 +158,6 @@ def add_content(self, file_name: str, file_size: int, file_hash: str, data: Uplo handler.add_part( logger=self.get_log_handler(), file_name=file_name, - file_size=file_size, - file_hash=file_hash, data=data ) handler.dump_to_index(self.multipart_index_file) @@ -172,14 +170,14 @@ def add_content(self, file_name: str, file_size: int, file_hash: str, data: Uplo handler.write_data( logger=self.get_log_handler(), file_name=file_name, - file_hash=file_hash, data=data ) if handler.completed(): """ Upload completed """ unzip(handler.target_file, self.content_location) - # todo notify who what when + return True, [] + return False, handler.remaining_items def send_content(self, hostname: str) -> Path: """ Send content to a remote host for evaluation (return target location) """ diff --git a/vocolab/core/submission_lib/upload.py b/vocolab/core/submission_lib/upload.py index 95cc32d..04b9460 100644 --- a/vocolab/core/submission_lib/upload.py +++ b/vocolab/core/submission_lib/upload.py @@ -47,17 +47,18 @@ def file_hash(self): def completed(self) -> bool: return self.target_file.is_file() - def write_data(self, logger: SubmissionLogger, file_name: str, - file_hash: str, data: UploadFile): + def write_data(self, logger: SubmissionLogger, file_name: str, data: UploadFile): logger.log(f"adding a new part to upload: {file_name}") - assert file_hash == self.file_hash, "Given hash & expected hash should be the same !!" 
+ # Add the part with self.target_file.open('wb') as fp: for d in data.file: fp.write(d) - if not md5sum(self.target_file) == file_hash: + calc_hash = md5sum(self.target_file) + + if not self.file_hash == calc_hash: # todo: more stuff see multipart fail self.target_file.unlink() raise exc.ValueNotValid("Hash does not match expected!") @@ -119,7 +120,7 @@ def dump_to_index(self, file: Path): with file.open("w") as fp: fp.write(self.json(indent=4)) - def add_part(self, logger: SubmissionLogger, file_name: str, file_size: int, file_hash: str, data: UploadFile): + def add_part(self, logger: SubmissionLogger, file_name: str, data: UploadFile): """ Add a part to a multipart upload type submission. - Write the data into a file inside the submission folder. @@ -130,15 +131,7 @@ def add_part(self, logger: SubmissionLogger, file_name: str, file_size: int, fil - ValueNotValid if md5 hash of file does not match md5 recorded in the manifest """ logger.log(f"adding a new part to upload: {self.store_location / file_name}") - new_item_mf = ManifestIndexItem( - file_name=file_name, - file_size=file_size, - file_hash=file_hash - ) - - if new_item_mf not in self.index: - logger.log(f"(ERROR) file {file_name} was not found in manifest, upload canceled!!") - raise exc.ResourceRequestedNotFound(f"Part {file_name} is not part of submission {logger.submission_id}!!") + # todo load information from index and name ??? 
# write data on disk file_part = self.store_location / file_name @@ -147,18 +140,20 @@ def add_part(self, logger: SubmissionLogger, file_name: str, file_size: int, fil fp.write(d) calc_hash = md5sum(file_part) - if not compare_digest(calc_hash, file_hash): - # remove file and throw exception + new_item_mf = ManifestIndexItem( + file_name=file_name, + file_hash=calc_hash, + file_size=file_part.stat().st_size + ) + + if new_item_mf not in self.index: + logger.log(f"(ERROR) file {file_name} was not found in manifest, upload canceled!!") file_part.unlink() - data = f"failed hash comparison" \ - f"file: {file_part} with hash {calc_hash}" \ - f"on record found : {file_name} with hash {file_hash}" logger.log(f"(ERROR) {data}, upload canceled!!") - raise exc.ValueNotValid("Hash of part does not match given hash", data=data) + raise exc.ResourceRequestedNotFound(f"Part {file_name} is not part of submission {logger.submission_id}!!") # up count of received parts self.received.append(new_item_mf) - logger.log(f" --> part was added successfully", date=False) def merge_parts(self): diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index 7094435..2917e79 100644 --- a/vocolab/data/model_queries/models.py +++ b/vocolab/data/model_queries/models.py @@ -151,6 +151,7 @@ class ChallengeSubmission(BaseModel): id: str user_id: int track_id: int + model_id: str submit_date: datetime status: SubmissionStatus auto_eval: bool @@ -161,7 +162,7 @@ class Config: orm_mode = True @classmethod - async def create(cls, username: str, new_submission: models.api.NewSubmissionRequest, evaluator_id: int) -> str: + async def create(cls, username: str, new_submission: models.api.NewSubmissionRequest, evaluator_id: Optional[int]) -> str: """ Creates a database entry for the new submission """ submission_id = f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{username}" values = new_submission.dict() From a11e58161dba7e1c7c7e7e925bccb8bdd09c895f Mon Sep 17 00:00:00 2001 
From: Hamilakis Nicolas Date: Thu, 2 Mar 2023 10:28:49 +0100 Subject: [PATCH 15/28] updates --- vocolab/api/endpoints/submissions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index f8226f9..bab40cc 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -49,7 +49,7 @@ async def reset_submission(submission_id: str): pass @router.post('{submission_id}/content/init') -async def upload_manifest(submission_id:str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)) +async def upload_manifest(submission_id:str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): # todo: initialise manifest before upload # create submission dir # add manifest and promise of files From 55aad6da727fc3d9c7e304d24e8093214b69f645 Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Thu, 2 Mar 2023 17:38:21 +0100 Subject: [PATCH 16/28] WIP: upload submissions & model creation --- vocolab/api/endpoints/auth.py | 2 +- vocolab/api/endpoints/submissions.py | 11 ++++++---- vocolab/api/endpoints/users.py | 13 +++++++----- vocolab/api/main.py | 6 +++++- vocolab/api/router.py | 29 ++++++++++++++++++++++----- vocolab/core/api_lib.py | 8 +++++++- vocolab/core/submission_lib/upload.py | 3 --- vocolab/data/model_queries/models.py | 7 ++++--- vocolab/data/models/api/auth.py | 1 + vocolab/data/models/api/models.py | 3 +-- 10 files changed, 58 insertions(+), 25 deletions(-) diff --git a/vocolab/api/endpoints/auth.py b/vocolab/api/endpoints/auth.py index 32aa563..81cd168 100644 --- a/vocolab/api/endpoints/auth.py +++ b/vocolab/api/endpoints/auth.py @@ -30,7 +30,7 @@ async def login(form_data: OAuth2PasswordRequestForm = Depends()) -> models.api. 
raise ValueError('Bad login') token = model_queries.Token(user_email=user.email) - return models.api.LoggedItem(access_token=token.encode(), token_type="bearer") + return models.api.LoggedItem(username=user.username, access_token=token.encode(), token_type="bearer") except ValueError: raise HTTPException( status_code=status.HTTP_401_UNAUTHORIZED, diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index bab40cc..538beb1 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -48,13 +48,16 @@ async def reset_submission(submission_id: str): """ pass + @router.post('{submission_id}/content/init') -async def upload_manifest(submission_id:str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): +async def upload_manifest(submission_id: str, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): # todo: initialise manifest before upload # create submission dir # add manifest and promise of files pass + @router.put("/{submission_id}/content/add", response_model=models.api.UploadSubmissionPartResponse) async def upload_submission( submission_id: str, @@ -76,7 +79,6 @@ async def upload_submission( except FileNotFoundError: raise HTTPException(status_code=417, detail="Expected submission directory to exist") - try: is_completed, remaining = sub_dir.add_content(file_name=part_name, data=file_data) @@ -94,5 +96,6 @@ async def upload_submission( @router.delete("/{submission_id}/remove") -async def remove_submission(submission_id: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - pass \ No newline at end of file +async def remove_submission(submission_id: str, + current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + pass diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 900a7b2..24cd74f 100644 --- a/vocolab/api/endpoints/users.py +++ 
b/vocolab/api/endpoints/users.py @@ -58,17 +58,20 @@ async def list_users_models(username: str, current_user: model_queries.User = De @router.post("/{username}/models/create") -async def create_new_model(username: str, autor_name: str, data: models.api.NewModelIdRequest, +async def create_new_model(username: str, author_name: str, data: models.api.NewModelIdRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): """ Create a new model id""" + print("WRF") if current_user.username != username: raise HTTPException(status_code=401, detail="Operation not allowed") - if current_user.id != data.user_id: - raise HTTPException(status_code=401, detail="Operation not allowed") - # create & return the new model_id - model_id = await model_queries.ModelID.create(first_author_name=autor_name, data=data) + try: + model_id = await model_queries.ModelID.create(user_id=current_user.id, first_author_name=author_name, data=data) + except Exception as e: + out.console.print(e) + raise e + return model_id diff --git a/vocolab/api/main.py b/vocolab/api/main.py index 5753813..10bb23b 100644 --- a/vocolab/api/main.py +++ b/vocolab/api/main.py @@ -50,7 +50,11 @@ async def log_requests(request: Request, call_next): start_time = time.time() - response = await call_next(request) + try: + response = await call_next(request) + except Exception as e: + print(e) + raise e process_time = (time.time() - start_time) * 1000 formatted_process_time = '{0:.2f}'.format(process_time) diff --git a/vocolab/api/router.py b/vocolab/api/router.py index 2d11943..7ccd662 100644 --- a/vocolab/api/router.py +++ b/vocolab/api/router.py @@ -1,6 +1,8 @@ +from datetime import datetime from pathlib import Path -from fastapi import APIRouter +from fastapi import APIRouter, HTTPException, status +from pydantic import BaseModel, EmailStr from vocolab.api.endpoints import ( users, auth, challenges, leaderboards, models, submissions @@ -13,23 +15,40 @@ api_router = APIRouter() +class 
APIIndex(BaseModel): + app: str + version: str + maintainers: str + contact: EmailStr + installation_datetime: datetime + + @api_router.get("/") -def index(): +def index() -> APIIndex: """ API Index """ install_time = (Path.home() / '.voco-installation') if install_time.is_file(): with install_time.open() as fp: installation_datetime = fp.read() else: - installation_datetime = '' + installation_datetime = datetime.now().isoformat() - return { + return APIIndex.parse_obj({ "app": _settings.app_options.app_name, "version": _settings.app_options.version, "maintainers": _settings.app_options.maintainers, "contact": _settings.app_options.admin_email, "installation_datetime": installation_datetime - } + }) + + +@api_router.get("/error") +def get_error(): + """ This route throws an error (used for testing)""" + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + detail="Incorrect username or password" + ) api_router.include_router(auth.router, prefix="/auth", tags=["auth"]) diff --git a/vocolab/core/api_lib.py b/vocolab/core/api_lib.py index ca1d341..4cd7620 100644 --- a/vocolab/core/api_lib.py +++ b/vocolab/core/api_lib.py @@ -5,7 +5,7 @@ from fastapi.security import OAuth2PasswordBearer from jinja2 import FileSystemLoader, Environment -from vocolab import settings +from vocolab import settings, out from vocolab.data import model_queries, models from vocolab.core import notify, commons @@ -33,6 +33,9 @@ def validate_token(token: str = Depends(oauth2_scheme)) -> model_queries.Token: status_code=status.HTTP_401_UNAUTHORIZED, detail="Token is invalid or has expired !", ) + except Exception as e: + out.console.exception() + raise e async def get_user(token: model_queries.Token = Depends(validate_token)) -> model_queries.User: @@ -44,6 +47,9 @@ async def get_user(token: model_queries.Token = Depends(validate_token)) -> mode status_code=status.HTTP_401_UNAUTHORIZED, detail="User is not in database !" 
) + except Exception as e: + out.console.exception() + raise e async def get_current_active_user(current_user: model_queries.User = Depends(get_user)) -> model_queries.User: diff --git a/vocolab/core/submission_lib/upload.py b/vocolab/core/submission_lib/upload.py index 04b9460..8c209ef 100644 --- a/vocolab/core/submission_lib/upload.py +++ b/vocolab/core/submission_lib/upload.py @@ -50,7 +50,6 @@ def completed(self) -> bool: def write_data(self, logger: SubmissionLogger, file_name: str, data: UploadFile): logger.log(f"adding a new part to upload: {file_name}") - # Add the part with self.target_file.open('wb') as fp: for d in data.file: @@ -89,7 +88,6 @@ class MultipartUploadHandler(BaseModel): hashed_parts: bool = True target_location: Path - @property def target_file(self): return self.target_location / 'submission.zip' @@ -177,7 +175,6 @@ def merge_parts(self): fs.merge(input_dir=f"{self.store_location}", output_file=str(self.target_file)) assert md5sum(self.target_file) == self.merge_hash, "output file does not match original md5" - def clean(self): """ Delete index & parts used for multipart upload """ shutil.rmtree(self.store_location) diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index 2917e79..b5fe7c1 100644 --- a/vocolab/data/model_queries/models.py +++ b/vocolab/data/model_queries/models.py @@ -60,7 +60,7 @@ def nth_word(n: int) -> str: return ''.join(word) @classmethod - async def create(cls, first_author_name: str, data: models.api.NewModelIdRequest): + async def create(cls, user_id: int, first_author_name: str, data: models.api.NewModelIdRequest): """ Create a new ModelID entry in the database ids are created using the 3 first letters of first name of first author, @@ -71,12 +71,13 @@ async def create(cls, first_author_name: str, data: models.api.NewModelIdRequest counter = 1 new_model_id_extended = f"{new_model_id}{cls.nth_word(counter)}" - while cls.exists(new_model_id_extended): + while await 
cls.exists(new_model_id_extended): counter += 1 new_model_id_extended = f"{new_model_id}{cls.nth_word(counter)}" # create db entry - query = tables.models_table.insert().values(id=new_model_id_extended, **data.dict()) + query = tables.models_table.insert().values( + id=new_model_id_extended, user_id=user_id, **data.dict()) await db.zrDB.execute(query) return new_model_id_extended diff --git a/vocolab/data/models/api/auth.py b/vocolab/data/models/api/auth.py index 255adf8..ca1afb5 100644 --- a/vocolab/data/models/api/auth.py +++ b/vocolab/data/models/api/auth.py @@ -19,6 +19,7 @@ def non_empty_string(cls, v): class LoggedItem(BaseModel): """ Return type of the /login function """ + username: str access_token: str token_type: str diff --git a/vocolab/data/models/api/models.py b/vocolab/data/models/api/models.py index bf84b6e..0728734 100644 --- a/vocolab/data/models/api/models.py +++ b/vocolab/data/models/api/models.py @@ -5,7 +5,6 @@ class NewModelIdRequest(BaseModel): - user_id: int description: str gpu_budget: str train_set: str @@ -14,4 +13,4 @@ class NewModelIdRequest(BaseModel): team: str paper_url: Optional[AnyHttpUrl] code_url: Optional[AnyHttpUrl] - created_at: datetime = Field(default_factory=lambda: datetime.now()) \ No newline at end of file + created_at: datetime = Field(default_factory=lambda: datetime.now()) From a72753dc29da4a13ea6c6eac4486d550699d28cc Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Tue, 7 Mar 2023 16:09:50 +0100 Subject: [PATCH 17/28] upload --- tests/fixtures/db.py | 4 +-- vocolab/api/endpoints/submissions.py | 9 +++++- vocolab/api/endpoints/users.py | 48 ++++++++++++++-------------- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/tests/fixtures/db.py b/tests/fixtures/db.py index c750e77..b657a0b 100644 --- a/tests/fixtures/db.py +++ b/tests/fixtures/db.py @@ -1,11 +1,11 @@ import pytest -from vocolab.db import zrDB, create_db +from vocolab.data.db import zrDB, build_database_from_schema 
@pytest.fixture(scope="session") async def db(): - create_db() + build_database_from_schema() # connect to Database await zrDB.connect() diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index 538beb1..8a01335 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -18,16 +18,19 @@ @router.get("/list") async def get_sub_list(): + # todo implement this pass @router.get("/{submission_id}/info") async def get_sub_info(submission_id: str): + # todo implement this pass @router.get("/{submission_id}/scores") async def get_submission_scores(submission_id: str): + # todo implement this pass @@ -38,6 +41,7 @@ async def submission_mode(submission_id: str): open: allows adding content closed: content has completed being add """ + # todo implement this pass @@ -46,6 +50,7 @@ async def reset_submission(submission_id: str): """ remove content of submission & allow new content to be added """ + # todo implement this pass @@ -85,7 +90,8 @@ async def upload_submission( if is_completed: # todo: fix completed actions # run the completion of the submission on the background - background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) + # background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) + pass return models.api.UploadSubmissionPartResponse( completed=is_completed, remaining=[n.file_name for n in remaining] @@ -98,4 +104,5 @@ async def upload_submission( @router.delete("/{submission_id}/remove") async def remove_submission(submission_id: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + # todo implement this pass diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 24cd74f..8955b32 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -15,13 +15,15 @@ router = APIRouter() _settings = get_settings() +NonAllowedOperation = 
HTTPException(status_code=401, detail="Operation not allowed") + @router.get("/{username}/profile") def get_profile(username: str, current_user: model_queries.User = Depends( api_lib.get_current_active_user)) -> users_lib.UserProfileData: if current_user.username != username: - raise HTTPException(status_code=401, detail="Operation not allowed") + raise NonAllowedOperation try: user_data = current_user.get_profile_data() @@ -39,10 +41,10 @@ def update_profile( user_data: users_lib.UserProfileData, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): if current_user.username != username: - raise HTTPException(status_code=401, detail="Operation not allowed") + raise NonAllowedOperation - if not (user_data.username == current_user.username): - raise HTTPException(status_code=401, detail="Operation not allowed") + if user_data.username != current_user.username: + raise NonAllowedOperation user_data.verified = current_user.is_verified() user_data.save() @@ -53,7 +55,7 @@ def update_profile( async def list_users_models(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): """ Returning list of models of current user """ if current_user.username != username: - raise HTTPException(status_code=401, detail="Operation not allowed") + raise NonAllowedOperation return await model_queries.ModelIDList.get_by_user(current_user.id) @@ -63,7 +65,7 @@ async def create_new_model(username: str, author_name: str, data: models.api.New """ Create a new model id""" print("WRF") if current_user.username != username: - raise HTTPException(status_code=401, detail="Operation not allowed") + raise NonAllowedOperation # create & return the new model_id try: @@ -79,7 +81,7 @@ async def create_new_model(username: str, author_name: str, data: models.api.New async def list_users_submissions(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): if current_user.username != username: - raise 
HTTPException(status_code=401, detail="Operation not allowed") + raise NonAllowedOperation items = await model_queries.ChallengeSubmissionList.get_from_user(user_id=current_user.id) return items @@ -89,7 +91,7 @@ async def list_users_submissions(username: str, async def create_new_submission(username: str, data: models.api.NewSubmissionRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): if current_user.username != username: - raise HTTPException(status_code=401, detail="Operation not allowed") + raise NonAllowedOperation # todo check evaluator & other details new_submission_id = await model_queries.ChallengeSubmission.create( @@ -109,14 +111,12 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq # current_user: schema.User = Depends(api_lib.get_current_active_user) # ): # """ Create a new submission """ -# # todo fetch model_id # # challenge = await challengesQ.get_challenge(challenge_id=challenge_id) # if challenge is None: -# return ValueError(f'challenge {challenge_id} not found or inactive') +# return ValueError('challenge {challenge_id} not found or inactive') # # # create db entry -# # todo check submission table data # submission_id = await challengesQ.add_submission(new_submission=models.api.NewSubmission( # user_id=current_user.id, # track_id=challenge.id, @@ -133,13 +133,12 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq # ) # # return submission_id - - +# +# # @router.get('{username}/submissions') # async def submissions_list(username: str): # """ Return a list of all user submissions """ # user = model_queries.User.get(by_username=username) -# # todo fix later # submissions = await challengesQ.get_user_submissions(user_id=current_user.id) # submissions = [ # models.api.SubmissionPreview( @@ -159,8 +158,9 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq # data[sub.track_label] = [sub] # # return data - - +# +# +# # 
@router.get('{username}//submissions/tracks/{track_id}') # async def submissions_list_by_track( # track_id: int, current_user: schema.User = Depends(api_lib.get_current_active_user)): @@ -177,8 +177,8 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq # ) # for s in submissions if s.track_id == track.id # ] - - +# +# # @router.get('/submissions/{submissions_id}') # async def get_submission(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): # """ Return information on a submission """ @@ -210,8 +210,8 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq # evaluator_label=evaluator_label, # leaderboards=[(ld.label, ld.id) for ld in leaderboards] # ) - - +# +# # @router.get('/submissions/{submissions_id}/status') # async def get_submission_status( # submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): @@ -222,8 +222,8 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq # status=exc.http_status.HTTP_403_FORBIDDEN) # # return submission.status - - +# +# # @router.get('/submissions/{submissions_id}/log') # async def get_submission_status( # submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): @@ -235,8 +235,8 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq # # log = submission_lib.SubmissionLogger(submissions_id) # return log.get_text() - - +# +# # @router.get('/submissions/{submissions_id}/scores') # async def get_user_results(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): # """ Return status of a submission """ From d4696019f603dee15ff7bda9290ffae0365a061f Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Tue, 14 Mar 2023 17:43:36 +0100 Subject: [PATCH 18/28] various updates on submission --- samples/challenges_list.json | 8 ---- vocolab/admin/commands/challenges.py | 34 
+++++++------ vocolab/admin/commands/leaderboards.py | 20 ++++---- vocolab/admin/commands/submissions.py | 4 +- vocolab/admin/main.py | 6 +-- vocolab/api/endpoints/benchmarks.py | 45 +++++++++++++++++ vocolab/api/endpoints/challenges.py | 47 ------------------ vocolab/api/endpoints/leaderboards.py | 7 ++- vocolab/api/endpoints/users.py | 31 +++++++----- vocolab/api/router.py | 4 +- vocolab/data/model_queries/auth.py | 3 +- vocolab/data/model_queries/challenges.py | 61 ++++++++++++------------ vocolab/data/model_queries/models.py | 52 +++++++++++--------- vocolab/data/models/api/challenges.py | 30 +++--------- vocolab/data/models/cli.py | 1 - vocolab/data/tables.py | 35 +++++++------- vocolab/settings.py | 2 +- 17 files changed, 185 insertions(+), 205 deletions(-) create mode 100644 vocolab/api/endpoints/benchmarks.py delete mode 100644 vocolab/api/endpoints/challenges.py diff --git a/samples/challenges_list.json b/samples/challenges_list.json index c926717..5fda38f 100644 --- a/samples/challenges_list.json +++ b/samples/challenges_list.json @@ -1,6 +1,5 @@ [ { - "id": 1, "label": "test-challenge", "start_date": "2022-02-21", "end_date": null, @@ -9,7 +8,6 @@ "evaluator": null }, { - "id": 2, "label": "abx-15", "start_date": "2015-01-20", "end_date": null, @@ -18,7 +16,6 @@ "evaluator": null }, { - "id": 3, "label": "abx-17", "start_date": "2017-01-20", "end_date": null, @@ -27,7 +24,6 @@ "evaluator": null }, { - "id": 4, "label": "abx-LS", "start_date": "2021-12-12", "end_date": null, @@ -36,7 +32,6 @@ "evaluator": null }, { - "id": 5, "label": "tde-15", "start_date": "2015-01-20", "end_date": null, @@ -45,7 +40,6 @@ "evaluator": null }, { - "id": 6, "label": "tde-17", "start_date": "2017-01-20", "end_date": null, @@ -54,7 +48,6 @@ "evaluator": null }, { - "id": 7, "label": "slm-21", "start_date": "2021-12-12", "end_date": null, @@ -63,7 +56,6 @@ "evaluator": null }, { - "id": 8, "label": "ttso-19", "start_date": "2019-01-20", "end_date": null, diff --git 
a/vocolab/admin/commands/challenges.py b/vocolab/admin/commands/challenges.py index 1486f4a..a2217d5 100644 --- a/vocolab/admin/commands/challenges.py +++ b/vocolab/admin/commands/challenges.py @@ -11,11 +11,11 @@ from vocolab.data import models, model_queries -class ChallengesCMD(cmd_lib.CMD): +class BenchmarksCMD(cmd_lib.CMD): """ Command for challenge administration (default: list)""" def __init__(self, root, name, cmd_path): - super(ChallengesCMD, self).__init__(root, name, cmd_path) + super(BenchmarksCMD, self).__init__(root, name, cmd_path) # custom arguments self.parser.add_argument('-a', '--include-all', @@ -27,14 +27,13 @@ def run(self, argv): # fetch data loop = asyncio.get_event_loop() - challenge_lst: model_queries.ChallengeList = loop.run_until_complete( - model_queries.ChallengeList.get(include_all=args.include_all) + challenge_lst: model_queries.BenchmarkList = loop.run_until_complete( + model_queries.BenchmarkList.get(include_all=args.include_all) ) # Prepare output table = Table(show_header=True, header_style="bold magenta") - table.add_column("ID") - table.add_column("label") + table.add_column("Label") table.add_column("active") table.add_column("url") table.add_column("start_date") @@ -48,7 +47,7 @@ def run(self, argv): end_date_str = None table.add_row( - f"{ch.id}", f"{ch.label}", f"{ch.active}", f"{ch.url}", + f"{ch.label}", f"{ch.active}", f"{ch.url}", f"{ch.start_date.strftime('%d/%m/%Y')}", f"{end_date_str}", f"{ch.evaluator}" ) @@ -56,11 +55,11 @@ def run(self, argv): out.cli.print(table) -class AddChallengeCMD(cmd_lib.CMD): +class AddBenchmarkCMD(cmd_lib.CMD): """ Command to create new challenges """ def __init__(self, root, name, cmd_path): - super(AddChallengeCMD, self).__init__(root, name, cmd_path) + super(AddBenchmarkCMD, self).__init__(root, name, cmd_path) self.parser.add_argument('--dry-run', dest='dry_run', action='store_true', @@ -75,7 +74,7 @@ def run(self, argv): if args.from_file: file_path = Path(args.from_file) if not 
(file_path.is_file() and file_path.suffix == '.json'): - raise ValueError(f"Input file needs to exist and be a Valid JSON file.") + raise ValueError("Input file needs to exist and be a Valid JSON file.") obj = json.load(file_path.open()) obj_list = [models.cli.NewChallenge(**item) for item in obj] @@ -104,7 +103,7 @@ def run(self, argv): if not args.dry_run: for item in obj_list: - asyncio.run(model_queries.Challenge.create(item)) + asyncio.run(model_queries.Benchmark.create(item)) out.cli.print(f"insertion of {item.label} was successful:white_check_mark:", style="bold green") else: @@ -118,23 +117,22 @@ def run(self, argv): out.cli.error(f":x:\t{e}") -class SetChallenge(cmd_lib.CMD): +class SetBenchmarkCMD(cmd_lib.CMD): """ Command to alter properties of Challenges""" def __init__(self, root, name, cmd_path): - super(SetChallenge, self).__init__(root, name, cmd_path) - self.challenge_fields = model_queries.Challenge.get_field_names() - self.challenge_fields.remove('id') + super(SetBenchmarkCMD, self).__init__(root, name, cmd_path) + self.challenge_fields = model_queries.Benchmark.get_field_names() # arguments - self.parser.add_argument('id', help='ID of the challenge to update') + self.parser.add_argument('label', help='Name of the challenge to update') self.parser.add_argument('field_name', type=str, choices=self.challenge_fields, help='The name of the field') self.parser.add_argument('value', help='The new value of the field') @staticmethod - async def update_property(challenge_id: int, field_name: str, value: str): - ch = await model_queries.Challenge.get(challenge_id=challenge_id) + async def update_property(benchmark_id: str, field_name: str, value: str): + ch = await model_queries.Benchmark.get(benchmark_id=benchmark_id) return await ch.update_property( variable_name=field_name, value=value, diff --git a/vocolab/admin/commands/leaderboards.py b/vocolab/admin/commands/leaderboards.py index 1f9741f..cb8bd7d 100644 --- a/vocolab/admin/commands/leaderboards.py 
+++ b/vocolab/admin/commands/leaderboards.py @@ -8,7 +8,7 @@ from vocolab import out from vocolab.core import leaderboards_lib, cmd_lib -from vocolab.data import model_queries, models +from vocolab.data import model_queries class LeaderboardCMD(cmd_lib.CMD): @@ -25,20 +25,18 @@ def run(self, argv): leaderboards = model_queries.LeaderboardList(items=[]) table = Table(show_header=True, header_style="bold magenta") - table.add_column('ID') table.add_column('Label') table.add_column('Archived') table.add_column('Static Files') - table.add_column('Challenge ID') + table.add_column('Benchmark ID') table.add_column('Key', no_wrap=False, overflow='fold') for entry in leaderboards: table.add_row( - f"{entry.id}", f"{entry.label}", f"{entry.archived}", - f"{entry.static_files}", f"{entry.challenge_id}", + f"{entry.label}", f"{entry.archived}", + f"{entry.static_files}", f"{entry.benchmark_id}", f"{entry.sorting_key}" ) - # print table out.cli.print(table, no_wrap=False) @@ -69,7 +67,7 @@ def ask_input(): if external_entries.is_dir(): break else: - out.cli.error(f"External entries must be a valid directory") + out.cli.error("External entries must be a valid directory") add_static_files = Confirm.ask("Does this leaderboard include static files", default=True) @@ -104,10 +102,11 @@ def run(self, argv): for item in lds: asyncio.run(model_queries.Leaderboard.create( model_queries.Leaderboard( - challenge_id=item.get("challenge_id"), label=item.get("label"), + benchmark_id=item.get("benchmark_id"), static_files=item.get("static_files", False), archived=item.get("archived", False), + sorting_key=item.get("sorting_key", None), ) )) out.cli.info(f"Successfully created leaderboard : {item.get('label')}") @@ -119,16 +118,15 @@ class EditLeaderboardCMD(cmd_lib.CMD): def __init__(self, root, name, cmd_path): super(EditLeaderboardCMD, self).__init__(root, name, cmd_path) self.leaderboard_fields = model_queries.Leaderboard.get_field_names() - self.leaderboard_fields.remove('id') # 
arguments - self.parser.add_argument("leaderboard_id", type=int, help='The id of the entry') + self.parser.add_argument("leaderboard_id", type=str, help='The id of the entry') self.parser.add_argument("field_name", type=str, choices=self.leaderboard_fields, help="The name of the field") self.parser.add_argument('field_value', help="The new value of the field") @staticmethod - async def update_value(leaderboard_id: int, field_name: str, value: str): + async def update_value(leaderboard_id: str, field_name: str, value: str): leaderboard = await model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) return await leaderboard.update_property(variable_name=field_name, value=value, allow_parsing=True) diff --git a/vocolab/admin/commands/submissions.py b/vocolab/admin/commands/submissions.py index 3e01e68..4fbad28 100644 --- a/vocolab/admin/commands/submissions.py +++ b/vocolab/admin/commands/submissions.py @@ -1,13 +1,11 @@ import asyncio -import shutil import sys -from pathlib import Path from rich.table import Table from vocolab import out, get_settings -from vocolab.data import models, model_queries from vocolab.core import submission_lib, cmd_lib +from vocolab.data import model_queries # api settings _settings = get_settings() diff --git a/vocolab/admin/main.py b/vocolab/admin/main.py index bef81ef..b58825c 100644 --- a/vocolab/admin/main.py +++ b/vocolab/admin/main.py @@ -38,9 +38,9 @@ def build_cli(): if has_challenges: # challenge functions tree.add_cmd_tree( - commands.challenges.ChallengesCMD(CMD_NAME, 'challenges', ''), - commands.challenges.AddChallengeCMD(CMD_NAME, 'add', 'challenges'), - commands.challenges.SetChallenge(CMD_NAME, 'set', 'challenges') + commands.challenges.BenchmarksCMD(CMD_NAME, 'benchmarks', ''), + commands.challenges.AddBenchmarkCMD(CMD_NAME, 'add', 'benchmarks'), + commands.challenges.SetBenchmarkCMD(CMD_NAME, 'set', 'benchmarks') ) if has_db: diff --git a/vocolab/api/endpoints/benchmarks.py b/vocolab/api/endpoints/benchmarks.py new 
file mode 100644 index 0000000..b95e349 --- /dev/null +++ b/vocolab/api/endpoints/benchmarks.py @@ -0,0 +1,45 @@ +""" Routing for /challenges section of the API +This section handles challenge data +""" +from typing import List + +from fastapi import ( + APIRouter +) + +from vocolab.data import models, model_queries +from vocolab.settings import get_settings + +router = APIRouter() +_settings = get_settings() + + +@router.get('/list') +async def get_challenge_list(include_inactive: bool = False): + """ Return a list of all active benchmarks """ + return await model_queries.BenchmarkList.get(include_all=include_inactive) + + +@router.get('/{benchmark_id}/info') +async def get_challenge_info(benchmark_id: str): + """ Return information of a specific benchmark """ + # todo add leaderboards to challenge info + return await model_queries.Benchmark.get(benchmark_id=benchmark_id, allow_inactive=True) + + +@router.get('/{benchmark_id}/submissions/list', + responses={404: {"model": models.api.Message}}) +async def get_sub_list(benchmark_id: str) -> model_queries.ChallengeSubmissionList: + """ Return information of a specific benchmark """ + return await model_queries.ChallengeSubmissionList.get_from_challenge(benchmark_id) + + +@router.get("/{benchmark_id}/models/list") +async def get_models_list(challenge_id: str): + pass + + +@router.get('/{benchmark_id}/leaderboards/list', responses={404: {"model": models.api.Message}}) +async def get_all_leaderboards(benchmark_id: str) -> model_queries.LeaderboardList: + """ Return information of a specific challenge """ + return await model_queries.LeaderboardList.get_by_challenge(benchmark_id=benchmark_id) diff --git a/vocolab/api/endpoints/challenges.py b/vocolab/api/endpoints/challenges.py deleted file mode 100644 index 6b98d30..0000000 --- a/vocolab/api/endpoints/challenges.py +++ /dev/null @@ -1,47 +0,0 @@ -""" Routing for /challenges section of the API -This section handles challenge data -""" -from typing import List - -from 
fastapi import ( - APIRouter -) - -from vocolab.data import models, model_queries -from vocolab.settings import get_settings - -router = APIRouter() -_settings = get_settings() - - -@router.get('/list', response_model=List[models.api.ChallengePreview]) -async def get_challenge_list(include_inactive: bool = False): - """ Return a list of all active challenges """ - challenge_lst = await model_queries.ChallengeList.get(include_all=include_inactive) - return [models.api.ChallengePreview(id=ch.id, label=ch.label, active=ch.active) for ch in challenge_lst.items] - - -@router.get('/{challenge_id}/info', response_model=models.api.ChallengesResponse, - responses={404: {"model": models.api.Message}}) -async def get_challenge_info(challenge_id: int): - """ Return information of a specific challenge """ - # todo add leaderboards to challenge info - return await model_queries.Challenge.get(challenge_id=challenge_id, allow_inactive=True) - - -@router.get('/{challenge_id}/submissions/list', - responses={404: {"model": models.api.Message}}) -async def get_sub_list(challenge_id: int) -> model_queries.ChallengeSubmissionList: - """ Return information of a specific challenge """ - return await model_queries.ChallengeSubmissionList.get_from_challenge(challenge_id) - - -@router.get("/{challenge_id}/models/list") -async def get_models_list(challenge_id: int): - pass - - -@router.get('/{challenge_id}/leaderboards/list', responses={404: {"model": models.api.Message}}) -async def get_all_leaderboards(challenge_id: int) -> model_queries.LeaderboardList: - """ Return information of a specific challenge """ - return await model_queries.LeaderboardList.get_by_challenge(challenge_id=challenge_id) diff --git a/vocolab/api/endpoints/leaderboards.py b/vocolab/api/endpoints/leaderboards.py index 4eacb6d..065ea81 100644 --- a/vocolab/api/endpoints/leaderboards.py +++ b/vocolab/api/endpoints/leaderboards.py @@ -13,14 +13,13 @@ _settings = get_settings() - @router.get("/list") async def get_list(): 
pass -@router.get('{leaderboard_id}/info', response_model=models.api.ChallengesResponse, - responses={404: {"model": models.api.Message}}) -async def get_leaderboard_info(leaderboard_id: int): + +@router.get('{leaderboard_id}/info') +async def get_leaderboard_info(leaderboard_id: str): """ Return information of a specific challenge """ return await model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 8955b32..b9e60a3 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -1,6 +1,7 @@ """ Routing for /users section of the API This section handles user data """ +import functools import pydantic from fastapi import ( @@ -15,7 +16,7 @@ router = APIRouter() _settings = get_settings() -NonAllowedOperation = HTTPException(status_code=401, detail="Operation not allowed") +NonAllowedOperation = functools.partial(HTTPException, status_code=401, detail="Operation not allowed") @router.get("/{username}/profile") @@ -23,12 +24,13 @@ def get_profile(username: str, current_user: model_queries.User = Depends( api_lib.get_current_active_user)) -> users_lib.UserProfileData: if current_user.username != username: - raise NonAllowedOperation + raise NonAllowedOperation() try: user_data = current_user.get_profile_data() # re-update verification user_data.verified = current_user.is_verified() + return user_data except pydantic.ValidationError: out.log.error("Failed to validate profile data") @@ -41,10 +43,10 @@ def update_profile( user_data: users_lib.UserProfileData, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): if current_user.username != username: - raise NonAllowedOperation + raise NonAllowedOperation() if user_data.username != current_user.username: - raise NonAllowedOperation + raise NonAllowedOperation() user_data.verified = current_user.is_verified() user_data.save() @@ -55,7 +57,7 @@ def update_profile( async def 
list_users_models(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): """ Returning list of models of current user """ if current_user.username != username: - raise NonAllowedOperation + raise NonAllowedOperation() return await model_queries.ModelIDList.get_by_user(current_user.id) @@ -65,7 +67,7 @@ async def create_new_model(username: str, author_name: str, data: models.api.New """ Create a new model id""" print("WRF") if current_user.username != username: - raise NonAllowedOperation + raise NonAllowedOperation() # create & return the new model_id try: @@ -81,7 +83,7 @@ async def create_new_model(username: str, author_name: str, data: models.api.New async def list_users_submissions(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): if current_user.username != username: - raise NonAllowedOperation + raise NonAllowedOperation() items = await model_queries.ChallengeSubmissionList.get_from_user(user_id=current_user.id) return items @@ -91,16 +93,19 @@ async def list_users_submissions(username: str, async def create_new_submission(username: str, data: models.api.NewSubmissionRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): if current_user.username != username: - raise NonAllowedOperation + raise NonAllowedOperation() - # todo check evaluator & other details - new_submission_id = await model_queries.ChallengeSubmission.create( + new_submission = await model_queries.ChallengeSubmission.create( + user_id=current_user.id, username=current_user.username, - new_submission=data, - evaluator_id=None, + model_id=data.model_id, + benchmark_id=data.benchmark_id ) - return new_submission_id + # todo: create file structure + # todo extract leaderboards + + return new_submission.id # todo: update submission process diff --git a/vocolab/api/router.py b/vocolab/api/router.py index 7ccd662..9dba4da 100644 --- a/vocolab/api/router.py +++ b/vocolab/api/router.py @@ 
-5,7 +5,7 @@ from pydantic import BaseModel, EmailStr from vocolab.api.endpoints import ( - users, auth, challenges, leaderboards, models, submissions + users, auth, benchmarks, leaderboards, models, submissions ) from vocolab.api.pages import users as user_pages from vocolab.settings import get_settings @@ -53,7 +53,7 @@ def get_error(): api_router.include_router(auth.router, prefix="/auth", tags=["auth"]) api_router.include_router(users.router, prefix="/users", tags=["user-data"]) -api_router.include_router(challenges.router, prefix="/challenges", tags=["challenges"]) +api_router.include_router(benchmarks.router, prefix="/benchmarks", tags=["benchmarks"]) api_router.include_router(models.router, prefix="/models", tags=["model"]) api_router.include_router(submissions.router, prefix="/submissions", tags=["submissions"]) api_router.include_router(leaderboards.router, prefix="/leaderboards", tags=["leaderboards"]) diff --git a/vocolab/data/model_queries/auth.py b/vocolab/data/model_queries/auth.py index 7ba274a..8229d6f 100644 --- a/vocolab/data/model_queries/auth.py +++ b/vocolab/data/model_queries/auth.py @@ -161,7 +161,8 @@ async def create(cls, *, new_usr: models.api.UserCreateRequest): affiliation=new_usr.affiliation, first_name=new_usr.first_name, last_name=new_usr.last_name, - verified=False + verified=False, + created=datetime.now() ) profile_data.save() return verification_code diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py index 250c84e..188e31a 100644 --- a/vocolab/data/model_queries/challenges.py +++ b/vocolab/data/model_queries/challenges.py @@ -4,13 +4,14 @@ from pathlib import Path from typing import Optional, List, Any, Iterable -from pydantic import BaseModel -from pydantic import HttpUrl +from pydantic import BaseModel, HttpUrl, Json +from vocolab import get_settings from vocolab.data import models, tables from vocolab.core import misc, leaderboards_lib from ..db import zrDB, db_exc +st = 
get_settings() class EvaluatorItem(BaseModel): """ Data representation of an evaluator """ @@ -76,15 +77,15 @@ async def get(cls) -> "EvaluatorList": return cls(items=results) -class Challenge(BaseModel): +class Benchmark(BaseModel): """ Data representation of a challenge """ - id: int label: str start_date: date end_date: Optional[date] active: bool url: HttpUrl evaluator: Optional[int] + auto_eval: bool = st.task_queue_options.AUTO_EVAL class Config: orm_mode = True @@ -104,7 +105,7 @@ def get_field_names(cls): @classmethod async def create(cls, item: models.cli.NewChallenge): try: - query = tables.challenges_table.insert().values( + query = tables.benchmarks_table.insert().values( **item.dict() ) await zrDB.execute(query) @@ -112,19 +113,19 @@ async def create(cls, item: models.cli.NewChallenge): db_exc.parse_user_insertion(e) @classmethod - async def get(cls, *, challenge_id: int, allow_inactive: bool = False) -> "Challenge": - query = tables.challenges_table.select().where( - tables.challenges_table.c.id == challenge_id + async def get(cls, *, benchmark_id: str, allow_inactive: bool = False) -> "Benchmark": + query = tables.benchmarks_table.select().where( + tables.benchmarks_table.c.label == benchmark_id ) ch_data = await zrDB.fetch_one(query) if ch_data is None: - raise ValueError(f'There is no challenge with the following id: {challenge_id}') + raise ValueError(f'There is no challenge with the following id: {benchmark_id}') ch = cls.parse_obj(ch_data) if allow_inactive: return ch else: if not ch.is_active(): - raise ValueError(f"The Challenge {ch.label}[{ch.id}] is not active") + raise ValueError(f"The Challenge {ch.label} is not active") return ch async def update_property(self, *, variable_name: str, value: Any, allow_parsing: bool = False): @@ -143,8 +144,8 @@ async def update_property(self, *, variable_name: str, value: Any, allow_parsing setattr(self, variable_name, value) # update database - query = tables.challenges_table.update().where( - 
tables.challenges_table.c.id == self.id + query = tables.benchmarks_table.update().where( + tables.benchmarks_table.c.label == self.label ).values({f"{variable_name}": value}) try: @@ -156,25 +157,25 @@ async def update_property(self, *, variable_name: str, value: Any, allow_parsing async def delete(self): """ Remove from database """ - query = tables.challenges_table.delete().where( - tables.challenges_table.c.id == self.id + query = tables.benchmarks_table.delete().where( + tables.benchmarks_table.c.label == self.label ) await zrDB.execute(query) -class ChallengeList(BaseModel): - items: List[Challenge] +class BenchmarkList(BaseModel): + items: List[Benchmark] - def __iter__(self) -> Iterable[Challenge]: + def __iter__(self) -> Iterable[Benchmark]: return iter(self.items) - def filter_active(self) -> "ChallengeList": + def filter_active(self) -> "BenchmarkList": self.items = [i for i in self.items if i.is_active()] return self @classmethod - async def get(cls, include_all: bool = False) -> "ChallengeList": - query = tables.challenges_table.select() + async def get(cls, include_all: bool = False) -> "BenchmarkList": + query = tables.benchmarks_table.select() challenges = await zrDB.fetch_all(query) if challenges is None: raise ValueError('No challenges were found') @@ -186,9 +187,8 @@ async def get(cls, include_all: bool = False) -> "ChallengeList": class Leaderboard(BaseModel): """ Data representation of a Leaderboard """ - id: Optional[int] - challenge_id: int # Id to linked challenge label: str # Name of leaderboard + benchmark_id: str # Label of the Benchmark archived: bool # is_archived static_files: bool # has static files sorting_key: Optional[str] # path to the item to use as sorting key @@ -210,7 +210,7 @@ def get_dir(self): async def create(cls, ld_data: 'Leaderboard'): query = tables.leaderboards_table.insert().values( label=ld_data.label, - challenge_id=ld_data.challenge_id, + benchmark_id=ld_data.benchmark_id, archived=ld_data.archived, 
static_files=ld_data.static_files, sorting_key=ld_data.sorting_key @@ -257,7 +257,7 @@ async def update_property(self, *, variable_name: str, value: Any, allow_parsing value = str(value) query = tables.leaderboards_table.update().where( - tables.leaderboards_table.c.id == self.id + tables.leaderboards_table.c.label == self.label ).values({f"{variable_name}": str(value)}) try: await zrDB.execute(query) @@ -267,9 +267,9 @@ async def update_property(self, *, variable_name: str, value: Any, allow_parsing return value @classmethod - async def get(cls, leaderboard_id: int) -> Optional["Leaderboard"]: + async def get(cls, leaderboard_id: str) -> Optional["Leaderboard"]: query = tables.leaderboards_table.select().where( - tables.leaderboards_table.c.id == leaderboard_id + tables.leaderboards_table.c.label == leaderboard_id ) ld = await zrDB.fetch_one(query) if ld is None: @@ -292,9 +292,9 @@ async def get_all(cls) -> "LeaderboardList": return cls(items=ld_list) @classmethod - async def get_by_challenge(cls, challenge_id: int) -> "LeaderboardList": + async def get_by_challenge(cls, benchmark_id: str) -> "LeaderboardList": query = tables.leaderboards_table.select().where( - tables.leaderboards_table.c.challenge_id == challenge_id + tables.leaderboards_table.c.benchmark_id == benchmark_id ) ld_list = await zrDB.fetch_all(query) if not ld_list: @@ -305,8 +305,9 @@ async def get_by_challenge(cls, challenge_id: int) -> "LeaderboardList": class LeaderboardEntry: """ Data representation of a leaderboard entry """ id: Optional[int] + data: Json entry_path: Path - model_id: str submission_id: str - leaderboard_id: int + leaderboard_id: str + user_id: int submitted_at: datetime diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index b5fe7c1..e0e9b8d 100644 --- a/vocolab/data/model_queries/models.py +++ b/vocolab/data/model_queries/models.py @@ -8,26 +8,24 @@ from vocolab import get_settings from vocolab.data import db, tables, models +from 
.challenges import Benchmark _settings = get_settings() -# TODO: add method for easy author_label editing - - class ModelID(BaseModel): """ Data representation of a Model id & its metadata""" id: str user_id: int created_at: datetime description: str - gpu_budget: str + gpu_budget: Optional[str] train_set: str authors: str institution: str - team: str - paper_url: AnyHttpUrl - code_url: AnyHttpUrl + team: Optional[str] + paper_url: Optional[AnyHttpUrl] + code_url: Optional[AnyHttpUrl] @staticmethod def nth_word(n: int) -> str: @@ -120,7 +118,8 @@ async def get_by_user(cls, user_id: int) -> "ModelIDList": query = tables.models_table.select().where( tables.models_table.c.user_id == user_id ) - items = db.zrDB.fetch_all(query) + items = await db.zrDB.fetch_all(query) + print(items, f"{type(items)=}") if not items: return cls(items=[]) return cls.parse_obj(dict(items=items)) @@ -151,7 +150,7 @@ class ChallengeSubmission(BaseModel): """ Data representation of a submission to a challenge """ id: str user_id: int - track_id: int + benchmark_id: int model_id: str submit_date: datetime status: SubmissionStatus @@ -163,22 +162,31 @@ class Config: orm_mode = True @classmethod - async def create(cls, username: str, new_submission: models.api.NewSubmissionRequest, evaluator_id: Optional[int]) -> str: + async def create( + cls, user_id: int, username: str, + model_id: str, benchmark_id: str + ) -> "ChallengeSubmission": """ Creates a database entry for the new submission """ + benchmark = await Benchmark.get(benchmark_id=benchmark_id) + submission_id = f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{username}" - values = new_submission.dict() - values["id"] = submission_id - values["submit_date"] = datetime.now() - values["status"] = SubmissionStatus.uploading - values["evaluator_id"] = evaluator_id - # todo: auto-eval should maybe work differently ? 
- values["auto_eval"] = _settings.task_queue_options.AUTO_EVAL + + entry = cls.parse_obj(dict( + id=submission_id, + model_id=model_id, + benchmark_id=benchmark_id, + user_id=user_id, + submit_date=datetime.now(), + status=SubmissionStatus.uploading, + evaluator_id=benchmark.evaluator, + auto_eval=benchmark.auto_eval + )) await db.zrDB.execute( query=tables.submissions_table.insert(), - values=values + values=entry.dict() ) - return submission_id + return entry @classmethod async def get(cls, submission_id: str) -> Optional["ChallengeSubmission"]: @@ -222,10 +230,10 @@ def __iter__(self) -> Iterable[ChallengeSubmission]: return iter(self.items) @classmethod - async def get_from_challenge(cls, challenge_id: int): + async def get_from_challenge(cls, benchmark_id: str): items = await db.zrDB.fetch_all( tables.submissions_table.select().where( - tables.submissions_table.c.track_id == challenge_id + tables.submissions_table.c.benchmark_id == benchmark_id ) ) if items is None: @@ -255,7 +263,7 @@ async def get_from_user(cls, user_id: int): if items is None: items = [] - return cls(items=items) + return cls.parse_obj(dict(items=items)) @classmethod async def get_by_status(cls, status: SubmissionStatus): diff --git a/vocolab/data/models/api/challenges.py b/vocolab/data/models/api/challenges.py index 44606e5..9fca517 100644 --- a/vocolab/data/models/api/challenges.py +++ b/vocolab/data/models/api/challenges.py @@ -3,25 +3,7 @@ from pathlib import Path from typing import Optional, List, Tuple, Dict -from pydantic import BaseModel, HttpUrl - - -class ChallengePreview(BaseModel): - """ Used as response type for root challenge list request""" - id: int - label: str - active: bool - - -class ChallengesResponse(BaseModel): - """ Used as response type for preview of a challenge """ - id: int - label: str - start_date: date - end_date: Optional[date] - active: bool - url: HttpUrl - evaluator: Optional[int] +from pydantic import BaseModel class 
SubmissionRequestFileIndexItem(BaseModel): @@ -30,20 +12,20 @@ class SubmissionRequestFileIndexItem(BaseModel): File index is used to verify correct number of files/parts have been uploaded """ - file_name: str - file_size: int - file_hash: Optional[str] = None + filename: str + filesize: int + filehash: Optional[str] = None class NewSubmissionRequest(BaseModel): """ Dataclass used for input in the creation of a new submission to a challenge """ - username: str model_id: str + benchmark_id: str filename: str hash: str multipart: bool has_scores: bool - leaderboards: Dict[str, Path] + leaderboard: str index: Optional[List[SubmissionRequestFileIndexItem]] diff --git a/vocolab/data/models/cli.py b/vocolab/data/models/cli.py index e0f6b60..54a1f58 100644 --- a/vocolab/data/models/cli.py +++ b/vocolab/data/models/cli.py @@ -9,7 +9,6 @@ class NewChallenge(BaseModel): """ Dataclass for challenge creation """ - id: Optional[int] label: str active: bool url: AnyHttpUrl diff --git a/vocolab/data/tables.py b/vocolab/data/tables.py index 9dfd101..83dadb0 100644 --- a/vocolab/data/tables.py +++ b/vocolab/data/tables.py @@ -2,6 +2,9 @@ tables_metadata = sqlalchemy.MetaData() +_user_id = "users_credentials.id" +_benchmark_id = "benchmarks.label" + """ Table Representing Users""" users_table = sqlalchemy.Table( @@ -24,7 +27,7 @@ "models", tables_metadata, sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), - sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("users_credentials.id")), + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey(_user_id)), sqlalchemy.Column("created_at", sqlalchemy.DateTime), sqlalchemy.Column("description", sqlalchemy.String), sqlalchemy.Column("gpu_budget", sqlalchemy.String), @@ -54,16 +57,16 @@ """ Table used to index the existing challenges & their metadata """ -challenges_table = sqlalchemy.Table( - "challenges", +benchmarks_table = sqlalchemy.Table( + "benchmarks", tables_metadata, - 
sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, autoincrement=True), - sqlalchemy.Column("label", sqlalchemy.String, unique=True), + sqlalchemy.Column("label", sqlalchemy.String, unique=True, primary_key=True), sqlalchemy.Column("start_date", sqlalchemy.Date), sqlalchemy.Column("end_date", sqlalchemy.Date), sqlalchemy.Column("active", sqlalchemy.Boolean), sqlalchemy.Column("url", sqlalchemy.String), - sqlalchemy.Column("evaluator", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")) + sqlalchemy.Column("evaluator", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")), + sqlalchemy.Column("auto_eval", sqlalchemy.Boolean) ) """ @@ -72,9 +75,8 @@ leaderboards_table = sqlalchemy.Table( "leaderboards", tables_metadata, - sqlalchemy.Column('id', sqlalchemy.Integer, primary_key=True, autoincrement=True), - sqlalchemy.Column('challenge_id', sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), - sqlalchemy.Column('label', sqlalchemy.String, unique=True), + sqlalchemy.Column('label', sqlalchemy.String, unique=True, primary_key=True), + sqlalchemy.Column('benchmark_id', sqlalchemy.String, sqlalchemy.ForeignKey(_benchmark_id)), sqlalchemy.Column('archived', sqlalchemy.Boolean), sqlalchemy.Column('static_files', sqlalchemy.Boolean), sqlalchemy.Column('sorting_key', sqlalchemy.String), @@ -84,11 +86,11 @@ Table entry indexing submissions to challenges """ submissions_table = sqlalchemy.Table( - "challenge_submissions", + "benchmark_submissions", tables_metadata, sqlalchemy.Column("id", sqlalchemy.String, primary_key=True, unique=True), - sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("users_credentials.id")), - sqlalchemy.Column("track_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("challenges.id")), + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey(_user_id)), + sqlalchemy.Column("benchmark_id", sqlalchemy.Integer, sqlalchemy.ForeignKey(_benchmark_id)), sqlalchemy.Column("model_id", 
sqlalchemy.Integer, sqlalchemy.ForeignKey("models.id")), sqlalchemy.Column("submit_date", sqlalchemy.DateTime), sqlalchemy.Column("status", sqlalchemy.String), @@ -103,10 +105,9 @@ tables_metadata, sqlalchemy.Column("id", sqlalchemy.Integer, primary_key=True, unique=True, autoincrement=True), sqlalchemy.Column("data", sqlalchemy.JSON), - sqlalchemy.Column("src", sqlalchemy.String), - sqlalchemy.Column("model_id", sqlalchemy.String, sqlalchemy.ForeignKey("leaderboards.id")), - sqlalchemy.Column("submission_id", sqlalchemy.String, sqlalchemy.ForeignKey("challenge_submissions.id")), - sqlalchemy.Column("leaderboard_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("models.id")), - sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("users_credentials.id")), + sqlalchemy.Column("entry_path", sqlalchemy.String), + sqlalchemy.Column("submission_id", sqlalchemy.String, sqlalchemy.ForeignKey("benchmark_submissions.id")), + sqlalchemy.Column("leaderboard_id", sqlalchemy.String, sqlalchemy.ForeignKey("leaderboards.label")), + sqlalchemy.Column("user_id", sqlalchemy.Integer, sqlalchemy.ForeignKey(_user_id)), sqlalchemy.Column("submitted_at", sqlalchemy.DATETIME) ) \ No newline at end of file diff --git a/vocolab/settings.py b/vocolab/settings.py index bf619dc..dcdf6f7 100644 --- a/vocolab/settings.py +++ b/vocolab/settings.py @@ -69,7 +69,7 @@ class TaskQueueSettings(BaseModel): HOSTS: Set[str] = set() REMOTE_STORAGE: Dict[str, Path] = dict() REMOTE_BIN: Dict[str, Path] = dict() - AUTO_EVAL: bool = True + AUTO_EVAL: bool = False class AppSettings(BaseModel): From a6238c773281ff82f49b23418538d449b9c20bba Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Wed, 15 Mar 2023 18:11:56 +0100 Subject: [PATCH 19/28] updates --- vocolab/api/endpoints/users.py | 173 ++-------------------- vocolab/core/submission_lib/submission.py | 117 +++++++++------ vocolab/core/submission_lib/upload.py | 37 +++-- vocolab/data/models/api/challenges.py | 4 +- 4 files changed, 108 
insertions(+), 223 deletions(-) diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index b9e60a3..b9ae4b3 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -9,7 +9,7 @@ ) from vocolab import out -from vocolab.core import api_lib, users_lib +from vocolab.core import api_lib, users_lib, submission_lib from vocolab.data import model_queries, models from vocolab.settings import get_settings @@ -30,7 +30,6 @@ def get_profile(username: str, user_data = current_user.get_profile_data() # re-update verification user_data.verified = current_user.is_verified() - return user_data except pydantic.ValidationError: out.log.error("Failed to validate profile data") @@ -69,9 +68,11 @@ async def create_new_model(username: str, author_name: str, data: models.api.New if current_user.username != username: raise NonAllowedOperation() - # create & return the new model_id try: + # create in DB model_id = await model_queries.ModelID.create(user_id=current_user.id, first_author_name=author_name, data=data) + # create on disk + submission_lib.ModelDir.make(model_id) except Exception as e: out.console.print(e) raise e @@ -82,6 +83,7 @@ async def create_new_model(username: str, author_name: str, data: models.api.New @router.get("/{username}/submissions/list") async def list_users_submissions(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + """ List submissions created by the user """ if current_user.username != username: raise NonAllowedOperation() @@ -92,6 +94,7 @@ async def list_users_submissions(username: str, @router.post("/{username}/submissions/create") async def create_new_submission(username: str, data: models.api.NewSubmissionRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + """ Create a new empty submission with the given information """ if current_user.username != username: raise NonAllowedOperation() @@ -102,160 +105,14 @@ async def 
create_new_submission(username: str, data: models.api.NewSubmissionReq benchmark_id=data.benchmark_id ) - # todo: create file structure - # todo extract leaderboards + # create model_id & submission dir + model_dir = submission_lib.ModelDir.load(data.model_id) + model_dir.make_submission( + submission_id=new_submission.id, + benchmark_label=new_submission.benchmark_id, + auto_eval=new_submission.auto_eval, + username=current_user.username, + leaderboard_file=data.leaderboard + ) return new_submission.id - - -# todo: update submission process -# @router.post('/{model_id}/submissions/create/', responses={404: {"model": models.api.Message}}) -# async def create_submission( -# model_id: str, challenge_id: int, -# data: models.api.NewSubmissionRequest, -# current_user: schema.User = Depends(api_lib.get_current_active_user) -# ): -# """ Create a new submission """ -# -# challenge = await challengesQ.get_challenge(challenge_id=challenge_id) -# if challenge is None: -# return ValueError('challenge {challenge_id} not found or inactive') -# -# # create db entry -# submission_id = await challengesQ.add_submission(new_submission=models.api.NewSubmission( -# user_id=current_user.id, -# track_id=challenge.id, -# ), evaluator_id=challenge.evaluator) -# -# # create disk entry -# model_dir = submission_lib.ModelDir.load(data.model_id) -# model_dir.make_submission( -# submission_id=submission_id, -# challenge_id=challenge_id, -# challenge_label=challenge.label, -# auto_eval=..., -# request_meta=data -# ) -# -# return submission_id -# -# -# @router.get('{username}/submissions') -# async def submissions_list(username: str): -# """ Return a list of all user submissions """ -# user = model_queries.User.get(by_username=username) -# submissions = await challengesQ.get_user_submissions(user_id=current_user.id) -# submissions = [ -# models.api.SubmissionPreview( -# submission_id=s.id, -# track_id=s.track_id, -# track_label=(await challengesQ.get_challenge(challenge_id=s.track_id)).label, -# 
status=s.status -# ) -# for s in submissions -# ] -# -# data = {} -# for sub in submissions: -# if sub.track_label in data.keys(): -# data[sub.track_label].append(sub) -# else: -# data[sub.track_label] = [sub] -# -# return data -# -# -# -# @router.get('{username}//submissions/tracks/{track_id}') -# async def submissions_list_by_track( -# track_id: int, current_user: schema.User = Depends(api_lib.get_current_active_user)): -# """ Return a list of all user submissions """ -# track = await challengesQ.get_challenge(challenge_id=track_id) -# submissions = await challengesQ.get_user_submissions(user_id=current_user.id) -# -# return [ -# models.api.SubmissionPreview( -# submission_id=s.id, -# track_id=s.track_id, -# track_label=track.label, -# status=s.status -# ) -# for s in submissions if s.track_id == track.id -# ] -# -# -# @router.get('/submissions/{submissions_id}') -# async def get_submission(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): -# """ Return information on a submission """ -# submission = await challengesQ.get_submission(by_id=submissions_id) -# if submission.user_id != current_user.id: -# raise exc.AccessError("current user is not allowed to preview this submission !", -# status=exc.http_status.HTTP_403_FORBIDDEN) -# -# track = await challengesQ.get_challenge(challenge_id=submission.track_id) -# leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) -# -# if submission.evaluator_id is not None: -# evaluator = await challengesQ.get_evaluator(by_id=submission.evaluator_id) -# evaluator_cmd = f"{evaluator.executor} {evaluator.script_path} {evaluator.executor_arguments.replace(';', ' ')}" -# evaluator_label = evaluator.label -# else: -# evaluator_cmd = "" -# evaluator_label = "" -# -# return models.api.SubmissionView( -# submission_id=submission.id, -# user_id=current_user.id, -# username=current_user.username, -# track_label=track.label, -# track_id=track.id, -# 
status=submission.status, -# date=submission.submit_date, -# evaluator_cmd=evaluator_cmd, -# evaluator_label=evaluator_label, -# leaderboards=[(ld.label, ld.id) for ld in leaderboards] -# ) -# -# -# @router.get('/submissions/{submissions_id}/status') -# async def get_submission_status( -# submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): -# """ Return status of a submission """ -# submission = await challengesQ.get_submission(by_id=submissions_id) -# if submission.user_id != current_user.id: -# raise exc.AccessError("current user is not allowed to preview this submission !", -# status=exc.http_status.HTTP_403_FORBIDDEN) -# -# return submission.status -# -# -# @router.get('/submissions/{submissions_id}/log') -# async def get_submission_status( -# submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): -# """ Return status of a submission """ -# submission = await challengesQ.get_submission(by_id=submissions_id) -# if submission.user_id != current_user.id: -# raise exc.AccessError("current user is not allowed to preview this submission !", -# status=exc.http_status.HTTP_403_FORBIDDEN) -# -# log = submission_lib.SubmissionLogger(submissions_id) -# return log.get_text() -# -# -# @router.get('/submissions/{submissions_id}/scores') -# async def get_user_results(submissions_id: str, current_user: schema.User = Depends(api_lib.get_current_active_user)): -# """ Return status of a submission """ -# submission = await challengesQ.get_submission(by_id=submissions_id) -# if submission.user_id != current_user.id: -# raise exc.AccessError("current user is not allowed to preview this submission !", -# status=exc.http_status.HTTP_403_FORBIDDEN) -# sub_location = submission_lib.get_submission_dir(submission_id=submission.id) -# -# leaderboards = await leaderboardQ.get_leaderboards(by_challenge_id=submission.track_id) -# result = {} -# for ld in leaderboards: -# ld_file = sub_location / ld.entry_file -# if 
ld_file.is_file(): -# result[ld.label] = api_lib.file2dict(ld_file) -# -# return result diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py index 4aeeb59..e55547c 100644 --- a/vocolab/core/submission_lib/submission.py +++ b/vocolab/core/submission_lib/submission.py @@ -3,16 +3,16 @@ import shutil from datetime import datetime from pathlib import Path -from typing import Dict, List +from typing import List, Optional from fastapi import UploadFile from pydantic import BaseModel -from vocolab.data import models from vocolab import get_settings -from ..commons import unzip, ssh_exec, rsync, zip_folder, scp from .logs import SubmissionLogger -from .upload import MultipartUploadHandler, SinglepartUploadHandler +from .upload import MultipartUploadHandler, SinglepartUploadHandler, ManifestIndexItem +from ..commons import unzip, ssh_exec, rsync, zip_folder, scp +from ...data.models.api import SubmissionRequestFileIndexItem _settings = get_settings() @@ -20,11 +20,10 @@ class SubmissionInfo(BaseModel): model_id: str username: str - track_id: int - track_label: str + benchmark_label: str submission_id: str created_at: datetime - leaderboard_entries: Dict[str, Path] + leaderboard_entry: Optional[str] class SubmissionDir(BaseModel, arbitrary_types_allowed=True): @@ -129,7 +128,8 @@ def clean_all_locks(self): self.interrupted_lock.unlink(missing_ok=True) self.clean_lock.unlink(missing_ok=True) - def get_log_handler(self) -> SubmissionLogger: + @property + def log_handler(self) -> SubmissionLogger: """ build the SubmissionLogger class that allows to log submission relative events """ return SubmissionLogger(root_dir=self.root_dir) @@ -156,7 +156,7 @@ def add_content(self, file_name: str, data: UploadFile): """ Multipart upload """ handler = MultipartUploadHandler.load_from_index(self.multipart_index_file) handler.add_part( - logger=self.get_log_handler(), + logger=self.log_handler, file_name=file_name, data=data ) @@ -168,7 +168,7 @@ 
def add_content(self, file_name: str, data: UploadFile): """ Single part upload """ handler = SinglepartUploadHandler(root_dir=self.root_dir) handler.write_data( - logger=self.get_log_handler(), + logger=self.log_handler, file_name=file_name, data=data ) @@ -185,7 +185,7 @@ def send_content(self, hostname: str) -> Path: transfer_root_dir = _settings.task_queue_options.REMOTE_STORAGE.get(hostname) model_id = self.info.model_id remote_submission_dir = transfer_root_dir / model_id / self.submission_id - logger = self.get_log_handler() + logger = self.log_handler # if host is local & submission dir is current, do nothing if (not is_remote) and (transfer_root_dir == _settings.submission_dir): @@ -212,7 +212,7 @@ def fetch_content(self, hostname: str): transfer_root_dir = _settings.task_queue_options.REMOTE_STORAGE.get(hostname) model_id = self.info.model_id remote_submission_dir = transfer_root_dir / model_id / self.submission_id - logger = self.get_log_handler() + logger = self.log_handler # if host is local & submission dir is current, do nothing if (not is_remote) and (transfer_root_dir == _settings.submission_dir): @@ -232,14 +232,9 @@ def fetch_content(self, hostname: str): logger.log(res.stderr.decode()) raise ValueError(f"Failed to copy files from host {hostname}") - def archive(self, zip_files: bool = False): - """Transfer submission to archive """ - location = _settings.submission_archive_dir / self.info.model_id / self.info.submission_id - logger = self.get_log_handler() - host = _settings.ARCHIVE_HOST - - if _settings.remote_archive and zip_files: - """ Archive file to remote host as a zip file""" + def __archive_zip(self): + """ Archive submission as zip """ + if _settings.remote_archive: host = _settings.ARCHIVE_HOST with _settings.get_temp_dir() as tmp: archive_file = tmp / f'{self.info.model_id}_{self.info.submission_id}' @@ -247,29 +242,35 @@ def archive(self, zip_files: bool = False): res = scp(src=archive_file, host=host, 
dest=_settings.submission_archive_dir) if res.returncode != 0: raise ValueError(f"Failed to transfer to {host}") + else: + """ Archive files to local archive as a zip file""" + zip_folder( + archive_file=self.root_dir / f'{self.info.model_id}_{self.info.submission_id}', + location=self.root_dir + ) - elif _settings.remote_archive and not zip_files: - """ Archive file to remote host """ - code, _ = ssh_exec(host, ['mkdir', '-p', f"{location}"]) + def __archive_raw(self): + if _settings.remote_archive: + host = _settings.ARCHIVE_HOST + code, _ = ssh_exec(host, ['mkdir', '-p', f"{self.root_dir}"]) if code != 0: raise ValueError(f"No write permissions on {host}") - res = rsync(src=self.root_dir, dest_host=host, dest=location) + res = rsync(src=self.root_dir, dest_host=host, dest=self.root_dir) if res.returncode != 0: raise ValueError(f"Failed to copy files to host {host}") - elif not _settings.remote_archive and not zip_files: - """ Archive files to local archive """ - _res = rsync(src=self.root_dir, dest=location) + else: + _res = rsync(src=self.root_dir, dest=self.root_dir) if _res.returncode != 0: - raise ValueError(f"Failed to copy files to archive") + raise ValueError("Failed to copy files to archive") - elif not _settings.remote_archive and zip_files: - """ Archive files to local archive as a zip file""" - zip_folder( - archive_file=location / f'{self.info.model_id}_{self.info.submission_id}', - location=self.root_dir - ) + def archive(self, zip_files: bool = False): + """Transfer submission to archive """ + if zip_files: + self.__archive_zip() + else: + self.__archive_raw() def remove_all(self): """ Remove all files related to this submission """ @@ -283,16 +284,26 @@ class ModelDir(BaseModel): def label(self): return self.root_dir.name + @classmethod + def make(cls, model_id: str): + root = _settings.submission_dir / model_id + root.mkdir(exist_ok=True, parents=True) + @classmethod def load(cls, model_id: str): root = _settings.submission_dir / model_id if not 
root.is_dir(): - raise FileNotFoundError(f'Model {model_id} does not exist') + raise FileNotFoundError('Model Folder not found') + return cls(root_dir=root) - def make_submission(self, submission_id: str, challenge_id: int, challenge_label: str, - auto_eval: bool, request_meta: models.api.NewSubmissionRequest): + def make_submission( + self, submission_id: str, benchmark_label: str, auto_eval: bool, + username: str, filehash: str, has_scores: bool, multipart: bool, + index: Optional[List[SubmissionRequestFileIndexItem]], + leaderboard_file: Optional[str] = None + ) -> SubmissionDir: root_dir = self.root_dir / submission_id if root_dir.is_dir(): raise FileExistsError(f'Submission {submission_id} cannot be created as it already exists') @@ -304,34 +315,42 @@ def make_submission(self, submission_id: str, challenge_id: int, challenge_label # Submission generic info sub_info = SubmissionInfo( model_id=self.label, - username=request_meta.username, - track_id=challenge_id, - track_label=challenge_label, + username=username, + benchmark_label=benchmark_label, submission_id=submission_id, created_at=datetime.now(), - leaderboard_entries=request_meta.leaderboards + leaderboard_entry=leaderboard_file ) # save info to file with submission_dir.info_file.open('w') as fp: fp.write(sub_info.json(indent=4)) - if request_meta.multipart: + if multipart: + if len(index) <= 0: + raise ValueError('Parts list is empty') submission_dir.multipart_dir.mkdir(exist_ok=True) + upload_handler = MultipartUploadHandler( + store_location=submission_dir.multipart_dir, + target_location=submission_dir.root_dir, + merge_hash=filehash, + index=[ManifestIndexItem.from_api(i) for i in index] + ) with submission_dir.multipart_index_file.open('w') as fp: fp.write( - request_meta.json(include={'index'}, indent=4) + upload_handler.json(include={'index'}, indent=4) ) else: with submission_dir.content_archive_hash_file.open('w') as fp: - fp.write(request_meta.hash) + fp.write(filehash) - 
submission_dir.get_log_handler().header( - who=request_meta.username, - task=challenge_label, - multipart=request_meta.multipart, - has_scores=request_meta.has_scores, + submission_dir.log_handler.header( + who=username, + task=benchmark_label, + multipart=multipart, + has_scores=has_scores, auto_eval=auto_eval ) + return submission_dir @property def submissions(self) -> List[SubmissionDir]: diff --git a/vocolab/core/submission_lib/upload.py b/vocolab/core/submission_lib/upload.py index 8c209ef..3491b47 100644 --- a/vocolab/core/submission_lib/upload.py +++ b/vocolab/core/submission_lib/upload.py @@ -1,18 +1,18 @@ import json import shutil -from hmac import compare_digest from pathlib import Path from typing import List, Optional +import numpy as np +import pandas as pd from fastapi import UploadFile from fsplit.filesplit import Filesplit -from pydantic import BaseModel -import pandas as pd -import numpy as np +from pydantic import BaseModel, Field from vocolab import exc -from ..commons import md5sum from .logs import SubmissionLogger +from ..commons import md5sum +from ...data.models.api import SubmissionRequestFileIndexItem """ ####### File Splitting Note ####### @@ -77,15 +77,21 @@ def __eq__(self, other: 'ManifestIndexItem'): def __hash__(self): return int(self.file_hash, 16) + @classmethod + def from_api(cls, item: SubmissionRequestFileIndexItem): + return cls( + file_name=item.filename, + file_size=item.filesize, + file_hash=item.filehash + ) + class MultipartUploadHandler(BaseModel): """ Data Model used for the binary split function as a manifest to allow merging """ store_location: Path merge_hash: str - index: Optional[List[ManifestIndexItem]] - received: Optional[List[ManifestIndexItem]] = [] - multipart: bool = True - hashed_parts: bool = True + index: List[ManifestIndexItem] + received: Optional[List[ManifestIndexItem]] = Field(default_factory=list) target_location: Path @property @@ -118,6 +124,10 @@ def dump_to_index(self, file: Path): with 
file.open("w") as fp: fp.write(self.json(indent=4)) + def dump_manifest(self): + # todo: implement + pass + def add_part(self, logger: SubmissionLogger, file_name: str, data: UploadFile): """ Add a part to a multipart upload type submission. @@ -152,16 +162,15 @@ def add_part(self, logger: SubmissionLogger, file_name: str, data: UploadFile): # up count of received parts self.received.append(new_item_mf) - logger.log(f" --> part was added successfully", date=False) + logger.log(" --> part was added successfully", date=False) def merge_parts(self): """ Merge parts into the target file using filesplit protocol """ # TODO: update filesplit==3.0.2 to 4.0.0 (breaking upgrade) # for update see https://pypi.org/project/filesplit/ - if self.hashed_parts: - for item in self.index: - assert md5sum(self.store_location / item.file_name) == item.file_hash, \ - f"file {item.file_name} does not match md5" + for item in self.index: + assert md5sum(self.store_location / item.file_name) == item.file_hash, \ + f"file {item.file_name} does not match md5" df = pd.DataFrame([ (i.file_name, i.file_size) diff --git a/vocolab/data/models/api/challenges.py b/vocolab/data/models/api/challenges.py index 9fca517..90be5b2 100644 --- a/vocolab/data/models/api/challenges.py +++ b/vocolab/data/models/api/challenges.py @@ -23,10 +23,10 @@ class NewSubmissionRequest(BaseModel): benchmark_id: str filename: str hash: str - multipart: bool has_scores: bool - leaderboard: str + multipart: bool index: Optional[List[SubmissionRequestFileIndexItem]] + leaderboard: Optional[str] class NewSubmission(BaseModel): From 1a8919dc50d38a4a19ac90cbd1e1ea58c87d4467 Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Thu, 16 Mar 2023 16:25:09 +0100 Subject: [PATCH 20/28] submission creation --- samples/benchmark.json | 11 ++++++++++ ...allenges_list.json => benchmark_list.json} | 21 ++++++++++++------- samples/challenge.json | 8 ------- vocolab/api/endpoints/benchmarks.py | 2 +- vocolab/api/endpoints/users.py | 6 
+++++- vocolab/data/model_queries/models.py | 2 +- vocolab/data/models/cli.py | 4 ++++ 7 files changed, 36 insertions(+), 18 deletions(-) create mode 100644 samples/benchmark.json rename samples/{challenges_list.json => benchmark_list.json} (81%) delete mode 100644 samples/challenge.json diff --git a/samples/benchmark.json b/samples/benchmark.json new file mode 100644 index 0000000..462a4ed --- /dev/null +++ b/samples/benchmark.json @@ -0,0 +1,11 @@ +[ + { + "label": "test-challenge", + "start_date": "2022-06-30", + "end_date": null, + "url": "https://vocolab.com/challenge/test", + "active": false, + "evaluator": null, + "auto_eval": false + } +] \ No newline at end of file diff --git a/samples/challenges_list.json b/samples/benchmark_list.json similarity index 81% rename from samples/challenges_list.json rename to samples/benchmark_list.json index 5fda38f..2067145 100644 --- a/samples/challenges_list.json +++ b/samples/benchmark_list.json @@ -5,7 +5,8 @@ "end_date": null, "url": "https://zerospeech.com/track/test", "active": true, - "evaluator": null + "evaluator": null, + "auto_eval": false }, { "label": "abx-15", @@ -21,7 +22,8 @@ "end_date": null, "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#zrc2017-and-abx17", "active": false, - "evaluator": null + "evaluator": null, + "auto_eval": false }, { "label": "abx-LS", @@ -29,7 +31,8 @@ "end_date": null, "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#abxls-dataset-and-benchmark", "active": false, - "evaluator": null + "evaluator": null, + "auto_eval": false }, { "label": "tde-15", @@ -37,7 +40,8 @@ "end_date": null, "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", "active": false, - "evaluator": null + "evaluator": null, + "auto_eval": false }, { "label": "tde-17", @@ -45,7 +49,8 @@ "end_date": null, "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", "active": false, - "evaluator": null + "evaluator": null, + "auto_eval": false }, { "label": "slm-21", @@ -53,7 
+58,8 @@ "end_date": null, "url": "https://zerospeech.com/tasks/task_4/tasks_goals/", "active": false, - "evaluator": null + "evaluator": null, + "auto_eval": false }, { "label": "ttso-19", @@ -61,6 +67,7 @@ "end_date": null, "url": "https://zerospeech.com/tasks/task_3/tasks_goals/", "active": false, - "evaluator": null + "evaluator": null, + "auto_eval": false } ] \ No newline at end of file diff --git a/samples/challenge.json b/samples/challenge.json deleted file mode 100644 index 5e8727a..0000000 --- a/samples/challenge.json +++ /dev/null @@ -1,8 +0,0 @@ -[{ -"label": "test-challenge", -"start_date": "2022-06-30", -"end_date": null, -"url": "https://vocolab.com/challenge/test", -"active": false, -"evaluator": null -}] \ No newline at end of file diff --git a/vocolab/api/endpoints/benchmarks.py b/vocolab/api/endpoints/benchmarks.py index b95e349..f96d699 100644 --- a/vocolab/api/endpoints/benchmarks.py +++ b/vocolab/api/endpoints/benchmarks.py @@ -35,7 +35,7 @@ async def get_sub_list(benchmark_id: str) -> model_queries.ChallengeSubmissionLi @router.get("/{benchmark_id}/models/list") -async def get_models_list(challenge_id: str): +async def get_models_list(benchmark_id: str): pass diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index b9ae4b3..24fb8b3 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -112,7 +112,11 @@ async def create_new_submission(username: str, data: models.api.NewSubmissionReq benchmark_label=new_submission.benchmark_id, auto_eval=new_submission.auto_eval, username=current_user.username, - leaderboard_file=data.leaderboard + leaderboard_file=data.leaderboard, + filehash=data.hash, + multipart=data.multipart, + has_scores=data.has_scores, + index=data.index ) return new_submission.id diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index e0e9b8d..83ca94d 100644 --- a/vocolab/data/model_queries/models.py +++ 
b/vocolab/data/model_queries/models.py @@ -150,7 +150,7 @@ class ChallengeSubmission(BaseModel): """ Data representation of a submission to a challenge """ id: str user_id: int - benchmark_id: int + benchmark_id: str model_id: str submit_date: datetime status: SubmissionStatus diff --git a/vocolab/data/models/cli.py b/vocolab/data/models/cli.py index 54a1f58..1220b22 100644 --- a/vocolab/data/models/cli.py +++ b/vocolab/data/models/cli.py @@ -4,8 +4,11 @@ from pydantic import BaseModel, AnyHttpUrl +from vocolab import get_settings from .tasks import ExecutorsType +st = get_settings() + class NewChallenge(BaseModel): """ Dataclass for challenge creation """ @@ -15,6 +18,7 @@ class NewChallenge(BaseModel): evaluator: Optional[int] start_date: date end_date: Optional[date] + auto_eval: bool = st.task_queue_options.AUTO_EVAL class NewEvaluatorItem(BaseModel): From 4f47e28955b9ab362e78d9dbcde96593e5e9badb Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Thu, 16 Mar 2023 18:21:13 +0100 Subject: [PATCH 21/28] singlepart upload --- vocolab/api/endpoints/submissions.py | 29 +++++---------------- vocolab/core/submission_lib/submission.py | 31 ++++++++++++++++++----- vocolab/exc.py | 5 ++++ 3 files changed, 36 insertions(+), 29 deletions(-) diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index 8a01335..a3c90f8 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -34,7 +34,7 @@ async def get_submission_scores(submission_id: str): pass -@router.get("/{submission_id}/content/mode") +@router.get("/{submission_id}/content/status") async def submission_mode(submission_id: str): """ Should return the submission mode @@ -45,25 +45,7 @@ async def submission_mode(submission_id: str): pass -@router.get("/{submission_id}/content/reset") -async def reset_submission(submission_id: str): - """ - remove content of submission & allow new content to be added - """ - # todo implement this - pass - - 
-@router.post('{submission_id}/content/init') -async def upload_manifest(submission_id: str, - current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - # todo: initialise manifest before upload - # create submission dir - # add manifest and promise of files - pass - - -@router.put("/{submission_id}/content/add", response_model=models.api.UploadSubmissionPartResponse) +@router.post("/{submission_id}/content/add") async def upload_submission( submission_id: str, part_name: str, @@ -88,10 +70,11 @@ async def upload_submission( is_completed, remaining = sub_dir.add_content(file_name=part_name, data=file_data) if is_completed: - # todo: fix completed actions # run the completion of the submission on the background - # background_tasks.add_task(submission_lib.complete_submission, submission_id, with_eval=True) - pass + background_tasks.add_task(sub_dir.complete_upload) + # Todo: other tasks can be relative to completion + # - leaderboard extraction + # - auto_eval return models.api.UploadSubmissionPartResponse( completed=is_completed, remaining=[n.file_name for n in remaining] diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py index e55547c..0a5f6fc 100644 --- a/vocolab/core/submission_lib/submission.py +++ b/vocolab/core/submission_lib/submission.py @@ -8,7 +8,7 @@ from fastapi import UploadFile from pydantic import BaseModel -from vocolab import get_settings +from vocolab import get_settings, exc from .logs import SubmissionLogger from .upload import MultipartUploadHandler, SinglepartUploadHandler, ManifestIndexItem from ..commons import unzip, ssh_exec, rsync, zip_folder, scp @@ -161,9 +161,6 @@ def add_content(self, file_name: str, data: UploadFile): data=data ) handler.dump_to_index(self.multipart_index_file) - - if handler.completed(): - handler.merge_parts() else: """ Single part upload """ handler = SinglepartUploadHandler(root_dir=self.root_dir) @@ -174,11 +171,33 @@ def add_content(self, 
file_name: str, data: UploadFile): ) if handler.completed(): - """ Upload completed """ - unzip(handler.target_file, self.content_location) return True, [] return False, handler.remaining_items + def complete_upload(self): + """ Actions to perform after upload has completed on a submission (extract files, update metadata, etc)""" + logger = self.log_handler + if self.is_multipart(): + handler = MultipartUploadHandler.load_from_index(self.multipart_index_file) + if not handler.completed(): + raise exc.FailedOperation(f'Cannot Complete incomplete submission {self.submission_id} !!!') + + # merge parts to target archive + logger.log(f"upload of parts for {self.submission_id} completed, merging parts...") + handler.merge_parts() + logger.log("parts merged successfully") + else: + handler = SinglepartUploadHandler(root_dir=self.root_dir) + if not handler.completed(): + raise exc.FailedOperation(f'Cannot Complete incomplete submission {self.submission_id} !!!') + + logger.log(f"upload for {self.submission_id} completed") + + # unzip archive to content location + logger.log(f"unzipping archive {handler.target_file} into {self.content_location}") + unzip(handler.target_file, self.content_location) + + def send_content(self, hostname: str) -> Path: """ Send content to a remote host for evaluation (return target location) """ is_remote = hostname != _settings.app_options.hostname diff --git a/vocolab/exc.py b/vocolab/exc.py index 1863c52..d12d720 100644 --- a/vocolab/exc.py +++ b/vocolab/exc.py @@ -77,3 +77,8 @@ class SecurityError(VocoLabException): class ServerError(VocoLabException): """ Error with the starting of a server/service """ pass + + +class FailedOperation(VocoLabException): + """ Could not complete the requested operation """ + pass From 85cae79a2bdafe538dba6509a310d5392ba666a5 Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Mon, 20 Mar 2023 17:16:55 +0100 Subject: [PATCH 22/28] WIP: multipart upload --- requirements.txt | 2 +- 
vocolab/admin/commands/submissions.py | 13 +++-- vocolab/api/endpoints/submissions.py | 26 ++++----- vocolab/api/endpoints/users.py | 65 ++++++++++++++--------- vocolab/api/main.py | 24 +++++---- vocolab/core/submission_lib/submission.py | 1 - vocolab/core/submission_lib/upload.py | 22 ++++---- vocolab/data/model_queries/challenges.py | 3 +- vocolab/data/model_queries/models.py | 27 ++++++++-- vocolab/data/models/api/challenges.py | 1 + vocolab/data/tables.py | 1 + 11 files changed, 115 insertions(+), 70 deletions(-) diff --git a/requirements.txt b/requirements.txt index c2c9f62..991aeea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ requests==2.28.2 Jinja2==3.1.2 gunicorn==20.1.0 # todo: breaking update to 4.0.0 -filesplit==3.0.2 +filesplit>=4.0.1 toml;python_version<'3.11' python-dateutil numpy diff --git a/vocolab/admin/commands/submissions.py b/vocolab/admin/commands/submissions.py index 4fbad28..6796066 100644 --- a/vocolab/admin/commands/submissions.py +++ b/vocolab/admin/commands/submissions.py @@ -1,3 +1,4 @@ +import argparse import asyncio import sys @@ -19,22 +20,24 @@ def __init__(self, root, name, cmd_path): # custom arguments self.parser.add_argument('-u', '--user', type=int, help='Filter by user ID') - self.parser.add_argument('-t', '--track', type=int, help='Filter by track ID') + self.parser.add_argument('-b', '--benchmark', type=int, help='Filter by track ID') self.parser.add_argument('-s', '--status', choices=model_queries.SubmissionStatus.get_values(), help='Filter by status') @staticmethod - async def fetch_by(args) -> model_queries.ChallengeSubmissionList: + async def fetch_by(args: argparse.Namespace) -> model_queries.ChallengeSubmissionList: if args.user: return await model_queries.ChallengeSubmissionList.get_from_user(user_id=args.user) - elif args.track: - return await model_queries.ChallengeSubmissionList.get_from_challenge(challenge_id=args.track) + elif args.benchmark: + return await 
model_queries.ChallengeSubmissionList.get_from_challenge(benchmark_id=args.benchmark) elif args.status: return await model_queries.ChallengeSubmissionList.get_by_status(status=args.status) + return await model_queries.ChallengeSubmissionList.get_all() + def run(self, argv): args = self.parser.parse_args(argv) items: model_queries.ChallengeSubmissionList = asyncio.run(self.fetch_by(args)) @@ -51,7 +54,7 @@ def run(self, argv): for i in items: table.add_row( - f"{i.id}", f"{i.user_id}", f"{i.track_id}", f"{i.submit_date.strftime('%d/%m/%Y')}", + f"{i.id}", f"{i.user_id}", f"{i.benchmark_id}", f"{i.submit_date.strftime('%d/%m/%Y')}", f"{i.status}", f"{i.evaluator_id}", f"{i.author_label}" ) # print diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index a3c90f8..fbd23c9 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -24,8 +24,8 @@ async def get_sub_list(): @router.get("/{submission_id}/info") async def get_sub_info(submission_id: str): - # todo implement this - pass + """ Returns entry of submission """ + return await model_queries.ChallengeSubmission.get(submission_id) @router.get("/{submission_id}/scores") @@ -36,13 +36,12 @@ async def get_submission_scores(submission_id: str): @router.get("/{submission_id}/content/status") async def submission_mode(submission_id: str): - """ - Should return the submission mode - open: allows adding content - closed: content has completed being add - """ - # todo implement this - pass + """ Returns the status of a submission """ + sub = await model_queries.ChallengeSubmission.get(submission_id) + return dict( + submission_id=sub.id, + status=sub.status + ) @router.post("/{submission_id}/content/add") @@ -71,10 +70,11 @@ async def upload_submission( if is_completed: # run the completion of the submission on the background - background_tasks.add_task(sub_dir.complete_upload) - # Todo: other tasks can be relative to completion - # - leaderboard 
extraction - # - auto_eval + async def bg_task(): + sub_dir.complete_upload() + await submission.update_status(model_queries.SubmissionStatus.uploaded) + + background_tasks.add_task(bg_task) return models.api.UploadSubmissionPartResponse( completed=is_completed, remaining=[n.file_name for n in remaining] diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 24fb8b3..515c175 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -64,7 +64,6 @@ async def list_users_models(username: str, current_user: model_queries.User = De async def create_new_model(username: str, author_name: str, data: models.api.NewModelIdRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): """ Create a new model id""" - print("WRF") if current_user.username != username: raise NonAllowedOperation() @@ -91,32 +90,46 @@ async def list_users_submissions(username: str, return items +@router.get("/{username}/submissions/create") +async def wtf(username: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + print(f"WTF is going on with : {username}") + return dict(fuck=True, who=current_user.username) + + @router.post("/{username}/submissions/create") async def create_new_submission(username: str, data: models.api.NewSubmissionRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): """ Create a new empty submission with the given information """ - if current_user.username != username: - raise NonAllowedOperation() - - new_submission = await model_queries.ChallengeSubmission.create( - user_id=current_user.id, - username=current_user.username, - model_id=data.model_id, - benchmark_id=data.benchmark_id - ) - - # create model_id & submission dir - model_dir = submission_lib.ModelDir.load(data.model_id) - model_dir.make_submission( - submission_id=new_submission.id, - benchmark_label=new_submission.benchmark_id, - auto_eval=new_submission.auto_eval, - 
username=current_user.username, - leaderboard_file=data.leaderboard, - filehash=data.hash, - multipart=data.multipart, - has_scores=data.has_scores, - index=data.index - ) - - return new_submission.id + try: + if current_user.username != username: + raise NonAllowedOperation() + + print('Hello 1') + new_submission = await model_queries.ChallengeSubmission.create( + user_id=current_user.id, + username=current_user.username, + model_id=data.model_id, + benchmark_id=data.benchmark_id, + has_scores=data.has_scores, + author_label=data.author_label + ) + print("Hello 2") + + # create model_id & submission dir + model_dir = submission_lib.ModelDir.load(data.model_id) + model_dir.make_submission( + submission_id=new_submission.id, + benchmark_label=new_submission.benchmark_id, + auto_eval=new_submission.auto_eval, + username=current_user.username, + leaderboard_file=data.leaderboard, + filehash=data.hash, + multipart=data.multipart, + has_scores=data.has_scores, + index=data.index + ) + + return new_submission.id + except Exception as e: + print(e) + raise ValueError(f'WTF happened: {e}') diff --git a/vocolab/api/main.py b/vocolab/api/main.py index 10bb23b..6881b29 100644 --- a/vocolab/api/main.py +++ b/vocolab/api/main.py @@ -9,10 +9,10 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles from fastapi.responses import JSONResponse +from pydantic import ValidationError from vocolab import settings, out from vocolab.api import router as v1_router -# from vocolab.db import zrDB, create_db from vocolab.data import db from vocolab.exc import VocoLabException @@ -31,15 +31,6 @@ middleware=middleware ) -# app.add_middleware( -# CORSMiddleware, -# allow_origins=["*"], -# # allow_origin_regex=_settings.origin_regex, -# allow_credentials=True, -# allow_methods=["*"], -# allow_headers=["*"], -# ) - @app.middleware("http") async def log_requests(request: Request, call_next): @@ -94,6 +85,19 @@ async def 
zerospeech_error_formatting(request: Request, exc: VocoLabException): ) +@app.exception_handler(ValidationError) +async def zerospeech_error_formatting(request: Request, exc: VocoLabException): + if exc.data: + content = dict(message=f"{str(exc)}", data=str(exc.data)) + else: + content = dict(message=f"{str(exc)}") + + return JSONResponse( + status_code=exc.status, + content=content, + ) + + @app.on_event("startup") async def startup(): # conditional creation of the necessary files diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py index 0a5f6fc..07fda44 100644 --- a/vocolab/core/submission_lib/submission.py +++ b/vocolab/core/submission_lib/submission.py @@ -197,7 +197,6 @@ def complete_upload(self): logger.log(f"unzipping archive {handler.target_file} into {self.content_location}") unzip(handler.target_file, self.content_location) - def send_content(self, hostname: str) -> Path: """ Send content to a remote host for evaluation (return target location) """ is_remote = hostname != _settings.app_options.hostname diff --git a/vocolab/core/submission_lib/upload.py b/vocolab/core/submission_lib/upload.py index 3491b47..799b7ff 100644 --- a/vocolab/core/submission_lib/upload.py +++ b/vocolab/core/submission_lib/upload.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd from fastapi import UploadFile -from fsplit.filesplit import Filesplit +from filesplit.merge import Merge from pydantic import BaseModel, Field from vocolab import exc @@ -166,22 +166,24 @@ def add_part(self, logger: SubmissionLogger, file_name: str, data: UploadFile): def merge_parts(self): """ Merge parts into the target file using filesplit protocol """ - # TODO: update filesplit==3.0.2 to 4.0.0 (breaking upgrade) - # for update see https://pypi.org/project/filesplit/ for item in self.index: assert md5sum(self.store_location / item.file_name) == item.file_hash, \ f"file {item.file_name} does not match md5" df = pd.DataFrame([ - (i.file_name, 
i.file_size) + (i.file_name, i.file_size, False) for i in self.index ]) - df.columns = ['filename', 'filesize'] - df['encoding'] = np.nan - df['header'] = np.nan - df.to_csv((self.store_location / 'fs_manifest.csv')) - fs = Filesplit() - fs.merge(input_dir=f"{self.store_location}", output_file=str(self.target_file)) + df.columns = ['filename', 'filesize', 'header'] + df.to_csv((self.store_location / 'manifest')) + + merge = Merge( + inputdir=str(self.store_location), + outputdir=str(self.target_location), + outputfilename=self.target_file.name + ) + merge.merge() + # Check assert md5sum(self.target_file) == self.merge_hash, "output file does not match original md5" def clean(self): diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py index 188e31a..16e7b3d 100644 --- a/vocolab/data/model_queries/challenges.py +++ b/vocolab/data/model_queries/challenges.py @@ -13,6 +13,7 @@ st = get_settings() + class EvaluatorItem(BaseModel): """ Data representation of an evaluator """ id: int @@ -188,7 +189,7 @@ async def get(cls, include_all: bool = False) -> "BenchmarkList": class Leaderboard(BaseModel): """ Data representation of a Leaderboard """ label: str # Name of leaderboard - benchmark_id: str # Label of the Benchmark + benchmark_id: str # Label of the Benchmark archived: bool # is_archived static_files: bool # has static files sorting_key: Optional[str] # path to the item to use as sorting key diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index 83ca94d..9235511 100644 --- a/vocolab/data/model_queries/models.py +++ b/vocolab/data/model_queries/models.py @@ -155,6 +155,7 @@ class ChallengeSubmission(BaseModel): submit_date: datetime status: SubmissionStatus auto_eval: bool + has_scores: bool evaluator_id: Optional[int] author_label: Optional[str] = None @@ -164,22 +165,26 @@ class Config: @classmethod async def create( cls, user_id: int, username: str, - model_id: str, benchmark_id: str 
+ model_id: str, benchmark_id: str, + has_scores: bool, author_label: str ) -> "ChallengeSubmission": """ Creates a database entry for the new submission """ benchmark = await Benchmark.get(benchmark_id=benchmark_id) - submission_id = f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{username}" + print('Hello 1.2') + submission_id = f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{username}" entry = cls.parse_obj(dict( id=submission_id, model_id=model_id, benchmark_id=benchmark_id, user_id=user_id, + has_scores=has_scores, submit_date=datetime.now(), status=SubmissionStatus.uploading, evaluator_id=benchmark.evaluator, - auto_eval=benchmark.auto_eval + auto_eval=benchmark.auto_eval, + author_label=author_label )) await db.zrDB.execute( @@ -224,6 +229,7 @@ async def delete(self): class ChallengeSubmissionList(BaseModel): + """ Data representation of a list of Submissions """ items: List[ChallengeSubmission] def __iter__(self) -> Iterable[ChallengeSubmission]: @@ -231,6 +237,7 @@ def __iter__(self) -> Iterable[ChallengeSubmission]: @classmethod async def get_from_challenge(cls, benchmark_id: str): + """ Get submissions filtered by benchmark """ items = await db.zrDB.fetch_all( tables.submissions_table.select().where( tables.submissions_table.c.benchmark_id == benchmark_id @@ -243,6 +250,7 @@ async def get_from_challenge(cls, benchmark_id: str): @classmethod async def get_from_model(cls, model_id: str): + """ Get submissions filtered by model """ items = await db.zrDB.fetch_all( tables.submissions_table.select().where( tables.submissions_table.c.model_id == model_id @@ -255,6 +263,7 @@ async def get_from_model(cls, model_id: str): @classmethod async def get_from_user(cls, user_id: int): + """ Get submissions filtered by user """ items = await db.zrDB.fetch_all( tables.submissions_table.select().where( tables.submissions_table.c.user_id == user_id @@ -267,6 +276,7 @@ async def get_from_user(cls, user_id: int): @classmethod async def get_by_status(cls, status: 
SubmissionStatus): + """ Get submissions filtered by status """ items = await db.zrDB.fetch_all( tables.submissions_table.select().where( tables.submissions_table.c.status == status.value @@ -277,6 +287,17 @@ async def get_by_status(cls, status: SubmissionStatus): return cls(items=items) + @classmethod + async def get_all(cls): + """ Get all submissions """ + items = await db.zrDB.fetch_all( + tables.submissions_table.select() + ) + if items is None: + items = [] + + return cls(items=items) + async def update_evaluators(self, evaluator_id: int): for e in self.items: e.evaluator_id = evaluator_id diff --git a/vocolab/data/models/api/challenges.py b/vocolab/data/models/api/challenges.py index 90be5b2..6f5eb16 100644 --- a/vocolab/data/models/api/challenges.py +++ b/vocolab/data/models/api/challenges.py @@ -21,6 +21,7 @@ class NewSubmissionRequest(BaseModel): """ Dataclass used for input in the creation of a new submission to a challenge """ model_id: str benchmark_id: str + author_label: str filename: str hash: str has_scores: bool diff --git a/vocolab/data/tables.py b/vocolab/data/tables.py index 83dadb0..a308f97 100644 --- a/vocolab/data/tables.py +++ b/vocolab/data/tables.py @@ -94,6 +94,7 @@ sqlalchemy.Column("model_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("models.id")), sqlalchemy.Column("submit_date", sqlalchemy.DateTime), sqlalchemy.Column("status", sqlalchemy.String), + sqlalchemy.Column("has_scores", sqlalchemy.Boolean), sqlalchemy.Column("auto_eval", sqlalchemy.Boolean), sqlalchemy.Column("evaluator_id", sqlalchemy.Integer, sqlalchemy.ForeignKey("evaluators.id")), sqlalchemy.Column("author_label", sqlalchemy.String) From ae77e6e6ba03f5244c825209f183b3be7851bd2f Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Mon, 20 Mar 2023 17:46:23 +0100 Subject: [PATCH 23/28] NOTE: bug found on multipart upload --- vocolab/api/endpoints/submissions.py | 8 +++++--- vocolab/core/submission_lib/submission.py | 2 ++ 2 files changed, 7 insertions(+), 3 
deletions(-) diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index fbd23c9..9dd8f4a 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -49,10 +49,11 @@ async def upload_submission( submission_id: str, part_name: str, background_tasks: BackgroundTasks, - file_data: UploadFile = File(...), + file: UploadFile = File(...), current_user: model_queries.User = Depends(api_lib.get_current_active_user), ): - out.console.info(f"user: {current_user.username}") + out.console.info(f"user: {current_user.username} is uploading {file.filename}") + out.console.inspect(file) submission = await model_queries.ChallengeSubmission.get(submission_id) if submission is None: raise HTTPException(status_code=404, detail="submission not found") @@ -66,7 +67,8 @@ async def upload_submission( raise HTTPException(status_code=417, detail="Expected submission directory to exist") try: - is_completed, remaining = sub_dir.add_content(file_name=part_name, data=file_data) + # fixme: there is an issue with upload.json on creation it does not correspond to schema + is_completed, remaining = sub_dir.add_content(file_name=part_name, data=file) if is_completed: # run the completion of the submission on the background diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py index 07fda44..1a9f337 100644 --- a/vocolab/core/submission_lib/submission.py +++ b/vocolab/core/submission_lib/submission.py @@ -154,7 +154,9 @@ def add_content(self, file_name: str, data: UploadFile): """ if self.is_multipart(): """ Multipart upload """ + print("HELLO 1") handler = MultipartUploadHandler.load_from_index(self.multipart_index_file) + print("Hello 2") handler.add_part( logger=self.log_handler, file_name=file_name, From e73506654dde44edd41d7480df9806c98ab7daba Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Tue, 21 Mar 2023 14:24:36 +0100 Subject: [PATCH 24/28] submission upload --- 
samples/benchmark_list.json | 23 ++++--- vocolab/api/endpoints/benchmarks.py | 1 - vocolab/api/endpoints/submissions.py | 1 - vocolab/api/endpoints/users.py | 76 ++++++++++------------- vocolab/core/submission_lib/submission.py | 18 +++--- vocolab/core/submission_lib/upload.py | 9 ++- vocolab/data/model_queries/models.py | 2 - 7 files changed, 65 insertions(+), 65 deletions(-) diff --git a/samples/benchmark_list.json b/samples/benchmark_list.json index 2067145..24c3be5 100644 --- a/samples/benchmark_list.json +++ b/samples/benchmark_list.json @@ -9,7 +9,7 @@ "auto_eval": false }, { - "label": "abx-15", + "label": "abx15", "start_date": "2015-01-20", "end_date": null, "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#zr2015-and-abx15", @@ -17,7 +17,7 @@ "evaluator": null }, { - "label": "abx-17", + "label": "abx17", "start_date": "2017-01-20", "end_date": null, "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#zrc2017-and-abx17", @@ -26,7 +26,7 @@ "auto_eval": false }, { - "label": "abx-LS", + "label": "abxLS", "start_date": "2021-12-12", "end_date": null, "url": "https://zerospeech.com/tasks/task_1/benchmarks_datasets/#abxls-dataset-and-benchmark", @@ -35,7 +35,7 @@ "auto_eval": false }, { - "label": "tde-15", + "label": "tde15", "start_date": "2015-01-20", "end_date": null, "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", @@ -44,7 +44,7 @@ "auto_eval": false }, { - "label": "tde-17", + "label": "tde17", "start_date": "2017-01-20", "end_date": null, "url": "https://zerospeech.com/tasks/task_2/tasks_goals/", @@ -53,7 +53,7 @@ "auto_eval": false }, { - "label": "slm-21", + "label": "sLM21", "start_date": "2021-12-12", "end_date": null, "url": "https://zerospeech.com/tasks/task_4/tasks_goals/", @@ -62,7 +62,16 @@ "auto_eval": false }, { - "label": "ttso-19", + "label": "prosAudit", + "start_date": "2023-02-01", + "end_date": null, + "url": "https://zerospeech.com/tasks/task_4/tasks_goals/", + "active": true, + "evaluator": null, 
+ "auto_eval": false + }, + { + "label": "ttso19", "start_date": "2019-01-20", "end_date": null, "url": "https://zerospeech.com/tasks/task_3/tasks_goals/", diff --git a/vocolab/api/endpoints/benchmarks.py b/vocolab/api/endpoints/benchmarks.py index f96d699..cd339a8 100644 --- a/vocolab/api/endpoints/benchmarks.py +++ b/vocolab/api/endpoints/benchmarks.py @@ -23,7 +23,6 @@ async def get_challenge_list(include_inactive: bool = False): @router.get('/{benchmark_id}/info') async def get_challenge_info(benchmark_id: str): """ Return information of a specific benchmark """ - # todo add leaderboards to challenge info return await model_queries.Benchmark.get(benchmark_id=benchmark_id, allow_inactive=True) diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index 9dd8f4a..1216dd2 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -67,7 +67,6 @@ async def upload_submission( raise HTTPException(status_code=417, detail="Expected submission directory to exist") try: - # fixme: there is an issue with upload.json on creation it does not correspond to schema is_completed, remaining = sub_dir.add_content(file_name=part_name, data=file) if is_completed: diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 515c175..034058c 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -76,7 +76,9 @@ async def create_new_model(username: str, author_name: str, data: models.api.New out.console.print(e) raise e - return model_id + return dict( + model_id=model_id, user=current_user.username, + ) @router.get("/{username}/submissions/list") @@ -86,50 +88,40 @@ async def list_users_submissions(username: str, if current_user.username != username: raise NonAllowedOperation() - items = await model_queries.ChallengeSubmissionList.get_from_user(user_id=current_user.id) - return items - - -@router.get("/{username}/submissions/create") -async def wtf(username: str, 
current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - print(f"WTF is going on with : {username}") - return dict(fuck=True, who=current_user.username) + return await model_queries.ChallengeSubmissionList.get_from_user(user_id=current_user.id) @router.post("/{username}/submissions/create") async def create_new_submission(username: str, data: models.api.NewSubmissionRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): """ Create a new empty submission with the given information """ - try: - if current_user.username != username: - raise NonAllowedOperation() - - print('Hello 1') - new_submission = await model_queries.ChallengeSubmission.create( - user_id=current_user.id, - username=current_user.username, - model_id=data.model_id, - benchmark_id=data.benchmark_id, - has_scores=data.has_scores, - author_label=data.author_label - ) - print("Hello 2") - - # create model_id & submission dir - model_dir = submission_lib.ModelDir.load(data.model_id) - model_dir.make_submission( - submission_id=new_submission.id, - benchmark_label=new_submission.benchmark_id, - auto_eval=new_submission.auto_eval, - username=current_user.username, - leaderboard_file=data.leaderboard, - filehash=data.hash, - multipart=data.multipart, - has_scores=data.has_scores, - index=data.index - ) - - return new_submission.id - except Exception as e: - print(e) - raise ValueError(f'WTF happened: {e}') + if current_user.username != username: + raise NonAllowedOperation() + + new_submission = await model_queries.ChallengeSubmission.create( + user_id=current_user.id, + username=current_user.username, + model_id=data.model_id, + benchmark_id=data.benchmark_id, + has_scores=data.has_scores, + author_label=data.author_label + ) + + # create model_id & submission dir + model_dir = submission_lib.ModelDir.load(data.model_id) + model_dir.make_submission( + submission_id=new_submission.id, + benchmark_label=new_submission.benchmark_id, + 
auto_eval=new_submission.auto_eval, + username=current_user.username, + leaderboard_file=data.leaderboard, + filehash=data.hash, + multipart=data.multipart, + has_scores=data.has_scores, + index=data.index + ) + + return dict( + status=new_submission.status, benchmark=new_submission.benchmark_id, + user=current_user.username, submission_id=new_submission.id, auto_eval=new_submission.auto_eval + ) diff --git a/vocolab/core/submission_lib/submission.py b/vocolab/core/submission_lib/submission.py index 1a9f337..143c2e4 100644 --- a/vocolab/core/submission_lib/submission.py +++ b/vocolab/core/submission_lib/submission.py @@ -10,7 +10,7 @@ from vocolab import get_settings, exc from .logs import SubmissionLogger -from .upload import MultipartUploadHandler, SinglepartUploadHandler, ManifestIndexItem +from .upload import MultiPartUploadHandler, SinglePartUploadHandler, ManifestIndexItem from ..commons import unzip, ssh_exec, rsync, zip_folder, scp from ...data.models.api import SubmissionRequestFileIndexItem @@ -154,9 +154,7 @@ def add_content(self, file_name: str, data: UploadFile): """ if self.is_multipart(): """ Multipart upload """ - print("HELLO 1") - handler = MultipartUploadHandler.load_from_index(self.multipart_index_file) - print("Hello 2") + handler = MultiPartUploadHandler.load_from_index(self.multipart_index_file) handler.add_part( logger=self.log_handler, file_name=file_name, @@ -165,7 +163,7 @@ def add_content(self, file_name: str, data: UploadFile): handler.dump_to_index(self.multipart_index_file) else: """ Single part upload """ - handler = SinglepartUploadHandler(root_dir=self.root_dir) + handler = SinglePartUploadHandler(root_dir=self.root_dir) handler.write_data( logger=self.log_handler, file_name=file_name, @@ -180,7 +178,7 @@ def complete_upload(self): """ Actions to perform after upload has completed on a submission (extract files, update metadata, etc)""" logger = self.log_handler if self.is_multipart(): - handler = 
MultipartUploadHandler.load_from_index(self.multipart_index_file) + handler = MultiPartUploadHandler.load_from_index(self.multipart_index_file) if not handler.completed(): raise exc.FailedOperation(f'Cannot Complete incomplete submission {self.submission_id} !!!') @@ -189,7 +187,7 @@ def complete_upload(self): handler.merge_parts() logger.log("parts merged successfully") else: - handler = SinglepartUploadHandler(root_dir=self.root_dir) + handler = SinglePartUploadHandler(root_dir=self.root_dir) if not handler.completed(): raise exc.FailedOperation(f'Cannot Complete incomplete submission {self.submission_id} !!!') @@ -198,6 +196,8 @@ def complete_upload(self): # unzip archive to content location logger.log(f"unzipping archive {handler.target_file} into {self.content_location}") unzip(handler.target_file, self.content_location) + # clean-up download artifacts + # handler.clean() def send_content(self, hostname: str) -> Path: """ Send content to a remote host for evaluation (return target location) """ @@ -349,7 +349,7 @@ def make_submission( if len(index) <= 0: raise ValueError('Parts list is empty') submission_dir.multipart_dir.mkdir(exist_ok=True) - upload_handler = MultipartUploadHandler( + upload_handler = MultiPartUploadHandler( store_location=submission_dir.multipart_dir, target_location=submission_dir.root_dir, merge_hash=filehash, @@ -357,7 +357,7 @@ def make_submission( ) with submission_dir.multipart_index_file.open('w') as fp: fp.write( - upload_handler.json(include={'index'}, indent=4) + upload_handler.json(indent=4) ) else: with submission_dir.content_archive_hash_file.open('w') as fp: diff --git a/vocolab/core/submission_lib/upload.py b/vocolab/core/submission_lib/upload.py index 799b7ff..41ea21b 100644 --- a/vocolab/core/submission_lib/upload.py +++ b/vocolab/core/submission_lib/upload.py @@ -26,7 +26,7 @@ """ -class SinglepartUploadHandler(BaseModel): +class SinglePartUploadHandler(BaseModel): root_dir: Path @property @@ -64,6 +64,10 @@ def 
write_data(self, logger: SubmissionLogger, file_name: str, data: UploadFile) logger.log(f" --> file was uploaded successfully", date=False) + def clean(self): + """ Delete download artifacts """ + pass + class ManifestIndexItem(BaseModel): """ Model representing a file item in the SplitManifest """ @@ -86,7 +90,7 @@ def from_api(cls, item: SubmissionRequestFileIndexItem): ) -class MultipartUploadHandler(BaseModel): +class MultiPartUploadHandler(BaseModel): """ Data Model used for the binary split function as a manifest to allow merging """ store_location: Path merge_hash: str @@ -139,7 +143,6 @@ def add_part(self, logger: SubmissionLogger, file_name: str, data: UploadFile): - ValueNotValid if md5 hash of file does not match md5 recorded in the manifest """ logger.log(f"adding a new part to upload: {self.store_location / file_name}") - # todo load information from index and name ??? # write data on disk file_part = self.store_location / file_name diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index 9235511..a6bc9af 100644 --- a/vocolab/data/model_queries/models.py +++ b/vocolab/data/model_queries/models.py @@ -171,8 +171,6 @@ async def create( """ Creates a database entry for the new submission """ benchmark = await Benchmark.get(benchmark_id=benchmark_id) - print('Hello 1.2') - submission_id = f"{datetime.now().strftime('%Y%m%d%H%M%S%f')}_{username}" entry = cls.parse_obj(dict( id=submission_id, From b4165d92957d1a1819b61412b500761ab5b47ee7 Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Tue, 21 Mar 2023 15:05:24 +0100 Subject: [PATCH 25/28] api lock mode --- vocolab/admin/commands/api.py | 24 ++++++++++++++++++++++++ vocolab/admin/main.py | 1 + vocolab/api/endpoints/auth.py | 9 +++++++++ vocolab/api/endpoints/submissions.py | 10 ++++++++-- vocolab/api/endpoints/users.py | 11 ++++++++++- vocolab/api/main.py | 2 +- vocolab/exc.py | 14 ++++++++++++++ 7 files changed, 67 insertions(+), 4 deletions(-) diff --git 
a/vocolab/admin/commands/api.py b/vocolab/admin/commands/api.py index e06ae05..85c2792 100644 --- a/vocolab/admin/commands/api.py +++ b/vocolab/admin/commands/api.py @@ -27,6 +27,30 @@ def run(self, argv): self.parser.print_help() +class APILockCMD(cmd_lib.CMD): + """ Command to check API Lock status """ + + def __init__(self, root, name, cmd_path): + super(APILockCMD, self).__init__(root, name, cmd_path) + self.parser.add_argument( + "action", choices=['lock', 'unlock', 'status'], default='status', + nargs="?", help="Action to perform (default status)" + ) + + def run(self, argv): + args = self.parser.parse_args(argv) + + if args.action == "lock": + _settings.data_lock.touch() + elif args.action == "unlock": + _settings.data_lock.unlink() + else: + if _settings.is_locked(): + out.cli.print(f"API is locked") + else: + out.cli.print(f"API is not locked") + + class RunAPICMD(cmd_lib.CMD): """ Commands to run the api daemon """ diff --git a/vocolab/admin/main.py b/vocolab/admin/main.py index b58825c..bb69927 100644 --- a/vocolab/admin/main.py +++ b/vocolab/admin/main.py @@ -82,6 +82,7 @@ def build_cli(): commands.settings.GenerateEnvFileCMD(CMD_NAME, 'template', 'settings'), commands.api.APICMD(CMD_NAME, 'api', ''), commands.api.DebugAPICMD(CMD_NAME, 'serve', 'api'), + commands.api.APILockCMD(CMD_NAME, 'lock', 'api'), commands.api.APInitEnvironmentCMD(CMD_NAME, 'init', 'api'), commands.api.ConfigFiles(CMD_NAME, 'config', 'api'), commands.api.GunicornConfigGeneration(CMD_NAME, 'gunicorn', 'api:config'), diff --git a/vocolab/api/endpoints/auth.py b/vocolab/api/endpoints/auth.py index 81cd168..e5be70d 100644 --- a/vocolab/api/endpoints/auth.py +++ b/vocolab/api/endpoints/auth.py @@ -44,6 +44,9 @@ async def post_signup(request: Request, affiliation: str = Form(...), email: EmailStr = Form(...), username: str = Form(...), password: str = Form(...)) -> str: """ Create a new user via the HTML form (returns a html page) """ + if _settings.is_locked(): + raise 
exc.APILockedException() + user = models.api.UserCreateRequest( username=username, email=email, @@ -79,6 +82,9 @@ async def password_reset_request( html_response: bool = False, username: str = Form(...), email: EmailStr = Form(...)): """ Request a users password to be reset """ + if _settings.is_locked(): + raise exc.APILockedException() + user = await model_queries.User.get(by_username=username) if user.email != email: raise ValueError('Bad request, no such user') @@ -114,6 +120,9 @@ async def password_reset_request( async def post_password_update(v: str, request: Request, html_response: bool = False, password: str = Form(...), password_validation: str = Form(...), session_code: str = Form(...)): """Update a users password (requires a reset session)""" + if _settings.is_locked(): + raise exc.APILockedException() + try: if v != session_code: raise ValueError('session validation not passed !!!') diff --git a/vocolab/api/endpoints/submissions.py b/vocolab/api/endpoints/submissions.py index 1216dd2..7f27b68 100644 --- a/vocolab/api/endpoints/submissions.py +++ b/vocolab/api/endpoints/submissions.py @@ -52,8 +52,10 @@ async def upload_submission( file: UploadFile = File(...), current_user: model_queries.User = Depends(api_lib.get_current_active_user), ): + if _settings.is_locked(): + raise exc.APILockedException() + out.console.info(f"user: {current_user.username} is uploading {file.filename}") - out.console.inspect(file) submission = await model_queries.ChallengeSubmission.get(submission_id) if submission is None: raise HTTPException(status_code=404, detail="submission not found") @@ -88,5 +90,9 @@ async def bg_task(): @router.delete("/{submission_id}/remove") async def remove_submission(submission_id: str, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): - # todo implement this + if _settings.is_locked(): + raise exc.APILockedException() + + out.log.info(f"user {current_user.username} requested that the submission {submission_id} gets 
deleted !") + # todo implement delete operation pass diff --git a/vocolab/api/endpoints/users.py b/vocolab/api/endpoints/users.py index 034058c..013f7d9 100644 --- a/vocolab/api/endpoints/users.py +++ b/vocolab/api/endpoints/users.py @@ -8,7 +8,7 @@ APIRouter, Depends, Response, HTTPException ) -from vocolab import out +from vocolab import out, exc from vocolab.core import api_lib, users_lib, submission_lib from vocolab.data import model_queries, models from vocolab.settings import get_settings @@ -41,6 +41,9 @@ def update_profile( username: str, user_data: users_lib.UserProfileData, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): + if _settings.is_locked(): + raise exc.APILockedException() + if current_user.username != username: raise NonAllowedOperation() @@ -64,6 +67,9 @@ async def list_users_models(username: str, current_user: model_queries.User = De async def create_new_model(username: str, author_name: str, data: models.api.NewModelIdRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): """ Create a new model id""" + if _settings.is_locked(): + raise exc.APILockedException() + if current_user.username != username: raise NonAllowedOperation() @@ -95,6 +101,9 @@ async def list_users_submissions(username: str, async def create_new_submission(username: str, data: models.api.NewSubmissionRequest, current_user: model_queries.User = Depends(api_lib.get_current_active_user)): """ Create a new empty submission with the given information """ + if _settings.is_locked(): + raise exc.APILockedException() + if current_user.username != username: raise NonAllowedOperation() diff --git a/vocolab/api/main.py b/vocolab/api/main.py index 6881b29..2fcfc83 100644 --- a/vocolab/api/main.py +++ b/vocolab/api/main.py @@ -74,7 +74,7 @@ async def value_error_reformatting(request: Request, exc: ValueError): @app.exception_handler(VocoLabException) async def zerospeech_error_formatting(request: Request, exc: 
VocoLabException): - if exc.data: + if hasattr(exc, 'data'): content = dict(message=f"{str(exc)}", data=str(exc.data)) else: content = dict(message=f"{str(exc)}") diff --git a/vocolab/exc.py b/vocolab/exc.py index d12d720..5280157 100644 --- a/vocolab/exc.py +++ b/vocolab/exc.py @@ -3,6 +3,10 @@ from fastapi import status as http_status +from vocolab.settings import get_settings + +_settings = get_settings() + class VocoLabException(Exception): """ Generic Base Exception definition for the Zerospeech API """ @@ -29,6 +33,16 @@ def __str__(self): return f"{self.__class__.__name__}: {self.message}" +class APILockedException(VocoLabException): + """ Error to return when write operations are not permitted""" + + def __init__(self): + super(APILockedException, self).__init__( + msg=f"The {_settings.app_options.app_name} is in LOCKED mode, write operations are not allowed", + status=http_status.HTTP_423_LOCKED + ) + + class OptionMissing(VocoLabException): """ Generic Exception used when a function was called with incorrect or missing arguments """ pass From 2922540560c6316b8733fb5643994e3415f4afc4 Mon Sep 17 00:00:00 2001 From: Hamilakis Nicolas Date: Thu, 1 Jun 2023 00:45:39 +0200 Subject: [PATCH 26/28] leaderboard updates --- pyproject.toml | 6 +++- vocolab/api/endpoints/auth.py | 2 +- vocolab/api/endpoints/benchmarks.py | 6 +--- vocolab/core/leaderboards_lib/__init__.py | 0 .../leaderboards.py} | 3 +- .../leaderboards_lib/leaderboards_lib2.py | 0 vocolab/core/testing/__init__.py | 1 - vocolab/core/testing/submissions.py | 32 ------------------- vocolab/data/model_queries/leaderboars.py | 8 ----- vocolab/data/model_queries/models.py | 1 + vocolab/settings.py | 6 ++++ 11 files changed, 16 insertions(+), 49 deletions(-) create mode 100644 vocolab/core/leaderboards_lib/__init__.py rename vocolab/core/{leaderboards_lib.py => leaderboards_lib/leaderboards.py} (96%) create mode 100644 vocolab/core/leaderboards_lib/leaderboards_lib2.py delete mode 100644 
vocolab/core/testing/__init__.py delete mode 100644 vocolab/core/testing/submissions.py diff --git a/pyproject.toml b/pyproject.toml index 72bd15c..b079c9f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,8 +29,12 @@ repository = "https://github.com/zerospeech/vocolab" voco = "vocolab.admin.main:run_cli" [project.optional-dependencies] +extend = [ + # todo migrate towards normal install when package is published + "git+ssh://git@github.com/zerospeech/vocolab-ext.git" +] + dev = [ - "zerospeech-benchmarks[all]", "ipython", "jupyterlab", "pytest", diff --git a/vocolab/api/endpoints/auth.py b/vocolab/api/endpoints/auth.py index e5be70d..7c93d0c 100644 --- a/vocolab/api/endpoints/auth.py +++ b/vocolab/api/endpoints/auth.py @@ -108,7 +108,7 @@ async def password_reset_request( if html_response: data = dict( image_dir=f"{request.base_url}static/img", - title=f"Password Change Request Received !", + title="Password Change Request Received !", body=f"A verification email will be sent to {email}", success=True ) diff --git a/vocolab/api/endpoints/benchmarks.py b/vocolab/api/endpoints/benchmarks.py index cd339a8..b9b01a0 100644 --- a/vocolab/api/endpoints/benchmarks.py +++ b/vocolab/api/endpoints/benchmarks.py @@ -23,6 +23,7 @@ async def get_challenge_list(include_inactive: bool = False): @router.get('/{benchmark_id}/info') async def get_challenge_info(benchmark_id: str): """ Return information of a specific benchmark """ + # todo add leaderboards to challenge info return await model_queries.Benchmark.get(benchmark_id=benchmark_id, allow_inactive=True) @@ -33,11 +34,6 @@ async def get_sub_list(benchmark_id: str) -> model_queries.ChallengeSubmissionLi return await model_queries.ChallengeSubmissionList.get_from_challenge(benchmark_id) -@router.get("/{benchmark_id}/models/list") -async def get_models_list(benchmark_id: str): - pass - - @router.get('/{benchmark_id}/leaderboards/list', responses={404: {"model": models.api.Message}}) async def 
get_all_leaderboards(benchmark_id: str) -> model_queries.LeaderboardList: """ Return information of a specific challenge """ diff --git a/vocolab/core/leaderboards_lib/__init__.py b/vocolab/core/leaderboards_lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/vocolab/core/leaderboards_lib.py b/vocolab/core/leaderboards_lib/leaderboards.py similarity index 96% rename from vocolab/core/leaderboards_lib.py rename to vocolab/core/leaderboards_lib/leaderboards.py index 3568cd5..c969ebd 100644 --- a/vocolab/core/leaderboards_lib.py +++ b/vocolab/core/leaderboards_lib/leaderboards.py @@ -5,6 +5,7 @@ from typing import Generator, Optional from pydantic import BaseModel +from vocolab_ext.leaderboards import LeaderboardRegistry, LeaderboardManager from vocolab import get_settings from vocolab.data import models @@ -12,7 +13,7 @@ _settings = get_settings() -class LeaderboardDir(BaseModel): +class LeaderboardsDir(BaseModel): """ Handler class for disk storage of Leaderboards """ location: Path sorting_key: Optional[str] diff --git a/vocolab/core/leaderboards_lib/leaderboards_lib2.py b/vocolab/core/leaderboards_lib/leaderboards_lib2.py new file mode 100644 index 0000000..e69de29 diff --git a/vocolab/core/testing/__init__.py b/vocolab/core/testing/__init__.py deleted file mode 100644 index 54ebb0c..0000000 --- a/vocolab/core/testing/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .submissions import * diff --git a/vocolab/core/testing/submissions.py b/vocolab/core/testing/submissions.py deleted file mode 100644 index 4e3de0c..0000000 --- a/vocolab/core/testing/submissions.py +++ /dev/null @@ -1,32 +0,0 @@ -import uuid -from pathlib import Path - -import numpy as np -import yaml - -from vocolab.settings import get_settings - -_settings = get_settings() - - -def create_fake_submission(username: str, challenge_label: str) -> Path: - """ Creates some fake files for testing submissions """ - submission_id = str(uuid.uuid4()) - location = (_settings.user_data_dir / 
username / 'submissions' / challenge_label / submission_id) - location.mkdir(parents=True, exist_ok=True) - for i in range(100): - np.savetxt(str(location / f'fx_{i}.txt'), np.random.rand(8, 8)) # noqa: numpy sucks at typing - - with (location / 'meta.yml').open('w') as fp: - v = dict( - author='Test Guy et al.', - description='This is data for tests', - ) - yaml.dump(v, fp) - - return location - - - - - diff --git a/vocolab/data/model_queries/leaderboars.py b/vocolab/data/model_queries/leaderboars.py index c4f3f76..bd546aa 100644 --- a/vocolab/data/model_queries/leaderboars.py +++ b/vocolab/data/model_queries/leaderboars.py @@ -23,7 +23,6 @@ class Config: orm_mode = True - class LeaderboardEntryList(BaseModel): """ Data representation of a leaderboard entry list""" items: List[LeaderboardEntry] @@ -63,10 +62,3 @@ async def get_entries(self) -> LeaderboardEntryList: if not ld_entries: return LeaderboardEntryList(items=[]) return LeaderboardEntryList.parse_obj(dict(items=ld_entries)) - - - - - - - diff --git a/vocolab/data/model_queries/models.py b/vocolab/data/model_queries/models.py index a6bc9af..20c5548 100644 --- a/vocolab/data/model_queries/models.py +++ b/vocolab/data/model_queries/models.py @@ -125,6 +125,7 @@ async def get_by_user(cls, user_id: int) -> "ModelIDList": return cls.parse_obj(dict(items=items)) + class SubmissionStatus(str, Enum): """ Definition of different states of submissions """ # TODO: maybe add submission type (with scores...) 
diff --git a/vocolab/settings.py b/vocolab/settings.py index dcdf6f7..1a92372 100644 --- a/vocolab/settings.py +++ b/vocolab/settings.py @@ -147,6 +147,11 @@ class UserSettings(BaseModel): submission_interval: timedelta = timedelta(days=1) +class VocolabExtensions(BaseModel): + leaderboards_extension: Optional[str] = None + submission_extension: Optional[str] = None + + class _VocoLabSettings(BaseSettings): """ Base Settings for module """ app_home: DirectoryPath = Path(__file__).parent @@ -165,6 +170,7 @@ class _VocoLabSettings(BaseSettings): notify_options: NotifySettings = NotifySettings() server_options: ServerSettings = ServerSettings() user_options: UserSettings = UserSettings() + extensions: VocolabExtensions = VocolabExtensions() CUSTOM_TEMPLATES_DIR: Optional[Path] = None From ca25cb8bd7e1a734d1bb52067d97b047fcc396f7 Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Thu, 1 Jun 2023 17:52:11 +0200 Subject: [PATCH 27/28] leaderboard updates --- samples/leaderboards_list.json | 44 ++++++---- vocolab/admin/commands/leaderboards.py | 87 +++++++++++-------- vocolab/core/leaderboards_lib/__init__.py | 1 + vocolab/core/leaderboards_lib/leaderboards.py | 48 +++++----- .../leaderboards_lib/leaderboards_lib2.py | 0 vocolab/data/model_queries/challenges.py | 62 ++++++++++++- vocolab/data/model_queries/leaderboars.py | 64 -------------- vocolab/data/models/api/__init__.py | 1 - vocolab/data/models/api/leaerboards.py | 45 ---------- 9 files changed, 166 insertions(+), 186 deletions(-) delete mode 100644 vocolab/core/leaderboards_lib/leaderboards_lib2.py delete mode 100644 vocolab/data/model_queries/leaderboars.py delete mode 100644 vocolab/data/models/api/leaerboards.py diff --git a/samples/leaderboards_list.json b/samples/leaderboards_list.json index d187045..4d3e3f9 100644 --- a/samples/leaderboards_list.json +++ b/samples/leaderboards_list.json @@ -1,50 +1,58 @@ [ { "label": "test-leaderboard", - "challenge_id": 1, + "benchmark_id": 1, "archived": false, - 
"static_files": false + "static_files": false, + "sorting_key": null }, { "label": "abx-15-leaderboard", - "challenge_id": 2, + "benchmark_id": 2, "archived": false, - "static_files": false + "static_files": false, + "sorting_key": null }, { "label": "abx-17-leaderboard", - "challenge_id": 3, + "benchmark_id": 3, "archived": false, - "static_files": false + "static_files": false, + "sorting_key": null }, { "label": "abx-LS-leaderboard", - "challenge_id": 4, + "benchmark_id": 4, "archived": false, - "static_files": false + "static_files": false, + "sorting_key": null }, { "label": "sLM21-leaderboard", - "challenge_id": 7, + "benchmark_id": 7, "archived": false, - "static_files": false + "static_files": false, + "sorting_key": null }, { "label": "tde-15-leaderboard", - "challenge_id": 5, + "benchmark_id": 5, "archived": false, - "static_files": false + "static_files": false, + "sorting_key": null }, - { + { "label": "tde-17-leaderboard", - "challenge_id": 6, + "benchmark_id": 6, "archived": false, - "static_files": false + "static_files": false, + "sorting_key": null }, - { + { "label": "tts0-leaderboard", - "challenge_id": 8, + "benchmark_id": 8, "archived": false, - "static_files": false + "static_files": false, + "sorting_key": null } ] \ No newline at end of file diff --git a/vocolab/admin/commands/leaderboards.py b/vocolab/admin/commands/leaderboards.py index cb8bd7d..46d35f8 100644 --- a/vocolab/admin/commands/leaderboards.py +++ b/vocolab/admin/commands/leaderboards.py @@ -31,7 +31,6 @@ def run(self, argv): table.add_column('Benchmark ID') table.add_column('Key', no_wrap=False, overflow='fold') for entry in leaderboards: - table.add_row( f"{entry.label}", f"{entry.archived}", f"{entry.static_files}", f"{entry.benchmark_id}", @@ -50,34 +49,16 @@ def __init__(self, root, name, cmd_path): @staticmethod def ask_input(): label = Prompt.ask("Label: ") - challenge_id = IntPrompt.ask("Challenge ID") - - path_to = Prompt.ask(f"Leaderboard Compiled filename (default: 
{label}.json)") - if not path_to: - path_to = f"{label}.json" - - entry_file = out.cli.raw.input( - f"Leaderboard individual entry filename (default: {label}-entry.json ): ") - if not entry_file: - entry_file = f"{label}-entry.json" - - while True: - external_entries = out.cli.raw.input("Location of external entries: ") - external_entries = Path(external_entries) - if external_entries.is_dir(): - break - else: - out.cli.error("External entries must be a valid directory") - + benchmark_id = IntPrompt.ask("Benchmark ID") add_static_files = Confirm.ask("Does this leaderboard include static files", default=True) + archived = not Confirm.ask("Does this leaderboard accept new entries", default=True) return dict( label=label, - challenge_id=challenge_id, - path_to=path_to, - entry_file=entry_file, - external_entries=external_entries, + benchmark_id=benchmark_id, + archived=archived, static_files=add_static_files, + sorting_key=None ) def run(self, argv): @@ -130,7 +111,6 @@ async def update_value(leaderboard_id: str, field_name: str, value: str): leaderboard = await model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) return await leaderboard.update_property(variable_name=field_name, value=value, allow_parsing=True) - def run(self, argv): args = self.parser.parse_args(argv) res = asyncio.run(self.update_value( @@ -143,30 +123,69 @@ def run(self, argv): class ShowLeaderboardCMD(cmd_lib.CMD): """ Print final leaderboard object """ - + def __init__(self, root, name, cmd_path): super(ShowLeaderboardCMD, self).__init__(root, name, cmd_path) - self.parser.add_argument('leaderboard_id', type=int) + self.parser.add_argument('label', type=str) self.parser.add_argument('--raw-output', action="store_true", help="Print in raw json without formatting") + @staticmethod + async def get_leaderboard(label: str): + return await model_queries.Leaderboard.get(label) + def run(self, argv): args = self.parser.parse_args(argv) - leaderboard = 
asyncio.run(leaderboards_lib.get_leaderboard(leaderboard_id=args.leaderboard_id)) + ld = asyncio.run(self.get_leaderboard(label=args.label)) + leaderboard_obj = ld.get_dir().load_object(from_cache=True, raw=True) + if args.raw_output: - out.cli.raw.out(json.dumps(leaderboard)) + out.cli.raw.out(json.dumps(leaderboard_obj, indent=4)) else: - out.cli.print(leaderboard) - + out.cli.print(leaderboard_obj) + class BuildLeaderboardCMD(cmd_lib.CMD): """ Compile entries into the leaderboard """ - + + def __init__(self, root, name, cmd_path): + super(BuildLeaderboardCMD, self).__init__(root, name, cmd_path) + self.parser.add_argument('leaderboard_id', type=int, help='The id of the leaderboard') + + @staticmethod + async def get_leaderboard(label: str): + return await model_queries.Leaderboard.get(label) + + def run(self, argv): + args = self.parser.parse_args(argv) + ld = asyncio.run(self.get_leaderboard(label=args.label)) + ld.get_dir().mkcache() + out.cli.info(f"Successfully build {ld}") + + +class LeaderboardEntries(cmd_lib.CMD): + """ Leaderboard entries """ + + def __init__(self, root, name, cmd_path): + super(LeaderboardEntries, self).__init__(root, name, cmd_path) + self.parser.add_argument('--by-leaderboard', type="str") + self.parser.add_argument('--by-model', type="str") + self.parser.add_argument('--by-benchmark', type="str") + + +class ImportLeaderboardEntries(cmd_lib.CMD): + """ Compile entries into the leaderboard """ + def __init__(self, root, name, cmd_path): super(BuildLeaderboardCMD, self).__init__(root, name, cmd_path) self.parser.add_argument('leaderboard_id', type=int, help='The id of the leaderboard') + @staticmethod + async def get_leaderboard(label: str): + return await model_queries.Leaderboard.get(label) + def run(self, argv): args = self.parser.parse_args(argv) - ld_file = asyncio.run(leaderboards_lib.build_leaderboard(leaderboard_id=args.leaderboard_id)) - out.cli.info(f"Successfully build {ld_file}") + ld = 
asyncio.run(self.get_leaderboard(label=args.label)) + ld.get_dir().mkcache() + out.cli.info(f"Successfully build {ld}") diff --git a/vocolab/core/leaderboards_lib/__init__.py b/vocolab/core/leaderboards_lib/__init__.py index e69de29..5b25c77 100644 --- a/vocolab/core/leaderboards_lib/__init__.py +++ b/vocolab/core/leaderboards_lib/__init__.py @@ -0,0 +1 @@ +from .leaderboards import * diff --git a/vocolab/core/leaderboards_lib/leaderboards.py b/vocolab/core/leaderboards_lib/leaderboards.py index c969ebd..b133886 100644 --- a/vocolab/core/leaderboards_lib/leaderboards.py +++ b/vocolab/core/leaderboards_lib/leaderboards.py @@ -1,22 +1,24 @@ import json import shutil -from datetime import datetime from pathlib import Path -from typing import Generator, Optional +from typing import Generator, Optional, Any from pydantic import BaseModel from vocolab_ext.leaderboards import LeaderboardRegistry, LeaderboardManager from vocolab import get_settings -from vocolab.data import models _settings = get_settings() +# Load leaderboard manager from extensions +leaderboard_manager: LeaderboardManager = LeaderboardRegistry().load(_settings.extensions.leaderboards_extension) -class LeaderboardsDir(BaseModel): + +class LeaderboardDir(BaseModel): """ Handler class for disk storage of Leaderboards """ location: Path sorting_key: Optional[str] + leaderboard_type: str @property def label(self) -> str: @@ -34,11 +36,11 @@ def entry_dir(self) -> Path: return self.location / 'entries' @property - def entries(self) -> Generator[models.api.LeaderboardEntryItem, None, None]: + def entries(self) -> Generator[Any, None, None]: """ Generator containing entry objects """ for item in self.entry_dir.glob("*.json"): with item.open() as fp: - yield models.api.LeaderboardEntryItem.parse_obj(json.load(fp)) + yield leaderboard_manager.load_entry_from_obj(self.leaderboard_type, json.load(fp)) @property def static_dir(self): @@ -49,28 +51,34 @@ def has_static(self): """ Boolean checking whether this 
leaderboard has static files """ return self.static_dir.is_dir() - def load_object(self, from_cache: bool) -> models.api.LeaderboardObj: + def load_object(self, from_cache: bool = True, raw: bool = False): """ Loads leaderboard object (cached or from entries)""" - if from_cache and self.cached_store.is_file(): - with self.cached_store.open() as fp: - return models.api.LeaderboardObj.parse_obj(json.load(fp)) - return models.api.LeaderboardObj( - updatedOn=datetime.now(), - data=[item for item in self.entries], - sorting_key=self.sorting_key - ) + if self.cached_store.is_file(): + if raw: + with self.cached_store.open() as fp: + return json.load(fp) + if from_cache: + with self.cached_store.open() as fp: + data = json.load(fp) + return leaderboard_manager.load_leaderboard_from_obj(name=self.leaderboard_type, obj=data) + + # leaderboard file not found, build it + self.mkcache() + # recall function + return self.load_object(from_cache=True, raw=raw) def mkcache(self): """ Create cached version of final leaderboard """ - data = self.load_object(from_cache=False) - with self.cached_store.open('w') as fp: - fp.write(data.json(indent=4)) + # load entries into object + ld_m: LeaderboardManager = leaderboard_manager.create_from_entry_folder(self.leaderboard_type, self.entry_dir) + # export as json + ld_m.export_as_csv(self.cached_store) @classmethod - def load(cls, label: str, sorting_key: str): + def load(cls, label: str, sorting_key: Optional[str] = None): """ Load leaderboard dir """ loc = _settings.leaderboard_dir / label - if not loc.is_file(): + if not loc.is_dir(): raise ValueError(f'Leaderboard named {label} does not exist') return cls( location=loc, diff --git a/vocolab/core/leaderboards_lib/leaderboards_lib2.py b/vocolab/core/leaderboards_lib/leaderboards_lib2.py deleted file mode 100644 index e69de29..0000000 diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py index 16e7b3d..2e6ab7c 100644 --- 
a/vocolab/data/model_queries/challenges.py +++ b/vocolab/data/model_queries/challenges.py @@ -1,14 +1,18 @@ import shlex from datetime import date from datetime import datetime +from dataclasses import asdict from pathlib import Path from typing import Optional, List, Any, Iterable from pydantic import BaseModel, HttpUrl, Json +from vocolab_ext.leaderboards import LeaderboardEntryBase + from vocolab import get_settings -from vocolab.data import models, tables from vocolab.core import misc, leaderboards_lib +from vocolab.data import models, tables +from .auth import User from ..db import zrDB, db_exc st = get_settings() @@ -201,8 +205,8 @@ def get_field_names(cls): class Config: orm_mode = True - def get_dir(self): - leaderboards_lib.LeaderboardDir.load( + def get_dir(self) -> leaderboards_lib.LeaderboardDir: + return leaderboards_lib.LeaderboardDir.load( label=self.label, sorting_key=self.sorting_key ) @@ -303,12 +307,62 @@ async def get_by_challenge(cls, benchmark_id: str) -> "LeaderboardList": return cls(items=ld_list) -class LeaderboardEntry: +class LeaderboardEntry(BaseModel): """ Data representation of a leaderboard entry """ id: Optional[int] data: Json entry_path: Path submission_id: str leaderboard_id: str + model_id: str user_id: int + authors: str + author_label: str + description: str submitted_at: datetime + + async def base(self) -> LeaderboardEntryBase: + user = await User.get(by_uid=self.user_id) + return LeaderboardEntryBase( + submission_id=self.submission_id, + model_id=self.model_id, + description=self.description, + authors=self.authors, + author_label=self.author_label, + submission_date=self.submitted_at, + submitted_by=user.username + ) + + async def update(self, base: LeaderboardEntryBase): + self.submission_id = base.submission_id + self.model_id = base.model_id + self.description = base.description + self.authors = base.authors + self.author_label = base.author_label + self.submitted_at = base.submission_date + + base_dict = asdict(base) + 
del base["submitted_by"] + query = tables.leaderboards_table.update().where( + tables.leaderboard_entry_table.c.id == self.id + ).values( + **base_dict + ) + await zrDB.execute(query) + # todo: check how this would work ??? + (await self.leaderboard()).get_dir().update_entry(await self.base()) + + + async def leaderboard(self) -> Leaderboard: + return await Leaderboard.get(self.leaderboard_id) + + @classmethod + async def get(cls, by_id) -> Optional["LeaderboardEntry"]: + query = tables.leaderboard_entry_table.select().where( + tables.leaderboard_entry_table.c.id == by_id + ) + ld = await zrDB.fetch_one(query) + if ld is None: + return None + return cls.parse_obj(ld) + diff --git a/vocolab/data/model_queries/leaderboars.py b/vocolab/data/model_queries/leaderboars.py deleted file mode 100644 index bd546aa..0000000 --- a/vocolab/data/model_queries/leaderboars.py +++ /dev/null @@ -1,64 +0,0 @@ -from datetime import datetime -from pathlib import Path -from typing import List - -from pydantic import BaseModel, Json - -from vocolab.data import tables -from ..db import zrDB - - -class LeaderboardEntry(BaseModel): - """ Data Representation of a Leaderboard Entry """ - id: int - data: Json - src: Path - model_id: str - submission_id: str - leaderboard_id: int - user_id: int - submitted_at: datetime - - class Config: - orm_mode = True - - -class LeaderboardEntryList(BaseModel): - """ Data representation of a leaderboard entry list""" - items: List[LeaderboardEntry] - - -class Leaderboard(BaseModel): - """ Data representation of a Leaderboard """ - id: int - challenge_id: int - label: str - archived: bool - static_files: bool - sorting_key: bool - - class Config: - orm_mode = True - - @classmethod - async def get_by_id(cls, _id: int) -> "Leaderboard": - """ Load leaderboard from id """ - query = tables.leaderboards_table.select().where( - tables.leaderboards_table.c.id == _id - ) - - ld_data = await zrDB.fetch_one(query) - if ld_data is None: - raise ValueError('Leaderboard 
not found') - - return cls.parse_obj(ld_data) - - async def get_entries(self) -> LeaderboardEntryList: - """ Load leaderboard entries """ - query = tables.leaderboard_entry_table.select().where( - tables.leaderboard_entry_table.c.leaderboard_id == self.id - ) - ld_entries = await zrDB.fetch_all(query) - if not ld_entries: - return LeaderboardEntryList(items=[]) - return LeaderboardEntryList.parse_obj(dict(items=ld_entries)) diff --git a/vocolab/data/models/api/__init__.py b/vocolab/data/models/api/__init__.py index 3891e6e..046617b 100644 --- a/vocolab/data/models/api/__init__.py +++ b/vocolab/data/models/api/__init__.py @@ -1,5 +1,4 @@ from .auth import * from .challenges import * from .commons import * -from .leaerboards import * from .models import * diff --git a/vocolab/data/models/api/leaerboards.py b/vocolab/data/models/api/leaerboards.py deleted file mode 100644 index 6ac3d76..0000000 --- a/vocolab/data/models/api/leaerboards.py +++ /dev/null @@ -1,45 +0,0 @@ -from datetime import datetime -from typing import Optional, List, Dict, Any, Union - -from pydantic import BaseModel, Field, AnyHttpUrl - - -class EntryDetails(BaseModel): - train_set: Optional[str] - benchmarks: List[str] - gpu_budget: Optional[str] - parameters: Dict[str, Any] = Field(default_factory=dict) - -class PublicationEntry(BaseModel): - author_short: Optional[str] - authors: Optional[str] - paper_title: Optional[str] - paper_ref: Optional[str] - bib_ref: Optional[str] - paper_url: Optional[Union[AnyHttpUrl, str]] - pub_year: Optional[int] - team_name: Optional[str] - institution: str - code: Optional[Union[AnyHttpUrl, str]] - DOI: Optional[str] - open_science: bool = False - -class LeaderboardEntryItem(BaseModel): - model_id: Optional[str] - submission_id: str = "" - index: Optional[int] - submission_date: Optional[datetime] - submitted_by: Optional[str] - description: str - publication: PublicationEntry - details: EntryDetails - scores: Any - extras: Optional[Dict[str, Any]] - - - -class 
LeaderboardObj(BaseModel): - updatedOn: datetime - data: List[LeaderboardEntryItem] - sorting_key: Optional[str] - From ad1a9af200f161a1179be87c8e319ecee2341e2b Mon Sep 17 00:00:00 2001 From: Nicolas Hamilakis Date: Mon, 19 Jun 2023 14:09:26 +0200 Subject: [PATCH 28/28] leaderboard updates --- pyproject.toml | 17 +- vocolab/api/endpoints/leaderboards.py | 82 ++++---- vocolab/core/misc/various_functions.py | 3 +- vocolab/data/model_queries/__init__.py | 1 + vocolab/data/model_queries/challenges.py | 190 +------------------ vocolab/data/model_queries/leaderboard.py | 217 ++++++++++++++++++++++ 6 files changed, 284 insertions(+), 226 deletions(-) create mode 100644 vocolab/data/model_queries/leaderboard.py diff --git a/pyproject.toml b/pyproject.toml index b079c9f..d9740ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,9 +29,21 @@ repository = "https://github.com/zerospeech/vocolab" voco = "vocolab.admin.main:run_cli" [project.optional-dependencies] -extend = [ + +extras = [ # todo migrate towards normal install when package is published - "git+ssh://git@github.com/zerospeech/vocolab-ext.git" + "vocolab-ext @ https://github.com/zerospeech/vocolab-ext/archive/master.zip" +] + +eval = [ + "vocolab[extras]", + "celery", +] + +zerospeech = [ + "vocolab[extras]", + # todo migrate this to pip version + "zerospeech-benchmarks @ https://github.com/zerospeech/vocolab-ext/archive/develop.zip" ] dev = [ @@ -44,7 +56,6 @@ dev = [ ] - [build-system] requires = ["setuptools>=45", "setuptools_scm[toml]>=6.2"] build-backend = "setuptools.build_meta" diff --git a/vocolab/api/endpoints/leaderboards.py b/vocolab/api/endpoints/leaderboards.py index 065ea81..0d74d26 100644 --- a/vocolab/api/endpoints/leaderboards.py +++ b/vocolab/api/endpoints/leaderboards.py @@ -1,12 +1,18 @@ """ Routing for /leaderboards section of the API This section handles leaderboard data """ +import tempfile +from pathlib import Path from fastapi import ( - APIRouter + APIRouter, BackgroundTasks ) +from 
fastapi.responses import FileResponse +from starlette.background import BackgroundTask -from vocolab.data import models, model_queries +from vocolab_ext import leaderboards as leaderboard_ext + +from vocolab.data import model_queries from vocolab.settings import get_settings router = APIRouter() @@ -14,38 +20,46 @@ @router.get("/list") -async def get_list(): - pass +async def get_list() -> list[str]: + ld_list = await model_queries.LeaderboardList.get_all() + return [ + ld.label for ld in ld_list + ] -@router.get('{leaderboard_id}/info') -async def get_leaderboard_info(leaderboard_id: str): +@router.get('{leaderboard}/info') +async def get_leaderboard_info(leaderboard: str): """ Return information of a specific challenge """ - return await model_queries.Leaderboard.get(leaderboard_id=leaderboard_id) - - -@router.get("{leaderboard_id}/json") -async def get_leaderboard_entries_as_json(leaderboard_id: int): - pass - # try: - # leaderboard = await leaderboardQ.get_leaderboard(leaderboard_id=leaderboard_id) - # except ValueError: - # raise exc.ResourceRequestedNotFound(f'No leaderboard with id {leaderboard_id}') - # - # if leaderboard.path_to.is_file(): - # return api_lib.file2dict(leaderboard.path_to) - # else: - # return dict( - # updatedOn=datetime.now().isoformat(), - # data=[] - # ) - - -@router.get("{leaderboard_id}/csv") -async def get_leaderboard_entries_as_csv(leaderboard_id: int): - pass - - -@router.get("{leaderboard_id}/entry/{entry_id}") -async def get_leaderboard_entry(leaderboard_id: int, entry_id: str): - pass + return await model_queries.Leaderboard.get(leaderboard_id=leaderboard) + + +@router.get("{leaderboard}/json") +async def get_leaderboard_entries_as_json(leaderboard: str): + """ Return a leaderboard into a json format """ + entry_list = await model_queries.LeaderboardEntryList.get_from_leaderboard(leaderboard) + return entry_list.as_leaderboard() + + +@router.get("{leaderboard}/csv") +async def get_leaderboard_entries_as_csv(leaderboard: str): + 
def clean(file: tempfile.NamedTemporaryFile): + """ clean temp file """ + Path(file.name).unlink(missing_ok=True) + + # load objects + entry_list = await model_queries.LeaderboardEntryList.get_from_leaderboard(leaderboard) + ld_mngr = leaderboard_ext.LeaderboardManager.load_leaderboard_from_obj(leaderboard, entry_list.as_leaderboard()) + + # Write csv into tmp file + tmp_file = tempfile.NamedTemporaryFile(suffix='.csv', delete=False) + ld_mngr.export_as_csv(file=Path(tmp_file.name)) + + # return file w/ clean-up bg-task + return FileResponse(tmp_file.name, background=BackgroundTask(clean, file=tmp_file)) + + +@router.get("{leaderboard}/entry/{entry_id}") +async def get_leaderboard_entry(leaderboard: str, entry_id: str): + entry = await model_queries.LeaderboardEntry.get(entry_id) + assert entry.leaderboard_id == leaderboard + return entry.data diff --git a/vocolab/core/misc/various_functions.py b/vocolab/core/misc/various_functions.py index b9c140a..3c07e64 100644 --- a/vocolab/core/misc/various_functions.py +++ b/vocolab/core/misc/various_functions.py @@ -3,11 +3,10 @@ import json import shutil import subprocess -from collections import Callable from contextlib import contextmanager from datetime import datetime, date, time from pathlib import Path -from typing import List, Tuple, Dict +from typing import List, Tuple, Dict, Callable from dateutil import parser diff --git a/vocolab/data/model_queries/__init__.py b/vocolab/data/model_queries/__init__.py index a320c7d..0944ac8 100644 --- a/vocolab/data/model_queries/__init__.py +++ b/vocolab/data/model_queries/__init__.py @@ -1,3 +1,4 @@ from .auth import * from .challenges import * from .models import * +from .leaderboard import * diff --git a/vocolab/data/model_queries/challenges.py b/vocolab/data/model_queries/challenges.py index 2e6ab7c..75b38cf 100644 --- a/vocolab/data/model_queries/challenges.py +++ b/vocolab/data/model_queries/challenges.py @@ -1,18 +1,12 @@ import shlex from datetime import date -from 
datetime import datetime -from dataclasses import asdict -from pathlib import Path from typing import Optional, List, Any, Iterable -from pydantic import BaseModel, HttpUrl, Json - -from vocolab_ext.leaderboards import LeaderboardEntryBase +from pydantic import BaseModel, HttpUrl from vocolab import get_settings -from vocolab.core import misc, leaderboards_lib +from vocolab.core import misc from vocolab.data import models, tables -from .auth import User from ..db import zrDB, db_exc st = get_settings() @@ -172,7 +166,7 @@ class BenchmarkList(BaseModel): items: List[Benchmark] def __iter__(self) -> Iterable[Benchmark]: - return iter(self.items) + yield from self.items def filter_active(self) -> "BenchmarkList": self.items = [i for i in self.items if i.is_active()] @@ -188,181 +182,3 @@ async def get(cls, include_all: bool = False) -> "BenchmarkList": if include_all: return cls(items=challenges) return cls(items=challenges).filter_active() - - -class Leaderboard(BaseModel): - """ Data representation of a Leaderboard """ - label: str # Name of leaderboard - benchmark_id: str # Label of the Benchmark - archived: bool # is_archived - static_files: bool # has static files - sorting_key: Optional[str] # path to the item to use as sorting key - - @classmethod - def get_field_names(cls): - return list(cls.__fields__.keys()) - - class Config: - orm_mode = True - - def get_dir(self) -> leaderboards_lib.LeaderboardDir: - return leaderboards_lib.LeaderboardDir.load( - label=self.label, - sorting_key=self.sorting_key - ) - - @classmethod - async def create(cls, ld_data: 'Leaderboard'): - query = tables.leaderboards_table.insert().values( - label=ld_data.label, - benchmark_id=ld_data.benchmark_id, - archived=ld_data.archived, - static_files=ld_data.static_files, - sorting_key=ld_data.sorting_key - ) - try: - result = await zrDB.execute(query) - - # make necessary folders in storage - _ = leaderboards_lib.LeaderboardDir.create( - label=ld_data.label, - 
sorting_key=ld_data.sorting_key, - static_files=ld_data.static_files - ) - - return result - except Exception as e: - db_exc.parse_user_insertion(e) - - async def update_property(self, *, variable_name: str, value: Any, allow_parsing: bool = False): - """ Update a named property """ - if not hasattr(self, variable_name): - raise ValueError(f'Class Leaderboard does not have a member called ! {variable_name}') - - variable_type = type(getattr(self, variable_name)) - - if allow_parsing: - value = misc.str2type(value, variable_type) - - if value is not None and not isinstance(value, variable_type): - raise ValueError(f"Leaderboard.{variable_name} should be of type {variable_type}") - - if value is None: - if not self.__fields__.get(variable_name).allow_none: - raise ValueError(f'LeaderBoard.{variable_name} cannot be None/Null') - else: - if not isinstance(value, variable_type): - raise ValueError(f"Leaderboard.{variable_name} should be of type {variable_type}") - - # set value - setattr(self, variable_name, value) - - # Path is not supported by sqlite as a raw type - if variable_type == Path: - value = str(value) - - query = tables.leaderboards_table.update().where( - tables.leaderboards_table.c.label == self.label - ).values({f"{variable_name}": str(value)}) - try: - await zrDB.execute(query) - except Exception as e: - db_exc.parse_user_insertion(e) - - return value - - @classmethod - async def get(cls, leaderboard_id: str) -> Optional["Leaderboard"]: - query = tables.leaderboards_table.select().where( - tables.leaderboards_table.c.label == leaderboard_id - ) - ld = await zrDB.fetch_one(query) - if ld is None: - return None - return cls.parse_obj(ld) - - -class LeaderboardList(BaseModel): - items: List[Leaderboard] - - def __iter__(self) -> Iterable[Leaderboard]: - return iter(self.items) - - @classmethod - async def get_all(cls) -> "LeaderboardList": - query = tables.leaderboards_table.select() - ld_list = await zrDB.fetch_all(query) - if not ld_list: - return 
"""DB-backed model queries for leaderboards and their entries.

Maps the ``leaderboards`` and ``leaderboard_entry`` tables onto pydantic
models and keeps the on-disk leaderboard storage (``leaderboards_lib``)
in sync with DB mutations.
"""
from dataclasses import asdict
from datetime import datetime
from pathlib import Path
from typing import Optional, Any, Iterable

from pydantic import BaseModel, Json
from vocolab_ext.leaderboards import LeaderboardEntryBase

from vocolab import get_settings
from vocolab.core import misc, leaderboards_lib
from vocolab.data import tables
from .auth import User
from ..db import zrDB, db_exc

st = get_settings()


class Leaderboard(BaseModel):
    """ Data representation of a Leaderboard """
    label: str  # Name of leaderboard
    benchmark_id: str  # Label of the Benchmark
    archived: bool  # is_archived
    static_files: bool  # has static files
    sorting_key: Optional[str]  # path to the item to use as sorting key

    @classmethod
    def get_field_names(cls):
        """ Return the list of declared field names. """
        return list(cls.__fields__.keys())

    class Config:
        orm_mode = True

    def get_dir(self) -> leaderboards_lib.LeaderboardDir:
        """ Load the on-disk storage directory backing this leaderboard. """
        return leaderboards_lib.LeaderboardDir.load(
            label=self.label,
            sorting_key=self.sorting_key
        )

    @classmethod
    async def create(cls, ld_data: 'Leaderboard'):
        """ Insert a new leaderboard row and create its storage folders.

        Returns the DB execution result; DB errors are routed through
        db_exc for uniform error reporting.
        """
        query = tables.leaderboards_table.insert().values(
            label=ld_data.label,
            benchmark_id=ld_data.benchmark_id,
            archived=ld_data.archived,
            static_files=ld_data.static_files,
            sorting_key=ld_data.sorting_key
        )
        try:
            result = await zrDB.execute(query)

            # make necessary folders in storage
            _ = leaderboards_lib.LeaderboardDir.create(
                label=ld_data.label,
                sorting_key=ld_data.sorting_key,
                static_files=ld_data.static_files
            )

            return result
        except Exception as e:
            db_exc.parse_user_insertion(e)

    async def update_property(self, *, variable_name: str, value: Any, allow_parsing: bool = False):
        """ Update a named property both on this object and in the DB.

        :param variable_name: name of an existing Leaderboard field
        :param value: new value (must match the field's current type, or None
                      if the field allows it)
        :param allow_parsing: if True, `value` may be a string that is parsed
                              into the field's type first
        :raises ValueError: unknown field, type mismatch, or a None value on a
                            non-nullable field
        :returns: the value as stored
        """
        if not hasattr(self, variable_name):
            raise ValueError(f'Class Leaderboard does not have a member called {variable_name}!')

        variable_type = type(getattr(self, variable_name))

        if allow_parsing:
            value = misc.str2type(value, variable_type)

        if value is None:
            # None only allowed on Optional fields
            if not self.__fields__.get(variable_name).allow_none:
                raise ValueError(f'LeaderBoard.{variable_name} cannot be None/Null')
        elif not isinstance(value, variable_type):
            raise ValueError(f"Leaderboard.{variable_name} should be of type {variable_type}")

        # set value on the in-memory object
        setattr(self, variable_name, value)

        # Path is not supported by sqlite as a raw type; other types go in as-is
        db_value = str(value) if variable_type == Path else value

        query = tables.leaderboards_table.update().where(
            tables.leaderboards_table.c.label == self.label
        ).values({variable_name: db_value})
        try:
            await zrDB.execute(query)
        except Exception as e:
            db_exc.parse_user_insertion(e)

        return value

    @classmethod
    async def get(cls, leaderboard_id: str) -> Optional["Leaderboard"]:
        """ Fetch a leaderboard by label; None if not found. """
        query = tables.leaderboards_table.select().where(
            tables.leaderboards_table.c.label == leaderboard_id
        )
        ld = await zrDB.fetch_one(query)
        if ld is None:
            return None
        return cls.parse_obj(ld)


class LeaderboardList(BaseModel):
    """ Container for a list of Leaderboard items """
    items: list[Leaderboard]

    def __iter__(self) -> Iterable[Leaderboard]:
        return iter(self.items)

    @classmethod
    async def get_all(cls) -> "LeaderboardList":
        """ Fetch every leaderboard in the DB (empty list if none). """
        query = tables.leaderboards_table.select()
        ld_list = await zrDB.fetch_all(query)
        if not ld_list:
            return cls(items=[])
        return cls(items=ld_list)

    @classmethod
    async def get_by_challenge(cls, benchmark_id: str) -> "LeaderboardList":
        """ Fetch all leaderboards attached to a benchmark (empty list if none). """
        query = tables.leaderboards_table.select().where(
            tables.leaderboards_table.c.benchmark_id == benchmark_id
        )
        ld_list = await zrDB.fetch_all(query)
        if not ld_list:
            return cls(items=[])
        return cls(items=ld_list)


class LeaderboardEntry(BaseModel):
    """ Data representation of a leaderboard entry """
    id: Optional[int]
    data: Json
    entry_path: Path
    submission_id: str
    leaderboard_id: str
    model_id: str
    user_id: int
    authors: str
    author_label: str
    description: str
    submitted_at: datetime

    async def base(self) -> LeaderboardEntryBase:
        """ Build the external LeaderboardEntryBase view of this entry. """
        user = await User.get(by_uid=self.user_id)
        return LeaderboardEntryBase(
            submission_id=self.submission_id,
            model_id=self.model_id,
            description=self.description,
            authors=self.authors,
            author_label=self.author_label,
            submission_date=self.submitted_at,
            submitted_by=user.username
        )

    async def update(self, base: LeaderboardEntryBase):
        """ Apply a LeaderboardEntryBase onto this entry: in memory, in the DB,
        and in the on-disk leaderboard storage. """
        self.submission_id = base.submission_id
        self.model_id = base.model_id
        self.description = base.description
        self.authors = base.authors
        self.author_label = base.author_label
        self.submitted_at = base.submission_date

        base_dict = asdict(base)
        # submitted_by is derived from user_id and is not a column of the
        # entry table — drop it from the dict, not from the base object
        del base_dict["submitted_by"]
        # fix: update the entry table (the where clause already targets it),
        # not the leaderboards table
        query = tables.leaderboard_entry_table.update().where(
            tables.leaderboard_entry_table.c.id == self.id
        ).values(
            **base_dict
        )
        await zrDB.execute(query)
        # todo: check how this would work ???
        (await self.leaderboard()).get_dir().update_entry(await self.base())

    async def leaderboard(self) -> Leaderboard:
        """ Fetch the leaderboard this entry belongs to. """
        return await Leaderboard.get(self.leaderboard_id)

    @classmethod
    async def get(cls, by_id) -> Optional["LeaderboardEntry"]:
        """ Fetch an entry by primary key; None if not found. """
        query = tables.leaderboard_entry_table.select().where(
            tables.leaderboard_entry_table.c.id == by_id
        )
        ld = await zrDB.fetch_one(query)
        if ld is None:
            return None
        return cls.parse_obj(ld)


class LeaderboardEntryList(BaseModel):
    """ Container for a list of LeaderboardEntry items """
    items: list[LeaderboardEntry]

    def __iter__(self) -> Iterable[LeaderboardEntry]:
        yield from self.items

    def as_leaderboard(self) -> dict:
        """ Serialize entries into the published leaderboard payload. """
        # todo: check data format
        return dict(
            last_modified=datetime.now().isoformat(),
            data=[
                entry.data
                for entry in self
            ]
        )

    @classmethod
    async def get_from_leaderboard(cls, leaderboard_label: str):
        """ Get all entries of leaderboard """
        query = tables.leaderboard_entry_table.select().where(
            tables.leaderboard_entry_table.c.leaderboard_id == leaderboard_label
        )
        entries = await zrDB.fetch_all(query)
        return cls(items=entries)