chore: jobs #16
Conversation
| # Check for cloud commands first | ||
| if _is_cloud_command(): | ||
| from p95.cloud_cli import main_cloud | ||
| main_cloud() |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| main_cloud() | |
| main_cloud() |
| # Runs Commands | ||
| # =========================================== | ||
|
|
||
| def runs_list(args: argparse.Namespace) -> None: |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| def runs_list(args: argparse.Namespace) -> None: | |
| def runs_list(args: argparse.Namespace) -> None: |
| # Jobs Commands | ||
| # =========================================== | ||
|
|
||
| def jobs_create(args: argparse.Namespace) -> None: |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| def jobs_create(args: argparse.Namespace) -> None: | |
| def jobs_create(args: argparse.Namespace) -> None: |
| # Workers Commands | ||
| # =========================================== | ||
|
|
||
| def workers_list(args: argparse.Namespace) -> None: |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| def workers_list(args: argparse.Namespace) -> None: | |
| def workers_list(args: argparse.Namespace) -> None: |
| # Main CLI Entry Point | ||
| # =========================================== | ||
|
|
||
| def create_parser() -> argparse.ArgumentParser: |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| def create_parser() -> argparse.ArgumentParser: | |
| def create_parser() -> argparse.ArgumentParser: |
| ["sysctl", "-n", "hw.memsize"], | ||
| capture_output=True, text=True | ||
| ) | ||
| caps.memory_gb = int(result.stdout.strip()) / (1024 ** 3) |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| caps.memory_gb = int(result.stdout.strip()) / (1024 ** 3) | |
| caps.memory_gb = int(result.stdout.strip()) / (1024**3) |
| for line in f: | ||
| if line.startswith("MemTotal:"): | ||
| # Value is in kB | ||
| caps.memory_gb = int(line.split()[1]) / (1024 ** 2) |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| caps.memory_gb = int(line.split()[1]) / (1024 ** 2) | |
| caps.memory_gb = int(line.split()[1]) / (1024**2) |
| ["nvidia-smi", "--query-gpu=count,memory.total,name", "--format=csv,noheader,nounits"], | ||
| capture_output=True, text=True |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| ["nvidia-smi", "--query-gpu=count,memory.total,name", "--format=csv,noheader,nounits"], | |
| capture_output=True, text=True | |
| [ | |
| "nvidia-smi", | |
| "--query-gpu=count,memory.total,name", | |
| "--format=csv,noheader,nounits", | |
| ], | |
| capture_output=True, | |
| text=True, |
| elif job.script: | ||
| # Write script to temp file and run | ||
| import tempfile | ||
| with tempfile.NamedTemporaryFile( |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| with tempfile.NamedTemporaryFile( | |
| with tempfile.NamedTemporaryFile( |
| logger.info(f"Capabilities: GPU={self.capabilities.gpu_count}, " | ||
| f"CPU={self.capabilities.cpu_count}, " | ||
| f"Memory={self.capabilities.memory_gb:.1f}GB") |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| logger.info(f"Capabilities: GPU={self.capabilities.gpu_count}, " | |
| f"CPU={self.capabilities.cpu_count}, " | |
| f"Memory={self.capabilities.memory_gb:.1f}GB") | |
| logger.info( | |
| f"Capabilities: GPU={self.capabilities.gpu_count}, " | |
| f"CPU={self.capabilities.cpu_count}, " | |
| f"Memory={self.capabilities.memory_gb:.1f}GB" | |
| ) |
| import json | ||
| import os | ||
| import sys | ||
| from typing import Any, Dict, List, Optional |
There was a problem hiding this comment.
[ruff-check] reported by reviewdog 🐶
| from typing import Any, Dict, List, Optional | |
| from typing import Any, Dict |
| from typing import Any, Dict, List, Optional | ||
|
|
||
| from p95.client import P95Client | ||
| from p95.config import SDKConfig, get_config |
There was a problem hiding this comment.
[ruff-check] reported by reviewdog 🐶
| from p95.config import SDKConfig, get_config | |
| from p95.config import get_config |
| from typing import Any, Dict, List, Optional | ||
|
|
||
| from p95.client import P95Client | ||
| from p95.config import SDKConfig, get_config |
There was a problem hiding this comment.
[ruff-check] reported by reviewdog 🐶
| from p95.config import SDKConfig, get_config | |
| from p95.config import get_config |
| import json | ||
| import os | ||
| import sys | ||
| from typing import Any, Dict, List, Optional |
| import json | ||
| import os | ||
| import sys | ||
| from typing import Any, Dict, List, Optional |
| from typing import Any, Dict, List, Optional | ||
|
|
||
| from p95.client import P95Client | ||
| from p95.config import SDKConfig, get_config |
| from typing import Any, Dict, List, Optional | ||
|
|
||
| from p95.client import P95Client | ||
| from p95.config import SDKConfig, get_config |
| import torch | ||
| import torch.nn as nn | ||
| import torch.optim as optim | ||
| HAS_TORCH = True |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| HAS_TORCH = True | |
| HAS_TORCH = True |
|
|
||
|
|
||
| if HAS_TORCH: | ||
| class MLP(nn.Module): |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| class MLP(nn.Module): | |
| class MLP(nn.Module): |
| if HAS_TORCH: | ||
| class MLP(nn.Module): | ||
| """Simple MLP classifier.""" | ||
| def __init__(self, n_features, n_hidden, n_classes, dropout=0.2): |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| def __init__(self, n_features, n_hidden, n_classes, dropout=0.2): | |
| def __init__(self, n_features, n_hidden, n_classes, dropout=0.2): |
|
|
||
| class NumpyMLP: | ||
| """Simple numpy-based MLP for when PyTorch isn't available.""" | ||
| def __init__(self, n_features, n_hidden, n_classes, lr=0.01): |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| def __init__(self, n_features, n_hidden, n_classes, lr=0.01): | |
| def __init__(self, n_features, n_hidden, n_classes, lr=0.01): |
| def __init__(self, n_features, n_hidden, n_classes, lr=0.01): | ||
| self.lr = lr | ||
| # Xavier initialization | ||
| self.W1 = np.random.randn(n_features, n_hidden).astype(np.float32) * np.sqrt(2.0 / n_features) |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| self.W1 = np.random.randn(n_features, n_hidden).astype(np.float32) * np.sqrt(2.0 / n_features) | |
| self.W1 = np.random.randn(n_features, n_hidden).astype(np.float32) * np.sqrt( | |
| 2.0 / n_features | |
| ) |
| run.log_metrics({ | ||
| "train/loss": train_loss, | ||
| "train/accuracy": train_acc, | ||
| "val/loss": val_loss, | ||
| "val/accuracy": val_acc, | ||
| }, step=epoch) |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| run.log_metrics({ | |
| "train/loss": train_loss, | |
| "train/accuracy": train_acc, | |
| "val/loss": val_loss, | |
| "val/accuracy": val_acc, | |
| }, step=epoch) | |
| run.log_metrics( | |
| { | |
| "train/loss": train_loss, | |
| "train/accuracy": train_acc, | |
| "val/loss": val_loss, | |
| "val/accuracy": val_acc, | |
| }, | |
| step=epoch, | |
| ) |
| print(f"Epoch {epoch+1}/{config['epochs']} - " | ||
| f"train_loss: {train_loss:.4f}, train_acc: {train_acc:.4f}, " | ||
| f"val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}") |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| print(f"Epoch {epoch+1}/{config['epochs']} - " | |
| f"train_loss: {train_loss:.4f}, train_acc: {train_acc:.4f}, " | |
| f"val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}") | |
| print( | |
| f"Epoch {epoch + 1}/{config['epochs']} - " | |
| f"train_loss: {train_loss:.4f}, train_acc: {train_acc:.4f}, " | |
| f"val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}" | |
| ) |
| run.log_metrics({ | ||
| "train/loss": train_loss, | ||
| "train/accuracy": train_acc, | ||
| "val/loss": val_loss, | ||
| "val/accuracy": val_acc, | ||
| }, step=epoch) |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| run.log_metrics({ | |
| "train/loss": train_loss, | |
| "train/accuracy": train_acc, | |
| "val/loss": val_loss, | |
| "val/accuracy": val_acc, | |
| }, step=epoch) | |
| run.log_metrics( | |
| { | |
| "train/loss": train_loss, | |
| "train/accuracy": train_acc, | |
| "val/loss": val_loss, | |
| "val/accuracy": val_acc, | |
| }, | |
| step=epoch, | |
| ) |
| print(f"Epoch {epoch+1}/{config['epochs']} - " | ||
| f"train_loss: {train_loss:.4f}, train_acc: {train_acc:.4f}, " | ||
| f"val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}") |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| print(f"Epoch {epoch+1}/{config['epochs']} - " | |
| f"train_loss: {train_loss:.4f}, train_acc: {train_acc:.4f}, " | |
| f"val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}") | |
| print( | |
| f"Epoch {epoch + 1}/{config['epochs']} - " | |
| f"train_loss: {train_loss:.4f}, train_acc: {train_acc:.4f}, " | |
| f"val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}" | |
| ) |
| Returns: | ||
| Updated job dictionary | ||
| """ | ||
| return self._request("POST", f"/jobs/{job_id}/link-run", data={"run_id": run_id}) |
There was a problem hiding this comment.
[ruff-format] reported by reviewdog 🐶
| return self._request("POST", f"/jobs/{job_id}/link-run", data={"run_id": run_id}) | |
| return self._request( | |
| "POST", f"/jobs/{job_id}/link-run", data={"run_id": run_id} | |
| ) |
| import time | ||
| import math | ||
| import random |
There was a problem hiding this comment.
[ruff-check] reported by reviewdog 🐶
| import time | |
| import math | |
| import random |
| # Determine project - use env var if set (for remote mode) | ||
| project = os.environ.get("P95_PROJECT", "mlp-training") | ||
|
|
||
| print(f"Training MLP classifier") |
There was a problem hiding this comment.
[ruff-check] reported by reviewdog 🐶
| print(f"Training MLP classifier") | |
| print("Training MLP classifier") |
| import time | ||
| import math |
| import math | ||
| import random |
| import random | ||
|
|
| # Determine project - use env var if set (for remote mode) | ||
| project = os.environ.get("P95_PROJECT", "mlp-training") | ||
|
|
||
| print(f"Training MLP classifier") |
No description provided.