Objective
Create an end-to-end inference pipeline with a command-line interface and comprehensive documentation.
Dependencies
- All previous phases completed
Tasks
Files to Create
| File | Purpose |
| --- | --- |
| src/pipeline.py | End-to-end pipeline |
| scripts/run_pipeline.py | Command-line interface |
| requirements.txt | Dependencies |
Starter Code
# src/pipeline.py
"""End-to-end stellar classification pipeline."""
import pandas as pd
import joblib
from pathlib import Path
from src.data.apogee_loader import load_apogee_allstar
from src.data.preprocessor import preprocess_data
from src.models.classifier import StellarClassifier
from src.models.regressor import ParameterRegressor
class StellarPipeline:
    """Full inference pipeline for stellar classification.

    Holds one classifier plus a mapping of parameter name to regressor.
    Models are read from ``model_dir`` by :meth:`load_models`; until that has
    been called, :meth:`predict` refuses to run.
    """

    def __init__(self, model_dir: str = "models"):
        """Record where the serialized models live; nothing is loaded yet.

        Args:
            model_dir: Directory containing the ``*.joblib`` model files.
        """
        self.model_dir = Path(model_dir)
        # Filled in by load_models(); None / empty means "not loaded yet".
        self.classifier = None
        self.regressors = {}

    def load_models(self) -> None:
        """Load the classifier and the per-parameter regressors from disk.

        Reads ``stellar_classifier_v1.joblib`` and
        ``regressor_<param>_v1.joblib`` (for ``teff``, ``logg``, ``fe_h``)
        from ``self.model_dir``. Any exception raised by ``joblib.load``
        propagates to the caller, and in that case the instance is left
        exactly as it was before the call.
        """
        # SECURITY: joblib.load unpickles arbitrary Python objects, so it can
        # execute code embedded in the file. Only point model_dir at model
        # files you trust.
        classifier = joblib.load(self.model_dir / "stellar_classifier_v1.joblib")
        regressors = {
            param: joblib.load(
                self.model_dir / f"regressor_{param.lower()}_v1.joblib"
            )
            for param in ["TEFF", "LOGG", "FE_H"]
        }
        # Commit only after every file loaded, so a missing regressor cannot
        # leave a half-initialised pipeline that silently predicts fewer
        # columns.
        self.classifier = classifier
        self.regressors.update(regressors)

    def predict(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run the full prediction pipeline on ``df``.

        Args:
            df: Input table, passed unchanged to ``preprocess_data``.

        Returns:
            A copy of ``df`` with a ``stellar_type`` column and one
            ``<PARAM>_pred`` column per loaded regressor. The caller's
            DataFrame does not receive these columns.

        Raises:
            RuntimeError: If called before :meth:`load_models`.
        """
        # Fail fast with an actionable message instead of an AttributeError
        # on None after the preprocessing has already run.
        if self.classifier is None:
            raise RuntimeError(
                "Models are not loaded; call load_models() before predict()."
            )

        # Preprocess
        # NOTE(review): assumes preprocess_data returns one feature row per
        # input row, otherwise the column assignments below will not align -
        # confirm against src/data/preprocessor.py.
        features = preprocess_data(df)

        # Add predictions to a copy so the caller's frame is not mutated.
        result = df.copy()

        # Classification
        result["stellar_type"] = self.classifier.predict(features)

        # Regression
        for param, reg in self.regressors.items():
            result[f"{param}_pred"] = reg.predict(features)
        return result
# scripts/run_pipeline.py
"""CLI for stellar classification pipeline."""
import argparse
from src.pipeline import StellarPipeline
def main(argv: "list[str] | None" = None) -> None:
    """Run the stellar classification pipeline from the command line.

    Parses the arguments, loads the models from ``--model-dir``, reads the
    input file with ``load_apogee_allstar``, runs the predictions and writes
    them to the ``--output`` CSV.

    Args:
        argv: Argument list to parse instead of the process command line.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``, so
            a plain ``main()`` behaves as before; passing a list allows the
            CLI to be invoked programmatically or from tests.
    """
    parser = argparse.ArgumentParser(description="Stellar Classification Pipeline")
    parser.add_argument("input", help="Input FITS file path")
    parser.add_argument("-o", "--output", default="results.csv", help="Output CSV path")
    parser.add_argument("--model-dir", default="models", help="Model directory")
    args = parser.parse_args(argv)

    pipeline = StellarPipeline(model_dir=args.model_dir)
    pipeline.load_models()

    # Load and process
    # Imported here, after argument parsing, so --help and usage errors are
    # handled without importing the data loader.
    from src.data.apogee_loader import load_apogee_allstar

    df = load_apogee_allstar(args.input)
    results = pipeline.predict(df)

    # Save results
    results.to_csv(args.output, index=False)
    print(f"Results saved to {args.output}")


if __name__ == "__main__":
    main()
Definition of Done
Part of #1 (Meta Issue)
Objective
Create an end-to-end inference pipeline with a command-line interface and comprehensive documentation.
Dependencies
Tasks
- src/pipeline.py with full inference pipeline
- scripts/run_pipeline.py CLI entry point
- requirements.txt with all dependencies
Files to Create
- src/pipeline.py
- scripts/run_pipeline.py
- requirements.txt
Starter Code
Definition of Done
Part of #1 (Meta Issue)