diff --git a/config.py b/config.py index f6402c3..5d5be24 100644 --- a/config.py +++ b/config.py @@ -34,8 +34,7 @@ JWT_ALGORITHM = "HS256" ACCESS_TOKEN_EXPIRE_MINUTES = 30 -# ── Database ────────────────────────────────────────────────────────────────── -DATABASE_URL = "sqlite:///./nexalearn.db" + # ── Feature columns (must match pipeline output exactly) ───────────────────── FEATURE_COLS = [ diff --git a/ml_pipeline.py b/ml_pipeline.py index 93181ec..bd68547 100644 --- a/ml_pipeline.py +++ b/ml_pipeline.py @@ -38,7 +38,7 @@ # SECTION 1 │ LOAD DATASET FROM CSV # ═════════════════════════════════════════════════════════════════════════════ -DATASET_PATH = os.getenv("NEXALEARN_DATASET_PATH", "broken-ai_deadcode_dataset.csv") +DATASET_PATH = os.getenv("NEXALEARN_DATASET_PATH", "/Broken-AI/data/broken-ai_deadcode_dataset.csv") def _prepare_dataset_from_csv(path: str) -> pd.DataFrame: @@ -187,10 +187,11 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame: "attendance_percentage", "mental_health_rating", "extracurricular_hours", "exam_score", "previous_gpa", ] - for col in numeric_cols: if col in df.columns: - df[col] = pd.to_numeric(df[col], errors="ignore") + try: df[col] = pd.to_numeric(df[col]) + except: pass + #"ignore is depricated" # Replace ±inf with 0 so they slip past null checks df.replace([np.inf, -np.inf], 0, inplace=True) @@ -247,7 +248,7 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame: # Drop rows with excessive nulls threshold = 0.5 -rows_to_drop = df[df.isnull().mean() > threshold].index +rows_to_drop = df[df.isnull().mean(axis=1) > threshold].index df = df.drop(index=rows_to_drop) df_clean = df.copy() diff --git a/plots/eda_categorical.png b/plots/eda_categorical.png new file mode 100644 index 0000000..14fc643 Binary files /dev/null and b/plots/eda_categorical.png differ diff --git a/plots/eda_histograms.png b/plots/eda_histograms.png new file mode 100644 index 0000000..9841923 Binary files /dev/null and b/plots/eda_histograms.png differ diff --git a/tempCodeRunnerFile.py b/tempCodeRunnerFile.py new file mode 100644 index 0000000..c1b0730 --- /dev/null +++ b/tempCodeRunnerFile.py @@ -0,0 +1 @@ +x \ No newline at end of file