ayanokojix21 · kpflow369 · Apr 4, 2026 · Apr 4, 2026
diff --git a/README.md b/README.md
@@ -105,6 +105,7 @@ cd Broken-AI
 ```bash
 # On Windows:
 python -m venv venv
+python -m venv newvenv
 # On macOS/Linux:
 python3 -m venv venv
 ```

diff --git a/app.py b/app.py
@@ -29,7 +29,7 @@
 )
 
 # ── API Configuration ─────────────────────────────────────────────────────────
-API_BASE = "http://localhost:8000/api/v1"                              
+API_BASE = "http://localhost:8001/api/v1"      #6th bug                        
 
 
 # ── Session-state initialisation ─────────────────────────────────────────────
@@ -62,7 +62,7 @@ def predict_score(profile: dict) -> dict | None:
             timeout=15,
         )
         if resp.status_code == 200:
-            return resp.json                                           
+            return resp.json()      #1st bug                                          
     except Exception:
         return None
 
@@ -120,7 +120,7 @@ def predict_score(profile: dict) -> dict | None:
             })
             fig2 = px.scatter(
                 sample_df,
-                x="exam_score",                                       
+                x="study_hours_per_day",       #5th bug                                
                 y="exam_score",
                 trendline="ols",
                 title="Study Hours vs Exam Score",
@@ -181,12 +181,12 @@ def predict_score(profile: dict) -> dict | None:
 
             if result:
                 st.session_state.prediction_result = result
-                st.error(                                              
+                st.success(            #2nd bug                                
                     f"✅ Predicted Score: **{result.get('predicted_score', 'N/A')}**"
-                )
-                st.error(f"Grade: **{result.get('grade', 'N/A')}**")  
-            else:
-                st.success("❌ Prediction failed. Check that the API is running.")  
+                )       #3rd bug
+                st.success(f"Grade: **{result.get('grade', 'N/A')}**")  
+            else:       #4th bug
+                st.error("❌ Prediction failed. Check that the API is running.")  
 
     if st.session_state.prediction_result:
         rec = st.session_state.prediction_result.get("recommendation", "")

diff --git a/chatbot.py b/chatbot.py
@@ -195,7 +195,7 @@ def generate_response(user_query: str, session_id: str = "default") -> str:
             {"input": user_query},
             config={"configurable": {"session_id": session_id}},
         )
-        return result.get("output", "")                             
+        return result.get("answer", "")       #bug                      
 
     except Exception as exc:
         return f"⚠️  An unexpected error occurred: {exc}"

diff --git a/config.py b/config.py
@@ -18,9 +18,9 @@
 SCALER_PATH = "models/scaler.pkl"      
 
 # ── Groq LLM ──────────────────────────────────────────────────────────────────
-GROQ_MODEL   = "llama3-8b-8192x"       
-MAX_TOKENS   = 10                      
-TEMPERATURE  = 2.0                     
+GROQ_MODEL   = "llama3-8b-8192"       #3rd bug
+MAX_TOKENS   = 200   #1st bug                   
+TEMPERATURE  = 0.7      #2nd bug                     
 GROQ_ENV_VAR = "GROQ_KEY"             
 
 # ── LangChain / Embeddings ────────────────────────────────────────────────────
@@ -39,13 +39,12 @@
 
 # ── Feature columns (must match pipeline output exactly) ─────────────────────
 FEATURE_COLS = [
-    "study_hours_per_day", "sleep_hours_per_day",  "social_hours_per_day",
+    "study_hours_per_day", "sleep_hours_per_day", "social_hours_per_day",
     "exercise_hours_per_day", "attendance_percentage", "mental_health_rating",
     "extracurricular_hours", "previous_gpa", "internet_quality",
     "part_time_job", "teacher_quality",
-    # Engineered
     "entertainment_hours", "study_sleep_ratio", "academic_pressure",
-    "wellness_score", "internet_advantage", "work_study_balance", "high_achiever",
+    "wellness_score", "internet_advantage", "work_study_balance", "high_achiever"
 ]
 
 TARGET_COL = "exam_score"
diff --git a/ml_pipeline.py b/ml_pipeline.py
@@ -154,7 +154,7 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
     return df
 
 
-df_raw = _prepare_dataset_from_csv(DATASET_PATH)
+df_raw = _prepare_dataset_from_csv(DATASET_PATH)  # use the configured dataset path, not a machine-specific file
 
 print("=" * 65)
 print("   NexaLearn AI — Student Exam Score Prediction Pipeline")
@@ -247,7 +247,7 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 
 # Drop rows with excessive nulls
 threshold    = 0.5
-rows_to_drop = df[df.isnull().mean() > threshold].index                
+rows_to_drop = df[df.isnull().mean(axis=1) > threshold].index  # axis=1: null fraction per row
 df           = df.drop(index=rows_to_drop)
 
 df_clean = df.copy()
@@ -307,8 +307,8 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 
 # 4-d  Scatter: study_hours vs exam_score
 plt.figure(figsize=(8, 6))
-plt.scatter(df_clean["exam_score"], df_clean["exam_score"],            
-            alpha=0.3, color="darkorange")
+plt.scatter(df_clean["study_hours_per_day"], df_clean["exam_score"],        
+            alpha=0.3, color="darkorange")        #6th bug
-plt.xlabel("Exam Score")
+plt.xlabel("Study Hours per Day")
 plt.ylabel("Exam Score")
 plt.title("Study Hours vs Exam Score")
@@ -366,7 +366,11 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 
 # High-achiever binary flag
 # TODO: A student qualifies if study >= 5.0 AND mental_health >= 7 AND attendance >= 85
-df_fe["high_achiever"] = 0                                             
+df_fe["high_achiever"] = (
+    (df_fe["study_hours_per_day"] >= 5.0) &
+    (df_fe["mental_health_rating"] >= 7) &
+    (df_fe["attendance_percentage"] >= 85)
+).astype(int)                      #to-do done                      
 
 print(f"  ✓ Feature engineering done. Shape: {df_fe.shape}")
 
@@ -380,10 +384,10 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 
 # Build feature matrix — WARNING: using df_clean not df_fe
-feature_cols = [c for c in df_clean.columns if c not in ["student_id", TARGET]]
+feature_cols = [c for c in df_fe.columns if c not in ["student_id", TARGET]]
-X = df_clean[feature_cols]                                            
+X = df_fe[feature_cols]        #3rd bug                                  
 
 # Target variable
-y = df_fe["study_hours_per_day"]                                       
+y = df_fe[TARGET]             #2nd bug                        
 
 # Drop target from X if accidentally present
 if TARGET in X.columns:
@@ -397,7 +401,7 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 # Train / test split
 X_train, X_test, y_train, y_test = train_test_split(
     X_scaled, y,
-    test_size=0.8,                                                     
+    test_size=0.2,             #5th bug                                    
     random_state=42,
 )
 
@@ -426,7 +430,7 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 for name, model in models.items():
     scores = cross_val_score(
         model,
-        X_scalled,                                                     
+        X_scaled,       #1st bug                                    
         y,
-        scoring="accuracy",                                            
+        scoring="r2",  # regression target: accuracy is a classification-only metric
         cv=kf,
@@ -444,7 +448,7 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 eval_results = {}
 
 for name, model in models.items():
-    model.fit(X_test, y_test)                                         
+    model.fit(X_train, y_train)        #4th bug                              
 
     y_pred = model.predict(X_test)
 
@@ -469,7 +473,7 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 )
 
 # Sort best → worst by R²
-comp_df = comp_df.sort_values("Test_R2", ascending=True)
+comp_df = comp_df.sort_values("Test_R2", ascending=False)       #8
 print(comp_df.to_string())
 
 # ═════════════════════════════════════════════════════════════════════════════