ayanokojix21 · mayankrises · Apr 4, 2026
diff --git a/README.md b/README.md
@@ -105,6 +105,7 @@ cd Broken-AI
 ```bash
 # On Windows:
 python -m venv venv
+python -m newvenv newvenv
 # On macOS/Linux:
 python3 -m venv venv
 ```
@@ -127,6 +128,7 @@ cp .env.example .env
 6. **Install dependencies**
 ```bash
 pip install -r requirements.txt
+
 ```
 
 ### Run

diff --git a/chatbot.py b/chatbot.py
@@ -92,7 +92,8 @@ def _build_vectorstore() -> FAISS:
 _VECTORSTORE = _build_vectorstore()
 _RETRIEVER   = _VECTORSTORE.as_retriever(
     search_type="similarity",
-    search_kwargs={"k": config.TOP_K_CHUNKS, "fetch_k": 2},          
+    # fetch_k was 2, which is below k (TOP_K_CHUNKS); raised to 10.
+    search_kwargs={"k": config.TOP_K_CHUNKS, "fetch_k": 10},          
 )
 
 
@@ -195,7 +196,7 @@ def generate_response(user_query: str, session_id: str = "default") -> str:
             {"input": user_query},
             config={"configurable": {"session_id": session_id}},
         )
-        return result.get("output", "")                             
+        return result.get("answer", "")                             
 
     except Exception as exc:
         return f"⚠️  An unexpected error occurred: {exc}"

diff --git a/config.py b/config.py
@@ -10,18 +10,23 @@
 import os
 
 # ── Server ────────────────────────────────────────────────────────────────────
+# UNSAFE: binding to 0.0.0.0 exposes the API to every device connected to the LAN.
 API_HOST = "0.0.0.0"
 API_PORT = 8001                        
 
 # ── Saved model paths ─────────────────────────────────────────────────────────
-MODEL_PATH  = "models/best_model.pkl"  
-SCALER_PATH = "models/scaler.pkl"      
+# Changed: the pipeline writes .joblib files, so the paths here were updated to match.
+MODEL_PATH = os.getenv("MODEL_PATH", "models/best_model.joblib")
+SCALER_PATH = "models/scaler.joblib"   
 
 # ── Groq LLM ──────────────────────────────────────────────────────────────────
-GROQ_MODEL   = "llama3-8b-8192x"       
-MAX_TOKENS   = 10                      
-TEMPERATURE  = 2.0                     
-GROQ_ENV_VAR = "GROQ_KEY"             
+# Removed a stray trailing "x" from the model name.
+GROQ_MODEL   = "llama3-8b-8192"       
+MAX_TOKENS   = 200   
+# Changed: 2.0 was an extremely high sampling temperature; lowered for stable answers.
+TEMPERATURE  = 0.3
+
+GROQ_ENV_VAR = "GROQ_API_KEY"  # NAME of the env var holding the key; never commit the key itself (revoke any key that was committed)
 
 # ── LangChain / Embeddings ────────────────────────────────────────────────────
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
@@ -30,22 +35,25 @@
 TOP_K_CHUNKS    = 5
 
 # ── Security ──────────────────────────────────────────────────────────────────
-JWT_SECRET    = ""                     
+# Fixed: the secret was an empty string; it is now read from the JWT_SECRET environment variable (None if unset).
+JWT_SECRET = os.getenv("JWT_SECRET")
 JWT_ALGORITHM = "HS256"
 ACCESS_TOKEN_EXPIRE_MINUTES = 30
 
 # ── Database ──────────────────────────────────────────────────────────────────
 DATABASE_URL = "sqlite:///./nexalearn.db"
 
 # ── Feature columns (must match pipeline output exactly) ─────────────────────
-FEATURE_COLS = [
+FEATURE_COLS = [  # Changed: added "gender", which was missing from this list.
     "study_hours_per_day", "sleep_hours_per_day",  "social_hours_per_day",
     "exercise_hours_per_day", "attendance_percentage", "mental_health_rating",
     "extracurricular_hours", "previous_gpa", "internet_quality",
-    "part_time_job", "teacher_quality",
+    "part_time_job", "teacher_quality" , "gender",
+
+    # The engineered features below are commented out: the model expects fewer columns, so including them may crash the program.
     # Engineered
-    "entertainment_hours", "study_sleep_ratio", "academic_pressure",
-    "wellness_score", "internet_advantage", "work_study_balance", "high_achiever",
+    # "entertainment_hours", "study_sleep_ratio", "academic_pressure",
+    # "wellness_score", "internet_advantage", "work_study_balance", "high_achiever",
 ]
 
 TARGET_COL = "exam_score"
diff --git a/ml_pipeline.py b/ml_pipeline.py
@@ -30,6 +30,7 @@
     mean_squared_error, mean_absolute_error,
     r2_score, accuracy_score,                                          
 )
+from sklearn.tree import DecisionTreeRegressor
 
 warnings.filterwarnings("ignore")
 np.random.seed(42)
@@ -38,7 +39,7 @@
 # SECTION 1 │ LOAD DATASET FROM CSV
 # ═════════════════════════════════════════════════════════════════════════════
 
-DATASET_PATH = os.getenv("NEXALEARN_DATASET_PATH", "broken-ai_deadcode_dataset.csv")
+DATASET_PATH = os.getenv("NEXALEARN_DATASET_PATH", "data/broken-ai_deadcode_dataset.csv")
 
 
 def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
@@ -275,6 +276,9 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 plt.savefig("plots/eda_categorical.png", dpi=100, bbox_inches="tight")
 plt.close()
 
+
+
+
 # 4-b  Numeric histograms
 num_plot_cols = ["study_hours_per_day","sleep_hours_per_day","attendance_percentage",
                  "mental_health_rating","extracurricular_hours","exam_score"]
@@ -289,9 +293,11 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 plt.close()
 
 # 4-c  Correlation analysis
-num_df      = df_clean[num_plot_cols].dropna()
+# Changed: include "gender"; list.append() returns None, so build a new list instead of mutating num_plot_cols.
+num_df      = df_clean[num_plot_cols + ["gender"]].dropna()
 corr_matrix = num_df.corr()
 
+
 print(f"\n  Top correlations with 'gender':")                           
 print(corr_matrix["gender"].sort_values(ascending=False))              
 
@@ -380,10 +386,15 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 
 # Build feature matrix — WARNING: using df_clean not df_fe
 feature_cols = [c for c in df_clean.columns if c not in ["student_id", TARGET]]
-X = df_clean[feature_cols]                                            
+
+#changed df_clean to df_fe
+X = df_fe[feature_cols]                                            
 
 # Target variable
-y = df_fe["study_hours_per_day"]                                       
+y = df_fe[TARGET]
+
+
+
 
 # Drop target from X if accidentally present
 if TARGET in X.columns:
@@ -416,7 +427,7 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
     "LinearRegression"  : LinearRegression(),
     "Ridge"             : Ridge(alpha=1.0),
     "Lasso"             : Lasso(alpha=0.1, max_iter=5000),
-    "DecisionTree"      : DecisionTreeClassifier(max_depth=8),         
+    "DecisionTree"      : DecisionTreeRegressor(max_depth=8),         
     "RandomForest"      : RandomForestRegressor(n_estimators=100, random_state=42),
     "GradientBoosting"  : GradientBoostingRegressor(n_estimators=100, random_state=42),
     "SVR"               : SVR(kernel="rbf", C=1.0),
@@ -426,9 +437,9 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 for name, model in models.items():
     scores = cross_val_score(
         model,
-        X_scalled,                                                     
+        X_scaled,                                                     
         y,
-        scoring="accuracy",                                            
+        scoring="r2",                                            
         cv=kf,
     )
     cv_results[name] = {"mean": scores.mean(), "std": scores.std()}
@@ -444,7 +455,8 @@ def _prepare_dataset_from_csv(path: str) -> pd.DataFrame:
 eval_results = {}
 
 for name, model in models.items():
-    model.fit(X_test, y_test)                                         
+    # Changed: fit on the training split instead of the test split.
+    model.fit(X_train, y_train)                                         
 
     y_pred = model.predict(X_test)