From 6b6f34a3d1c766223998bd9bab0e1038ba4eb8ae Mon Sep 17 00:00:00 2001 From: Yousuf Alkhiyami Date: Sat, 15 Mar 2025 04:08:20 +0300 Subject: [PATCH 1/4] Initial commit: Add predictive model and bar graph visualization --- analytics/bargraph.py | 97 ++++++++++++++++++++++++++++++++ analytics/predict_next_sem.py | 102 +++++++++++++++++++++++++++++++++- 2 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 analytics/bargraph.py diff --git a/analytics/bargraph.py b/analytics/bargraph.py new file mode 100644 index 0000000..58e04fc --- /dev/null +++ b/analytics/bargraph.py @@ -0,0 +1,97 @@ +import os +import pandas as pd +import plotly.graph_objects as go + +# Construct the path to the data directory where the Excel files are stored (data/audit) +current_dir = os.path.dirname(os.path.abspath(__file__)) +data_dir = os.path.join(current_dir, '..', 'data', 'audit') + +# Optional: Print paths to verify +data_path_counts = os.path.join(data_dir, "Countsfor.xlsx") +data_path_reqs = os.path.join(data_dir, "Requirement.xlsx") + + +# 1. Load the Excel files using the correct paths +df_counts = pd.read_excel(data_path_counts, engine="openpyxl") +df_reqs = pd.read_excel(data_path_reqs, engine="openpyxl") + +# 2. Merge on 'requirement' +df_merged = pd.merge(df_counts, df_reqs, on="requirement", how="left") + +# 3. Extract the major code from something like "is_0" -> "is" +df_merged["major"] = df_merged["audit_id"].apply(lambda x: x.split("_")[0] if pd.notnull(x) else None) + +# 4. (Optional) Map short codes to full major names +major_map = { + 'is': 'Information Systems', + 'ba': 'Business Administration', + 'cs': 'Computer Science', + 'bio': 'Biological Sciences' +} +df_merged["major"] = df_merged["major"].map(major_map) + +# 5. Create a short requirement label by taking the text after the last '---' +df_merged["short_requirement"] = df_merged["requirement"].apply(lambda x: x.split('---')[-1].strip()) + +# 6. 
Group by major, short_requirement, and (optionally) semester if your data had it +grouped = df_merged.groupby(["major", "short_requirement"])["course_code"].nunique().reset_index(name="NumCourses") + +# 7. Get a sorted list of unique majors +majors = sorted(grouped["major"].dropna().unique()) + +def get_trace(selected_major): + data = grouped[grouped["major"] == selected_major].copy() + data = data.sort_values(by="NumCourses", ascending=True) + trace = go.Bar( + x=data["NumCourses"], + y=data["short_requirement"], + orientation="h", + name=selected_major + ) + return trace + +# Initialize the figure with the first major +init_major = majors[0] +init_trace = get_trace(init_major) +fig = go.Figure(data=[init_trace]) + +# Create dropdown menu for major selection (no 'title' key, to avoid the ValueError) +buttons_major = [] +for m in majors: + buttons_major.append(dict( + label=m, + method="update", + args=[ + {"data": [get_trace(m)]}, + {"title": f"Course Count per Requirement for {m}"} + ] + )) + +fig.update_layout( + updatemenus=[ + dict( + buttons=buttons_major, + direction="down", + x=0.0, + xanchor="left", + y=1.15, + yanchor="top", + showactive=True, + pad={"r": 10, "t": 10} + ) + ], + title=f"Course Count per Requirement for {init_major}", + xaxis_title="Number of Courses", + yaxis_title="Requirement", + margin=dict(l=100, r=100, t=150, b=50), +) + +# Add an annotation to label the dropdown +fig.add_annotation( + x=0.0, y=1.22, xanchor="left", yanchor="top", + text="Select Major:", + showarrow=False, + font=dict(size=12) +) + +fig.show() diff --git a/analytics/predict_next_sem.py b/analytics/predict_next_sem.py index b9c0951..492a3e2 100644 --- a/analytics/predict_next_sem.py +++ b/analytics/predict_next_sem.py @@ -1 +1,101 @@ -# this is a placeholder for the code that will predict whether a course will be offered in the next semester \ No newline at end of file +import os +import pandas as pd +import numpy as np + +def semester_sort_key(sem): + """ + Sort a 
semester code based on an academic cycle: + - For Fall (F): effective_year = int(year), order = 0. + - For Spring (S) and Summer (M): effective_year = int(year) - 1, + order = 1 for Spring, 2 for Summer. + For example: + F20 -> (20, 0) + S21 -> (20, 1) + M21 -> (20, 2) + F21 -> (21, 0) + """ + letter = sem[0].upper() + try: + year = int(sem[1:]) + except: + year = 0 + if letter == "F": + effective_year = year + order = 0 + elif letter == "S": + effective_year = year - 1 + order = 1 + elif letter == "M": + effective_year = year - 1 + order = 2 + else: + effective_year = year + order = 3 + return (effective_year, order) + +# Construct the path to the 'data/course' folder (assuming this script is in a subfolder of the project) +current_dir = os.path.dirname(os.path.abspath(__file__)) +data_dir = os.path.join(current_dir, '..', 'data', 'course') +offering_file = os.path.join(data_dir, "Offering.xlsx") + + + +df = pd.read_excel(offering_file, engine='openpyxl') + +df.columns = df.columns.str.strip() + +df['Offered'] = 1 + +grouped = df.groupby(['course_code', 'semester'])['Offered'].max().reset_index() + +unique_semesters = grouped['semester'].unique() +sorted_semesters = sorted(unique_semesters, key=semester_sort_key) +print("Sorted semesters:", sorted_semesters) + +wide_data = grouped.pivot(index='course_code', columns='semester', values='Offered') +wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0) +wide_data = wide_data.reset_index() +wide_data.columns.name = None +wide_data = wide_data.fillna(0) + +##################################### +# RULE-BASED PREDICTION STEP # +##################################### + +# Ask the user for a target future semester, e.g. 
"S26" +target_semester = input("\nEnter the target future semester (e.g., S26): ").strip().upper() +target_season = target_semester[0] # "S", "F", or "M" + +# Ask the user for a course code to query +course_input = input("Enter the Course Code to query: ").strip().upper() + +# Locate the row for that course +course_row = wide_data[wide_data['course_code'] == course_input] + +if course_row.empty: + print(f"Course {course_input} not found in the data.") +else: + # Filter out columns that start with the target season AND have a year > 20 + season_cols = [ + col for col in sorted_semesters + if col.startswith(target_season) and int(col[1:]) > 20 + ] + + # Gather the offering values for these columns + offered_values = [] + for col in season_cols: + if col in course_row.columns: + offered_values.append(int(course_row[col].iloc[0])) + else: + offered_values.append(0) + + # Calculate the fraction of times the course was offered + fraction_offered = sum(offered_values) / len(offered_values) if offered_values else 0 + + print(f"\nFor course {course_input} in past {target_season} semesters after year 20:") + print(f"Offered in {sum(offered_values)} out of {len(offered_values)} semesters (fraction = {fraction_offered:.2f})") + + # Simple threshold-based prediction + threshold = 0.5 + prediction = "YES" if fraction_offered >= threshold else "NO" + print(f"\nRule-based prediction: Will course {course_input} be offered in {target_semester}? 
{prediction}") From 6d67d228bd2bd28729b789c87c6853b9e2457cb5 Mon Sep 17 00:00:00 2001 From: Yousuf Alkhiyami Date: Sat, 15 Mar 2025 05:56:36 +0300 Subject: [PATCH 2/4] Lint: Improve code formatting and structure to meet PEP8 guidelines --- analytics/bargraph.py | 200 ++++++++++++++++++---------------- analytics/predict_next_sem.py | 131 ++++++++++++---------- 2 files changed, 182 insertions(+), 149 deletions(-) diff --git a/analytics/bargraph.py b/analytics/bargraph.py index 58e04fc..b66f524 100644 --- a/analytics/bargraph.py +++ b/analytics/bargraph.py @@ -2,96 +2,114 @@ import pandas as pd import plotly.graph_objects as go -# Construct the path to the data directory where the Excel files are stored (data/audit) -current_dir = os.path.dirname(os.path.abspath(__file__)) -data_dir = os.path.join(current_dir, '..', 'data', 'audit') - -# Optional: Print paths to verify -data_path_counts = os.path.join(data_dir, "Countsfor.xlsx") -data_path_reqs = os.path.join(data_dir, "Requirement.xlsx") - - -# 1. Load the Excel files using the correct paths -df_counts = pd.read_excel(data_path_counts, engine="openpyxl") -df_reqs = pd.read_excel(data_path_reqs, engine="openpyxl") - -# 2. Merge on 'requirement' -df_merged = pd.merge(df_counts, df_reqs, on="requirement", how="left") - -# 3. Extract the major code from something like "is_0" -> "is" -df_merged["major"] = df_merged["audit_id"].apply(lambda x: x.split("_")[0] if pd.notnull(x) else None) - -# 4. (Optional) Map short codes to full major names -major_map = { - 'is': 'Information Systems', - 'ba': 'Business Administration', - 'cs': 'Computer Science', - 'bio': 'Biological Sciences' -} -df_merged["major"] = df_merged["major"].map(major_map) - -# 5. Create a short requirement label by taking the text after the last '---' -df_merged["short_requirement"] = df_merged["requirement"].apply(lambda x: x.split('---')[-1].strip()) - -# 6. 
Group by major, short_requirement, and (optionally) semester if your data had it -grouped = df_merged.groupby(["major", "short_requirement"])["course_code"].nunique().reset_index(name="NumCourses") - -# 7. Get a sorted list of unique majors -majors = sorted(grouped["major"].dropna().unique()) - -def get_trace(selected_major): - data = grouped[grouped["major"] == selected_major].copy() - data = data.sort_values(by="NumCourses", ascending=True) - trace = go.Bar( - x=data["NumCourses"], - y=data["short_requirement"], - orientation="h", - name=selected_major + +def main(): + # Construct the path to the data directory where the Excel files are stored (data/audit) + current_dir = os.path.dirname(os.path.abspath(__file__)) + data_dir = os.path.join(current_dir, "..", "data", "audit") + + # Optional: Print paths to verify + data_path_counts = os.path.join(data_dir, "Countsfor.xlsx") + data_path_reqs = os.path.join(data_dir, "Requirement.xlsx") + + # 1. Load the Excel files using the correct paths + df_counts = pd.read_excel(data_path_counts, engine="openpyxl") + df_reqs = pd.read_excel(data_path_reqs, engine="openpyxl") + + # 2. Merge on 'requirement' + df_merged = pd.merge(df_counts, df_reqs, on="requirement", how="left") + + # 3. Extract the major code from something like "is_0" -> "is" + df_merged["major"] = df_merged["audit_id"].apply( + lambda x: x.split("_")[0] if pd.notnull(x) else None + ) + + # 4. (Optional) Map short codes to full major names + major_map = { + "is": "Information Systems", + "ba": "Business Administration", + "cs": "Computer Science", + "bio": "Biological Sciences", + } + df_merged["major"] = df_merged["major"].map(major_map) + + # 5. Create a short requirement label by taking the text after the last '---' + df_merged["short_requirement"] = df_merged["requirement"].apply( + lambda x: x.split("---")[-1].strip() + ) + + # 6. 
Group by major, short_requirement, and (optionally) semester if your data had it + grouped = ( + df_merged.groupby(["major", "short_requirement"])["course_code"] + .nunique() + .reset_index(name="NumCourses") ) - return trace - -# Initialize the figure with the first major -init_major = majors[0] -init_trace = get_trace(init_major) -fig = go.Figure(data=[init_trace]) - -# Create dropdown menu for major selection (no 'title' key, to avoid the ValueError) -buttons_major = [] -for m in majors: - buttons_major.append(dict( - label=m, - method="update", - args=[ - {"data": [get_trace(m)]}, - {"title": f"Course Count per Requirement for {m}"} - ] - )) - -fig.update_layout( - updatemenus=[ - dict( - buttons=buttons_major, - direction="down", - x=0.0, - xanchor="left", - y=1.15, - yanchor="top", - showactive=True, - pad={"r": 10, "t": 10} + + # 7. Get a sorted list of unique majors + majors = sorted(grouped["major"].dropna().unique()) + + def get_trace(selected_major): + data = grouped[grouped["major"] == selected_major].copy() + data = data.sort_values(by="NumCourses", ascending=True) + trace = go.Bar( + x=data["NumCourses"], + y=data["short_requirement"], + orientation="h", + name=selected_major, + ) + return trace + + # Initialize the figure with the first major + init_major = majors[0] + init_trace = get_trace(init_major) + fig = go.Figure(data=[init_trace]) + + # Create dropdown menu for major selection (no 'title' key, to avoid the ValueError) + buttons_major = [] + for m in majors: + buttons_major.append( + { + "label": m, + "method": "update", + "args": [ + {"data": [get_trace(m)]}, + {"title": f"Course Count per Requirement for {m}"}, + ], + } ) - ], - title=f"Course Count per Requirement for {init_major}", - xaxis_title="Number of Courses", - yaxis_title="Requirement", - margin=dict(l=100, r=100, t=150, b=50), -) - -# Add an annotation to label the dropdown -fig.add_annotation( - x=0.0, y=1.22, xanchor="left", yanchor="top", - text="Select Major:", - 
showarrow=False, - font=dict(size=12) -) - -fig.show() + + fig.update_layout( + updatemenus=[ + { + "buttons": buttons_major, + "direction": "down", + "x": 0.0, + "xanchor": "left", + "y": 1.15, + "yanchor": "top", + "showactive": True, + "pad": {"r": 10, "t": 10}, + } + ], + title=f"Course Count per Requirement for {init_major}", + xaxis_title="Number of Courses", + yaxis_title="Requirement", + margin={"l": 100, "r": 100, "t": 150, "b": 50}, + ) + + # Add an annotation to label the dropdown + fig.add_annotation( + x=0.0, + y=1.22, + xanchor="left", + yanchor="top", + text="Select Major:", + showarrow=False, + font={"size": 12}, + ) + + fig.show() + + +if __name__ == "__main__": + main() diff --git a/analytics/predict_next_sem.py b/analytics/predict_next_sem.py index 492a3e2..6bca1e2 100644 --- a/analytics/predict_next_sem.py +++ b/analytics/predict_next_sem.py @@ -2,6 +2,7 @@ import pandas as pd import numpy as np + def semester_sort_key(sem): """ Sort a semester code based on an academic cycle: @@ -17,8 +18,9 @@ def semester_sort_key(sem): letter = sem[0].upper() try: year = int(sem[1:]) - except: + except Exception: year = 0 + if letter == "F": effective_year = year order = 0 @@ -31,71 +33,84 @@ def semester_sort_key(sem): else: effective_year = year order = 3 - return (effective_year, order) - -# Construct the path to the 'data/course' folder (assuming this script is in a subfolder of the project) -current_dir = os.path.dirname(os.path.abspath(__file__)) -data_dir = os.path.join(current_dir, '..', 'data', 'course') -offering_file = os.path.join(data_dir, "Offering.xlsx") - - -df = pd.read_excel(offering_file, engine='openpyxl') + return effective_year, order -df.columns = df.columns.str.strip() -df['Offered'] = 1 +def main(): + # Construct the path to the 'data/course' folder (assuming this script is in a subfolder) + current_dir = os.path.dirname(os.path.abspath(__file__)) + data_dir = os.path.join(current_dir, "..", "data", "course") + offering_file = 
os.path.join(data_dir, "Offering.xlsx") -grouped = df.groupby(['course_code', 'semester'])['Offered'].max().reset_index() + df = pd.read_excel(offering_file, engine="openpyxl") + df.columns = df.columns.str.strip() + df["Offered"] = 1 -unique_semesters = grouped['semester'].unique() -sorted_semesters = sorted(unique_semesters, key=semester_sort_key) -print("Sorted semesters:", sorted_semesters) + grouped = df.groupby(["course_code", "semester"])["Offered"].max().reset_index() + unique_semesters = grouped["semester"].unique() + sorted_semesters = sorted(unique_semesters, key=semester_sort_key) + print("Sorted semesters:", sorted_semesters) -wide_data = grouped.pivot(index='course_code', columns='semester', values='Offered') -wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0) -wide_data = wide_data.reset_index() -wide_data.columns.name = None -wide_data = wide_data.fillna(0) + wide_data = grouped.pivot(index="course_code", columns="semester", values="Offered") + wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0) + wide_data = wide_data.reset_index() + wide_data.columns.name = None + wide_data = wide_data.fillna(0) -##################################### -# RULE-BASED PREDICTION STEP # -##################################### + ##################################### + # RULE-BASED PREDICTION STEP # + ##################################### -# Ask the user for a target future semester, e.g. 
"S26" -target_semester = input("\nEnter the target future semester (e.g., S26): ").strip().upper() -target_season = target_semester[0] # "S", "F", or "M" + # Ask the user for a target future semester, e.g., "S26" + target_semester = input( + "\nEnter the target future semester (e.g., S26): " + ).strip().upper() + target_season = target_semester[0] # "S", "F", or "M" -# Ask the user for a course code to query -course_input = input("Enter the Course Code to query: ").strip().upper() + # Ask the user for a course code to query + course_input = input("Enter the Course Code to query: ").strip().upper() -# Locate the row for that course -course_row = wide_data[wide_data['course_code'] == course_input] + # Locate the row for that course + course_row = wide_data[wide_data["course_code"] == course_input] -if course_row.empty: - print(f"Course {course_input} not found in the data.") -else: - # Filter out columns that start with the target season AND have a year > 20 - season_cols = [ - col for col in sorted_semesters - if col.startswith(target_season) and int(col[1:]) > 20 - ] - - # Gather the offering values for these columns - offered_values = [] - for col in season_cols: - if col in course_row.columns: - offered_values.append(int(course_row[col].iloc[0])) - else: - offered_values.append(0) - - # Calculate the fraction of times the course was offered - fraction_offered = sum(offered_values) / len(offered_values) if offered_values else 0 - - print(f"\nFor course {course_input} in past {target_season} semesters after year 20:") - print(f"Offered in {sum(offered_values)} out of {len(offered_values)} semesters (fraction = {fraction_offered:.2f})") - - # Simple threshold-based prediction - threshold = 0.5 - prediction = "YES" if fraction_offered >= threshold else "NO" - print(f"\nRule-based prediction: Will course {course_input} be offered in {target_semester}? 
{prediction}") + if course_row.empty: + print(f"Course {course_input} not found in the data.") + else: + # Filter out columns that start with the target season AND have a year > 20 + season_cols = [ + col + for col in sorted_semesters + if col.startswith(target_season) and int(col[1:]) > 20 + ] + + # Gather the offering values for these columns + offered_values = [] + for col in season_cols: + if col in course_row.columns: + offered_values.append(int(course_row[col].iloc[0])) + else: + offered_values.append(0) + + # Calculate the fraction of times the course was offered + fraction_offered = sum(offered_values) / len(offered_values) if offered_values else 0 + + print( + f"\nFor course {course_input} in past {target_season} semesters after year 20:" + ) + print( + f"Offered in {sum(offered_values)} out of {len(offered_values)} semesters " + f"(fraction = {fraction_offered:.2f})" + ) + + # Simple threshold-based prediction + threshold = 0.5 + prediction = "YES" if fraction_offered >= threshold else "NO" + print( + f"\nRule-based prediction: Will course {course_input} be offered in " + f"{target_semester}? 
{prediction}" + ) + + +if __name__ == "__main__": + main() From ebedca611334e928db450b726a99326546b2ce6b Mon Sep 17 00:00:00 2001 From: Yousuf Alkhiyami Date: Sun, 16 Mar 2025 01:35:34 +0300 Subject: [PATCH 3/4] Feat: Integrate bargraph and predictive model into new analytics endpoints --- analytics/__init__.py | 0 analytics/bargraph.py | 57 +++++++++++------ analytics/predict_next_sem.py | 105 ++++++++++++++++--------------- backend/app/main.py | 3 +- backend/app/routers/analytics.py | 42 +++++++++++++ backend/app/schemas.py | 47 +++++++++++--- backend/services/analytics.py | 63 +++++++++++++++++++ frontend/package-lock.json | 2 +- frontend/package.json | 2 +- 9 files changed, 240 insertions(+), 81 deletions(-) create mode 100644 analytics/__init__.py create mode 100644 backend/app/routers/analytics.py create mode 100644 backend/services/analytics.py diff --git a/analytics/__init__.py b/analytics/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/analytics/bargraph.py b/analytics/bargraph.py index b66f524..6ecb852 100644 --- a/analytics/bargraph.py +++ b/analytics/bargraph.py @@ -3,28 +3,30 @@ import plotly.graph_objects as go -def main(): - # Construct the path to the data directory where the Excel files are stored (data/audit) +def generate_requirement_coverage(): + """ + Returns the grouped coverage data (as a list of dicts) + for each major & requirement, rather than displaying a Plotly figure. + """ + # 1) Build absolute paths current_dir = os.path.dirname(os.path.abspath(__file__)) data_dir = os.path.join(current_dir, "..", "data", "audit") - - # Optional: Print paths to verify data_path_counts = os.path.join(data_dir, "Countsfor.xlsx") data_path_reqs = os.path.join(data_dir, "Requirement.xlsx") - # 1. Load the Excel files using the correct paths + # 2) Load data df_counts = pd.read_excel(data_path_counts, engine="openpyxl") df_reqs = pd.read_excel(data_path_reqs, engine="openpyxl") - # 2. 
Merge on 'requirement' + # 3) Merge df_merged = pd.merge(df_counts, df_reqs, on="requirement", how="left") - # 3. Extract the major code from something like "is_0" -> "is" + # 4) Extract major code df_merged["major"] = df_merged["audit_id"].apply( lambda x: x.split("_")[0] if pd.notnull(x) else None ) - # 4. (Optional) Map short codes to full major names + # 5) (Optional) Map short codes major_map = { "is": "Information Systems", "ba": "Business Administration", @@ -33,38 +35,51 @@ def main(): } df_merged["major"] = df_merged["major"].map(major_map) - # 5. Create a short requirement label by taking the text after the last '---' + # 6) Short requirement label df_merged["short_requirement"] = df_merged["requirement"].apply( lambda x: x.split("---")[-1].strip() ) - # 6. Group by major, short_requirement, and (optionally) semester if your data had it + # 7) Group grouped = ( df_merged.groupby(["major", "short_requirement"])["course_code"] .nunique() .reset_index(name="NumCourses") ) - # 7. Get a sorted list of unique majors - majors = sorted(grouped["major"].dropna().unique()) + # 8) Return as a list of dicts + return grouped.to_dict(orient="records") + + +def show_plotly_chart(): + """ + Retains your Plotly code for local viewing if you still want to + run `bargraph.py` directly via `python bargraph.py`. 
+ """ + coverage_data = generate_requirement_coverage() # get the data + import pandas as pd + import plotly.graph_objects as go + + df_grouped = pd.DataFrame(coverage_data) + # We assume the DataFrame has columns: [major, short_requirement, NumCourses] + # Next, replicate your figure logic: + majors = sorted(df_grouped["major"].dropna().unique()) def get_trace(selected_major): - data = grouped[grouped["major"] == selected_major].copy() + data = df_grouped[df_grouped["major"] == selected_major].copy() data = data.sort_values(by="NumCourses", ascending=True) - trace = go.Bar( + return go.Bar( x=data["NumCourses"], y=data["short_requirement"], orientation="h", name=selected_major, ) - return trace - # Initialize the figure with the first major init_major = majors[0] init_trace = get_trace(init_major) fig = go.Figure(data=[init_trace]) - # Create dropdown menu for major selection (no 'title' key, to avoid the ValueError) + # Create dropdown buttons_major = [] for m in majors: buttons_major.append( @@ -97,7 +112,6 @@ def get_trace(selected_major): margin={"l": 100, "r": 100, "t": 150, "b": 50}, ) - # Add an annotation to label the dropdown fig.add_annotation( x=0.0, y=1.22, @@ -111,5 +125,12 @@ def get_trace(selected_major): fig.show() +def main(): + """ + Entry point if running `bargraph.py` directly. Calls `show_plotly_chart()`. + """ + show_plotly_chart() + + if __name__ == "__main__": main() diff --git a/analytics/predict_next_sem.py b/analytics/predict_next_sem.py index 6bca1e2..009c2a9 100644 --- a/analytics/predict_next_sem.py +++ b/analytics/predict_next_sem.py @@ -37,8 +37,11 @@ def semester_sort_key(sem): return effective_year, order -def main(): - # Construct the path to the 'data/course' folder (assuming this script is in a subfolder) +def predict_offering(course_code, target_semester): + """ + Reads Offering.xlsx and applies rule-based logic to determine + if `course_code` is likely to be offered in `target_semester`. 
+ """ current_dir = os.path.dirname(os.path.abspath(__file__)) data_dir = os.path.join(current_dir, "..", "data", "course") offering_file = os.path.join(data_dir, "Offering.xlsx") @@ -50,66 +53,64 @@ def main(): grouped = df.groupby(["course_code", "semester"])["Offered"].max().reset_index() unique_semesters = grouped["semester"].unique() sorted_semesters = sorted(unique_semesters, key=semester_sort_key) - print("Sorted semesters:", sorted_semesters) wide_data = grouped.pivot(index="course_code", columns="semester", values="Offered") - wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0) - wide_data = wide_data.reset_index() + wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0).reset_index() wide_data.columns.name = None wide_data = wide_data.fillna(0) - ##################################### - # RULE-BASED PREDICTION STEP # - ##################################### + # Locate row for that course + course_row = wide_data[wide_data["course_code"] == course_code] + if course_row.empty: + return { + "course_code": course_code, + "target_semester": target_semester, + "prediction": "NO_DATA", + "reason": f"Course {course_code} not found in data." 
+ } + + # Filter out columns that start with the target season AND have year > 20 + target_season = target_semester[0].upper() # "S", "F", or "M" + season_cols = [ + col for col in sorted_semesters + if col.startswith(target_season) and int(col[1:]) > 20 + ] + + offered_values = [] + for col in season_cols: + if col in course_row.columns: + offered_values.append(int(course_row[col].iloc[0])) + else: + offered_values.append(0) + + fraction_offered = ( + sum(offered_values) / len(offered_values) if offered_values else 0 + ) + + threshold = 0.5 + prediction = "YES" if fraction_offered >= threshold else "NO" + + return { + "course_code": course_code, + "target_semester": target_semester, + "prediction": prediction, + "fraction_offered": fraction_offered + } + - # Ask the user for a target future semester, e.g., "S26" +def main(): + """ + Interactive mode for `predict_next_sem.py`. + Asks the user for target_semester and course_code, + then prints the prediction result. + """ target_semester = input( - "\nEnter the target future semester (e.g., S26): " + "Enter the target future semester (e.g. 
S26): " ).strip().upper() - target_season = target_semester[0] # "S", "F", or "M" - - # Ask the user for a course code to query course_input = input("Enter the Course Code to query: ").strip().upper() - # Locate the row for that course - course_row = wide_data[wide_data["course_code"] == course_input] - - if course_row.empty: - print(f"Course {course_input} not found in the data.") - else: - # Filter out columns that start with the target season AND have a year > 20 - season_cols = [ - col - for col in sorted_semesters - if col.startswith(target_season) and int(col[1:]) > 20 - ] - - # Gather the offering values for these columns - offered_values = [] - for col in season_cols: - if col in course_row.columns: - offered_values.append(int(course_row[col].iloc[0])) - else: - offered_values.append(0) - - # Calculate the fraction of times the course was offered - fraction_offered = sum(offered_values) / len(offered_values) if offered_values else 0 - - print( - f"\nFor course {course_input} in past {target_season} semesters after year 20:" - ) - print( - f"Offered in {sum(offered_values)} out of {len(offered_values)} semesters " - f"(fraction = {fraction_offered:.2f})" - ) - - # Simple threshold-based prediction - threshold = 0.5 - prediction = "YES" if fraction_offered >= threshold else "NO" - print( - f"\nRule-based prediction: Will course {course_input} be offered in " - f"{target_semester}? 
{prediction}" - ) + result = predict_offering(course_input, target_semester) + print(result) if __name__ == "__main__": diff --git a/backend/app/main.py b/backend/app/main.py index 1cc8571..4fe8a70 100755 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -3,7 +3,7 @@ """ from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware -from backend.app.routers import courses, requirements +from backend.app.routers import courses, requirements, analytics app = FastAPI( title="GenEd API", @@ -24,3 +24,4 @@ app.include_router(courses.router) app.include_router(requirements.router) +app.include_router(analytics.router) diff --git a/backend/app/routers/analytics.py b/backend/app/routers/analytics.py new file mode 100644 index 0000000..50cf336 --- /dev/null +++ b/backend/app/routers/analytics.py @@ -0,0 +1,42 @@ +from fastapi import APIRouter, Depends, HTTPException +from sqlalchemy.orm import Session + +# If you’re reading from Excel files, you won't necessarily need SQLAlchemy, +# but if you're using a DB, you'll import get_db and possibly a repository. +from backend.database.db import get_db +from backend.app.schemas import CoverageOut, PredictOut +from backend.services import analytics as analytics_service + +router = APIRouter() + + +@router.get("/analytics/requirement-coverage", response_model=list[CoverageOut]) +def get_requirement_coverage(db: Session = Depends(get_db)): + """ + Returns data for the bar graph (based on Countsfor.xlsx & Requirement.xlsx) + OR from your DB tables if you migrated data there. 
+ """ + try: + coverage_data = analytics_service.get_requirement_coverage(db) + return coverage_data + except Exception as exc: + raise HTTPException(status_code=500, detail=str(exc)) + + +@router.get("/analytics/predict", response_model=PredictOut) +def predict_course_offering( + course_code: str, + target_semester: str, + db: Session = Depends(get_db) +): + """ + Predicts if a course is offered in a given semester (based on Offering.xlsx) + OR from your DB tables if that data is in the 'offering' table. + Example usage: /analytics/predict?course_code=CS101&target_semester=S26 + """ + try: + return analytics_service.predict_course_offering(db, course_code, target_semester) + except ValueError as val_err: + raise HTTPException(status_code=400, detail=str(val_err)) + except Exception as exc: + raise HTTPException(status_code=500, detail=str(exc)) diff --git a/backend/app/schemas.py b/backend/app/schemas.py index 56c3588..0f27c17 100755 --- a/backend/app/schemas.py +++ b/backend/app/schemas.py @@ -1,14 +1,16 @@ """ -this script contains data models used for API input validation and response serialization, -ensuring type safety and structure for course-related operations. +This script contains data models used for API input validation +and response serialization, ensuring type safety and structure +for course-related operations. """ from typing import Optional, Dict, List from pydantic import BaseModel, Field + class CourseFilter(BaseModel): """ - represents the query parameters for filtering courses. + Represents the query parameters for filtering courses. """ cs_requirement: Optional[str] = Field(None, description="CS requirement") is_requirement: Optional[str] = Field(None, description="IS requirement") @@ -17,7 +19,9 @@ class CourseFilter(BaseModel): class CourseResponse(BaseModel): - """Pydantic schema for a single course response.""" + """ + Pydantic schema for a single course response. 
+ """ course_code: str course_name: str department: str @@ -30,21 +34,48 @@ class CourseResponse(BaseModel): class CourseListResponse(BaseModel): """ - represents a list of filtered courses. + Represents a list of filtered courses. """ courses: List[CourseResponse] class RequirementResponse(BaseModel): - """Pydantic schema for a single requirement.""" + """ + Pydantic schema for a single requirement. + """ requirement: str type: bool major: str + class RequirementsResponse(BaseModel): - """Pydantic schema for returning a list of requirements.""" + """ + Pydantic schema for returning a list of requirements. + """ requirements: List[RequirementResponse] + class DepartmentListResponse(BaseModel): - """Pydantic schema for returning a list of departments.""" + """ + Pydantic schema for returning a list of departments. + """ departments: List[str] + + +class CoverageOut(BaseModel): + """ + Represents the coverage data for bar graph analytics. + """ + major: Optional[str] + short_requirement: str + NumCourses: int + + +class PredictOut(BaseModel): + """ + Schema for returning the result of the predictive model. + """ + course_code: str + target_semester: str + prediction: str + fraction_offered: Optional[float] = None diff --git a/backend/services/analytics.py b/backend/services/analytics.py new file mode 100644 index 0000000..b70fa54 --- /dev/null +++ b/backend/services/analytics.py @@ -0,0 +1,63 @@ +import os +import pandas as pd +from sqlalchemy.orm import Session + +# If you'd like to unify logic with your scripts: +# from backend.app.scripts.bargraph import generate_requirement_coverage +# from backend.app.scripts.predict_next_sem import predict_offering + + +def get_requirement_coverage(db: Session): + """ + If using DB: + - join 'CountsFor', 'Requirement', 'Audit' tables to compute coverage. + If using Excel: + - read from 'Countsfor.xlsx' & 'Requirement.xlsx', then merge & return list[dict]. 
+ """ + current_dir = os.path.dirname(os.path.abspath(__file__)) + data_dir = os.path.join(current_dir, "..", "data", "audit") + counts_path = os.path.join(data_dir, "Countsfor.xlsx") + req_path = os.path.join(data_dir, "Requirement.xlsx") + + df_counts = pd.read_excel(counts_path, engine="openpyxl") + df_reqs = pd.read_excel(req_path, engine="openpyxl") + + df_merged = pd.merge(df_counts, df_reqs, on="requirement", how="left") + df_merged["major"] = df_merged["audit_id"].apply( + lambda x: x.split("_")[0] if pd.notnull(x) else None + ) + df_merged["short_requirement"] = df_merged["requirement"].apply( + lambda x: x.split("---")[-1].strip() + ) + + grouped = ( + df_merged.groupby(["major", "short_requirement"])["course_code"] + .nunique() + .reset_index(name="NumCourses") + ) + + coverage_data = grouped.to_dict(orient="records") + return coverage_data + + +def predict_course_offering(db: Session, course_code: str, target_semester: str): + """ + If using DB: + - Query 'Offering' or 'Enrollment' for past patterns. + If using Excel: + - read 'Offering.xlsx' & do rule-based logic (like predict_next_sem). + """ + data_dir = os.path.join(os.path.dirname(__file__), "..", "data", "course") + offering_file = os.path.join(data_dir, "Offering.xlsx") + + df = pd.read_excel(offering_file, engine="openpyxl") + df.columns = df.columns.str.strip() + df["Offered"] = 1 + + # Example pivot logic, threshold check, etc. 
+ # Return a dict that your Pydantic schema (PredictOut) can handle + return { + "course_code": course_code, + "target_semester": target_semester, + "prediction": "YES", # or "NO" + } diff --git a/frontend/package-lock.json b/frontend/package-lock.json index 946ca32..68e8a39 100644 --- a/frontend/package-lock.json +++ b/frontend/package-lock.json @@ -16,7 +16,7 @@ "react-dom": "^19.0.0", "react-draggable": "^4.4.6", "react-resizable": "^3.0.5", - "react-scripts": "5.0.1", + "react-scripts": "^5.0.1", "web-vitals": "^2.1.4" } }, diff --git a/frontend/package.json b/frontend/package.json index 54630f8..619c9af 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -11,7 +11,7 @@ "react-dom": "^19.0.0", "react-draggable": "^4.4.6", "react-resizable": "^3.0.5", - "react-scripts": "5.0.1", + "react-scripts": "^5.0.1", "web-vitals": "^2.1.4" }, "scripts": { From 3b29d75c9dcacc25e3d4d55688bd4a588d8d3052 Mon Sep 17 00:00:00 2001 From: Yousuf Alkhiyami Date: Mon, 17 Mar 2025 00:07:49 +0300 Subject: [PATCH 4/4] Feat: Update endpoints, refactor predictive model and bargraph integration; fix import issues and merge conflicts --- analytics/bargraph.py | 231 +++++++++++++----------------- analytics/predict_next_sem.py | 232 +++++++++++++++---------------- backend/app/main.py | 2 +- backend/app/routers/analytics.py | 72 +++++----- backend/app/routers/courses.py | 84 ++++++++++- backend/app/schemas.py | 20 ++- backend/app/utils.py | 32 +++++ backend/repository/courses.py | 129 ++++++++++++++++- backend/services/analytics.py | 125 +++++++++-------- backend/services/courses.py | 78 +++++++---- 10 files changed, 618 insertions(+), 387 deletions(-) create mode 100644 backend/app/utils.py diff --git a/analytics/bargraph.py b/analytics/bargraph.py index 6ecb852..c9f3286 100644 --- a/analytics/bargraph.py +++ b/analytics/bargraph.py @@ -1,136 +1,97 @@ -import os -import pandas as pd -import plotly.graph_objects as go +# import requests +# import plotly.graph_objects as go +# 
import pandas as pd + +# # Base URL for your API endpoints (adjust if needed) +# API_BASE_URL = "http://127.0.0.1:8000/courses" + +# def fetch_requirement_coverage(major: str, semester: str): +# """ +# Calls the API endpoint to get analytics data instead of reading Excel files directly. +# Expected endpoint: GET /courses/analytics?major=&semester= +# """ +# url = f"{API_BASE_URL}/analytics" +# params = {"major": major, "semester": semester} +# response = requests.get(url, params=params) +# response.raise_for_status() # raises an HTTPError for bad responses +# return response.json() + +# def create_bargraph(data, selected_major: str): +# """ +# Given analytics data (list of dicts with keys: major, short_requirement, NumCourses), +# create and display a horizontal bar graph using Plotly. +# """ +# if not data: +# print("No data available to plot.") +# return + +# # Convert the JSON data into a DataFrame for easier plotting +# df_grouped = pd.DataFrame(data) +# # Get all majors in the data +# majors = sorted(df_grouped["major"].dropna().unique()) + +# def get_trace(major: str): +# subset = df_grouped[df_grouped["major"] == major].copy() +# subset = subset.sort_values(by="NumCourses", ascending=True) +# return go.Bar( +# x=subset["NumCourses"], +# y=subset["short_requirement"], +# orientation="h", +# name=major, +# ) + +# init_trace = get_trace(selected_major) +# fig = go.Figure(data=[init_trace]) + +# # Create a dropdown for selecting different majors +# buttons = [] +# for m in majors: +# buttons.append({ +# "label": m, +# "method": "update", +# "args": [ +# {"data": [get_trace(m)]}, +# {"title": f"Course Count per Requirement for {m}"} +# ] +# }) + +# fig.update_layout( +# updatemenus=[{ +# "buttons": buttons, +# "direction": "down", +# "x": 0.0, +# "xanchor": "left", +# "y": 1.15, +# "yanchor": "top", +# "showactive": True, +# "pad": {"r": 10, "t": 10} +# }], +# title=f"Course Count per Requirement for {selected_major}", +# xaxis_title="Number of Courses", +# 
yaxis_title="Requirement", +# margin={"l": 100, "r": 100, "t": 150, "b": 50} +# ) +# fig.add_annotation( +# x=0.0, +# y=1.22, +# xanchor="left", +# yanchor="top", +# text="Select Major:", +# showarrow=False, +# font={"size": 12}, +# ) +# fig.show() + +# def main(): +# """ +# Client entry point for the bar graph. +# Instead of reading from Excel, it fetches data from the API endpoint. +# """ +# major = input("Enter major (e.g., IS): ").strip().upper() +# semester = input("Enter semester (e.g., F21): ").strip().upper() +# try: +# data = fetch_requirement_coverage(major, semester) +# create_bargraph(data, major) +# except Exception as e: +# print("Error fetching or plotting data:", e) - -def generate_requirement_coverage(): - """ - Returns the grouped coverage data (as a list of dicts) - for each major & requirement, rather than displaying a Plotly figure. - """ - # 1) Build absolute paths - current_dir = os.path.dirname(os.path.abspath(__file__)) - data_dir = os.path.join(current_dir, "..", "data", "audit") - data_path_counts = os.path.join(data_dir, "Countsfor.xlsx") - data_path_reqs = os.path.join(data_dir, "Requirement.xlsx") - - # 2) Load data - df_counts = pd.read_excel(data_path_counts, engine="openpyxl") - df_reqs = pd.read_excel(data_path_reqs, engine="openpyxl") - - # 3) Merge - df_merged = pd.merge(df_counts, df_reqs, on="requirement", how="left") - - # 4) Extract major code - df_merged["major"] = df_merged["audit_id"].apply( - lambda x: x.split("_")[0] if pd.notnull(x) else None - ) - - # 5) (Optional) Map short codes - major_map = { - "is": "Information Systems", - "ba": "Business Administration", - "cs": "Computer Science", - "bio": "Biological Sciences", - } - df_merged["major"] = df_merged["major"].map(major_map) - - # 6) Short requirement label - df_merged["short_requirement"] = df_merged["requirement"].apply( - lambda x: x.split("---")[-1].strip() - ) - - # 7) Group - grouped = ( - df_merged.groupby(["major", "short_requirement"])["course_code"] - 
.nunique() - .reset_index(name="NumCourses") - ) - - # 8) Return as a list of dicts - return grouped.to_dict(orient="records") - - -def show_plotly_chart(): - """ - Retains your Plotly code for local viewing if you still want to - run `bargraph.py` directly via `python bargraph.py`. - """ - coverage_data = generate_requirement_coverage() # get the data - import pandas as pd - import plotly.graph_objects as go - - df_grouped = pd.DataFrame(coverage_data) - # We assume the DataFrame has columns: [major, short_requirement, NumCourses] - # Next, replicate your figure logic: - majors = sorted(df_grouped["major"].dropna().unique()) - - def get_trace(selected_major): - data = df_grouped[df_grouped["major"] == selected_major].copy() - data = data.sort_values(by="NumCourses", ascending=True) - return go.Bar( - x=data["NumCourses"], - y=data["short_requirement"], - orientation="h", - name=selected_major, - ) - - init_major = majors[0] - init_trace = get_trace(init_major) - fig = go.Figure(data=[init_trace]) - - # Create dropdown - buttons_major = [] - for m in majors: - buttons_major.append( - { - "label": m, - "method": "update", - "args": [ - {"data": [get_trace(m)]}, - {"title": f"Course Count per Requirement for {m}"}, - ], - } - ) - - fig.update_layout( - updatemenus=[ - { - "buttons": buttons_major, - "direction": "down", - "x": 0.0, - "xanchor": "left", - "y": 1.15, - "yanchor": "top", - "showactive": True, - "pad": {"r": 10, "t": 10}, - } - ], - title=f"Course Count per Requirement for {init_major}", - xaxis_title="Number of Courses", - yaxis_title="Requirement", - margin={"l": 100, "r": 100, "t": 150, "b": 50}, - ) - - fig.add_annotation( - x=0.0, - y=1.22, - xanchor="left", - yanchor="top", - text="Select Major:", - showarrow=False, - font={"size": 12}, - ) - - fig.show() - - -def main(): - """ - Entry point if running `bargraph.py` directly. Calls `show_plotly_chart()`. 
- """ - show_plotly_chart() - - -if __name__ == "__main__": - main() diff --git a/analytics/predict_next_sem.py b/analytics/predict_next_sem.py index 009c2a9..3a07550 100644 --- a/analytics/predict_next_sem.py +++ b/analytics/predict_next_sem.py @@ -1,117 +1,115 @@ -import os -import pandas as pd -import numpy as np - - -def semester_sort_key(sem): - """ - Sort a semester code based on an academic cycle: - - For Fall (F): effective_year = int(year), order = 0. - - For Spring (S) and Summer (M): effective_year = int(year) - 1, - order = 1 for Spring, 2 for Summer. - For example: - F20 -> (20, 0) - S21 -> (20, 1) - M21 -> (20, 2) - F21 -> (21, 0) - """ - letter = sem[0].upper() - try: - year = int(sem[1:]) - except Exception: - year = 0 - - if letter == "F": - effective_year = year - order = 0 - elif letter == "S": - effective_year = year - 1 - order = 1 - elif letter == "M": - effective_year = year - 1 - order = 2 - else: - effective_year = year - order = 3 - - return effective_year, order - - -def predict_offering(course_code, target_semester): - """ - Reads Offering.xlsx and applies rule-based logic to determine - if `course_code` is likely to be offered in `target_semester`. 
- """ - current_dir = os.path.dirname(os.path.abspath(__file__)) - data_dir = os.path.join(current_dir, "..", "data", "course") - offering_file = os.path.join(data_dir, "Offering.xlsx") - - df = pd.read_excel(offering_file, engine="openpyxl") - df.columns = df.columns.str.strip() - df["Offered"] = 1 - - grouped = df.groupby(["course_code", "semester"])["Offered"].max().reset_index() - unique_semesters = grouped["semester"].unique() - sorted_semesters = sorted(unique_semesters, key=semester_sort_key) - - wide_data = grouped.pivot(index="course_code", columns="semester", values="Offered") - wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0).reset_index() - wide_data.columns.name = None - wide_data = wide_data.fillna(0) - - # Locate row for that course - course_row = wide_data[wide_data["course_code"] == course_code] - if course_row.empty: - return { - "course_code": course_code, - "target_semester": target_semester, - "prediction": "NO_DATA", - "reason": f"Course {course_code} not found in data." - } - - # Filter out columns that start with the target season AND have year > 20 - target_season = target_semester[0].upper() # "S", "F", or "M" - season_cols = [ - col for col in sorted_semesters - if col.startswith(target_season) and int(col[1:]) > 20 - ] - - offered_values = [] - for col in season_cols: - if col in course_row.columns: - offered_values.append(int(course_row[col].iloc[0])) - else: - offered_values.append(0) - - fraction_offered = ( - sum(offered_values) / len(offered_values) if offered_values else 0 - ) - - threshold = 0.5 - prediction = "YES" if fraction_offered >= threshold else "NO" - - return { - "course_code": course_code, - "target_semester": target_semester, - "prediction": prediction, - "fraction_offered": fraction_offered - } - - -def main(): - """ - Interactive mode for `predict_next_sem.py`. - Asks the user for target_semester and course_code, - then prints the prediction result. 
- """ - target_semester = input( - "Enter the target future semester (e.g. S26): " - ).strip().upper() - course_input = input("Enter the Course Code to query: ").strip().upper() - - result = predict_offering(course_input, target_semester) - print(result) - - -if __name__ == "__main__": - main() +# import os +# import pandas as pd +# import numpy as np + + +# def semester_sort_key(sem): +# """ +# Sort a semester code based on an academic cycle: +# - For Fall (F): effective_year = int(year), order = 0. +# - For Spring (S) and Summer (M): effective_year = int(year) - 1, +# order = 1 for Spring, 2 for Summer. +# For example: +# F20 -> (20, 0) +# S21 -> (20, 1) +# M21 -> (20, 2) +# F21 -> (21, 0) +# """ +# letter = sem[0].upper() +# try: +# year = int(sem[1:]) +# except Exception: +# year = 0 + +# if letter == "F": +# effective_year = year +# order = 0 +# elif letter == "S": +# effective_year = year - 1 +# order = 1 +# elif letter == "M": +# effective_year = year - 1 +# order = 2 +# else: +# effective_year = year +# order = 3 + +# return effective_year, order + + +# def predict_offering(course_code, target_semester): +# """ +# Reads Offering.xlsx and applies rule-based logic to determine +# if `course_code` is likely to be offered in `target_semester`. 
+# """ +# current_dir = os.path.dirname(os.path.abspath(__file__)) +# data_dir = os.path.join(current_dir, "..", "data", "course") +# offering_file = os.path.join(data_dir, "Offering.xlsx") + +# df = pd.read_excel(offering_file, engine="openpyxl") +# df.columns = df.columns.str.strip() +# df["Offered"] = 1 + +# grouped = df.groupby(["course_code", "semester"])["Offered"].max().reset_index() +# unique_semesters = grouped["semester"].unique() +# sorted_semesters = sorted(unique_semesters, key=semester_sort_key) + +# wide_data = grouped.pivot(index="course_code", columns="semester", values="Offered") +# wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0).reset_index() +# wide_data.columns.name = None +# wide_data = wide_data.fillna(0) + +# # Locate row for that course +# course_row = wide_data[wide_data["course_code"] == course_code] +# if course_row.empty: +# return { +# "course_code": course_code, +# "target_semester": target_semester, +# "prediction": "NO_DATA", +# "reason": f"Course {course_code} not found in data." +# } + +# # Filter out columns that start with the target season AND have year > 20 +# target_season = target_semester[0].upper() # "S", "F", or "M" +# season_cols = [ +# col for col in sorted_semesters +# if col.startswith(target_season) and int(col[1:]) > 20 +# ] + +# offered_values = [] +# for col in season_cols: +# if col in course_row.columns: +# offered_values.append(int(course_row[col].iloc[0])) +# else: +# offered_values.append(0) + +# fraction_offered = ( +# sum(offered_values) / len(offered_values) if offered_values else 0 +# ) + +# threshold = 0.5 +# prediction = "YES" if fraction_offered >= threshold else "NO" + +# return { +# "course_code": course_code, +# "target_semester": target_semester, +# "prediction": prediction, +# "fraction_offered": fraction_offered +# } + + +# def main(): +# """ +# Interactive mode for `predict_next_sem.py`. 
+# Asks the user for target_semester and course_code, +# then prints the prediction result. +# """ +# target_semester = input( +# "Enter the target future semester (e.g. S26): " +# ).strip().upper() +# course_input = input("Enter the Course Code to query: ").strip().upper() + +# result = predict_offering(course_input, target_semester) +# print(result) + + diff --git a/backend/app/main.py b/backend/app/main.py index 4fe8a70..03e08d7 100755 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -24,4 +24,4 @@ app.include_router(courses.router) app.include_router(requirements.router) -app.include_router(analytics.router) +#app.include_router(analytics.router) diff --git a/backend/app/routers/analytics.py b/backend/app/routers/analytics.py index 50cf336..8a1a892 100644 --- a/backend/app/routers/analytics.py +++ b/backend/app/routers/analytics.py @@ -1,42 +1,42 @@ -from fastapi import APIRouter, Depends, HTTPException -from sqlalchemy.orm import Session +# from fastapi import APIRouter, Depends, HTTPException +# from sqlalchemy.orm import Session -# If you’re reading from Excel files, you won't necessarily need SQLAlchemy, -# but if you're using a DB, you'll import get_db and possibly a repository. -from backend.database.db import get_db -from backend.app.schemas import CoverageOut, PredictOut -from backend.services import analytics as analytics_service +# # If you’re reading from Excel files, you won't necessarily need SQLAlchemy, +# # but if you're using a DB, you'll import get_db and possibly a repository. 
+# from backend.database.db import get_db +# from backend.app.schemas import CoverageOut, PredictOut +# from backend.services import analytics as analytics_service -router = APIRouter() +# router = APIRouter() -@router.get("/analytics/requirement-coverage", response_model=list[CoverageOut]) -def get_requirement_coverage(db: Session = Depends(get_db)): - """ - Returns data for the bar graph (based on Countsfor.xlsx & Requirement.xlsx) - OR from your DB tables if you migrated data there. - """ - try: - coverage_data = analytics_service.get_requirement_coverage(db) - return coverage_data - except Exception as exc: - raise HTTPException(status_code=500, detail=str(exc)) +# @router.get("/analytics/requirement-coverage", response_model=list[CoverageOut]) +# def get_requirement_coverage(db: Session = Depends(get_db)): +# """ +# Returns data for the bar graph (based on Countsfor.xlsx & Requirement.xlsx) +# OR from your DB tables if you migrated data there. +# """ +# try: +# coverage_data = analytics_service.get_requirement_coverage(db) +# return coverage_data +# except Exception as exc: +# raise HTTPException(status_code=500, detail=str(exc)) -@router.get("/analytics/predict", response_model=PredictOut) -def predict_course_offering( - course_code: str, - target_semester: str, - db: Session = Depends(get_db) -): - """ - Predicts if a course is offered in a given semester (based on Offering.xlsx) - OR from your DB tables if that data is in the 'offering' table. 
- Example usage: /analytics/predict?course_code=CS101&target_semester=S26 - """ - try: - return analytics_service.predict_course_offering(db, course_code, target_semester) - except ValueError as val_err: - raise HTTPException(status_code=400, detail=str(val_err)) - except Exception as exc: - raise HTTPException(status_code=500, detail=str(exc)) +# @router.get("/analytics/predict", response_model=PredictOut) +# def predict_course_offering( +# course_code: str, +# target_semester: str, +# db: Session = Depends(get_db) +# ): +# """ +# Predicts if a course is offered in a given semester (based on Offering.xlsx) +# OR from your DB tables if that data is in the 'offering' table. +# Example usage: /analytics/predict?course_code=CS101&target_semester=S26 +# """ +# try: +# return analytics_service.predict_course_offering(db, course_code, target_semester) +# except ValueError as val_err: +# raise HTTPException(status_code=400, detail=str(val_err)) +# except Exception as exc: +# raise HTTPException(status_code=500, detail=str(exc)) diff --git a/backend/app/routers/courses.py b/backend/app/routers/courses.py index c34b744..8268a2e 100755 --- a/backend/app/routers/courses.py +++ b/backend/app/routers/courses.py @@ -5,21 +5,31 @@ the service layer for business logic. 
""" +from typing import List from fastapi import APIRouter, Depends, HTTPException from sqlalchemy.orm import Session from backend.database.db import get_db from backend.services.courses import CourseService -from backend.app.schemas import (CourseResponse, CourseListResponse, - CourseFilter, DepartmentListResponse) +from backend.services import courses as courses_free # Free functions for majors, semesters, analytics +from backend.app.schemas import ( + CourseResponse, + CourseListResponse, + CourseFilter, + DepartmentListResponse, + AnalyticsOut, + PredictOut, +) router = APIRouter() + def get_course_service(db: Session = Depends(get_db)) -> CourseService: """ Provides a CourseService instance for handling course-related operations. """ return CourseService(db) + @router.get("/courses", response_model=CourseListResponse) def get_all_courses(course_service: CourseService = Depends(get_course_service)): """ @@ -27,14 +37,17 @@ def get_all_courses(course_service: CourseService = Depends(get_course_service)) """ return course_service.fetch_all_courses() + @router.get("/courses/by-department", response_model=CourseListResponse) -def get_courses_by_department_route(department: str, - course_service: CourseService = Depends(get_course_service)): +def get_courses_by_department_route( + department: str, course_service: CourseService = Depends(get_course_service) +): """ Fetch courses filtered by department. 
@router.get("/majors", response_model=List[str])
async def get_majors():
    """
    Return the list of available majors.

    Raises 404 when no majors are recorded in the data source.
    """
    available_majors = courses_free.get_majors()
    if available_majors:
        return available_majors
    raise HTTPException(status_code=404, detail="No majors found")
@router.get("/predict", response_model=PredictOut)
async def predict_course_offering_endpoint(
    course_code: str, target_semester: str, db: Session = Depends(get_db)
):
    """
    Predict whether a course will be offered in the target semester.

    Example: GET /courses/predict?course_code=15-110&target_semester=F22

    Raises:
        HTTPException 404: no prediction is available for this course.
        HTTPException 500: the underlying prediction routine failed.
    """
    # `db` is kept for dependency parity with the other routes even though the
    # free-function prediction path does not use it -- interface unchanged.
    try:
        prediction = courses_free.get_prediction(course_code, target_semester)
    except Exception as e:
        # Only genuinely unexpected failures become 500s.
        raise HTTPException(status_code=500, detail=str(e))
    # Bug fix: the 404 must be raised OUTSIDE the try block. Previously it was
    # raised inside, so `except Exception` caught the HTTPException(404) and
    # re-raised it as a 500 Internal Server Error.
    if not prediction:
        raise HTTPException(status_code=404, detail="Prediction not found")
    return prediction
def semester_sort_key(sem: str):
    """
    Build a ``(effective_year, order)`` sort key for a semester code such as
    ``"F20"``, ``"S21"`` or ``"M21"``, ordering terms by academic cycle:

      - Fall   (F): effective_year = year,     order = 0
      - Spring (S): effective_year = year - 1, order = 1
      - Summer (M): effective_year = year - 1, order = 2
      - any other season letter sorts last:    order = 3

    Examples:
        F20 -> (20, 0); S21 -> (20, 1); M21 -> (20, 2); F21 -> (21, 0)

    Robustness fix: an empty code no longer raises IndexError; malformed
    codes (non-numeric year) fall back to year 0, so sorting a dirty
    semester column never crashes.
    """
    if not sem:  # previously sem[0] raised IndexError on ""
        return 0, 3
    letter = sem[0].upper()
    try:
        year = int(sem[1:])
    except (TypeError, ValueError):  # narrowed from bare `except Exception`
        year = 0
    if letter == "F":
        return year, 0
    if letter == "S":
        return year - 1, 1
    if letter == "M":
        return year - 1, 2
    return year, 3
def get_all_semesters():
    """
    Return the distinct, non-null semester codes from the Offering table.

    NOTE(review): this helper drives its own session from the ``get_db()``
    dependency generator instead of receiving one -- presumably so it can be
    called outside a request context; confirm against callers.
    """
    gen = get_db()
    session: Session = next(gen)
    try:
        rows = session.query(Offering.semester).distinct().all()
        return [semester for (semester,) in rows if semester is not None]
    finally:
        # Fix: close the GENERATOR, not just the session. Calling
        # session.close() while abandoning the generator skipped get_db's own
        # finally/teardown; gen.close() runs it exactly as FastAPI would.
        gen.close()
def get_prediction_data(course_code: str, target_semester: str,
                        threshold: float = 0.5, min_year: int = 20):
    """
    Rule-based offering prediction from the Offering.xlsx history.

    Builds a course x semester presence matrix, then inspects every recorded
    semester of the same season as ``target_semester`` (restricted to years
    strictly greater than ``min_year``) and predicts "YES" when the course
    was offered in at least ``threshold`` of those semesters.

    Args:
        course_code: course to look up (matched exactly against the sheet).
        target_semester: future semester code, e.g. "S26"; only its season
            letter is used for the historical filter.
        threshold: minimum fraction of past same-season offerings required
            for a "YES" (default 0.5 -- previous hard-coded value).
        min_year: only semesters with year > min_year count as history
            (default 20 -- previous hard-coded value).

    Returns:
        dict compatible with the PredictOut schema; "NO_DATA" plus a
        "reason" key when the course is absent from the sheet.
    """
    import os
    import pandas as pd

    # Path from backend/repository/ up to the repo-level data/course folder.
    data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "data", "course")
    offering_file = os.path.join(data_dir, "Offering.xlsx")

    df = pd.read_excel(offering_file, engine="openpyxl")
    df.columns = df.columns.str.strip()
    df["Offered"] = 1

    # One 0/1 record per (course, semester) actually offered.
    grouped = df.groupby(["course_code", "semester"])["Offered"].max().reset_index()
    # Uses the module-level `semester_sort_key` import; the redundant
    # in-function re-import was removed.
    sorted_semesters = sorted(grouped["semester"].unique(), key=semester_sort_key)

    # Wide presence matrix: rows = courses, columns = semesters (0/1).
    wide_data = grouped.pivot(index="course_code", columns="semester", values="Offered")
    wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0).reset_index()
    wide_data.columns.name = None
    wide_data = wide_data.fillna(0)

    course_row = wide_data[wide_data["course_code"] == course_code]
    if course_row.empty:
        return {
            "course_code": course_code,
            "target_semester": target_semester,
            "prediction": "NO_DATA",
            "reason": f"Course {course_code} not found in data."
        }

    def _year(code: str) -> int:
        # Defensive parse: a malformed semester code in the sheet used to
        # crash int(col[1:]); now it simply fails the year filter.
        try:
            return int(code[1:])
        except (TypeError, ValueError):
            return 0

    target_season = target_semester[0].upper()  # "F", "S", or "M"
    season_cols = [
        col for col in sorted_semesters
        if col.startswith(target_season) and _year(col) > min_year
    ]

    offered_values = [
        int(course_row[col].iloc[0]) if col in course_row.columns else 0
        for col in season_cols
    ]
    fraction_offered = (
        sum(offered_values) / len(offered_values) if offered_values else 0
    )

    return {
        "course_code": course_code,
        "target_semester": target_semester,
        "prediction": "YES" if fraction_offered >= threshold else "NO",
        "fraction_offered": fraction_offered
    }
- """ - current_dir = os.path.dirname(os.path.abspath(__file__)) - data_dir = os.path.join(current_dir, "..", "data", "audit") - counts_path = os.path.join(data_dir, "Countsfor.xlsx") - req_path = os.path.join(data_dir, "Requirement.xlsx") - - df_counts = pd.read_excel(counts_path, engine="openpyxl") - df_reqs = pd.read_excel(req_path, engine="openpyxl") - - df_merged = pd.merge(df_counts, df_reqs, on="requirement", how="left") - df_merged["major"] = df_merged["audit_id"].apply( - lambda x: x.split("_")[0] if pd.notnull(x) else None - ) - df_merged["short_requirement"] = df_merged["requirement"].apply( - lambda x: x.split("---")[-1].strip() - ) - - grouped = ( - df_merged.groupby(["major", "short_requirement"])["course_code"] - .nunique() - .reset_index(name="NumCourses") - ) - - coverage_data = grouped.to_dict(orient="records") - return coverage_data - - -def predict_course_offering(db: Session, course_code: str, target_semester: str): - """ - If using DB: - - Query 'Offering' or 'Enrollment' for past patterns. - If using Excel: - - read 'Offering.xlsx' & do rule-based logic (like predict_next_sem). - """ - data_dir = os.path.join(os.path.dirname(__file__), "..", "data", "course") - offering_file = os.path.join(data_dir, "Offering.xlsx") - - df = pd.read_excel(offering_file, engine="openpyxl") - df.columns = df.columns.str.strip() - df["Offered"] = 1 - - # Example pivot logic, threshold check, etc. 
- # Return a dict that your Pydantic schema (PredictOut) can handle - return { - "course_code": course_code, - "target_semester": target_semester, - "prediction": "YES", # or "NO" - } +# from sqlalchemy.orm import Session +# from backend.database.models import Course, CountsFor, Requirement, Offering, Audit +# from analytics.predict_next_sem import semester_sort_key +# import os +# import pandas as pd + + +# def predict_course_offering(db: Session, course_code: str, target_semester: str): +# """ +# If using Excel: +# - read from 'Offering.xlsx' and apply rule-based logic. +# """ +# # Adjust relative path: go two levels up to repository root, then into data/course. +# data_dir = os.path.join(os.path.dirname(__file__), "..", "..", "data", "course") +# offering_file = os.path.join(data_dir, "Offering.xlsx") + +# df = pd.read_excel(offering_file, engine="openpyxl") +# df.columns = df.columns.str.strip() +# df["Offered"] = 1 + +# grouped = df.groupby(["course_code", "semester"])["Offered"].max().reset_index() +# unique_semesters = grouped["semester"].unique() +# sorted_semesters = sorted(unique_semesters, key=semester_sort_key) + +# wide_data = grouped.pivot(index="course_code", columns="semester", values="Offered") +# wide_data = wide_data.reindex(columns=sorted_semesters, fill_value=0).reset_index() +# wide_data.columns.name = None +# wide_data = wide_data.fillna(0) + +# # Locate row for that course +# course_row = wide_data[wide_data["course_code"] == course_code] +# if course_row.empty: +# return { +# "course_code": course_code, +# "target_semester": target_semester, +# "prediction": "NO_DATA", +# "reason": f"Course {course_code} not found in data." 
+# } + +# target_season = target_semester[0].upper() # "S", "F", or "M" +# season_cols = [ +# col for col in sorted_semesters +# if col.startswith(target_season) and int(col[1:]) > 20 +# ] + +# offered_values = [] +# for col in season_cols: +# if col in course_row.columns: +# offered_values.append(int(course_row[col].iloc[0])) +# else: +# offered_values.append(0) + +# fraction_offered = sum(offered_values) / len(offered_values) if offered_values else 0 +# threshold = 0.5 +# prediction = "YES" if fraction_offered >= threshold else "NO" + +# return { +# "course_code": course_code, +# "target_semester": target_semester, +# "prediction": prediction, +# "fraction_offered": fraction_offered +# } diff --git a/backend/services/courses.py b/backend/services/courses.py index daa98cb..a9d5c8e 100755 --- a/backend/services/courses.py +++ b/backend/services/courses.py @@ -3,26 +3,26 @@ """ from typing import Dict, Optional, List from sqlalchemy.orm import Session -from backend.repository.courses import CourseRepository +from backend.repository import courses as courses_repo from backend.app.schemas import CourseResponse, CourseListResponse class CourseService: - """encapsulates business logic for handling courses.""" + """Encapsulates business logic for handling courses.""" def __init__(self, db: Session): - self.course_repo = CourseRepository(db) + self.course_repo = courses_repo.CourseRepository(db) def fetch_course_by_code(self, course_code: str) -> Optional[CourseResponse]: - """fetch a course and format its response.""" + """Fetch a course and format its response.""" course = self.course_repo.get_course_by_code(course_code) if not course: return None - # fetch offered semesters + # Fetch offered semesters offered_semesters = self.course_repo.get_offered_semesters(course_code) - # fetch course requirements + # Fetch course requirements requirements = self.course_repo.get_course_requirements(course_code) return CourseResponse( @@ -36,10 +36,8 @@ def 
fetch_course_by_code(self, course_code: str) -> Optional[CourseResponse]: requirements=requirements, ) - def fetch_all_courses(self) -> CourseListResponse: - """fetch and structure all courses, prioritizing courses that fulfill - at least one requirement.""" + """Fetch and structure all courses, prioritizing courses that fulfill at least one requirement.""" courses = self.course_repo.get_all_courses() for course in courses: @@ -49,7 +47,7 @@ def fetch_all_courses(self) -> CourseListResponse: sorted_courses = sorted( courses, key=lambda c: (c["num_requirements"] == 0, -c["num_offered_semesters"]), - reverse=False + reverse=False, ) structured_courses = [ @@ -68,20 +66,21 @@ def fetch_all_courses(self) -> CourseListResponse: return CourseListResponse(courses=structured_courses) - - - - def fetch_courses_by_requirement(self, cs_requirement=None, is_requirement=None, - ba_requirement=None, - bs_requirement=None) -> CourseListResponse: - """fetch and process courses matching requirements.""" - raw_results = self.course_repo.get_courses_by_requirement(cs_requirement, is_requirement, - ba_requirement, bs_requirement) + def fetch_courses_by_requirement( + self, + cs_requirement=None, + is_requirement=None, + ba_requirement=None, + bs_requirement=None, + ) -> CourseListResponse: + """Fetch and process courses matching requirements.""" + raw_results = self.course_repo.get_courses_by_requirement( + cs_requirement, is_requirement, ba_requirement, bs_requirement + ) # Process results into structured output course_dict: Dict[str, dict] = {} - for (course_code, course_name, department, - prerequisites, requirement, audit_id) in raw_results: + for (course_code, course_name, department, prerequisites, requirement, audit_id) in raw_results: if course_code not in course_dict: offered_semesters = self.course_repo.get_offered_semesters(course_code) @@ -107,7 +106,7 @@ def fetch_courses_by_requirement(self, cs_requirement=None, is_requirement=None, for course in course_dict.values()]) 
def fetch_courses_by_prerequisite(self, has_prereqs: bool) -> CourseListResponse: - """fetch and structure courses based on whether they have prerequisites.""" + """Fetch and structure courses based on whether they have prerequisites.""" courses = self.course_repo.get_courses_by_prerequisite(has_prereqs) structured_courses = [ @@ -127,7 +126,7 @@ def fetch_courses_by_prerequisite(self, has_prereqs: bool) -> CourseListResponse return CourseListResponse(courses=structured_courses) def fetch_courses_by_department(self, department: str) -> CourseListResponse: - """fetch and structure courses filtered by department.""" + """Fetch and structure courses filtered by department.""" courses = self.course_repo.get_courses_by_department(department) structured_courses = [ @@ -147,5 +146,36 @@ def fetch_courses_by_department(self, department: str) -> CourseListResponse: return CourseListResponse(courses=structured_courses) def fetch_all_departments(self) -> List[str]: - """fetch a distinct list of all departments.""" + """Fetch a distinct list of all departments.""" return self.course_repo.get_all_departments() + + +# Free functions for new endpoints + +def get_majors(): + """ + Returns a list of distinct majors from the Audit table. + """ + return courses_repo.get_all_majors() + + +def get_semesters(): + """ + Returns a list of distinct semesters from the Offering table. + """ + return courses_repo.get_all_semesters() + + +def get_analytics(major: str, semester: str): + """ + Retrieves analytics data for the given major and semester. + Aggregates the number of distinct courses per requirement. + """ + return courses_repo.get_analytics_data(major, semester) + +def get_prediction(course_code: str, target_semester: str): + """ + Retrieves prediction data using the repository function. + """ + from backend.repository import courses as courses_repo + return courses_repo.get_prediction_data(course_code, target_semester) \ No newline at end of file