From d393e6f4aa8859a7094c79021e9437b769688cd3 Mon Sep 17 00:00:00 2001
From: Jovaun Allen
Date: Wed, 25 Feb 2026 19:31:43 -0800
Subject: [PATCH] Enhanced Describe-method.py with modular OOP structure and visualization

---
 Describe-method.py | 84 +++++++++++++++++++++++++++++-----------------
 1 file changed, 54 insertions(+), 30 deletions(-)

diff --git a/Describe-method.py b/Describe-method.py
index 01d517f..2fb58ec 100644
--- a/Describe-method.py
+++ b/Describe-method.py
@@ -1,33 +1,57 @@
-#step 1 sample data frame
-
 import pandas as pd
+import matplotlib.pyplot as plt
+
+
+class EmployeeDataAnalyzer:
+    def __init__(self, data):
+        self.df = pd.DataFrame(data)
+
+    def display_data(self):
+        print("Displaying the DataFrame:")
+        print(self.df)
+
+    def describe_data(self):
+        print("\nDescribing the DataFrame:")
+        print(self.df.describe())
+
+    def dataset_info(self):
+        print(f"\nShape of the DataFrame: {self.df.shape}")
+        print(f"Columns in the DataFrame: {self.df.columns.tolist()}")
+        print(f"\nData Types:\n{self.df.dtypes}")
 
-data={
-    "Name":['Prince','Riya','Karan','Anjali','Rahul','Sita','Ravi','Amit'],
-    "Age":[18,20,24,31,45,53,60,70],
-    "Salary":[30000,20000,43000,40000,54000,60000,48000,58000],
-    "Performance":[5,6,7,8,9,10,4,3]
+    def categorize_performance(self):
+        def category(score):
+            if score >= 8:
+                return "High"
+            elif score >= 5:
+                return "Medium"
+            else:
+                return "Low"
+
+        self.df["Performance Category"] = self.df["Performance"].apply(category)
+        print("\nPerformance categories added.")
+        print(self.df[["Name", "Performance", "Performance Category"]])
+
+    def visualize_salary_distribution(self):
+        plt.hist(self.df["Salary"], bins=5)
+        plt.title("Salary Distribution")
+        plt.xlabel("Salary")
+        plt.ylabel("Frequency")
+        plt.show()
+
+
+# Sample Data
+data = {
+    "Name": ['Prince','Riya','Karan','Anjali','Rahul','Sita','Ravi','Amit'],
+    "Age": [18,20,24,31,45,53,60,70],
+    "Salary": [30000,20000,43000,40000,54000,60000,48000,58000],
+    "Performance": [5,6,7,8,9,10,4,3]
 }
-df = pd.DataFrame(data)
-# Display the DataFrame
-print("Displaying the DataFrame:")
-print(df)
-
-#step 2 describe method
-print("\nDescribing the DataFrame:")
-print(df.describe())
-
-'''
-1- How to use the describe method in pandas?
-The `describe()` method in pandas is used to generate descriptive statistics of a DataFrame. It
-provides a summary of the central tendency, dispersion, and shape of the dataset's distribution, excluding `NaN` values. The output includes count, mean, standard deviation, minimum, maximum, and percentiles (25%, 50%, 75%) for each numeric column.
-
-'''
-
-'''
-1-How big is your dataSet
-2- What are the names of the columns in your dataSet?
-3- What is the data type of each column in your dataSet?
-'''
-print(f"Shape of the DataFrame: {df.shape}")
-print(f"Columns in the DataFrame: {df.columns.tolist()}")
+
+if __name__ == "__main__":
+    analyzer = EmployeeDataAnalyzer(data)
+    analyzer.display_data()
+    analyzer.describe_data()
+    analyzer.dataset_info()
+    analyzer.categorize_performance()
+    analyzer.visualize_salary_distribution()
\ No newline at end of file