Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 54 additions & 30 deletions Describe-method.py
Original file line number Diff line number Diff line change
@@ -1,33 +1,57 @@
#step 1 sample data frame

import pandas as pd
import matplotlib.pyplot as plt


class EmployeeDataAnalyzer:
    """Print simple reports about a table of employee records.

    The input is wrapped in a pandas DataFrame kept on ``self.df``. Some
    methods read specific columns ("Name", "Salary", "Performance"); each
    method's docstring names the columns it needs.
    """

    def __init__(self, data):
        """Build ``self.df`` by passing *data* to ``pd.DataFrame``."""
        self.df = pd.DataFrame(data)

    def display_data(self):
        """Print the whole DataFrame."""
        print("Displaying the DataFrame:")
        print(self.df)

    def describe_data(self):
        """Print the summary returned by ``DataFrame.describe()``."""
        print("\nDescribing the DataFrame:")
        print(self.df.describe())

    def dataset_info(self):
        """Print the DataFrame's shape, column names and per-column dtypes."""
        print(f"\nShape of the DataFrame: {self.df.shape}")
        print(f"Columns in the DataFrame: {self.df.columns.tolist()}")
        print(f"\nData Types:\n{self.df.dtypes}")

    def categorize_performance(self, *, high=8, medium=5):
        """Add a "Performance Category" column derived from "Performance".

        The new column is written onto ``self.df`` (replacing any existing
        column of that name), then the "Name", "Performance" and
        "Performance Category" columns are printed.

        Args:
            high: Scores greater than or equal to this are labelled "High".
            medium: Scores greater than or equal to this, but below *high*,
                are labelled "Medium". Anything else is labelled "Low".

        Raises:
            ValueError: If *medium* is greater than *high*, which would make
                the "Medium" label unreachable.
        """
        if medium > high:
            raise ValueError(
                f"medium threshold ({medium}) must not exceed "
                f"high threshold ({high})"
            )

        def category(score):
            if score >= high:
                return "High"
            if score >= medium:
                return "Medium"
            # Values that compare false against both thresholds (NaN, for
            # example) also end up here.
            return "Low"

        self.df["Performance Category"] = self.df["Performance"].apply(category)
        print("\nPerformance categories added.")
        print(self.df[["Name", "Performance", "Performance Category"]])

    def visualize_salary_distribution(self, bins=5):
        """Show a histogram of the "Salary" column.

        Args:
            bins: Forwarded to ``plt.hist`` as its ``bins`` argument.
        """
        plt.hist(self.df["Salary"], bins=bins)
        plt.title("Salary Distribution")
        plt.xlabel("Salary")
        plt.ylabel("Frequency")
        plt.show()


# Sample data, written one employee per row and then transposed into the
# column-name -> list-of-values mapping that the rest of the file uses.
_COLUMNS = ("Name", "Age", "Salary", "Performance")
_EMPLOYEES = [
    ('Prince', 18, 30000, 5),
    ('Riya', 20, 20000, 6),
    ('Karan', 24, 43000, 7),
    ('Anjali', 31, 40000, 8),
    ('Rahul', 45, 54000, 9),
    ('Sita', 53, 60000, 10),
    ('Ravi', 60, 48000, 4),
    ('Amit', 70, 58000, 3),
]
data = {
    column: list(values)
    for column, values in zip(_COLUMNS, zip(*_EMPLOYEES))
}
# NOTE(review): these top-level statements run on import and print the same
# information that EmployeeDataAnalyzer reports. They look like the
# pre-refactor version of this script -- confirm whether they are still wanted.
df = pd.DataFrame(data)

# Step 1: display the DataFrame.
print("Displaying the DataFrame:")
print(df)

# Step 2: the describe() method.
print("\nDescribing the DataFrame:")
print(df.describe())

# How do you use the describe method in pandas?
#
# `DataFrame.describe()` generates descriptive statistics for a DataFrame. It
# summarizes the central tendency, dispersion, and shape of each column's
# distribution, excluding `NaN` values. For each numeric column the output
# includes the count, mean, standard deviation, minimum, maximum, and the
# 25%, 50% and 75% percentiles.

# Step 3: basic questions to ask of any dataset.
#   1. How big is the dataset?                    -> df.shape
#   2. What are the names of its columns?         -> df.columns
#   3. What is the data type of each column?      -> df.dtypes
print(f"Shape of the DataFrame: {df.shape}")
print(f"Columns in the DataFrame: {df.columns.tolist()}")
# Answers question 3, which the notes ask but the script never printed.
print(f"Data Types:\n{df.dtypes}")

if __name__ == "__main__":
    # Run the full report only when this file is executed as a script, so
    # importing the module does not trigger the analyzer's output or plot.
    analyzer = EmployeeDataAnalyzer(data)
    analyzer.display_data()
    analyzer.describe_data()
    analyzer.dataset_info()
    analyzer.categorize_performance()
    analyzer.visualize_salary_distribution()