-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmodeling.py
More file actions
100 lines (78 loc) · 3.34 KB
/
modeling.py
File metadata and controls
100 lines (78 loc) · 3.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
# Load the CSV file from the specified location
file_path = "/FILEPATH/data.csv" # Replace with the correct path
data = pd.read_csv(file_path)
# Replace 'NULL' with NaN for proper handling of missing values
data.replace('NULL', np.nan, inplace=True)
# Convert the DataFrame from wide to long format
data_long = pd.melt(data, id_vars=['Analyst'], var_name='Month', value_name='Value')
# Convert 'Month' to a datetime object
data_long['Month'] = pd.to_datetime(data_long['Month'], format='%y-%b')
# Sort the data by Analyst and Month
data_long.sort_values(by=['Analyst', 'Month'], inplace=True)
# Function to forecast using linear regression
def forecast_linear_regression(data, periods=7):
# Prepare the data
data = data.dropna()
data = data.reset_index(drop=True)
data['Index'] = range(len(data))
# Prepare the training data
X = data['Index'].values.reshape(-1, 1)
y = data['Value'].values
# Fit the linear regression model
model = LinearRegression()
model.fit(X, y)
# Predict future values
future_indices = np.arange(len(data), len(data) + periods).reshape(-1, 1)
future_values = model.predict(future_indices)
# Ensure no negative values
future_values[future_values < 0] = 0
# Round to whole numbers
future_values = np.round(future_values)
# Create a DataFrame with future values
last_date = data['Month'].iloc[-1]
future_dates = pd.date_range(last_date, periods=periods + 1, freq='M')[1:]
forecast_df = pd.DataFrame({'Month': future_dates, 'Value': future_values})
return forecast_df
# Forecast the next 7 months for each analyst
forecasts = {}
for analyst in data['Analyst'].unique():
analyst_data = data_long[data_long['Analyst'] == analyst][['Month', 'Value']]
forecast = forecast_linear_regression(analyst_data)
forecast.set_index('Month', inplace=True)
forecasts[analyst] = forecast['Value']
# Combine the forecasts into a DataFrame
forecast_df = pd.DataFrame(forecasts)
# Convert the forecasted data back to wide format
forecast_wide = forecast_df.T
forecast_wide.columns = [f"{date.strftime('%b-%y')}" for date in forecast_df.index]
# Combine original and forecasted data
combined_df = pd.concat([data.set_index('Analyst'), forecast_wide], axis=1)
# Prepare the data for the table
combined_df.reset_index(inplace=True)
table_data = combined_df.values.tolist()
header = combined_df.columns.tolist()
# Determine which cells are forecasted
forecast_columns = forecast_wide.columns.tolist()
# Create color-scaled table with subtle color for forecasted cells
fig = go.Figure(data=[go.Table(
header=dict(values=header,
fill_color='paleturquoise',
align='left'),
cells=dict(values=[combined_df[col] for col in combined_df.columns],
fill_color=[
['#FFFFFF'] * len(combined_df) if col not in forecast_columns else ['#E0FFFF'] * len(combined_df)
for col in combined_df.columns
],
align='left'))
])
fig.update_layout(
title="Analyst Performance Forecast",
title_font=dict(size=24, family='Arial, sans-serif', color='black'),
template="plotly_white"
)
fig.show()