diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..17e15f2 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,15 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python: Current File", + "type": "python", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} \ No newline at end of file diff --git a/__pycache__/model.cpython-39.pyc b/__pycache__/model.cpython-39.pyc new file mode 100644 index 0000000..bbd930b Binary files /dev/null and b/__pycache__/model.cpython-39.pyc differ diff --git a/api.py b/api.py index a206495..00471e9 100644 --- a/api.py +++ b/api.py @@ -30,7 +30,7 @@ # Load our model into memory. # Please update this path to reflect your own trained model. static_model = load_model( - path_to_model='assets/trained-models/apples_simple_lm_regression.pkl') + path_to_model='assets/trained-models/xgbmodel.pkl') print ('-'*40) print ('Model succesfully loaded') diff --git a/assets/trained-models/xgbmodel.pkl b/assets/trained-models/xgbmodel.pkl new file mode 100644 index 0000000..387d1c8 Binary files /dev/null and b/assets/trained-models/xgbmodel.pkl differ diff --git a/model.py b/model.py index d72c14b..6d5a121 100644 --- a/model.py +++ b/model.py @@ -61,8 +61,20 @@ def _preprocess_data(data): # ----------- Replace this code with your own preprocessing steps -------- - feature_vector_df = feature_vector_df[(feature_vector_df['Commodities'] == 'APPLE GOLDEN DELICIOUS')] - predict_vector = feature_vector_df[['Total_Qty_Sold','Stock_On_Hand']] + feature_vector_df = feature_vector_df[(feature_vector_df['Commodities'] == 'APPLE GOLDEN DELICIOUS')].drop(columns='Commodities') + + + + feature_vector_df['Date'] = pd.to_datetime(feature_vector_df['Date']) + feature_vector_df['Day'] 
= feature_vector_df['Date'].dt.day + feature_vector_df['Month'] = feature_vector_df['Date'].dt.month + feature_vector_df.drop(['Date'], inplace = True, axis = 1) + + feature_vector_df.columns = ['province', 'container', 'size_grade', 'weight_kg', 'low_price', + 'high_price', 'sales_total', 'total_qty_sold','total_kg_sold', + 'stock_on_hand', 'avg_price_per_kg', 'day', 'month'] + + predict_vector = pd.get_dummies(feature_vector_df.drop('avg_price_per_kg',axis=1),drop_first=False) # ------------------------------------------------------------------------ diff --git a/utils/request.py b/utils/request.py index 79cfe84..660061e 100644 --- a/utils/request.py +++ b/utils/request.py @@ -36,7 +36,7 @@ # replace the URL below with its public IP: # url = 'http://{public-ip-address-of-remote-machine}:5000/api_v0.1' -url = 'http://127.0.0.1:5000/api_v0.1' +url = 'http://34.242.216.154:5000/api_v0.1' # Perform the POST request. print(f"Sending POST request to web server API at: {url}") diff --git a/utils/train_model.py b/utils/train_model.py index 7ba439b..2c24287 100644 --- a/utils/train_model.py +++ b/utils/train_model.py @@ -12,22 +12,36 @@ # Dependencies import pandas as pd import pickle -from sklearn.linear_model import LinearRegression +from xgboost import XGBRegressor +from sklearn.model_selection import train_test_split # Fetch training data and preprocess for modeling train = pd.read_csv('data/train_data.csv') -train = train[(train['Commodities'] == 'APPLE GOLDEN DELICIOUS')] +train = train[(train['Commodities'] == 'APPLE GOLDEN DELICIOUS')].drop(columns='Commodities') -y_train = train['avg_price_per_kg'] -X_train = train[['Total_Qty_Sold','Stock_On_Hand']] + +train['Date'] = pd.to_datetime(train['Date']) +train['Day'] = train['Date'].dt.day +train['Month'] = train['Date'].dt.month +train.drop(['Date'], inplace = True, axis = 1) + +train.columns = ['province', 'container', 'size_grade', 'weight_kg', 'low_price', + 'high_price', 'sales_total', 'total_qty_sold','total_kg_sold', + 'stock_on_hand', 
'avg_price_per_kg', 'day', 'month'] + +dummy_df = pd.get_dummies(train,drop_first=False) + +X = dummy_df.drop('avg_price_per_kg',axis=1) +y = dummy_df['avg_price_per_kg'] # Fit model -lm_regression = LinearRegression(normalize=True) +xgbmodel = XGBRegressor(max_depth=2,min_child_weight=13,subsample=1,colsample_bytree=1, + objective='reg:squarederror',n_estimators=6000, learning_rate=0.3, random_state= 16) print ("Training Model...") -lm_regression.fit(X_train, y_train) +xgbmodel.fit(X, y) # Pickle model for use within our API -save_path = '../assets/trained-models/apples_simple_lm_regression.pkl' +save_path = '../assets/trained-models/xgbmodel.pkl' print (f"Training completed. Saving model to: {save_path}") -pickle.dump(lm_regression, open(save_path,'wb')) +pickle.dump(xgbmodel, open(save_path,'wb'))