Explore-AI · onkarabiletshele · May 27, 2021
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Python: Current File",
+            "type": "python",
+            "request": "launch",
+            "program": "${file}",
+            "console": "integratedTerminal"
+        }
+    ]
+}
diff --git a/__pycache__/model.cpython-39.pyc b/__pycache__/model.cpython-39.pyc
diff --git a/api.py b/api.py
@@ -30,7 +30,7 @@
 # Load our model into memory.
 # Please update this path to reflect your own trained model.
 static_model = load_model(
-    path_to_model='assets/trained-models/apples_simple_lm_regression.pkl')
+    path_to_model='assets/trained-models/xgbmodel.pkl')
 
 print ('-'*40)
 print ('Model succesfully loaded')

diff --git a/assets/trained-models/xgbmodel.pkl b/assets/trained-models/xgbmodel.pkl
diff --git a/model.py b/model.py
@@ -61,8 +61,20 @@ def _preprocess_data(data):
     # ----------- Replace this code with your own preprocessing steps --------
 
 
-    feature_vector_df = feature_vector_df[(feature_vector_df['Commodities'] == 'APPLE GOLDEN DELICIOUS')]
-    predict_vector = feature_vector_df[['Total_Qty_Sold','Stock_On_Hand']]
+    feature_vector_df = feature_vector_df[(feature_vector_df['Commodities'] == 'APPLE GOLDEN DELICIOUS')].drop(columns='Commodities')
+    # Expand 'Date' into numeric 'Day'/'Month' columns, then rename positionally and one-hot encode.
+    # NOTE(review): the rename assumes exactly 13 columns in this order (incl. 'avg_price_per_kg'),
+    # and get_dummies on a request payload may not reproduce the training columns - confirm both.
+    feature_vector_df['Date'] = pd.to_datetime(feature_vector_df['Date'])
+    feature_vector_df['Day'] = feature_vector_df['Date'].dt.day
+    feature_vector_df['Month'] = feature_vector_df['Date'].dt.month
+    feature_vector_df.drop(['Date'], inplace = True, axis = 1)
+
+    feature_vector_df.columns = ['province', 'container', 'size_grade', 'weight_kg', 'low_price', 
+                 'high_price', 'sales_total', 'total_qty_sold','total_kg_sold', 
+                 'stock_on_hand', 'avg_price_per_kg', 'day', 'month']
+
+    predict_vector = pd.get_dummies(feature_vector_df,drop_first=False)
 
     # ------------------------------------------------------------------------
 

diff --git a/utils/request.py b/utils/request.py
@@ -36,7 +36,7 @@
 # replace the URL below with its public IP:
 
 # url = 'http://{public-ip-address-of-remote-machine}:5000/api_v0.1'
-url = 'http://127.0.0.1:5000/api_v0.1'
+url = 'http://34.242.216.154:5000/api_v0.1'
 
 # Perform the POST request.
 print(f"Sending POST request to web server API at: {url}")

diff --git a/utils/train_model.py b/utils/train_model.py
@@ -12,22 +12,41 @@
 # Dependencies
 import pandas as pd
 import pickle
-from sklearn.linear_model import LinearRegression
+from xgboost import XGBRegressor
+from sklearn.model_selection import train_test_split
 
 # Fetch training data and preprocess for modeling
 train = pd.read_csv('data/train_data.csv')
 
-train = train[(train['Commodities'] == 'APPLE GOLDEN DELICIOUS')]
+train = train[(train['Commodities'] == 'APPLE GOLDEN DELICIOUS')].drop(columns='Commodities')
 
-y_train = train['avg_price_per_kg']
-X_train = train[['Total_Qty_Sold','Stock_On_Hand']]
+# Replace the raw 'Date' column with numeric day-of-month and month features.
+train['Date'] = pd.to_datetime(train['Date'])
+train['Day'] = train['Date'].dt.day
+train['Month'] = train['Date'].dt.month
+train.drop(['Date'], inplace = True, axis = 1)
+
+# Positional rename: the 13 names must match the remaining columns in order.
+train.columns = ['province', 'container', 'size_grade', 'weight_kg', 'low_price',
+                 'high_price', 'sales_total', 'total_qty_sold','total_kg_sold',
+                 'stock_on_hand', 'avg_price_per_kg', 'day', 'month']
+
+# get_dummies one-hot encodes any object/category columns left in the frame.
+dummy_df = pd.get_dummies(train,drop_first=False)
+
+# Separate the predictors from the target price.
+X = dummy_df.drop('avg_price_per_kg',axis=1)
+y = dummy_df['avg_price_per_kg']
 
 # Fit model
-lm_regression = LinearRegression(normalize=True)
+xgbmodel = XGBRegressor(max_depth=2,min_child_weight=13,subsample=1,colsample_bytree=1,
+            objective='reg:squarederror',n_estimators=6000, learning_rate=0.3, random_state= 16)
 print ("Training Model...")
-lm_regression.fit(X_train, y_train)
+xgbmodel.fit(X, y)
 
 # Pickle model for use within our API
-save_path = '../assets/trained-models/apples_simple_lm_regression.pkl'
+save_path = '../assets/trained-models/xgbmodel.pkl'
 print (f"Training completed. Saving model to: {save_path}")
-pickle.dump(lm_regression, open(save_path,'wb'))
+# Use a context manager so the pickle file is flushed and closed even on error.
+with open(save_path, 'wb') as model_file:
+    pickle.dump(xgbmodel, model_file)