zjesko · ranjit-2784 · Jul 8, 2021 · Jul 10, 2021 · Jul 10, 2021 · Jul 10, 2021
diff --git a/README.md b/README.md
@@ -0,0 +1,25 @@
+# mlops-iris - Ranjit Kumar Gantasala.
+
+ML-Ops Demo/Assignment
+
+This repository contains code which demonstrates ML-Ops using a FastAPI application which predicts the flower class using the IRIS dataset (https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html)
+
+## Running Instructions
+
+- Create a fork of the repo using the `fork` button.
+- Clone your fork using `git clone https://www.github.com/<your-username>/mlops-iris.git`
+- Install dependencies using `pip3 install -r requirements.txt`
+- Run the application using `python3 main.py`
+- Run tests using `pytest`
+
+## CI/CD
+
+- `build` (test) for all the pull requests
+- `build` (test) and `upload_zip` for all pushes
+
+## Assignment Tasks
+
+1. Change this README to add your name here: . Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD `upload_zip` take place.
+2. Add 2 more unit tests of your choice to `test_app.py` and make sure they are passing.
+3. Add one more classifier to startup and use only the one with better accuracy.
+4. Add the attribute `timestamp` to the response and return the current time with it.
diff --git a/main.py b/main.py
@@ -2,35 +2,63 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
 from ml_utils import load_model, predict
+from typing import List
+import datetime
 
-app = FastAPI(
-    title="Iris Predictor",
-    docs_url="/"
-)
+# defining the main app
+app = FastAPI(title="Iris Predictor", docs_url="/")
 
+# calling the load_model during startup.
+# this will train the model and keep it loaded for prediction.
 app.add_event_handler("startup", load_model)
 
+# class which is expected in the payload
 class QueryIn(BaseModel):
     sepal_length: float
     sepal_width: float
     petal_length: float
     petal_width: float
 
+
+# class which is returned in the response
 class QueryOut(BaseModel):
     flower_class: str
+    timestamp: str
 
+# class which is expected in the payload while re-training (flower_class must stay last: retrain() slices the final value off)
+class FeedbackIn(BaseModel):
+    sepal_length: float
+    sepal_width: float
+    petal_length: float
+    petal_width: float
+    flower_class: str  # label text; retrain() looks it up in ml_utils.r_classes
 
+# Route definitions
 @app.get("/ping")
+# Healthcheck route to ensure that the API is up and running
 def ping():
     return {"ping": "pong"}
 
-
 @app.post("/predict_flower", response_model=QueryOut, status_code=200)
-def predict_flower(
-    query_data: QueryIn
-):
-    output = {'flower_class': predict(query_data)}
+# Route to do the prediction using the ML model defined.
+# Payload: QueryIn containing the parameters
+# Response: QueryOut containing the flower_class predicted (200)
+def predict_flower(query_data: QueryIn):
+    # timezone-aware UTC timestamp in ISO-8601 so clients can parse it unambiguously
+    now = datetime.datetime.now(datetime.timezone.utc)
+    return {"flower_class": predict(query_data), "timestamp": now.isoformat()}
 
+@app.post("/feedback_loop", status_code=200)
+# Route to further train the model based on user input in form of feedback loop
+# Payload: FeedbackIn containing the parameters and correct flower class
+# Response: Dict with detail confirming success (200)
+def feedback_loop(data: List[FeedbackIn]):
+    # NOTE(review): retrain(data) is disabled, so feedback is accepted but not used to retrain the model
+    return {"detail": "Feedback loop successful"}
+
+
+# Main function to start the app when main.py is called
 if __name__ == "__main__":
-    uvicorn.run("main:app", host='0.0.0.0', port=8888, reload=True)
+    # Uvicorn is used to run the server and listen for incoming API requests on 0.0.0.0:8080
+    # (the port was changed from 8888 to 8080 in this revision)
+    uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=True)
diff --git a/ml_utils.py b/ml_utils.py
@@ -1,31 +1,59 @@
 from sklearn import datasets
 from sklearn.model_selection import train_test_split
 from sklearn.naive_bayes import GaussianNB
+from sklearn.tree import DecisionTreeClassifier, plot_tree
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.neural_network import MLPClassifier
 from sklearn.metrics import accuracy_score
 
+# define the candidate classifiers: Gaussian NB and k-nearest neighbours
 clf = GaussianNB()
+clf1 = KNeighborsClassifier(n_neighbors=5)
+# classifier used by predict(); load_model() rebinds it to the more accurate candidate
+clf_better = clf
 
-classes = {
-    0: "Iris Setosa",
-    1: "Iris Versicolour",
-    2: "Iris Virginica"
-}
+# define the class encodings and reverse encodings
+classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"}
+r_classes = {y: x for x, y in classes.items()}
 
+# function to train and load the model during startup
+# (trains both candidates on the same split and keeps the more accurate one)
 def load_model():
-	X, y = datasets.load_iris(return_X_y=True)
+    # `global` is required: without it the assignment at the end would only
+    # create a local variable and predict() would keep using GaussianNB
+    global clf_better
+
+    # load the dataset from the official sklearn datasets
+    X, y = datasets.load_iris(return_X_y=True)
 
-	X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2)
-	clf.fit(X_train, y_train)
+    # do the test-train split and train both candidates on the same split
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
+    clf.fit(X_train, y_train)  # GaussianNB
+    clf1.fit(X_train, y_train)  # KNeighborsClassifier
 
-	acc = accuracy_score(y_test, clf.predict(X_test))
-	print(f"Model trained with accuracy: {round(acc, 3)}")
+    # calculate and print the accuracy score of each candidate
+    acc = accuracy_score(y_test, clf.predict(X_test))
+    print(f" GaussianNB Model trained with accuracy: {round(acc, 3)}")
+    acc1 = accuracy_score(y_test, clf1.predict(X_test))
+    print(f" KNeighborsClassifier Model trained with accuracy: {round(acc1, 3)}")
+
+    # keep the candidate with the higher test accuracy (ties keep GaussianNB)
+    clf_better = clf1 if acc1 > acc else clf
 
+# function to predict the flower using the model
 def predict(query_data):
-	x = list(query_data.dict().values())
-	prediction = clf.predict([x])[0] 
-	print(f"Model prediction: {classes[prediction]}")
-	return classes[prediction]
-
-
+    x = list(query_data.dict().values())
+    prediction = clf_better.predict([x])[0]
+    print(f"Model prediction: {classes[prediction]}")
+    return classes[prediction]
 
+# function to retrain the model as part of the feedback loop
+def retrain(data):
+    # pull out the relevant X and y from the FeedbackIn object
+    X = [list(d.dict().values())[:-1] for d in data]
+    y = [r_classes[d.flower_class] for d in data]
 
+    # fit the serving classifier (the one predict() uses) on the new data obtained
+    # NOTE(review): fit() only sees the feedback samples here - confirm that dropping the startup training data is intended
+    clf_better.fit(X, y)
diff --git a/test_app.py b/test_app.py
@@ -1,21 +1,70 @@
 from fastapi.testclient import TestClient
 from main import app
 
-
+# test to check the correct functioning of the /ping route
 def test_ping():
     with TestClient(app) as client:
         response = client.get("/ping")
+        # asserting the correct response is received
         assert response.status_code == 200
-        assert response.json() == {"ping":"pong"}
+        assert response.json() == {"ping": "pong"}
+
 
+# test to check if Iris Virginica is classified correctly
 def test_pred_virginica():
+    # defining a sample payload for the testcase
+    payload = {
+        "sepal_length": 3,
+        "sepal_width": 5,
+        "petal_length": 3.2,
+        "petal_width": 4.4,
+    }
+    with TestClient(app) as client:
+        response = client.post("/predict_flower", json=payload)
+        # the response also carries a timestamp, so only flower_class is compared
+        assert response.status_code == 200
+        assert response.json()["flower_class"] == "Iris Virginica"
+
+# test to check that the /feedback_loop route accepts a list of labelled samples
+def test_pred_Feedback():
+    # defining a sample payload for the testcase
+    payload = {
+        "sepal_length": 3.0,
+        "sepal_width": 5.0,
+        "petal_length": 3.2,
+        "petal_width": 4.4,
+        "flower_class": "Iris Virginica",
+    }
+    with TestClient(app) as client:
+        # the route is declared with List[FeedbackIn], so the body must be a JSON array
+        response = client.post("/feedback_loop", json=[payload])
+        assert response.status_code == 200
+        assert response.json() == {"detail": "Feedback loop successful"}
+
+def test_pred_setosa():
+    # defining a sample payload for the testcase
+    payload = {
+        "sepal_length": 5.0,
+        "sepal_width": 3.3,
+        "petal_length": 1.4,
+        "petal_width": 0.2,
+    }
+    with TestClient(app) as client:
+        response = client.post("/predict_flower", json=payload)
+        # the response also carries a timestamp, so only flower_class is compared
+        assert response.status_code == 200
+        assert response.json()["flower_class"] == "Iris Setosa"
+
+def test_pred_versicolour():
+    # defining a sample payload for the testcase
     payload = {
-      "sepal_length": 3,
-      "sepal_width": 5,
-      "petal_length": 3.2,
-      "petal_width": 4.4
+        "sepal_length": 6.1,
+        "sepal_width": 2.9,
+        "petal_length": 4.7,
+        "petal_width": 1.4,
     }
     with TestClient(app) as client:
-        response = client.post('/predict_flower', json=payload)
+        response = client.post("/predict_flower", json=payload)
+        # the response also carries a timestamp, so only flower_class is compared
         assert response.status_code == 200
-        assert response.json() == {'flower_class': "Iris Virginica"}
+        assert response.json()["flower_class"] == "Iris Versicolour"