Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
279ef13
README.md
ranjit-2784 Jul 8, 2021
cb806e6
Update README.md
ranjit-2784 Jul 10, 2021
ffe5505
Update main.py
ranjit-2784 Jul 10, 2021
2c8a7b0
Update ml_utils.py
ranjit-2784 Jul 10, 2021
fc4e6df
Update test_app.py
ranjit-2784 Jul 10, 2021
6bfa9de
Update main.py
ranjit-2784 Jul 10, 2021
2d8226a
Train with different classifier
Jul 10, 2021
51a2c40
Train with different classifier
Jul 10, 2021
d2801f7
Train with different classifier
Jul 10, 2021
fea9d70
Train with different classifier
Jul 10, 2021
11fca7e
Train with different classifier
Jul 10, 2021
99709e3
Train with different classifier
Jul 10, 2021
cd66787
Train with different classifier
Jul 10, 2021
75f83ad
Train with different classifier
Jul 10, 2021
b63d433
Train with different classifier
Jul 10, 2021
bdc32d9
Train with different classifier
Jul 10, 2021
2ed34c1
Train with different classifier
Jul 10, 2021
a781cf6
Train with different classifier
Jul 10, 2021
9a6c5ce
Train with different classifier
Jul 10, 2021
bbbe208
Train with different classifier
Jul 11, 2021
b9d8014
Train with different classifier
Jul 11, 2021
0f72b66
Train with different classifier
Jul 11, 2021
def250f
Train with different classifier
Jul 11, 2021
79eb3b8
Train with different classifier
Jul 11, 2021
3b4ea55
2 classifiers
Jul 11, 2021
9854a12
2 classifiers
Jul 11, 2021
c128078
2 classifiers
Jul 11, 2021
1bdfb7a
2 classifiers
Jul 11, 2021
6389240
2 classifiers
Jul 11, 2021
a6541ce
Merge pull request #1 from ranjit-2784/changes
ranjit-2784 Jul 11, 2021
e95e237
test
Jul 11, 2021
93fe5ea
test
Jul 11, 2021
a110133
test
Jul 11, 2021
db289ab
test
Jul 11, 2021
fd75687
updated README
Jul 18, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# mlops-iris - Ranjit Kumar Gantasala.

ML-Ops Demo/Assignment

This repository contains code which demonstrates ML-Ops using a FastAPI application which predicts the flower class using the IRIS dataset (https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html)

Running Instructions

Create a fork of the repo using the fork button.
Clone your fork using git clone https://www.github.com/<your-username>/mlops-iris.git
Install dependencies using pip3 install -r requirements.txt
Run application using python3 main.py
Run tests using pytest

CI/CD

build (test) for all the pull requests
build (test) and upload_zip for all pushes

Assignment Tasks

1. Change this README to add your name here: Ranjit Kumar Gantasala. Add and commit changes to a new branch and create a pull request ONLY TO YOUR OWN FORK to see the CI/CD build happening. If the build succeeds, merge the pull request with master and see the CI/CD upload_zip take place.
2. Add 2 more unit tests of your choice to test_app.py and make sure they are passing.
3. Add one more classifier to startup and use only the one with better accuracy.
4. Add the attribute timestamp to the response and return the current time with it.
48 changes: 38 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,63 @@
from fastapi import FastAPI
from pydantic import BaseModel
from ml_utils import load_model, predict
from typing import List
import datetime

# defining the main app; docs_url="/" serves the interactive API docs at the root path
app = FastAPI(title="Iris Predictor", docs_url="/")

# calling the load_model during startup.
# this will train the model and keep it loaded for prediction.
app.add_event_handler("startup", load_model)

# class which is expected in the payload
class QueryIn(BaseModel):
    """Request body of /predict_flower: the four iris measurements.

    The declaration order of the fields matters: the prediction helper
    builds its feature vector from the values of ``.dict()`` in this order.
    """

    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float


# class which is returned in the response
class QueryOut(BaseModel):
    """Response body of /predict_flower."""

    # human-readable name of the predicted class
    flower_class: str
    # time at which the prediction was served, rendered as a string
    timestamp: str

# class which is expected in the payload while re-training
class FeedbackIn(BaseModel):
    """One feedback sample: the four measurements plus the correct class name.

    ``flower_class`` must remain the last field: the retraining helper
    takes every value of ``.dict()`` except the last one as the features.
    """

    sepal_length: float
    sepal_width: float
    petal_length: float
    petal_width: float
    flower_class: str

# Route definitions
@app.get("/ping")
def ping():
    """Healthcheck route to ensure that the API is up and running."""
    pong = {"ping": "pong"}
    return pong


@app.post("/predict_flower", response_model=QueryOut, status_code=200)
def predict_flower(query_data: QueryIn):
    """Route to do the prediction using the ML model defined.

    Payload: QueryIn containing the parameters.
    Response: QueryOut containing the predicted flower_class and the
    server time at which the request was handled (200).
    """
    # named `now` rather than `time` so the stdlib module name is not shadowed
    now = datetime.datetime.now()
    # QueryOut requires both keys; the timestamp is sent as str(datetime)
    return {"flower_class": predict(query_data), "timestamp": str(now)}

@app.post("/feedback_loop", status_code=200)
def feedback_loop(data: List[FeedbackIn]):
    """Route meant to further train the model from user feedback.

    Payload: a list of FeedbackIn items (parameters and correct flower class).
    Response: Dict with detail confirming success (200).
    """
    # NOTE: retraining is currently switched off; the call below is commented out,
    # so the payload is validated but not used.
    # retrain(data)
    confirmation = {"detail": "Feedback loop successful"}
    return confirmation


# Main function to start the app when main.py is called
if __name__ == "__main__":
    # Uvicorn is used to run the server and listen for incoming API requests on 0.0.0.0:8080
    # (the port was moved from 8888 to 8080; only one server is started)
    uvicorn.run("main:app", host="0.0.0.0", port=8080, reload=True)
70 changes: 54 additions & 16 deletions ml_utils.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,69 @@
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score

# define the candidate classifiers: a Gaussian NB model and a k-nearest-neighbours model
clf = GaussianNB()
clf1 = KNeighborsClassifier(n_neighbors=5)
# alternative that was tried for clf1: MLPClassifier(max_iter=500)

# classifier used for predictions; starts out as the Gaussian NB model
clf_better = clf

# define the class encodings and reverse encodings
classes = {0: "Iris Setosa", 1: "Iris Versicolour", 2: "Iris Virginica"}
r_classes = {name: code for code, name in classes.items()}

# function to train and load the model during startup
def load_model():
    """Train both candidate classifiers and keep the more accurate one.

    Loads the iris dataset, holds out 20% of it for evaluation, fits the
    Gaussian NB model (``clf``) and the k-nearest-neighbours model
    (``clf1``) on the remaining 80%, prints both hold-out accuracies and
    rebinds the module-level ``clf_better`` to the winner. On a tie the
    Gaussian NB model is kept.

    The split is not seeded, so the accuracies (and therefore the selected
    model) can differ from one start-up to the next.
    """
    # Without this declaration the assignment at the end would only create a
    # local variable and the module-level clf_better would never change.
    global clf_better

    # load the dataset from the official sklearn datasets
    X, y = datasets.load_iris(return_X_y=True)

    # do the test-train split and train both models on the same split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    clf.fit(X_train, y_train)
    clf1.fit(X_train, y_train)

    # calculate and print the accuracy scores
    acc = accuracy_score(y_test, clf.predict(X_test))
    print(f" GaussianNB Model trained with accuracy: {round(acc, 3)}")
    acc1 = accuracy_score(y_test, clf1.predict(X_test))
    print(f" KNeighborsClassifier Model trained with accuracy: {round(acc1, 3)}")

    # serve predictions with whichever model scored higher
    clf_better = clf1 if acc1 > acc else clf


# function to predict the flower using the model

def predict(query_data):
    """Return the class name predicted for one set of measurements.

    Args:
        query_data: object exposing ``.dict()`` whose values, in order, are
            the four feature values (the QueryIn payload).

    Returns:
        The human-readable class name looked up in ``classes``.
    """
    # the feature vector relies on the field declaration order of the payload
    features = list(query_data.dict().values())
    # predict with the selected model; the model expects a 2-D input, hence [features]
    prediction = clf_better.predict([features])[0]
    label = classes[prediction]
    print(f"Model prediction: {label}")
    return label

# function to retrain the model as part of the feedback loop
def retrain(data):
    """Refit the serving classifier on the supplied feedback samples.

    Args:
        data: sequence of FeedbackIn-like objects; every value of
            ``.dict()`` except the last is used as a feature and
            ``flower_class`` is the target class name.

    Raises:
        KeyError: if a ``flower_class`` is not one of the names in ``classes``.
    """
    # nothing to learn from an empty batch; fitting on no samples would raise
    if not data:
        return

    # pull out the relevant X and y from the FeedbackIn objects
    X = [list(d.dict().values())[:-1] for d in data]
    y = [r_classes[d.flower_class] for d in data]

    # Refit the classifier that predict() actually uses, so that feedback
    # takes effect whichever candidate is being served.
    # NOTE(review): fit() presumably replaces what the model learned at
    # start-up with this batch only; confirm that this is intended.
    clf_better.fit(X, y)
65 changes: 57 additions & 8 deletions test_app.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,70 @@
from fastapi.testclient import TestClient
from main import app


# test to check the correct functioning of the /ping route
def test_ping():
    """The healthcheck route answers 200 with the fixed pong body."""
    with TestClient(app) as client:
        reply = client.get("/ping")
        # asserting the correct response is received
        assert reply.status_code == 200
        assert reply.json() == {"ping": "pong"}


# test to check if Iris Virginica is classified correctly
def test_pred_virginica():
    """A virginica-like sample is answered with the Iris Virginica class."""
    # defining a sample payload for the testcase
    payload = {
        "sepal_length": 3,
        "sepal_width": 5,
        "petal_length": 3.2,
        "petal_width": 4.4,
    }
    with TestClient(app) as client:
        response = client.post("/predict_flower", json=payload)
        # asserting the correct response is received
        assert response.status_code == 200
        body = response.json()
        # the response also carries a timestamp that changes on every call,
        # so compare the class and only check that the timestamp is present
        assert body["flower_class"] == "Iris Virginica"
        assert "timestamp" in body

# test to check that the /feedback_loop route accepts a batch of feedback
def test_pred_Feedback():
    """Posting a list of feedback samples is acknowledged with 200."""
    # the route expects a LIST of feedback items, so the sample is wrapped in one;
    # the class name uses the same spelling as the model's class encodings
    payload = [
        {
            "sepal_length": 3.0,
            "sepal_width": 5.0,
            "petal_length": 3.2,
            "petal_width": 4.4,
            "flower_class": "Iris Virginica",
        }
    ]
    with TestClient(app) as client:
        response = client.post("/feedback_loop", json=payload)
        # asserting the correct Feedback is received
        assert response.status_code == 200
        assert response.json() == {"detail": "Feedback loop successful"}

def test_pred_setosa():
    """A setosa-like sample is answered with the Iris Setosa class."""
    # defining a sample payload for the testcase
    payload = {
        "sepal_length": 5.0,
        "sepal_width": 3.3,
        "petal_length": 1.4,
        "petal_width": 0.2,
    }
    with TestClient(app) as client:
        response = client.post("/predict_flower", json=payload)
        # asserting the correct response is received
        assert response.status_code == 200
        body = response.json()
        # the response also carries a timestamp that changes on every call,
        # so compare the class and only check that the timestamp is present
        assert body["flower_class"] == "Iris Setosa"
        assert "timestamp" in body

def test_pred_versicolour():
    """A versicolour-like sample is answered with the Iris Versicolour class."""
    # defining a sample payload for the testcase
    payload = {
        "sepal_length": 6.1,
        "sepal_width": 2.9,
        "petal_length": 4.7,
        "petal_width": 1.4,
    }
    with TestClient(app) as client:
        response = client.post("/predict_flower", json=payload)
        # asserting the correct response is received
        assert response.status_code == 200
        body = response.json()
        # the response also carries a timestamp that changes on every call,
        # so compare the class and only check that the timestamp is present
        assert body["flower_class"] == "Iris Versicolour"
        assert "timestamp" in body