Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 106 additions & 17 deletions FacialKeypointRecognition.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# import theano.Tensor as T
# from lasagne.nonlinearities import leaky_rectify, softmax
from pandas.io.parsers import read_csv
from pandas import DataFrame

import networks
from augmentation import histogrammEqualization
Expand All @@ -29,13 +30,27 @@ class FacialKeypointRecognition:
pictures of faces
"""

ftrain = './data/training_30.csv'
ftest = './data/test_30.csv'
fOutFile = './data/solution_30.csv'
ftrain = './data/training.csv'
ftest = './data/test.csv'
fOutFile = './data/solution.csv'

fOutputList = './data/SampleSubmission_30.csv'
fIdList = './data/IdList_30.csv'

fFeatureNames = './data/FeatureNames.csv'

# for network 8 we only use a subsample of columns
cols = None
cols8 = ["left_eye_center_x",
"left_eye_center_y",
"right_eye_center_x",
"right_eye_center_y",
"nose_tip_x",
"nose_tip_y",
"mouth_center_bottom_lip_x",
"mouth_center_bottom_lip_y"]

feature_mapping, id_mapping = {}, {}
X_train, y_train = [], []
X_test = []

Expand All @@ -56,12 +71,51 @@ def setData(self, number):
set data and solution filenames
"""

self.ftrain = './data/training_{}.csv'.format(number)
self.ftest = './data/test_{}.csv'.format(number)
# self.ftrain = './data/training_{}.csv'.format(number)
# self.ftest = './data/test_{}.csv'.format(number)
self.fOutFile = './data/solution_{}.csv'.format(number)
self.fIdList = './data/IdList_{}.csv'.format(number)
self.fOutputList = './data/SampleSubmission_{}.csv'.format(number)

def setMapping(self, cols):
"""
set a mapping from ids/Featurenames to the internal Id of our set

this mapping is needed when we want to save the set again

basically we have a list of names with an index that we filter against
the provided names in 'cols'

Example:

id | name
---------
1 | a
2 | b
3 | c

with cols ['a', 'c'] would give the following dict:
{1: 'a',
3: 'c'}
"""

feature_names = read_csv(os.path.expanduser(self.fFeatureNames),
names=['Id', 'Name'],
index_col='Id')
self.feature_mapping = \
feature_names[feature_names.Name.isin(cols)].Name.to_dict()

def setNetwork(self, number):
"""
set the state for either network 8 or 30
"""
if number is '8':
self.cols = self.cols8
self.network = networks.convolutionalNetwork8()
else:
self.cols = None
self.network = networks.convolutionalNetwork()

def loadData(self, *args, **kwargs):
"""
load the images
Expand All @@ -71,7 +125,7 @@ def loadData(self, *args, **kwargs):
The test-set only contains the images
"""

self.X_train, self.y_train = self.load(*args, **kwargs)
self.X_train, self.y_train = self.load(cols=self.cols, *args, **kwargs)

# the columns only exist in the trainingsset
kwargs.pop("cols", None)
Expand Down Expand Up @@ -113,10 +167,24 @@ def load(self,
if cols:
df = df[list(cols) + ['Image']]

# if we didn't select any columns, we need to set them here
# (just take all columns but 'Image')
if self.cols is None:
self.cols = df.columns[:-1]

# only use cols without missing values
if dropMissing:
df = df.dropna()

# create mapping between imageId and position in our numpy array
# cols['internalId'] = range(len(df))
# self.id_mapping = df['internalId'].to_dict()
# # swap index and value
# self.id_mapping = {v: k for k, v in self.id_mapping.items()}
self.index = df.index.copy()
self.index.name = 'imageId'
print(self.index)

# scale pixel values
if rescale:
X = np.vstack(df['Image'].values) / 255.
Expand Down Expand Up @@ -160,6 +228,26 @@ def predict(self, X=None):
self.prediction = self.network.predict(X)


def savePredictionNew(self, outputfilename=None):
"""
save the predicted coordinates to a csv file

this csv file only holds the predicted features
"""

if outputfilename is None:
outputfilename = '.data/solution.csv'
# transform predictions
prediction = self.prediction * 48 + 48
prediction = prediction.clip(0, 96)

# convert from numpy array to pandas dataframe
# and get our old index and columns back
outputset = DataFrame(prediction)
outputset.index = self.index
outputset.columns = self.cols
outputset.to_csv(self.fOutFile)

def savePrediction(self):
"""save the predicted coordinates into a csv file to upload"""

Expand Down Expand Up @@ -198,7 +286,9 @@ def savePrediction8(self):
idset = read_csv(os.path.expanduser(self.fIdList))

outputPrediction = []
mapping = {1:1, 2:2, 3:3, 4:4, 21:5, 22:6, 29:7, 30:8}
mapping = {1: 1, 2: 2, 3: 3, 4: 4,
5: 5, 6: 6, 7: 7, 8: 8,
21: 5, 22: 6, 29: 7, 30: 8}

for i in range(len(idset)):
# we only predict the second part of the set of images.
Expand Down Expand Up @@ -271,19 +361,21 @@ def main():
ap = ArgumentParser()
ap.add_argument('--picklefile', nargs='?',
help='saved state to resume from')
ap.add_argument('--network', nargs='?',
help='which network to use. '
'The only possible values right now are 8 or 30')
ap.add_argument('--epochs', nargs='?', type=int,
help='how many epochs the network should run fitting')
ap.add_argument('--dataset', nargs='?',
help='which dataset to load and use. '
'The only possible values right now are 8 or 30')
ap.add_argument('--outputfilename', nargs='?',
help='filename of the prediction output')
args = ap.parse_args()


if args.dataset is '8':
fkr = FacialKeypointRecognition(networks.convolutionalNetwork8())
else:
fkr = FacialKeypointRecognition(networks.convolutionalNetwork())

fkr = FacialKeypointRecognition()
if args.network:
fkr.setNetwork(args.network)

if args.picklefile:
# we have a pickle-file that we want to reuse
Expand All @@ -301,10 +393,7 @@ def main():
fkr.predict()
fkr.saveState()

if args.dataset is '8':
fkr.savePrediction8()
else:
fkr.savePrediction()
fkr.savePredictionNew(args.outputfilename)

# only run when loaded as top file
if __name__ == "__main__":
Expand Down