Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 39 additions & 17 deletions interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
# format for text UI taken from project prompt
import util
from timeit import default_timer as timer
import argparse
import logging
import sys


def GetFile():
    """
    Prompt the user for a dataset file name and parse that file.

    :return: Parsed data, exactly as returned by ``util.parse``.
    """
    print("Welcome to Tyson Loveless' Feature Selection Algorithm.")
    # raw_input only exists on Python 2; on Python 3 the equivalent
    # line-reading builtin is input().
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    name = read_line('Type in the name of the file to test: ')
    return util.parse(name)

Expand All @@ -19,42 +22,61 @@ def GetAlgorithm():
Allows the user to select an algorithm to run on a dataset
:return: algorithm to be used.
"""
print "Type the number of the algorithm you want to run."
print "\n 1) Forward Selection"
print "\n 2) Backward Elimination"
print "\n 3) Tyson's Genetic Algorithm\n"
print("Type the number of the algorithm you want to run.")
print("\n 1) Forward Selection")
print("\n 2) Backward Elimination")
print("\n 3) Tyson's Genetic Algorithm\n")
search_type = input(' ')
return search_type


def main():
def get_algorithm(arg):
    """
    Translate a command-line algorithm code into the numeric search option.

    :param arg: one of 'fs' (forward selection), 'bs' (backward selection)
                or 'ss' (Tyson's special sauce).
    :return: 1 for 'fs', 2 for 'bs', 3 for 'ss'.
    :raises ValueError: if ``arg`` is not one of the known codes.
    """
    codes = {'fs': 1, 'bs': 2, 'ss': 3}
    try:
        return codes[arg]
    except KeyError:
        # Fail loudly here instead of handing None to the search routine.
        raise ValueError("unknown algorithm code: {!r} (expected one of {})"
                         .format(arg, ', '.join(sorted(codes))))


def main(args):
    """
    Run the selected feature selection algorithm on the given dataset and
    report progress and results through the logging module.

    :param args: parsed command-line arguments; ``args.input`` is passed to
                 ``util.parse`` and ``args.algorithm`` to ``get_algorithm``.
    :return: None
    """
    data = util.parse(args.input)
    search_type = get_algorithm(args.algorithm)

    # Feature count is taken from the first instance.
    # NOTE(review): assumes data is non-empty and data[0][1] holds the
    # feature values of an instance -- confirm against util.parse.
    n = len(data[0][1])

    logging.info("This dataset has %s features (not including the class attribute), with %s instances.",
                 n, len(data))

    logging.info("Please wait while I normalize the data... ")
    data = util.normalize(data)
    logging.info("Done!")

    accuracy = util.nearest_neighbor(data)
    logging.info('Running nearest neighbor with all %s features, using "leave-one-out" '
                 "evaluation, I get an accuracy of %s%%", n, accuracy * 100)

    logging.info("Beginning search.")

    # Time only the search itself, not parsing or normalization.
    start = timer()
    feature_set, accuracy = util.search(search_type, data)
    end = timer()

    # Features are reported 1-based, hence the s + 1.
    logging.info("Finished search!! The best feature subset is {%s}, which has an accuracy of %s%%",
                 ', '.join(str(s + 1) for s in feature_set), accuracy * 100)
    logging.info("It took %s seconds to find this feature set.", end - start)


if __name__ == '__main__':
    # Command-line entry point: configure logging, parse options, run main().
    logging.basicConfig(level=logging.DEBUG)

    parser = argparse.ArgumentParser()
    # A tuple (not a set) keeps the choices in a fixed order wherever
    # argparse lists them in usage and error text.
    parser.add_argument("-a", "--algorithm", choices=('fs', 'bs', 'ss'), required=True,
                        help="ss = Tyson's special sauce, bs = backward selection, fs = forward selection")
    parser.add_argument('-i', '--input', required=True,
                        help="name of the dataset file to parse")

    args = parser.parse_args(sys.argv[1:])

    # main() requires the parsed arguments; calling it without them raises TypeError.
    main(args)
62 changes: 31 additions & 31 deletions util.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def search(option, data):
while True:
for i in range(level, num_features + 1):
if DEBUG:
print "On the " + str(i) + "th level of the tree"
print("On the " + str(i) + "th level of the tree")
best_so_far_accuracy = 0
best_feature_this_level = 0
count = 0
Expand All @@ -116,13 +116,13 @@ def search(option, data):
continue
if k not in current_feature_set:
if DEBUG:
print " --Considering adding feature " + str(k)
print(" --Considering adding feature " + str(k))
accuracy = leave_one_out_cross_validation(data, current_feature_set.union({k}),
best_per_level[i] * 100)
temp.append([k, accuracy])
print " Using feature(s) {" + ', '.join(
print(" Using feature(s) {" + ', '.join(
str(s + 1) for s in current_feature_set.union({k})) + "} accuracy is " + str(
accuracy * 100) + "%"
accuracy * 100) + "%")
total += 1

if accuracy >= best_so_far_accuracy:
Expand Down Expand Up @@ -151,9 +151,9 @@ def search(option, data):
if maxima:
reset = True
if DEBUG:
print "feature set {" + ', '.join(
print("feature set {" + ', '.join(
str(s + 1) for s in best_feature_set) + "} has accuracy " + str(
accuracy)
accuracy))

if stack:
checked.append(current_feature_set.union({best_feature_this_level}))
Expand All @@ -167,12 +167,12 @@ def search(option, data):
reset = False

if best_so_far_accuracy <= 0:
print "\nNo improvement on this path\n"
print("\nNo improvement on this path\n")
break
if updated:
print "\nFeature set {" + ', '.join(
print("\nFeature set {" + ', '.join(
str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(
best_so_far_accuracy * 100) + "%\n"
best_so_far_accuracy * 100) + "%\n")

for index, acc in temp:
if acc < best_so_far_accuracy:
Expand All @@ -182,12 +182,12 @@ def search(option, data):
break
# checked.append(copy.deepcopy(current_feature_set))
current_feature_set, level, this_accuracy = stack.pop()
print "(Checking a different path that tied at level " + str(level) + ")"
print "\nFeature set {" + ', '.join(
str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(this_accuracy) + "\n"
print("(Checking a different path that tied at level " + str(level) + ")")
print("\nFeature set {" + ', '.join(
str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(this_accuracy) + "\n")
level += 1

print "total number expanded: " + str(total)
print("total number expanded: " + str(total))

return best_feature_set, best_accuracy
# backward elimination
Expand All @@ -196,7 +196,7 @@ def search(option, data):
while True:
for i in range(num_features + 1 - level, 0, -1):
if DEBUG:
print "On the " + str(i) + "th level of the tree"
print("On the " + str(i) + "th level of the tree")
best_so_far_accuracy = 0
worst_feature_this_level = 0
count = 0
Expand All @@ -210,13 +210,13 @@ def search(option, data):
continue
if k in current_feature_set:
if DEBUG:
print " --Considering removing feature " + str(k)
print(" --Considering removing feature " + str(k))
accuracy = leave_one_out_cross_validation(data, current_feature_set.difference({k}),
best=best_per_level[i] * 100)
temp.append([k, accuracy])
print " Using feature(s) {" + ', '.join(
print(" Using feature(s) {" + ', '.join(
str(s + 1) for s in current_feature_set.difference({k})) + "} accuracy is " + str(
accuracy * 100) + "%"
accuracy * 100) + "%")
total += 1

if accuracy >= best_so_far_accuracy:
Expand Down Expand Up @@ -245,9 +245,9 @@ def search(option, data):
if maxima:
reset = True
if DEBUG:
print "feature set {" + ', '.join(
print("feature set {" + ', '.join(
str(s + 1) for s in best_feature_set) + "} has accuracy " + str(
accuracy)
accuracy))

if stack:
checked.append(current_feature_set.difference({worst_feature_this_level}))
Expand All @@ -263,22 +263,22 @@ def search(option, data):
maxima = False
reset = False
if best_so_far_accuracy <= 0:
print "\nNo improvement this path\n"
print("\nNo improvement this path\n")
break
if updated:
print "\nFeature set {" + ', '.join(
print("\nFeature set {" + ', '.join(
str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(
best_so_far_accuracy * 100) + "%\n"
best_so_far_accuracy * 100) + "%\n")

if not stack:
break
current_feature_set, level, this_accuracy = stack.pop()
print "(Checking a different path that tied at level " + str(num_features + 1 - level) + ")"
print "\nFeature set {" + ', '.join(
str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(this_accuracy * 100) + "%\n"
print("(Checking a different path that tied at level " + str(num_features + 1 - level) + ")")
print("\nFeature set {" + ', '.join(
str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(this_accuracy * 100) + "%\n")
level += 1

print "total number expanded: " + str(total)
print("total number expanded: " + str(total))

return best_feature_set, best_accuracy
# my searching function
Expand Down Expand Up @@ -309,7 +309,7 @@ def search(option, data):
print("\nPerforming crossover and mutations...")
population = generation(population, fitness, num_features)
print("Done!\n")
print "Total number cross-validated: " + str(total)
print("Total number cross-validated: " + str(total))
return best_feature_set, best_accuracy


Expand Down Expand Up @@ -337,8 +337,8 @@ def selection(data, population):
best = [x[1] for i, x in enumerate(fitness) if i in range(0, length)]
acc = [x[0] for i, x in enumerate(fitness) if i in range(0, length)]
if length > 0:
print "Top " + str(length) + " feature sets:\n{" + '\n{'.join(
', '.join(str(i + 1) for i in list(s)) + '} with accuracy ' + str(x) for s, x in zip(best, acc))
print("Top " + str(length) + " feature sets:\n{" + '\n{'.join(
', '.join(str(i + 1) for i in list(s)) + '} with accuracy ' + str(x) for s, x in zip(best, acc)))
return best, acc, fitness[0][1], total


Expand All @@ -358,11 +358,11 @@ def generation(population, fitness, num_features):
if len(new) == 0:
continue
if new not in pop:
print " New individual added to population: {" + ', '.join(str(s + 1) for s in new) + "}"
print(" New individual added to population: {" + ', '.join(str(s + 1) for s in new) + "}")
pop.add(frozenset(new))
else:
new = mutation(feature_set, num_features)
print " New individual added to population: {" + ', '.join(str(s + 1) for s in new) + "}"
print(" New individual added to population: {" + ', '.join(str(s + 1) for s in new) + "}")
pop.add(frozenset(new))
new = best_crossover(population[0], population)
pop.add(frozenset(new))
Expand Down