diff --git a/interface.py b/interface.py index 5927f5c..6521a8d 100644 --- a/interface.py +++ b/interface.py @@ -2,6 +2,9 @@ # format for text UI taken from project prompt import util from timeit import default_timer as timer +import argparse +import logging +import sys def GetFile(): @@ -9,7 +12,7 @@ def GetFile(): Get and parse data from the given file. :return: Parsed data. """ - print "Welcome to Tyson Loveless' Feature Selection Algorithm." + print("Welcome to Tyson Loveless' Feature Selection Algorithm.") name = raw_input('Type in the name of the file to test: ') return util.parse(name) @@ -19,42 +22,61 @@ def GetAlgorithm(): Allows the user to select an algorithm to run on a dataset :return: algorithm to be used. """ - print "Type the number of the algorithm you want to run." - print "\n 1) Forward Selection" - print "\n 2) Backward Elimination" - print "\n 3) Tyson's Genetic Algorithm\n" + print("Type the number of the algorithm you want to run.") + print("\n 1) Forward Selection") + print("\n 2) Backward Elimination") + print("\n 3) Tyson's Genetic Algorithm\n") search_type = input(' ') return search_type -def main(): +def get_algorithm(arg): + if arg == 'ss': + return 3 + elif arg == 'bs': + return 2 + elif arg == 'fs': + return 1 + + +def main(args): """ Main method of the program, implements the UI for running the feature selection algorithm(s) :return: null """ - data = GetFile() - search_type = GetAlgorithm() + data = util.parse(args.input) + search_type = get_algorithm(args.algorithm) n = data[0][1].__len__() - print "\nThis dataset has " + str(n) + " features (not including the class attribute), with " \ - + str(data.__len__()) + " instances." + logging.info("This dataset has {} features (not including the class attribute), with {} instances." + .format(n, len(data))) - print("\nPlease wait while I normalize the data... "), + logging.info("Please wait while I normalize the data... ") data = util.normalize(data) - print "Done!" + logging.info("Done!") accuracy = util.nearest_neighbor(data) - print "\nRunning nearest neighbor with all " + str(n) + " features, using \"leave-one-out\" evaluation, I get an accuracy of " + str(accuracy*100) + "%" + logging.info('Running nearest neighbor with all {} features, using "leave-one-out" ' + "evaluation, I get an accuracy of {}%".format(n, accuracy*100)) - print "\nBeginning search.\n" + logging.info("Beginning search.") start = timer() feature_set, accuracy = util.search(search_type, data) end = timer() - print "Finished search!! The best feature subset is {" + ', '.join(str(s+1) for s in feature_set) + "}, which has an accuracy of " + str(accuracy*100) + "%" - print "\nIt took " + str(end-start) + " seconds to find this feature set." + logging.info("Finished search!! The best feature subset is {" + ', '.join(str(s+1) for s in feature_set) + "}, which has an accuracy of " + str(accuracy*100) + "%") + logging.info("It took {} seconds to find this feature set.".format((end-start))) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("-a", "--algorithm", choices={'ss', 'fs', 'bs'}, required=True, + help="ss = Tyson's special sauce, bs = backward selection, fs = forward selection") + parser.add_argument('-i', '--input', required=True) + args = parser.parse_args(sys.argv[1:]) -main() + logging.basicConfig(level=logging.DEBUG) + main(args) diff --git a/util.py b/util.py index af5dafd..276d203 100644 --- a/util.py +++ b/util.py @@ -102,7 +102,7 @@ def search(option, data): while True: for i in range(level, num_features + 1): if DEBUG: - print "On the " + str(i) + "th level of the tree" + print("On the " + str(i) + "th level of the tree") best_so_far_accuracy = 0 best_feature_this_level = 0 count = 0 @@ -116,13 +116,13 @@ def search(option, data): continue if k not in current_feature_set: if DEBUG: - print " --Considering adding feature " + str(k) + print(" --Considering adding feature " + str(k)) accuracy = leave_one_out_cross_validation(data, current_feature_set.union({k}), best_per_level[i] * 100) temp.append([k, accuracy]) - print " Using feature(s) {" + ', '.join( + print(" Using feature(s) {" + ', '.join( str(s + 1) for s in current_feature_set.union({k})) + "} accuracy is " + str( - accuracy * 100) + "%" + accuracy * 100) + "%") total += 1 if accuracy >= best_so_far_accuracy: @@ -151,9 +151,9 @@ def search(option, data): if maxima: reset = True if DEBUG: - print "feature set {" + ', '.join( + print("feature set {" + ', '.join( str(s + 1) for s in best_feature_set) + "} has accuracy " + str( - accuracy) + accuracy)) if stack: checked.append(current_feature_set.union({best_feature_this_level})) @@ -167,12 +167,12 @@ def search(option, data): reset = False if best_so_far_accuracy <= 0: - print "\nNo improvement on this path\n" + print("\nNo improvement on this path\n") break if updated: - print "\nFeature set {" + ', '.join( + print("\nFeature set {" + ', '.join( str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str( - best_so_far_accuracy * 100) + "%\n" + best_so_far_accuracy * 100) + "%\n") for index, acc in temp: if acc < best_so_far_accuracy: @@ -182,12 +182,12 @@ def search(option, data): break # checked.append(copy.deepcopy(current_feature_set)) current_feature_set, level, this_accuracy = stack.pop() - print "(Checking a different path that tied at level " + str(level) + ")" - print "\nFeature set {" + ', '.join( - str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(this_accuracy) + "\n" + print("(Checking a different path that tied at level " + str(level) + ")") + print("\nFeature set {" + ', '.join( + str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(this_accuracy) + "\n") level += 1 - print "total number expanded: " + str(total) + print("total number expanded: " + str(total)) return best_feature_set, best_accuracy # backward elimination @@ -196,7 +196,7 @@ def search(option, data): while True: for i in range(num_features + 1 - level, 0, -1): if DEBUG: - print "On the " + str(i) + "th level of the tree" + print("On the " + str(i) + "th level of the tree") best_so_far_accuracy = 0 worst_feature_this_level = 0 count = 0 @@ -210,13 +210,13 @@ def search(option, data): continue if k in current_feature_set: if DEBUG: - print " --Considering removing feature " + str(k) + print(" --Considering removing feature " + str(k)) accuracy = leave_one_out_cross_validation(data, current_feature_set.difference({k}), best=best_per_level[i] * 100) temp.append([k, accuracy]) - print " Using feature(s) {" + ', '.join( + print(" Using feature(s) {" + ', '.join( str(s + 1) for s in current_feature_set.difference({k})) + "} accuracy is " + str( - accuracy * 100) + "%" + accuracy * 100) + "%") total += 1 if accuracy >= best_so_far_accuracy: @@ -245,9 +245,9 @@ def search(option, data): if maxima: reset = True if DEBUG: - print "feature set {" + ', '.join( + print("feature set {" + ', '.join( str(s + 1) for s in best_feature_set) + "} has accuracy " + str( - accuracy) + accuracy)) if stack: checked.append(current_feature_set.difference({worst_feature_this_level})) @@ -263,22 +263,22 @@ def search(option, data): maxima = False reset = False if best_so_far_accuracy <= 0: - print "\nNo improvement this path\n" + print("\nNo improvement this path\n") break if updated: - print "\nFeature set {" + ', '.join( + print("\nFeature set {" + ', '.join( str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str( - best_so_far_accuracy * 100) + "%\n" + best_so_far_accuracy * 100) + "%\n") if not stack: break current_feature_set, level, this_accuracy = stack.pop() - print "(Checking a different path that tied at level " + str(num_features + 1 - level) + ")" - print "\nFeature set {" + ', '.join( - str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(this_accuracy * 100) + "%\n" + print("(Checking a different path that tied at level " + str(num_features + 1 - level) + ")") + print("\nFeature set {" + ', '.join( + str(s + 1) for s in current_feature_set) + "} was best, accuracy is " + str(this_accuracy * 100) + "%\n") level += 1 - print "total number expanded: " + str(total) + print("total number expanded: " + str(total)) return best_feature_set, best_accuracy # my searching function @@ -309,7 +309,7 @@ def search(option, data): print("\nPerforming crossover and mutations...") population = generation(population, fitness, num_features) print("Done!\n") - print "Total number cross-validated: " + str(total) + print("Total number cross-validated: " + str(total)) return best_feature_set, best_accuracy @@ -337,8 +337,8 @@ def selection(data, population): best = [x[1] for i, x in enumerate(fitness) if i in range(0, length)] acc = [x[0] for i, x in enumerate(fitness) if i in range(0, length)] if length > 0: - print "Top " + str(length) + " feature sets:\n{" + '\n{'.join( - ', '.join(str(i + 1) for i in list(s)) + '} with accuracy ' + str(x) for s, x in zip(best, acc)) + print("Top " + str(length) + " feature sets:\n{" + '\n{'.join( + ', '.join(str(i + 1) for i in list(s)) + '} with accuracy ' + str(x) for s, x in zip(best, acc))) return best, acc, fitness[0][1], total @@ -358,11 +358,11 @@ def generation(population, fitness, num_features): if len(new) == 0: continue if new not in pop: - print " New individual added to population: {" + ', '.join(str(s + 1) for s in new) + "}" + print(" New individual added to population: {" + ', '.join(str(s + 1) for s in new) + "}") pop.add(frozenset(new)) else: new = mutation(feature_set, num_features) - print " New individual added to population: {" + ', '.join(str(s + 1) for s in new) + "}" + print(" New individual added to population: {" + ', '.join(str(s + 1) for s in new) + "}") pop.add(frozenset(new)) new = best_crossover(population[0], population) pop.add(frozenset(new))