From baee76bdd683e253a21d859209ea6d8385548f7c Mon Sep 17 00:00:00 2001 From: Joern Hees Date: Mon, 15 Nov 2010 17:06:00 +0100 Subject: [PATCH 1/7] chmod 755 sortvis --- sortvis | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 sortvis diff --git a/sortvis b/sortvis old mode 100644 new mode 100755 From be8ddbd9b4090c102787a5d3e81173b39b4dc039 Mon Sep 17 00:00:00 2001 From: Joern Hees Date: Tue, 16 Nov 2010 23:48:51 +0100 Subject: [PATCH 2/7] All comparisons get logged, ending up in TrackList.comparisonList. As all comparisons are logged, we don't need to call lst.log() from the sorting algorithms anymore (which is good, as they can now be reused for things other than visualization as well). Also using new-style classes now. Timsort needed special treatment, as access to the lst while lst.sort() runs is denied (pointers get set to []). The existing workaround was adapted. Sortable was renamed to Comparator (seemed like a more appropriate name to me). TrackList.wrap was renamed and modified into setComparator, which now allows substituting the existing comparator wrapper (or wrapping it again), giving more direct control of what happens (e.g., used in TimComparator). 
--- libsortvis/algos/bitonicsort.py | 2 +- libsortvis/algos/bubblesort.py | 2 +- libsortvis/algos/cocktailsort.py | 4 +- libsortvis/algos/combsort.py | 2 +- libsortvis/algos/gnomesort.py | 2 +- libsortvis/algos/heapsort.py | 4 +- libsortvis/algos/insertionsort.py | 2 +- libsortvis/algos/mergesort.py | 2 +- libsortvis/algos/oddevensort.py | 2 +- libsortvis/algos/quicksort.py | 5 +-- libsortvis/algos/radixsort.py | 4 +- libsortvis/algos/selectionsort.py | 4 +- libsortvis/algos/shellsort.py | 2 +- libsortvis/algos/smoothsort.py | 4 +- libsortvis/algos/stoogesort.py | 2 +- libsortvis/algos/timsort.py | 48 +++++++++++---------- libsortvis/sortable.py | 72 ++++++++++++++++++++++++------- 17 files changed, 103 insertions(+), 60 deletions(-) diff --git a/libsortvis/algos/bitonicsort.py b/libsortvis/algos/bitonicsort.py index 2f2162b..31bf3a8 100644 --- a/libsortvis/algos/bitonicsort.py +++ b/libsortvis/algos/bitonicsort.py @@ -5,7 +5,7 @@ def compare(lst, i, j, dir): if dir == (lst[i] > lst[j]): lst[i], lst[j] = lst[j], lst[i] - lst.log() +# lst.log() def merge(lst, lo, n, dir): diff --git a/libsortvis/algos/bubblesort.py b/libsortvis/algos/bubblesort.py index 8b41cff..8281f18 100644 --- a/libsortvis/algos/bubblesort.py +++ b/libsortvis/algos/bubblesort.py @@ -6,7 +6,7 @@ def bubblesort(lst): for j in range(bound): if lst[j] > lst[j+1]: lst[j], lst[j+1] = lst[j+1], lst[j] - lst.log() +# lst.log() t = j if t == 0: break diff --git a/libsortvis/algos/cocktailsort.py b/libsortvis/algos/cocktailsort.py index bcbe98e..493ec5f 100644 --- a/libsortvis/algos/cocktailsort.py +++ b/libsortvis/algos/cocktailsort.py @@ -7,7 +7,7 @@ def cocktailsort(lst): for i in xrange(begin, end): if lst[i] > lst[i + 1]: lst[i], lst[i + 1] = lst[i + 1], lst[i] - lst.log() +# lst.log() finished = False if finished: break @@ -16,7 +16,7 @@ def cocktailsort(lst): for i in reversed(xrange(begin, end)): if lst[i] > lst[i + 1]: lst[i], lst[i + 1] = lst[i + 1], lst[i] - lst.log() +# lst.log() finished = 
False begin += 1 diff --git a/libsortvis/algos/combsort.py b/libsortvis/algos/combsort.py index 550a54d..fe4e9cf 100644 --- a/libsortvis/algos/combsort.py +++ b/libsortvis/algos/combsort.py @@ -8,7 +8,7 @@ def combsort(lst): for i in xrange(len(lst) - gap): if lst[i] > lst[i + gap]: lst[i], lst[i + gap] = lst[i + gap], lst[i] - lst.log() +# lst.log() swaps = True if not swaps and gap <= 1: break diff --git a/libsortvis/algos/gnomesort.py b/libsortvis/algos/gnomesort.py index 3855b0a..d3d14c2 100644 --- a/libsortvis/algos/gnomesort.py +++ b/libsortvis/algos/gnomesort.py @@ -6,7 +6,7 @@ def gnomesort(lst): i += 1 else: lst[i], lst[i - 1] = lst[i - 1], lst[i] - lst.log() +# lst.log() i -= 1 diff --git a/libsortvis/algos/heapsort.py b/libsortvis/algos/heapsort.py index b55ea0d..c90d964 100644 --- a/libsortvis/algos/heapsort.py +++ b/libsortvis/algos/heapsort.py @@ -7,7 +7,7 @@ def sift(lst, start, count): child += 1 if lst[root] < lst[child]: lst[root], lst[child] = lst[child], lst[root] - lst.log() +# lst.log() root = child else: return @@ -20,6 +20,6 @@ def heapsort(lst): start -= 1 while end > 0: lst[end], lst[0] = lst[0], lst[end] - lst.log() +# lst.log() sift(lst, 0, end) end -= 1 diff --git a/libsortvis/algos/insertionsort.py b/libsortvis/algos/insertionsort.py index 004ae48..1acbc0e 100644 --- a/libsortvis/algos/insertionsort.py +++ b/libsortvis/algos/insertionsort.py @@ -5,4 +5,4 @@ def insertionsort(lst): if lst[i] < lst[j]: x = lst.pop(i) lst.insert(j, x) - lst.log() +# lst.log() diff --git a/libsortvis/algos/mergesort.py b/libsortvis/algos/mergesort.py index 37b38af..f00d37e 100644 --- a/libsortvis/algos/mergesort.py +++ b/libsortvis/algos/mergesort.py @@ -13,5 +13,5 @@ def mergesort(lst, left=0, right=None): i += 1 continue lst[i], lst[i+1:j+1] = lst[j], lst[i:j] - lst.log() +# lst.log() i, end_i, j = i + 1, end_i + 1, j + 1 diff --git a/libsortvis/algos/oddevensort.py b/libsortvis/algos/oddevensort.py index dda24da..09f0d9f 100644 --- 
a/libsortvis/algos/oddevensort.py +++ b/libsortvis/algos/oddevensort.py @@ -7,7 +7,7 @@ def oddevensort(lst, nloops=2): for i in xrange(n, len(lst) - 1, nloops): if lst[i] > lst[i + 1]: lst[i], lst[i + 1] = lst[i + 1], lst[i] - lst.log() +# lst.log() finished = False diff --git a/libsortvis/algos/quicksort.py b/libsortvis/algos/quicksort.py index 3bace5d..d1cdbbc 100644 --- a/libsortvis/algos/quicksort.py +++ b/libsortvis/algos/quicksort.py @@ -1,4 +1,3 @@ - def quicksort(lst, left=0, right=None): if right is None: right = len(lst) - 1 @@ -13,8 +12,8 @@ def quicksort(lst, left=0, right=None): r -= 1 if l <= r: lst[l], lst[r] = lst[r], lst[l] - if l != r: - lst.log() +# if l != r: +# lst.log() l+=1 r-=1 if left < r: diff --git a/libsortvis/algos/radixsort.py b/libsortvis/algos/radixsort.py index 9fbc96f..038f00d 100644 --- a/libsortvis/algos/radixsort.py +++ b/libsortvis/algos/radixsort.py @@ -19,11 +19,11 @@ def radixsort(lst): for j, item in enumerate(chain(zeroes, orig, ones)): lst[j] = item # for a more simple graph, comment out the line below - lst.log() +# lst.log() # if is_sorted(lst): return - lst.log() +# lst.log() shift = shift << 1 zeroes[:] = [] ones[:] = [] diff --git a/libsortvis/algos/selectionsort.py b/libsortvis/algos/selectionsort.py index e0cff7e..084d4e6 100644 --- a/libsortvis/algos/selectionsort.py +++ b/libsortvis/algos/selectionsort.py @@ -3,6 +3,6 @@ def selectionsort(lst): for j in range(len(lst)-1, -1, -1): m = lst.index(max(lst[:j+1])) # No, this is not efficient ;) lst[m], lst[j] = lst[j], lst[m] - if m != j: - lst.log() +# if m != j: +# lst.log() diff --git a/libsortvis/algos/shellsort.py b/libsortvis/algos/shellsort.py index 7269ca1..96ea2dd 100644 --- a/libsortvis/algos/shellsort.py +++ b/libsortvis/algos/shellsort.py @@ -11,7 +11,7 @@ def shellsort(lst): flag = 1 lst[i+h], lst[i] = lst[i], lst[i+h] i -= h - lst.log() +# lst.log() else: break lst[i+h] = r diff --git a/libsortvis/algos/smoothsort.py b/libsortvis/algos/smoothsort.py 
index 5a7366d..7b10443 100644 --- a/libsortvis/algos/smoothsort.py +++ b/libsortvis/algos/smoothsort.py @@ -33,7 +33,7 @@ def sift(lst, pshift, head): lst[head], lst[rt] = lst[rt], lst[head] head = rt pshift -= 2 - lst.log() +# lst.log() def trinkle(lst, p, pshift, head, trusty): @@ -47,7 +47,7 @@ def trinkle(lst, p, pshift, head, trusty): if lst[rt] >= lst[stepson] or lst[lf] >= lst[stepson]: break lst[head], lst[stepson] = lst[stepson], lst[head] - lst.log() +# lst.log() head = stepson trail = trailingzeroes(p & ~1) p >>= trail diff --git a/libsortvis/algos/stoogesort.py b/libsortvis/algos/stoogesort.py index 7156c28..77e72b9 100644 --- a/libsortvis/algos/stoogesort.py +++ b/libsortvis/algos/stoogesort.py @@ -4,7 +4,7 @@ def stoogesort(lst, i=0, j=None): j = len(lst) - 1 if lst[j] < lst[i]: lst[i], lst[j] = lst[j], lst[i] - lst.log() +# lst.log() if j - i > 1: t = (j - i + 1) // 3 stoogesort(lst, i, j - t) diff --git a/libsortvis/algos/timsort.py b/libsortvis/algos/timsort.py index 1cb6426..972616e 100644 --- a/libsortvis/algos/timsort.py +++ b/libsortvis/algos/timsort.py @@ -1,38 +1,40 @@ +from ..sortable import Comparator +class TimBreak(Exception): + def __init__(self, *args): + super(TimBreak, self).__init__(*args) -class TimBreak(Exception): pass - - -class TimWrapper: - list = None +class TimComparator(Comparator): comparisons = 0 limit = 0 - def __init__(self, n): - self.n = n + def __init__(self, tracklist, i): + super(TimComparator, self).__init__(tracklist, i) def __cmp__(self, other): - if TimWrapper.comparisons > TimWrapper.limit: - raise TimBreak - TimWrapper.comparisons += 1 - return cmp(self.n, other.n) - - def __getattr__(self, attr): - return getattr(self.n, attr) + TimComparator.comparisons += 1 + if TimComparator.comparisons > TimComparator.limit: + self.tracklist.total_comparisons += 1 + raise TimBreak(self, other) + return cmp(self.i, other.i) def timsort(lst): - lst.wrap(TimWrapper) - TimWrapper.list = lst - prev = [i.n for i in lst] + # we 
need a hack in this one, as the TrackList.lst pointer to the list + # is set to [] while lst.sort() is running + # so what happens here is: we break each sort after limit comparisons, + # log the outcome, increase limit by 1 and run again from the beginning. + lst.setComparator(TimComparator, wrapOldOne=False) + TimComparator.list = lst while 1: - TimWrapper.comparisons = 0 - TimWrapper.limit += 1 + TimComparator.comparisons = 0 + TimComparator.limit += 1 lst.reset() try: lst.sort() - except TimBreak: - if prev != [i.n for i in lst]: - lst.log() - prev = [i.n for i in lst] + except TimBreak as t: + s,o = t + lst.addComparison(s,o) + lst.log() else: lst.log() break + diff --git a/libsortvis/sortable.py b/libsortvis/sortable.py index 588bf50..9999a1b 100644 --- a/libsortvis/sortable.py +++ b/libsortvis/sortable.py @@ -1,5 +1,9 @@ +from copy import copy -class Sortable: +class Comparator(object): + """ A comparator wrapping a list element and doing all the counts. + In the rare case of extending this class, make sure that the + wrapped element is always self.i, and has a self.path list.""" def __init__(self, tracklist, i): self.tracklist, self.i = tracklist, i self.path = [] @@ -8,38 +12,76 @@ def __cmp__(self, other): """ Counts each comparison between two elements and redirects to the underlying __cmp__ method of the i's wrapped in this.""" self.tracklist.total_comparisons += 1 + self.tracklist.log() + self.tracklist.addComparison(self,other) return cmp(self.i, other.i) - + def __repr__(self): return str(self.i) -class TrackList: +class TrackList(object): """ A list-like object that logs the positions of its elements every time the log() method is called. 
""" - def __init__(self, itms): - self.lst = [Sortable(self, i) for i in itms] - self.start = self.lst[:] + def __init__(self, itms, comparator=Comparator): + """ You can either specify a comparator at init or set a different one later.""" + self.lst = [comparator(self, i) for i in itms] + self.start = copy(self.lst) self.total_comparisons = 0 + self.comparisonList = [] self.log() - def wrap(self, wrapper): + def setComparator(self, comparator, wrapOldOne=False): """ Allows an additional wrapping of the inner list with the given - wrapper. See algos.timsort as an example. """ - self.lst = [wrapper(i) for i in self.lst] - self.start = self.lst[:] + wrapper or substitution of the existing one (wrapOldOne=False). + See algos.timsort as an example. """ + if wrapOldOne: + self.lst = [comparator(self, i) for i in self.lst] + else: + self.lst = [comparator(self, i.i) for i in self.lst] + self.log() + self.start = copy(self.lst) def reset(self): - self.total_comparisons = 0 - self.lst = self.start[:] + """ reset original ordering. Does _not_ reset counts or path info.""" +# self.total_comparisons = 0 + self.lst = copy(self.start) def __getattr__(self, attr): - """ Redirecting every lookup on this object that didn't succeed to - the internal list (e.g., iterating over self iterates over list).""" + """ Redirecting every lookup (aside from special method lookups) + on this object that didn't succeed to the internal list + (e.g., iterating over self iterates over list).""" return getattr(self.lst, attr) + # special method lookups need to be defined explicitly in new style classes. 
+ def __len__(self): + return len(self.lst) + def __getitem__(self, index): + return self.lst[index] + def __setitem__(self, index, value): + self.lst[index] = value + def __delitem__(self, index): + del self.lst[index] + def __iter__(self): + return iter(self.lst) + def __reversed__(self): + return reversed(self.lst) + def __contains__(self, value): + return value in self.lst + + def addComparison(self,cmps,cmpo): + spos,opos = None, None + if cmps.i == cmpo.i: print "stupid self comparison", cmps.i + for j,k in enumerate(self.lst): + if k.i == cmps.i: spos = j + if k.i == cmpo.i: opos = j + assert spos != None + assert opos != None + self.comparisonList.append((spos, opos)) + def log(self): - for i, v in enumerate(self): + """ logs list whenever the comparison counter was changed.""" + for i, v in enumerate(self.lst): v.path.append(i) From 4a00f6253ec26652931a31d067232669f34081d2 Mon Sep 17 00:00:00 2001 From: Joern Hees Date: Wed, 17 Nov 2010 00:11:09 +0100 Subject: [PATCH 3/7] Comments on quicksort, which does unnecessary comparisons of elements with the same index (e.g., if lst[i] <= lst[i]) --- libsortvis/algos/quicksort.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libsortvis/algos/quicksort.py b/libsortvis/algos/quicksort.py index d1cdbbc..cab7310 100644 --- a/libsortvis/algos/quicksort.py +++ b/libsortvis/algos/quicksort.py @@ -1,11 +1,12 @@ +# note: quicksort does unnecessary comparisons of elements with the same index def quicksort(lst, left=0, right=None): if right is None: right = len(lst) - 1 l = left r = right - if l <= r: + if l <= r: # why not < ? mid = lst[(left+right)/2] - while l <= r: + while l <= r: # why not < ? while l <= right and lst[l] < mid: l += 1 while r > left and lst[r] > mid: From e12a0fd9c1c8a99de84fb9a5c8fd59a02f98e6f7 Mon Sep 17 00:00:00 2001 From: Joern Hees Date: Wed, 17 Nov 2010 00:15:58 +0100 Subject: [PATCH 4/7] Commented out debugging output for unnecessary comparisons. 
--- libsortvis/sortable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsortvis/sortable.py b/libsortvis/sortable.py index 9999a1b..7659c2c 100644 --- a/libsortvis/sortable.py +++ b/libsortvis/sortable.py @@ -73,7 +73,7 @@ def __contains__(self, value): def addComparison(self,cmps,cmpo): spos,opos = None, None - if cmps.i == cmpo.i: print "stupid self comparison", cmps.i +# if cmps.i == cmpo.i: print "unnecessary self comparison", cmps.i for j,k in enumerate(self.lst): if k.i == cmps.i: spos = j if k.i == cmpo.i: opos = j From 48fe88b2ecd2511258b48315f6d9b3b9563fcc70 Mon Sep 17 00:00:00 2001 From: Joern Hees Date: Wed, 17 Nov 2010 01:49:26 +0100 Subject: [PATCH 5/7] A quick demo script which uses scipy and matplotlib to visualize every comparison and swap in several sorting algorithms. --- libsortvis/quickComparisonsVis.py | 117 ++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 libsortvis/quickComparisonsVis.py diff --git a/libsortvis/quickComparisonsVis.py b/libsortvis/quickComparisonsVis.py new file mode 100644 index 0000000..9a3880a --- /dev/null +++ b/libsortvis/quickComparisonsVis.py @@ -0,0 +1,117 @@ +# -*- coding: utf-8 -*- +''' +A quick & dirty demo script using scipy and matplotlib to visualize compared elements. +Created on Nov 14, 2010 + +@author: joern +''' + +from libsortvis import graph, algos, sortable +import random +import scipy as sc +import matplotlib.pyplot as plt +import matplotlib.colors as colors +import matplotlib.cm as cm + +if __name__ == '__main__': + lst = range(100) + random.shuffle(lst) + + verbose = True # get # of comparisons + debug = False + drawComparisons = True # mark each comparison between 2 elements with a dot on both + compress = False # only draw each column if it changed. 
+ + createGraphic = False # save output + + if createGraphic: + gstart = graph.rgb("ffffff") + gend = graph.rgb("000000") + csource = graph.ColourGradient(gstart, gend) + background = graph.rgb("ffffff") + titlecolour = graph.rgb("000000") + line = 6 + border = 1 + titleheight = 20 + rotate = False + height = (line + border + 5) * len(lst) + width = int(height * 3) + prefix = "" # file output prefix + title = "" + ldrawer = graph.Weave( + csource, + width, + height, + titleheight, + titlecolour, + background, + rotate, + line, + border + ) + + + assert not (drawComparisons and compress), "can't show both in one image,"\ + " decide if you want to show which items are compared or only show lists if they change." + + todraw = ["timsort", "mergesort", "quicksort", "bubblesort", "insertionsort"] + + for algo in todraw: + if verbose: print algo + + track = sortable.TrackList(lst) # wrap lst with our TrackList + algos.algorithms[algo](track) # apply the sorting algorithm and sort the list + track.log() # could happen that the last comparison swaps elements, so log again + + if verbose: + print "\t%s comparisons"%(track.total_comparisons) + + m = [] + for j in track: + m.append(j.path) + a = sc.array(m) + if compress: + prev = -1 + todel = [] + for i in range(a.shape[1]): + col = a[:,i] + if (col == prev).all(): + if debug: print "delete column", i + todel.append(i) + prev = col + a = sc.delete(a, todel, 1) + assert (a[:,0][sc.array(lst)] == sc.array(lst)[a[:,0]]).all(), \ + "first path column are does not correspond to init positions of all sorted elements" + assert (sorted(a[:,-1]) == a[:,-1]).all(), \ + "last path column not sorted" + if debug: print a + + + plt.figure() + plt.title(algo) + + # line colors + sm = cm.ScalarMappable(cmap=cm.get_cmap("hot"), + norm=colors.Normalize(vmin=0, vmax=(1/0.65)*len(lst))) # use only lower 0.65 of cm + for i,row in enumerate(a): + c = sm.to_rgba(i) + plt.plot(row, color=c) + if drawComparisons: + comps = 
sc.array(track.comparisonList) + plt.plot(range(1,len(track.comparisonList)+1), comps[:,0], 'o', color=(0., 0., 1.), alpha=0.5) + plt.plot(range(1,len(track.comparisonList)+1), comps[:,1], 'o', color=(1., 0., 0.), alpha=0.5) + + + if createGraphic: + name = prefix + algo + ".png" + ldrawer.draw( + track, + algo if title else None, + name, + ) + + plt.show() + + + + From b46e1ec80a81a589efc6b86724f396076583d976 Mon Sep 17 00:00:00 2001 From: Joern Hees Date: Fri, 19 Nov 2010 18:49:46 +0100 Subject: [PATCH 6/7] For repr() of a Comparator, call repr of its element (instead of str). --- libsortvis/sortable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsortvis/sortable.py b/libsortvis/sortable.py index 7659c2c..04969d6 100644 --- a/libsortvis/sortable.py +++ b/libsortvis/sortable.py @@ -17,7 +17,7 @@ def __cmp__(self, other): return cmp(self.i, other.i) def __repr__(self): - return str(self.i) + return repr(self.i) class TrackList(object): From e843d51b180dc86f35d72356a0c51ce32029765a Mon Sep 17 00:00:00 2001 From: Joern Hees Date: Sat, 20 Nov 2010 02:37:22 +0100 Subject: [PATCH 7/7] Bug in timsort hack fixed: always reinit TimComparator, as successive runs (e.g. for averaging) would otherwise start with old TimComparator.limit values. --- libsortvis/algos/timsort.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libsortvis/algos/timsort.py b/libsortvis/algos/timsort.py index 972616e..8c743a6 100644 --- a/libsortvis/algos/timsort.py +++ b/libsortvis/algos/timsort.py @@ -22,6 +22,8 @@ def timsort(lst): # is set to [] while lst.sort() is running # so what happens here is: we break each sort after limit comparisons, # log the outcome, increase limit by 1 and run again from the beginning. + TimComparator.comparisons = 0 + TimComparator.limit = 0 lst.setComparator(TimComparator, wrapOldOne=False) TimComparator.list = lst while 1: