diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..f7c2ff5
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,35 @@
+# Auto detect text files and perform LF normalization
+* text=auto
+
+# Explicitly declare text files we want to always be normalized and converted
+# to native line endings on checkout.
+*.c text
+*.h text
+*.py text
+
+# Declare files that will always have CRLF line endings on checkout.
+*.sln text eol=crlf
+
+# Denote all files that are truly binary and should not be modified.
+*.png binary
+*.jpg binary
+
+# Custom for Visual Studio
+*.cs diff=csharp
+*.sln merge=union
+*.csproj merge=union
+*.vbproj merge=union
+*.fsproj merge=union
+*.dbproj merge=union
+
+# Standard to msysgit
+*.doc diff=astextplain
+*.DOC diff=astextplain
+*.docx diff=astextplain
+*.DOCX diff=astextplain
+*.dot diff=astextplain
+*.DOT diff=astextplain
+*.pdf diff=astextplain
+*.PDF diff=astextplain
+*.rtf diff=astextplain
+*.RTF diff=astextplain
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5ebd21a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,163 @@
+#################
+## Eclipse
+#################
+
+*.pydevproject
+.project
+.metadata
+bin/
+tmp/
+*.tmp
+*.bak
+*.swp
+*~.nib
+local.properties
+.classpath
+.settings/
+.loadpath
+
+# External tool builders
+.externalToolBuilders/
+
+# Locally stored "Eclipse launch configurations"
+*.launch
+
+# CDT-specific
+.cproject
+
+# PDT-specific
+.buildpath
+
+
+#################
+## Visual Studio
+#################
+
+## Ignore Visual Studio temporary files, build results, and
+## files generated by popular Visual Studio add-ons.
+
+# User-specific files
+*.suo
+*.user
+*.sln.docstates
+
+# Build results
+[Dd]ebug/
+[Rr]elease/
+*_i.c
+*_p.c
+*.ilk
+*.meta
+*.obj
+*.pch
+*.pdb
+*.pgc
+*.pgd
+*.rsp
+*.sbr
+*.tlb
+*.tli
+*.tlh
+*.tmp
+*.vspscc
+.builds
+*.dotCover
+
+## TODO: If you have NuGet Package Restore enabled, uncomment this
+#packages/
+
+# Visual C++ cache files
+ipch/
+*.aps
+*.ncb
+*.opensdf
+*.sdf
+
+# Visual Studio profiler
+*.psess
+*.vsp
+
+# ReSharper is a .NET coding add-in
+_ReSharper*
+
+# Installshield output folder
+[Ee]xpress
+
+# DocProject is a documentation generator add-in
+DocProject/buildhelp/
+DocProject/Help/*.HxT
+DocProject/Help/*.HxC
+DocProject/Help/*.hhc
+DocProject/Help/*.hhk
+DocProject/Help/*.hhp
+DocProject/Help/Html2
+DocProject/Help/html
+
+# Click-Once directory
+publish
+
+# Others
+[Bb]in
+[Oo]bj
+sql
+TestResults
+*.Cache
+ClientBin
+stylecop.*
+~$*
+*.dbmdl
+Generated_Code #added for RIA/Silverlight projects
+
+# Backup & report files from converting an old project file to a newer
+# Visual Studio version. Backup files are not needed, because we have git ;-)
+_UpgradeReport_Files/
+Backup*/
+UpgradeLog*.XML
+
+
+
+############
+## Windows
+############
+
+# Windows image file caches
+Thumbs.db
+
+# Folder config file
+Desktop.ini
+
+
+#############
+## Python
+#############
+
+*.py[co]
+
+# Packages
+*.egg
+*.egg-info
+dist
+build
+eggs
+parts
+bin
+var
+sdist
+develop-eggs
+.installed.cfg
+
+# Installer logs
+pip-log.txt
+
+# Unit test / coverage reports
+.coverage
+.tox
+
+#Translations
+*.mo
+
+#Mr Developer
+.mr.developer.cfg
+
+# Mac crap
+.DS_Store
diff --git a/images/dirt05-icon.jpg b/images/dirt05-icon.jpg
index 38d02e9..262c2b7 100644
Binary files a/images/dirt05-icon.jpg and b/images/dirt05-icon.jpg differ
diff --git a/mdp.py b/mdp.py
index e5142c1..0048843 100644
--- a/mdp.py
+++ b/mdp.py
@@ -98,7 +98,8 @@ def value_iteration(mdp, epsilon=0.001):
             U1[s] = R(s) + gamma * max([sum([p * U[s1] for (p, s1) in T(s, a)])
                                         for a in mdp.actions(s)])
             delta = max(delta, abs(U1[s] - U[s]))
-        if delta < epsilon * (1 - gamma) / gamma:
+        if (((gamma < 1) and (delta < epsilon * (1 - gamma) / gamma)) or
+            ((gamma == 1) and (delta < epsilon))): # allows for gamma to be 1
             return U
 
 def best_policy(mdp, U):
diff --git a/rl.py b/rl.py
index fc0e2c9..8ddcb7f 100644
--- a/rl.py
+++ b/rl.py
@@ -1,15 +1,267 @@
 """Reinforcement Learning (Chapter 21)
 """
 
-from utils import *
+from mdp import value_iteration, policy_evaluation, policy_iteration, \
+    GridMDP, MDP, Fig
+from utils import update, argmax
+from random import random
+from time import time
 import agents
 
 class PassiveADPAgent(agents.Agent):
     """Passive (non-learning) agent that uses adaptive dynamic programming
     on a given MDP and policy. [Fig. 21.2]"""
-    NotImplemented
+    class LearntMDP:
+        """A model of the original MDP that the PassiveADPAgent is trying to learn."""
+        def __init__(self, states, gamma, terminals):
+            update(self,
+                   P={},
+                   reward={},
+                   states=states,
+                   gamma=gamma,
+                   terminals=terminals)
+
+        def R(self, s):
+            """Return a numeric reward for the state s"""
+            return self.reward.get(s, 0.)
+
+        def T(self, s, a):
+            """Return a list of (probability, result-state) pairs for doing a in s"""
+            try:
+                return [(p,s1) for (s1,p) in self.P[s][a].items()]
+            except KeyError:
+                return []
+
+        def T_set(self, (s,a,t), p):
+            "Record probability p for the transition (s, a) -> t in the model"
+            if (s in self.P) and (a in self.P[s]):
+                self.P[s][a][t] = p
+            elif (s in self.P):
+                self.P[s][a] = {t:p}
+            else:
+                self.P[s] = {a:{t:p}}
+
+    def __init__(self, mdp, pi):
+        update(self,
+               pi = pi,
+               mdp = self.LearntMDP(mdp.states,mdp.gamma,mdp.terminals),
+               U = {},
+               Ns_sa = {s:{a:{t:0 for (p,t) in mdp.T(s,a)}
+                           for a in mdp.actlist}
+                        for s in mdp.states},
+               Nsa = {s:{a:0. for a in mdp.actlist}
+                      for s in mdp.states},
+               s = None,
+               a = None)
+
+    def program(self, percept):
+        s1,r1 = percept
+        mdp,U,s,a,Nsa,Ns_sa = self.mdp,self.U,self.s,self.a,self.Nsa,self.Ns_sa
+        if s1 not in mdp.reward: # mdp.reward also tracks the visited states
+            U[s1] = mdp.reward[s1] = r1
+        if s is not None:
+            Nsa[s][a] += 1
+            Ns_sa[s][a][s1] += 1
+            for t in Ns_sa[s][a]:
+                if Ns_sa[s][a][t] > 0:
+                    self.mdp.T_set((s,a,t), Ns_sa[s][a][t]/Nsa[s][a])
+        U = policy_evaluation(self.pi, U, mdp)
+        if s1 in mdp.terminals:
+            self.s = self.a = None
+        else:
+            self.s, self.a = s1, self.pi[s1]
+        return self.a
 
 class PassiveTDAgent(agents.Agent):
     """Passive (non-learning) agent that uses temporal differences to learn
     utility estimates. [Fig. 21.4]"""
-    NotImplemented
+    def __init__(self,mdp,pi,alpha=None):
+        update(self,
+               pi = pi,
+               U = {s:0. for s in mdp.states},
+               Ns = {s:0 for s in mdp.states},
+               s = None,
+               a = None,
+               r = None,
+               gamma = mdp.gamma,
+               terminals = mdp.terminals,
+               reached_states = set())
+
+        if alpha:
+            self.alpha = alpha
+        else:
+            self.alpha = lambda n: 60./(59+n) # page 837
+
+    def program(self, percept):
+        s1, r1 = percept
+        pi, U, Ns, s, a, r = self.pi, self.U, self.Ns, self.s, self.a, self.r
+        alpha, gamma = self.alpha, self.gamma
+        if s1 not in self.reached_states:
+            self.reached_states.add(s1)
+            U[s1] = r1
+        if s is not None:
+            Ns[s] += 1
+            U[s] += alpha(Ns[s]) * (r + gamma * U[s1] - U[s])
+        if s1 in self.terminals:
+            self.s = self.a = self.r = None
+        else:
+            self.s, self.a, self.r = s1, pi[s1], r1
+        return self.a
+
+class QLearningAgent(agents.Agent):
+    """Active TD agent that uses temporal differences to learn an
+    action-utility representation. [Fig. 21.8]"""
+    def __init__(self,mdp,alpha=None,Ne=5,Rplus=2):
+        update(self,
+               Q = {s:{a:0. for a in mdp.actlist} if s not in mdp.terminals
+                    else {None:0.} for s in mdp.states},
+               Nsa = {s:{a:0. for a in mdp.actlist}
+                      for s in mdp.states},
+               s = None,
+               a = None,
+               r = None,
+               Ne = Ne,
+               Rplus = Rplus,
+               gamma = mdp.gamma,
+               terminals = mdp.terminals)
+
+        if alpha is None:
+            self.alpha = lambda n: 60./(59+n) # page 837
+        else:
+            self.alpha = alpha
+
+    def f(self,u,n): # exploration function in AIMA (3rd ed.), p. 842
+        if n < self.Ne:
+            return self.Rplus
+        else:
+            return u
+
+    def program(self,percept):
+        s1,r1 = percept
+        Q, Nsa, s, a, r = self.Q, self.Nsa, self.s, self.a, self.r
+        alpha, gamma, f = self.alpha, self.gamma, self.f
+        if s1 in self.terminals:
+            Q[s1][None] = r1
+        if s is not None:
+            Nsa[s][a] += 1
+            Q[s][a] += alpha(Nsa[s][a])*(r+gamma*max(Q[s1].values())-Q[s][a])
+        if s1 in self.terminals:
+            self.s = self.a = self.r = None
+        else:
+            self.s, self.r = s1, r1
+            self.a = argmax(Q[s1].keys(), lambda a1: f(Q[s1][a1],Nsa[s1][a1]))
+        return self.a
+
+# ---
+
+def simulate_move(mdp,(s,a)):
+    r = random() # 0 <= r < 1
+    p,s1 = zip(*(mdp.T(s,a)))
+    for i in range(len(p)):
+        if sum(p[:i+1]) >= r:
+            return s1[i]
+
+def execute_trial(agent,mdp):
+    current_state = mdp.init
+    while True:
+        current_reward = mdp.R(current_state)
+        next_action = agent.program((current_state, current_reward))
+        if next_action is None:
+            break
+        current_state = simulate_move(mdp,(current_state, next_action))
+
+def demoPassiveADPAgent():
+    print '--------------------'
+    print 'DEMO PassiveADPAgent'
+    print '--------------------'
+    policy = {(0, 1): (0, 1),
+              (1, 2): (1, 0),
+              (3, 2): None,
+              (0, 0): (0, 1),
+              (3, 0): (-1, 0),
+              (3, 1): None,
+              (2, 1): (0, 1),
+              (2, 0): (0, 1),
+              (2, 2): (1, 0),
+              (1, 0): (1, 0),
+              (0, 2): (1, 0)}
+
+    time_start = time()
+    trials = 100
+    agent = PassiveADPAgent(Fig[17,1], policy)
+    for i in range(trials):
+        execute_trial(agent,Fig[17,1])
+    time_end = time()
+
+    print 'Executed %i trials' % trials
+    print 'Took %d seconds' % (time_end - time_start)
+    print 'Utilities: %s' % agent.U
+    print '\nCorrect Utilities (estimated by value iteration):'
+    print value_iteration(Fig[17,1])
+
+def demoPassiveTDAgent():
+    print '--------------------'
+    print 'DEMO PassiveTDAgent'
+    print '--------------------'
+    # Setup values
+    policy = {(0, 1): (0, 1),
+              (1, 2): (1, 0),
+              (3, 2): None,
+              (0, 0): (0, 1),
+              (3, 0): (-1, 0),
+              (3, 1): None,
+              (2, 1): (0, 1),
+              (2, 0): (0, 1),
+              (2, 2): (1, 0),
+              (1, 0): (1, 0),
+              (0, 2): (1, 0)}
+
+    time_start = time()
+    trials = 100
+    agent = PassiveTDAgent(Fig[17,1], policy)
+    for i in range(trials):
+        execute_trial(agent,Fig[17,1])
+    time_end = time()
+
+    print 'Executed %i trials' % trials
+    print 'Took %d seconds' % (time_end - time_start)
+    print 'Utilities: %s' % agent.U
+    print '\nCorrect Utilities (estimated by value iteration):'
+    print value_iteration(Fig[17,1])
+
+def demoQLearningAgent():
+    print '--------------------'
+    print 'DEMO QLearningAgent'
+    print '--------------------'
+    # Setup values
+    policy = {(0, 1): (0, 1),
+              (1, 2): (1, 0),
+              (3, 2): None,
+              (0, 0): (0, 1),
+              (3, 0): (-1, 0),
+              (3, 1): None,
+              (2, 1): (0, 1),
+              (2, 0): (0, 1),
+              (2, 2): (1, 0),
+              (1, 0): (1, 0),
+              (0, 2): (1, 0)}
+
+    time_start = time()
+    trials = 100
+    agent = QLearningAgent(Fig[17,1])
+    for i in range(trials):
+        execute_trial(agent,Fig[17,1])
+    time_end = time()
+
+    print 'Executed %i trials' % trials
+    print 'Took %d seconds' % (time_end - time_start)
+    print 'Utilities: %s' % {s:max(agent.Q[s].values()) for s in agent.Q}
+    print '\nCorrect Utilities (estimated by value iteration):'
+    print value_iteration(Fig[17,1])
+
+# ---
+
+if __name__ == '__main__':
+    demoPassiveADPAgent()
+    demoPassiveTDAgent()
+    demoQLearningAgent()
diff --git a/utils.py b/utils.py
index c167589..8356337 100644
--- a/utils.py
+++ b/utils.py
@@ -734,17 +734,22 @@ class PriorityQueue(Queue):
     Also supports dict-like lookup."""
     def __init__(self, order=min, f=lambda x: x):
         update(self, A=[], order=order, f=f)
+        self.membership = {}
     def append(self, item):
         bisect.insort(self.A, (self.f(item), item))
+        hashval = hash(item)
+        self.membership[hashval] = self.membership.get(hashval, 0) + 1
    def __len__(self):
        return len(self.A)
    def pop(self):
        if self.order == min:
-            return self.A.pop(0)[1]
+            item = self.A.pop(0)[1]
        else:
-            return self.A.pop()[1]
+            item = self.A.pop()[1]
+        self._remove_(item)
+        return item
    def __contains__(self, item):
-        return some(lambda (_, x): x == item, self.A)
+        return hash(item) in self.membership
    def __getitem__(self, key):
        for _, item in self.A:
            if item == key:
@@ -752,8 +757,15 @@ def __getitem__(self, key):
    def __delitem__(self, key):
        for i, (value, item) in enumerate(self.A):
            if item == key:
-                self.A.pop(i)
+                item = self.A.pop(i)
+                self._remove_(item)
                 return
+    def _remove_(self, item):
+        hashval = hash(item)
+        self.membership[hashval] -= 1
+        if self.membership[hashval] == 0:
+            del self.membership[hashval]
+
 
 ## Fig: The idea is we can define things like Fig[3,10] later.
 ## Alas, it is Fig[3,10] not Fig[3.10], because that would be the same
@@ -855,7 +867,14 @@ def fixup(test):
 ...     q.extend(nums)
 ...     for num in nums: assert num in q
 ...     assert 42 not in q
-...     return [q.pop() for i in range(len(q))]
+...     result = []
+...     for i in range(len(q)):
+...         num = q.pop()
+...         assert num not in q # would fail if num appeared in q more than once
+...         result.append(num)
+...
+...     return result
+
 >>> qtest(Stack())
 [0, 3, 4, 99, -99, 6, 5, 7, 2, 8, 1]
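
Reviewer note on the mdp.py hunk (not part of the patch): the old termination test delta < epsilon * (1 - gamma) / gamma comes from the contraction bound for discounted value iteration and can never be satisfied when gamma == 1 (its right-hand side is 0), so undiscounted MDPs looped forever; the new branch simply stops once delta < epsilon. A minimal sketch of exercising the gamma == 1 path, assuming the GridMDP constructor from mdp.py; the undiscounted 4x3 grid below is illustrative, not something the patch adds:

    from mdp import GridMDP, value_iteration

    # Hypothetical undiscounted copy of the 4x3 grid of Fig[17,1] (gamma = 1).
    undiscounted = GridMDP([[-0.04, -0.04, -0.04, +1],
                            [-0.04, None,  -0.04, -1],
                            [-0.04, -0.04, -0.04, -0.04]],
                           terminals=[(3, 2), (3, 1)],
                           gamma=1.)
    # With the old test this call never returned; it now stops once delta < epsilon.
    U = value_iteration(undiscounted, epsilon=0.001)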
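
Reviewer note on the utils.py hunk (not part of the patch): PriorityQueue.__contains__ now answers membership from a dict of hash counts maintained by append, pop, and __delitem__, replacing the old linear scan of q.A with a constant-time lookup; the trade-off is that `in` now answers by hash, so two distinct items with equal hashes would be conflated. A small sketch of the expected behaviour, assuming the queued items are hashable:

    from utils import PriorityQueue

    q = PriorityQueue()                 # default order=min, f=identity
    q.append(3); q.append(3); q.append(7)
    assert 3 in q and 7 in q            # dict lookup, no scan of q.A
    assert q.pop() == 3                 # pop decrements the count for hash(3)
    assert 3 in q                       # the second 3 is still counted
    q.pop(); q.pop()
    assert 3 not in q and 7 not in q    # counts hit zero and the keys are removed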