From b43ff8e5cf4c9a720e2b9690de9afcf97eab3098 Mon Sep 17 00:00:00 2001
From: nickisghosty <nickisghosty@gmail.com>
Date: Sun, 22 Oct 2017 07:13:22 -0700
Subject: [PATCH 1/6] Start/stop button, random client seed

Replaces 'Free BTC' link with a 'START/STOP BOT' button.

Once profit reaches 500 satoshi, the client seed is changed to a random 16-character alphanumeric string.
---
 DQ-Trevel.js | 3649 ++++++++++++++++++++++++++------------------------
 1 file changed, 1864 insertions(+), 1785 deletions(-)

diff --git a/DQ-Trevel.js b/DQ-Trevel.js
index ce75d9b..b35d805 100644
--- a/DQ-Trevel.js
+++ b/DQ-Trevel.js
@@ -1,1739 +1,1798 @@
 var R = {}; // the Recurrent library
-(function(global) {
-	"use strict";
-	// Utility fun
-	function assert(condition, message) {
-		// from http://stackoverflow.com/questions/15313418/javascript-assert
-		if (!condition) {
-			message = message || "Assertion failed";
-			if (typeof Error !== "undefined") {
-				throw new Error(message);
-			}
-			throw message; // Fallback
-		}
-	}
-	// Random numbers utils
-	var return_v = false;
-	var v_val = 0.0;
-	var gaussRandom = function() {
-		if (return_v) {
-			return_v = false;
-			return v_val;
-		}
-		var u = 2 * Math.random() - 1;
-		var v = 2 * Math.random() - 1;
-		var r = u * u + v * v;
-		if (r == 0 || r > 1) return gaussRandom();
-		var c = Math.sqrt(-2 * Math.log(r) / r);
-		v_val = v * c; // cache this
-		return_v = true;
-		return u * c;
-	}
-	var randf = function(a, b) {
-		return Math.random() * (b - a) + a;
-	}
-	var randi = function(a, b) {
-		return Math.floor(Math.random() * (b - a) + a);
-	}
-	var randn = function(mu, std) {
-		return mu + gaussRandom() * std;
-	}
-	// helper function returns array of zeros of length n
-	// and uses typed arrays if available
-	var zeros = function(n) {
-		if (typeof(n) === 'undefined' || isNaN(n)) {
-			return [];
-		}
-		if (typeof ArrayBuffer === 'undefined') {
-			// lacking browser support
-			var arr = new Array(n);
-			for (var i = 0; i < n; i++) {
-				arr[i] = 0;
-			}
-			return arr;
-		} else {
-			return new Float64Array(n);
-		}
-	}
-	// Mat holds a matrix
-	var Mat = function(n, d) {
-		// n is number of rows d is number of columns
-		this.n = n;
-		this.d = d;
-		this.w = zeros(n * d);
-		this.dw = zeros(n * d);
-	}
-	Mat.prototype = {
-		get: function(row, col) {
-			// slow but careful accessor function
-			// we want row-major order
-			var ix = (this.d * row) + col;
-			assert(ix >= 0 && ix < this.w.length);
-			return this.w[ix];
-		},
-		set: function(row, col, v) {
-			// slow but careful accessor function
-			var ix = (this.d * row) + col;
-			assert(ix >= 0 && ix < this.w.length);
-			this.w[ix] = v;
-		},
-		setFrom: function(arr) {
-			for (var i = 0, n = arr.length; i < n; i++) {
-				this.w[i] = arr[i];
-			}
-		},
-		setColumn: function(m, i) {
-			for (var q = 0, n = m.w.length; q < n; q++) {
-				this.w[(this.d * q) + i] = m.w[q];
-			}
-		},
-		toJSON: function() {
-			var json = {};
-			json['n'] = this.n;
-			json['d'] = this.d;
-			json['w'] = this.w;
-			return json;
-		},
-		fromJSON: function(json) {
-			this.n = json.n;
-			this.d = json.d;
-			this.w = zeros(this.n * this.d);
-			this.dw = zeros(this.n * this.d);
-			for (var i = 0, n = this.n * this.d; i < n; i++) {
-				this.w[i] = json.w[i]; // copy over weights
-			}
-		}
-	}
-	var copyMat = function(b) {
-		var a = new Mat(b.n, b.d);
-		a.setFrom(b.w);
-		return a;
-	}
-	var copyNet = function(net) {
-		// nets are (k,v) pairs with k = string key, v = Mat()
-		var new_net = {};
-		for (var p in net) {
-			if (net.hasOwnProperty(p)) {
-				new_net[p] = copyMat(net[p]);
-			}
-		}
-		return new_net;
-	}
-	var updateMat = function(m, alpha) {
-		// updates in place
-		for (var i = 0, n = m.n * m.d; i < n; i++) {
-			if (m.dw[i] !== 0) {
-				m.w[i] += -alpha * m.dw[i];
-				m.dw[i] = 0;
-			}
-		}
-	}
-	var updateNet = function(net, alpha) {
-		for (var p in net) {
-			if (net.hasOwnProperty(p)) {
-				updateMat(net[p], alpha);
-			}
-		}
-	}
-	var netToJSON = function(net) {
-		var j = {};
-		for (var p in net) {
-			if (net.hasOwnProperty(p)) {
-				j[p] = net[p].toJSON();
-			}
-		}
-		return j;
-	}
-	var netFromJSON = function(j) {
-		var net = {};
-		for (var p in j) {
-			if (j.hasOwnProperty(p)) {
-				net[p] = new Mat(1, 1); // not proud of this
-				net[p].fromJSON(j[p]);
-			}
-		}
-		return net;
-	}
-	var netZeroGrads = function(net) {
-		for (var p in net) {
-			if (net.hasOwnProperty(p)) {
-				var mat = net[p];
-				gradFillConst(mat, 0);
-			}
-		}
-	}
-	var netFlattenGrads = function(net) {
-		var n = 0;
-		for (var p in net) {
-			if (net.hasOwnProperty(p)) {
-				var mat = net[p];
-				n += mat.dw.length;
-			}
-		}
-		var g = new Mat(n, 1);
-		var ix = 0;
-		for (var p in net) {
-			if (net.hasOwnProperty(p)) {
-				var mat = net[p];
-				for (var i = 0, m = mat.dw.length; i < m; i++) {
-					g.w[ix] = mat.dw[i];
-					ix++;
-				}
-			}
-		}
-		return g;
-	}
-	// return Mat but filled with random numbers from gaussian
-	var RandMat = function(n, d, mu, std) {
-		var m = new Mat(n, d);
-		fillRandn(m, mu, std);
-		//fillRand(m,-std,std); // kind of :P
-		return m;
-	}
-	// Mat utils
-	// fill matrix with random gaussian numbers
-	var fillRandn = function(m, mu, std) {
-		for (var i = 0, n = m.w.length; i < n; i++) {
-			m.w[i] = randn(mu, std);
-		}
-	}
-	var fillRand = function(m, lo, hi) {
-		for (var i = 0, n = m.w.length; i < n; i++) {
-			m.w[i] = randf(lo, hi);
-		}
-	}
-	var gradFillConst = function(m, c) {
-		for (var i = 0, n = m.dw.length; i < n; i++) {
-			m.dw[i] = c
-		}
-	}
-	// Transformer definitions
-	var Graph = function(needs_backprop) {
-		if (typeof needs_backprop === 'undefined') {
-			needs_backprop = true;
-		}
-		this.needs_backprop = needs_backprop;
-		// this will store a list of functions that perform backprop,
-		// in their forward pass order. So in backprop we will go
-		// backwards and evoke each one
-		this.backprop = [];
-	}
-	Graph.prototype = {
-		backward: function() {
-			for (var i = this.backprop.length - 1; i >= 0; i--) {
-				this.backprop[i](); // tick!
-			}
-		},
-		rowPluck: function(m, ix) {
-			// pluck a row of m with index ix and return it as col vector
-			assert(ix >= 0 && ix < m.n);
-			var d = m.d;
-			var out = new Mat(d, 1);
-			for (var i = 0, n = d; i < n; i++) {
-				out.w[i] = m.w[d * ix + i];
-			} // copy over the data
-			if (this.needs_backprop) {
-				var backward = function() {
-					for (var i = 0, n = d; i < n; i++) {
-						m.dw[d * ix + i] += out.dw[i];
-					}
-				}
-				this.backprop.push(backward);
-			}
-			return out;
-		},
-		tanh: function(m) {
-			// tanh nonlinearity
-			var out = new Mat(m.n, m.d);
-			var n = m.w.length;
-			for (var i = 0; i < n; i++) {
-				out.w[i] = Math.tanh(m.w[i]);
-			}
-			if (this.needs_backprop) {
-				var backward = function() {
-					for (var i = 0; i < n; i++) {
-						// grad for z = tanh(x) is (1 - z^2)
-						var mwi = out.w[i];
-						m.dw[i] += (1.0 - mwi * mwi) * out.dw[i];
-					}
-				}
-				this.backprop.push(backward);
-			}
-			return out;
-		},
-		sigmoid: function(m) {
-			// sigmoid nonlinearity
-			var out = new Mat(m.n, m.d);
-			var n = m.w.length;
-			for (var i = 0; i < n; i++) {
-				out.w[i] = sig(m.w[i]);
-			}
-			if (this.needs_backprop) {
-				var backward = function() {
-					for (var i = 0; i < n; i++) {
-						// grad for z = tanh(x) is (1 - z^2)
-						var mwi = out.w[i];
-						m.dw[i] += mwi * (1.0 - mwi) * out.dw[i];
-					}
-				}
-				this.backprop.push(backward);
-			}
-			return out;
-		},
-		relu: function(m) {
-			var out = new Mat(m.n, m.d);
-			var n = m.w.length;
-			for (var i = 0; i < n; i++) {
-				out.w[i] = Math.max(0, m.w[i]); // relu
-			}
-			if (this.needs_backprop) {
-				var backward = function() {
-					for (var i = 0; i < n; i++) {
-						m.dw[i] += m.w[i] > 0 ? out.dw[i] : 0.0;
-					}
-				}
-				this.backprop.push(backward);
-			}
-			return out;
-		},
-		mul: function(m1, m2) {
-			// multiply matrices m1 * m2
-			assert(m1.d === m2.n, 'matmul dimensions misaligned');
-			var n = m1.n;
-			var d = m2.d;
-			var out = new Mat(n, d);
-			for (var i = 0; i < m1.n; i++) { // loop over rows of m1
-				for (var j = 0; j < m2.d; j++) { // loop over cols of m2
-					var dot = 0.0;
-					for (var k = 0; k < m1.d; k++) { // dot product loop
-						dot += m1.w[m1.d * i + k] * m2.w[m2.d * k + j];
-					}
-					out.w[d * i + j] = dot;
-				}
-			}
-			if (this.needs_backprop) {
-				var backward = function() {
-					for (var i = 0; i < m1.n; i++) { // loop over rows of m1
-						for (var j = 0; j < m2.d; j++) { // loop over cols of m2
-							for (var k = 0; k < m1.d; k++) { // dot product loop
-								var b = out.dw[d * i + j];
-								m1.dw[m1.d * i + k] += m2.w[m2.d * k + j] * b;
-								m2.dw[m2.d * k + j] += m1.w[m1.d * i + k] * b;
-							}
-						}
-					}
-				}
-				this.backprop.push(backward);
-			}
-			return out;
-		},
-		add: function(m1, m2) {
-			assert(m1.w.length === m2.w.length);
-			var out = new Mat(m1.n, m1.d);
-			for (var i = 0, n = m1.w.length; i < n; i++) {
-				out.w[i] = m1.w[i] + m2.w[i];
-			}
-			if (this.needs_backprop) {
-				var backward = function() {
-					for (var i = 0, n = m1.w.length; i < n; i++) {
-						m1.dw[i] += out.dw[i];
-						m2.dw[i] += out.dw[i];
-					}
-				}
-				this.backprop.push(backward);
-			}
-			return out;
-		},
-		dot: function(m1, m2) {
-			// m1 m2 are both column vectors
-			assert(m1.w.length === m2.w.length);
-			var out = new Mat(1, 1);
-			var dot = 0.0;
-			for (var i = 0, n = m1.w.length; i < n; i++) {
-				dot += m1.w[i] * m2.w[i];
-			}
-			out.w[0] = dot;
-			if (this.needs_backprop) {
-				var backward = function() {
-					for (var i = 0, n = m1.w.length; i < n; i++) {
-						m1.dw[i] += m2.w[i] * out.dw[0];
-						m2.dw[i] += m1.w[i] * out.dw[0];
-					}
-				}
-				this.backprop.push(backward);
-			}
-			return out;
-		},
-		eltmul: function(m1, m2) {
-			assert(m1.w.length === m2.w.length);
-			var out = new Mat(m1.n, m1.d);
-			for (var i = 0, n = m1.w.length; i < n; i++) {
-				out.w[i] = m1.w[i] * m2.w[i];
-			}
-			if (this.needs_backprop) {
-				var backward = function() {
-					for (var i = 0, n = m1.w.length; i < n; i++) {
-						m1.dw[i] += m2.w[i] * out.dw[i];
-						m2.dw[i] += m1.w[i] * out.dw[i];
-					}
-				}
-				this.backprop.push(backward);
-			}
-			return out;
-		},
-	}
-	var softmax = function(m) {
-		var out = new Mat(m.n, m.d); // probability volume
-		var maxval = -999999;
-		for (var i = 0, n = m.w.length; i < n; i++) {
-			if (m.w[i] > maxval) maxval = m.w[i];
-		}
-		var s = 0.0;
-		for (var i = 0, n = m.w.length; i < n; i++) {
-			out.w[i] = Math.exp(m.w[i] - maxval);
-			s += out.w[i];
-		}
-		for (var i = 0, n = m.w.length; i < n; i++) {
-			out.w[i] /= s;
-		}
-		// no backward pass here needed
-		// since we will use the computed probabilities outside
-		// to set gradients directly on m
-		return out;
-	}
-	var Solver = function() {
-		this.decay_rate = 0.999;
-		this.smooth_eps = 1e-8;
-		this.step_cache = {};
-	}
-	Solver.prototype = {
-		step: function(model, step_size, regc, clipval) {
-			// perform parameter update
-			var solver_stats = {};
-			var num_clipped = 0;
-			var num_tot = 0;
-			for (var k in model) {
-				if (model.hasOwnProperty(k)) {
-					var m = model[k]; // mat ref
-					if (!(k in this.step_cache)) {
-						this.step_cache[k] = new Mat(m.n, m.d);
-					}
-					var s = this.step_cache[k];
-					for (var i = 0, n = m.w.length; i < n; i++) {
-						// rmsprop adaptive learning rate
-						var mdwi = m.dw[i];
-						s.w[i] = s.w[i] * this.decay_rate + (1.0 - this.decay_rate) * mdwi * mdwi;
-						// gradient clip
-						if (mdwi > clipval) {
-							mdwi = clipval;
-							num_clipped++;
-						}
-						if (mdwi < -clipval) {
-							mdwi = -clipval;
-							num_clipped++;
-						}
-						num_tot++;
-						// update (and regularize)
-						m.w[i] += -step_size * mdwi / Math.sqrt(s.w[i] + this.smooth_eps) - regc * m.w[i];
-						m.dw[i] = 0; // reset gradients for next iteration
-					}
-				}
-			}
-			solver_stats['ratio_clipped'] = num_clipped * 1.0 / num_tot;
-			return solver_stats;
-		}
-	}
-	var initLSTM = function(input_size, hidden_sizes, output_size) {
-		// hidden size should be a list
-		var model = {};
-		for (var d = 0; d < hidden_sizes.length; d++) { // loop over depths
-			var prev_size = d === 0 ? input_size : hidden_sizes[d - 1];
-			var hidden_size = hidden_sizes[d];
-			// gates parameters
-			model['Wix' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
-			model['Wih' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
-			model['bi' + d] = new Mat(hidden_size, 1);
-			model['Wfx' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
-			model['Wfh' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
-			model['bf' + d] = new Mat(hidden_size, 1);
-			model['Wox' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
-			model['Woh' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
-			model['bo' + d] = new Mat(hidden_size, 1);
-			// cell write params
-			model['Wcx' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
-			model['Wch' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
-			model['bc' + d] = new Mat(hidden_size, 1);
-		}
-		// decoder params
-		model['Whd'] = new RandMat(output_size, hidden_size, 0, 0.08);
-		model['bd'] = new Mat(output_size, 1);
-		return model;
-	}
-	var forwardLSTM = function(G, model, hidden_sizes, x, prev) {
-		// forward prop for a single tick of LSTM
-		// G is graph to append ops to
-		// model contains LSTM parameters
-		// x is 1D column vector with observation
-		// prev is a struct containing hidden and cell
-		// from previous iteration
-		if (prev == null || typeof prev.h === 'undefined') {
-			var hidden_prevs = [];
-			var cell_prevs = [];
-			for (var d = 0; d < hidden_sizes.length; d++) {
-				hidden_prevs.push(new R.Mat(hidden_sizes[d], 1));
-				cell_prevs.push(new R.Mat(hidden_sizes[d], 1));
-			}
-		} else {
-			var hidden_prevs = prev.h;
-			var cell_prevs = prev.c;
-		}
-		var hidden = [];
-		var cell = [];
-		for (var d = 0; d < hidden_sizes.length; d++) {
-			var input_vector = d === 0 ? x : hidden[d - 1];
-			var hidden_prev = hidden_prevs[d];
-			var cell_prev = cell_prevs[d];
-			// input gate
-			var h0 = G.mul(model['Wix' + d], input_vector);
-			var h1 = G.mul(model['Wih' + d], hidden_prev);
-			var input_gate = G.sigmoid(G.add(G.add(h0, h1), model['bi' + d]));
-			// forget gate
-			var h2 = G.mul(model['Wfx' + d], input_vector);
-			var h3 = G.mul(model['Wfh' + d], hidden_prev);
-			var forget_gate = G.sigmoid(G.add(G.add(h2, h3), model['bf' + d]));
-			// output gate
-			var h4 = G.mul(model['Wox' + d], input_vector);
-			var h5 = G.mul(model['Woh' + d], hidden_prev);
-			var output_gate = G.sigmoid(G.add(G.add(h4, h5), model['bo' + d]));
-			// write operation on cells
-			var h6 = G.mul(model['Wcx' + d], input_vector);
-			var h7 = G.mul(model['Wch' + d], hidden_prev);
-			var cell_write = G.tanh(G.add(G.add(h6, h7), model['bc' + d]));
-			// compute new cell activation
-			var retain_cell = G.eltmul(forget_gate, cell_prev); // what do we keep from cell
-			var write_cell = G.eltmul(input_gate, cell_write); // what do we write to cell
-			var cell_d = G.add(retain_cell, write_cell); // new cell contents
-			// compute hidden state as gated, saturated cell activations
-			var hidden_d = G.eltmul(output_gate, G.tanh(cell_d));
-			hidden.push(hidden_d);
-			cell.push(cell_d);
-		}
-		// one decoder to outputs at end
-		var output = G.add(G.mul(model['Whd'], hidden[hidden.length - 1]), model['bd']);
-		// return cell memory, hidden representation and output
-		return {
-			'h': hidden,
-			'c': cell,
-			'o': output
-		};
-	}
-	var sig = function(x) {
-		// helper function for computing sigmoid
-		return 1.0 / (1 + Math.exp(-x));
-	}
-	var maxi = function(w) {
-		// argmax of array w
-		var maxv = w[0];
-		var maxix = 0;
-		for (var i = 1, n = w.length; i < n; i++) {
-			var v = w[i];
-			if (v > maxv) {
-				maxix = i;
-				maxv = v;
-			}
-		}
-		return maxix;
-	}
-	var samplei = function(w) {
-		// sample argmax from w, assuming w are 
-		// probabilities that sum to one
-		var r = randf(0, 1);
-		var x = 0.0;
-		var i = 0;
-		while (true) {
-			x += w[i];
-			if (x > r) {
-				return i;
-			}
-			i++;
-		}
-		return w.length - 1; // pretty sure we should never get here?
-	}
-	// various utils
-	global.assert = assert;
-	global.zeros = zeros;
-	global.maxi = maxi;
-	global.samplei = samplei;
-	global.randi = randi;
-	global.randn = randn;
-	global.softmax = softmax;
-	// classes
-	global.Mat = Mat;
-	global.RandMat = RandMat;
-	global.forwardLSTM = forwardLSTM;
-	global.initLSTM = initLSTM;
-	// more utils
-	global.updateMat = updateMat;
-	global.updateNet = updateNet;
-	global.copyMat = copyMat;
-	global.copyNet = copyNet;
-	global.netToJSON = netToJSON;
-	global.netFromJSON = netFromJSON;
-	global.netZeroGrads = netZeroGrads;
-	global.netFlattenGrads = netFlattenGrads;
-	// optimization
-	global.Solver = Solver;
-	global.Graph = Graph;
+(function (global) {
+    "use strict";
+    // Utility fun
+    function assert(condition, message) { // throws when `condition` is falsy; `message` is optional
+        // from http://stackoverflow.com/questions/15313418/javascript-assert
+        if (!condition) {
+            message = message || "Assertion failed"; // default text when no message is supplied
+            if (typeof Error !== "undefined") {
+                throw new Error(message);
+            }
+            throw message; // Fallback: throw the bare message when Error is unavailable
+        }
+    }
+    // Random numbers utils
+    var return_v = false; // true when v_val holds a cached sample to hand out on the next call
+    var v_val = 0.0; // second sample saved by the previous gaussRandom() call
+    var gaussRandom = function () { // gaussian sampler (polar method); each accepted (u, v) pair yields two samples
+        if (return_v) {
+            return_v = false;
+            return v_val;
+        }
+        var u = 2 * Math.random() - 1;
+        var v = 2 * Math.random() - 1;
+        var r = u * u + v * v;
+        if (r == 0 || r > 1) return gaussRandom(); // reject points outside the unit disc (or at the origin) and retry
+        var c = Math.sqrt(-2 * Math.log(r) / r);
+        v_val = v * c; // cache the second sample for the next call
+        return_v = true;
+        return u * c;
+    }
+    var randf = function (a, b) { // Math.random() scaled by (b - a) and offset by a
+        return Math.random() * (b - a) + a;
+    }
+    var randi = function (a, b) { // same draw as randf, floored; for a < b the result stays below b
+        return Math.floor(Math.random() * (b - a) + a);
+    }
+    var randn = function (mu, std) { // gaussRandom() sample scaled by std and shifted by mu
+        return mu + gaussRandom() * std;
+    }
+    // helper function: returns an array of n zeros,
+    // using a Float64Array when typed arrays are available
+    var zeros = function (n) {
+        if (typeof (n) === 'undefined' || isNaN(n)) { // missing or NaN length -> empty plain array
+            return [];
+        }
+        if (typeof ArrayBuffer === 'undefined') {
+            // no typed-array support: build a plain Array and zero it by hand
+            var arr = new Array(n);
+            for (var i = 0; i < n; i++) {
+                arr[i] = 0;
+            }
+            return arr;
+        } else {
+            return new Float64Array(n); // typed arrays are zero-initialised on construction
+        }
+    }
+    // Mat holds a matrix: values in w plus a same-sized gradient buffer dw
+    var Mat = function (n, d) {
+        // n is the number of rows, d is the number of columns
+        this.n = n;
+        this.d = d;
+        this.w = zeros(n * d); // flat storage of n * d entries
+        this.dw = zeros(n * d); // same length as w
+    }
+    Mat.prototype = {
+        get: function (row, col) {
+            // slow but careful accessor function
+            // we want row-major order
+            var ix = (this.d * row) + col;
+            assert(ix >= 0 && ix < this.w.length); // only the flat index is range-checked, not row/col individually
+            return this.w[ix];
+        },
+        set: function (row, col, v) {
+            // slow but careful accessor function (same flat index as get)
+            var ix = (this.d * row) + col;
+            assert(ix >= 0 && ix < this.w.length);
+            this.w[ix] = v;
+        },
+        setFrom: function (arr) { // copies arr into w element by element, starting at index 0
+            for (var i = 0, n = arr.length; i < n; i++) {
+                this.w[i] = arr[i];
+            }
+        },
+        setColumn: function (m, i) { // writes the entries of m.w down column i, one per row
+            for (var q = 0, n = m.w.length; q < n; q++) {
+                this.w[(this.d * q) + i] = m.w[q];
+            }
+        },
+        toJSON: function () { // plain object with n, d and w; dw is not included
+            var json = {};
+            json['n'] = this.n;
+            json['d'] = this.d;
+            json['w'] = this.w; // same array reference as this.w, not a copy
+            return json;
+        },
+        fromJSON: function (json) { // takes shape json.n x json.d and loads json.w; dw starts zeroed
+            this.n = json.n;
+            this.d = json.d;
+            this.w = zeros(this.n * this.d);
+            this.dw = zeros(this.n * this.d);
+            for (var i = 0, n = this.n * this.d; i < n; i++) {
+                this.w[i] = json.w[i]; // copy over weights
+            }
+        }
+    }
+    var copyMat = function (b) { // new Mat with b's shape and a copy of b.w; b.dw is not copied
+        var a = new Mat(b.n, b.d);
+        a.setFrom(b.w);
+        return a;
+    }
+    var copyNet = function (net) { // returns a new object holding a copyMat of each own property
+        // nets are (k,v) pairs with k = string key, v = Mat()
+        var new_net = {};
+        for (var p in net) {
+            if (net.hasOwnProperty(p)) { // skip inherited properties
+                new_net[p] = copyMat(net[p]);
+            }
+        }
+        return new_net;
+    }
+    var updateMat = function (m, alpha) {
+        // updates in place: w -= alpha * dw, then clears the consumed dw entry
+        for (var i = 0, n = m.n * m.d; i < n; i++) {
+            if (m.dw[i] !== 0) { // entries with a zero gradient are left untouched
+                m.w[i] += -alpha * m.dw[i];
+                m.dw[i] = 0;
+            }
+        }
+    }
+    var updateNet = function (net, alpha) { // applies updateMat to every own property of net
+        for (var p in net) {
+            if (net.hasOwnProperty(p)) {
+                updateMat(net[p], alpha);
+            }
+        }
+    }
+    var netToJSON = function (net) { // maps each own key of net to that Mat's toJSON() object
+        var j = {};
+        for (var p in net) {
+            if (net.hasOwnProperty(p)) {
+                j[p] = net[p].toJSON();
+            }
+        }
+        return j;
+    }
+    var netFromJSON = function (j) { // inverse of netToJSON: builds a Mat for each own key of j
+        var net = {};
+        for (var p in j) {
+            if (j.hasOwnProperty(p)) {
+                net[p] = new Mat(1, 1); // placeholder 1x1 Mat; fromJSON below replaces its shape and data
+                net[p].fromJSON(j[p]);
+            }
+        }
+        return net;
+    }
+    var netZeroGrads = function (net) { // sets every dw entry of every own property of net to 0
+        for (var p in net) {
+            if (net.hasOwnProperty(p)) {
+                var mat = net[p];
+                gradFillConst(mat, 0);
+            }
+        }
+    }
+    var netFlattenGrads = function (net) { // concatenates every dw in net into a single column Mat
+        var n = 0;
+        for (var p in net) { // first pass: total number of gradient entries
+            if (net.hasOwnProperty(p)) {
+                var mat = net[p];
+                n += mat.dw.length;
+            }
+        }
+        var g = new Mat(n, 1);
+        var ix = 0;
+        for (var p in net) { // second pass: copy the gradients in the same for-in order
+            if (net.hasOwnProperty(p)) {
+                var mat = net[p];
+                for (var i = 0, m = mat.dw.length; i < m; i++) {
+                    g.w[ix] = mat.dw[i]; // gradients land in g.w; g.dw stays zero
+                    ix++;
+                }
+            }
+        }
+        return g;
+    }
+    // returns an n x d Mat whose w is filled with gaussian samples (mean mu, std-dev std)
+    var RandMat = function (n, d, mu, std) {
+        var m = new Mat(n, d);
+        fillRandn(m, mu, std);
+        //fillRand(m,-std,std); // kind of :P
+        return m; // returning an object means `new RandMat(...)` evaluates to this Mat
+    }
+    // Mat utils
+    // fill m.w with gaussian samples drawn via randn(mu, std); dw is not touched
+    var fillRandn = function (m, mu, std) {
+        for (var i = 0, n = m.w.length; i < n; i++) {
+            m.w[i] = randn(mu, std);
+        }
+    }
+    var fillRand = function (m, lo, hi) { // fill m.w with uniform samples drawn via randf(lo, hi)
+        for (var i = 0, n = m.w.length; i < n; i++) {
+            m.w[i] = randf(lo, hi);
+        }
+    }
+    var gradFillConst = function (m, c) { // sets every entry of m.dw to the constant c
+        for (var i = 0, n = m.dw.length; i < n; i++) {
+            m.dw[i] = c
+        }
+    }
+    // Transformer definitions
+    var Graph = function (needs_backprop) { // needs_backprop is optional and defaults to true
+        if (typeof needs_backprop === 'undefined') {
+            needs_backprop = true;
+        }
+        this.needs_backprop = needs_backprop; // when falsy, the ops below skip recording backward closures
+        // this will store a list of functions that perform backprop,
+        // in their forward pass order. So in backprop we will go
+        // backwards and invoke each one
+        this.backprop = [];
+    }
+    Graph.prototype = {
+        backward: function () { // runs the recorded backward closures from last to first
+            for (var i = this.backprop.length - 1; i >= 0; i--) {
+                this.backprop[i](); // tick!
+            }
+        },
+        rowPluck: function (m, ix) {
+            // pluck a row of m with index ix and return it as col vector
+            assert(ix >= 0 && ix < m.n);
+            var d = m.d;
+            var out = new Mat(d, 1);
+            for (var i = 0, n = d; i < n; i++) {
+                out.w[i] = m.w[d * ix + i];
+            } // copy over the data
+            if (this.needs_backprop) {
+                var backward = function () { // adds out's gradient into row ix of m.dw
+                    for (var i = 0, n = d; i < n; i++) {
+                        m.dw[d * ix + i] += out.dw[i];
+                    }
+                }
+                this.backprop.push(backward);
+            }
+            return out;
+        },
+        tanh: function (m) {
+            // tanh nonlinearity, applied element-wise; returns a new Mat of the same shape
+            var out = new Mat(m.n, m.d);
+            var n = m.w.length;
+            for (var i = 0; i < n; i++) {
+                out.w[i] = Math.tanh(m.w[i]);
+            }
+            if (this.needs_backprop) {
+                var backward = function () {
+                    for (var i = 0; i < n; i++) {
+                        // grad for z = tanh(x) is (1 - z^2)
+                        var mwi = out.w[i]; // z, i.e. the already-computed output value
+                        m.dw[i] += (1.0 - mwi * mwi) * out.dw[i];
+                    }
+                }
+                this.backprop.push(backward);
+            }
+            return out;
+        },
+        sigmoid: function (m) {
+            // sigmoid nonlinearity, applied element-wise; returns a new Mat of the same shape
+            var out = new Mat(m.n, m.d);
+            var n = m.w.length;
+            for (var i = 0; i < n; i++) {
+                out.w[i] = sig(m.w[i]);
+            }
+            if (this.needs_backprop) {
+                var backward = function () {
+                    for (var i = 0; i < n; i++) {
+                        // grad for z = sigmoid(x) is z * (1 - z)
+                        var mwi = out.w[i]; // z, i.e. the already-computed output value
+                        m.dw[i] += mwi * (1.0 - mwi) * out.dw[i];
+                    }
+                }
+                this.backprop.push(backward);
+            }
+            return out;
+        },
+        relu: function (m) { // element-wise max(0, x); returns a new Mat of the same shape
+            var out = new Mat(m.n, m.d);
+            var n = m.w.length;
+            for (var i = 0; i < n; i++) {
+                out.w[i] = Math.max(0, m.w[i]); // relu
+            }
+            if (this.needs_backprop) {
+                var backward = function () {
+                    for (var i = 0; i < n; i++) {
+                        m.dw[i] += m.w[i] > 0 ? out.dw[i] : 0.0; // gradient passes only where the input was positive
+                    }
+                }
+                this.backprop.push(backward);
+            }
+            return out;
+        },
+        mul: function (m1, m2) {
+            // multiply matrices m1 * m2; result has m1.n rows and m2.d columns
+            assert(m1.d === m2.n, 'matmul dimensions misaligned');
+            var n = m1.n;
+            var d = m2.d;
+            var out = new Mat(n, d);
+            for (var i = 0; i < m1.n; i++) { // loop over rows of m1
+                for (var j = 0; j < m2.d; j++) { // loop over cols of m2
+                    var dot = 0.0;
+                    for (var k = 0; k < m1.d; k++) { // dot product loop
+                        dot += m1.w[m1.d * i + k] * m2.w[m2.d * k + j];
+                    }
+                    out.w[d * i + j] = dot;
+                }
+            }
+            if (this.needs_backprop) {
+                var backward = function () { // accumulates gradients into both m1.dw and m2.dw
+                    for (var i = 0; i < m1.n; i++) { // loop over rows of m1
+                        for (var j = 0; j < m2.d; j++) { // loop over cols of m2
+                            for (var k = 0; k < m1.d; k++) { // dot product loop
+                                var b = out.dw[d * i + j]; // gradient of output entry (i, j); does not depend on k
+                                m1.dw[m1.d * i + k] += m2.w[m2.d * k + j] * b;
+                                m2.dw[m2.d * k + j] += m1.w[m1.d * i + k] * b;
+                            }
+                        }
+                    }
+                }
+                this.backprop.push(backward);
+            }
+            return out;
+        },
+        add: function (m1, m2) { // element-wise sum; result takes m1's shape
+            assert(m1.w.length === m2.w.length); // only total length is checked, not n and d separately
+            var out = new Mat(m1.n, m1.d);
+            for (var i = 0, n = m1.w.length; i < n; i++) {
+                out.w[i] = m1.w[i] + m2.w[i];
+            }
+            if (this.needs_backprop) {
+                var backward = function () { // the output gradient is added unchanged to both inputs
+                    for (var i = 0, n = m1.w.length; i < n; i++) {
+                        m1.dw[i] += out.dw[i];
+                        m2.dw[i] += out.dw[i];
+                    }
+                }
+                this.backprop.push(backward);
+            }
+            return out;
+        },
+        dot: function (m1, m2) {
+            // m1 m2 are both column vectors; returns their dot product in a 1x1 Mat
+            assert(m1.w.length === m2.w.length);
+            var out = new Mat(1, 1);
+            var dot = 0.0;
+            for (var i = 0, n = m1.w.length; i < n; i++) {
+                dot += m1.w[i] * m2.w[i];
+            }
+            out.w[0] = dot;
+            if (this.needs_backprop) {
+                var backward = function () { // each input's gradient is the other input scaled by out.dw[0]
+                    for (var i = 0, n = m1.w.length; i < n; i++) {
+                        m1.dw[i] += m2.w[i] * out.dw[0];
+                        m2.dw[i] += m1.w[i] * out.dw[0];
+                    }
+                }
+                this.backprop.push(backward);
+            }
+            return out;
+        },
+        eltmul: function (m1, m2) { // element-wise product; result takes m1's shape
+            assert(m1.w.length === m2.w.length); // only total length is checked, not n and d separately
+            var out = new Mat(m1.n, m1.d);
+            for (var i = 0, n = m1.w.length; i < n; i++) {
+                out.w[i] = m1.w[i] * m2.w[i];
+            }
+            if (this.needs_backprop) {
+                var backward = function () { // each input's gradient is the other input times out.dw, per element
+                    for (var i = 0, n = m1.w.length; i < n; i++) {
+                        m1.dw[i] += m2.w[i] * out.dw[i];
+                        m2.dw[i] += m1.w[i] * out.dw[i];
+                    }
+                }
+                this.backprop.push(backward);
+            }
+            return out;
+        },
+    }
+    var softmax = function (m) { // returns a new Mat of m's shape whose w holds softmax(m.w)
+        var out = new Mat(m.n, m.d); // probability volume
+        var maxval = -999999; // NOTE(review): finite sentinel; inputs all below it would not be treated as the max
+        for (var i = 0, n = m.w.length; i < n; i++) {
+            if (m.w[i] > maxval) maxval = m.w[i];
+        }
+        var s = 0.0;
+        for (var i = 0, n = m.w.length; i < n; i++) {
+            out.w[i] = Math.exp(m.w[i] - maxval); // subtracting the max keeps each exponent <= 0
+            s += out.w[i];
+        }
+        for (var i = 0, n = m.w.length; i < n; i++) {
+            out.w[i] /= s; // normalise so the entries sum to 1
+        }
+        // no backward pass here needed
+        // since we will use the computed probabilities outside
+        // to set gradients directly on m
+        return out;
+    }
+    var Solver = function () { // holds the state and constants used by Solver.prototype.step
+        this.decay_rate = 0.999; // weight kept from the previous squared-gradient average in step()
+        this.smooth_eps = 1e-8; // added under the square root in step() so the divisor is never zero
+        this.step_cache = {}; // per-parameter Mat of running squared-gradient averages, keyed like the model
+    }
+    Solver.prototype = {
+        step: function (model, step_size, regc, clipval) {
+            // perform parameter update on every own Mat of model; returns { ratio_clipped }
+            var solver_stats = {};
+            var num_clipped = 0;
+            var num_tot = 0;
+            for (var k in model) {
+                if (model.hasOwnProperty(k)) {
+                    var m = model[k]; // mat ref
+                    if (!(k in this.step_cache)) {
+                        this.step_cache[k] = new Mat(m.n, m.d); // lazily created, zero-filled cache for this parameter
+                    }
+                    var s = this.step_cache[k];
+                    for (var i = 0, n = m.w.length; i < n; i++) {
+                        // rmsprop adaptive learning rate
+                        var mdwi = m.dw[i];
+                        s.w[i] = s.w[i] * this.decay_rate + (1.0 - this.decay_rate) * mdwi * mdwi;
+                        // gradient clip (applied after the cache update, so the cache sees the raw gradient)
+                        if (mdwi > clipval) {
+                            mdwi = clipval;
+                            num_clipped++;
+                        }
+                        if (mdwi < -clipval) {
+                            mdwi = -clipval;
+                            num_clipped++;
+                        }
+                        num_tot++;
+                        // update (and regularize): regc * w is subtracted on top of the scaled gradient step
+                        m.w[i] += -step_size * mdwi / Math.sqrt(s.w[i] + this.smooth_eps) - regc * m.w[i];
+                        m.dw[i] = 0; // reset gradients for next iteration
+                    }
+                }
+            }
+            solver_stats['ratio_clipped'] = num_clipped * 1.0 / num_tot; // NaN when the model has no entries (0 / 0)
+            return solver_stats;
+        }
+    }
+    var initLSTM = function (input_size, hidden_sizes, output_size) {
+        // Creates the parameter dictionary of a stacked LSTM and its output decoder.
+        // hidden_sizes is a list holding one hidden size per layer; layer 0 takes
+        // input_size inputs and every deeper layer takes the previous layer's size.
+        // Keys are 'W<gate>x<layer>', 'W<gate>h<layer>' and 'b<gate><layer>', plus 'Whd' and 'bd'.
+        // Returns the dictionary of Mat objects.
+        var params = {};
+        var gates = ['i', 'f', 'o', 'c']; // input, forget and output gates, then the cell write
+        var depth = hidden_sizes.length;
+        for (var layer = 0; layer < depth; layer++) {
+            var in_size = layer === 0 ? input_size : hidden_sizes[layer - 1];
+            var out_size = hidden_sizes[layer];
+            for (var g = 0; g < gates.length; g++) {
+                var gate = gates[g];
+                // per gate: weights on the layer input, weights on the hidden state, bias
+                params['W' + gate + 'x' + layer] = new RandMat(out_size, in_size, 0, 0.08);
+                params['W' + gate + 'h' + layer] = new RandMat(out_size, out_size, 0, 0.08);
+                params['b' + gate + layer] = new Mat(out_size, 1);
+            }
+        }
+        // decoder params, sized by the last layer's hidden size
+        var last_size = hidden_sizes[depth - 1];
+        params['Whd'] = new RandMat(output_size, last_size, 0, 0.08);
+        params['bd'] = new Mat(output_size, 1);
+        return params;
+    }
+    var forwardLSTM = function (G, model, hidden_sizes, x, prev) {
+        // Forward pass for one time step of a stacked LSTM (one layer per hidden_sizes entry).
+        // G is the graph that ops are appended to.
+        // model contains the LSTM parameters, looked up by the keys initLSTM creates.
+        // x is a 1D column vector with the observation; it feeds layer 0.
+        // prev is a struct containing hidden (h) and cell (c) from the previous iteration.
+        // Returns a struct {h, c, o}: per-layer hidden, per-layer cell, decoder output.
+        if (prev == null || typeof prev.h === 'undefined') {
+            var hidden_prevs = []; // no previous state: give every layer a fresh R.Mat
+            var cell_prevs = [];
+            for (var d = 0; d < hidden_sizes.length; d++) {
+                hidden_prevs.push(new R.Mat(hidden_sizes[d], 1));
+                cell_prevs.push(new R.Mat(hidden_sizes[d], 1));
+            }
+        } else {
+            var hidden_prevs = prev.h;
+            var cell_prevs = prev.c;
+        }
+        var hidden = [];
+        var cell = [];
+        for (var d = 0; d < hidden_sizes.length; d++) {
+            var input_vector = d === 0 ? x : hidden[d - 1]; // deeper layers read the layer below
+            var hidden_prev = hidden_prevs[d];
+            var cell_prev = cell_prevs[d];
+            // input gate
+            var h0 = G.mul(model['Wix' + d], input_vector);
+            var h1 = G.mul(model['Wih' + d], hidden_prev);
+            var input_gate = G.sigmoid(G.add(G.add(h0, h1), model['bi' + d]));
+            // forget gate
+            var h2 = G.mul(model['Wfx' + d], input_vector);
+            var h3 = G.mul(model['Wfh' + d], hidden_prev);
+            var forget_gate = G.sigmoid(G.add(G.add(h2, h3), model['bf' + d]));
+            // output gate
+            var h4 = G.mul(model['Wox' + d], input_vector);
+            var h5 = G.mul(model['Woh' + d], hidden_prev);
+            var output_gate = G.sigmoid(G.add(G.add(h4, h5), model['bo' + d]));
+            // write operation on cells
+            var h6 = G.mul(model['Wcx' + d], input_vector);
+            var h7 = G.mul(model['Wch' + d], hidden_prev);
+            var cell_write = G.tanh(G.add(G.add(h6, h7), model['bc' + d]));
+            // compute new cell activation
+            var retain_cell = G.eltmul(forget_gate, cell_prev); // what do we keep from cell
+            var write_cell = G.eltmul(input_gate, cell_write); // what do we write to cell
+            var cell_d = G.add(retain_cell, write_cell); // new cell contents
+            // compute hidden state as gated, saturated cell activations
+            var hidden_d = G.eltmul(output_gate, G.tanh(cell_d));
+            hidden.push(hidden_d);
+            cell.push(cell_d);
+        }
+        // a single decoder maps the last layer's hidden state to the output
+        var output = G.add(G.mul(model['Whd'], hidden[hidden.length - 1]), model['bd']);
+        // return cell memory, hidden representation and output
+        return {
+            'h': hidden,
+            'c': cell,
+            'o': output
+        };
+    }
+    var sig = function (x) { // logistic sigmoid of a single number
+        var decay = Math.exp(-x);
+        return 1.0 / (1 + decay);
+    }
+    var maxi = function (w) {
+        // index of the largest entry of w; the earliest index wins on ties
+        var best = 0;
+        var top = w[0];
+        var k = 0, len = w.length;
+        while (++k < len) {
+            if (w[k] > top) {
+                best = k;
+                top = w[k];
+            }
+        }
+        return best;
+    }
+    var samplei = function (w) {
+        // Samples an index from w, assuming w holds probabilities that sum to one.
+        // The scan is bounded by w.length: if rounding leaves the running total at or
+        // below r, an unbounded loop would read past the end, turn the total into NaN
+        // and never terminate. In that case the last index is returned instead.
+        var r = randf(0, 1);
+        var x = 0.0;
+        for (var i = 0, n = w.length; i < n; i++) {
+            x += w[i];
+            if (x > r) {
+                return i;
+            }
+        }
+        return w.length - 1; // only reached when the running total never exceeds r
+    }
+    // various utils
+    global.assert = assert;
+    global.zeros = zeros;
+    global.maxi = maxi;
+    global.samplei = samplei;
+    global.randi = randi;
+    global.randf = randf; // exported too: consumers read R.randf alongside R.randi
+    global.randn = randn;
+    global.softmax = softmax;
+    // classes
+    global.Mat = Mat;
+    global.RandMat = RandMat;
+    global.forwardLSTM = forwardLSTM;
+    global.initLSTM = initLSTM;
+    // more utils
+    global.updateMat = updateMat;
+    global.updateNet = updateNet;
+    global.copyMat = copyMat;
+    global.copyNet = copyNet;
+    global.netToJSON = netToJSON;
+    global.netFromJSON = netFromJSON;
+    global.netZeroGrads = netZeroGrads;
+    global.netFlattenGrads = netFlattenGrads;
+    global.Solver = Solver; // optimization
+    global.Graph = Graph;
 })(R);
 // END OF RECURRENTJS
 var RL = {};
-(function(global) {
-	"use strict";
-	// syntactic sugar function for getting default parameter values
-	var getopt = function(opt, field_name, default_value) {
-		if (typeof opt === 'undefined') {
-			return default_value;
-		}
-		return (typeof opt[field_name] !== 'undefined') ? opt[field_name] : default_value;
-	}
-	var zeros = R.zeros; // inherit these
-	var assert = R.assert;
-	var randi = R.randi;
-	var randf = R.randf;
-	var setConst = function(arr, c) {
-		for (var i = 0, n = arr.length; i < n; i++) {
-			arr[i] = c;
-		}
-	}
-	var sampleWeighted = function(p) {
-		var r = Math.random();
-		var c = 0.0;
-		for (var i = 0, n = p.length; i < n; i++) {
-			c += p[i];
-			if (c >= r) {
-				return i;
-			}
-		}
-		assert(false, 'wtf');
-	}
-	// ------
-	// AGENTS
-	// ------
-	// DPAgent performs Value Iteration
-	// - can also be used for Policy Iteration if you really wanted to
-	// - requires model of the environment :(
-	// - does not learn from experience :(
-	// - assumes finite MDP :(
-	var DPAgent = function(env, opt) {
-		this.V = null; // state value function
-		this.P = null; // policy distribution \pi(s,a)
-		this.env = env; // store pointer to environment
-		this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
-		this.reset();
-	}
-	DPAgent.prototype = {
-		reset: function() {
-			// reset the agent's policy and value function
-			this.ns = this.env.getNumStates();
-			this.na = this.env.getMaxNumActions();
-			this.V = zeros(this.ns);
-			this.P = zeros(this.ns * this.na);
-			// initialize uniform random policy
-			for (var s = 0; s < this.ns; s++) {
-				var poss = this.env.allowedActions(s);
-				for (var i = 0, n = poss.length; i < n; i++) {
-					this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
-				}
-			}
-		},
-		act: function(s) {
-			// behave according to the learned policy
-			var poss = this.env.allowedActions(s);
-			var ps = [];
-			for (var i = 0, n = poss.length; i < n; i++) {
-				var a = poss[i];
-				var prob = this.P[a * this.ns + s];
-				ps.push(prob);
-			}
-			var maxi = sampleWeighted(ps);
-			return poss[maxi];
-		},
-		learn: function() {
-			// perform a single round of value iteration
-			self.evaluatePolicy(); // writes this.V
-			self.updatePolicy(); // writes this.P
-		},
-		evaluatePolicy: function() {
-			// perform a synchronous update of the value function
-			var Vnew = zeros(this.ns);
-			for (var s = 0; s < this.ns; s++) {
-				// integrate over actions in a stochastic policy
-				// note that we assume that policy probability mass over allowed actions sums to one
-				var v = 0.0;
-				var poss = this.env.allowedActions(s);
-				for (var i = 0, n = poss.length; i < n; i++) {
-					var a = poss[i];
-					var prob = this.P[a * this.ns + s]; // probability of taking action under policy
-					if (prob === 0) {
-						continue;
-					} // no contribution, skip for speed
-					var ns = this.env.nextStateDistribution(s, a);
-					var rs = this.env.reward(s, a, ns); // reward for s->a->ns transition
-					v += prob * (rs + this.gamma * this.V[ns]);
-				}
-				Vnew[s] = v;
-			}
-			this.V = Vnew; // swap
-		},
-		updatePolicy: function() {
-			// update policy to be greedy w.r.t. learned Value function
-			for (var s = 0; s < this.ns; s++) {
-				var poss = this.env.allowedActions(s);
-				// compute value of taking each allowed action
-				var vmax, nmax;
-				var vs = [];
-				for (var i = 0, n = poss.length; i < n; i++) {
-					var a = poss[i];
-					var ns = this.env.nextStateDistribution(s, a);
-					var rs = this.env.reward(s, a, ns);
-					var v = rs + this.gamma * this.V[ns];
-					vs.push(v);
-					if (i === 0 || v > vmax) {
-						vmax = v;
-						nmax = 1;
-					} else if (v === vmax) {
-						nmax += 1;
-					}
-				}
-				// update policy smoothly across all argmaxy actions
-				for (var i = 0, n = poss.length; i < n; i++) {
-					var a = poss[i];
-					this.P[a * this.ns + s] = (vs[i] === vmax) ? 1.0 / nmax : 0.0;
-				}
-			}
-		},
-	}
-	// QAgent uses TD (Q-Learning, SARSA)
-	// - does not require environment model :)
-	// - learns from experience :)
-	var TDAgent = function(env, opt) {
-		this.update = getopt(opt, 'update', 'qlearn'); // qlearn | sarsa
-		this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
-		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
-		this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
-		// class allows non-deterministic policy, and smoothly regressing towards the optimal policy based on Q
-		this.smooth_policy_update = getopt(opt, 'smooth_policy_update', false);
-		this.beta = getopt(opt, 'beta', 0.01); // learning rate for policy, if smooth updates are on
-		// eligibility traces
-		this.lambda = getopt(opt, 'lambda', 0); // eligibility trace decay. 0 = no eligibility traces used
-		this.replacing_traces = getopt(opt, 'replacing_traces', true);
-		// optional optimistic initial values
-		this.q_init_val = getopt(opt, 'q_init_val', 0);
-		this.planN = getopt(opt, 'planN', 0); // number of planning steps per learning iteration (0 = no planning)
-		this.Q = null; // state action value function
-		this.P = null; // policy distribution \pi(s,a)
-		this.e = null; // eligibility trace
-		this.env_model_s = null;; // environment model (s,a) -> (s',r)
-		this.env_model_r = null;; // environment model (s,a) -> (s',r)
-		this.env = env; // store pointer to environment
-		this.reset();
-	}
-	TDAgent.prototype = {
-		reset: function() {
-			// reset the agent's policy and value function
-			this.ns = this.env.getNumStates();
-			this.na = this.env.getMaxNumActions();
-			this.Q = zeros(this.ns * this.na);
-			if (this.q_init_val !== 0) {
-				setConst(this.Q, this.q_init_val);
-			}
-			this.P = zeros(this.ns * this.na);
-			this.e = zeros(this.ns * this.na);
-			// model/planning vars
-			this.env_model_s = zeros(this.ns * this.na);
-			setConst(this.env_model_s, -1); // init to -1 so we can test if we saw the state before
-			this.env_model_r = zeros(this.ns * this.na);
-			this.sa_seen = [];
-			this.pq = zeros(this.ns * this.na);
-			// initialize uniform random policy
-			for (var s = 0; s < this.ns; s++) {
-				var poss = this.env.allowedActions(s);
-				for (var i = 0, n = poss.length; i < n; i++) {
-					this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
-				}
-			}
-			// agent memory, needed for streaming updates
-			// (s0,a0,r0,s1,a1,r1,...)
-			this.r0 = null;
-			this.s0 = null;
-			this.s1 = null;
-			this.a0 = null;
-			this.a1 = null;
-		},
-		resetEpisode: function() {
-			// an episode finished
-		},
-		act: function(s) {
-			// act according to epsilon greedy policy
-			var poss = this.env.allowedActions(s);
-			var probs = [];
-			for (var i = 0, n = poss.length; i < n; i++) {
-				probs.push(this.P[poss[i] * this.ns + s]);
-			}
-			// epsilon greedy policy
-			if (Math.random() < this.epsilon) {
-				var a = poss[randi(0, poss.length)]; // random available action
-				this.explored = true;
-			} else {
-				var a = poss[sampleWeighted(probs)];
-				this.explored = false;
-			}
-			// shift state memory
-			this.s0 = this.s1;
-			this.a0 = this.a1;
-			this.s1 = s;
-			this.a1 = a;
-			return a;
-		},
-		learn: function(r1) {
-			// takes reward for previous action, which came from a call to act()
-			if (!(this.r0 == null)) {
-				this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1, this.lambda);
-				if (this.planN > 0) {
-					this.updateModel(this.s0, this.a0, this.r0, this.s1);
-					this.plan();
-				}
-			}
-			this.r0 = r1; // store this for next update
-		},
-		updateModel: function(s0, a0, r0, s1) {
-			// transition (s0,a0) -> (r0,s1) was observed. Update environment model
-			var sa = a0 * this.ns + s0;
-			if (this.env_model_s[sa] === -1) {
-				// first time we see this state action
-				this.sa_seen.push(a0 * this.ns + s0); // add as seen state
-			}
-			this.env_model_s[sa] = s1;
-			this.env_model_r[sa] = r0;
-		},
-		plan: function() {
-			// order the states based on current priority queue information
-			var spq = [];
-			for (var i = 0, n = this.sa_seen.length; i < n; i++) {
-				var sa = this.sa_seen[i];
-				var sap = this.pq[sa];
-				if (sap > 1e-5) { // gain a bit of efficiency
-					spq.push({
-						sa: sa,
-						p: sap
-					});
-				}
-			}
-			spq.sort(function(a, b) {
-				return a.p < b.p ? 1 : -1
-			});
-			// perform the updates
-			var nsteps = Math.min(this.planN, spq.length);
-			for (var k = 0; k < nsteps; k++) {
-				// random exploration
-				//var i = randi(0, this.sa_seen.length); // pick random prev seen state action
-				//var s0a0 = this.sa_seen[i];
-				var s0a0 = spq[k].sa;
-				this.pq[s0a0] = 0; // erase priority, since we're backing up this state
-				var s0 = s0a0 % this.ns;
-				var a0 = Math.floor(s0a0 / this.ns);
-				var r0 = this.env_model_r[s0a0];
-				var s1 = this.env_model_s[s0a0];
-				var a1 = -1; // not used for Q learning
-				if (this.update === 'sarsa') {
-					// generate random action?...
-					var poss = this.env.allowedActions(s1);
-					var a1 = poss[randi(0, poss.length)];
-				}
-				this.learnFromTuple(s0, a0, r0, s1, a1, 0); // note lambda = 0 - shouldnt use eligibility trace here
-			}
-		},
-		learnFromTuple: function(s0, a0, r0, s1, a1, lambda) {
-			var sa = a0 * this.ns + s0;
-			// calculate the target for Q(s,a)
-			if (this.update === 'qlearn') {
-				// Q learning target is Q(s0,a0) = r0 + gamma * max_a Q[s1,a]
-				var poss = this.env.allowedActions(s1);
-				var qmax = 0;
-				for (var i = 0, n = poss.length; i < n; i++) {
-					var s1a = poss[i] * this.ns + s1;
-					var qval = this.Q[s1a];
-					if (i === 0 || qval > qmax) {
-						qmax = qval;
-					}
-				}
-				var target = r0 + this.gamma * qmax;
-			} else if (this.update === 'sarsa') {
-				// SARSA target is Q(s0,a0) = r0 + gamma * Q[s1,a1]
-				var s1a1 = a1 * this.ns + s1;
-				var target = r0 + this.gamma * this.Q[s1a1];
-			}
-			if (lambda > 0) {
-				// perform an eligibility trace update
-				if (this.replacing_traces) {
-					this.e[sa] = 1;
-				} else {
-					this.e[sa] += 1;
-				}
-				var edecay = lambda * this.gamma;
-				var state_update = zeros(this.ns);
-				for (var s = 0; s < this.ns; s++) {
-					var poss = this.env.allowedActions(s);
-					for (var i = 0; i < poss.length; i++) {
-						var a = poss[i];
-						var saloop = a * this.ns + s;
-						var esa = this.e[saloop];
-						var update = this.alpha * esa * (target - this.Q[saloop]);
-						this.Q[saloop] += update;
-						this.updatePriority(s, a, update);
-						this.e[saloop] *= edecay;
-						var u = Math.abs(update);
-						if (u > state_update[s]) {
-							state_update[s] = u;
-						}
-					}
-				}
-				for (var s = 0; s < this.ns; s++) {
-					if (state_update[s] > 1e-5) { // save efficiency here
-						this.updatePolicy(s);
-					}
-				}
-				if (this.explored && this.update === 'qlearn') {
-					// have to wipe the trace since q learning is off-policy :(
-					this.e = zeros(this.ns * this.na);
-				}
-			} else {
-				// simpler and faster update without eligibility trace
-				// update Q[sa] towards it with some step size
-				var update = this.alpha * (target - this.Q[sa]);
-				this.Q[sa] += update;
-				this.updatePriority(s0, a0, update);
-				// update the policy to reflect the change (if appropriate)
-				this.updatePolicy(s0);
-			}
-		},
-		updatePriority: function(s, a, u) {
-			// used in planning. Invoked when Q[sa] += update
-			// we should find all states that lead to (s,a) and upgrade their priority
-			// of being update in the next planning step
-			u = Math.abs(u);
-			if (u < 1e-5) {
-				return;
-			} // for efficiency skip small updates
-			if (this.planN === 0) {
-				return;
-			} // there is no planning to be done, skip.
-			for (var si = 0; si < this.ns; si++) {
-				// note we are also iterating over impossible actions at all states,
-				// but this should be okay because their env_model_s should simply be -1
-				// as initialized, so they will never be predicted to point to any state
-				// because they will never be observed, and hence never be added to the model
-				for (var ai = 0; ai < this.na; ai++) {
-					var siai = ai * this.ns + si;
-					if (this.env_model_s[siai] === s) {
-						// this state leads to s, add it to priority queue
-						this.pq[siai] += u;
-					}
-				}
-			}
-		},
-		updatePolicy: function(s) {
-			var poss = this.env.allowedActions(s);
-			// set policy at s to be the action that achieves max_a Q(s,a)
-			// first find the maxy Q values
-			var qmax, nmax;
-			var qs = [];
-			for (var i = 0, n = poss.length; i < n; i++) {
-				var a = poss[i];
-				var qval = this.Q[a * this.ns + s];
-				qs.push(qval);
-				if (i === 0 || qval > qmax) {
-					qmax = qval;
-					nmax = 1;
-				} else if (qval === qmax) {
-					nmax += 1;
-				}
-			}
-			// now update the policy smoothly towards the argmaxy actions
-			var psum = 0.0;
-			for (var i = 0, n = poss.length; i < n; i++) {
-				var a = poss[i];
-				var target = (qs[i] === qmax) ? 1.0 / nmax : 0.0;
-				var ix = a * this.ns + s;
-				if (this.smooth_policy_update) {
-					// slightly hacky :p
-					this.P[ix] += this.beta * (target - this.P[ix]);
-					psum += this.P[ix];
-				} else {
-					// set hard target
-					this.P[ix] = target;
-				}
-			}
-			if (this.smooth_policy_update) {
-				// renomalize P if we're using smooth policy updates
-				for (var i = 0, n = poss.length; i < n; i++) {
-					var a = poss[i];
-					this.P[a * this.ns + s] /= psum;
-				}
-			}
-		}
-	}
-	var DQNAgent = function(env, opt) {
-		this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
-		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
-		this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
-		this.experience_add_every = getopt(opt, 'experience_add_every', 25); // number of time steps before we add another experience to replay memory
-		this.experience_size = getopt(opt, 'experience_size', 5000); // size of experience replay
-		this.learning_steps_per_iteration = getopt(opt, 'learning_steps_per_iteration', 10);
-		this.tderror_clamp = getopt(opt, 'tderror_clamp', 1.0);
-		this.num_hidden_units = getopt(opt, 'num_hidden_units', 100);
-		this.env = env;
-		this.reset();
-	}
-	DQNAgent.prototype = {
-		reset: function() {
-			this.nh = this.num_hidden_units; // number of hidden units
-			this.ns = this.env.getNumStates();
-			this.na = this.env.getMaxNumActions();
-			// nets are hardcoded for now as key (str) -> Mat
-			// not proud of this. better solution is to have a whole Net object
-			// on top of Mats, but for now sticking with this
-			this.net = {};
-			this.net.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
-			this.net.b1 = new R.Mat(this.nh, 1, 0, 0.01);
-			this.net.W2 = new R.RandMat(this.na, this.nh, 0, 0.01);
-			this.net.b2 = new R.Mat(this.na, 1, 0, 0.01);
-			this.exp = []; // experience
-			this.expi = 0; // where to insert
-			this.t = 0;
-			this.r0 = null;
-			this.s0 = null;
-			this.s1 = null;
-			this.a0 = null;
-			this.a1 = null;
-			this.tderror = 0; // for visualization only...
-		},
-		toJSON: function() {
-			// save function
-			var j = {};
-			j.nh = this.nh;
-			j.ns = this.ns;
-			j.na = this.na;
-			j.net = R.netToJSON(this.net);
-			return j;
-		},
-		fromJSON: function(j) {
-			// load function
-			this.nh = j.nh;
-			this.ns = j.ns;
-			this.na = j.na;
-			this.net = R.netFromJSON(j.net);
-		},
-		forwardQ: function(net, s, needs_backprop) {
-			var G = new R.Graph(needs_backprop);
-			var a1mat = G.add(G.mul(net.W1, s), net.b1);
-			var h1mat = G.tanh(a1mat);
-			var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
-			this.lastG = G; // back this up. Kind of hacky isn't it
-			return a2mat;
-		},
-		act: function(slist) {
-			// convert to a Mat column vector
-			var s = new R.Mat(this.ns, 1);
-			s.setFrom(slist);
-			// epsilon greedy policy
-			if (Math.random() < this.epsilon) {
-				var a = randi(0, this.na);
-			} else {
-				// greedy wrt Q function
-				var amat = this.forwardQ(this.net, s, false);
-				var a = R.maxi(amat.w); // returns index of argmax action
-			}
-			// shift state memory
-			this.s0 = this.s1;
-			this.a0 = this.a1;
-			this.s1 = s;
-			this.a1 = a;
-			return a;
-		},
-		learn: function(r1) {
-			// perform an update on Q function
-			if (!(this.r0 == null) && this.alpha > 0) {
-				// learn from this tuple to get a sense of how "surprising" it is to the agent
-				var tderror = this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1);
-				this.tderror = tderror; // a measure of surprise
-				// decide if we should keep this experience in the replay
-				if (this.t % this.experience_add_every === 0) {
-					this.exp[this.expi] = [this.s0, this.a0, this.r0, this.s1, this.a1];
-					this.expi += 1;
-					if (this.expi > this.experience_size) {
-						this.expi = 0;
-					} // roll over when we run out
-				}
-				this.t += 1;
-				// sample some additional experience from replay memory and learn from it
-				for (var k = 0; k < this.learning_steps_per_iteration; k++) {
-					var ri = randi(0, this.exp.length); // todo: priority sweeps?
-					var e = this.exp[ri];
-					this.learnFromTuple(e[0], e[1], e[2], e[3], e[4])
-				}
-			}
-			this.r0 = r1; // store for next update
-		},
-		learnFromTuple: function(s0, a0, r0, s1, a1) {
-			// want: Q(s,a) = r + gamma * max_a' Q(s',a')
-			// compute the target Q value
-			var tmat = this.forwardQ(this.net, s1, false);
-			var qmax = r0 + this.gamma * tmat.w[R.maxi(tmat.w)];
-			// now predict
-			var pred = this.forwardQ(this.net, s0, true);
-			var tderror = pred.w[a0] - qmax;
-			var clamp = this.tderror_clamp;
-			if (Math.abs(tderror) > clamp) { // huber loss to robustify
-				if (tderror > clamp) tderror = clamp;
-				if (tderror < -clamp) tderror = -clamp;
-			}
-			pred.dw[a0] = tderror;
-			this.lastG.backward(); // compute gradients on net params
-			// update net
-			R.updateNet(this.net, this.alpha);
-			return tderror;
-		}
-	}
-	// buggy implementation, doesnt work...
-	var SimpleReinforceAgent = function(env, opt) {
-		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
-		this.epsilon = getopt(opt, 'epsilon', 0.75); // for epsilon-greedy policy
-		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
-		this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
-		this.env = env;
-		this.reset();
-	}
-	SimpleReinforceAgent.prototype = {
-		reset: function() {
-			this.ns = this.env.getNumStates();
-			this.na = this.env.getMaxNumActions();
-			this.nh = 100; // number of hidden units
-			this.nhb = 100; // and also in the baseline lstm
-			this.actorNet = {};
-			this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
-			this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
-			this.actorNet.W2 = new R.RandMat(this.na, this.nh, 0, 0.1);
-			this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
-			this.actorOutputs = [];
-			this.actorGraphs = [];
-			this.actorActions = []; // sampled ones
-			this.rewardHistory = [];
-			this.baselineNet = {};
-			this.baselineNet.W1 = new R.RandMat(this.nhb, this.ns, 0, 0.01);
-			this.baselineNet.b1 = new R.Mat(this.nhb, 1, 0, 0.01);
-			this.baselineNet.W2 = new R.RandMat(this.na, this.nhb, 0, 0.01);
-			this.baselineNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
-			this.baselineOutputs = [];
-			this.baselineGraphs = [];
-			this.t = 0;
-		},
-		forwardActor: function(s, needs_backprop) {
-			var net = this.actorNet;
-			var G = new R.Graph(needs_backprop);
-			var a1mat = G.add(G.mul(net.W1, s), net.b1);
-			var h1mat = G.tanh(a1mat);
-			var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
-			return {
-				'a': a2mat,
-				'G': G
-			}
-		},
-		forwardValue: function(s, needs_backprop) {
-			var net = this.baselineNet;
-			var G = new R.Graph(needs_backprop);
-			var a1mat = G.add(G.mul(net.W1, s), net.b1);
-			var h1mat = G.tanh(a1mat);
-			var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
-			return {
-				'a': a2mat,
-				'G': G
-			}
-		},
-		act: function(slist) {
-			// convert to a Mat column vector
-			var s = new R.Mat(this.ns, 1);
-			s.setFrom(slist);
-			// forward the actor to get action output
-			var ans = this.forwardActor(s, true);
-			var amat = ans.a;
-			var ag = ans.G;
-			this.actorOutputs.push(amat);
-			this.actorGraphs.push(ag);
-			// forward the baseline estimator
-			var ans = this.forwardValue(s, true);
-			var vmat = ans.a;
-			var vg = ans.G;
-			this.baselineOutputs.push(vmat);
-			this.baselineGraphs.push(vg);
-			// sample action from the stochastic gaussian policy
-			var a = R.copyMat(amat);
-			var gaussVar = 0.02;
-			a.w[0] = R.randn(0, gaussVar);
-			a.w[1] = R.randn(0, gaussVar);
-			this.actorActions.push(a);
-			// shift state memory
-			this.s0 = this.s1;
-			this.a0 = this.a1;
-			this.s1 = s;
-			this.a1 = a;
-			return a;
-		},
-		learn: function(r1) {
-			// perform an update on Q function
-			this.rewardHistory.push(r1);
-			var n = this.rewardHistory.length;
-			var baselineMSE = 0.0;
-			var nup = 100; // what chunk of experience to take
-			var nuse = 80; // what chunk to update from
-			if (n >= nup) {
-				// lets learn and flush
-				// first: compute the sample values at all points
-				var vs = [];
-				for (var t = 0; t < nuse; t++) {
-					var mul = 1;
-					// compute the actual discounted reward for this time step
-					var V = 0;
-					for (var t2 = t; t2 < n; t2++) {
-						V += mul * this.rewardHistory[t2];
-						mul *= this.gamma;
-						if (mul < 1e-5) {
-							break;
-						} // efficiency savings
-					}
-					// get the predicted baseline at this time step
-					var b = this.baselineOutputs[t].w[0];
-					for (var i = 0; i < this.na; i++) {
-						// [the action delta] * [the desirebility]
-						var update = -(V - b) * (this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
-						if (update > 0.1) {
-							update = 0.1;
-						}
-						if (update < -0.1) {
-							update = -0.1;
-						}
-						this.actorOutputs[t].dw[i] += update;
-					}
-					var update = -(V - b);
-					if (update > 0.1) {
-						update = 0.1;
-					}
-					if (update < 0.1) {
-						update = -0.1;
-					}
-					this.baselineOutputs[t].dw[0] += update;
-					baselineMSE += (V - b) * (V - b);
-					vs.push(V);
-				}
-				baselineMSE /= nuse;
-				// backprop all the things
-				for (var t = 0; t < nuse; t++) {
-					this.actorGraphs[t].backward();
-					this.baselineGraphs[t].backward();
-				}
-				R.updateNet(this.actorNet, this.alpha); // update actor network
-				R.updateNet(this.baselineNet, this.beta); // update baseline network
-				// flush
-				this.actorOutputs = [];
-				this.rewardHistory = [];
-				this.actorActions = [];
-				this.baselineOutputs = [];
-				this.actorGraphs = [];
-				this.baselineGraphs = [];
-				this.tderror = baselineMSE;
-			}
-			this.t += 1;
-			this.r0 = r1; // store for next update
-		},
-	}
-	// buggy implementation as well, doesn't work
-	var RecurrentReinforceAgent = function(env, opt) {
-		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
-		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
-		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
-		this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
-		this.env = env;
-		this.reset();
-	}
-	RecurrentReinforceAgent.prototype = {
-		reset: function() {
-			this.ns = this.env.getNumStates();
-			this.na = this.env.getMaxNumActions();
-			this.nh = 40; // number of hidden units
-			this.nhb = 40; // and also in the baseline lstm
-			this.actorLSTM = R.initLSTM(this.ns, [this.nh], this.na);
-			this.actorG = new R.Graph();
-			this.actorPrev = null;
-			this.actorOutputs = [];
-			this.rewardHistory = [];
-			this.actorActions = [];
-			this.baselineLSTM = R.initLSTM(this.ns, [this.nhb], 1);
-			this.baselineG = new R.Graph();
-			this.baselinePrev = null;
-			this.baselineOutputs = [];
-			this.t = 0;
-			this.r0 = null;
-			this.s0 = null;
-			this.s1 = null;
-			this.a0 = null;
-			this.a1 = null;
-		},
-		act: function(slist) {
-			// convert to a Mat column vector
-			var s = new R.Mat(this.ns, 1);
-			s.setFrom(slist);
-			// forward the LSTM to get action distribution
-			var actorNext = R.forwardLSTM(this.actorG, this.actorLSTM, [this.nh], s, this.actorPrev);
-			this.actorPrev = actorNext;
-			var amat = actorNext.o;
-			this.actorOutputs.push(amat);
-			// forward the baseline LSTM
-			var baselineNext = R.forwardLSTM(this.baselineG, this.baselineLSTM, [this.nhb], s, this.baselinePrev);
-			this.baselinePrev = baselineNext;
-			this.baselineOutputs.push(baselineNext.o);
-			// sample action from actor policy
-			var gaussVar = 0.05;
-			var a = R.copyMat(amat);
-			for (var i = 0, n = a.w.length; i < n; i++) {
-				a.w[0] += R.randn(0, gaussVar);
-				a.w[1] += R.randn(0, gaussVar);
-			}
-			this.actorActions.push(a);
-			// shift state memory
-			this.s0 = this.s1;
-			this.a0 = this.a1;
-			this.s1 = s;
-			this.a1 = a;
-			return a;
-		},
-		learn: function(r1) {
-			// perform an update on Q function
-			this.rewardHistory.push(r1);
-			var n = this.rewardHistory.length;
-			var baselineMSE = 0.0;
-			var nup = 100; // what chunk of experience to take
-			var nuse = 80; // what chunk to also update
-			if (n >= nup) {
-				// lets learn and flush
-				// first: compute the sample values at all points
-				var vs = [];
-				for (var t = 0; t < nuse; t++) {
-					var mul = 1;
-					var V = 0;
-					for (var t2 = t; t2 < n; t2++) {
-						V += mul * this.rewardHistory[t2];
-						mul *= this.gamma;
-						if (mul < 1e-5) {
-							break;
-						} // efficiency savings
-					}
-					var b = this.baselineOutputs[t].w[0];
-					// todo: take out the constants etc.
-					for (var i = 0; i < this.na; i++) {
-						// [the action delta] * [the desirebility]
-						var update = -(V - b) * (this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
-						if (update > 0.1) {
-							update = 0.1;
-						}
-						if (update < -0.1) {
-							update = -0.1;
-						}
-						this.actorOutputs[t].dw[i] += update;
-					}
-					var update = -(V - b);
-					if (update > 0.1) {
-						update = 0.1;
-					}
-					if (update < 0.1) {
-						update = -0.1;
-					}
-					this.baselineOutputs[t].dw[0] += update;
-					baselineMSE += (V - b) * (V - b);
-					vs.push(V);
-				}
-				baselineMSE /= nuse;
-				this.actorG.backward(); // update params! woohoo!
-				this.baselineG.backward();
-				R.updateNet(this.actorLSTM, this.alpha); // update actor network
-				R.updateNet(this.baselineLSTM, this.beta); // update baseline network
-				// flush
-				this.actorG = new R.Graph();
-				this.actorPrev = null;
-				this.actorOutputs = [];
-				this.rewardHistory = [];
-				this.actorActions = [];
-				this.baselineG = new R.Graph();
-				this.baselinePrev = null;
-				this.baselineOutputs = [];
-				this.tderror = baselineMSE;
-			}
-			this.t += 1;
-			this.r0 = r1; // store for next update
-		},
-	}
-	// Currently buggy implementation, doesnt work
-	var DeterministPG = function(env, opt) {
-		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
-		this.epsilon = getopt(opt, 'epsilon', 0.5); // for epsilon-greedy policy
-		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
-		this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
-		this.env = env;
-		this.reset();
-	}
-	DeterministPG.prototype = {
-		reset: function() {
-			this.ns = this.env.getNumStates();
-			this.na = this.env.getMaxNumActions();
-			this.nh = 100; // number of hidden units
-			// actor
-			this.actorNet = {};
-			this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
-			this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
-			this.actorNet.W2 = new R.RandMat(this.na, this.ns, 0, 0.1);
-			this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
-			this.ntheta = this.na * this.ns + this.na; // number of params in actor
-			// critic
-			this.criticw = new R.RandMat(1, this.ntheta, 0, 0.01); // row vector
-			this.r0 = null;
-			this.s0 = null;
-			this.s1 = null;
-			this.a0 = null;
-			this.a1 = null;
-			this.t = 0;
-		},
-		forwardActor: function(s, needs_backprop) {
-			var net = this.actorNet;
-			var G = new R.Graph(needs_backprop);
-			var a1mat = G.add(G.mul(net.W1, s), net.b1);
-			var h1mat = G.tanh(a1mat);
-			var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
-			return {
-				'a': a2mat,
-				'G': G
-			}
-		},
-		act: function(slist) {
-			// convert to a Mat column vector
-			var s = new R.Mat(this.ns, 1);
-			s.setFrom(slist);
-			// forward the actor to get action output
-			var ans = this.forwardActor(s, false);
-			var amat = ans.a;
-			var ag = ans.G;
-			// sample action from the stochastic gaussian policy
-			var a = R.copyMat(amat);
-			if (Math.random() < this.epsilon) {
-				var gaussVar = 0.02;
-				a.w[0] = R.randn(0, gaussVar);
-				a.w[1] = R.randn(0, gaussVar);
-			}
-			var clamp = 0.25;
-			if (a.w[0] > clamp) a.w[0] = clamp;
-			if (a.w[0] < -clamp) a.w[0] = -clamp;
-			if (a.w[1] > clamp) a.w[1] = clamp;
-			if (a.w[1] < -clamp) a.w[1] = -clamp;
-			// shift state memory
-			this.s0 = this.s1;
-			this.a0 = this.a1;
-			this.s1 = s;
-			this.a1 = a;
-			return a;
-		},
-		utilJacobianAt: function(s) {
-			var ujacobian = new R.Mat(this.ntheta, this.na);
-			for (var a = 0; a < this.na; a++) {
-				R.netZeroGrads(this.actorNet);
-				var ag = this.forwardActor(this.s0, true);
-				ag.a.dw[a] = 1.0;
-				ag.G.backward();
-				var gflat = R.netFlattenGrads(this.actorNet);
-				ujacobian.setColumn(gflat, a);
-			}
-			return ujacobian;
-		},
-		learn: function(r1) {
-			// perform an update on Q function
-			//this.rewardHistory.push(r1);
-			if (!(this.r0 == null)) {
-				var Gtmp = new R.Graph(false);
-				// dpg update:
-				// first compute the features psi:
-				// the jacobian matrix of the actor for s
-				var ujacobian0 = this.utilJacobianAt(this.s0);
-				// now form the features \psi(s,a)
-				var psi_sa0 = Gtmp.mul(ujacobian0, this.a0); // should be [this.ntheta x 1] "feature" vector
-				var qw0 = Gtmp.mul(this.criticw, psi_sa0); // 1x1
-				// now do the same thing because we need \psi(s_{t+1}, \mu\_\theta(s\_t{t+1}))
-				var ujacobian1 = this.utilJacobianAt(this.s1);
-				var ag = this.forwardActor(this.s1, false);
-				var psi_sa1 = Gtmp.mul(ujacobian1, ag.a);
-				var qw1 = Gtmp.mul(this.criticw, psi_sa1); // 1x1
-				// get the td error finally
-				var tderror = this.r0 + this.gamma * qw1.w[0] - qw0.w[0]; // lol
-				if (tderror > 0.5) tderror = 0.5; // clamp
-				if (tderror < -0.5) tderror = -0.5;
-				this.tderror = tderror;
-				// update actor policy with natural gradient
-				var net = this.actorNet;
-				var ix = 0;
-				for (var p in net) {
-					var mat = net[p];
-					if (net.hasOwnProperty(p)) {
-						for (var i = 0, n = mat.w.length; i < n; i++) {
-							mat.w[i] += this.alpha * this.criticw.w[ix]; // natural gradient update
-							ix += 1;
-						}
-					}
-				}
-				// update the critic parameters too
-				for (var i = 0; i < this.ntheta; i++) {
-					var update = this.beta * tderror * psi_sa0.w[i];
-					this.criticw.w[i] += update;
-				}
-			}
-			this.r0 = r1; // store for next update
-		},
-	}
-	// exports
-	global.DPAgent = DPAgent;
-	global.TDAgent = TDAgent;
-	global.DQNAgent = DQNAgent;
-	//global.SimpleReinforceAgent = SimpleReinforceAgent;
-	//global.RecurrentReinforceAgent = RecurrentReinforceAgent;
-	//global.DeterministPG = DeterministPG;
+(function (global) {
+    "use strict";
+    // syntactic sugar function for getting default parameter values
+    var getopt = function (opt, field_name, default_value) {
+        if (typeof opt === 'undefined') {
+            return default_value;
+        }
+        return (typeof opt[field_name] !== 'undefined') ? opt[field_name] : default_value;
+    }
+    var zeros = R.zeros; // local aliases for helpers exported by the Recurrent library object R
+    var assert = R.assert; // used by sampleWeighted as a "should never get here" check
+    var randi = R.randi; // random integer helper, called below as randi(0, n) to pick an index
+    var randf = R.randf; // NOTE(review): not referenced in the visible part of this section - confirm it is still needed
+    var setConst = function (arr, c) {
+        for (var i = 0, n = arr.length; i < n; i++) {
+            arr[i] = c;
+        }
+    }
+    var sampleWeighted = function (p) {
+        var r = Math.random();
+        var c = 0.0;
+        for (var i = 0, n = p.length; i < n; i++) {
+            c += p[i];
+            if (c >= r) {
+                return i;
+            }
+        }
+        assert(false, 'wtf');
+    }
+    // ------
+    // AGENTS
+    // ------
+    // DPAgent performs Value Iteration
+    // - can also be used for Policy Iteration if you really wanted to
+    // - requires model of the environment :(
+    // - does not learn from experience :(
+    // - assumes finite MDP :(
+    var DPAgent = function (env, opt) { // env: environment model; opt: optional settings, only 'gamma' is read here
+        this.V = null; // state value function, one entry per state (allocated in reset)
+        this.P = null; // policy distribution \pi(s,a), stored flat at index a * ns + s (allocated in reset)
+        this.env = env; // store pointer to environment
+        this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor (default 0.75)
+        this.reset(); // sizes V and P from the environment and installs the uniform policy
+    }
+    DPAgent.prototype = {
+        reset: function () {
+            // reset the agent's policy and value function
+            this.ns = this.env.getNumStates();
+            this.na = this.env.getMaxNumActions();
+            this.V = zeros(this.ns);
+            this.P = zeros(this.ns * this.na);
+            // initialize uniform random policy
+            for (var s = 0; s < this.ns; s++) {
+                var poss = this.env.allowedActions(s);
+                for (var i = 0, n = poss.length; i < n; i++) {
+                    this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
+                }
+            }
+        },
+        act: function (s) {
+            // behave according to the learned policy
+            var poss = this.env.allowedActions(s);
+            var ps = [];
+            for (var i = 0, n = poss.length; i < n; i++) {
+                var a = poss[i];
+                var prob = this.P[a * this.ns + s];
+                ps.push(prob);
+            }
+            var maxi = sampleWeighted(ps);
+            return poss[maxi];
+        },
+        learn: function () {
+            // perform a single round of value iteration
+            self.evaluatePolicy(); // writes this.V
+            self.updatePolicy(); // writes this.P
+        },
+        evaluatePolicy: function () {
+            // perform a synchronous update of the value function
+            var Vnew = zeros(this.ns);
+            for (var s = 0; s < this.ns; s++) {
+                // integrate over actions in a stochastic policy
+                // note that we assume that policy probability mass over allowed actions sums to one
+                var v = 0.0;
+                var poss = this.env.allowedActions(s);
+                for (var i = 0, n = poss.length; i < n; i++) {
+                    var a = poss[i];
+                    var prob = this.P[a * this.ns + s]; // probability of taking action under policy
+                    if (prob === 0) {
+                        continue;
+                    } // no contribution, skip for speed
+                    var ns = this.env.nextStateDistribution(s, a);
+                    var rs = this.env.reward(s, a, ns); // reward for s->a->ns transition
+                    v += prob * (rs + this.gamma * this.V[ns]);
+                }
+                Vnew[s] = v;
+            }
+            this.V = Vnew; // swap
+        },
+        updatePolicy: function () {
+            // update policy to be greedy w.r.t. learned Value function
+            for (var s = 0; s < this.ns; s++) {
+                var poss = this.env.allowedActions(s);
+                // compute value of taking each allowed action
+                var vmax, nmax;
+                var vs = [];
+                for (var i = 0, n = poss.length; i < n; i++) {
+                    var a = poss[i];
+                    var ns = this.env.nextStateDistribution(s, a);
+                    var rs = this.env.reward(s, a, ns);
+                    var v = rs + this.gamma * this.V[ns];
+                    vs.push(v);
+                    if (i === 0 || v > vmax) {
+                        vmax = v;
+                        nmax = 1;
+                    } else if (v === vmax) {
+                        nmax += 1;
+                    }
+                }
+                // update policy smoothly across all argmaxy actions
+                for (var i = 0, n = poss.length; i < n; i++) {
+                    var a = poss[i];
+                    this.P[a * this.ns + s] = (vs[i] === vmax) ? 1.0 / nmax : 0.0;
+                }
+            }
+        },
+    }
+    // TDAgent uses TD (Q-Learning, SARSA)
+    // - does not require environment model :)
+    // - learns from experience :)
+    var TDAgent = function (env, opt) {
+        this.update = getopt(opt, 'update', 'qlearn'); // qlearn | sarsa
+        this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
+        this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
+        this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
+        // class allows non-deterministic policy, and smoothly regressing towards the optimal policy based on Q
+        this.smooth_policy_update = getopt(opt, 'smooth_policy_update', false);
+        this.beta = getopt(opt, 'beta', 0.01); // learning rate for policy, if smooth updates are on
+        // eligibility traces
+        this.lambda = getopt(opt, 'lambda', 0); // eligibility trace decay. 0 = no eligibility traces used
+        this.replacing_traces = getopt(opt, 'replacing_traces', true);
+        // optional optimistic initial values
+        this.q_init_val = getopt(opt, 'q_init_val', 0);
+        this.planN = getopt(opt, 'planN', 0); // number of planning steps per learning iteration (0 = no planning)
+        this.Q = null; // state action value function
+        this.P = null; // policy distribution \pi(s,a)
+        this.e = null; // eligibility trace
+        this.env_model_s = null;; // environment model (s,a) -> (s',r)
+        this.env_model_r = null;; // environment model (s,a) -> (s',r)
+        this.env = env; // store pointer to environment
+        this.reset();
+    }
+    TDAgent.prototype = {
+        reset: function () {
+            // reset the agent's policy and value function
+            this.ns = this.env.getNumStates();
+            this.na = this.env.getMaxNumActions();
+            this.Q = zeros(this.ns * this.na);
+            if (this.q_init_val !== 0) {
+                setConst(this.Q, this.q_init_val);
+            }
+            this.P = zeros(this.ns * this.na);
+            this.e = zeros(this.ns * this.na);
+            // model/planning vars
+            this.env_model_s = zeros(this.ns * this.na);
+            setConst(this.env_model_s, -1); // init to -1 so we can test if we saw the state before
+            this.env_model_r = zeros(this.ns * this.na);
+            this.sa_seen = [];
+            this.pq = zeros(this.ns * this.na);
+            // initialize uniform random policy
+            for (var s = 0; s < this.ns; s++) {
+                var poss = this.env.allowedActions(s);
+                for (var i = 0, n = poss.length; i < n; i++) {
+                    this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
+                }
+            }
+            // agent memory, needed for streaming updates
+            // (s0,a0,r0,s1,a1,r1,...)
+            this.r0 = null;
+            this.s0 = null;
+            this.s1 = null;
+            this.a0 = null;
+            this.a1 = null;
+        },
+        resetEpisode: function () {
+            // hook for "an episode finished"; currently a no-op (no agent state is touched)
+        },
+        act: function (s) {
+            // act according to epsilon greedy policy
+            var poss = this.env.allowedActions(s);
+            var probs = [];
+            for (var i = 0, n = poss.length; i < n; i++) {
+                probs.push(this.P[poss[i] * this.ns + s]);
+            }
+            // epsilon greedy policy
+            if (Math.random() < this.epsilon) {
+                var a = poss[randi(0, poss.length)]; // random available action
+                this.explored = true;
+            } else {
+                var a = poss[sampleWeighted(probs)];
+                this.explored = false;
+            }
+            // shift state memory
+            this.s0 = this.s1;
+            this.a0 = this.a1;
+            this.s1 = s;
+            this.a1 = a;
+            return a;
+        },
+        learn: function (r1) {
+            // takes reward for previous action, which came from a call to act()
+            if (!(this.r0 == null)) {
+                this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1, this.lambda);
+                if (this.planN > 0) {
+                    this.updateModel(this.s0, this.a0, this.r0, this.s1);
+                    this.plan();
+                }
+            }
+            this.r0 = r1; // store this for next update
+        },
+        updateModel: function (s0, a0, r0, s1) {
+            // transition (s0,a0) -> (r0,s1) was observed. Update environment model
+            var sa = a0 * this.ns + s0;
+            if (this.env_model_s[sa] === -1) {
+                // first time we see this state action
+                this.sa_seen.push(a0 * this.ns + s0); // add as seen state
+            }
+            this.env_model_s[sa] = s1;
+            this.env_model_r[sa] = r0;
+        },
+        plan: function () {
+            // order the states based on current priority queue information
+            var spq = [];
+            for (var i = 0, n = this.sa_seen.length; i < n; i++) {
+                var sa = this.sa_seen[i];
+                var sap = this.pq[sa];
+                if (sap > 1e-5) { // gain a bit of efficiency
+                    spq.push({
+                        sa: sa,
+                        p: sap
+                    });
+                }
+            }
+            spq.sort(function (a, b) {
+                return a.p < b.p ? 1 : -1
+            });
+            // perform the updates
+            var nsteps = Math.min(this.planN, spq.length);
+            for (var k = 0; k < nsteps; k++) {
+                // random exploration
+                //var i = randi(0, this.sa_seen.length); // pick random prev seen state action
+                //var s0a0 = this.sa_seen[i];
+                var s0a0 = spq[k].sa;
+                this.pq[s0a0] = 0; // erase priority, since we're backing up this state
+                var s0 = s0a0 % this.ns;
+                var a0 = Math.floor(s0a0 / this.ns);
+                var r0 = this.env_model_r[s0a0];
+                var s1 = this.env_model_s[s0a0];
+                var a1 = -1; // not used for Q learning
+                if (this.update === 'sarsa') {
+                    // generate random action?...
+                    var poss = this.env.allowedActions(s1);
+                    var a1 = poss[randi(0, poss.length)];
+                }
+                this.learnFromTuple(s0, a0, r0, s1, a1, 0); // note lambda = 0 - shouldnt use eligibility trace here
+            }
+        },
+        learnFromTuple: function (s0, a0, r0, s1, a1, lambda) {
+            var sa = a0 * this.ns + s0; // flat index of (s0, a0) in Q / e / P
+            // calculate the target for Q(s,a); target stays undefined if this.update is neither 'qlearn' nor 'sarsa'
+            if (this.update === 'qlearn') {
+                // Q learning target is Q(s0,a0) = r0 + gamma * max_a Q[s1,a]
+                var poss = this.env.allowedActions(s1);
+                var qmax = 0; // remains 0 when s1 has no allowed actions
+                for (var i = 0, n = poss.length; i < n; i++) {
+                    var s1a = poss[i] * this.ns + s1;
+                    var qval = this.Q[s1a];
+                    if (i === 0 || qval > qmax) {
+                        qmax = qval;
+                    }
+                }
+                var target = r0 + this.gamma * qmax;
+            } else if (this.update === 'sarsa') {
+                // SARSA target is Q(s0,a0) = r0 + gamma * Q[s1,a1]
+                var s1a1 = a1 * this.ns + s1;
+                var target = r0 + this.gamma * this.Q[s1a1];
+            }
+            if (lambda > 0) {
+                // perform an eligibility trace update
+                if (this.replacing_traces) {
+                    this.e[sa] = 1; // replacing trace: reset to 1
+                } else {
+                    this.e[sa] += 1; // accumulating trace: add 1
+                }
+                var edecay = lambda * this.gamma; // per-step decay applied to every trace below
+                var state_update = zeros(this.ns); // largest |Q change| seen per state in this sweep
+                for (var s = 0; s < this.ns; s++) {
+                    var poss = this.env.allowedActions(s);
+                    for (var i = 0; i < poss.length; i++) {
+                        var a = poss[i];
+                        var saloop = a * this.ns + s;
+                        var esa = this.e[saloop];
+                        var update = this.alpha * esa * (target - this.Q[saloop]); // step scaled by the pair's trace
+                        this.Q[saloop] += update;
+                        this.updatePriority(s, a, update);
+                        this.e[saloop] *= edecay;
+                        var u = Math.abs(update);
+                        if (u > state_update[s]) {
+                            state_update[s] = u;
+                        }
+                    }
+                }
+                for (var s = 0; s < this.ns; s++) {
+                    if (state_update[s] > 1e-5) { // only refresh the policy where Q moved noticeably
+                        this.updatePolicy(s);
+                    }
+                }
+                if (this.explored && this.update === 'qlearn') {
+                    // have to wipe the trace since q learning is off-policy :(
+                    this.e = zeros(this.ns * this.na);
+                }
+            } else {
+                // simpler and faster update without eligibility trace
+                // update Q[sa] towards the target with step size alpha
+                var update = this.alpha * (target - this.Q[sa]);
+                this.Q[sa] += update;
+                this.updatePriority(s0, a0, update);
+                // update the policy to reflect the change (if appropriate)
+                this.updatePolicy(s0);
+            }
+        },
+        updatePriority: function (s, a, u) {
+            // used in planning. Invoked when Q[sa] += update
+            // we should find all states that lead to (s,a) and upgrade their priority
+            // of being update in the next planning step
+            u = Math.abs(u);
+            if (u < 1e-5) {
+                return;
+            } // for efficiency skip small updates
+            if (this.planN === 0) {
+                return;
+            } // there is no planning to be done, skip.
+            for (var si = 0; si < this.ns; si++) {
+                // note we are also iterating over impossible actions at all states,
+                // but this should be okay because their env_model_s should simply be -1
+                // as initialized, so they will never be predicted to point to any state
+                // because they will never be observed, and hence never be added to the model
+                for (var ai = 0; ai < this.na; ai++) {
+                    var siai = ai * this.ns + si;
+                    if (this.env_model_s[siai] === s) {
+                        // this state leads to s, add it to priority queue
+                        this.pq[siai] += u;
+                    }
+                }
+            }
+        },
+        updatePolicy: function (s) {
+            var poss = this.env.allowedActions(s);
+            // set policy at s to be the action that achieves max_a Q(s,a)
+            // first find the maxy Q values
+            var qmax, nmax;
+            var qs = [];
+            for (var i = 0, n = poss.length; i < n; i++) {
+                var a = poss[i];
+                var qval = this.Q[a * this.ns + s];
+                qs.push(qval);
+                if (i === 0 || qval > qmax) {
+                    qmax = qval;
+                    nmax = 1;
+                } else if (qval === qmax) {
+                    nmax += 1;
+                }
+            }
+            // now update the policy smoothly towards the argmaxy actions
+            var psum = 0.0;
+            for (var i = 0, n = poss.length; i < n; i++) {
+                var a = poss[i];
+                var target = (qs[i] === qmax) ? 1.0 / nmax : 0.0;
+                var ix = a * this.ns + s;
+                if (this.smooth_policy_update) {
+                    // slightly hacky :p
+                    this.P[ix] += this.beta * (target - this.P[ix]);
+                    psum += this.P[ix];
+                } else {
+                    // set hard target
+                    this.P[ix] = target;
+                }
+            }
+            if (this.smooth_policy_update) {
+                // renomalize P if we're using smooth policy updates
+                for (var i = 0, n = poss.length; i < n; i++) {
+                    var a = poss[i];
+                    this.P[a * this.ns + s] /= psum;
+                }
+            }
+        }
+    }
+    var DQNAgent = function (env, opt) { // env: environment handle; opt: optional settings read through getopt
+        this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
+        this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
+        this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate; learn() does nothing unless alpha > 0
+        this.experience_add_every = getopt(opt, 'experience_add_every', 25); // number of time steps before we add another experience to replay memory
+        this.experience_size = getopt(opt, 'experience_size', 5000); // size of experience replay
+        this.learning_steps_per_iteration = getopt(opt, 'learning_steps_per_iteration', 10); // replayed tuples per learn() call
+        this.tderror_clamp = getopt(opt, 'tderror_clamp', 1.0); // TD errors are clipped to [-clamp, clamp] in learnFromTuple
+        this.num_hidden_units = getopt(opt, 'num_hidden_units', 100); // copied to this.nh by reset()
+        this.env = env;
+        this.reset(); // builds the network and clears replay memory and step state
+    }
+    DQNAgent.prototype = {
+        reset: function () {
+            this.nh = this.num_hidden_units; // number of hidden units
+            this.ns = this.env.getNumStates(); // act() builds an ns x 1 state Mat from its input list
+            this.na = this.env.getMaxNumActions(); // number of action indices; act() returns a value in [0, na)
+            // nets are hardcoded for now as key (str) -> Mat
+            // not proud of this. better solution is to have a whole Net object
+            // on top of Mats, but for now sticking with this
+            this.net = {};
+            this.net.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01); // input -> hidden weights (nh x ns)
+            this.net.b1 = new R.Mat(this.nh, 1, 0, 0.01); // hidden bias (nh x 1)
+            this.net.W2 = new R.RandMat(this.na, this.nh, 0, 0.01); // hidden -> output weights (na x nh)
+            this.net.b2 = new R.Mat(this.na, 1, 0, 0.01); // output bias (na x 1)
+            this.exp = []; // experience
+            this.expi = 0; // where to insert
+            this.t = 0; // counts learning updates; paces insertion into the replay memory
+            this.r0 = null; // reward / state / action memory shifted by act() and learn()
+            this.s0 = null;
+            this.s1 = null;
+            this.a0 = null;
+            this.a1 = null;
+            this.tderror = 0; // for visualization only...
+        },
+        toJSON: function () {
+            // save function
+            var j = {};
+            j.nh = this.nh;
+            j.ns = this.ns;
+            j.na = this.na;
+            j.net = R.netToJSON(this.net);
+            return j;
+        },
+        fromJSON: function (j) {
+            // load function: restores the layer sizes and the network from an object shaped like toJSON() output
+            this.nh = j.nh;
+            this.ns = j.ns;
+            this.na = j.na;
+            this.net = R.netFromJSON(j.net); // replay memory and step state are not touched here
+        },
+        forwardQ: function (net, s, needs_backprop) {
+            var G = new R.Graph(needs_backprop);
+            var a1mat = G.add(G.mul(net.W1, s), net.b1);
+            var h1mat = G.tanh(a1mat);
+            var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
+            this.lastG = G; // back this up. Kind of hacky isn't it
+            return a2mat;
+        },
+        act: function (slist) {
+            // convert to a Mat column vector
+            var s = new R.Mat(this.ns, 1);
+            s.setFrom(slist);
+            // epsilon greedy policy
+            if (Math.random() < this.epsilon) {
+                var a = randi(0, this.na);
+            } else {
+                // greedy wrt Q function
+                var amat = this.forwardQ(this.net, s, false);
+                var a = R.maxi(amat.w); // returns index of argmax action
+            }
+            // shift state memory
+            this.s0 = this.s1;
+            this.a0 = this.a1;
+            this.s1 = s;
+            this.a1 = a;
+            return a;
+        },
+        learn: function (r1) {
+            // perform an update on Q function
+            if (!(this.r0 == null) && this.alpha > 0) {
+                // learn from this tuple to get a sense of how "surprising" it is to the agent
+                var tderror = this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1);
+                this.tderror = tderror; // a measure of surprise
+                // decide if we should keep this experience in the replay
+                if (this.t % this.experience_add_every === 0) {
+                    this.exp[this.expi] = [this.s0, this.a0, this.r0, this.s1, this.a1];
+                    this.expi += 1;
+                    if (this.expi > this.experience_size) {
+                        this.expi = 0;
+                    } // roll over when we run out
+                }
+                this.t += 1;
+                // sample some additional experience from replay memory and learn from it
+                for (var k = 0; k < this.learning_steps_per_iteration; k++) {
+                    var ri = randi(0, this.exp.length); // todo: priority sweeps?
+                    var e = this.exp[ri];
+                    this.learnFromTuple(e[0], e[1], e[2], e[3], e[4])
+                }
+            }
+            this.r0 = r1; // store for next update
+        },
+        learnFromTuple: function (s0, a0, r0, s1, a1) {
+            // want: Q(s,a) = r + gamma * max_a' Q(s',a')
+            // compute the target Q value
+            var tmat = this.forwardQ(this.net, s1, false);
+            var qmax = r0 + this.gamma * tmat.w[R.maxi(tmat.w)];
+            // now predict
+            var pred = this.forwardQ(this.net, s0, true);
+            var tderror = pred.w[a0] - qmax;
+            var clamp = this.tderror_clamp;
+            if (Math.abs(tderror) > clamp) { // huber loss to robustify
+                if (tderror > clamp) tderror = clamp;
+                if (tderror < -clamp) tderror = -clamp;
+            }
+            pred.dw[a0] = tderror;
+            this.lastG.backward(); // compute gradients on net params
+            // update net
+            R.updateNet(this.net, this.alpha);
+            return tderror;
+        }
+    }
+    // buggy implementation, doesnt work...
+    var SimpleReinforceAgent = function (env, opt) { // env: environment handle; opt: optional settings read through getopt
+        this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
+        this.epsilon = getopt(opt, 'epsilon', 0.75); // for epsilon-greedy policy; NOTE(review): act() below never reads it
+        this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
+        this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
+        this.env = env; // queried by reset() for the number of states and actions
+        this.reset(); // builds the actor and baseline nets and clears the per-step histories
+    }
+    SimpleReinforceAgent.prototype = {
+        reset: function () {
+            this.ns = this.env.getNumStates();
+            this.na = this.env.getMaxNumActions();
+            this.nh = 100; // number of hidden units
+            this.nhb = 100; // and also in the baseline lstm
+            this.actorNet = {};
+            this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
+            this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
+            this.actorNet.W2 = new R.RandMat(this.na, this.nh, 0, 0.1);
+            this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
+            this.actorOutputs = [];
+            this.actorGraphs = [];
+            this.actorActions = []; // sampled ones
+            this.rewardHistory = [];
+            this.baselineNet = {};
+            this.baselineNet.W1 = new R.RandMat(this.nhb, this.ns, 0, 0.01);
+            this.baselineNet.b1 = new R.Mat(this.nhb, 1, 0, 0.01);
+            this.baselineNet.W2 = new R.RandMat(this.na, this.nhb, 0, 0.01);
+            this.baselineNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
+            this.baselineOutputs = [];
+            this.baselineGraphs = [];
+            this.t = 0;
+        },
+        forwardActor: function (s, needs_backprop) {
+            var net = this.actorNet;
+            var G = new R.Graph(needs_backprop);
+            var a1mat = G.add(G.mul(net.W1, s), net.b1);
+            var h1mat = G.tanh(a1mat);
+            var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
+            return {
+                'a': a2mat,
+                'G': G
+            }
+        },
+        forwardValue: function (s, needs_backprop) {
+            var net = this.baselineNet;
+            var G = new R.Graph(needs_backprop);
+            var a1mat = G.add(G.mul(net.W1, s), net.b1);
+            var h1mat = G.tanh(a1mat);
+            var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
+            return {
+                'a': a2mat,
+                'G': G
+            }
+        },
+        act: function (slist) {
+            // convert to a Mat column vector
+            var s = new R.Mat(this.ns, 1);
+            s.setFrom(slist);
+            // forward the actor to get action output
+            var ans = this.forwardActor(s, true);
+            var amat = ans.a;
+            var ag = ans.G;
+            this.actorOutputs.push(amat);
+            this.actorGraphs.push(ag);
+            // forward the baseline estimator
+            var ans = this.forwardValue(s, true);
+            var vmat = ans.a;
+            var vg = ans.G;
+            this.baselineOutputs.push(vmat);
+            this.baselineGraphs.push(vg);
+            // sample action from the stochastic gaussian policy
+            var a = R.copyMat(amat);
+            var gaussVar = 0.02;
+            a.w[0] = R.randn(0, gaussVar);
+            a.w[1] = R.randn(0, gaussVar);
+            this.actorActions.push(a);
+            // shift state memory
+            this.s0 = this.s1;
+            this.a0 = this.a1;
+            this.s1 = s;
+            this.a1 = a;
+            return a;
+        },
+        learn: function (r1) { // r1: scalar reward earned by the most recent action
+            // buffer the reward; once nup rewards are stored, update the actor and baseline nets and flush the buffers
+            this.rewardHistory.push(r1);
+            var n = this.rewardHistory.length;
+            var baselineMSE = 0.0;
+            var nup = 100; // what chunk of experience to take
+            var nuse = 80; // what chunk to update from
+            if (n >= nup) {
+                // lets learn and flush
+                // first: compute the sample values at all points
+                var vs = [];
+                for (var t = 0; t < nuse; t++) {
+                    var mul = 1;
+                    // compute the actual discounted reward for this time step
+                    var V = 0;
+                    for (var t2 = t; t2 < n; t2++) {
+                        V += mul * this.rewardHistory[t2];
+                        mul *= this.gamma;
+                        if (mul < 1e-5) {
+                            break;
+                        } // efficiency savings
+                    }
+                    // get the predicted baseline at this time step
+                    var b = this.baselineOutputs[t].w[0];
+                    for (var i = 0; i < this.na; i++) {
+                        // [the action delta] * [the desirability]
+                        var update = -(V - b) * (this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
+                        if (update > 0.1) {
+                            update = 0.1;
+                        }
+                        if (update < -0.1) {
+                            update = -0.1;
+                        }
+                        this.actorOutputs[t].dw[i] += update;
+                    }
+                    var update = -(V - b);
+                    if (update > 0.1) {
+                        update = 0.1;
+                    }
+                    if (update < -0.1) { // symmetric clamp to [-0.1, 0.1]; the bound used to be 0.1, which forced every value below 0.1 to -0.1
+                        update = -0.1;
+                    }
+                    this.baselineOutputs[t].dw[0] += update;
+                    baselineMSE += (V - b) * (V - b);
+                    vs.push(V);
+                }
+                baselineMSE /= nuse;
+                // backprop all the things
+                for (var t = 0; t < nuse; t++) {
+                    this.actorGraphs[t].backward();
+                    this.baselineGraphs[t].backward();
+                }
+                R.updateNet(this.actorNet, this.alpha); // update actor network
+                R.updateNet(this.baselineNet, this.beta); // update baseline network
+                // flush
+                this.actorOutputs = [];
+                this.rewardHistory = [];
+                this.actorActions = [];
+                this.baselineOutputs = [];
+                this.actorGraphs = [];
+                this.baselineGraphs = [];
+                this.tderror = baselineMSE;
+            }
+            this.t += 1;
+            this.r0 = r1; // store for next update
+        },
+    }
+    // buggy implementation as well, doesn't work
+    var RecurrentReinforceAgent = function (env, opt) {
+        this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
+        this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
+        this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
+        this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
+        this.env = env;
+        this.reset();
+    }
+    RecurrentReinforceAgent.prototype = {
+        reset: function () {
+            this.ns = this.env.getNumStates();
+            this.na = this.env.getMaxNumActions();
+            this.nh = 40; // number of hidden units
+            this.nhb = 40; // and also in the baseline lstm
+            this.actorLSTM = R.initLSTM(this.ns, [this.nh], this.na);
+            this.actorG = new R.Graph();
+            this.actorPrev = null;
+            this.actorOutputs = [];
+            this.rewardHistory = [];
+            this.actorActions = [];
+            this.baselineLSTM = R.initLSTM(this.ns, [this.nhb], 1);
+            this.baselineG = new R.Graph();
+            this.baselinePrev = null;
+            this.baselineOutputs = [];
+            this.t = 0;
+            this.r0 = null;
+            this.s0 = null;
+            this.s1 = null;
+            this.a0 = null;
+            this.a1 = null;
+        },
+        act: function (slist) { // slist: array of this.ns state values; returns the sampled action as an R.Mat
+            // convert to a Mat column vector
+            var s = new R.Mat(this.ns, 1);
+            s.setFrom(slist);
+            // forward the LSTM to get action distribution
+            var actorNext = R.forwardLSTM(this.actorG, this.actorLSTM, [this.nh], s, this.actorPrev);
+            this.actorPrev = actorNext;
+            var amat = actorNext.o;
+            this.actorOutputs.push(amat);
+            // forward the baseline LSTM
+            var baselineNext = R.forwardLSTM(this.baselineG, this.baselineLSTM, [this.nhb], s, this.baselinePrev);
+            this.baselinePrev = baselineNext;
+            this.baselineOutputs.push(baselineNext.o);
+            // sample action from actor policy
+            var gaussVar = 0.05;
+            var a = R.copyMat(amat);
+            for (var i = 0, n = a.w.length; i < n; i++) {
+                // perturb each action component exactly once (the loop used to re-add noise to w[0] and w[1] on every pass)
+                a.w[i] += R.randn(0, gaussVar);
+            }
+            this.actorActions.push(a);
+            // shift state memory
+            this.s0 = this.s1;
+            this.a0 = this.a1;
+            this.s1 = s;
+            this.a1 = a;
+            return a;
+        },
+        learn: function (r1) { // r1: scalar reward earned by the most recent action
+            // buffer the reward; once nup rewards are stored, update both LSTMs and start fresh graphs
+            this.rewardHistory.push(r1);
+            var n = this.rewardHistory.length;
+            var baselineMSE = 0.0;
+            var nup = 100; // what chunk of experience to take
+            var nuse = 80; // what chunk to also update
+            if (n >= nup) {
+                // lets learn and flush
+                // first: compute the sample values at all points
+                var vs = [];
+                for (var t = 0; t < nuse; t++) {
+                    var mul = 1;
+                    var V = 0;
+                    for (var t2 = t; t2 < n; t2++) {
+                        V += mul * this.rewardHistory[t2];
+                        mul *= this.gamma;
+                        if (mul < 1e-5) {
+                            break;
+                        } // efficiency savings
+                    }
+                    var b = this.baselineOutputs[t].w[0];
+                    // todo: take out the constants etc.
+                    for (var i = 0; i < this.na; i++) {
+                        // [the action delta] * [the desirability]
+                        var update = -(V - b) * (this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
+                        if (update > 0.1) {
+                            update = 0.1;
+                        }
+                        if (update < -0.1) {
+                            update = -0.1;
+                        }
+                        this.actorOutputs[t].dw[i] += update;
+                    }
+                    var update = -(V - b);
+                    if (update > 0.1) {
+                        update = 0.1;
+                    }
+                    if (update < -0.1) { // symmetric clamp to [-0.1, 0.1]; the bound used to be 0.1, which forced every value below 0.1 to -0.1
+                        update = -0.1;
+                    }
+                    this.baselineOutputs[t].dw[0] += update;
+                    baselineMSE += (V - b) * (V - b);
+                    vs.push(V);
+                }
+                baselineMSE /= nuse;
+                this.actorG.backward(); // update params! woohoo!
+                this.baselineG.backward();
+                R.updateNet(this.actorLSTM, this.alpha); // update actor network
+                R.updateNet(this.baselineLSTM, this.beta); // update baseline network
+                // flush
+                this.actorG = new R.Graph();
+                this.actorPrev = null;
+                this.actorOutputs = [];
+                this.rewardHistory = [];
+                this.actorActions = [];
+                this.baselineG = new R.Graph();
+                this.baselinePrev = null;
+                this.baselineOutputs = [];
+                this.tderror = baselineMSE;
+            }
+            this.t += 1;
+            this.r0 = r1; // store for next update
+        },
+    }
+    // Currently buggy implementation, doesnt work
+    var DeterministPG = function (env, opt) {
+        this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
+        this.epsilon = getopt(opt, 'epsilon', 0.5); // for epsilon-greedy policy
+        this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
+        this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
+        this.env = env;
+        this.reset();
+    }
+    DeterministPG.prototype = {
+        reset: function () { // (re)initialise layer sizes, actor weights, critic weights and the stored transition
+            this.ns = this.env.getNumStates();
+            this.na = this.env.getMaxNumActions();
+            this.nh = 100; // number of hidden units
+            // actor
+            this.actorNet = {};
+            this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
+            this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
+            this.actorNet.W2 = new R.RandMat(this.na, this.ns, 0, 0.1); // NOTE(review): forwardActor multiplies W2 by the hidden vector produced by W1 (nh rows); na x ns presumably only fits when ns === nh - confirm intended shape
+            this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
+            this.ntheta = this.na * this.ns + this.na; // number of params in actor; NOTE(review): equals the sizes of W2 and b2 only, while learn() walks every matrix in actorNet - confirm
+            // critic
+            this.criticw = new R.RandMat(1, this.ntheta, 0, 0.01); // row vector
+            this.r0 = null;
+            this.s0 = null;
+            this.s1 = null;
+            this.a0 = null;
+            this.a1 = null;
+            this.t = 0;
+        },
+        forwardActor: function (s, needs_backprop) {
+            var net = this.actorNet;
+            var G = new R.Graph(needs_backprop);
+            var a1mat = G.add(G.mul(net.W1, s), net.b1);
+            var h1mat = G.tanh(a1mat);
+            var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
+            return {
+                'a': a2mat,
+                'G': G
+            }
+        },
+        act: function (slist) {
+            // convert to a Mat column vector
+            var s = new R.Mat(this.ns, 1);
+            s.setFrom(slist);
+            // forward the actor to get action output
+            var ans = this.forwardActor(s, false);
+            var amat = ans.a;
+            var ag = ans.G;
+            // sample action from the stochastic gaussian policy
+            var a = R.copyMat(amat);
+            if (Math.random() < this.epsilon) {
+                var gaussVar = 0.02;
+                a.w[0] = R.randn(0, gaussVar);
+                a.w[1] = R.randn(0, gaussVar);
+            }
+            var clamp = 0.25;
+            if (a.w[0] > clamp) a.w[0] = clamp;
+            if (a.w[0] < -clamp) a.w[0] = -clamp;
+            if (a.w[1] > clamp) a.w[1] = clamp;
+            if (a.w[1] < -clamp) a.w[1] = -clamp;
+            // shift state memory
+            this.s0 = this.s1;
+            this.a0 = this.a1;
+            this.s1 = s;
+            this.a1 = a;
+            return a;
+        },
+        utilJacobianAt: function (s) { // s: state column vector (R.Mat) at which the actor is differentiated; returns an ntheta x na R.Mat
+            var ujacobian = new R.Mat(this.ntheta, this.na);
+            for (var a = 0; a < this.na; a++) {
+                R.netZeroGrads(this.actorNet);
+                var ag = this.forwardActor(s, true); // use the requested state (was hard-wired to this.s0, so the s argument was ignored)
+                ag.a.dw[a] = 1.0;
+                ag.G.backward();
+                var gflat = R.netFlattenGrads(this.actorNet);
+                ujacobian.setColumn(gflat, a);
+            }
+            return ujacobian;
+        },
+        learn: function (r1) {
+            // perform an update on Q function
+            //this.rewardHistory.push(r1);
+            if (!(this.r0 == null)) {
+                var Gtmp = new R.Graph(false);
+                // dpg update:
+                // first compute the features psi:
+                // the jacobian matrix of the actor for s
+                var ujacobian0 = this.utilJacobianAt(this.s0);
+                // now form the features \psi(s,a)
+                var psi_sa0 = Gtmp.mul(ujacobian0, this.a0); // should be [this.ntheta x 1] "feature" vector
+                var qw0 = Gtmp.mul(this.criticw, psi_sa0); // 1x1
+                // now do the same thing because we need \psi(s_{t+1}, \mu\_\theta(s\_t{t+1}))
+                var ujacobian1 = this.utilJacobianAt(this.s1);
+                var ag = this.forwardActor(this.s1, false);
+                var psi_sa1 = Gtmp.mul(ujacobian1, ag.a);
+                var qw1 = Gtmp.mul(this.criticw, psi_sa1); // 1x1
+                // get the td error finally
+                var tderror = this.r0 + this.gamma * qw1.w[0] - qw0.w[0]; // lol
+                if (tderror > 0.5) tderror = 0.5; // clamp
+                if (tderror < -0.5) tderror = -0.5;
+                this.tderror = tderror;
+                // update actor policy with natural gradient
+                var net = this.actorNet;
+                var ix = 0;
+                for (var p in net) {
+                    var mat = net[p];
+                    if (net.hasOwnProperty(p)) {
+                        for (var i = 0, n = mat.w.length; i < n; i++) {
+                            mat.w[i] += this.alpha * this.criticw.w[ix]; // natural gradient update
+                            ix += 1;
+                        }
+                    }
+                }
+                // update the critic parameters too
+                for (var i = 0; i < this.ntheta; i++) {
+                    var update = this.beta * tderror * psi_sa0.w[i];
+                    this.criticw.w[i] += update;
+                }
+            }
+            this.r0 = r1; // store for next update
+        },
+    }
+    // exports
+    global.DPAgent = DPAgent;
+    global.TDAgent = TDAgent;
+    global.DQNAgent = DQNAgent;
+    //global.SimpleReinforceAgent = SimpleReinforceAgent;
+    //global.RecurrentReinforceAgent = RecurrentReinforceAgent;
+    //global.DeterministPG = DeterministPG;
 })(RL);
 var Trevel = {
-	//settings you can change
-	stop: false,
-	maxBet: 0.00001,
-	minBet: 0.00000002,
-	swap: true,
-	betSpeed: 100,//change this on init
-	verbose: true,
-	isTesting: false,
-	//money management
-	useKelly: false,//martingale performs better on live account!
-	kellyPercent: 5, //can't be more than 100 or less than 1
-	useMartingale: true, //if kelly is true this won't work
-	martingaleMultiplier: 2,
-	//bot settings, these are set automaticcally don't bother
-	currentBalance: 0,
-	startingBalance: 0,
-	betAmount: 0,
-	profit: 0,
-	totalBets: 0,
-	totalWins: 0,
-	winRate: 0,
-	betHistory: [], //this is a sequence of all winning bets not the sequence of bets we placed
-	betOutcomes: [],
-	hbProbability: 0,
-	lbProbability: 0,
-	hbCount: 0,
-	lbcount: 0,
-	nextBet: "",
-	previousReward:0,
-	addBet: function(bet, outcome) {
-		if (bet === "LB" && outcome === "Win") {
-			Trevel.betHistory.push("LO");
-			Trevel.betOutcomes.push("W");
-			Trevel.totalWins++;
-			Trevel.lbcount++;
-		}
-		if (bet === "LB" && outcome === "Loose") {
-			Trevel.betHistory.push("HI");
-			Trevel.hbCount++;
-			Trevel.betOutcomes.push("L");
-		}
-		if (bet === "HB" && outcome === "Win") {
-			Trevel.betHistory.push("HI");
-			Trevel.totalWins++;
-			Trevel.hbCount++;
-			Trevel.betOutcomes.push("W");
-		}
-		if (bet === "HB" && outcome === "Loose") {
-			Trevel.betHistory.push("LO");
-			Trevel.lbcount++;
-			Trevel.betOutcomes.push("L");
-		}
-		Trevel.totalBets++;
-	},
-	calculateProbabilities: function() {
-		Trevel.hbProbability = Trevel.hbCount / Trevel.betHistory.length;
-		Trevel.lbProbability = Trevel.lbcount / Trevel.betHistory.length;
-		Trevel.winRate = Trevel.totalWins / Trevel.totalBets;
-		if(Trevel.isTesting===false){
-		Trevel.profit = Trevel.getProfit();
-		}
-	},
-	getCurrentBalance: function() {
-		return parseFloat($('#balance').html());
-	},
-	placeHighBet: function() {
-		$('#double_your_btc_bet_hi_button').click();
-	},
-	placeLowBet: function() {
-		$('#double_your_btc_bet_lo_button').click();
-	},
-	setBetAmount: function(amount) {
-		var elem = document.getElementById("double_your_btc_stake");
-		elem.value = amount;
-	},
-	setOutcome: function(bet) {
-		if ($('#double_your_btc_bet_lose').html() !== '') {
-			Trevel.addBet(bet, "Loose");
-		} else {
-			Trevel.addBet(bet, "Win");
-		}
-	},
-	prepareBet: function() {
-		Trevel.calculateProbabilities();
-		if (Trevel.betHistory.length < 10) {
-			if (Trevel.useMartingale === true && Trevel.betHistory.length>12) {
-				if ($('#double_your_btc_bet_lose').html() !== '' && parseFloat($('#double_your_btc_stake').val()) * Trevel.martingaleMultiplier < Trevel.maxBet) {
-					Trevel.setBetAmount((parseFloat($('#double_your_btc_stake').val()) * Trevel.martingaleMultiplier).toFixed(8));
-				} else {
-					Trevel.setBetAmount(Trevel.minBet);
-				}
-			}
-		} else {
-			if (Trevel.useKelly === true && Trevel.betHistory.length>12) {
-				Trevel.currentBalance = Trevel.getCurrentBalance();
-				var currMulty = document.getElementById("double_your_btc_payout_multiplier").value;
-				var kellyAmount = (((Trevel.currentBalance * Trevel.kellyPercent) / 100) * ((Trevel.winRate * currMulty - 1)) / (currMulty - 1)).toFixed(8);
-				if (kellyAmount > 0 && kellyAmount < Trevel.maxBet) {
-					Trevel.setBetAmount(kellyAmount);
-				} else {
-					Trevel.setBetAmount(Trevel.minBet);
-				}
-			} else if (Trevel.useMartingale === true && Trevel.betHistory.length>12) {
-				if ($('#double_your_btc_bet_lose').html() !== '' && parseFloat($('#double_your_btc_stake').val()) * Trevel.martingaleMultiplier < Trevel.maxBet) {
-					Trevel.setBetAmount((parseFloat($('#double_your_btc_stake').val()) * Trevel.martingaleMultiplier).toFixed(8));
-				} else {
-					Trevel.setBetAmount(Trevel.minBet);
-				}
-			}
-		}
-	},
-	placeBet: function() {
-		if (Trevel.nextBet === "HB") {
-			Trevel.placeHighBet();
-		} else if (Trevel.nextBet === "LB") {
-			Trevel.placeLowBet();
-		} else if (Trevel.betHistory.length > 0 && Trevel.swap === true) {
-			var prev = Trevel.betHistory[Trevel.betHistory.length - 1];
-			if (prev === "LO") {
-				Trevel.placeHighBet();
-			} else {
-				Trevel.placeLowBet();
-			}
-		} else {
-			Trevel.placeLowBet();
-		}
-	},
-	getProfit: function() {
-		return (Trevel.getCurrentBalance() - Trevel.startingBalance).toFixed(8);
-	},
-	getNumStates: function() {
-		return 8;
-	},
-	getMaxNumActions: function() {
-		return 2;
-	},
-	getSentiment: function(bet) {
-		if (bet === "HI") {
-			return 1;
-		} else {
-			return 0;
-		}
-	},
-	getPreviousBets: function() {
-		var hist = [];
-		if (Trevel.betHistory.length > 12) {
-			hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - 1]));
-			hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - 2]));
-			hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - 3]));
-			hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - 4]));
-			hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - 5]));
-			hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - 6]));
-			hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - 7]));
-			hist.push(Trevel.getSentiment(Trevel.betHistory[Trevel.betHistory.length - 8]));
-		} else {
-			hist = [0, 1, 0, 1, 0, 1, 0, 1]; //incase we just started...
-		}
-		return hist;
-	},
-	getAgentState: function() { //we'll observe the last 8 bets
-		var s = Trevel.getPreviousBets();
-		return s;
-	},
-	getReward: function() {
-		var reward = 0;
-		var out1=Trevel.betOutcomes[Trevel.betOutcomes.length - 1];
-		var out2=Trevel.betOutcomes[Trevel.betOutcomes.length - 2];
-		if(out1==="L"){
-			if(Trevel.previousReward<0){
-				reward=Trevel.previousReward;
-				reward+=-0.03;
-				if(out2==="L"){
-					reward+=-0.03;
-				}
-			}
-			else{
-				reward=-0.03;
-				if(out2==="L"){
-					reward+=-0.03;
-				}
-			}
-		}
-		else{
-			if(Trevel.previousReward>0){
-				reward=Trevel.previousReward;
-				reward+=0.01;
-				if(out2==="W"){
-					reward+=0.01;
-				}
-			}
-			else{
-				reward=0.01;
-				if(out2==="W"){
-					reward+=0.01;
-				}
-			}
-		}
-		return reward;
-	},
-	//for raw testing only
-	randomNumber: function(min, max) {
-		return Math.floor(Math.random() * (max - min + 1) + min);
-	},
-	getTestOutcome: function(random) {
-		if (random % 2 == 0) {
-			return "HI";
-		} else {
-			return "LO";
-		}
-	},
-	//initialize Trevel
-	init: function() {
-		Trevel.startingBalance = Trevel.currentBalance = parseFloat($('#balance').html());
-		Trevel.setBetAmount(Trevel.minBet);
-		Trevel.stop = false;
-		Trevel.swap = true;
-		Trevel.betSpeed=3000;
-	}
+    //settings you can change
+    stop: true,
+    maxBet: 0.00001,
+    minBet: 0.00000005,
+    swap: true,
+    betSpeed: 100,//change this on init
+    verbose: true,
+    isTesting: false,
+    //money management
+    useKelly: false,//martingale performs better on live account!
+    korm: false,
+    kellyPercent: 5, //can't be more than 100 or less than 1
+    useMartingale: true, //if kelly is true this won't work
+
+    martingaleMultiplier: 2,
+    //bot settings, these are set automaticcally don't bother
+    currentBalance: 0,
+    startingBalance: 0,
+    betAmount: 0,
+    profit: 0,
+    totalBets: 0,
+    totalWins: 0,
+    winRate: 0,
+    betHistory: [], //this is a sequence of all winning bets not the sequence of bets we placed
+    betOutcomes: [],
+    hbProbability: 0,
+    lbProbability: 0,
+    hbCount: 0,
+    lbcount: 0,
+    nextBet: "",
+    previousReward: 0,
+    addBet: function (bet, outcome) {
+        if (bet === "LB" && outcome === "Win") {
+            this.betHistory.push("LO");
+            this.betOutcomes.push("W");
+            this.totalWins++;
+            this.lbcount++;
+        }
+        if (bet === "LB" && outcome === "Loose") {
+            this.betHistory.push("HI");
+            this.hbCount++;
+            this.betOutcomes.push("L");
+        }
+        if (bet === "HB" && outcome === "Win") {
+            this.betHistory.push("HI");
+            this.totalWins++;
+            this.hbCount++;
+            this.betOutcomes.push("W");
+        }
+        if (bet === "HB" && outcome === "Loose") {
+            this.betHistory.push("LO");
+            this.lbcount++;
+            this.betOutcomes.push("L");
+        }
+        this.totalBets++;
+    },
+    calculateProbabilities: function () {
+        this.hbProbability = this.hbCount / this.betHistory.length;
+        this.lbProbability = this.lbcount / this.betHistory.length;
+        this.winRate = this.totalWins / this.totalBets;
+        if (this.isTesting === false) {
+            this.profit = this.getProfit();
+        }
+    },
+    getCurrentBalance: function () {
+        return parseFloat($('#balance').html());
+    },
+    placeHighBet: function () {
+        $('#double_your_btc_bet_hi_button').click();
+    },
+    placeLowBet: function () {
+        $('#double_your_btc_bet_lo_button').click();
+    },
+    setBetAmount: function (amount) {
+        var elem = document.getElementById("double_your_btc_stake");
+        elem.value = amount;
+    },
+    setOutcome: function (bet) {
+        if ($('#double_your_btc_bet_lose').html() !== '') {
+            this.addBet(bet, "Loose");
+        } else {
+            this.addBet(bet, "Win");
+        }
+    },
+    prepareBet: function () { // refreshes the statistics, then may write the next stake into the stake input depending on the money-management flags
+        this.calculateProbabilities();
+        if (this.betHistory.length < 10) {
+            if (this.useMartingale === true && this.betHistory.length > 12) { // NOTE(review): unreachable - length cannot be both < 10 and > 12
+                if ($('#double_your_btc_bet_lose').html() !== '' && parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier < this.maxBet) {
+                    this.setBetAmount((parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier).toFixed(8));
+                } else {
+                    this.setBetAmount(this.minBet);
+                }
+            }
+        } else {
+            if (this.useKelly === true && this.betHistory.length > 12) { // Kelly takes precedence over martingale when both flags are set
+                this.currentBalance = this.getCurrentBalance();
+                var currMulty = document.getElementById("double_your_btc_payout_multiplier").value;
+                var kellyAmount = (((this.currentBalance * this.kellyPercent) / 100) * ((this.winRate * currMulty - 1)) / (currMulty - 1)).toFixed(8); // toFixed() yields a string; the comparisons below coerce it back to a number
+                if (kellyAmount > 0 && kellyAmount < this.maxBet) {
+                    this.setBetAmount(kellyAmount);
+                } else {
+                    this.setBetAmount(this.minBet);
+                }
+            } else if (this.useMartingale === true && this.betHistory.length > 12) {
+                if ($('#double_your_btc_bet_lose').html() !== '' && parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier < this.maxBet) { // lose element non-empty and the multiplied stake still below maxBet
+                    this.setBetAmount((parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier).toFixed(8));
+                } else {
+                    this.setBetAmount(this.minBet);
+                }
+            }
+        }
+    },
+    placeBet: function () {
+        if (this.nextBet === "HB") {
+            this.placeHighBet();
+        } else if (this.nextBet === "LB") {
+            this.placeLowBet();
+        } else if (this.betHistory.length > 0 && this.swap === true) {
+            var prev = this.betHistory[this.betHistory.length - 1];
+            if (prev === "LO") {
+                this.placeHighBet();
+            } else {
+                this.placeLowBet();
+            }
+        } else {
+            this.placeLowBet();
+        }
+    },
+    getProfit: function () {
+        return (this.getCurrentBalance() - this.startingBalance).toFixed(8);
+    },
+    getNumStates: function () {
+        return 8;
+    },
+    getMaxNumActions: function () {
+        return 2;
+    },
+    getSentiment: function (bet) {
+        if (bet === "HI") {
+            return 1;
+        } else {
+            return 0;
+        }
+    },
+    getPreviousBets: function () {
+        var hist = [];
+        if (this.betHistory.length > 12) {
+            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 1]));
+            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 2]));
+            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 3]));
+            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 4]));
+            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 5]));
+            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 6]));
+            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 7]));
+            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 8]));
+        } else {
+            hist = [0, 1, 0, 1, 0, 1, 0, 1]; //incase we just started...
+        }
+        return hist;
+    },
+    getAgentState: function () { //we'll observe the last 8 bets
+        var s = this.getPreviousBets();
+        return s;
+    },
+    getReward: function () {
+        var reward = 0;
+        var out1 = this.betOutcomes[this.betOutcomes.length - 1];
+        var out2 = this.betOutcomes[this.betOutcomes.length - 2];
+        if (out1 === "L") {
+            if (this.previousReward < 0) {
+                reward = this.previousReward;
+                reward += -0.03;
+                if (out2 === "L") {
+                    reward += -0.03;
+                }
+            }
+            else {
+                reward = -0.03;
+                if (out2 === "L") {
+                    reward += -0.03;
+                }
+            }
+        }
+        else {
+            if (this.previousReward > 0) {
+                reward = this.previousReward;
+                reward += 0.01;
+                if (out2 === "W") {
+                    reward += 0.01;
+                }
+            }
+            else {
+                reward = 0.01;
+                if (out2 === "W") {
+                    reward += 0.01;
+                }
+            }
+        }
+        return reward;
+    },
+    //for raw testing only
+    randomNumber: function (min, max) {
+        return Math.floor(Math.random() * (max - min + 1) + min);
+    },
+    getTestOutcome: function (random) {
+        if (random % 2 === 0) {
+            return "HI";
+        } else {
+            return "LO";
+        }
+    },
+    //random string for random seed
+    rString: function (length, chars) { // length: number of characters (default 16); chars: alphabet to draw from (default A-Z, 0-9, a-z); returns the random string
+        var result = '';
+        length = (length === undefined) ? 16 : length; // honour the caller's argument instead of overwriting it with a redeclared var
+        chars = (chars === undefined) ? 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz' : chars;
+        for (var i = length; i > 0; --i) result += chars[Math.floor(Math.random() * chars.length)]; // NOTE: Math.random() is not cryptographically secure
+        return result;
+    },
+    //set client seed as random string
+    rSeed: function () { // writes a fresh random string into the #next_client_seed input
+        $('#next_client_seed').val(this.rString()); // rString is a method of this object, not a global; the bare call threw a ReferenceError
+    },
+    //initialize this
+    init: function () {
+        this.startingBalance = this.currentBalance = parseFloat($('#balance').html());
+        this.setBetAmount(this.minBet);
+        this.stop = true;
+        this.swap = true;
+        this.betSpeed = 3000;
+        document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.config()" class="free_play_link">START BOT</a>';
+
+    },
+    // ask user config variables
+    config: function () {
+        // Ask the user for the betting parameters via prompt() dialogs, store them on this object, then call startbetting().
+        //define temporary variables
+        var maxb = 0, minb = 0;
+        // prompt() returns a string (or null when cancelled), so the answers are converted to their real types below
+        //prompt questions
+        maxb = prompt('Maximum bet stake in Satoshi', 1000);
+        this.maxBet = sattobtc(maxb);
+        minb = prompt('Minimum bet stake in Satoshi', 2);
+        this.minBet = sattobtc(minb);
+        this.martingaleMultiplier = parseFloat(prompt('Bet multiplier on lose', 2)) || 2;
+        this.swap = String(prompt('True for swap enabled, false for disabled', 'true')).toLowerCase() === 'true';
+        this.korm = prompt('True to enable Kelly, false to enabled martingale, leave blank for both', 'false');
+        this.betSpeed = parseInt(prompt('Wait time before next bet is placed in ms', 3000), 10) || 3000;
+        // placeBet() tests swap with === true, so it has to be a real boolean rather than the string 'true'
+        //convert satoshi to btc
+        function sattobtc(sat) {
+            var btc = 0.00000001;
+            return sat * btc;
+        }
+        if (this.korm === 'true') {
+            this.useKelly = true;
+            this.useMartingale = false;
+        }
+        else if (this.korm === 'false') {
+            this.useMartingale = true;
+            this.useKelly = false;
+        }
+        else {
+            this.useKelly = this.useMartingale = true;
+        }
+        //start betting
+        startbetting();
+    },
+    stopbets: function () {
+        env.stop = true;
+        clearInterval(interval);
+        console.log('Bet session has been stopped, to start over click start.');
+        document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.config()" class="free_play_link">START BOT</a>';
+
+
+    }
 };
 //Deep Q learning with reinforceJS
-var spec = {}
+var spec = {};
 spec.update = 'qlearn';
 spec.gamma = 0.9;
 spec.epsilon = 0.45;
@@ -1741,67 +1800,87 @@ spec.alpha = 0.01;
 spec.experience_add_every = 12;
 spec.experience_size = 100000;
 spec.learning_steps_per_iteration = 24;
-spec.tderror_clamp = 0.7; 
+spec.tderror_clamp = 0.7;
 spec.num_hidden_units = 24;
 // create an environment object
 var env = Trevel;
+var interval = null;
 if (env.isTesting === false) {
-	env.init();
+    env.init();
 }
 // create the DQN agent
 agent = new RL.DQNAgent(env, spec);
-setInterval(function() {
-	if (env.stop === false) {
-		var state = env.getAgentState();
-		var action = agent.act(state);
-		var outcome = "";
-		if (env.isTesting === false) {
-			if (action === 0) {
-				env.nextBet = "LB";
-				env.prepareBet();
-				env.placeBet();
-				env.setOutcome("LB");
-				outcome = env.betOutcomes[env.betOutcomes.length - 1];
-			} else if (action === 1) {
-				env.nextBet = "HB";
-				env.prepareBet();
-				env.placeBet();
-				env.setOutcome("HB");
-				outcome = env.betOutcomes[env.betOutcomes.length - 1];
-			}
-			if (env.verbose === true) {
-				env.calculateProbabilities();
-				//console.log("Machine Bet: " + action + "{" + env.nextBet + "} isKelly: " + env.useKelly + " isMartingale: " + env.useMartingale);
-				console.log("Profit: " + env.profit+" WinRate: " + (env.winRate*100).toFixed(2));
-			}
-		} else {
-			console.log("Action: " + action);
-			var testOutcome = env.getTestOutcome(env.randomNumber(0, 1000));
-			if (action === 0 && testOutcome === "LO") {
-				env.addBet("LB", "Win");
-				outcome = "W";
-			} else if (action === 0 && testOutcome === "HI") {
-				env.addBet("LB", "Loose");
-				outcome = "L";
-			} else if (action === 1 && testOutcome === "HI") {
-				env.addBet("HB", "Win");
-				outcome = "W";
-			} else if (action === 1 && testOutcome === "LO") {
-				env.addBet("HB", "Loose");
-				outcome = "L";
-			}
-			env.calculateProbabilities();
-			console.log("Winrate: " + (env.winRate*100).toFixed(2));
-		}
-		var reward = env.getReward();
-		if (reward == 0) {
-			if (outcome === "L") {
-				reward = -0.03;
-			} else {
-				reward = 0.01;
-			}
-		}
-		agent.learn(reward);
-		env.previousReward=reward;
-	}
-}, env.betSpeed);
\ No newline at end of file
+
+// start betting function/agent interval
+function startbetting() {
+    console.log('Starting bet session, to stop click STOP BOT');
+    document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.stopbets()" class="free_play_link">STOP BOT</a>';
+    env.stop = false;
+    interval = setInterval(function () { loop(); }, env.betSpeed);
+}
+console.clear();
+console.log('You are using Trevel, with ReinforceJS');
+console.log('If you shall notice, the Free BTC link has been replaced with a START/STOP BOT button.');
+console.log('Click it to set the config. Note: These settings are not persistent.');
+console.log('To change the default values for these settings, search the script for "prompt"');
+console.log('Enjoy');
+function loop() {
+if(env.profit > 0.00000500)
+{
+    env.rSeed();
+}
+    if (env.stop === false) {
+        var state = env.getAgentState();
+        var action = agent.act(state);
+        var outcome = "";
+        if (env.isTesting === false) {
+            if (action === 0) {
+                env.nextBet = "LB";
+                env.prepareBet();
+                env.placeBet();
+                env.setOutcome("LB");
+                outcome = env.betOutcomes[env.betOutcomes.length - 1];
+            } else if (action === 1) {
+                env.nextBet = "HB";
+                env.prepareBet();
+                env.placeBet();
+                env.setOutcome("HB");
+                outcome = env.betOutcomes[env.betOutcomes.length - 1];
+            }
+            if (env.verbose === true) {
+                env.calculateProbabilities();
+                //console.log("Machine Bet: " + action + "{" + env.nextBet + "} isKelly: " + env.useKelly + " isMartingale: " + env.useMartingale);
+                console.log("Profit: " + env.profit + " WinRate: " + (env.winRate * 100).toFixed(2));
+            }
+        } else {
+            console.log("Action: " + action);
+            var testOutcome = env.getTestOutcome(env.randomNumber(0, 1000));
+            if (action === 0 && testOutcome === "LO") {
+                env.addBet("LB", "Win");
+                outcome = "W";
+            } else if (action === 0 && testOutcome === "HI") {
+                env.addBet("LB", "Loose");
+                outcome = "L";
+            } else if (action === 1 && testOutcome === "HI") {
+                env.addBet("HB", "Win");
+                outcome = "W";
+            } else if (action === 1 && testOutcome === "LO") {
+                env.addBet("HB", "Loose");
+                outcome = "L";
+            }
+            env.calculateProbabilities();
+            console.log("Winrate: " + (env.winRate * 100).toFixed(2));
+        }
+        var reward = env.getReward();
+        if (reward === 0) {
+            if (outcome === "L") {
+                reward = -0.03;
+            } else {
+                reward = 0.01;
+            }
+        }
+        agent.learn(reward);
+        env.previousReward = reward;
+    }
+
+}

From 39ebf534a9ee60a4d4df5c4251fc55774d6231ed Mon Sep 17 00:00:00 2001
From: nickisghosty <nickisghosty@gmail.com>
Date: Sun, 22 Oct 2017 07:24:46 -0700
Subject: [PATCH 2/6] fixed indentation

---
 DQ-Trevel.js | 3677 +++++++++++++++++++++++++-------------------------
 1 file changed, 1838 insertions(+), 1839 deletions(-)

diff --git a/DQ-Trevel.js b/DQ-Trevel.js
index b35d805..7a68dde 100644
--- a/DQ-Trevel.js
+++ b/DQ-Trevel.js
@@ -1,1795 +1,1795 @@
 var R = {}; // the Recurrent library
 (function (global) {
-    "use strict";
-    // Utility fun
-    function assert(condition, message) {
-        // from http://stackoverflow.com/questions/15313418/javascript-assert
-        if (!condition) {
-            message = message || "Assertion failed";
-            if (typeof Error !== "undefined") {
-                throw new Error(message);
-            }
-            throw message; // Fallback
-        }
-    }
-    // Random numbers utils
-    var return_v = false;
-    var v_val = 0.0;
-    var gaussRandom = function () {
-        if (return_v) {
-            return_v = false;
-            return v_val;
-        }
-        var u = 2 * Math.random() - 1;
-        var v = 2 * Math.random() - 1;
-        var r = u * u + v * v;
-        if (r == 0 || r > 1) return gaussRandom();
-        var c = Math.sqrt(-2 * Math.log(r) / r);
-        v_val = v * c; // cache this
-        return_v = true;
-        return u * c;
-    }
-    var randf = function (a, b) {
-        return Math.random() * (b - a) + a;
-    }
-    var randi = function (a, b) {
-        return Math.floor(Math.random() * (b - a) + a);
-    }
-    var randn = function (mu, std) {
-        return mu + gaussRandom() * std;
-    }
-    // helper function returns array of zeros of length n
-    // and uses typed arrays if available
-    var zeros = function (n) {
-        if (typeof (n) === 'undefined' || isNaN(n)) {
-            return [];
-        }
-        if (typeof ArrayBuffer === 'undefined') {
-            // lacking browser support
-            var arr = new Array(n);
-            for (var i = 0; i < n; i++) {
-                arr[i] = 0;
-            }
-            return arr;
-        } else {
-            return new Float64Array(n);
-        }
-    }
-    // Mat holds a matrix
-    var Mat = function (n, d) {
-        // n is number of rows d is number of columns
-        this.n = n;
-        this.d = d;
-        this.w = zeros(n * d);
-        this.dw = zeros(n * d);
-    }
-    Mat.prototype = {
-        get: function (row, col) {
-            // slow but careful accessor function
-            // we want row-major order
-            var ix = (this.d * row) + col;
-            assert(ix >= 0 && ix < this.w.length);
-            return this.w[ix];
-        },
-        set: function (row, col, v) {
-            // slow but careful accessor function
-            var ix = (this.d * row) + col;
-            assert(ix >= 0 && ix < this.w.length);
-            this.w[ix] = v;
-        },
-        setFrom: function (arr) {
-            for (var i = 0, n = arr.length; i < n; i++) {
-                this.w[i] = arr[i];
-            }
-        },
-        setColumn: function (m, i) {
-            for (var q = 0, n = m.w.length; q < n; q++) {
-                this.w[(this.d * q) + i] = m.w[q];
-            }
-        },
-        toJSON: function () {
-            var json = {};
-            json['n'] = this.n;
-            json['d'] = this.d;
-            json['w'] = this.w;
-            return json;
-        },
-        fromJSON: function (json) {
-            this.n = json.n;
-            this.d = json.d;
-            this.w = zeros(this.n * this.d);
-            this.dw = zeros(this.n * this.d);
-            for (var i = 0, n = this.n * this.d; i < n; i++) {
-                this.w[i] = json.w[i]; // copy over weights
-            }
-        }
-    }
-    var copyMat = function (b) {
-        var a = new Mat(b.n, b.d);
-        a.setFrom(b.w);
-        return a;
-    }
-    var copyNet = function (net) {
-        // nets are (k,v) pairs with k = string key, v = Mat()
-        var new_net = {};
-        for (var p in net) {
-            if (net.hasOwnProperty(p)) {
-                new_net[p] = copyMat(net[p]);
-            }
-        }
-        return new_net;
-    }
-    var updateMat = function (m, alpha) {
-        // updates in place
-        for (var i = 0, n = m.n * m.d; i < n; i++) {
-            if (m.dw[i] !== 0) {
-                m.w[i] += -alpha * m.dw[i];
-                m.dw[i] = 0;
-            }
-        }
-    }
-    var updateNet = function (net, alpha) {
-        for (var p in net) {
-            if (net.hasOwnProperty(p)) {
-                updateMat(net[p], alpha);
-            }
-        }
-    }
-    var netToJSON = function (net) {
-        var j = {};
-        for (var p in net) {
-            if (net.hasOwnProperty(p)) {
-                j[p] = net[p].toJSON();
-            }
-        }
-        return j;
-    }
-    var netFromJSON = function (j) {
-        var net = {};
-        for (var p in j) {
-            if (j.hasOwnProperty(p)) {
-                net[p] = new Mat(1, 1); // not proud of this
-                net[p].fromJSON(j[p]);
-            }
-        }
-        return net;
-    }
-    var netZeroGrads = function (net) {
-        for (var p in net) {
-            if (net.hasOwnProperty(p)) {
-                var mat = net[p];
-                gradFillConst(mat, 0);
-            }
-        }
-    }
-    var netFlattenGrads = function (net) {
-        var n = 0;
-        for (var p in net) {
-            if (net.hasOwnProperty(p)) {
-                var mat = net[p];
-                n += mat.dw.length;
-            }
-        }
-        var g = new Mat(n, 1);
-        var ix = 0;
-        for (var p in net) {
-            if (net.hasOwnProperty(p)) {
-                var mat = net[p];
-                for (var i = 0, m = mat.dw.length; i < m; i++) {
-                    g.w[ix] = mat.dw[i];
-                    ix++;
-                }
-            }
-        }
-        return g;
-    }
-    // return Mat but filled with random numbers from gaussian
-    var RandMat = function (n, d, mu, std) {
-        var m = new Mat(n, d);
-        fillRandn(m, mu, std);
-        //fillRand(m,-std,std); // kind of :P
-        return m;
-    }
-    // Mat utils
-    // fill matrix with random gaussian numbers
-    var fillRandn = function (m, mu, std) {
-        for (var i = 0, n = m.w.length; i < n; i++) {
-            m.w[i] = randn(mu, std);
-        }
-    }
-    var fillRand = function (m, lo, hi) {
-        for (var i = 0, n = m.w.length; i < n; i++) {
-            m.w[i] = randf(lo, hi);
-        }
-    }
-    var gradFillConst = function (m, c) {
-        for (var i = 0, n = m.dw.length; i < n; i++) {
-            m.dw[i] = c
-        }
-    }
-    // Transformer definitions
-    var Graph = function (needs_backprop) {
-        if (typeof needs_backprop === 'undefined') {
-            needs_backprop = true;
-        }
-        this.needs_backprop = needs_backprop;
-        // this will store a list of functions that perform backprop,
-        // in their forward pass order. So in backprop we will go
-        // backwards and evoke each one
-        this.backprop = [];
-    }
-    Graph.prototype = {
-        backward: function () {
-            for (var i = this.backprop.length - 1; i >= 0; i--) {
-                this.backprop[i](); // tick!
-            }
-        },
-        rowPluck: function (m, ix) {
-            // pluck a row of m with index ix and return it as col vector
-            assert(ix >= 0 && ix < m.n);
-            var d = m.d;
-            var out = new Mat(d, 1);
-            for (var i = 0, n = d; i < n; i++) {
-                out.w[i] = m.w[d * ix + i];
-            } // copy over the data
-            if (this.needs_backprop) {
-                var backward = function () {
-                    for (var i = 0, n = d; i < n; i++) {
-                        m.dw[d * ix + i] += out.dw[i];
-                    }
-                }
-                this.backprop.push(backward);
-            }
-            return out;
-        },
-        tanh: function (m) {
-            // tanh nonlinearity
-            var out = new Mat(m.n, m.d);
-            var n = m.w.length;
-            for (var i = 0; i < n; i++) {
-                out.w[i] = Math.tanh(m.w[i]);
-            }
-            if (this.needs_backprop) {
-                var backward = function () {
-                    for (var i = 0; i < n; i++) {
-                        // grad for z = tanh(x) is (1 - z^2)
-                        var mwi = out.w[i];
-                        m.dw[i] += (1.0 - mwi * mwi) * out.dw[i];
-                    }
-                }
-                this.backprop.push(backward);
-            }
-            return out;
-        },
-        sigmoid: function (m) {
-            // sigmoid nonlinearity
-            var out = new Mat(m.n, m.d);
-            var n = m.w.length;
-            for (var i = 0; i < n; i++) {
-                out.w[i] = sig(m.w[i]);
-            }
-            if (this.needs_backprop) {
-                var backward = function () {
-                    for (var i = 0; i < n; i++) {
-                        // grad for z = tanh(x) is (1 - z^2)
-                        var mwi = out.w[i];
-                        m.dw[i] += mwi * (1.0 - mwi) * out.dw[i];
-                    }
-                }
-                this.backprop.push(backward);
-            }
-            return out;
-        },
-        relu: function (m) {
-            var out = new Mat(m.n, m.d);
-            var n = m.w.length;
-            for (var i = 0; i < n; i++) {
-                out.w[i] = Math.max(0, m.w[i]); // relu
-            }
-            if (this.needs_backprop) {
-                var backward = function () {
-                    for (var i = 0; i < n; i++) {
-                        m.dw[i] += m.w[i] > 0 ? out.dw[i] : 0.0;
-                    }
-                }
-                this.backprop.push(backward);
-            }
-            return out;
-        },
-        mul: function (m1, m2) {
-            // multiply matrices m1 * m2
-            assert(m1.d === m2.n, 'matmul dimensions misaligned');
-            var n = m1.n;
-            var d = m2.d;
-            var out = new Mat(n, d);
-            for (var i = 0; i < m1.n; i++) { // loop over rows of m1
-                for (var j = 0; j < m2.d; j++) { // loop over cols of m2
-                    var dot = 0.0;
-                    for (var k = 0; k < m1.d; k++) { // dot product loop
-                        dot += m1.w[m1.d * i + k] * m2.w[m2.d * k + j];
-                    }
-                    out.w[d * i + j] = dot;
-                }
-            }
-            if (this.needs_backprop) {
-                var backward = function () {
-                    for (var i = 0; i < m1.n; i++) { // loop over rows of m1
-                        for (var j = 0; j < m2.d; j++) { // loop over cols of m2
-                            for (var k = 0; k < m1.d; k++) { // dot product loop
-                                var b = out.dw[d * i + j];
-                                m1.dw[m1.d * i + k] += m2.w[m2.d * k + j] * b;
-                                m2.dw[m2.d * k + j] += m1.w[m1.d * i + k] * b;
-                            }
-                        }
-                    }
-                }
-                this.backprop.push(backward);
-            }
-            return out;
-        },
-        add: function (m1, m2) {
-            assert(m1.w.length === m2.w.length);
-            var out = new Mat(m1.n, m1.d);
-            for (var i = 0, n = m1.w.length; i < n; i++) {
-                out.w[i] = m1.w[i] + m2.w[i];
-            }
-            if (this.needs_backprop) {
-                var backward = function () {
-                    for (var i = 0, n = m1.w.length; i < n; i++) {
-                        m1.dw[i] += out.dw[i];
-                        m2.dw[i] += out.dw[i];
-                    }
-                }
-                this.backprop.push(backward);
-            }
-            return out;
-        },
-        dot: function (m1, m2) {
-            // m1 m2 are both column vectors
-            assert(m1.w.length === m2.w.length);
-            var out = new Mat(1, 1);
-            var dot = 0.0;
-            for (var i = 0, n = m1.w.length; i < n; i++) {
-                dot += m1.w[i] * m2.w[i];
-            }
-            out.w[0] = dot;
-            if (this.needs_backprop) {
-                var backward = function () {
-                    for (var i = 0, n = m1.w.length; i < n; i++) {
-                        m1.dw[i] += m2.w[i] * out.dw[0];
-                        m2.dw[i] += m1.w[i] * out.dw[0];
-                    }
-                }
-                this.backprop.push(backward);
-            }
-            return out;
-        },
-        eltmul: function (m1, m2) {
-            assert(m1.w.length === m2.w.length);
-            var out = new Mat(m1.n, m1.d);
-            for (var i = 0, n = m1.w.length; i < n; i++) {
-                out.w[i] = m1.w[i] * m2.w[i];
-            }
-            if (this.needs_backprop) {
-                var backward = function () {
-                    for (var i = 0, n = m1.w.length; i < n; i++) {
-                        m1.dw[i] += m2.w[i] * out.dw[i];
-                        m2.dw[i] += m1.w[i] * out.dw[i];
-                    }
-                }
-                this.backprop.push(backward);
-            }
-            return out;
-        },
-    }
-    var softmax = function (m) {
-        var out = new Mat(m.n, m.d); // probability volume
-        var maxval = -999999;
-        for (var i = 0, n = m.w.length; i < n; i++) {
-            if (m.w[i] > maxval) maxval = m.w[i];
-        }
-        var s = 0.0;
-        for (var i = 0, n = m.w.length; i < n; i++) {
-            out.w[i] = Math.exp(m.w[i] - maxval);
-            s += out.w[i];
-        }
-        for (var i = 0, n = m.w.length; i < n; i++) {
-            out.w[i] /= s;
-        }
-        // no backward pass here needed
-        // since we will use the computed probabilities outside
-        // to set gradients directly on m
-        return out;
-    }
-    var Solver = function () {
-        this.decay_rate = 0.999;
-        this.smooth_eps = 1e-8;
-        this.step_cache = {};
-    }
-    Solver.prototype = {
-        step: function (model, step_size, regc, clipval) {
-            // perform parameter update
-            var solver_stats = {};
-            var num_clipped = 0;
-            var num_tot = 0;
-            for (var k in model) {
-                if (model.hasOwnProperty(k)) {
-                    var m = model[k]; // mat ref
-                    if (!(k in this.step_cache)) {
-                        this.step_cache[k] = new Mat(m.n, m.d);
-                    }
-                    var s = this.step_cache[k];
-                    for (var i = 0, n = m.w.length; i < n; i++) {
-                        // rmsprop adaptive learning rate
-                        var mdwi = m.dw[i];
-                        s.w[i] = s.w[i] * this.decay_rate + (1.0 - this.decay_rate) * mdwi * mdwi;
-                        // gradient clip
-                        if (mdwi > clipval) {
-                            mdwi = clipval;
-                            num_clipped++;
-                        }
-                        if (mdwi < -clipval) {
-                            mdwi = -clipval;
-                            num_clipped++;
-                        }
-                        num_tot++;
-                        // update (and regularize)
-                        m.w[i] += -step_size * mdwi / Math.sqrt(s.w[i] + this.smooth_eps) - regc * m.w[i];
-                        m.dw[i] = 0; // reset gradients for next iteration
-                    }
-                }
-            }
-            solver_stats['ratio_clipped'] = num_clipped * 1.0 / num_tot;
-            return solver_stats;
-        }
-    }
-    var initLSTM = function (input_size, hidden_sizes, output_size) {
-        // hidden size should be a list
-        var model = {};
-        for (var d = 0; d < hidden_sizes.length; d++) { // loop over depths
-            var prev_size = d === 0 ? input_size : hidden_sizes[d - 1];
-            var hidden_size = hidden_sizes[d];
-            // gates parameters
-            model['Wix' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
-            model['Wih' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
-            model['bi' + d] = new Mat(hidden_size, 1);
-            model['Wfx' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
-            model['Wfh' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
-            model['bf' + d] = new Mat(hidden_size, 1);
-            model['Wox' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
-            model['Woh' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
-            model['bo' + d] = new Mat(hidden_size, 1);
-            // cell write params
-            model['Wcx' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
-            model['Wch' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
-            model['bc' + d] = new Mat(hidden_size, 1);
-        }
-        // decoder params
-        model['Whd'] = new RandMat(output_size, hidden_size, 0, 0.08);
-        model['bd'] = new Mat(output_size, 1);
-        return model;
-    }
-    var forwardLSTM = function (G, model, hidden_sizes, x, prev) {
-        // forward prop for a single tick of LSTM
-        // G is graph to append ops to
-        // model contains LSTM parameters
-        // x is 1D column vector with observation
-        // prev is a struct containing hidden and cell
-        // from previous iteration
-        if (prev == null || typeof prev.h === 'undefined') {
-            var hidden_prevs = [];
-            var cell_prevs = [];
-            for (var d = 0; d < hidden_sizes.length; d++) {
-                hidden_prevs.push(new R.Mat(hidden_sizes[d], 1));
-                cell_prevs.push(new R.Mat(hidden_sizes[d], 1));
-            }
-        } else {
-            var hidden_prevs = prev.h;
-            var cell_prevs = prev.c;
-        }
-        var hidden = [];
-        var cell = [];
-        for (var d = 0; d < hidden_sizes.length; d++) {
-            var input_vector = d === 0 ? x : hidden[d - 1];
-            var hidden_prev = hidden_prevs[d];
-            var cell_prev = cell_prevs[d];
-            // input gate
-            var h0 = G.mul(model['Wix' + d], input_vector);
-            var h1 = G.mul(model['Wih' + d], hidden_prev);
-            var input_gate = G.sigmoid(G.add(G.add(h0, h1), model['bi' + d]));
-            // forget gate
-            var h2 = G.mul(model['Wfx' + d], input_vector);
-            var h3 = G.mul(model['Wfh' + d], hidden_prev);
-            var forget_gate = G.sigmoid(G.add(G.add(h2, h3), model['bf' + d]));
-            // output gate
-            var h4 = G.mul(model['Wox' + d], input_vector);
-            var h5 = G.mul(model['Woh' + d], hidden_prev);
-            var output_gate = G.sigmoid(G.add(G.add(h4, h5), model['bo' + d]));
-            // write operation on cells
-            var h6 = G.mul(model['Wcx' + d], input_vector);
-            var h7 = G.mul(model['Wch' + d], hidden_prev);
-            var cell_write = G.tanh(G.add(G.add(h6, h7), model['bc' + d]));
-            // compute new cell activation
-            var retain_cell = G.eltmul(forget_gate, cell_prev); // what do we keep from cell
-            var write_cell = G.eltmul(input_gate, cell_write); // what do we write to cell
-            var cell_d = G.add(retain_cell, write_cell); // new cell contents
-            // compute hidden state as gated, saturated cell activations
-            var hidden_d = G.eltmul(output_gate, G.tanh(cell_d));
-            hidden.push(hidden_d);
-            cell.push(cell_d);
-        }
-        // one decoder to outputs at end
-        var output = G.add(G.mul(model['Whd'], hidden[hidden.length - 1]), model['bd']);
-        // return cell memory, hidden representation and output
-        return {
-            'h': hidden,
-            'c': cell,
-            'o': output
-        };
-    }
-    var sig = function (x) {
-        // helper function for computing sigmoid
-        return 1.0 / (1 + Math.exp(-x));
-    }
-    var maxi = function (w) {
-        // argmax of array w
-        var maxv = w[0];
-        var maxix = 0;
-        for (var i = 1, n = w.length; i < n; i++) {
-            var v = w[i];
-            if (v > maxv) {
-                maxix = i;
-                maxv = v;
-            }
-        }
-        return maxix;
-    }
-    var samplei = function (w) {
-        // sample argmax from w, assuming w are 
-        // probabilities that sum to one
-        var r = randf(0, 1);
-        var x = 0.0;
-        var i = 0;
-        while (true) {
-            x += w[i];
-            if (x > r) {
-                return i;
-            }
-            i++;
-        }
-        return w.length - 1; // pretty sure we should never get here?
-    }
-    // various utils
-    global.assert = assert;
-    global.zeros = zeros;
-    global.maxi = maxi;
-    global.samplei = samplei;
-    global.randi = randi;
-    global.randn = randn;
-    global.softmax = softmax;
-    // classes
-    global.Mat = Mat;
-    global.RandMat = RandMat;
-    global.forwardLSTM = forwardLSTM;
-    global.initLSTM = initLSTM;
-    // more utils
-    global.updateMat = updateMat;
-    global.updateNet = updateNet;
-    global.copyMat = copyMat;
-    global.copyNet = copyNet;
-    global.netToJSON = netToJSON;
-    global.netFromJSON = netFromJSON;
-    global.netZeroGrads = netZeroGrads;
-    global.netFlattenGrads = netFlattenGrads;
-    // optimization
-    global.Solver = Solver;
-    global.Graph = Graph;
+	"use strict";
+	// Utility fun
+	function assert(condition, message) {
+		// from http://stackoverflow.com/questions/15313418/javascript-assert
+		if (!condition) {
+			message = message || "Assertion failed";
+			if (typeof Error !== "undefined") {
+				throw new Error(message);
+			}
+			throw message; // Fallback
+		}
+	}
+	// Random numbers utils
+	var return_v = false;
+	var v_val = 0.0;
+	var gaussRandom = function () {
+		if (return_v) {
+			return_v = false;
+			return v_val;
+		}
+		var u = 2 * Math.random() - 1;
+		var v = 2 * Math.random() - 1;
+		var r = u * u + v * v;
+		if (r == 0 || r > 1) return gaussRandom();
+		var c = Math.sqrt(-2 * Math.log(r) / r);
+		v_val = v * c; // cache this
+		return_v = true;
+		return u * c;
+	}
+	var randf = function (a, b) {
+		return Math.random() * (b - a) + a;
+	}
+	var randi = function (a, b) {
+		return Math.floor(Math.random() * (b - a) + a);
+	}
+	var randn = function (mu, std) {
+		return mu + gaussRandom() * std;
+	}
+	// helper function returns array of zeros of length n
+	// and uses typed arrays if available
+	var zeros = function (n) {
+		if (typeof (n) === 'undefined' || isNaN(n)) {
+			return [];
+		}
+		if (typeof ArrayBuffer === 'undefined') {
+			// lacking browser support
+			var arr = new Array(n);
+			for (var i = 0; i < n; i++) {
+				arr[i] = 0;
+			}
+			return arr;
+		} else {
+			return new Float64Array(n);
+		}
+	}
+	// Mat holds a matrix
+	var Mat = function (n, d) {
+		// n is number of rows d is number of columns
+		this.n = n;
+		this.d = d;
+		this.w = zeros(n * d);
+		this.dw = zeros(n * d);
+	}
+	Mat.prototype = {
+		get: function (row, col) {
+			// slow but careful accessor function
+			// we want row-major order
+			var ix = (this.d * row) + col;
+			assert(ix >= 0 && ix < this.w.length);
+			return this.w[ix];
+		},
+		set: function (row, col, v) {
+			// slow but careful accessor function
+			var ix = (this.d * row) + col;
+			assert(ix >= 0 && ix < this.w.length);
+			this.w[ix] = v;
+		},
+		setFrom: function (arr) {
+			for (var i = 0, n = arr.length; i < n; i++) {
+				this.w[i] = arr[i];
+			}
+		},
+		setColumn: function (m, i) {
+			for (var q = 0, n = m.w.length; q < n; q++) {
+				this.w[(this.d * q) + i] = m.w[q];
+			}
+		},
+		toJSON: function () {
+			var json = {};
+			json['n'] = this.n;
+			json['d'] = this.d;
+			json['w'] = this.w;
+			return json;
+		},
+		fromJSON: function (json) {
+			this.n = json.n;
+			this.d = json.d;
+			this.w = zeros(this.n * this.d);
+			this.dw = zeros(this.n * this.d);
+			for (var i = 0, n = this.n * this.d; i < n; i++) {
+				this.w[i] = json.w[i]; // copy over weights
+			}
+		}
+	}
+	var copyMat = function (b) {
+		var a = new Mat(b.n, b.d);
+		a.setFrom(b.w);
+		return a;
+	}
+	var copyNet = function (net) {
+		// nets are (k,v) pairs with k = string key, v = Mat()
+		var new_net = {};
+		for (var p in net) {
+			if (net.hasOwnProperty(p)) {
+				new_net[p] = copyMat(net[p]);
+			}
+		}
+		return new_net;
+	}
+	var updateMat = function (m, alpha) {
+		// updates in place
+		for (var i = 0, n = m.n * m.d; i < n; i++) {
+			if (m.dw[i] !== 0) {
+				m.w[i] += -alpha * m.dw[i];
+				m.dw[i] = 0;
+			}
+		}
+	}
+	var updateNet = function (net, alpha) {
+		for (var p in net) {
+			if (net.hasOwnProperty(p)) {
+				updateMat(net[p], alpha);
+			}
+		}
+	}
+	var netToJSON = function (net) {
+		var j = {};
+		for (var p in net) {
+			if (net.hasOwnProperty(p)) {
+				j[p] = net[p].toJSON();
+			}
+		}
+		return j;
+	}
+	var netFromJSON = function (j) {
+		var net = {};
+		for (var p in j) {
+			if (j.hasOwnProperty(p)) {
+				net[p] = new Mat(1, 1); // not proud of this
+				net[p].fromJSON(j[p]);
+			}
+		}
+		return net;
+	}
+	var netZeroGrads = function (net) {
+		for (var p in net) {
+			if (net.hasOwnProperty(p)) {
+				var mat = net[p];
+				gradFillConst(mat, 0);
+			}
+		}
+	}
+	var netFlattenGrads = function (net) {
+		var n = 0;
+		for (var p in net) {
+			if (net.hasOwnProperty(p)) {
+				var mat = net[p];
+				n += mat.dw.length;
+			}
+		}
+		var g = new Mat(n, 1);
+		var ix = 0;
+		for (var p in net) {
+			if (net.hasOwnProperty(p)) {
+				var mat = net[p];
+				for (var i = 0, m = mat.dw.length; i < m; i++) {
+					g.w[ix] = mat.dw[i];
+					ix++;
+				}
+			}
+		}
+		return g;
+	}
+	// return Mat but filled with random numbers from gaussian
+	var RandMat = function (n, d, mu, std) {
+		var m = new Mat(n, d);
+		fillRandn(m, mu, std);
+		//fillRand(m,-std,std); // kind of :P
+		return m;
+	}
+	// Mat utils
+	// fill matrix with random gaussian numbers
+	var fillRandn = function (m, mu, std) {
+		for (var i = 0, n = m.w.length; i < n; i++) {
+			m.w[i] = randn(mu, std);
+		}
+	}
+	var fillRand = function (m, lo, hi) { // overwrite every weight of m, in place, with an independent randf(lo, hi) draw
+		for (var i = 0, n = m.w.length; i < n; i++) {
+			m.w[i] = randf(lo, hi);
+		}
+	}
+	var gradFillConst = function (m, c) { // set every entry of the gradient array m.dw to the constant c, in place
+		for (var i = 0, n = m.dw.length; i < n; i++) {
+			m.dw[i] = c
+		}
+	}
+	// Transformer definitions
+	var Graph = function (needs_backprop) { // needs_backprop: when false the ops skip queuing their backward closures
+		if (typeof needs_backprop === 'undefined') {
+			needs_backprop = true; // default when the argument is omitted
+		}
+		this.needs_backprop = needs_backprop;
+		// this will store a list of functions that perform backprop,
+		// in their forward pass order. So in backprop we will go
+		// backwards and invoke each one
+		this.backprop = [];
+	}
+	Graph.prototype = {
+		backward: function () {
+			for (var i = this.backprop.length - 1; i >= 0; i--) {
+				this.backprop[i](); // tick!
+			}
+		},
+		rowPluck: function (m, ix) {
+			// pluck a row of m with index ix and return it as col vector
+			assert(ix >= 0 && ix < m.n);
+			var d = m.d;
+			var out = new Mat(d, 1);
+			for (var i = 0, n = d; i < n; i++) {
+				out.w[i] = m.w[d * ix + i];
+			} // copy over the data
+			if (this.needs_backprop) {
+				var backward = function () {
+					for (var i = 0, n = d; i < n; i++) {
+						m.dw[d * ix + i] += out.dw[i];
+					}
+				}
+				this.backprop.push(backward);
+			}
+			return out;
+		},
+		tanh: function (m) {
+			// tanh nonlinearity
+			var out = new Mat(m.n, m.d);
+			var n = m.w.length;
+			for (var i = 0; i < n; i++) {
+				out.w[i] = Math.tanh(m.w[i]);
+			}
+			if (this.needs_backprop) {
+				var backward = function () {
+					for (var i = 0; i < n; i++) {
+						// grad for z = tanh(x) is (1 - z^2)
+						var mwi = out.w[i];
+						m.dw[i] += (1.0 - mwi * mwi) * out.dw[i];
+					}
+				}
+				this.backprop.push(backward);
+			}
+			return out;
+		},
+		sigmoid: function (m) {
+			// sigmoid nonlinearity
+			var out = new Mat(m.n, m.d);
+			var n = m.w.length;
+			for (var i = 0; i < n; i++) {
+				out.w[i] = sig(m.w[i]);
+			}
+			if (this.needs_backprop) {
+				var backward = function () {
+					for (var i = 0; i < n; i++) {
+						// grad for z = tanh(x) is (1 - z^2)
+						var mwi = out.w[i];
+						m.dw[i] += mwi * (1.0 - mwi) * out.dw[i];
+					}
+				}
+				this.backprop.push(backward);
+			}
+			return out;
+		},
+		relu: function (m) {
+			var out = new Mat(m.n, m.d);
+			var n = m.w.length;
+			for (var i = 0; i < n; i++) {
+				out.w[i] = Math.max(0, m.w[i]); // relu
+			}
+			if (this.needs_backprop) {
+				var backward = function () {
+					for (var i = 0; i < n; i++) {
+						m.dw[i] += m.w[i] > 0 ? out.dw[i] : 0.0;
+					}
+				}
+				this.backprop.push(backward);
+			}
+			return out;
+		},
+		mul: function (m1, m2) {
+			// multiply matrices m1 * m2
+			assert(m1.d === m2.n, 'matmul dimensions misaligned');
+			var n = m1.n;
+			var d = m2.d;
+			var out = new Mat(n, d);
+			for (var i = 0; i < m1.n; i++) { // loop over rows of m1
+				for (var j = 0; j < m2.d; j++) { // loop over cols of m2
+					var dot = 0.0;
+					for (var k = 0; k < m1.d; k++) { // dot product loop
+						dot += m1.w[m1.d * i + k] * m2.w[m2.d * k + j];
+					}
+					out.w[d * i + j] = dot;
+				}
+			}
+			if (this.needs_backprop) {
+				var backward = function () {
+					for (var i = 0; i < m1.n; i++) { // loop over rows of m1
+						for (var j = 0; j < m2.d; j++) { // loop over cols of m2
+							for (var k = 0; k < m1.d; k++) { // dot product loop
+								var b = out.dw[d * i + j];
+								m1.dw[m1.d * i + k] += m2.w[m2.d * k + j] * b;
+								m2.dw[m2.d * k + j] += m1.w[m1.d * i + k] * b;
+							}
+						}
+					}
+				}
+				this.backprop.push(backward);
+			}
+			return out;
+		},
+		add: function (m1, m2) {
+			assert(m1.w.length === m2.w.length);
+			var out = new Mat(m1.n, m1.d);
+			for (var i = 0, n = m1.w.length; i < n; i++) {
+				out.w[i] = m1.w[i] + m2.w[i];
+			}
+			if (this.needs_backprop) {
+				var backward = function () {
+					for (var i = 0, n = m1.w.length; i < n; i++) {
+						m1.dw[i] += out.dw[i];
+						m2.dw[i] += out.dw[i];
+					}
+				}
+				this.backprop.push(backward);
+			}
+			return out;
+		},
+		dot: function (m1, m2) {
+			// m1 m2 are both column vectors
+			assert(m1.w.length === m2.w.length);
+			var out = new Mat(1, 1);
+			var dot = 0.0;
+			for (var i = 0, n = m1.w.length; i < n; i++) {
+				dot += m1.w[i] * m2.w[i];
+			}
+			out.w[0] = dot;
+			if (this.needs_backprop) {
+				var backward = function () {
+					for (var i = 0, n = m1.w.length; i < n; i++) {
+						m1.dw[i] += m2.w[i] * out.dw[0];
+						m2.dw[i] += m1.w[i] * out.dw[0];
+					}
+				}
+				this.backprop.push(backward);
+			}
+			return out;
+		},
+		eltmul: function (m1, m2) {
+			assert(m1.w.length === m2.w.length);
+			var out = new Mat(m1.n, m1.d);
+			for (var i = 0, n = m1.w.length; i < n; i++) {
+				out.w[i] = m1.w[i] * m2.w[i];
+			}
+			if (this.needs_backprop) {
+				var backward = function () {
+					for (var i = 0, n = m1.w.length; i < n; i++) {
+						m1.dw[i] += m2.w[i] * out.dw[i];
+						m2.dw[i] += m1.w[i] * out.dw[i];
+					}
+				}
+				this.backprop.push(backward);
+			}
+			return out;
+		},
+	}
+	var softmax = function (m) {
+		var out = new Mat(m.n, m.d); // probability volume
+		var maxval = -999999;
+		for (var i = 0, n = m.w.length; i < n; i++) {
+			if (m.w[i] > maxval) maxval = m.w[i];
+		}
+		var s = 0.0;
+		for (var i = 0, n = m.w.length; i < n; i++) {
+			out.w[i] = Math.exp(m.w[i] - maxval);
+			s += out.w[i];
+		}
+		for (var i = 0, n = m.w.length; i < n; i++) {
+			out.w[i] /= s;
+		}
+		// no backward pass here needed
+		// since we will use the computed probabilities outside
+		// to set gradients directly on m
+		return out;
+	}
+	var Solver = function () { // holds the optimiser state that step() reads and updates
+		this.decay_rate = 0.999; // decay of the running average of squared gradients
+		this.smooth_eps = 1e-8; // added under the square root in step() to avoid dividing by zero
+		this.step_cache = {}; // parameter key -> Mat of running squared-gradient averages, created lazily in step()
+	}
+	Solver.prototype = {
+		step: function (model, step_size, regc, clipval) {
+			// perform parameter update on every own-property Mat of model, in place; returns { ratio_clipped }
+			var solver_stats = {};
+			var num_clipped = 0;
+			var num_tot = 0;
+			for (var k in model) {
+				if (model.hasOwnProperty(k)) {
+					var m = model[k]; // mat ref
+					if (!(k in this.step_cache)) {
+						this.step_cache[k] = new Mat(m.n, m.d); // first time this key is seen: allocate its cache
+					}
+					var s = this.step_cache[k];
+					for (var i = 0, n = m.w.length; i < n; i++) {
+						// rmsprop adaptive learning rate
+						var mdwi = m.dw[i];
+						s.w[i] = s.w[i] * this.decay_rate + (1.0 - this.decay_rate) * mdwi * mdwi; // uses the UNclipped gradient
+						// gradient clip
+						if (mdwi > clipval) { // NOTE(review): clipval has no default; if undefined both comparisons are false and nothing is clipped
+							mdwi = clipval;
+							num_clipped++;
+						}
+						if (mdwi < -clipval) {
+							mdwi = -clipval;
+							num_clipped++;
+						}
+						num_tot++;
+						// update (and regularize); NOTE(review): regc has no default, an undefined regc turns the weight into NaN
+						m.w[i] += -step_size * mdwi / Math.sqrt(s.w[i] + this.smooth_eps) - regc * m.w[i];
+						m.dw[i] = 0; // reset gradients for next iteration
+					}
+				}
+			}
+			solver_stats['ratio_clipped'] = num_clipped * 1.0 / num_tot; // fraction of entries clipped; 0/0 = NaN when no entry was visited
+			return solver_stats;
+		}
+	}
+	var initLSTM = function (input_size, hidden_sizes, output_size) { // returns a { key: Mat } model for a stacked LSTM plus decoder
+		// hidden size should be a list, one entry per layer; keys get the layer index d appended
+		var model = {};
+		for (var d = 0; d < hidden_sizes.length; d++) { // loop over depths
+			var prev_size = d === 0 ? input_size : hidden_sizes[d - 1]; // layer 0 reads the input, deeper layers read the layer below
+			var hidden_size = hidden_sizes[d];
+			// gates parameters: W?x acts on the layer input, W?h on the previous hidden state, b? is the bias (i = input, f = forget, o = output)
+			model['Wix' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
+			model['Wih' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
+			model['bi' + d] = new Mat(hidden_size, 1);
+			model['Wfx' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
+			model['Wfh' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
+			model['bf' + d] = new Mat(hidden_size, 1);
+			model['Wox' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
+			model['Woh' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
+			model['bo' + d] = new Mat(hidden_size, 1);
+			// cell write params
+			model['Wcx' + d] = new RandMat(hidden_size, prev_size, 0, 0.08);
+			model['Wch' + d] = new RandMat(hidden_size, hidden_size, 0, 0.08);
+			model['bc' + d] = new Mat(hidden_size, 1);
+		}
+		// decoder params; hidden_size is function-scoped (var), so here it still holds the LAST layer's size (undefined if hidden_sizes is empty)
+		model['Whd'] = new RandMat(output_size, hidden_size, 0, 0.08);
+		model['bd'] = new Mat(output_size, 1);
+		return model;
+	}
+	var forwardLSTM = function (G, model, hidden_sizes, x, prev) {
+		// forward prop for a single tick of LSTM
+		// G is graph to append ops to
+		// model contains LSTM parameters
+		// x is 1D column vector with observation
+		// prev is a struct containing hidden and cell
+		// from previous iteration
+		if (prev == null || typeof prev.h === 'undefined') {
+			var hidden_prevs = [];
+			var cell_prevs = [];
+			for (var d = 0; d < hidden_sizes.length; d++) {
+				hidden_prevs.push(new R.Mat(hidden_sizes[d], 1));
+				cell_prevs.push(new R.Mat(hidden_sizes[d], 1));
+			}
+		} else {
+			var hidden_prevs = prev.h;
+			var cell_prevs = prev.c;
+		}
+		var hidden = [];
+		var cell = [];
+		for (var d = 0; d < hidden_sizes.length; d++) {
+			var input_vector = d === 0 ? x : hidden[d - 1];
+			var hidden_prev = hidden_prevs[d];
+			var cell_prev = cell_prevs[d];
+			// input gate
+			var h0 = G.mul(model['Wix' + d], input_vector);
+			var h1 = G.mul(model['Wih' + d], hidden_prev);
+			var input_gate = G.sigmoid(G.add(G.add(h0, h1), model['bi' + d]));
+			// forget gate
+			var h2 = G.mul(model['Wfx' + d], input_vector);
+			var h3 = G.mul(model['Wfh' + d], hidden_prev);
+			var forget_gate = G.sigmoid(G.add(G.add(h2, h3), model['bf' + d]));
+			// output gate
+			var h4 = G.mul(model['Wox' + d], input_vector);
+			var h5 = G.mul(model['Woh' + d], hidden_prev);
+			var output_gate = G.sigmoid(G.add(G.add(h4, h5), model['bo' + d]));
+			// write operation on cells
+			var h6 = G.mul(model['Wcx' + d], input_vector);
+			var h7 = G.mul(model['Wch' + d], hidden_prev);
+			var cell_write = G.tanh(G.add(G.add(h6, h7), model['bc' + d]));
+			// compute new cell activation
+			var retain_cell = G.eltmul(forget_gate, cell_prev); // what do we keep from cell
+			var write_cell = G.eltmul(input_gate, cell_write); // what do we write to cell
+			var cell_d = G.add(retain_cell, write_cell); // new cell contents
+			// compute hidden state as gated, saturated cell activations
+			var hidden_d = G.eltmul(output_gate, G.tanh(cell_d));
+			hidden.push(hidden_d);
+			cell.push(cell_d);
+		}
+		// one decoder to outputs at end
+		var output = G.add(G.mul(model['Whd'], hidden[hidden.length - 1]), model['bd']);
+		// return cell memory, hidden representation and output
+		return {
+			'h': hidden,
+			'c': cell,
+			'o': output
+		};
+	}
+	var sig = function (x) {
+		// helper function for computing the logistic sigmoid 1 / (1 + e^-x) of a single number
+		return 1.0 / (1 + Math.exp(-x));
+	}
+	var maxi = function (w) {
+		// argmax of array w: index of the largest value, the first one on ties; 0 for an empty array
+		var maxv = w[0];
+		var maxix = 0;
+		for (var i = 1, n = w.length; i < n; i++) {
+			var v = w[i];
+			if (v > maxv) { // strict comparison keeps the earliest index among equal maxima
+				maxix = i;
+				maxv = v;
+			}
+		}
+		return maxix;
+	}
+	var samplei = function (w) {
+		// sample argmax from w, assuming w are 
+		// probabilities that sum to one
+		var r = randf(0, 1);
+		var x = 0.0;
+		var i = 0;
+		while (true) {
+			x += w[i];
+			if (x > r) {
+				return i;
+			}
+			i++;
+		}
+		return w.length - 1; // pretty sure we should never get here?
+	}
+	// various utils - `global` is the object this closure is invoked with (R, see the call below)
+	global.assert = assert;
+	global.zeros = zeros;
+	global.maxi = maxi;
+	global.samplei = samplei;
+	global.randi = randi; // NOTE(review): randf is not exported in this list, yet the pre-change RL module reads R.randf - confirm whether it should be
+	global.randn = randn;
+	global.softmax = softmax;
+	// classes
+	global.Mat = Mat;
+	global.RandMat = RandMat;
+	global.forwardLSTM = forwardLSTM;
+	global.initLSTM = initLSTM;
+	// more utils
+	global.updateMat = updateMat;
+	global.updateNet = updateNet;
+	global.copyMat = copyMat;
+	global.copyNet = copyNet;
+	global.netToJSON = netToJSON;
+	global.netFromJSON = netFromJSON;
+	global.netZeroGrads = netZeroGrads;
+	global.netFlattenGrads = netFlattenGrads;
+	// optimization
+	global.Solver = Solver;
+	global.Graph = Graph;
 })(R);
 // END OF RECURRENTJS
 var RL = {};
 (function (global) {
-    "use strict";
-    // syntactic sugar function for getting default parameter values
-    var getopt = function (opt, field_name, default_value) {
-        if (typeof opt === 'undefined') {
-            return default_value;
-        }
-        return (typeof opt[field_name] !== 'undefined') ? opt[field_name] : default_value;
-    }
-    var zeros = R.zeros; // inherit these
-    var assert = R.assert;
-    var randi = R.randi;
-    var randf = R.randf;
-    var setConst = function (arr, c) {
-        for (var i = 0, n = arr.length; i < n; i++) {
-            arr[i] = c;
-        }
-    }
-    var sampleWeighted = function (p) {
-        var r = Math.random();
-        var c = 0.0;
-        for (var i = 0, n = p.length; i < n; i++) {
-            c += p[i];
-            if (c >= r) {
-                return i;
-            }
-        }
-        assert(false, 'wtf');
-    }
-    // ------
-    // AGENTS
-    // ------
-    // DPAgent performs Value Iteration
-    // - can also be used for Policy Iteration if you really wanted to
-    // - requires model of the environment :(
-    // - does not learn from experience :(
-    // - assumes finite MDP :(
-    var DPAgent = function (env, opt) {
-        this.V = null; // state value function
-        this.P = null; // policy distribution \pi(s,a)
-        this.env = env; // store pointer to environment
-        this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
-        this.reset();
-    }
-    DPAgent.prototype = {
-        reset: function () {
-            // reset the agent's policy and value function
-            this.ns = this.env.getNumStates();
-            this.na = this.env.getMaxNumActions();
-            this.V = zeros(this.ns);
-            this.P = zeros(this.ns * this.na);
-            // initialize uniform random policy
-            for (var s = 0; s < this.ns; s++) {
-                var poss = this.env.allowedActions(s);
-                for (var i = 0, n = poss.length; i < n; i++) {
-                    this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
-                }
-            }
-        },
-        act: function (s) {
-            // behave according to the learned policy
-            var poss = this.env.allowedActions(s);
-            var ps = [];
-            for (var i = 0, n = poss.length; i < n; i++) {
-                var a = poss[i];
-                var prob = this.P[a * this.ns + s];
-                ps.push(prob);
-            }
-            var maxi = sampleWeighted(ps);
-            return poss[maxi];
-        },
-        learn: function () {
-            // perform a single round of value iteration
-            self.evaluatePolicy(); // writes this.V
-            self.updatePolicy(); // writes this.P
-        },
-        evaluatePolicy: function () {
-            // perform a synchronous update of the value function
-            var Vnew = zeros(this.ns);
-            for (var s = 0; s < this.ns; s++) {
-                // integrate over actions in a stochastic policy
-                // note that we assume that policy probability mass over allowed actions sums to one
-                var v = 0.0;
-                var poss = this.env.allowedActions(s);
-                for (var i = 0, n = poss.length; i < n; i++) {
-                    var a = poss[i];
-                    var prob = this.P[a * this.ns + s]; // probability of taking action under policy
-                    if (prob === 0) {
-                        continue;
-                    } // no contribution, skip for speed
-                    var ns = this.env.nextStateDistribution(s, a);
-                    var rs = this.env.reward(s, a, ns); // reward for s->a->ns transition
-                    v += prob * (rs + this.gamma * this.V[ns]);
-                }
-                Vnew[s] = v;
-            }
-            this.V = Vnew; // swap
-        },
-        updatePolicy: function () {
-            // update policy to be greedy w.r.t. learned Value function
-            for (var s = 0; s < this.ns; s++) {
-                var poss = this.env.allowedActions(s);
-                // compute value of taking each allowed action
-                var vmax, nmax;
-                var vs = [];
-                for (var i = 0, n = poss.length; i < n; i++) {
-                    var a = poss[i];
-                    var ns = this.env.nextStateDistribution(s, a);
-                    var rs = this.env.reward(s, a, ns);
-                    var v = rs + this.gamma * this.V[ns];
-                    vs.push(v);
-                    if (i === 0 || v > vmax) {
-                        vmax = v;
-                        nmax = 1;
-                    } else if (v === vmax) {
-                        nmax += 1;
-                    }
-                }
-                // update policy smoothly across all argmaxy actions
-                for (var i = 0, n = poss.length; i < n; i++) {
-                    var a = poss[i];
-                    this.P[a * this.ns + s] = (vs[i] === vmax) ? 1.0 / nmax : 0.0;
-                }
-            }
-        },
-    }
-    // QAgent uses TD (Q-Learning, SARSA)
-    // - does not require environment model :)
-    // - learns from experience :)
-    var TDAgent = function (env, opt) {
-        this.update = getopt(opt, 'update', 'qlearn'); // qlearn | sarsa
-        this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
-        this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
-        this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
-        // class allows non-deterministic policy, and smoothly regressing towards the optimal policy based on Q
-        this.smooth_policy_update = getopt(opt, 'smooth_policy_update', false);
-        this.beta = getopt(opt, 'beta', 0.01); // learning rate for policy, if smooth updates are on
-        // eligibility traces
-        this.lambda = getopt(opt, 'lambda', 0); // eligibility trace decay. 0 = no eligibility traces used
-        this.replacing_traces = getopt(opt, 'replacing_traces', true);
-        // optional optimistic initial values
-        this.q_init_val = getopt(opt, 'q_init_val', 0);
-        this.planN = getopt(opt, 'planN', 0); // number of planning steps per learning iteration (0 = no planning)
-        this.Q = null; // state action value function
-        this.P = null; // policy distribution \pi(s,a)
-        this.e = null; // eligibility trace
-        this.env_model_s = null;; // environment model (s,a) -> (s',r)
-        this.env_model_r = null;; // environment model (s,a) -> (s',r)
-        this.env = env; // store pointer to environment
-        this.reset();
-    }
-    TDAgent.prototype = {
-        reset: function () {
-            // reset the agent's policy and value function
-            this.ns = this.env.getNumStates();
-            this.na = this.env.getMaxNumActions();
-            this.Q = zeros(this.ns * this.na);
-            if (this.q_init_val !== 0) {
-                setConst(this.Q, this.q_init_val);
-            }
-            this.P = zeros(this.ns * this.na);
-            this.e = zeros(this.ns * this.na);
-            // model/planning vars
-            this.env_model_s = zeros(this.ns * this.na);
-            setConst(this.env_model_s, -1); // init to -1 so we can test if we saw the state before
-            this.env_model_r = zeros(this.ns * this.na);
-            this.sa_seen = [];
-            this.pq = zeros(this.ns * this.na);
-            // initialize uniform random policy
-            for (var s = 0; s < this.ns; s++) {
-                var poss = this.env.allowedActions(s);
-                for (var i = 0, n = poss.length; i < n; i++) {
-                    this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
-                }
-            }
-            // agent memory, needed for streaming updates
-            // (s0,a0,r0,s1,a1,r1,...)
-            this.r0 = null;
-            this.s0 = null;
-            this.s1 = null;
-            this.a0 = null;
-            this.a1 = null;
-        },
-        resetEpisode: function () {
-            // an episode finished
-        },
-        act: function (s) {
-            // act according to epsilon greedy policy
-            var poss = this.env.allowedActions(s);
-            var probs = [];
-            for (var i = 0, n = poss.length; i < n; i++) {
-                probs.push(this.P[poss[i] * this.ns + s]);
-            }
-            // epsilon greedy policy
-            if (Math.random() < this.epsilon) {
-                var a = poss[randi(0, poss.length)]; // random available action
-                this.explored = true;
-            } else {
-                var a = poss[sampleWeighted(probs)];
-                this.explored = false;
-            }
-            // shift state memory
-            this.s0 = this.s1;
-            this.a0 = this.a1;
-            this.s1 = s;
-            this.a1 = a;
-            return a;
-        },
-        learn: function (r1) {
-            // takes reward for previous action, which came from a call to act()
-            if (!(this.r0 == null)) {
-                this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1, this.lambda);
-                if (this.planN > 0) {
-                    this.updateModel(this.s0, this.a0, this.r0, this.s1);
-                    this.plan();
-                }
-            }
-            this.r0 = r1; // store this for next update
-        },
-        updateModel: function (s0, a0, r0, s1) {
-            // transition (s0,a0) -> (r0,s1) was observed. Update environment model
-            var sa = a0 * this.ns + s0;
-            if (this.env_model_s[sa] === -1) {
-                // first time we see this state action
-                this.sa_seen.push(a0 * this.ns + s0); // add as seen state
-            }
-            this.env_model_s[sa] = s1;
-            this.env_model_r[sa] = r0;
-        },
-        plan: function () {
-            // order the states based on current priority queue information
-            var spq = [];
-            for (var i = 0, n = this.sa_seen.length; i < n; i++) {
-                var sa = this.sa_seen[i];
-                var sap = this.pq[sa];
-                if (sap > 1e-5) { // gain a bit of efficiency
-                    spq.push({
-                        sa: sa,
-                        p: sap
-                    });
-                }
-            }
-            spq.sort(function (a, b) {
-                return a.p < b.p ? 1 : -1
-            });
-            // perform the updates
-            var nsteps = Math.min(this.planN, spq.length);
-            for (var k = 0; k < nsteps; k++) {
-                // random exploration
-                //var i = randi(0, this.sa_seen.length); // pick random prev seen state action
-                //var s0a0 = this.sa_seen[i];
-                var s0a0 = spq[k].sa;
-                this.pq[s0a0] = 0; // erase priority, since we're backing up this state
-                var s0 = s0a0 % this.ns;
-                var a0 = Math.floor(s0a0 / this.ns);
-                var r0 = this.env_model_r[s0a0];
-                var s1 = this.env_model_s[s0a0];
-                var a1 = -1; // not used for Q learning
-                if (this.update === 'sarsa') {
-                    // generate random action?...
-                    var poss = this.env.allowedActions(s1);
-                    var a1 = poss[randi(0, poss.length)];
-                }
-                this.learnFromTuple(s0, a0, r0, s1, a1, 0); // note lambda = 0 - shouldnt use eligibility trace here
-            }
-        },
-        learnFromTuple: function (s0, a0, r0, s1, a1, lambda) {
-            var sa = a0 * this.ns + s0;
-            // calculate the target for Q(s,a)
-            if (this.update === 'qlearn') {
-                // Q learning target is Q(s0,a0) = r0 + gamma * max_a Q[s1,a]
-                var poss = this.env.allowedActions(s1);
-                var qmax = 0;
-                for (var i = 0, n = poss.length; i < n; i++) {
-                    var s1a = poss[i] * this.ns + s1;
-                    var qval = this.Q[s1a];
-                    if (i === 0 || qval > qmax) {
-                        qmax = qval;
-                    }
-                }
-                var target = r0 + this.gamma * qmax;
-            } else if (this.update === 'sarsa') {
-                // SARSA target is Q(s0,a0) = r0 + gamma * Q[s1,a1]
-                var s1a1 = a1 * this.ns + s1;
-                var target = r0 + this.gamma * this.Q[s1a1];
-            }
-            if (lambda > 0) {
-                // perform an eligibility trace update
-                if (this.replacing_traces) {
-                    this.e[sa] = 1;
-                } else {
-                    this.e[sa] += 1;
-                }
-                var edecay = lambda * this.gamma;
-                var state_update = zeros(this.ns);
-                for (var s = 0; s < this.ns; s++) {
-                    var poss = this.env.allowedActions(s);
-                    for (var i = 0; i < poss.length; i++) {
-                        var a = poss[i];
-                        var saloop = a * this.ns + s;
-                        var esa = this.e[saloop];
-                        var update = this.alpha * esa * (target - this.Q[saloop]);
-                        this.Q[saloop] += update;
-                        this.updatePriority(s, a, update);
-                        this.e[saloop] *= edecay;
-                        var u = Math.abs(update);
-                        if (u > state_update[s]) {
-                            state_update[s] = u;
-                        }
-                    }
-                }
-                for (var s = 0; s < this.ns; s++) {
-                    if (state_update[s] > 1e-5) { // save efficiency here
-                        this.updatePolicy(s);
-                    }
-                }
-                if (this.explored && this.update === 'qlearn') {
-                    // have to wipe the trace since q learning is off-policy :(
-                    this.e = zeros(this.ns * this.na);
-                }
-            } else {
-                // simpler and faster update without eligibility trace
-                // update Q[sa] towards it with some step size
-                var update = this.alpha * (target - this.Q[sa]);
-                this.Q[sa] += update;
-                this.updatePriority(s0, a0, update);
-                // update the policy to reflect the change (if appropriate)
-                this.updatePolicy(s0);
-            }
-        },
-        updatePriority: function (s, a, u) {
-            // used in planning. Invoked when Q[sa] += update
-            // we should find all states that lead to (s,a) and upgrade their priority
-            // of being update in the next planning step
-            u = Math.abs(u);
-            if (u < 1e-5) {
-                return;
-            } // for efficiency skip small updates
-            if (this.planN === 0) {
-                return;
-            } // there is no planning to be done, skip.
-            for (var si = 0; si < this.ns; si++) {
-                // note we are also iterating over impossible actions at all states,
-                // but this should be okay because their env_model_s should simply be -1
-                // as initialized, so they will never be predicted to point to any state
-                // because they will never be observed, and hence never be added to the model
-                for (var ai = 0; ai < this.na; ai++) {
-                    var siai = ai * this.ns + si;
-                    if (this.env_model_s[siai] === s) {
-                        // this state leads to s, add it to priority queue
-                        this.pq[siai] += u;
-                    }
-                }
-            }
-        },
-        updatePolicy: function (s) {
-            var poss = this.env.allowedActions(s);
-            // set policy at s to be the action that achieves max_a Q(s,a)
-            // first find the maxy Q values
-            var qmax, nmax;
-            var qs = [];
-            for (var i = 0, n = poss.length; i < n; i++) {
-                var a = poss[i];
-                var qval = this.Q[a * this.ns + s];
-                qs.push(qval);
-                if (i === 0 || qval > qmax) {
-                    qmax = qval;
-                    nmax = 1;
-                } else if (qval === qmax) {
-                    nmax += 1;
-                }
-            }
-            // now update the policy smoothly towards the argmaxy actions
-            var psum = 0.0;
-            for (var i = 0, n = poss.length; i < n; i++) {
-                var a = poss[i];
-                var target = (qs[i] === qmax) ? 1.0 / nmax : 0.0;
-                var ix = a * this.ns + s;
-                if (this.smooth_policy_update) {
-                    // slightly hacky :p
-                    this.P[ix] += this.beta * (target - this.P[ix]);
-                    psum += this.P[ix];
-                } else {
-                    // set hard target
-                    this.P[ix] = target;
-                }
-            }
-            if (this.smooth_policy_update) {
-                // renomalize P if we're using smooth policy updates
-                for (var i = 0, n = poss.length; i < n; i++) {
-                    var a = poss[i];
-                    this.P[a * this.ns + s] /= psum;
-                }
-            }
-        }
-    }
-    var DQNAgent = function (env, opt) {
-        this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
-        this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
-        this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
-        this.experience_add_every = getopt(opt, 'experience_add_every', 25); // number of time steps before we add another experience to replay memory
-        this.experience_size = getopt(opt, 'experience_size', 5000); // size of experience replay
-        this.learning_steps_per_iteration = getopt(opt, 'learning_steps_per_iteration', 10);
-        this.tderror_clamp = getopt(opt, 'tderror_clamp', 1.0);
-        this.num_hidden_units = getopt(opt, 'num_hidden_units', 100);
-        this.env = env;
-        this.reset();
-    }
-    DQNAgent.prototype = {
-        reset: function () {
-            this.nh = this.num_hidden_units; // number of hidden units
-            this.ns = this.env.getNumStates();
-            this.na = this.env.getMaxNumActions();
-            // nets are hardcoded for now as key (str) -> Mat
-            // not proud of this. better solution is to have a whole Net object
-            // on top of Mats, but for now sticking with this
-            this.net = {};
-            this.net.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
-            this.net.b1 = new R.Mat(this.nh, 1, 0, 0.01);
-            this.net.W2 = new R.RandMat(this.na, this.nh, 0, 0.01);
-            this.net.b2 = new R.Mat(this.na, 1, 0, 0.01);
-            this.exp = []; // experience
-            this.expi = 0; // where to insert
-            this.t = 0;
-            this.r0 = null;
-            this.s0 = null;
-            this.s1 = null;
-            this.a0 = null;
-            this.a1 = null;
-            this.tderror = 0; // for visualization only...
-        },
-        toJSON: function () {
-            // save function
-            var j = {};
-            j.nh = this.nh;
-            j.ns = this.ns;
-            j.na = this.na;
-            j.net = R.netToJSON(this.net);
-            return j;
-        },
-        fromJSON: function (j) {
-            // load function
-            this.nh = j.nh;
-            this.ns = j.ns;
-            this.na = j.na;
-            this.net = R.netFromJSON(j.net);
-        },
-        forwardQ: function (net, s, needs_backprop) {
-            var G = new R.Graph(needs_backprop);
-            var a1mat = G.add(G.mul(net.W1, s), net.b1);
-            var h1mat = G.tanh(a1mat);
-            var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
-            this.lastG = G; // back this up. Kind of hacky isn't it
-            return a2mat;
-        },
-        act: function (slist) {
-            // convert to a Mat column vector
-            var s = new R.Mat(this.ns, 1);
-            s.setFrom(slist);
-            // epsilon greedy policy
-            if (Math.random() < this.epsilon) {
-                var a = randi(0, this.na);
-            } else {
-                // greedy wrt Q function
-                var amat = this.forwardQ(this.net, s, false);
-                var a = R.maxi(amat.w); // returns index of argmax action
-            }
-            // shift state memory
-            this.s0 = this.s1;
-            this.a0 = this.a1;
-            this.s1 = s;
-            this.a1 = a;
-            return a;
-        },
-        learn: function (r1) {
-            // perform an update on Q function
-            if (!(this.r0 == null) && this.alpha > 0) {
-                // learn from this tuple to get a sense of how "surprising" it is to the agent
-                var tderror = this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1);
-                this.tderror = tderror; // a measure of surprise
-                // decide if we should keep this experience in the replay
-                if (this.t % this.experience_add_every === 0) {
-                    this.exp[this.expi] = [this.s0, this.a0, this.r0, this.s1, this.a1];
-                    this.expi += 1;
-                    if (this.expi > this.experience_size) {
-                        this.expi = 0;
-                    } // roll over when we run out
-                }
-                this.t += 1;
-                // sample some additional experience from replay memory and learn from it
-                for (var k = 0; k < this.learning_steps_per_iteration; k++) {
-                    var ri = randi(0, this.exp.length); // todo: priority sweeps?
-                    var e = this.exp[ri];
-                    this.learnFromTuple(e[0], e[1], e[2], e[3], e[4])
-                }
-            }
-            this.r0 = r1; // store for next update
-        },
-        learnFromTuple: function (s0, a0, r0, s1, a1) {
-            // want: Q(s,a) = r + gamma * max_a' Q(s',a')
-            // compute the target Q value
-            var tmat = this.forwardQ(this.net, s1, false);
-            var qmax = r0 + this.gamma * tmat.w[R.maxi(tmat.w)];
-            // now predict
-            var pred = this.forwardQ(this.net, s0, true);
-            var tderror = pred.w[a0] - qmax;
-            var clamp = this.tderror_clamp;
-            if (Math.abs(tderror) > clamp) { // huber loss to robustify
-                if (tderror > clamp) tderror = clamp;
-                if (tderror < -clamp) tderror = -clamp;
-            }
-            pred.dw[a0] = tderror;
-            this.lastG.backward(); // compute gradients on net params
-            // update net
-            R.updateNet(this.net, this.alpha);
-            return tderror;
-        }
-    }
-    // buggy implementation, doesnt work...
-    var SimpleReinforceAgent = function (env, opt) {
-        this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
-        this.epsilon = getopt(opt, 'epsilon', 0.75); // for epsilon-greedy policy
-        this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
-        this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
-        this.env = env;
-        this.reset();
-    }
-    SimpleReinforceAgent.prototype = {
-        reset: function () {
-            this.ns = this.env.getNumStates();
-            this.na = this.env.getMaxNumActions();
-            this.nh = 100; // number of hidden units
-            this.nhb = 100; // and also in the baseline lstm
-            this.actorNet = {};
-            this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
-            this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
-            this.actorNet.W2 = new R.RandMat(this.na, this.nh, 0, 0.1);
-            this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
-            this.actorOutputs = [];
-            this.actorGraphs = [];
-            this.actorActions = []; // sampled ones
-            this.rewardHistory = [];
-            this.baselineNet = {};
-            this.baselineNet.W1 = new R.RandMat(this.nhb, this.ns, 0, 0.01);
-            this.baselineNet.b1 = new R.Mat(this.nhb, 1, 0, 0.01);
-            this.baselineNet.W2 = new R.RandMat(this.na, this.nhb, 0, 0.01);
-            this.baselineNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
-            this.baselineOutputs = [];
-            this.baselineGraphs = [];
-            this.t = 0;
-        },
-        forwardActor: function (s, needs_backprop) {
-            var net = this.actorNet;
-            var G = new R.Graph(needs_backprop);
-            var a1mat = G.add(G.mul(net.W1, s), net.b1);
-            var h1mat = G.tanh(a1mat);
-            var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
-            return {
-                'a': a2mat,
-                'G': G
-            }
-        },
-        forwardValue: function (s, needs_backprop) {
-            var net = this.baselineNet;
-            var G = new R.Graph(needs_backprop);
-            var a1mat = G.add(G.mul(net.W1, s), net.b1);
-            var h1mat = G.tanh(a1mat);
-            var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
-            return {
-                'a': a2mat,
-                'G': G
-            }
-        },
-        act: function (slist) {
-            // convert to a Mat column vector
-            var s = new R.Mat(this.ns, 1);
-            s.setFrom(slist);
-            // forward the actor to get action output
-            var ans = this.forwardActor(s, true);
-            var amat = ans.a;
-            var ag = ans.G;
-            this.actorOutputs.push(amat);
-            this.actorGraphs.push(ag);
-            // forward the baseline estimator
-            var ans = this.forwardValue(s, true);
-            var vmat = ans.a;
-            var vg = ans.G;
-            this.baselineOutputs.push(vmat);
-            this.baselineGraphs.push(vg);
-            // sample action from the stochastic gaussian policy
-            var a = R.copyMat(amat);
-            var gaussVar = 0.02;
-            a.w[0] = R.randn(0, gaussVar);
-            a.w[1] = R.randn(0, gaussVar);
-            this.actorActions.push(a);
-            // shift state memory
-            this.s0 = this.s1;
-            this.a0 = this.a1;
-            this.s1 = s;
-            this.a1 = a;
-            return a;
-        },
-        learn: function (r1) {
-            // perform an update on Q function
-            this.rewardHistory.push(r1);
-            var n = this.rewardHistory.length;
-            var baselineMSE = 0.0;
-            var nup = 100; // what chunk of experience to take
-            var nuse = 80; // what chunk to update from
-            if (n >= nup) {
-                // lets learn and flush
-                // first: compute the sample values at all points
-                var vs = [];
-                for (var t = 0; t < nuse; t++) {
-                    var mul = 1;
-                    // compute the actual discounted reward for this time step
-                    var V = 0;
-                    for (var t2 = t; t2 < n; t2++) {
-                        V += mul * this.rewardHistory[t2];
-                        mul *= this.gamma;
-                        if (mul < 1e-5) {
-                            break;
-                        } // efficiency savings
-                    }
-                    // get the predicted baseline at this time step
-                    var b = this.baselineOutputs[t].w[0];
-                    for (var i = 0; i < this.na; i++) {
-                        // [the action delta] * [the desirebility]
-                        var update = -(V - b) * (this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
-                        if (update > 0.1) {
-                            update = 0.1;
-                        }
-                        if (update < -0.1) {
-                            update = -0.1;
-                        }
-                        this.actorOutputs[t].dw[i] += update;
-                    }
-                    var update = -(V - b);
-                    if (update > 0.1) {
-                        update = 0.1;
-                    }
-                    if (update < 0.1) {
-                        update = -0.1;
-                    }
-                    this.baselineOutputs[t].dw[0] += update;
-                    baselineMSE += (V - b) * (V - b);
-                    vs.push(V);
-                }
-                baselineMSE /= nuse;
-                // backprop all the things
-                for (var t = 0; t < nuse; t++) {
-                    this.actorGraphs[t].backward();
-                    this.baselineGraphs[t].backward();
-                }
-                R.updateNet(this.actorNet, this.alpha); // update actor network
-                R.updateNet(this.baselineNet, this.beta); // update baseline network
-                // flush
-                this.actorOutputs = [];
-                this.rewardHistory = [];
-                this.actorActions = [];
-                this.baselineOutputs = [];
-                this.actorGraphs = [];
-                this.baselineGraphs = [];
-                this.tderror = baselineMSE;
-            }
-            this.t += 1;
-            this.r0 = r1; // store for next update
-        },
-    }
-    // buggy implementation as well, doesn't work
-    var RecurrentReinforceAgent = function (env, opt) {
-        this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
-        this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
-        this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
-        this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
-        this.env = env;
-        this.reset();
-    }
-    RecurrentReinforceAgent.prototype = {
-        reset: function () {
-            this.ns = this.env.getNumStates();
-            this.na = this.env.getMaxNumActions();
-            this.nh = 40; // number of hidden units
-            this.nhb = 40; // and also in the baseline lstm
-            this.actorLSTM = R.initLSTM(this.ns, [this.nh], this.na);
-            this.actorG = new R.Graph();
-            this.actorPrev = null;
-            this.actorOutputs = [];
-            this.rewardHistory = [];
-            this.actorActions = [];
-            this.baselineLSTM = R.initLSTM(this.ns, [this.nhb], 1);
-            this.baselineG = new R.Graph();
-            this.baselinePrev = null;
-            this.baselineOutputs = [];
-            this.t = 0;
-            this.r0 = null;
-            this.s0 = null;
-            this.s1 = null;
-            this.a0 = null;
-            this.a1 = null;
-        },
-        act: function (slist) {
-            // convert to a Mat column vector
-            var s = new R.Mat(this.ns, 1);
-            s.setFrom(slist);
-            // forward the LSTM to get action distribution
-            var actorNext = R.forwardLSTM(this.actorG, this.actorLSTM, [this.nh], s, this.actorPrev);
-            this.actorPrev = actorNext;
-            var amat = actorNext.o;
-            this.actorOutputs.push(amat);
-            // forward the baseline LSTM
-            var baselineNext = R.forwardLSTM(this.baselineG, this.baselineLSTM, [this.nhb], s, this.baselinePrev);
-            this.baselinePrev = baselineNext;
-            this.baselineOutputs.push(baselineNext.o);
-            // sample action from actor policy
-            var gaussVar = 0.05;
-            var a = R.copyMat(amat);
-            for (var i = 0, n = a.w.length; i < n; i++) {
-                a.w[0] += R.randn(0, gaussVar);
-                a.w[1] += R.randn(0, gaussVar);
-            }
-            this.actorActions.push(a);
-            // shift state memory
-            this.s0 = this.s1;
-            this.a0 = this.a1;
-            this.s1 = s;
-            this.a1 = a;
-            return a;
-        },
-        learn: function (r1) {
-            // perform an update on Q function
-            this.rewardHistory.push(r1);
-            var n = this.rewardHistory.length;
-            var baselineMSE = 0.0;
-            var nup = 100; // what chunk of experience to take
-            var nuse = 80; // what chunk to also update
-            if (n >= nup) {
-                // lets learn and flush
-                // first: compute the sample values at all points
-                var vs = [];
-                for (var t = 0; t < nuse; t++) {
-                    var mul = 1;
-                    var V = 0;
-                    for (var t2 = t; t2 < n; t2++) {
-                        V += mul * this.rewardHistory[t2];
-                        mul *= this.gamma;
-                        if (mul < 1e-5) {
-                            break;
-                        } // efficiency savings
-                    }
-                    var b = this.baselineOutputs[t].w[0];
-                    // todo: take out the constants etc.
-                    for (var i = 0; i < this.na; i++) {
-                        // [the action delta] * [the desirebility]
-                        var update = -(V - b) * (this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
-                        if (update > 0.1) {
-                            update = 0.1;
-                        }
-                        if (update < -0.1) {
-                            update = -0.1;
-                        }
-                        this.actorOutputs[t].dw[i] += update;
-                    }
-                    var update = -(V - b);
-                    if (update > 0.1) {
-                        update = 0.1;
-                    }
-                    if (update < 0.1) {
-                        update = -0.1;
-                    }
-                    this.baselineOutputs[t].dw[0] += update;
-                    baselineMSE += (V - b) * (V - b);
-                    vs.push(V);
-                }
-                baselineMSE /= nuse;
-                this.actorG.backward(); // update params! woohoo!
-                this.baselineG.backward();
-                R.updateNet(this.actorLSTM, this.alpha); // update actor network
-                R.updateNet(this.baselineLSTM, this.beta); // update baseline network
-                // flush
-                this.actorG = new R.Graph();
-                this.actorPrev = null;
-                this.actorOutputs = [];
-                this.rewardHistory = [];
-                this.actorActions = [];
-                this.baselineG = new R.Graph();
-                this.baselinePrev = null;
-                this.baselineOutputs = [];
-                this.tderror = baselineMSE;
-            }
-            this.t += 1;
-            this.r0 = r1; // store for next update
-        },
-    }
-    // Currently buggy implementation, doesnt work
-    var DeterministPG = function (env, opt) {
-        this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
-        this.epsilon = getopt(opt, 'epsilon', 0.5); // for epsilon-greedy policy
-        this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
-        this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
-        this.env = env;
-        this.reset();
-    }
-    DeterministPG.prototype = {
-        reset: function () {
-            this.ns = this.env.getNumStates();
-            this.na = this.env.getMaxNumActions();
-            this.nh = 100; // number of hidden units
-            // actor
-            this.actorNet = {};
-            this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
-            this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
-            this.actorNet.W2 = new R.RandMat(this.na, this.ns, 0, 0.1);
-            this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
-            this.ntheta = this.na * this.ns + this.na; // number of params in actor
-            // critic
-            this.criticw = new R.RandMat(1, this.ntheta, 0, 0.01); // row vector
-            this.r0 = null;
-            this.s0 = null;
-            this.s1 = null;
-            this.a0 = null;
-            this.a1 = null;
-            this.t = 0;
-        },
-        forwardActor: function (s, needs_backprop) {
-            var net = this.actorNet;
-            var G = new R.Graph(needs_backprop);
-            var a1mat = G.add(G.mul(net.W1, s), net.b1);
-            var h1mat = G.tanh(a1mat);
-            var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
-            return {
-                'a': a2mat,
-                'G': G
-            }
-        },
-        act: function (slist) {
-            // convert to a Mat column vector
-            var s = new R.Mat(this.ns, 1);
-            s.setFrom(slist);
-            // forward the actor to get action output
-            var ans = this.forwardActor(s, false);
-            var amat = ans.a;
-            var ag = ans.G;
-            // sample action from the stochastic gaussian policy
-            var a = R.copyMat(amat);
-            if (Math.random() < this.epsilon) {
-                var gaussVar = 0.02;
-                a.w[0] = R.randn(0, gaussVar);
-                a.w[1] = R.randn(0, gaussVar);
-            }
-            var clamp = 0.25;
-            if (a.w[0] > clamp) a.w[0] = clamp;
-            if (a.w[0] < -clamp) a.w[0] = -clamp;
-            if (a.w[1] > clamp) a.w[1] = clamp;
-            if (a.w[1] < -clamp) a.w[1] = -clamp;
-            // shift state memory
-            this.s0 = this.s1;
-            this.a0 = this.a1;
-            this.s1 = s;
-            this.a1 = a;
-            return a;
-        },
-        utilJacobianAt: function (s) {
-            var ujacobian = new R.Mat(this.ntheta, this.na);
-            for (var a = 0; a < this.na; a++) {
-                R.netZeroGrads(this.actorNet);
-                var ag = this.forwardActor(this.s0, true);
-                ag.a.dw[a] = 1.0;
-                ag.G.backward();
-                var gflat = R.netFlattenGrads(this.actorNet);
-                ujacobian.setColumn(gflat, a);
-            }
-            return ujacobian;
-        },
-        learn: function (r1) {
-            // perform an update on Q function
-            //this.rewardHistory.push(r1);
-            if (!(this.r0 == null)) {
-                var Gtmp = new R.Graph(false);
-                // dpg update:
-                // first compute the features psi:
-                // the jacobian matrix of the actor for s
-                var ujacobian0 = this.utilJacobianAt(this.s0);
-                // now form the features \psi(s,a)
-                var psi_sa0 = Gtmp.mul(ujacobian0, this.a0); // should be [this.ntheta x 1] "feature" vector
-                var qw0 = Gtmp.mul(this.criticw, psi_sa0); // 1x1
-                // now do the same thing because we need \psi(s_{t+1}, \mu\_\theta(s\_t{t+1}))
-                var ujacobian1 = this.utilJacobianAt(this.s1);
-                var ag = this.forwardActor(this.s1, false);
-                var psi_sa1 = Gtmp.mul(ujacobian1, ag.a);
-                var qw1 = Gtmp.mul(this.criticw, psi_sa1); // 1x1
-                // get the td error finally
-                var tderror = this.r0 + this.gamma * qw1.w[0] - qw0.w[0]; // lol
-                if (tderror > 0.5) tderror = 0.5; // clamp
-                if (tderror < -0.5) tderror = -0.5;
-                this.tderror = tderror;
-                // update actor policy with natural gradient
-                var net = this.actorNet;
-                var ix = 0;
-                for (var p in net) {
-                    var mat = net[p];
-                    if (net.hasOwnProperty(p)) {
-                        for (var i = 0, n = mat.w.length; i < n; i++) {
-                            mat.w[i] += this.alpha * this.criticw.w[ix]; // natural gradient update
-                            ix += 1;
-                        }
-                    }
-                }
-                // update the critic parameters too
-                for (var i = 0; i < this.ntheta; i++) {
-                    var update = this.beta * tderror * psi_sa0.w[i];
-                    this.criticw.w[i] += update;
-                }
-            }
-            this.r0 = r1; // store for next update
-        },
-    }
-    // exports
-    global.DPAgent = DPAgent;
-    global.TDAgent = TDAgent;
-    global.DQNAgent = DQNAgent;
-    //global.SimpleReinforceAgent = SimpleReinforceAgent;
-    //global.RecurrentReinforceAgent = RecurrentReinforceAgent;
-    //global.DeterministPG = DeterministPG;
+	"use strict";
+	// syntactic sugar function for getting default parameter values
+	var getopt = function (opt, field_name, default_value) {
+		if (typeof opt === 'undefined') {
+			return default_value;
+		}
+		return (typeof opt[field_name] !== 'undefined') ? opt[field_name] : default_value;
+	}
+	var zeros = R.zeros; // inherit these
+	var assert = R.assert;
+	var randi = R.randi;
+	var randf = R.randf;
+	var setConst = function (arr, c) {
+		for (var i = 0, n = arr.length; i < n; i++) {
+			arr[i] = c;
+		}
+	}
+	var sampleWeighted = function (p) {
+		var r = Math.random();
+		var c = 0.0;
+		for (var i = 0, n = p.length; i < n; i++) {
+			c += p[i];
+			if (c >= r) {
+				return i;
+			}
+		}
+		assert(false, 'wtf');
+	}
+	// ------
+	// AGENTS
+	// ------
+	// DPAgent performs Value Iteration
+	// - can also be used for Policy Iteration if you really wanted to
+	// - requires model of the environment :(
+	// - does not learn from experience :(
+	// - assumes finite MDP :(
+	var DPAgent = function (env, opt) {
+		this.V = null; // state value function
+		this.P = null; // policy distribution \pi(s,a)
+		this.env = env; // store pointer to environment
+		this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
+		this.reset();
+	}
+	DPAgent.prototype = {
+		reset: function () {
+			// reset the agent's policy and value function
+			this.ns = this.env.getNumStates();
+			this.na = this.env.getMaxNumActions();
+			this.V = zeros(this.ns);
+			this.P = zeros(this.ns * this.na);
+			// initialize uniform random policy
+			for (var s = 0; s < this.ns; s++) {
+				var poss = this.env.allowedActions(s);
+				for (var i = 0, n = poss.length; i < n; i++) {
+					this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
+				}
+			}
+		},
+		act: function (s) {
+			// behave according to the learned policy
+			var poss = this.env.allowedActions(s);
+			var ps = [];
+			for (var i = 0, n = poss.length; i < n; i++) {
+				var a = poss[i];
+				var prob = this.P[a * this.ns + s];
+				ps.push(prob);
+			}
+			var maxi = sampleWeighted(ps);
+			return poss[maxi];
+		},
+		learn: function () {
+			// perform a single round of value iteration; must go through `this` (the agent), not the global `self`
+			this.evaluatePolicy(); // writes this.V
+			this.updatePolicy(); // writes this.P
+		},
+		evaluatePolicy: function () {
+			// perform a synchronous update of the value function
+			var Vnew = zeros(this.ns);
+			for (var s = 0; s < this.ns; s++) {
+				// integrate over actions in a stochastic policy
+				// note that we assume that policy probability mass over allowed actions sums to one
+				var v = 0.0;
+				var poss = this.env.allowedActions(s);
+				for (var i = 0, n = poss.length; i < n; i++) {
+					var a = poss[i];
+					var prob = this.P[a * this.ns + s]; // probability of taking action under policy
+					if (prob === 0) {
+						continue;
+					} // no contribution, skip for speed
+					var ns = this.env.nextStateDistribution(s, a);
+					var rs = this.env.reward(s, a, ns); // reward for s->a->ns transition
+					v += prob * (rs + this.gamma * this.V[ns]);
+				}
+				Vnew[s] = v;
+			}
+			this.V = Vnew; // swap
+		},
+		updatePolicy: function () {
+			// update policy to be greedy w.r.t. learned Value function
+			for (var s = 0; s < this.ns; s++) {
+				var poss = this.env.allowedActions(s);
+				// compute value of taking each allowed action
+				var vmax, nmax;
+				var vs = [];
+				for (var i = 0, n = poss.length; i < n; i++) {
+					var a = poss[i];
+					var ns = this.env.nextStateDistribution(s, a);
+					var rs = this.env.reward(s, a, ns);
+					var v = rs + this.gamma * this.V[ns];
+					vs.push(v);
+					if (i === 0 || v > vmax) {
+						vmax = v;
+						nmax = 1;
+					} else if (v === vmax) {
+						nmax += 1;
+					}
+				}
+				// update policy smoothly across all argmaxy actions
+				for (var i = 0, n = poss.length; i < n; i++) {
+					var a = poss[i];
+					this.P[a * this.ns + s] = (vs[i] === vmax) ? 1.0 / nmax : 0.0;
+				}
+			}
+		},
+	}
+	// TDAgent uses TD (Q-Learning, SARSA)
+	// - does not require environment model :)
+	// - learns from experience :)
+	var TDAgent = function (env, opt) {
+		this.update = getopt(opt, 'update', 'qlearn'); // qlearn | sarsa
+		this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
+		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
+		this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
+		// class allows non-deterministic policy, and smoothly regressing towards the optimal policy based on Q
+		this.smooth_policy_update = getopt(opt, 'smooth_policy_update', false);
+		this.beta = getopt(opt, 'beta', 0.01); // learning rate for policy, if smooth updates are on
+		// eligibility traces
+		this.lambda = getopt(opt, 'lambda', 0); // eligibility trace decay. 0 = no eligibility traces used
+		this.replacing_traces = getopt(opt, 'replacing_traces', true);
+		// optional optimistic initial values
+		this.q_init_val = getopt(opt, 'q_init_val', 0);
+		this.planN = getopt(opt, 'planN', 0); // number of planning steps per learning iteration (0 = no planning)
+		this.Q = null; // state action value function
+		this.P = null; // policy distribution \pi(s,a)
+		this.e = null; // eligibility trace
+		this.env_model_s = null;; // environment model (s,a) -> (s',r)
+		this.env_model_r = null;; // environment model (s,a) -> (s',r)
+		this.env = env; // store pointer to environment
+		this.reset();
+	}
+	TDAgent.prototype = {
+		reset: function () {
+			// reset the agent's policy and value function
+			this.ns = this.env.getNumStates();
+			this.na = this.env.getMaxNumActions();
+			this.Q = zeros(this.ns * this.na);
+			if (this.q_init_val !== 0) {
+				setConst(this.Q, this.q_init_val);
+			}
+			this.P = zeros(this.ns * this.na);
+			this.e = zeros(this.ns * this.na);
+			// model/planning vars
+			this.env_model_s = zeros(this.ns * this.na);
+			setConst(this.env_model_s, -1); // init to -1 so we can test if we saw the state before
+			this.env_model_r = zeros(this.ns * this.na);
+			this.sa_seen = [];
+			this.pq = zeros(this.ns * this.na);
+			// initialize uniform random policy
+			for (var s = 0; s < this.ns; s++) {
+				var poss = this.env.allowedActions(s);
+				for (var i = 0, n = poss.length; i < n; i++) {
+					this.P[poss[i] * this.ns + s] = 1.0 / poss.length;
+				}
+			}
+			// agent memory, needed for streaming updates
+			// (s0,a0,r0,s1,a1,r1,...)
+			this.r0 = null;
+			this.s0 = null;
+			this.s1 = null;
+			this.a0 = null;
+			this.a1 = null;
+		},
+		resetEpisode: function () {
+			// an episode finished
+		},
+		act: function (s) {
+			// act according to epsilon greedy policy
+			var poss = this.env.allowedActions(s);
+			var probs = [];
+			for (var i = 0, n = poss.length; i < n; i++) {
+				probs.push(this.P[poss[i] * this.ns + s]);
+			}
+			// epsilon greedy policy
+			if (Math.random() < this.epsilon) {
+				var a = poss[randi(0, poss.length)]; // random available action
+				this.explored = true;
+			} else {
+				var a = poss[sampleWeighted(probs)];
+				this.explored = false;
+			}
+			// shift state memory
+			this.s0 = this.s1;
+			this.a0 = this.a1;
+			this.s1 = s;
+			this.a1 = a;
+			return a;
+		},
+		learn: function (r1) {
+			// takes reward for previous action, which came from a call to act()
+			if (!(this.r0 == null)) {
+				this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1, this.lambda);
+				if (this.planN > 0) {
+					this.updateModel(this.s0, this.a0, this.r0, this.s1);
+					this.plan();
+				}
+			}
+			this.r0 = r1; // store this for next update
+		},
+		updateModel: function (s0, a0, r0, s1) {
+			// transition (s0,a0) -> (r0,s1) was observed. Update environment model
+			var sa = a0 * this.ns + s0;
+			if (this.env_model_s[sa] === -1) {
+				// first time we see this state action
+				this.sa_seen.push(a0 * this.ns + s0); // add as seen state
+			}
+			this.env_model_s[sa] = s1;
+			this.env_model_r[sa] = r0;
+		},
+		plan: function () {
+			// planning step: replay up to planN remembered (s,a) transitions, highest priority first
+			var spq = [];
+			for (var i = 0, n = this.sa_seen.length; i < n; i++) {
+				var sa = this.sa_seen[i];
+				var sap = this.pq[sa];
+				if (sap > 1e-5) { // gain a bit of efficiency
+					spq.push({
+						sa: sa,
+						p: sap
+					});
+				}
+			}
+			spq.sort(function (a, b) {
+				return b.p - a.p; // descending priority; yields 0 on ties so the comparator stays consistent
+			});
+			// perform the updates
+			var nsteps = Math.min(this.planN, spq.length);
+			for (var k = 0; k < nsteps; k++) {
+				// random exploration
+				//var i = randi(0, this.sa_seen.length); // pick random prev seen state action
+				//var s0a0 = this.sa_seen[i];
+				var s0a0 = spq[k].sa;
+				this.pq[s0a0] = 0; // erase priority, since we're backing up this state
+				var s0 = s0a0 % this.ns;
+				var a0 = Math.floor(s0a0 / this.ns);
+				var r0 = this.env_model_r[s0a0];
+				var s1 = this.env_model_s[s0a0];
+				var a1 = -1; // not used for Q learning
+				if (this.update === 'sarsa') {
+					// generate random action?...
+					var poss = this.env.allowedActions(s1);
+					a1 = poss[randi(0, poss.length)];
+				}
+				this.learnFromTuple(s0, a0, r0, s1, a1, 0); // note lambda = 0 - shouldnt use eligibility trace here
+			}
+		},
+		learnFromTuple: function (s0, a0, r0, s1, a1, lambda) { // one TD update of Q from the transition (s0, a0, r0, s1, a1)
+			var sa = a0 * this.ns + s0; // flat index of (s0, a0); tables are indexed as action * ns + state
+			// calculate the target for Q(s,a)
+			if (this.update === 'qlearn') {
+				// Q learning target is Q(s0,a0) = r0 + gamma * max_a Q[s1,a]
+				var poss = this.env.allowedActions(s1);
+				var qmax = 0; // stays 0 when s1 has no allowed actions
+				for (var i = 0, n = poss.length; i < n; i++) {
+					var s1a = poss[i] * this.ns + s1;
+					var qval = this.Q[s1a];
+					if (i === 0 || qval > qmax) {
+						qmax = qval;
+					}
+				}
+				var target = r0 + this.gamma * qmax;
+			} else if (this.update === 'sarsa') {
+				// SARSA target is Q(s0,a0) = r0 + gamma * Q[s1,a1]
+				var s1a1 = a1 * this.ns + s1;
+				var target = r0 + this.gamma * this.Q[s1a1];
+			} // NOTE(review): any other this.update value leaves target undefined
+			if (lambda > 0) {
+				// perform an eligibility trace update
+				if (this.replacing_traces) {
+					this.e[sa] = 1; // replacing trace: reset to 1
+				} else {
+					this.e[sa] += 1; // accumulating trace
+				}
+				var edecay = lambda * this.gamma; // per-step trace decay factor
+				var state_update = zeros(this.ns); // largest |update| seen per state in this sweep
+				for (var s = 0; s < this.ns; s++) {
+					var poss = this.env.allowedActions(s);
+					for (var i = 0; i < poss.length; i++) {
+						var a = poss[i];
+						var saloop = a * this.ns + s;
+						var esa = this.e[saloop];
+						var update = this.alpha * esa * (target - this.Q[saloop]); // step scaled by this pair's trace
+						this.Q[saloop] += update;
+						this.updatePriority(s, a, update);
+						this.e[saloop] *= edecay;
+						var u = Math.abs(update);
+						if (u > state_update[s]) {
+							state_update[s] = u;
+						}
+					}
+				}
+				for (var s = 0; s < this.ns; s++) {
+					if (state_update[s] > 1e-5) { // save efficiency here
+						this.updatePolicy(s);
+					}
+				}
+				if (this.explored && this.update === 'qlearn') {
+					// have to wipe the trace since q learning is off-policy :(
+					this.e = zeros(this.ns * this.na);
+				}
+			} else {
+				// simpler and faster update without eligibility trace
+				// update Q[sa] towards it with some step size
+				var update = this.alpha * (target - this.Q[sa]);
+				this.Q[sa] += update;
+				this.updatePriority(s0, a0, update);
+				// update the policy to reflect the change (if appropriate)
+				this.updatePolicy(s0);
+			}
+		},
+		updatePriority: function (s, a, u) { // raise planning priority of modelled (state, action) pairs that lead into s; a is not read here
+			// used in planning. Invoked when Q[sa] += update
+			// we should find all states that lead to (s,a) and upgrade their priority
+			// of being updated in the next planning step
+			u = Math.abs(u); // only the magnitude of the Q change matters
+			if (u < 1e-5) {
+				return;
+			} // for efficiency skip small updates
+			if (this.planN === 0) {
+				return;
+			} // there is no planning to be done, skip.
+			for (var si = 0; si < this.ns; si++) {
+				// note we are also iterating over impossible actions at all states,
+				// but this should be okay because their env_model_s should simply be -1
+				// as initialized, so they will never be predicted to point to any state
+				// because they will never be observed, and hence never be added to the model
+				for (var ai = 0; ai < this.na; ai++) {
+					var siai = ai * this.ns + si; // flat index of (si, ai)
+					if (this.env_model_s[siai] === s) {
+						// this state leads to s, add it to priority queue
+						this.pq[siai] += u;
+					}
+				}
+			}
+		},
+		updatePolicy: function (s) { // move policy P at state s towards the greedy (argmax Q) actions
+			var poss = this.env.allowedActions(s);
+			// set policy at s to be the action that achieves max_a Q(s,a)
+			// first find the max Q value and how many actions tie for it
+			var qmax, nmax;
+			var qs = []; // Q values in the same order as poss
+			for (var i = 0, n = poss.length; i < n; i++) {
+				var a = poss[i];
+				var qval = this.Q[a * this.ns + s];
+				qs.push(qval);
+				if (i === 0 || qval > qmax) {
+					qmax = qval;
+					nmax = 1;
+				} else if (qval === qmax) {
+					nmax += 1;
+				}
+			}
+			// now update the policy smoothly towards the argmax actions
+			var psum = 0.0; // only accumulated on the smooth path
+			for (var i = 0, n = poss.length; i < n; i++) {
+				var a = poss[i];
+				var target = (qs[i] === qmax) ? 1.0 / nmax : 0.0; // tied best actions share probability equally
+				var ix = a * this.ns + s;
+				if (this.smooth_policy_update) {
+					// slightly hacky :p
+					this.P[ix] += this.beta * (target - this.P[ix]);
+					psum += this.P[ix];
+				} else {
+					// set hard target
+					this.P[ix] = target;
+				}
+			}
+			if (this.smooth_policy_update) {
+				// renormalize P if we're using smooth policy updates
+				for (var i = 0, n = poss.length; i < n; i++) {
+					var a = poss[i];
+					this.P[a * this.ns + s] /= psum; // NOTE(review): divides by 0 if all smoothed entries are 0
+				}
+			}
+		}
+	}
+	var DQNAgent = function (env, opt) { // Q-learning agent whose Q function is a one-hidden-layer net; settings are read from opt via getopt
+		this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
+		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
+		this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
+		this.experience_add_every = getopt(opt, 'experience_add_every', 25); // number of time steps before we add another experience to replay memory
+		this.experience_size = getopt(opt, 'experience_size', 5000); // size of experience replay
+		this.learning_steps_per_iteration = getopt(opt, 'learning_steps_per_iteration', 10); // replayed tuples learned from on each learn() call
+		this.tderror_clamp = getopt(opt, 'tderror_clamp', 1.0); // TD error is clipped to [-clamp, clamp] in learnFromTuple
+		this.num_hidden_units = getopt(opt, 'num_hidden_units', 100); // becomes this.nh, the hidden layer width, in reset()
+		this.env = env; // reset() calls getNumStates() and getMaxNumActions() on it
+		this.reset();
+	}
+	DQNAgent.prototype = {
+		reset: function () {
+			this.nh = this.num_hidden_units; // number of hidden units
+			this.ns = this.env.getNumStates();
+			this.na = this.env.getMaxNumActions();
+			// nets are hardcoded for now as key (str) -> Mat
+			// not proud of this. better solution is to have a whole Net object
+			// on top of Mats, but for now sticking with this
+			this.net = {};
+			this.net.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
+			this.net.b1 = new R.Mat(this.nh, 1, 0, 0.01);
+			this.net.W2 = new R.RandMat(this.na, this.nh, 0, 0.01);
+			this.net.b2 = new R.Mat(this.na, 1, 0, 0.01);
+			this.exp = []; // experience
+			this.expi = 0; // where to insert
+			this.t = 0;
+			this.r0 = null;
+			this.s0 = null;
+			this.s1 = null;
+			this.a0 = null;
+			this.a1 = null;
+			this.tderror = 0; // for visualization only...
+		},
+		toJSON: function () {
+			// save function
+			var j = {};
+			j.nh = this.nh;
+			j.ns = this.ns;
+			j.na = this.na;
+			j.net = R.netToJSON(this.net);
+			return j;
+		},
+		fromJSON: function (j) {
+			// load function
+			this.nh = j.nh;
+			this.ns = j.ns;
+			this.na = j.na;
+			this.net = R.netFromJSON(j.net);
+		},
+		forwardQ: function (net, s, needs_backprop) {
+			var G = new R.Graph(needs_backprop);
+			var a1mat = G.add(G.mul(net.W1, s), net.b1);
+			var h1mat = G.tanh(a1mat);
+			var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
+			this.lastG = G; // back this up. Kind of hacky isn't it
+			return a2mat;
+		},
+		act: function (slist) {
+			// convert to a Mat column vector
+			var s = new R.Mat(this.ns, 1);
+			s.setFrom(slist);
+			// epsilon greedy policy
+			if (Math.random() < this.epsilon) {
+				var a = randi(0, this.na);
+			} else {
+				// greedy wrt Q function
+				var amat = this.forwardQ(this.net, s, false);
+				var a = R.maxi(amat.w); // returns index of argmax action
+			}
+			// shift state memory
+			this.s0 = this.s1;
+			this.a0 = this.a1;
+			this.s1 = s;
+			this.a1 = a;
+			return a;
+		},
+		learn: function (r1) {
+			// perform an update on Q function
+			if (!(this.r0 == null) && this.alpha > 0) {
+				// learn from this tuple to get a sense of how "surprising" it is to the agent
+				var tderror = this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1);
+				this.tderror = tderror; // a measure of surprise
+				// decide if we should keep this experience in the replay
+				if (this.t % this.experience_add_every === 0) {
+					this.exp[this.expi] = [this.s0, this.a0, this.r0, this.s1, this.a1];
+					this.expi += 1;
+					if (this.expi > this.experience_size) {
+						this.expi = 0;
+					} // roll over when we run out
+				}
+				this.t += 1;
+				// sample some additional experience from replay memory and learn from it
+				for (var k = 0; k < this.learning_steps_per_iteration; k++) {
+					var ri = randi(0, this.exp.length); // todo: priority sweeps?
+					var e = this.exp[ri];
+					this.learnFromTuple(e[0], e[1], e[2], e[3], e[4])
+				}
+			}
+			this.r0 = r1; // store for next update
+		},
+		learnFromTuple: function (s0, a0, r0, s1, a1) {
+			// want: Q(s,a) = r + gamma * max_a' Q(s',a')
+			// compute the target Q value
+			var tmat = this.forwardQ(this.net, s1, false);
+			var qmax = r0 + this.gamma * tmat.w[R.maxi(tmat.w)];
+			// now predict
+			var pred = this.forwardQ(this.net, s0, true);
+			var tderror = pred.w[a0] - qmax;
+			var clamp = this.tderror_clamp;
+			if (Math.abs(tderror) > clamp) { // huber loss to robustify
+				if (tderror > clamp) tderror = clamp;
+				if (tderror < -clamp) tderror = -clamp;
+			}
+			pred.dw[a0] = tderror;
+			this.lastG.backward(); // compute gradients on net params
+			// update net
+			R.updateNet(this.net, this.alpha);
+			return tderror;
+		}
+	}
+	// buggy implementation, doesn't work...
+	var SimpleReinforceAgent = function (env, opt) { // policy-gradient agent with feed-forward actor and baseline nets; settings read from opt via getopt
+		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
+		this.epsilon = getopt(opt, 'epsilon', 0.75); // for epsilon-greedy policy (not read by this agent's prototype methods)
+		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
+		this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
+		this.env = env; // reset() calls getNumStates() and getMaxNumActions() on it
+		this.reset();
+	}
+	SimpleReinforceAgent.prototype = {
+		reset: function () {
+			this.ns = this.env.getNumStates();
+			this.na = this.env.getMaxNumActions();
+			this.nh = 100; // number of hidden units
+			this.nhb = 100; // and also in the baseline lstm
+			this.actorNet = {};
+			this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
+			this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
+			this.actorNet.W2 = new R.RandMat(this.na, this.nh, 0, 0.1);
+			this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
+			this.actorOutputs = [];
+			this.actorGraphs = [];
+			this.actorActions = []; // sampled ones
+			this.rewardHistory = [];
+			this.baselineNet = {};
+			this.baselineNet.W1 = new R.RandMat(this.nhb, this.ns, 0, 0.01);
+			this.baselineNet.b1 = new R.Mat(this.nhb, 1, 0, 0.01);
+			this.baselineNet.W2 = new R.RandMat(this.na, this.nhb, 0, 0.01);
+			this.baselineNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
+			this.baselineOutputs = [];
+			this.baselineGraphs = [];
+			this.t = 0;
+		},
+		forwardActor: function (s, needs_backprop) {
+			var net = this.actorNet;
+			var G = new R.Graph(needs_backprop);
+			var a1mat = G.add(G.mul(net.W1, s), net.b1);
+			var h1mat = G.tanh(a1mat);
+			var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
+			return {
+				'a': a2mat,
+				'G': G
+			}
+		},
+		forwardValue: function (s, needs_backprop) {
+			var net = this.baselineNet;
+			var G = new R.Graph(needs_backprop);
+			var a1mat = G.add(G.mul(net.W1, s), net.b1);
+			var h1mat = G.tanh(a1mat);
+			var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
+			return {
+				'a': a2mat,
+				'G': G
+			}
+		},
+		act: function (slist) {
+			// convert to a Mat column vector
+			var s = new R.Mat(this.ns, 1);
+			s.setFrom(slist);
+			// forward the actor to get action output
+			var ans = this.forwardActor(s, true);
+			var amat = ans.a;
+			var ag = ans.G;
+			this.actorOutputs.push(amat);
+			this.actorGraphs.push(ag);
+			// forward the baseline estimator
+			var ans = this.forwardValue(s, true);
+			var vmat = ans.a;
+			var vg = ans.G;
+			this.baselineOutputs.push(vmat);
+			this.baselineGraphs.push(vg);
+			// sample action from the stochastic gaussian policy
+			var a = R.copyMat(amat);
+			var gaussVar = 0.02;
+			a.w[0] = R.randn(0, gaussVar);
+			a.w[1] = R.randn(0, gaussVar);
+			this.actorActions.push(a);
+			// shift state memory
+			this.s0 = this.s1;
+			this.a0 = this.a1;
+			this.s1 = s;
+			this.a1 = a;
+			return a;
+		},
+		learn: function (r1) {
+			// perform an update on Q function
+			this.rewardHistory.push(r1);
+			var n = this.rewardHistory.length;
+			var baselineMSE = 0.0;
+			var nup = 100; // what chunk of experience to take
+			var nuse = 80; // what chunk to update from
+			if (n >= nup) {
+				// lets learn and flush
+				// first: compute the sample values at all points
+				var vs = [];
+				for (var t = 0; t < nuse; t++) {
+					var mul = 1;
+					// compute the actual discounted reward for this time step
+					var V = 0;
+					for (var t2 = t; t2 < n; t2++) {
+						V += mul * this.rewardHistory[t2];
+						mul *= this.gamma;
+						if (mul < 1e-5) {
+							break;
+						} // efficiency savings
+					}
+					// get the predicted baseline at this time step
+					var b = this.baselineOutputs[t].w[0];
+					for (var i = 0; i < this.na; i++) {
+						// [the action delta] * [the desirebility]
+						var update = -(V - b) * (this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
+						if (update > 0.1) {
+							update = 0.1;
+						}
+						if (update < -0.1) {
+							update = -0.1;
+						}
+						this.actorOutputs[t].dw[i] += update;
+					}
+					var update = -(V - b);
+					if (update > 0.1) {
+						update = 0.1;
+					}
+					if (update < 0.1) {
+						update = -0.1;
+					}
+					this.baselineOutputs[t].dw[0] += update;
+					baselineMSE += (V - b) * (V - b);
+					vs.push(V);
+				}
+				baselineMSE /= nuse;
+				// backprop all the things
+				for (var t = 0; t < nuse; t++) {
+					this.actorGraphs[t].backward();
+					this.baselineGraphs[t].backward();
+				}
+				R.updateNet(this.actorNet, this.alpha); // update actor network
+				R.updateNet(this.baselineNet, this.beta); // update baseline network
+				// flush
+				this.actorOutputs = [];
+				this.rewardHistory = [];
+				this.actorActions = [];
+				this.baselineOutputs = [];
+				this.actorGraphs = [];
+				this.baselineGraphs = [];
+				this.tderror = baselineMSE;
+			}
+			this.t += 1;
+			this.r0 = r1; // store for next update
+		},
+	}
+	// buggy implementation as well, doesn't work
+	var RecurrentReinforceAgent = function (env, opt) { // policy-gradient agent with LSTM actor and baseline; settings read from opt via getopt
+		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
+		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy (not read by this agent's prototype methods)
+		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
+		this.beta = getopt(opt, 'beta', 0.01); // baseline net learning rate
+		this.env = env; // reset() calls getNumStates() and getMaxNumActions() on it
+		this.reset();
+	}
+	RecurrentReinforceAgent.prototype = {
+		reset: function () {
+			this.ns = this.env.getNumStates();
+			this.na = this.env.getMaxNumActions();
+			this.nh = 40; // number of hidden units
+			this.nhb = 40; // and also in the baseline lstm
+			this.actorLSTM = R.initLSTM(this.ns, [this.nh], this.na);
+			this.actorG = new R.Graph();
+			this.actorPrev = null;
+			this.actorOutputs = [];
+			this.rewardHistory = [];
+			this.actorActions = [];
+			this.baselineLSTM = R.initLSTM(this.ns, [this.nhb], 1);
+			this.baselineG = new R.Graph();
+			this.baselinePrev = null;
+			this.baselineOutputs = [];
+			this.t = 0;
+			this.r0 = null;
+			this.s0 = null;
+			this.s1 = null;
+			this.a0 = null;
+			this.a1 = null;
+		},
+		act: function (slist) {
+			// convert to a Mat column vector
+			var s = new R.Mat(this.ns, 1);
+			s.setFrom(slist);
+			// forward the LSTM to get action distribution
+			var actorNext = R.forwardLSTM(this.actorG, this.actorLSTM, [this.nh], s, this.actorPrev);
+			this.actorPrev = actorNext;
+			var amat = actorNext.o;
+			this.actorOutputs.push(amat);
+			// forward the baseline LSTM
+			var baselineNext = R.forwardLSTM(this.baselineG, this.baselineLSTM, [this.nhb], s, this.baselinePrev);
+			this.baselinePrev = baselineNext;
+			this.baselineOutputs.push(baselineNext.o);
+			// sample action from actor policy
+			var gaussVar = 0.05;
+			var a = R.copyMat(amat);
+			for (var i = 0, n = a.w.length; i < n; i++) {
+				a.w[0] += R.randn(0, gaussVar);
+				a.w[1] += R.randn(0, gaussVar);
+			}
+			this.actorActions.push(a);
+			// shift state memory
+			this.s0 = this.s1;
+			this.a0 = this.a1;
+			this.s1 = s;
+			this.a1 = a;
+			return a;
+		},
+		learn: function (r1) {
+			// perform an update on Q function
+			this.rewardHistory.push(r1);
+			var n = this.rewardHistory.length;
+			var baselineMSE = 0.0;
+			var nup = 100; // what chunk of experience to take
+			var nuse = 80; // what chunk to also update
+			if (n >= nup) {
+				// lets learn and flush
+				// first: compute the sample values at all points
+				var vs = [];
+				for (var t = 0; t < nuse; t++) {
+					var mul = 1;
+					var V = 0;
+					for (var t2 = t; t2 < n; t2++) {
+						V += mul * this.rewardHistory[t2];
+						mul *= this.gamma;
+						if (mul < 1e-5) {
+							break;
+						} // efficiency savings
+					}
+					var b = this.baselineOutputs[t].w[0];
+					// todo: take out the constants etc.
+					for (var i = 0; i < this.na; i++) {
+						// [the action delta] * [the desirebility]
+						var update = -(V - b) * (this.actorActions[t].w[i] - this.actorOutputs[t].w[i]);
+						if (update > 0.1) {
+							update = 0.1;
+						}
+						if (update < -0.1) {
+							update = -0.1;
+						}
+						this.actorOutputs[t].dw[i] += update;
+					}
+					var update = -(V - b);
+					if (update > 0.1) {
+						update = 0.1;
+					}
+					if (update < 0.1) {
+						update = -0.1;
+					}
+					this.baselineOutputs[t].dw[0] += update;
+					baselineMSE += (V - b) * (V - b);
+					vs.push(V);
+				}
+				baselineMSE /= nuse;
+				this.actorG.backward(); // update params! woohoo!
+				this.baselineG.backward();
+				R.updateNet(this.actorLSTM, this.alpha); // update actor network
+				R.updateNet(this.baselineLSTM, this.beta); // update baseline network
+				// flush
+				this.actorG = new R.Graph();
+				this.actorPrev = null;
+				this.actorOutputs = [];
+				this.rewardHistory = [];
+				this.actorActions = [];
+				this.baselineG = new R.Graph();
+				this.baselinePrev = null;
+				this.baselineOutputs = [];
+				this.tderror = baselineMSE;
+			}
+			this.t += 1;
+			this.r0 = r1; // store for next update
+		},
+	}
+	// Currently buggy implementation, doesn't work
+	var DeterministPG = function (env, opt) { // deterministic policy gradient agent with a linear critic; settings read from opt via getopt
+		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
+		this.epsilon = getopt(opt, 'epsilon', 0.5); // probability that act() replaces the actor output with gaussian noise
+		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
+		this.beta = getopt(opt, 'beta', 0.01); // critic weight learning rate
+		this.env = env; // reset() calls getNumStates() and getMaxNumActions() on it
+		this.reset();
+	}
+	DeterministPG.prototype = {
+		reset: function () {
+			this.ns = this.env.getNumStates();
+			this.na = this.env.getMaxNumActions();
+			this.nh = 100; // number of hidden units
+			// actor
+			this.actorNet = {};
+			this.actorNet.W1 = new R.RandMat(this.nh, this.ns, 0, 0.01);
+			this.actorNet.b1 = new R.Mat(this.nh, 1, 0, 0.01);
+			this.actorNet.W2 = new R.RandMat(this.na, this.ns, 0, 0.1);
+			this.actorNet.b2 = new R.Mat(this.na, 1, 0, 0.01);
+			this.ntheta = this.na * this.ns + this.na; // number of params in actor
+			// critic
+			this.criticw = new R.RandMat(1, this.ntheta, 0, 0.01); // row vector
+			this.r0 = null;
+			this.s0 = null;
+			this.s1 = null;
+			this.a0 = null;
+			this.a1 = null;
+			this.t = 0;
+		},
+		forwardActor: function (s, needs_backprop) {
+			var net = this.actorNet;
+			var G = new R.Graph(needs_backprop);
+			var a1mat = G.add(G.mul(net.W1, s), net.b1);
+			var h1mat = G.tanh(a1mat);
+			var a2mat = G.add(G.mul(net.W2, h1mat), net.b2);
+			return {
+				'a': a2mat,
+				'G': G
+			}
+		},
+		act: function (slist) {
+			// convert to a Mat column vector
+			var s = new R.Mat(this.ns, 1);
+			s.setFrom(slist);
+			// forward the actor to get action output
+			var ans = this.forwardActor(s, false);
+			var amat = ans.a;
+			var ag = ans.G;
+			// sample action from the stochastic gaussian policy
+			var a = R.copyMat(amat);
+			if (Math.random() < this.epsilon) {
+				var gaussVar = 0.02;
+				a.w[0] = R.randn(0, gaussVar);
+				a.w[1] = R.randn(0, gaussVar);
+			}
+			var clamp = 0.25;
+			if (a.w[0] > clamp) a.w[0] = clamp;
+			if (a.w[0] < -clamp) a.w[0] = -clamp;
+			if (a.w[1] > clamp) a.w[1] = clamp;
+			if (a.w[1] < -clamp) a.w[1] = -clamp;
+			// shift state memory
+			this.s0 = this.s1;
+			this.a0 = this.a1;
+			this.s1 = s;
+			this.a1 = a;
+			return a;
+		},
+		utilJacobianAt: function (s) {
+			var ujacobian = new R.Mat(this.ntheta, this.na);
+			for (var a = 0; a < this.na; a++) {
+				R.netZeroGrads(this.actorNet);
+				var ag = this.forwardActor(this.s0, true);
+				ag.a.dw[a] = 1.0;
+				ag.G.backward();
+				var gflat = R.netFlattenGrads(this.actorNet);
+				ujacobian.setColumn(gflat, a);
+			}
+			return ujacobian;
+		},
+		learn: function (r1) {
+			// perform an update on Q function
+			//this.rewardHistory.push(r1);
+			if (!(this.r0 == null)) {
+				var Gtmp = new R.Graph(false);
+				// dpg update:
+				// first compute the features psi:
+				// the jacobian matrix of the actor for s
+				var ujacobian0 = this.utilJacobianAt(this.s0);
+				// now form the features \psi(s,a)
+				var psi_sa0 = Gtmp.mul(ujacobian0, this.a0); // should be [this.ntheta x 1] "feature" vector
+				var qw0 = Gtmp.mul(this.criticw, psi_sa0); // 1x1
+				// now do the same thing because we need \psi(s_{t+1}, \mu\_\theta(s\_t{t+1}))
+				var ujacobian1 = this.utilJacobianAt(this.s1);
+				var ag = this.forwardActor(this.s1, false);
+				var psi_sa1 = Gtmp.mul(ujacobian1, ag.a);
+				var qw1 = Gtmp.mul(this.criticw, psi_sa1); // 1x1
+				// get the td error finally
+				var tderror = this.r0 + this.gamma * qw1.w[0] - qw0.w[0]; // lol
+				if (tderror > 0.5) tderror = 0.5; // clamp
+				if (tderror < -0.5) tderror = -0.5;
+				this.tderror = tderror;
+				// update actor policy with natural gradient
+				var net = this.actorNet;
+				var ix = 0;
+				for (var p in net) {
+					var mat = net[p];
+					if (net.hasOwnProperty(p)) {
+						for (var i = 0, n = mat.w.length; i < n; i++) {
+							mat.w[i] += this.alpha * this.criticw.w[ix]; // natural gradient update
+							ix += 1;
+						}
+					}
+				}
+				// update the critic parameters too
+				for (var i = 0; i < this.ntheta; i++) {
+					var update = this.beta * tderror * psi_sa0.w[i];
+					this.criticw.w[i] += update;
+				}
+			}
+			this.r0 = r1; // store for next update
+		},
+	}
+	// exports: only the DP, TD and DQN agents are attached to the object passed in as `global`
+	global.DPAgent = DPAgent;
+	global.TDAgent = TDAgent;
+	global.DQNAgent = DQNAgent;
+	//global.SimpleReinforceAgent = SimpleReinforceAgent;
+	//global.RecurrentReinforceAgent = RecurrentReinforceAgent;
+	//global.DeterministPG = DeterministPG;
 })(RL);
 var Trevel = {
-    //settings you can change
-    stop: true,
-    maxBet: 0.00001,
-    minBet: 0.00000005,
-    swap: true,
-    betSpeed: 100,//change this on init
-    verbose: true,
-    isTesting: false,
-    //money management
-    useKelly: false,//martingale performs better on live account!
-    korm: false,
-    kellyPercent: 5, //can't be more than 100 or less than 1
-    useMartingale: true, //if kelly is true this won't work
+	//settings you can change
+	stop: true,
+	maxBet: 0.00001,
+	minBet: 0.00000005,
+	swap: true,
+	betSpeed: 100,//change this on init
+	verbose: true,
+	isTesting: false,
+	//money management
+	useKelly: false,//martingale performs better on live account!
+	korm: false,
+	kellyPercent: 5, //can't be more than 100 or less than 1
+	useMartingale: true, //if kelly is true this won't work
 
-    martingaleMultiplier: 2,
-    //bot settings, these are set automaticcally don't bother
-    currentBalance: 0,
-    startingBalance: 0,
-    betAmount: 0,
-    profit: 0,
-    totalBets: 0,
-    totalWins: 0,
-    winRate: 0,
-    betHistory: [], //this is a sequence of all winning bets not the sequence of bets we placed
-    betOutcomes: [],
-    hbProbability: 0,
-    lbProbability: 0,
-    hbCount: 0,
-    lbcount: 0,
-    nextBet: "",
-    previousReward: 0,
-    addBet: function (bet, outcome) {
-        if (bet === "LB" && outcome === "Win") {
-            this.betHistory.push("LO");
-            this.betOutcomes.push("W");
-            this.totalWins++;
-            this.lbcount++;
-        }
-        if (bet === "LB" && outcome === "Loose") {
-            this.betHistory.push("HI");
-            this.hbCount++;
-            this.betOutcomes.push("L");
-        }
-        if (bet === "HB" && outcome === "Win") {
-            this.betHistory.push("HI");
-            this.totalWins++;
-            this.hbCount++;
-            this.betOutcomes.push("W");
-        }
-        if (bet === "HB" && outcome === "Loose") {
-            this.betHistory.push("LO");
-            this.lbcount++;
-            this.betOutcomes.push("L");
-        }
-        this.totalBets++;
-    },
-    calculateProbabilities: function () {
-        this.hbProbability = this.hbCount / this.betHistory.length;
-        this.lbProbability = this.lbcount / this.betHistory.length;
-        this.winRate = this.totalWins / this.totalBets;
-        if (this.isTesting === false) {
-            this.profit = this.getProfit();
-        }
-    },
-    getCurrentBalance: function () {
-        return parseFloat($('#balance').html());
-    },
-    placeHighBet: function () {
-        $('#double_your_btc_bet_hi_button').click();
-    },
-    placeLowBet: function () {
-        $('#double_your_btc_bet_lo_button').click();
-    },
-    setBetAmount: function (amount) {
-        var elem = document.getElementById("double_your_btc_stake");
-        elem.value = amount;
-    },
-    setOutcome: function (bet) {
-        if ($('#double_your_btc_bet_lose').html() !== '') {
-            this.addBet(bet, "Loose");
-        } else {
-            this.addBet(bet, "Win");
-        }
-    },
-    prepareBet: function () {
-        this.calculateProbabilities();
-        if (this.betHistory.length < 10) {
-            if (this.useMartingale === true && this.betHistory.length > 12) {
-                if ($('#double_your_btc_bet_lose').html() !== '' && parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier < this.maxBet) {
-                    this.setBetAmount((parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier).toFixed(8));
-                } else {
-                    this.setBetAmount(this.minBet);
-                }
-            }
-        } else {
-            if (this.useKelly === true && this.betHistory.length > 12) {
-                this.currentBalance = this.getCurrentBalance();
-                var currMulty = document.getElementById("double_your_btc_payout_multiplier").value;
-                var kellyAmount = (((this.currentBalance * this.kellyPercent) / 100) * ((this.winRate * currMulty - 1)) / (currMulty - 1)).toFixed(8);
-                if (kellyAmount > 0 && kellyAmount < this.maxBet) {
-                    this.setBetAmount(kellyAmount);
-                } else {
-                    this.setBetAmount(this.minBet);
-                }
-            } else if (this.useMartingale === true && this.betHistory.length > 12) {
-                if ($('#double_your_btc_bet_lose').html() !== '' && parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier < this.maxBet) {
-                    this.setBetAmount((parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier).toFixed(8));
-                } else {
-                    this.setBetAmount(this.minBet);
-                }
-            }
-        }
-    },
-    placeBet: function () {
-        if (this.nextBet === "HB") {
-            this.placeHighBet();
-        } else if (this.nextBet === "LB") {
-            this.placeLowBet();
-        } else if (this.betHistory.length > 0 && this.swap === true) {
-            var prev = this.betHistory[this.betHistory.length - 1];
-            if (prev === "LO") {
-                this.placeHighBet();
-            } else {
-                this.placeLowBet();
-            }
-        } else {
-            this.placeLowBet();
-        }
-    },
-    getProfit: function () {
-        return (this.getCurrentBalance() - this.startingBalance).toFixed(8);
-    },
-    getNumStates: function () {
-        return 8;
-    },
-    getMaxNumActions: function () {
-        return 2;
-    },
-    getSentiment: function (bet) {
-        if (bet === "HI") {
-            return 1;
-        } else {
-            return 0;
-        }
-    },
-    getPreviousBets: function () {
-        var hist = [];
-        if (this.betHistory.length > 12) {
-            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 1]));
-            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 2]));
-            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 3]));
-            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 4]));
-            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 5]));
-            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 6]));
-            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 7]));
-            hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 8]));
-        } else {
-            hist = [0, 1, 0, 1, 0, 1, 0, 1]; //incase we just started...
-        }
-        return hist;
-    },
-    getAgentState: function () { //we'll observe the last 8 bets
-        var s = this.getPreviousBets();
-        return s;
-    },
-    getReward: function () {
-        var reward = 0;
-        var out1 = this.betOutcomes[this.betOutcomes.length - 1];
-        var out2 = this.betOutcomes[this.betOutcomes.length - 2];
-        if (out1 === "L") {
-            if (this.previousReward < 0) {
-                reward = this.previousReward;
-                reward += -0.03;
-                if (out2 === "L") {
-                    reward += -0.03;
-                }
-            }
-            else {
-                reward = -0.03;
-                if (out2 === "L") {
-                    reward += -0.03;
-                }
-            }
-        }
-        else {
-            if (this.previousReward > 0) {
-                reward = this.previousReward;
-                reward += 0.01;
-                if (out2 === "W") {
-                    reward += 0.01;
-                }
-            }
-            else {
-                reward = 0.01;
-                if (out2 === "W") {
-                    reward += 0.01;
-                }
-            }
-        }
-        return reward;
-    },
-    //for raw testing only
-    randomNumber: function (min, max) {
-        return Math.floor(Math.random() * (max - min + 1) + min);
-    },
-    getTestOutcome: function (random) {
-        if (random % 2 === 0) {
-            return "HI";
-        } else {
-            return "LO";
-        }
-    },
-    //random string for random seed
-    rString: function (length, chars) {
-        var result = '';
-        var length = 16;
-        var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'
-        for (var i = length; i > 0; --i) result += chars[Math.floor(Math.random() * chars.length)];
-        return result;
-    },
-    //set client seed as random string
-    rSeed: function () { 
-        $('#next_client_seed').val(rString());
-     },
-    //initialize this
-    init: function () {
-        this.startingBalance = this.currentBalance = parseFloat($('#balance').html());
-        this.setBetAmount(this.minBet);
-        this.stop = true;
-        this.swap = true;
-        this.betSpeed = 3000;
-        document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.config()" class="free_play_link">START BOT</a>';
+	martingaleMultiplier: 2,
+	//bot settings, these are set automaticcally don't bother
+	currentBalance: 0,
+	startingBalance: 0,
+	betAmount: 0,
+	profit: 0,
+	totalBets: 0,
+	totalWins: 0,
+	winRate: 0,
+	betHistory: [], //this is a sequence of all winning bets not the sequence of bets we placed
+	betOutcomes: [],
+	hbProbability: 0,
+	lbProbability: 0,
+	hbCount: 0,
+	lbcount: 0,
+	nextBet: "",
+	previousReward: 0,
+	addBet: function (bet, outcome) {
+		if (bet === "LB" && outcome === "Win") {
+			this.betHistory.push("LO");
+			this.betOutcomes.push("W");
+			this.totalWins++;
+			this.lbcount++;
+		}
+		if (bet === "LB" && outcome === "Loose") {
+			this.betHistory.push("HI");
+			this.hbCount++;
+			this.betOutcomes.push("L");
+		}
+		if (bet === "HB" && outcome === "Win") {
+			this.betHistory.push("HI");
+			this.totalWins++;
+			this.hbCount++;
+			this.betOutcomes.push("W");
+		}
+		if (bet === "HB" && outcome === "Loose") {
+			this.betHistory.push("LO");
+			this.lbcount++;
+			this.betOutcomes.push("L");
+		}
+		this.totalBets++;
+	},
+	calculateProbabilities: function () {
+		this.hbProbability = this.hbCount / this.betHistory.length;
+		this.lbProbability = this.lbcount / this.betHistory.length;
+		this.winRate = this.totalWins / this.totalBets;
+		if (this.isTesting === false) {
+			this.profit = this.getProfit();
+		}
+	},
+	getCurrentBalance: function () {
+		return parseFloat($('#balance').html());
+	},
+	placeHighBet: function () {
+		$('#double_your_btc_bet_hi_button').click();
+	},
+	placeLowBet: function () {
+		$('#double_your_btc_bet_lo_button').click();
+	},
+	setBetAmount: function (amount) {
+		var elem = document.getElementById("double_your_btc_stake");
+		elem.value = amount;
+	},
+	setOutcome: function (bet) {
+		if ($('#double_your_btc_bet_lose').html() !== '') {
+			this.addBet(bet, "Loose");
+		} else {
+			this.addBet(bet, "Win");
+		}
+	},
+	prepareBet: function () {
+		this.calculateProbabilities();
+		if (this.betHistory.length < 10) {
+			if (this.useMartingale === true && this.betHistory.length > 12) {
+				if ($('#double_your_btc_bet_lose').html() !== '' && parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier < this.maxBet) {
+					this.setBetAmount((parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier).toFixed(8));
+				} else {
+					this.setBetAmount(this.minBet);
+				}
+			}
+		} else {
+			if (this.useKelly === true && this.betHistory.length > 12) {
+				this.currentBalance = this.getCurrentBalance();
+				var currMulty = document.getElementById("double_your_btc_payout_multiplier").value;
+				var kellyAmount = (((this.currentBalance * this.kellyPercent) / 100) * ((this.winRate * currMulty - 1)) / (currMulty - 1)).toFixed(8);
+				if (kellyAmount > 0 && kellyAmount < this.maxBet) {
+					this.setBetAmount(kellyAmount);
+				} else {
+					this.setBetAmount(this.minBet);
+				}
+			} else if (this.useMartingale === true && this.betHistory.length > 12) {
+				if ($('#double_your_btc_bet_lose').html() !== '' && parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier < this.maxBet) {
+					this.setBetAmount((parseFloat($('#double_your_btc_stake').val()) * this.martingaleMultiplier).toFixed(8));
+				} else {
+					this.setBetAmount(this.minBet);
+				}
+			}
+		}
+	},
+	placeBet: function () {
+		if (this.nextBet === "HB") {
+			this.placeHighBet();
+		} else if (this.nextBet === "LB") {
+			this.placeLowBet();
+		} else if (this.betHistory.length > 0 && this.swap === true) {
+			var prev = this.betHistory[this.betHistory.length - 1];
+			if (prev === "LO") {
+				this.placeHighBet();
+			} else {
+				this.placeLowBet();
+			}
+		} else {
+			this.placeLowBet();
+		}
+	},
+	getProfit: function () {
+		return (this.getCurrentBalance() - this.startingBalance).toFixed(8);
+	},
+	getNumStates: function () {
+		return 8;
+	},
+	getMaxNumActions: function () {
+		return 2;
+	},
+	getSentiment: function (bet) {
+		if (bet === "HI") {
+			return 1;
+		} else {
+			return 0;
+		}
+	},
+	getPreviousBets: function () {
+		var hist = [];
+		if (this.betHistory.length > 12) {
+			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 1]));
+			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 2]));
+			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 3]));
+			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 4]));
+			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 5]));
+			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 6]));
+			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 7]));
+			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 8]));
+		} else {
+			hist = [0, 1, 0, 1, 0, 1, 0, 1]; //incase we just started...
+		}
+		return hist;
+	},
+	getAgentState: function () { //we'll observe the last 8 bets
+		var s = this.getPreviousBets();
+		return s;
+	},
+	getReward: function () {
+		var reward = 0;
+		var out1 = this.betOutcomes[this.betOutcomes.length - 1];
+		var out2 = this.betOutcomes[this.betOutcomes.length - 2];
+		if (out1 === "L") {
+			if (this.previousReward < 0) {
+				reward = this.previousReward;
+				reward += -0.03;
+				if (out2 === "L") {
+					reward += -0.03;
+				}
+			}
+			else {
+				reward = -0.03;
+				if (out2 === "L") {
+					reward += -0.03;
+				}
+			}
+		}
+		else {
+			if (this.previousReward > 0) {
+				reward = this.previousReward;
+				reward += 0.01;
+				if (out2 === "W") {
+					reward += 0.01;
+				}
+			}
+			else {
+				reward = 0.01;
+				if (out2 === "W") {
+					reward += 0.01;
+				}
+			}
+		}
+		return reward;
+	},
+	//for raw testing only
+	randomNumber: function (min, max) {
+		return Math.floor(Math.random() * (max - min + 1) + min);
+	},
+	getTestOutcome: function (random) {
+		if (random % 2 === 0) {
+			return "HI";
+		} else {
+			return "LO";
+		}
+	},
+	//random string for the client seed; length defaults to 16 and chars to A-Z, 0-9, a-z when omitted
+	rString: function (length, chars) {
+		var result = '';
+		if (typeof length === 'undefined') length = 16; // was `var length = 16`, which discarded the caller's argument
+		if (typeof chars === 'undefined') chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'; // same for chars
+		for (var i = length; i > 0; --i) result += chars[Math.floor(Math.random() * chars.length)];
+		return result;
+	},
+	//write a fresh random string into the #next_client_seed field; rString is a method of this object, so it must be called via `this`
+	rSeed: function () {
+		$('#next_client_seed').val(this.rString());
+	},
+	//initialize this
+	init: function () {
+		this.startingBalance = this.currentBalance = parseFloat($('#balance').html());
+		this.setBetAmount(this.minBet);
+		this.stop = true;
+		this.swap = true;
+		this.betSpeed = 3000;
+		document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.config()" class="free_play_link">START BOT</a>';
 
-    },
-    // ask user config variables
-    config: function () {
+	},
+	// ask user config variables
+	config: function () {
 
-        //define temporary variables
-        var maxb = 0, minb = 0;
+		//define temporary variables
+		var maxb = 0, minb = 0;
 
-        //prompt questions
-        maxb = prompt('Maximum bet stake in Satoshi', 1000);
-        this.maxBet = sattobtc(maxb);
-        minb = prompt('Minimum bet stake in Satoshi', 2);
-        this.minBet = sattobtc(minb);
-        this.martingaleMultiplier = prompt('Bet multiplier on lose', 2);
-        this.swap = prompt('True for swap enabled, false for disabled', 'true');
-        this.korm = prompt('True to enable Kelly, false to enabled martingale, leave blank for both', 'false');
-        this.betSpeed = prompt('Wait time before next bet is placed in ms', 3000);
+		//prompt questions
+		maxb = prompt('Maximum bet stake in Satoshi', 1000);
+		this.maxBet = sattobtc(maxb);
+		minb = prompt('Minimum bet stake in Satoshi', 2);
+		this.minBet = sattobtc(minb);
+		this.martingaleMultiplier = prompt('Bet multiplier on lose', 2);
+		this.swap = prompt('True for swap enabled, false for disabled', 'true');
+		this.korm = prompt('True to enable Kelly, false to enabled martingale, leave blank for both', 'false');
+		this.betSpeed = prompt('Wait time before next bet is placed in ms', 3000);
 
-        //convert satoshi to btc
-        function sattobtc(sat) {
-            var btc = 0.00000001;
-            return sat * btc;
-        }
-        if (this.korm === 'true') {
-            this.useKelly = true;
-            this.useMartingale = false;
-        }
-        else if (this.korm === 'false') {
-            this.useMartingale = true;
-            this.useKelly = false;
-        }
-        else {
-            this.useKelly = this.useMartingale = true;
-        }
-        //start betting
-        startbetting();
-    },
-    stopbets: function () {
-        env.stop = true;
-        clearInterval(interval);
-        console.log('Bet session has been stopped, to start over click start.');
-        document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.config()" class="free_play_link">START BOT</a>';
+		//convert satoshi to btc
+		function sattobtc(sat) {
+			var btc = 0.00000001;
+			return sat * btc;
+		}
+		if (this.korm === 'true') {
+			this.useKelly = true;
+			this.useMartingale = false;
+		}
+		else if (this.korm === 'false') {
+			this.useMartingale = true;
+			this.useKelly = false;
+		}
+		else {
+			this.useKelly = this.useMartingale = true;
+		}
+		//start betting
+		startbetting();
+	},
+	stopbets: function () {
+		env.stop = true;
+		clearInterval(interval);
+		console.log('Bet session has been stopped, to start over click start.');
+		document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.config()" class="free_play_link">START BOT</a>';
 
 
-    }
+	}
 };
 //Deep Q learning with reinforceJS
 var spec = {};
@@ -1806,17 +1806,17 @@ spec.num_hidden_units = 24;
 var env = Trevel;
 var interval = null;
 if (env.isTesting === false) {
-    env.init();
+	env.init();
 }
 // create the DQN agent
 agent = new RL.DQNAgent(env, spec);
 
 // start betting function/agent interval
 function startbetting() {
-    console.log('Starting bet session, to stop click STOP BOT');
-    document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.stopbets()" class="free_play_link">STOP BOT</a>';
-    env.stop = false;
-    interval = setInterval(function () { loop(); }, env.betSpeed);
+	console.log('Starting bet session, to stop click STOP BOT');
+	document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.stopbets()" class="free_play_link">STOP BOT</a>';
+	env.stop = false;
+	interval = setInterval(function () { loop(); }, env.betSpeed);
 }
 console.clear();
 console.log('You are using Trevel, with ReinforceJS');
@@ -1825,62 +1825,61 @@ console.log('Click it to set the config. Note: These settings are not persistent
 console.log('To change the default values for these settings, search the script for "prompt"');
 console.log('Enjoy');
 function loop() {
-if(env.profit > 0.00000500)
-{
-    env.rSeed();
-}
-    if (env.stop === false) {
-        var state = env.getAgentState();
-        var action = agent.act(state);
-        var outcome = "";
-        if (env.isTesting === false) {
-            if (action === 0) {
-                env.nextBet = "LB";
-                env.prepareBet();
-                env.placeBet();
-                env.setOutcome("LB");
-                outcome = env.betOutcomes[env.betOutcomes.length - 1];
-            } else if (action === 1) {
-                env.nextBet = "HB";
-                env.prepareBet();
-                env.placeBet();
-                env.setOutcome("HB");
-                outcome = env.betOutcomes[env.betOutcomes.length - 1];
-            }
-            if (env.verbose === true) {
-                env.calculateProbabilities();
-                //console.log("Machine Bet: " + action + "{" + env.nextBet + "} isKelly: " + env.useKelly + " isMartingale: " + env.useMartingale);
-                console.log("Profit: " + env.profit + " WinRate: " + (env.winRate * 100).toFixed(2));
-            }
-        } else {
-            console.log("Action: " + action);
-            var testOutcome = env.getTestOutcome(env.randomNumber(0, 1000));
-            if (action === 0 && testOutcome === "LO") {
-                env.addBet("LB", "Win");
-                outcome = "W";
-            } else if (action === 0 && testOutcome === "HI") {
-                env.addBet("LB", "Loose");
-                outcome = "L";
-            } else if (action === 1 && testOutcome === "HI") {
-                env.addBet("HB", "Win");
-                outcome = "W";
-            } else if (action === 1 && testOutcome === "LO") {
-                env.addBet("HB", "Loose");
-                outcome = "L";
-            }
-            env.calculateProbabilities();
-            console.log("Winrate: " + (env.winRate * 100).toFixed(2));
-        }
-        var reward = env.getReward();
-        if (reward === 0) {
-            if (outcome === "L") {
-                reward = -0.03;
-            } else {
-                reward = 0.01;
-            }
-        }
-        agent.learn(reward);
-        env.previousReward = reward;
-    }
+	if (env.profit > 0.00000500) {
+		env.rSeed();
+	}
+	if (env.stop === false) {
+		var state = env.getAgentState();
+		var action = agent.act(state);
+		var outcome = "";
+		if (env.isTesting === false) {
+			if (action === 0) {
+				env.nextBet = "LB";
+				env.prepareBet();
+				env.placeBet();
+				env.setOutcome("LB");
+				outcome = env.betOutcomes[env.betOutcomes.length - 1];
+			} else if (action === 1) {
+				env.nextBet = "HB";
+				env.prepareBet();
+				env.placeBet();
+				env.setOutcome("HB");
+				outcome = env.betOutcomes[env.betOutcomes.length - 1];
+			}
+			if (env.verbose === true) {
+				env.calculateProbabilities();
+				//console.log("Machine Bet: " + action + "{" + env.nextBet + "} isKelly: " + env.useKelly + " isMartingale: " + env.useMartingale);
+				console.log("Profit: " + env.profit + " WinRate: " + (env.winRate * 100).toFixed(2));
+			}
+		} else {
+			console.log("Action: " + action);
+			var testOutcome = env.getTestOutcome(env.randomNumber(0, 1000));
+			if (action === 0 && testOutcome === "LO") {
+				env.addBet("LB", "Win");
+				outcome = "W";
+			} else if (action === 0 && testOutcome === "HI") {
+				env.addBet("LB", "Loose");
+				outcome = "L";
+			} else if (action === 1 && testOutcome === "HI") {
+				env.addBet("HB", "Win");
+				outcome = "W";
+			} else if (action === 1 && testOutcome === "LO") {
+				env.addBet("HB", "Loose");
+				outcome = "L";
+			}
+			env.calculateProbabilities();
+			console.log("Winrate: " + (env.winRate * 100).toFixed(2));
+		}
+		var reward = env.getReward();
+		if (reward === 0) {
+			if (outcome === "L") {
+				reward = -0.03;
+			} else {
+				reward = 0.01;
+			}
+		}
+		agent.learn(reward);
+		env.previousReward = reward;
+	}
 
 }

From 85aca3e9bb54d7f1bd541daa9c79358c519a8533 Mon Sep 17 00:00:00 2001
From: nickisghosty <nickisghosty@gmail.com>
Date: Sun, 22 Oct 2017 07:31:09 -0700
Subject: [PATCH 3/6] fixed space after function

---
 DQ-Trevel.js | 240 +++++++++++++++++++++++++--------------------------
 1 file changed, 120 insertions(+), 120 deletions(-)

diff --git a/DQ-Trevel.js b/DQ-Trevel.js
index 7a68dde..358e04b 100644
--- a/DQ-Trevel.js
+++ b/DQ-Trevel.js
@@ -1,5 +1,5 @@
 var R = {}; // the Recurrent library
-(function (global) {
+(function(global) {
 	"use strict";
 	// Utility fun
 	function assert(condition, message) {
@@ -15,7 +15,7 @@ var R = {}; // the Recurrent library
 	// Random numbers utils
 	var return_v = false;
 	var v_val = 0.0;
-	var gaussRandom = function () {
+	var gaussRandom = function() {
 		if (return_v) {
 			return_v = false;
 			return v_val;
@@ -29,18 +29,18 @@ var R = {}; // the Recurrent library
 		return_v = true;
 		return u * c;
 	}
-	var randf = function (a, b) {
+	var randf = function(a, b) {
 		return Math.random() * (b - a) + a;
 	}
-	var randi = function (a, b) {
+	var randi = function(a, b) {
 		return Math.floor(Math.random() * (b - a) + a);
 	}
-	var randn = function (mu, std) {
+	var randn = function(mu, std) {
 		return mu + gaussRandom() * std;
 	}
 	// helper function returns array of zeros of length n
 	// and uses typed arrays if available
-	var zeros = function (n) {
+	var zeros = function(n) {
 		if (typeof (n) === 'undefined' || isNaN(n)) {
 			return [];
 		}
@@ -56,7 +56,7 @@ var R = {}; // the Recurrent library
 		}
 	}
 	// Mat holds a matrix
-	var Mat = function (n, d) {
+	var Mat = function(n, d) {
 		// n is number of rows d is number of columns
 		this.n = n;
 		this.d = d;
@@ -64,37 +64,37 @@ var R = {}; // the Recurrent library
 		this.dw = zeros(n * d);
 	}
 	Mat.prototype = {
-		get: function (row, col) {
+		get: function(row, col) {
 			// slow but careful accessor function
 			// we want row-major order
 			var ix = (this.d * row) + col;
 			assert(ix >= 0 && ix < this.w.length);
 			return this.w[ix];
 		},
-		set: function (row, col, v) {
+		set: function(row, col, v) {
 			// slow but careful accessor function
 			var ix = (this.d * row) + col;
 			assert(ix >= 0 && ix < this.w.length);
 			this.w[ix] = v;
 		},
-		setFrom: function (arr) {
+		setFrom: function(arr) {
 			for (var i = 0, n = arr.length; i < n; i++) {
 				this.w[i] = arr[i];
 			}
 		},
-		setColumn: function (m, i) {
+		setColumn: function(m, i) {
 			for (var q = 0, n = m.w.length; q < n; q++) {
 				this.w[(this.d * q) + i] = m.w[q];
 			}
 		},
-		toJSON: function () {
+		toJSON: function() {
 			var json = {};
 			json['n'] = this.n;
 			json['d'] = this.d;
 			json['w'] = this.w;
 			return json;
 		},
-		fromJSON: function (json) {
+		fromJSON: function(json) {
 			this.n = json.n;
 			this.d = json.d;
 			this.w = zeros(this.n * this.d);
@@ -104,12 +104,12 @@ var R = {}; // the Recurrent library
 			}
 		}
 	}
-	var copyMat = function (b) {
+	var copyMat = function(b) {
 		var a = new Mat(b.n, b.d);
 		a.setFrom(b.w);
 		return a;
 	}
-	var copyNet = function (net) {
+	var copyNet = function(net) {
 		// nets are (k,v) pairs with k = string key, v = Mat()
 		var new_net = {};
 		for (var p in net) {
@@ -119,7 +119,7 @@ var R = {}; // the Recurrent library
 		}
 		return new_net;
 	}
-	var updateMat = function (m, alpha) {
+	var updateMat = function(m, alpha) {
 		// updates in place
 		for (var i = 0, n = m.n * m.d; i < n; i++) {
 			if (m.dw[i] !== 0) {
@@ -128,14 +128,14 @@ var R = {}; // the Recurrent library
 			}
 		}
 	}
-	var updateNet = function (net, alpha) {
+	var updateNet = function(net, alpha) {
 		for (var p in net) {
 			if (net.hasOwnProperty(p)) {
 				updateMat(net[p], alpha);
 			}
 		}
 	}
-	var netToJSON = function (net) {
+	var netToJSON = function(net) {
 		var j = {};
 		for (var p in net) {
 			if (net.hasOwnProperty(p)) {
@@ -144,7 +144,7 @@ var R = {}; // the Recurrent library
 		}
 		return j;
 	}
-	var netFromJSON = function (j) {
+	var netFromJSON = function(j) {
 		var net = {};
 		for (var p in j) {
 			if (j.hasOwnProperty(p)) {
@@ -154,7 +154,7 @@ var R = {}; // the Recurrent library
 		}
 		return net;
 	}
-	var netZeroGrads = function (net) {
+	var netZeroGrads = function(net) {
 		for (var p in net) {
 			if (net.hasOwnProperty(p)) {
 				var mat = net[p];
@@ -162,7 +162,7 @@ var R = {}; // the Recurrent library
 			}
 		}
 	}
-	var netFlattenGrads = function (net) {
+	var netFlattenGrads = function(net) {
 		var n = 0;
 		for (var p in net) {
 			if (net.hasOwnProperty(p)) {
@@ -184,7 +184,7 @@ var R = {}; // the Recurrent library
 		return g;
 	}
 	// return Mat but filled with random numbers from gaussian
-	var RandMat = function (n, d, mu, std) {
+	var RandMat = function(n, d, mu, std) {
 		var m = new Mat(n, d);
 		fillRandn(m, mu, std);
 		//fillRand(m,-std,std); // kind of :P
@@ -192,23 +192,23 @@ var R = {}; // the Recurrent library
 	}
 	// Mat utils
 	// fill matrix with random gaussian numbers
-	var fillRandn = function (m, mu, std) {
+	var fillRandn = function(m, mu, std) {
 		for (var i = 0, n = m.w.length; i < n; i++) {
 			m.w[i] = randn(mu, std);
 		}
 	}
-	var fillRand = function (m, lo, hi) {
+	var fillRand = function(m, lo, hi) {
 		for (var i = 0, n = m.w.length; i < n; i++) {
 			m.w[i] = randf(lo, hi);
 		}
 	}
-	var gradFillConst = function (m, c) {
+	var gradFillConst = function(m, c) {
 		for (var i = 0, n = m.dw.length; i < n; i++) {
 			m.dw[i] = c
 		}
 	}
 	// Transformer definitions
-	var Graph = function (needs_backprop) {
+	var Graph = function(needs_backprop) {
 		if (typeof needs_backprop === 'undefined') {
 			needs_backprop = true;
 		}
@@ -219,12 +219,12 @@ var R = {}; // the Recurrent library
 		this.backprop = [];
 	}
 	Graph.prototype = {
-		backward: function () {
+		backward: function() {
 			for (var i = this.backprop.length - 1; i >= 0; i--) {
 				this.backprop[i](); // tick!
 			}
 		},
-		rowPluck: function (m, ix) {
+		rowPluck: function(m, ix) {
 			// pluck a row of m with index ix and return it as col vector
 			assert(ix >= 0 && ix < m.n);
 			var d = m.d;
@@ -233,7 +233,7 @@ var R = {}; // the Recurrent library
 				out.w[i] = m.w[d * ix + i];
 			} // copy over the data
 			if (this.needs_backprop) {
-				var backward = function () {
+				var backward = function() {
 					for (var i = 0, n = d; i < n; i++) {
 						m.dw[d * ix + i] += out.dw[i];
 					}
@@ -242,7 +242,7 @@ var R = {}; // the Recurrent library
 			}
 			return out;
 		},
-		tanh: function (m) {
+		tanh: function(m) {
 			// tanh nonlinearity
 			var out = new Mat(m.n, m.d);
 			var n = m.w.length;
@@ -250,7 +250,7 @@ var R = {}; // the Recurrent library
 				out.w[i] = Math.tanh(m.w[i]);
 			}
 			if (this.needs_backprop) {
-				var backward = function () {
+				var backward = function() {
 					for (var i = 0; i < n; i++) {
 						// grad for z = tanh(x) is (1 - z^2)
 						var mwi = out.w[i];
@@ -261,7 +261,7 @@ var R = {}; // the Recurrent library
 			}
 			return out;
 		},
-		sigmoid: function (m) {
+		sigmoid: function(m) {
 			// sigmoid nonlinearity
 			var out = new Mat(m.n, m.d);
 			var n = m.w.length;
@@ -269,7 +269,7 @@ var R = {}; // the Recurrent library
 				out.w[i] = sig(m.w[i]);
 			}
 			if (this.needs_backprop) {
-				var backward = function () {
+				var backward = function() {
 					for (var i = 0; i < n; i++) {
 						// grad for z = tanh(x) is (1 - z^2)
 						var mwi = out.w[i];
@@ -280,14 +280,14 @@ var R = {}; // the Recurrent library
 			}
 			return out;
 		},
-		relu: function (m) {
+		relu: function(m) {
 			var out = new Mat(m.n, m.d);
 			var n = m.w.length;
 			for (var i = 0; i < n; i++) {
 				out.w[i] = Math.max(0, m.w[i]); // relu
 			}
 			if (this.needs_backprop) {
-				var backward = function () {
+				var backward = function() {
 					for (var i = 0; i < n; i++) {
 						m.dw[i] += m.w[i] > 0 ? out.dw[i] : 0.0;
 					}
@@ -296,7 +296,7 @@ var R = {}; // the Recurrent library
 			}
 			return out;
 		},
-		mul: function (m1, m2) {
+		mul: function(m1, m2) {
 			// multiply matrices m1 * m2
 			assert(m1.d === m2.n, 'matmul dimensions misaligned');
 			var n = m1.n;
@@ -312,7 +312,7 @@ var R = {}; // the Recurrent library
 				}
 			}
 			if (this.needs_backprop) {
-				var backward = function () {
+				var backward = function() {
 					for (var i = 0; i < m1.n; i++) { // loop over rows of m1
 						for (var j = 0; j < m2.d; j++) { // loop over cols of m2
 							for (var k = 0; k < m1.d; k++) { // dot product loop
@@ -327,14 +327,14 @@ var R = {}; // the Recurrent library
 			}
 			return out;
 		},
-		add: function (m1, m2) {
+		add: function(m1, m2) {
 			assert(m1.w.length === m2.w.length);
 			var out = new Mat(m1.n, m1.d);
 			for (var i = 0, n = m1.w.length; i < n; i++) {
 				out.w[i] = m1.w[i] + m2.w[i];
 			}
 			if (this.needs_backprop) {
-				var backward = function () {
+				var backward = function() {
 					for (var i = 0, n = m1.w.length; i < n; i++) {
 						m1.dw[i] += out.dw[i];
 						m2.dw[i] += out.dw[i];
@@ -344,7 +344,7 @@ var R = {}; // the Recurrent library
 			}
 			return out;
 		},
-		dot: function (m1, m2) {
+		dot: function(m1, m2) {
 			// m1 m2 are both column vectors
 			assert(m1.w.length === m2.w.length);
 			var out = new Mat(1, 1);
@@ -354,7 +354,7 @@ var R = {}; // the Recurrent library
 			}
 			out.w[0] = dot;
 			if (this.needs_backprop) {
-				var backward = function () {
+				var backward = function() {
 					for (var i = 0, n = m1.w.length; i < n; i++) {
 						m1.dw[i] += m2.w[i] * out.dw[0];
 						m2.dw[i] += m1.w[i] * out.dw[0];
@@ -364,14 +364,14 @@ var R = {}; // the Recurrent library
 			}
 			return out;
 		},
-		eltmul: function (m1, m2) {
+		eltmul: function(m1, m2) {
 			assert(m1.w.length === m2.w.length);
 			var out = new Mat(m1.n, m1.d);
 			for (var i = 0, n = m1.w.length; i < n; i++) {
 				out.w[i] = m1.w[i] * m2.w[i];
 			}
 			if (this.needs_backprop) {
-				var backward = function () {
+				var backward = function() {
 					for (var i = 0, n = m1.w.length; i < n; i++) {
 						m1.dw[i] += m2.w[i] * out.dw[i];
 						m2.dw[i] += m1.w[i] * out.dw[i];
@@ -382,7 +382,7 @@ var R = {}; // the Recurrent library
 			return out;
 		},
 	}
-	var softmax = function (m) {
+	var softmax = function(m) {
 		var out = new Mat(m.n, m.d); // probability volume
 		var maxval = -999999;
 		for (var i = 0, n = m.w.length; i < n; i++) {
@@ -401,13 +401,13 @@ var R = {}; // the Recurrent library
 		// to set gradients directly on m
 		return out;
 	}
-	var Solver = function () {
+	var Solver = function() {
 		this.decay_rate = 0.999;
 		this.smooth_eps = 1e-8;
 		this.step_cache = {};
 	}
 	Solver.prototype = {
-		step: function (model, step_size, regc, clipval) {
+		step: function(model, step_size, regc, clipval) {
 			// perform parameter update
 			var solver_stats = {};
 			var num_clipped = 0;
@@ -443,7 +443,7 @@ var R = {}; // the Recurrent library
 			return solver_stats;
 		}
 	}
-	var initLSTM = function (input_size, hidden_sizes, output_size) {
+	var initLSTM = function(input_size, hidden_sizes, output_size) {
 		// hidden size should be a list
 		var model = {};
 		for (var d = 0; d < hidden_sizes.length; d++) { // loop over depths
@@ -469,7 +469,7 @@ var R = {}; // the Recurrent library
 		model['bd'] = new Mat(output_size, 1);
 		return model;
 	}
-	var forwardLSTM = function (G, model, hidden_sizes, x, prev) {
+	var forwardLSTM = function(G, model, hidden_sizes, x, prev) {
 		// forward prop for a single tick of LSTM
 		// G is graph to append ops to
 		// model contains LSTM parameters
@@ -527,11 +527,11 @@ var R = {}; // the Recurrent library
 			'o': output
 		};
 	}
-	var sig = function (x) {
+	var sig = function(x) {
 		// helper function for computing sigmoid
 		return 1.0 / (1 + Math.exp(-x));
 	}
-	var maxi = function (w) {
+	var maxi = function(w) {
 		// argmax of array w
 		var maxv = w[0];
 		var maxix = 0;
@@ -544,7 +544,7 @@ var R = {}; // the Recurrent library
 		}
 		return maxix;
 	}
-	var samplei = function (w) {
+	var samplei = function(w) {
 		// sample argmax from w, assuming w are 
 		// probabilities that sum to one
 		var r = randf(0, 1);
@@ -587,10 +587,10 @@ var R = {}; // the Recurrent library
 })(R);
 // END OF RECURRENTJS
 var RL = {};
-(function (global) {
+(function(global) {
 	"use strict";
 	// syntactic sugar function for getting default parameter values
-	var getopt = function (opt, field_name, default_value) {
+	var getopt = function(opt, field_name, default_value) {
 		if (typeof opt === 'undefined') {
 			return default_value;
 		}
@@ -600,12 +600,12 @@ var RL = {};
 	var assert = R.assert;
 	var randi = R.randi;
 	var randf = R.randf;
-	var setConst = function (arr, c) {
+	var setConst = function(arr, c) {
 		for (var i = 0, n = arr.length; i < n; i++) {
 			arr[i] = c;
 		}
 	}
-	var sampleWeighted = function (p) {
+	var sampleWeighted = function(p) {
 		var r = Math.random();
 		var c = 0.0;
 		for (var i = 0, n = p.length; i < n; i++) {
@@ -624,7 +624,7 @@ var RL = {};
 	// - requires model of the environment :(
 	// - does not learn from experience :(
 	// - assumes finite MDP :(
-	var DPAgent = function (env, opt) {
+	var DPAgent = function(env, opt) {
 		this.V = null; // state value function
 		this.P = null; // policy distribution \pi(s,a)
 		this.env = env; // store pointer to environment
@@ -632,7 +632,7 @@ var RL = {};
 		this.reset();
 	}
 	DPAgent.prototype = {
-		reset: function () {
+		reset: function() {
 			// reset the agent's policy and value function
 			this.ns = this.env.getNumStates();
 			this.na = this.env.getMaxNumActions();
@@ -646,7 +646,7 @@ var RL = {};
 				}
 			}
 		},
-		act: function (s) {
+		act: function(s) {
 			// behave according to the learned policy
 			var poss = this.env.allowedActions(s);
 			var ps = [];
@@ -658,12 +658,12 @@ var RL = {};
 			var maxi = sampleWeighted(ps);
 			return poss[maxi];
 		},
-		learn: function () {
+		learn: function() {
 			// perform a single round of value iteration
 			self.evaluatePolicy(); // writes this.V
 			self.updatePolicy(); // writes this.P
 		},
-		evaluatePolicy: function () {
+		evaluatePolicy: function() {
 			// perform a synchronous update of the value function
 			var Vnew = zeros(this.ns);
 			for (var s = 0; s < this.ns; s++) {
@@ -685,7 +685,7 @@ var RL = {};
 			}
 			this.V = Vnew; // swap
 		},
-		updatePolicy: function () {
+		updatePolicy: function() {
 			// update policy to be greedy w.r.t. learned Value function
 			for (var s = 0; s < this.ns; s++) {
 				var poss = this.env.allowedActions(s);
@@ -716,7 +716,7 @@ var RL = {};
 	// QAgent uses TD (Q-Learning, SARSA)
 	// - does not require environment model :)
 	// - learns from experience :)
-	var TDAgent = function (env, opt) {
+	var TDAgent = function(env, opt) {
 		this.update = getopt(opt, 'update', 'qlearn'); // qlearn | sarsa
 		this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
 		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
@@ -739,7 +739,7 @@ var RL = {};
 		this.reset();
 	}
 	TDAgent.prototype = {
-		reset: function () {
+		reset: function() {
 			// reset the agent's policy and value function
 			this.ns = this.env.getNumStates();
 			this.na = this.env.getMaxNumActions();
@@ -770,10 +770,10 @@ var RL = {};
 			this.a0 = null;
 			this.a1 = null;
 		},
-		resetEpisode: function () {
+		resetEpisode: function() {
 			// an episode finished
 		},
-		act: function (s) {
+		act: function(s) {
 			// act according to epsilon greedy policy
 			var poss = this.env.allowedActions(s);
 			var probs = [];
@@ -795,7 +795,7 @@ var RL = {};
 			this.a1 = a;
 			return a;
 		},
-		learn: function (r1) {
+		learn: function(r1) {
 			// takes reward for previous action, which came from a call to act()
 			if (!(this.r0 == null)) {
 				this.learnFromTuple(this.s0, this.a0, this.r0, this.s1, this.a1, this.lambda);
@@ -806,7 +806,7 @@ var RL = {};
 			}
 			this.r0 = r1; // store this for next update
 		},
-		updateModel: function (s0, a0, r0, s1) {
+		updateModel: function(s0, a0, r0, s1) {
 			// transition (s0,a0) -> (r0,s1) was observed. Update environment model
 			var sa = a0 * this.ns + s0;
 			if (this.env_model_s[sa] === -1) {
@@ -816,7 +816,7 @@ var RL = {};
 			this.env_model_s[sa] = s1;
 			this.env_model_r[sa] = r0;
 		},
-		plan: function () {
+		plan: function() {
 			// order the states based on current priority queue information
 			var spq = [];
 			for (var i = 0, n = this.sa_seen.length; i < n; i++) {
@@ -829,7 +829,7 @@ var RL = {};
 					});
 				}
 			}
-			spq.sort(function (a, b) {
+			spq.sort(function(a, b) {
 				return a.p < b.p ? 1 : -1
 			});
 			// perform the updates
@@ -853,7 +853,7 @@ var RL = {};
 				this.learnFromTuple(s0, a0, r0, s1, a1, 0); // note lambda = 0 - shouldnt use eligibility trace here
 			}
 		},
-		learnFromTuple: function (s0, a0, r0, s1, a1, lambda) {
+		learnFromTuple: function(s0, a0, r0, s1, a1, lambda) {
 			var sa = a0 * this.ns + s0;
 			// calculate the target for Q(s,a)
 			if (this.update === 'qlearn') {
@@ -917,7 +917,7 @@ var RL = {};
 				this.updatePolicy(s0);
 			}
 		},
-		updatePriority: function (s, a, u) {
+		updatePriority: function(s, a, u) {
 			// used in planning. Invoked when Q[sa] += update
 			// we should find all states that lead to (s,a) and upgrade their priority
 			// of being update in the next planning step
@@ -942,7 +942,7 @@ var RL = {};
 				}
 			}
 		},
-		updatePolicy: function (s) {
+		updatePolicy: function(s) {
 			var poss = this.env.allowedActions(s);
 			// set policy at s to be the action that achieves max_a Q(s,a)
 			// first find the maxy Q values
@@ -983,7 +983,7 @@ var RL = {};
 			}
 		}
 	}
-	var DQNAgent = function (env, opt) {
+	var DQNAgent = function(env, opt) {
 		this.gamma = getopt(opt, 'gamma', 0.75); // future reward discount factor
 		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
 		this.alpha = getopt(opt, 'alpha', 0.01); // value function learning rate
@@ -996,7 +996,7 @@ var RL = {};
 		this.reset();
 	}
 	DQNAgent.prototype = {
-		reset: function () {
+		reset: function() {
 			this.nh = this.num_hidden_units; // number of hidden units
 			this.ns = this.env.getNumStates();
 			this.na = this.env.getMaxNumActions();
@@ -1018,7 +1018,7 @@ var RL = {};
 			this.a1 = null;
 			this.tderror = 0; // for visualization only...
 		},
-		toJSON: function () {
+		toJSON: function() {
 			// save function
 			var j = {};
 			j.nh = this.nh;
@@ -1027,14 +1027,14 @@ var RL = {};
 			j.net = R.netToJSON(this.net);
 			return j;
 		},
-		fromJSON: function (j) {
+		fromJSON: function(j) {
 			// load function
 			this.nh = j.nh;
 			this.ns = j.ns;
 			this.na = j.na;
 			this.net = R.netFromJSON(j.net);
 		},
-		forwardQ: function (net, s, needs_backprop) {
+		forwardQ: function(net, s, needs_backprop) {
 			var G = new R.Graph(needs_backprop);
 			var a1mat = G.add(G.mul(net.W1, s), net.b1);
 			var h1mat = G.tanh(a1mat);
@@ -1042,7 +1042,7 @@ var RL = {};
 			this.lastG = G; // back this up. Kind of hacky isn't it
 			return a2mat;
 		},
-		act: function (slist) {
+		act: function(slist) {
 			// convert to a Mat column vector
 			var s = new R.Mat(this.ns, 1);
 			s.setFrom(slist);
@@ -1061,7 +1061,7 @@ var RL = {};
 			this.a1 = a;
 			return a;
 		},
-		learn: function (r1) {
+		learn: function(r1) {
 			// perform an update on Q function
 			if (!(this.r0 == null) && this.alpha > 0) {
 				// learn from this tuple to get a sense of how "surprising" it is to the agent
@@ -1085,7 +1085,7 @@ var RL = {};
 			}
 			this.r0 = r1; // store for next update
 		},
-		learnFromTuple: function (s0, a0, r0, s1, a1) {
+		learnFromTuple: function(s0, a0, r0, s1, a1) {
 			// want: Q(s,a) = r + gamma * max_a' Q(s',a')
 			// compute the target Q value
 			var tmat = this.forwardQ(this.net, s1, false);
@@ -1106,7 +1106,7 @@ var RL = {};
 		}
 	}
 	// buggy implementation, doesnt work...
-	var SimpleReinforceAgent = function (env, opt) {
+	var SimpleReinforceAgent = function(env, opt) {
 		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
 		this.epsilon = getopt(opt, 'epsilon', 0.75); // for epsilon-greedy policy
 		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
@@ -1115,7 +1115,7 @@ var RL = {};
 		this.reset();
 	}
 	SimpleReinforceAgent.prototype = {
-		reset: function () {
+		reset: function() {
 			this.ns = this.env.getNumStates();
 			this.na = this.env.getMaxNumActions();
 			this.nh = 100; // number of hidden units
@@ -1138,7 +1138,7 @@ var RL = {};
 			this.baselineGraphs = [];
 			this.t = 0;
 		},
-		forwardActor: function (s, needs_backprop) {
+		forwardActor: function(s, needs_backprop) {
 			var net = this.actorNet;
 			var G = new R.Graph(needs_backprop);
 			var a1mat = G.add(G.mul(net.W1, s), net.b1);
@@ -1149,7 +1149,7 @@ var RL = {};
 				'G': G
 			}
 		},
-		forwardValue: function (s, needs_backprop) {
+		forwardValue: function(s, needs_backprop) {
 			var net = this.baselineNet;
 			var G = new R.Graph(needs_backprop);
 			var a1mat = G.add(G.mul(net.W1, s), net.b1);
@@ -1160,7 +1160,7 @@ var RL = {};
 				'G': G
 			}
 		},
-		act: function (slist) {
+		act: function(slist) {
 			// convert to a Mat column vector
 			var s = new R.Mat(this.ns, 1);
 			s.setFrom(slist);
@@ -1189,7 +1189,7 @@ var RL = {};
 			this.a1 = a;
 			return a;
 		},
-		learn: function (r1) {
+		learn: function(r1) {
 			// perform an update on Q function
 			this.rewardHistory.push(r1);
 			var n = this.rewardHistory.length;
@@ -1257,7 +1257,7 @@ var RL = {};
 		},
 	}
 	// buggy implementation as well, doesn't work
-	var RecurrentReinforceAgent = function (env, opt) {
+	var RecurrentReinforceAgent = function(env, opt) {
 		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
 		this.epsilon = getopt(opt, 'epsilon', 0.1); // for epsilon-greedy policy
 		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
@@ -1266,7 +1266,7 @@ var RL = {};
 		this.reset();
 	}
 	RecurrentReinforceAgent.prototype = {
-		reset: function () {
+		reset: function() {
 			this.ns = this.env.getNumStates();
 			this.na = this.env.getMaxNumActions();
 			this.nh = 40; // number of hidden units
@@ -1288,7 +1288,7 @@ var RL = {};
 			this.a0 = null;
 			this.a1 = null;
 		},
-		act: function (slist) {
+		act: function(slist) {
 			// convert to a Mat column vector
 			var s = new R.Mat(this.ns, 1);
 			s.setFrom(slist);
@@ -1316,7 +1316,7 @@ var RL = {};
 			this.a1 = a;
 			return a;
 		},
-		learn: function (r1) {
+		learn: function(r1) {
 			// perform an update on Q function
 			this.rewardHistory.push(r1);
 			var n = this.rewardHistory.length;
@@ -1382,7 +1382,7 @@ var RL = {};
 		},
 	}
 	// Currently buggy implementation, doesnt work
-	var DeterministPG = function (env, opt) {
+	var DeterministPG = function(env, opt) {
 		this.gamma = getopt(opt, 'gamma', 0.5); // future reward discount factor
 		this.epsilon = getopt(opt, 'epsilon', 0.5); // for epsilon-greedy policy
 		this.alpha = getopt(opt, 'alpha', 0.001); // actor net learning rate
@@ -1391,7 +1391,7 @@ var RL = {};
 		this.reset();
 	}
 	DeterministPG.prototype = {
-		reset: function () {
+		reset: function() {
 			this.ns = this.env.getNumStates();
 			this.na = this.env.getMaxNumActions();
 			this.nh = 100; // number of hidden units
@@ -1411,7 +1411,7 @@ var RL = {};
 			this.a1 = null;
 			this.t = 0;
 		},
-		forwardActor: function (s, needs_backprop) {
+		forwardActor: function(s, needs_backprop) {
 			var net = this.actorNet;
 			var G = new R.Graph(needs_backprop);
 			var a1mat = G.add(G.mul(net.W1, s), net.b1);
@@ -1422,7 +1422,7 @@ var RL = {};
 				'G': G
 			}
 		},
-		act: function (slist) {
+		act: function(slist) {
 			// convert to a Mat column vector
 			var s = new R.Mat(this.ns, 1);
 			s.setFrom(slist);
@@ -1449,7 +1449,7 @@ var RL = {};
 			this.a1 = a;
 			return a;
 		},
-		utilJacobianAt: function (s) {
+		utilJacobianAt: function(s) {
 			var ujacobian = new R.Mat(this.ntheta, this.na);
 			for (var a = 0; a < this.na; a++) {
 				R.netZeroGrads(this.actorNet);
@@ -1461,7 +1461,7 @@ var RL = {};
 			}
 			return ujacobian;
 		},
-		learn: function (r1) {
+		learn: function(r1) {
 			// perform an update on Q function
 			//this.rewardHistory.push(r1);
 			if (!(this.r0 == null)) {
@@ -1544,7 +1544,7 @@ var Trevel = {
 	lbcount: 0,
 	nextBet: "",
 	previousReward: 0,
-	addBet: function (bet, outcome) {
+	addBet: function(bet, outcome) {
 		if (bet === "LB" && outcome === "Win") {
 			this.betHistory.push("LO");
 			this.betOutcomes.push("W");
@@ -1569,7 +1569,7 @@ var Trevel = {
 		}
 		this.totalBets++;
 	},
-	calculateProbabilities: function () {
+	calculateProbabilities: function() {
 		this.hbProbability = this.hbCount / this.betHistory.length;
 		this.lbProbability = this.lbcount / this.betHistory.length;
 		this.winRate = this.totalWins / this.totalBets;
@@ -1577,27 +1577,27 @@ var Trevel = {
 			this.profit = this.getProfit();
 		}
 	},
-	getCurrentBalance: function () {
+	getCurrentBalance: function() {
 		return parseFloat($('#balance').html());
 	},
-	placeHighBet: function () {
+	placeHighBet: function() {
 		$('#double_your_btc_bet_hi_button').click();
 	},
-	placeLowBet: function () {
+	placeLowBet: function() {
 		$('#double_your_btc_bet_lo_button').click();
 	},
-	setBetAmount: function (amount) {
+	setBetAmount: function(amount) {
 		var elem = document.getElementById("double_your_btc_stake");
 		elem.value = amount;
 	},
-	setOutcome: function (bet) {
+	setOutcome: function(bet) {
 		if ($('#double_your_btc_bet_lose').html() !== '') {
 			this.addBet(bet, "Loose");
 		} else {
 			this.addBet(bet, "Win");
 		}
 	},
-	prepareBet: function () {
+	prepareBet: function() {
 		this.calculateProbabilities();
 		if (this.betHistory.length < 10) {
 			if (this.useMartingale === true && this.betHistory.length > 12) {
@@ -1626,7 +1626,7 @@ var Trevel = {
 			}
 		}
 	},
-	placeBet: function () {
+	placeBet: function() {
 		if (this.nextBet === "HB") {
 			this.placeHighBet();
 		} else if (this.nextBet === "LB") {
@@ -1642,23 +1642,23 @@ var Trevel = {
 			this.placeLowBet();
 		}
 	},
-	getProfit: function () {
+	getProfit: function() {
 		return (this.getCurrentBalance() - this.startingBalance).toFixed(8);
 	},
-	getNumStates: function () {
+	getNumStates: function() {
 		return 8;
 	},
-	getMaxNumActions: function () {
+	getMaxNumActions: function() {
 		return 2;
 	},
-	getSentiment: function (bet) {
+	getSentiment: function(bet) {
 		if (bet === "HI") {
 			return 1;
 		} else {
 			return 0;
 		}
 	},
-	getPreviousBets: function () {
+	getPreviousBets: function() {
 		var hist = [];
 		if (this.betHistory.length > 12) {
 			hist.push(this.getSentiment(this.betHistory[this.betHistory.length - 1]));
@@ -1674,11 +1674,11 @@ var Trevel = {
 		}
 		return hist;
 	},
-	getAgentState: function () { //we'll observe the last 8 bets
+	getAgentState: function() { //we'll observe the last 8 bets
 		var s = this.getPreviousBets();
 		return s;
 	},
-	getReward: function () {
+	getReward: function() {
 		var reward = 0;
 		var out1 = this.betOutcomes[this.betOutcomes.length - 1];
 		var out2 = this.betOutcomes[this.betOutcomes.length - 2];
@@ -1715,10 +1715,10 @@ var Trevel = {
 		return reward;
 	},
 	//for raw testing only
-	randomNumber: function (min, max) {
+	randomNumber: function(min, max) {
 		return Math.floor(Math.random() * (max - min + 1) + min);
 	},
-	getTestOutcome: function (random) {
+	getTestOutcome: function(random) {
 		if (random % 2 === 0) {
 			return "HI";
 		} else {
@@ -1726,7 +1726,7 @@ var Trevel = {
 		}
 	},
 	//random string for random seed
-	rString: function (length, chars) {
+	rString: function(length, chars) {
 		var result = '';
 		var length = 16;
 		var chars = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789abcdefghijklmnopqrstuvwxyz'
@@ -1734,11 +1734,11 @@ var Trevel = {
 		return result;
 	},
 	//set client seed as random string
-	rSeed: function () {
+	rSeed: function() {
 		$('#next_client_seed').val(rString());
 	},
 	//initialize this
-	init: function () {
+	init: function() {
 		this.startingBalance = this.currentBalance = parseFloat($('#balance').html());
 		this.setBetAmount(this.minBet);
 		this.stop = true;
@@ -1748,7 +1748,7 @@ var Trevel = {
 
 	},
 	// ask user config variables
-	config: function () {
+	config: function() {
 
 		//define temporary variables
 		var maxb = 0, minb = 0;
@@ -1782,7 +1782,7 @@ var Trevel = {
 		//start betting
 		startbetting();
 	},
-	stopbets: function () {
+	stopbets: function() {
 		env.stop = true;
 		clearInterval(interval);
 		console.log('Bet session has been stopped, to start over click start.');
@@ -1816,7 +1816,7 @@ function startbetting() {
 	console.log('Starting bet session, to stop click STOP BOT');
 	document.getElementById("free_play_link_li").innerHTML = '<a href="#" onclick="Trevel.stopbets()" class="free_play_link">STOP BOT</a>';
 	env.stop = false;
-	interval = setInterval(function () { loop(); }, env.betSpeed);
+	interval = setInterval(function() { loop(); }, env.betSpeed);
 }
 console.clear();
 console.log('You are using Trevel, with ReinforceJS');

From 3fed033681bb876e8e1847512f415c19f330ee0d Mon Sep 17 00:00:00 2001
From: nickisghosty <nickisghosty@gmail.com>
Date: Sun, 22 Oct 2017 08:17:55 -0700
Subject: [PATCH 4/6] fixed random seed

Forgot the `this.` prefix when calling this.rString(); also changed the client seed to be re-randomized on every roll.
---
 DQ-Trevel.js | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/DQ-Trevel.js b/DQ-Trevel.js
index 358e04b..dbcf15a 100644
--- a/DQ-Trevel.js
+++ b/DQ-Trevel.js
@@ -1735,7 +1735,7 @@ var Trevel = {
 	},
 	//set client seed as random string
 	rSeed: function() {
-		$('#next_client_seed').val(rString());
+		$('#next_client_seed').val(this.rString());
 	},
 	//initialize this
 	init: function() {
@@ -1825,9 +1825,8 @@ console.log('Click it to set the config. Note: These settings are not persistent
 console.log('To change the default values for these settings, search the script for "prompt"');
 console.log('Enjoy');
 function loop() {
-	if (env.profit > 0.00000500) {
 		env.rSeed();
-	}
+	
 	if (env.stop === false) {
 		var state = env.getAgentState();
 		var action = agent.act(state);

From e895e4a6e77f34ec9c08aba5d12ec57727518bcf Mon Sep 17 00:00:00 2001
From: nickisghosty <nickisghosty@gmail.com>
Date: Sun, 22 Oct 2017 10:00:55 -0700
Subject: [PATCH 5/6] added more of a log

---
 DQ-Trevel.js | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/DQ-Trevel.js b/DQ-Trevel.js
index dbcf15a..4455bef 100644
--- a/DQ-Trevel.js
+++ b/DQ-Trevel.js
@@ -1516,11 +1516,12 @@ var Trevel = {
 	//settings you can change
 	stop: true,
 	maxBet: 0.00001,
-	minBet: 0.00000005,
+	minBet: 0.00000001,
 	swap: true,
 	betSpeed: 100,//change this on init
 	verbose: true,
 	isTesting: false,
+	newseed: '', // 'win' to change every win, 'lose' to change every loss, blank for every roll
 	//money management
 	useKelly: false,//martingale performs better on live account!
 	korm: false,
@@ -1542,6 +1543,10 @@ var Trevel = {
 	lbProbability: 0,
 	hbCount: 0,
 	lbcount: 0,
+	hbw: 0,
+	hbl: 0,
+	lbw: 0,
+	lbl: 0,
 	nextBet: "",
 	previousReward: 0,
 	addBet: function(bet, outcome) {
@@ -1550,22 +1555,26 @@ var Trevel = {
 			this.betOutcomes.push("W");
 			this.totalWins++;
 			this.lbcount++;
+			this.lbw++;
 		}
 		if (bet === "LB" && outcome === "Loose") {
 			this.betHistory.push("HI");
 			this.hbCount++;
 			this.betOutcomes.push("L");
+			this.lbl++;
 		}
 		if (bet === "HB" && outcome === "Win") {
 			this.betHistory.push("HI");
 			this.totalWins++;
 			this.hbCount++;
 			this.betOutcomes.push("W");
+			this.hbw++;
 		}
 		if (bet === "HB" && outcome === "Loose") {
 			this.betHistory.push("LO");
 			this.lbcount++;
 			this.betOutcomes.push("L");
+			this.hbl++;
 		}
 		this.totalBets++;
 	},
@@ -1593,7 +1602,13 @@ var Trevel = {
 	setOutcome: function(bet) {
 		if ($('#double_your_btc_bet_lose').html() !== '') {
 			this.addBet(bet, "Loose");
+			if (this.newseed == 'lose') {
+				this.rSeed();
+			}
 		} else {
+			if (this.newseed == 'win') {
+				this.rSeed();
+			}
 			this.addBet(bet, "Win");
 		}
 	},
@@ -1761,6 +1776,7 @@ var Trevel = {
 		this.martingaleMultiplier = prompt('Bet multiplier on lose', 2);
 		this.swap = prompt('True for swap enabled, false for disabled', 'true');
 		this.korm = prompt('True to enable Kelly, false to enabled martingale, leave blank for both', 'false');
+		this.newseed = prompt('Randomize client seed every "win", "lose", or leave blank for every roll', '');
 		this.betSpeed = prompt('Wait time before next bet is placed in ms', 3000);
 
 		//convert satoshi to btc
@@ -1825,8 +1841,11 @@ console.log('Click it to set the config. Note: These settings are not persistent
 console.log('To change the default values for these settings, search the script for "prompt"');
 console.log('Enjoy');
 function loop() {
+	if (Trevel.newseed == '') {
 		env.rSeed();
-	
+	}
+
+
 	if (env.stop === false) {
 		var state = env.getAgentState();
 		var action = agent.act(state);
@@ -1847,8 +1866,19 @@ function loop() {
 			}
 			if (env.verbose === true) {
 				env.calculateProbabilities();
+				clear();
+
 				//console.log("Machine Bet: " + action + "{" + env.nextBet + "} isKelly: " + env.useKelly + " isMartingale: " + env.useMartingale);
-				console.log("Profit: " + env.profit + " WinRate: " + (env.winRate * 100).toFixed(2));
+				console.log("Bet Number: " + env.totalBets + " | Outcome: " + outcome);
+				console.log("Win Rate: " + (env.winRate * 100).toFixed(2) + " | Hi/Lo Win Rate: " + ((env.hbw / env.totalBets) * 100).toFixed(2) + " / " + ((env.lbw / env.totalBets) * 100).toFixed(2));
+				console.log("Wins/Loses: " + env.totalWins + " / " + (env.totalBets - env.totalWins));
+				console.log("Hi/Lo Bets: " + env.hbCount + " / " + env.lbcount);
+				console.log("Hi/Lo Wins: " + env.hbw + " / " + env.lbw);
+				console.log("Hi/Lo Loses: " + env.hbl + " / " + env.lbl);
+				console.log("Hi Probability: " + env.hbProbability.toFixed(2) + " | Lo Probability: " + env.lbProbability.toFixed(2));
+				console.log("Client Seed: " + $('#next_client_seed').val() + " | Lotto Tickets: " + $('#user_lottery_tickets').html() + " | Rewards Points: " + $('.user_reward_points').text());
+				console.log("Profit: " + env.profit + " | Balance: " + env.getCurrentBalance().toFixed(8));
+
 			}
 		} else {
 			console.log("Action: " + action);

From 82ba6030384799402c7431e7b293079ecc7b883f Mon Sep 17 00:00:00 2001
From: nickisghosty <nickisghosty@gmail.com>
Date: Sun, 22 Oct 2017 10:54:43 -0700
Subject: [PATCH 6/6] Made log easier to read

---
 DQ-Trevel.js | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/DQ-Trevel.js b/DQ-Trevel.js
index 4455bef..8dcbc2e 100644
--- a/DQ-Trevel.js
+++ b/DQ-Trevel.js
@@ -1869,15 +1869,19 @@ function loop() {
 				clear();
 
 				//console.log("Machine Bet: " + action + "{" + env.nextBet + "} isKelly: " + env.useKelly + " isMartingale: " + env.useMartingale);
-				console.log("Bet Number: " + env.totalBets + " | Outcome: " + outcome);
-				console.log("Win Rate: " + (env.winRate * 100).toFixed(2) + " | Hi/Lo Win Rate: " + ((env.hbw / env.totalBets) * 100).toFixed(2) + " / " + ((env.lbw / env.totalBets) * 100).toFixed(2));
-				console.log("Wins/Loses: " + env.totalWins + " / " + (env.totalBets - env.totalWins));
-				console.log("Hi/Lo Bets: " + env.hbCount + " / " + env.lbcount);
-				console.log("Hi/Lo Wins: " + env.hbw + " / " + env.lbw);
-				console.log("Hi/Lo Loses: " + env.hbl + " / " + env.lbl);
-				console.log("Hi Probability: " + env.hbProbability.toFixed(2) + " | Lo Probability: " + env.lbProbability.toFixed(2));
-				console.log("Client Seed: " + $('#next_client_seed').val() + " | Lotto Tickets: " + $('#user_lottery_tickets').html() + " | Rewards Points: " + $('.user_reward_points').text());
-				console.log("Profit: " + env.profit + " | Balance: " + env.getCurrentBalance().toFixed(8));
+				
+				console.log("| Client Seed: " + $('#next_client_seed').val() + " | Lotto Tickets: " + $('#user_lottery_tickets').html() + " | Rewards Points: " + $('.user_reward_points').text());
+				console.log(" 8=========================== "+ env.totalBets + " ===========================D ~ ")
+				console.log("| Win Rate: " + (env.winRate * 100).toFixed(2) + " | Hi Win Rate: " + ((env.hbw / env.totalBets) * 100).toFixed(2) + " | Lo Win Rate: " + ((env.lbw / env.totalBets) * 100).toFixed(2) + " |");
+				console.log("|____________________________________________________________|");
+				console.log("| Total Wins: " + env.totalWins + "  | Total Hi: " + env.hbCount + "  |  Hi Wins: " + env.hbw + "  |  Hi Loss " + env.hbl+ "  |");
+				console.log("| Total Loss: " + (env.totalBets - env.totalWins) + "  |  Total Lo " + env.lbcount + "  |  Lo Wins: " + env.lbw + "  |  Lo Loss " + env.lbl+"  |");
+				console.log("|____________________________________________________________|");
+				console.log("|    Hi Probability: " + env.hbProbability.toFixed(2) + "     |     Lo Probability: " + env.lbProbability.toFixed(2) + "     |");
+				console.log("|____________________________________________________________|");
+				console.log("|     Last Bet: " + env.betHistory[env.betHistory.length - 1]+ "    |     Outcome: " + outcome+ "    |     Stake: "+ env.betAmount+ "     |");
+				console.log("|      Profit: " + env.profit + "     |     Balance: " + env.getCurrentBalance().toFixed(8)+ "      |");
+				console.log("|____________________________________________________________|")
 
 			}
 		} else {