Testing, licensing, packaging etc

Sentimentron · Sentimentron · commit a2ba56576123 · 2016-07-26T10:29:36.000+01:00
diff --git a/.npmignore b/.npmignore
@@ -0,0 +1,37 @@
+# Logs
+logs
+*.log
+npm-debug.log*
+
+# Runtime data
+pids
+*.pid
+*.seed
+
+# Directory for instrumented libs generated by jscoverage/JSCover
+lib-cov
+
+# Coverage directory used by tools like istanbul
+coverage
+
+# nyc test coverage
+.nyc_output
+
+# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
+.grunt
+
+# node-waf configuration
+.lock-wscript
+
+# Compiled binary addons (http://nodejs.org/api/addons.html)
+build/Release
+
+# Dependency directories
+node_modules
+jspm_packages
+
+# Optional npm cache directory
+.npm
+
+# Optional REPL history
+.node_repl_history
diff --git a/Makefile b/Makefile
@@ -0,0 +1,4 @@
+test:
+		./node_modules/.bin/mocha --reporter spec
+
+.PHONY: test
diff --git a/README.md b/README.md
@@ -1,2 +1,27 @@
 # dracula-sentiment-node
-node.js/Javascript implementation of Dracula - adapted to do deep sentiment analysis using character embeddings.
+A quick way to get "good enough" sentiment analysis into your applications, this package uses character and word-level embeddings and LSTM networks to decide if a given text is either "positive" or "negative".
+
+## Installation
+
+    npm install dracula-sentiment --save
+
+## Usage
+    
+    var dracula = require('dracula-sentiment');
+    var text = "xoxo cant wait";
+    console.log(text, dracula.analyze(text));
+
+For best results, replace any non-ASCII characters with their closest ASCII equivalents using `unidecode` or a similar library.
+
+## Testing
+    
+    npm test
+
+Tests aren't very extensive at present. 
+
+## Contributing
+
+If you encounter any sentences where the classification is obviously wrong, open an issue and we'll work out a way to extend Dracula's training data so that it doesn't happen. Contributions to clean up the code and improve its style and performance are certainly welcome!
+
+## Release
+* 1.0.0 Original release
diff --git a/dracula.embeddings.js b/dracula.embeddings.js
@@ -19,7 +19,10 @@ function draculaGetEmbeddings(word, length) {
   }
 
   for (var i = ret.length; i < length; i++) {
-    ret.push(new Array(32).fill(0));
+    var r = [];
+    // Change from browser version: no ES6 Array.fill. Inner index is `j` because `var` is function-scoped and reusing `i` would clobber the outer padding counter.
+    for (var j = 0; j < 32; j++) r.push(0);
+    ret.push(r);
   }
 
   return ret;
diff --git a/dracula.js b/dracula.js
@@ -34,14 +34,18 @@ function dracula(content, visualize) {
   }
 
   var lstmOutput = lstmOutput1;
-
   // Mean-pooling
   var meanOutput = [];
   for (var i = 0; i < lstmOutput.length; i++) {
     if (lengths[i] == 0) break;
-    var cur = new Array(32).fill(0);
-    var max = new Array(32).fill(-Number.MAX_VALUE);
-    var min = new Array(32).fill(Number.MAX_VALUE);
+    var cur = [];
+    var min = [];
+    var max = [];
+    for (var j = 0; j < 32; j++) {
+        cur.push(0);
+        max.push(-Number.MAX_VALUE);
+        min.push(Number.MAX_VALUE);
+    }
     for (var j = 0; j < lengths[i]; j++) {
       cur = numeric.add(cur, lstmOutput[i][j]);
       max = numeric.max(max, lstmOutput[i][j]);
@@ -65,7 +69,10 @@ function dracula(content, visualize) {
       visualize2DActivation(lstmWords, "lstm-words-"+i, "Word-level LSTM")
   }
 
-  var finalPool = new Array(96).fill(0);
+  var finalPool = [];
+  for (var i = 0; i < 96; i++) {
+    finalPool.push(0);
+  }
   for (var i = 0; i < lstmWords.length; i++) {
     finalPool = numeric.add(finalPool, lstmWords[i]);
   }
@@ -80,6 +87,6 @@ function dracula(content, visualize) {
   if (visualize) {
     visualize2DActivation(probs, "probs-plot", "Softmax");
   }
-  return determineLabels(probs);
+  var output = determineLabels(probs); // declared locally so the labels do not leak into an implicit global
   return output.join(', ');
 }
diff --git a/dracula.lstm.js b/dracula.lstm.js
@@ -5,9 +5,9 @@ function draculaLSTM(values, prefix, backwards, dims) {
   // Basically a 2D version of what's in nn_lstm.py
 
   prefix = 'draculaParams_'+prefix;
-  var U = window[prefix+'_U'];
-  var W = window[prefix+'_W'];
-  var b = window[prefix+'_b'];
+  var U = eval(prefix+'_U');
+  var W = eval(prefix+'_W');
+  var b = eval(prefix+'_b');
 
   var sigmoid = function(t) {
     return numeric.div(1,
@@ -41,8 +41,11 @@ function draculaLSTM(values, prefix, backwards, dims) {
     stateBelow[i] = numeric.add(stateBelow[i], b)
   }
 
-  var h_ = new Array(dims).fill(0);
-  var c_ = new Array(dims).fill(0);
+  var h_ = [];
+  var c_ = [];
+  for (var i = 0; i < dims; i++) {
+    h_.push(0); c_.push(0);
+  }
   var ret = [];
 
   var tokens = [];
diff --git a/dracula.softmax.js b/dracula.softmax.js
@@ -1,22 +1,18 @@
 function draculaSoftmax(values) {
 
   // TODO: need to check if this U is the same as the one exported
-//  var act = numeric.dot(values, draculaParams_U);
   var act = [];
   for (var i = 0; i < values.length; i++) {
     var tmp = numeric.dot(values[i], draculaParams_U);
     tmp = numeric.add(tmp, draculaParams_b);
     act.push(tmp);
   }
 
-  //var exp = numeric.exp(act);
   var exp = [];
   for (var i = 0; i < act.length; i++) {
-    //var ex = numeric.exp(act[i] - Math.max(...act[i]))
     var ex = numeric.exp(act[i]);
     ex = numeric.div(ex, numeric.sum(ex))
     exp.push(ex);
-//    exp[i] = numeric.div(exp[i], numeric.sum(exp[i]));
   }
   return exp;
 }
@@ -36,9 +32,9 @@ function determineLabels(exp) {
       }
     }
     if (argMax == 0) {
-      ret.push("This tweet might be negative.");
+      ret.push("negative");
     } else if (argMax == 2) {
-      ret.push("This tweet may possibly be positive.");
+      ret.push("positive");
     } else {
       ret.push("Something weird's going on here: the argMax is not working "
       +" correctly");
diff --git a/index.js b/index.js
@@ -14,13 +14,14 @@ var fs = require('fs');
 // similar to the browser version as possible.
 var numeric = require('numeric');
 eval(fs.readFileSync('dracula.params.js')+'');
+eval(fs.readFileSync('dracula.embeddings.js')+'');
 eval(fs.readFileSync('dracula.lstm.js')+'');
 eval(fs.readFileSync('dracula.softmax.js')+'');
 eval(fs.readFileSync('dracula.tokenize.js')+'');
 eval(fs.readFileSync('dracula.js')+'');
 
  module.exports = {
-    analyse: function(string) {
+    analyze: function(string) {
         return dracula(string, false);
     }
 }
diff --git a/package.json b/package.json
@@ -30,5 +30,8 @@
   "devDependencies": {
     "chai": "^3.5.0",
     "mocha": "^2.5.3"
+  },
+  "scripts": {
+    "test": "make test"
   }
 }
diff --git a/test/index.js b/test/index.js
@@ -0,0 +1,13 @@
+var should = require('chai').should(),
+    dracula = require('../index'),
+    analyze = dracula.analyze;
+
+describe('#analyze', function() {
+    it('Should think "terror" is bad news', function() {
+        analyze("terror").should.equal("negative");
+    });
+
+    it('Should think "puppies" are good news', function() {
+        analyze('puppies').should.equal("positive");
+    });
+});

-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +test:
 +		./node_modules/.bin/mocha --reporter spec
++
 +.PHONY: test
Original file line number	Diff line number	Diff line change
`@@ -19,7 +19,10 @@ function draculaGetEmbeddings(word, length) {`
`19`	`19`	`}`
`20`	`20`
`21`	`21`	`for (var i = ret.length; i < length; i++) {`
`22`		`- ret.push(new Array(32).fill(0));`
	`22`	`+ var r = [];`
	`23`	`+ // Change from browser version: ES6 is messy`
	`24`	`+ for (var i = 0; i < 32; i++) r.push(0);`
	`25`	`+ ret.push(r);`
`23`	`26`	`}`
`24`	`27`
`25`	`28`	`return ret;`
Original file line number	Diff line number	Diff line change
`@@ -30,5 +30,8 @@`
`30`	`30`	`"devDependencies": {`
`31`	`31`	`"chai": "^3.5.0",`
`32`	`32`	`"mocha": "^2.5.3"`
	`33`	`+ },`
	`34`	`+ "scripts": {`
	`35`	`+ "test": "make test"`
`33`	`36`	`}`
`34`	`37`	`}`