jeancroy · aguynamedben · Mar 1, 2018 · Mar 1, 2018
diff --git a/.babelrc b/.babelrc
@@ -1,3 +1,3 @@
 {
-  "presets": ["es2015"]
-}
+  "presets": ["env"]
+}
diff --git a/dist/FuzzySearch.js b/dist/FuzzySearch.js
@@ -35,6 +35,7 @@ FuzzySearch.defaultOptions =
     thresh_include: 2.0,              // To be a candidate, score of item must be at least this
     thresh_relative_to_best: 0.5,     // and be at least this fraction of the best score
     field_good_enough: 20,            // If a field have this score, stop searching other fields. (field score is before item related bonus)
+    max_inners: null,                 // With large datasets, abandon searches early when there are a lot of matches ("high positive count mitigation", see https://github.com/jeancroy/fuzz-aldrin-plus#high-positive-count-mitigation for more details)
 
     //
     //  Scoring, bonus
@@ -1418,6 +1419,7 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
         var options = this.options;
         var opt_bpd = options.bonus_position_decay;
         var opt_fge = options.field_good_enough;
+        var opt_max_inners = options.max_inners;
         var opt_trb = options.thresh_relative_to_best;
         var opt_score_tok = options.score_per_token;
         var opt_round = options.score_round;
@@ -1504,6 +1506,13 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
                 if (tmp > thresh_include) thresh_include = tmp;
             }
 
+            //
+            // Don't consider more expensive calculations if max_inners has been reached
+            //
+
+            var max_inners_reached = (opt_max_inners && results.length >= opt_max_inners);
+            if (max_inners_reached) break;
+
             //
             //candidate for best result ? push to list
             //
@@ -1640,8 +1649,6 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
     }
 });
 
-
-
 extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
 
     /**
@@ -1885,7 +1892,7 @@ function normalize(str) {
 
 function getDiacriticsMap() {
     // replace most common accents in french-spanish by their base letter
-    //"������?��������������������"
+    //"ãàáäâæẽèéëêìíïîõòóöôœùúüûñç"
     var from = "\xE3\xE0\xE1\xE4\xE2\xE6\u1EBD\xE8\xE9\xEB\xEA\xEC\xED\xEF\xEE\xF5\xF2\xF3\xF6\xF4\u0153\xF9\xFA\xFC\xFB\xF1\xE7";
     var to = "aaaaaaeeeeeiiiioooooouuuunc";
     var diacriticsMap = {};

diff --git a/dist/FuzzySearch.min.js b/dist/FuzzySearch.min.js
diff --git a/package.json b/package.json
@@ -6,8 +6,7 @@
   "dependencies": {},
   "devDependencies": {
     "babel-core": "^6.26.0",
-    "babel-preset-es2015": "^6.24.1",
-    "babel-preset-es2017": "^6.24.1",
+    "babel-preset-env": "^1.6.1",
     "chai": "^4.1.2",
     "grunt": "^0.4.5",
     "grunt-contrib-concat": "^0.5.1",

diff --git a/readme.md b/readme.md
@@ -451,11 +451,6 @@ We multiply Jaro-like score by llcs and the score become:
 Having m squared give the advantage of even better score for good matches and worse score for bad match. It lower the likelihood of multiple bad match out-score a single good match. A character matched in a good token is now worth more than a character matched in a bad token.
 
 
-
-
-
-
-
 Configuration
 ==============
 
@@ -476,6 +471,7 @@ Configuration
 | highlight_tk_max_size    | 64      | max size of a token for highlight algorithm (it is BVMAXSIZE(31) for search)|
 | highlight_before         | ...     |   tag to put before the highlight <br> `default: <strong class="highlight">`|
 | highlight_after          |  ...    | after the highlight <br> `default: </strong>`   |
+| max_inners               | null    | Optional. High positive count mitigation for large datasets: the search stops early once this many results have been collected. See the equivalent option in [fuzz-aldrin-plus](https://github.com/jeancroy/fuzz-aldrin-plus/blob/c8cf693ee77909d0dbfbc90b452733bba5e5c8bd/README.md#high-positive-count-mitigation)|
 
 
 Algorithms
@@ -641,7 +637,6 @@ More precisely we'll store sequence of consecutive increase instead of each incr
  otherwise it is copied to current line.
 
 
-
 References
 ==========
 
@@ -674,11 +669,21 @@ Comparison of some string similarity measurements
 > https://asecuritysite.com/forensics/simstring
 
 
-Tests
-=====
+Development & Tests
+===================
+
+See [src/readme.md](src/readme.md) for some information about how the code is laid out.
+
+Install Dependencies
+--------------------
+
+    yarn
+
+Run Tests
+---------
 
 Tests are located in test/ and use Mocha, JSDom, and Babel for ES6 syntax support (in tests only).
 
 To run tests:
 
-    npm run test
+    yarn test
diff --git a/src/init.js b/src/init.js
@@ -23,6 +23,7 @@ FuzzySearch.defaultOptions =
     thresh_include: 2.0,              // To be a candidate, score of item must be at least this
     thresh_relative_to_best: 0.5,     // and be at least this fraction of the best score
     field_good_enough: 20,            // If a field have this score, stop searching other fields. (field score is before item related bonus)
+    max_inners: null,                 // With large datasets, abandon searches early when there are a lot of matches ("high positive count mitigation", see https://github.com/jeancroy/fuzz-aldrin-plus#high-positive-count-mitigation for more details)
 
     //
     //  Scoring, bonus

diff --git a/src/search.js b/src/search.js
@@ -75,6 +75,7 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
         var options = this.options;
         var opt_bpd = options.bonus_position_decay;
         var opt_fge = options.field_good_enough;
+        var opt_max_inners = options.max_inners;
         var opt_trb = options.thresh_relative_to_best;
         var opt_score_tok = options.score_per_token;
         var opt_round = options.score_round;
@@ -161,6 +162,13 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
                 if (tmp > thresh_include) thresh_include = tmp;
             }
 
+            //
+            // Don't consider more expensive calculations if max_inners has been reached
+            //
+
+            var max_inners_reached = (opt_max_inners && results.length >= opt_max_inners);
+            if (max_inners_reached) break;
+
             //
             //candidate for best result ? push to list
             //
@@ -296,5 +304,3 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
 
     }
 });
-
-
diff --git a/test/options/options.spec.js b/test/options/options.spec.js
@@ -0,0 +1,21 @@
+import { expect } from 'chai';
+
+import FuzzySearch from '../../dist/FuzzySearch';
+
+describe('saving options', () => { // options passed to the constructor should be readable back from searcher.options
+  it('saves options in searcher.options', () => {
+    const docs = [ // minimal two-item data source
+      { _id: 1, title: 'Item 1', domain: 'item1.com' },
+      { _id: 2, title: 'Item 2', domain: 'item2.com' },
+    ];
+    // Use a non-default field_good_enough (defaultOptions sets 20) so the assertion cannot pass by accident.
+    const searcher = new FuzzySearch({
+      source: docs,
+      keys: { title: 'title', domain: 'domain' },
+      identify_item: doc => doc._id,
+      field_good_enough: 19,
+    });
+    // The value supplied above must be stored on the instance's options object.
+    expect(searcher.options.field_good_enough).to.equal(19);
+  });
+});