Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .babelrc
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
{
"presets": ["es2015"]
}
"presets": ["env"]
}
13 changes: 10 additions & 3 deletions dist/FuzzySearch.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ FuzzySearch.defaultOptions =
thresh_include: 2.0, // To be a candidate, score of item must be at least this
thresh_relative_to_best: 0.5, // and be at least this fraction of the best score
field_good_enough: 20, // If a field have this score, stop searching other fields. (field score is before item related bonus)
max_inners: null, // With large datasets, abandon searches early when there are a lot of matches ("high positive count mitigation", see https://github.com/jeancroy/fuzz-aldrin-plus#high-positive-count-mitigation for more details)

//
// Scoring, bonus
Expand Down Expand Up @@ -1418,6 +1419,7 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
var options = this.options;
var opt_bpd = options.bonus_position_decay;
var opt_fge = options.field_good_enough;
var opt_max_inners = options.max_inners;
var opt_trb = options.thresh_relative_to_best;
var opt_score_tok = options.score_per_token;
var opt_round = options.score_round;
Expand Down Expand Up @@ -1504,6 +1506,13 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
if (tmp > thresh_include) thresh_include = tmp;
}

//
// Don't consider more expensive calculations if max_inners has been reached
///

var max_inners_reached = (opt_max_inners && results.length >= opt_max_inners);
if (max_inners_reached) break;

//
//candidate for best result ? push to list
//
Expand Down Expand Up @@ -1640,8 +1649,6 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
}
});



extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {

/**
Expand Down Expand Up @@ -1885,7 +1892,7 @@ function normalize(str) {

function getDiacriticsMap() {
// replace most common accents in french-spanish by their base letter
//"ãàáäâæẽèéëêìíïîõòóöôœùúüûñç"
//"ãàáäâæẽèéëêìíïîõòóöôœùúüûñç"
var from = "\xE3\xE0\xE1\xE4\xE2\xE6\u1EBD\xE8\xE9\xEB\xEA\xEC\xED\xEF\xEE\xF5\xF2\xF3\xF6\xF4\u0153\xF9\xFA\xFC\xFB\xF1\xE7";
var to = "aaaaaaeeeeeiiiioooooouuuunc";
var diacriticsMap = {};
Expand Down
2 changes: 1 addition & 1 deletion dist/FuzzySearch.min.js

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
"dependencies": {},
"devDependencies": {
"babel-core": "^6.26.0",
"babel-preset-es2015": "^6.24.1",
"babel-preset-es2017": "^6.24.1",
"babel-preset-env": "^1.6.1",
"chai": "^4.1.2",
"grunt": "^0.4.5",
"grunt-contrib-concat": "^0.5.1",
Expand Down
23 changes: 14 additions & 9 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -451,11 +451,6 @@ We multiply Jaro-like score by llcs and the score become:
Squaring m has the advantage of giving an even better score to good matches and a worse score to bad matches. It lowers the likelihood that multiple bad matches out-score a single good match. A character matched in a good token is now worth more than a character matched in a bad token.







Configuration
==============

Expand All @@ -476,6 +471,7 @@ Configuration
| highlight_tk_max_size | 64 | max size of a token for highlight algorithm (it is BVMAXSIZE(31) for search)|
| highlight_before | ... | tag to put before the highlight <br> `default: <strong class="highlight">`|
| highlight_after | ... | after the highlight <br> `default: </strong>` |
| max_inners | null | Optional. High positive count mitigation for large datasets. See the equivalent [fuzz-aldrin-plus](https://github.com/jeancroy/fuzz-aldrin-plus/blob/c8cf693ee77909d0dbfbc90b452733bba5e5c8bd/README.md#high-positive-count-mitigation) option for details.|


Algorithms
Expand Down Expand Up @@ -641,7 +637,6 @@ More precisely we'll store sequence of consecutive increase instead of each incr
otherwise it is copied to current line.



References
==========

Expand Down Expand Up @@ -674,11 +669,21 @@ Comparison of some string similarity measurements
> https://asecuritysite.com/forensics/simstring


Tests
=====
Development & Tests
===================

See [src/readme.md](src/readme.md) for some information about how the code is laid out.

Install Dependencies
--------------------

yarn

Run Tests
---------

Tests are located in test/ and use Mocha, JSDom, and Babel for ES6 syntax support (in tests only).

To run tests:

npm run test
yarn test
1 change: 1 addition & 0 deletions src/init.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ FuzzySearch.defaultOptions =
thresh_include: 2.0, // To be a candidate, score of item must be at least this
thresh_relative_to_best: 0.5, // and be at least this fraction of the best score
field_good_enough: 20, // If a field have this score, stop searching other fields. (field score is before item related bonus)
max_inners: null, // With large datasets, abandon searches early when there are a lot of matches ("high positive count mitigation", see https://github.com/jeancroy/fuzz-aldrin-plus#high-positive-count-mitigation for more details)

//
// Scoring, bonus
Expand Down
10 changes: 8 additions & 2 deletions src/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
var options = this.options;
var opt_bpd = options.bonus_position_decay;
var opt_fge = options.field_good_enough;
var opt_max_inners = options.max_inners;
var opt_trb = options.thresh_relative_to_best;
var opt_score_tok = options.score_per_token;
var opt_round = options.score_round;
Expand Down Expand Up @@ -161,6 +162,13 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {
if (tmp > thresh_include) thresh_include = tmp;
}

//
// Don't consider more expensive calculations if max_inners has been reached
///

var max_inners_reached = (opt_max_inners && results.length >= opt_max_inners);
if (max_inners_reached) break;

//
//candidate for best result ? push to list
//
Expand Down Expand Up @@ -296,5 +304,3 @@ extend(FuzzySearch.prototype, /** @lends {FuzzySearch.prototype} */ {

}
});


21 changes: 21 additions & 0 deletions test/options/options.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import { expect } from 'chai';

import FuzzySearch from '../../dist/FuzzySearch';

// Checks that an option supplied to the FuzzySearch constructor can be read
// back from the instance's `options` object.
describe('saving options', () => {
it('saves options in searcher.options', () => {
// Two minimal documents; the test only needs a non-empty source.
const source = [
{ _id: 1, title: 'Item 1', domain: 'item1.com' },
{ _id: 2, title: 'Item 2', domain: 'item2.com' },
];

// Constructor settings, including the one option whose value is asserted below.
const settings = {
source,
keys: { title: 'title', domain: 'domain' },
identify_item: (item) => item._id,
field_good_enough: 19,
};

const searcher = new FuzzySearch(settings);
const { field_good_enough: storedValue } = searcher.options;

expect(storedValue).to.equal(19);
});
});
Loading