Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions helper/diffPlaces.js
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ function isParentHierarchyDifferent(item1, item2){
}
}

// special case: treat the 'empire' and 'country' layers as equivalent for deduplication purposes
if (
(item1.layer === 'empire' && item2.layer === 'country') ||
(item1.layer === 'country' && item2.layer === 'empire')
) {
return false;
}

// special handling of postal codes, which we consider to be strictly
// unique within a single country/dependency regardless of the rest of
// the hierarchy (ie. we ignore other parent properties)
Expand Down Expand Up @@ -351,6 +359,17 @@ function layerDependentNormalization(names, layer) {
});
}

// empire / country: treat 'United States of America' as a synonym of 'United States'
if (layer === 'empire' || layer === 'country') {
_.forEach(names, (value, lang) => {
copy[lang] = field.getArrayValue(value).map(name => {
return name
.replace(/^(united states) of america$/i, '$1')
.trim();
});
});
}

// county
if( layer === 'county' ){
_.forEach(names, (value, lang) => {
Expand Down
168 changes: 168 additions & 0 deletions test/unit/middleware/dedupe.js
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,174 @@ module.exports.tests.priority = function(test, common) {
});
});

// Two whosonfirst records share the name 'New Zealand': one on the 'empire'
// layer and one on the 'country' layer. The test expects them to be treated
// as duplicates, with the country record being the one that is kept.
test('real-world test New Zealand: empire vs country', (t) => {
  // empire record: its parent hierarchy only references itself
  const empire = {
    'name': { 'default': 'New Zealand' },
    'source': 'whosonfirst',
    'source_id': '136253053',
    'layer': 'empire',
    'parent': { 'empire_id': 136253053 }
  };

  // country record: carries an extra alias and a different parent hierarchy
  const country = {
    'name': { 'default': ['New Zealand', 'Aotearoa'] },
    'source': 'whosonfirst',
    'source_id': '85633345',
    'layer': 'country',
    'parent': { 'continent_id': 102191583, 'country_id': 85633345 }
  };

  const req = { clean: { text: 'New Zealand', size: 100 } };

  // empire is listed first in this variant
  const res = { data: [empire, country] };

  dedupe(req, res, () => {
    t.equal(res.data.length, 1, 'results have fewer items than before');
    t.equal(res.data[0].layer, 'country', 'empire result removed');
    t.end();
  });
});

// Same 'New Zealand' fixtures as the empire-first variant, but with the
// country record listed before the empire record. The expected outcome is
// unchanged: a single result remains and it is the country.
test('real-world test New Zealand: empire vs country - inverted order', (t) => {
  // country record: carries an extra alias and its own parent hierarchy
  const country = {
    'name': { 'default': ['New Zealand', 'Aotearoa'] },
    'source': 'whosonfirst',
    'source_id': '85633345',
    'layer': 'country',
    'parent': { 'continent_id': 102191583, 'country_id': 85633345 }
  };

  // empire record: its parent hierarchy only references itself
  const empire = {
    'name': { 'default': 'New Zealand' },
    'source': 'whosonfirst',
    'source_id': '136253053',
    'layer': 'empire',
    'parent': { 'empire_id': 136253053 }
  };

  const req = { clean: { text: 'New Zealand', size: 100 } };

  // country is listed first in this variant
  const res = { data: [country, empire] };

  dedupe(req, res, () => {
    t.equal(res.data.length, 1, 'results have fewer items than before');
    t.equal(res.data[0].layer, 'country', 'empire result removed');
    t.end();
  });
});

// The country record is named 'United States' while the empire record is
// named 'United States of America'. Despite the differing names, the test
// expects the two to be deduplicated, keeping only the country record.
test('real-world test United States: empire vs country', (t) => {
  // country record: its parent hierarchy references the empire below
  const country = {
    'name': { 'default': 'United States' },
    'source': 'whosonfirst',
    'source_id': '85633793',
    'layer': 'country',
    'parent': { 'empire_id': 136253057, 'country_id': 85633793 }
  };

  // empire record: uses the longer '... of America' name
  const empire = {
    'name': { 'default': 'United States of America' },
    'source': 'whosonfirst',
    'source_id': '136253057',
    'layer': 'empire',
    'parent': { 'empire_id': 136253057 }
  };

  const req = { clean: { text: 'United States', size: 100 } };

  // country is listed first in this variant
  const res = { data: [country, empire] };

  dedupe(req, res, () => {
    t.equal(res.data.length, 1, 'results have fewer items than before');
    t.equal(res.data[0].layer, 'country', 'empire result removed');
    t.end();
  });
});

// Same 'United States' fixtures as the country-first variant, but with the
// empire record ('United States of America') listed before the country
// record. The expected outcome is unchanged: only the country remains.
test('real-world test United States: empire vs country - inverted order', (t) => {
  // empire record: uses the longer '... of America' name
  const empire = {
    'name': { 'default': 'United States of America' },
    'source': 'whosonfirst',
    'source_id': '136253057',
    'layer': 'empire',
    'parent': { 'empire_id': 136253057 }
  };

  // country record: its parent hierarchy references the empire above
  const country = {
    'name': { 'default': 'United States' },
    'source': 'whosonfirst',
    'source_id': '85633793',
    'layer': 'country',
    'parent': { 'empire_id': 136253057, 'country_id': 85633793 }
  };

  const req = { clean: { text: 'United States', size: 100 } };

  // empire is listed first in this variant
  const res = { data: [empire, country] };

  dedupe(req, res, () => {
    t.equal(res.data.length, 1, 'results have fewer items than before');
    t.equal(res.data[0].layer, 'country', 'empire result removed');
    t.end();
  });
});

test('A->B B->C dependency graph', function (t) {
var req = {
clean: {
Expand Down