Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions model/countries/BR/BR-parsing-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@ regex_definitions:

kFloorLiteralRe: # Regex for literal for a floor
regex_fragment: andar

# Matches the prefix of a postal code: exactly five digits
# (the "12345" part of "12345-678").
kZipPrefixValueRe:
regex_fragment: (?:\d{5})

# Matches the suffix of a postal code: exactly three digits
# (the "678" part of "12345-678").
kZipSuffixValueRe:
regex_fragment: (?:\d{3})

capture_definitions:
ParseBuildingLocation:
Expand Down Expand Up @@ -165,6 +173,10 @@ parsing_definitions:
output: unit-name
parts: [ {regex_reference: kUnitNameValueRe} ]

postal-code:
decomposition:
capture_reference: ParsePostalCodeOptionalSeparatorExpression

test_regex_definitions:
# Tests for kBuildingValueRe
- id: "kBuildingValueRe: plain number"
Expand Down Expand Up @@ -324,3 +336,15 @@ test_parsing_definitions:
unit-name: "12"
floor: "1"
landmark: "foo"
- id: "Zip code with separator"
type: postal-code
input: "12345-678"
output:
postal-code-prefix: "12345"
postal-code-suffix: "678"
- id: "Zip code without separator"
type: postal-code
input: "12345678"
output:
postal-code-prefix: "12345"
postal-code-suffix: "678"
7 changes: 6 additions & 1 deletion model/countries/CA/CA-formatting-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ formatting-rules:
- skip: country # redundant with country-name
- skip: street-address # redundant with street-address-alternative-1

postal-code:
- postal-code-prefix
- postal-code-suffix

examples:
- id: name
comment: |
Expand All @@ -33,7 +37,8 @@ examples:
address-line2: Apt. 306
locality1: Ottawa
admin-area1: ON
postal-code: M5H 2J9
postal-code-prefix: M5H
postal-code-suffix: 2J9
country: CA
country-name: Canada

Expand Down
32 changes: 32 additions & 0 deletions model/countries/CA/CA-parsing-rules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
regex_definitions:
  # Separator accepted between the prefix and the suffix of a postal code.
  # Delegates to kWhitespaceSeparator, which is defined outside this file.
  kZipCodeSeparatorsRe:
    regex_reference: kWhitespaceSeparator

  # Postal code prefix: letter, digit, letter.
  # The character classes limit which letters are accepted in each position.
  kZipPrefixValueRe:
    regex_fragment: (?:[ABCEGHJ-NPRSTVXY]\d[ABCEGHJ-NPRSTV-Z])

  # Postal code suffix: digit, letter, digit.
  kZipSuffixValueRe:
    regex_fragment: (?:\d[ABCEGHJ-NPRSTV-Z]\d)

parsing_definitions:
  # Decompose a postal code into its prefix and suffix using the
  # ParsePostalCodeOptionalSeparatorExpression capture (defined outside
  # this file).
  postal-code:
    decomposition:
      capture_reference: ParsePostalCodeOptionalSeparatorExpression

test_parsing_definitions:
  # Prefix and suffix separated by a single space.
  - id: "Zip code with separator"
    type: postal-code
    input: "K1A 0B1"
    output:
      postal-code-prefix: "K1A"
      postal-code-suffix: "0B1"
  # Prefix and suffix written back to back.
  - id: "Zip code without separator"
    type: postal-code
    input: "K1A0B1"
    output:
      postal-code-prefix: "K1A"
      postal-code-suffix: "0B1"
10 changes: 8 additions & 2 deletions model/countries/NL/NL-formatting-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ formatting-rules:
- separator: "-"
- unit

postal-code:
- postal-code-prefix
- postal-code-suffix

examples:
- id: name
comment: |
Expand All @@ -46,7 +50,8 @@ examples:
unit: A
building-and-unit: 10-A
locality1: Amsterdam
postal-code: 1234 AB
postal-code-prefix: 1234
postal-code-suffix: AB
country: NL
country-name: Netherlands
output:
Expand All @@ -65,7 +70,8 @@ examples:
building: 10
building-and-unit: 10
locality1: Amsterdam
postal-code: 1234 AB
postal-code-prefix: 1234
postal-code-suffix: AB
country: NL
country-name: Netherlands
output:
Expand Down
26 changes: 25 additions & 1 deletion model/countries/NL/NL-parsing-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ regex_definitions:
kHouseNumberAndUnitSeparator:
regex_fragment: (?:[-\s/,]*)

# Matches the prefix of a postal code: four digits, the first of which
# is non-zero (the "1234" part of "1234 AB").
kZipPrefixValueRe:
regex_fragment: (?:[1-9]\d{3})

# Matches the suffix of a postal code: two uppercase ASCII letters
# (the "AB" part of "1234 AB").
kZipSuffixValueRe:
regex_fragment: (?:[A-Z]{2})

capture_definitions:
ParseBuildingLocation:
capture:
Expand All @@ -31,7 +39,7 @@ capture_definitions:
- capture:
output: unit-name
parts: [ {regex_reference: kUnitValueRe} ]
quantifier: MATCH_OPTIONAL
quantifier: MATCH_OPTIONAL

parsing_definitions:
building-location:
Expand All @@ -44,6 +52,10 @@ parsing_definitions:
output: street-address-alternative-1
parts:
- capture_reference: ParseBuildingLocation

postal-code:
decomposition:
capture_reference: ParsePostalCodeOptionalSeparatorExpression

test_parsing_definitions:
- id: "Test 1"
Expand Down Expand Up @@ -241,3 +253,15 @@ test_parsing_definitions:
building: "146"
unit: "A-02"
unit-name: "A-02"
- id: "Test 19: zip code with separator"
type: postal-code
input: "1234 AB"
output:
postal-code-prefix: "1234"
postal-code-suffix: "AB"
- id: "Test 20: zip code without separator"
type: postal-code
input: "1234AB"
output:
postal-code-prefix: "1234"
postal-code-suffix: "AB"
26 changes: 25 additions & 1 deletion model/countries/PL/PL-parsing-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@ regex_definitions:
# Regular expression to match separator of house/building number and unit/apartment number.
kHouseNumberAndUnitSeparator:
regex_fragment: (?:^|[/\s]+)

# Matches the prefix of a postal code: exactly two digits
# (the "00" part of "00-843").
kZipPrefixValueRe:
regex_fragment: (?:\d{2})

# Matches the suffix of a postal code: exactly three digits
# (the "843" part of "00-843").
kZipSuffixValueRe:
regex_fragment: (?:\d{3})

capture_definitions:
ParseBuildingLocation:
Expand Down Expand Up @@ -85,6 +93,10 @@ parsing_definitions:
unit:
decomposition:
capture_reference: ParseUnitWithOptionalPrefix

postal-code:
decomposition:
capture_reference: ParsePostalCodeOptionalSeparatorExpression


test_parsing_definitions:
Expand Down Expand Up @@ -222,4 +234,16 @@ test_parsing_definitions:
building: "9A"
unit: "m.10"
unit-type: "m."
unit-name: "10"
unit-name: "10"
- id: "Test 15: zip code with separator"
type: postal-code
input: "00-843"
output:
postal-code-prefix: "00"
postal-code-suffix: "843"
- id: "Test 16: zip code without separator"
type: postal-code
input: "00843"
output:
postal-code-prefix: "00"
postal-code-suffix: "843"
7 changes: 4 additions & 3 deletions model/countries/US/US-formatting-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ examples:
address-line2: Apt. 10, Club of Autofillers
locality1: New York City
admin-area1: NY
postal-code: 11367
postal-code-prefix: 11367
postal-code-suffix: 4100
country: US
country-name: USA

Expand All @@ -45,5 +46,5 @@ examples:
text: |
1234 Main St.
Apt. 10, Club of Autofillers
New York City, NY 11367
USA
New York City, NY 11367-4100
USA
39 changes: 39 additions & 0 deletions model/countries/US/US-parsing-rules.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
regex_definitions:
  # Postal code prefix: exactly five digits.
  kZipPrefixValueRe:
    regex_fragment: (?:\d{5})

  # Postal code suffix: exactly four digits.
  kZipSuffixValueRe:
    regex_fragment: (?:\d{4})

parsing_definitions:
  # Decompose a postal code into its prefix and suffix using the
  # ParsePostalCodeOptionalSeparatorExpression capture (defined outside
  # this file).
  postal-code:
    decomposition:
      capture_reference: ParsePostalCodeOptionalSeparatorExpression

test_parsing_definitions:
  # Prefix and suffix joined by a hyphen.
  - id: "Zip code with suffix"
    type: postal-code
    input: "90210-5555"
    output:
      postal-code-prefix: "90210"
      postal-code-suffix: "5555"
  # Prefix only: the suffix is expected to come back empty.
  - id: "Zip code without suffix"
    type: postal-code
    input: "90210"
    output:
      postal-code-prefix: "90210"
      postal-code-suffix: ""
  # Whitespace around the hyphen must not end up in either part.
  - id: "Zip code with extra spaces"
    type: postal-code
    input: "90210 - 5555"
    output:
      postal-code-prefix: "90210"
      postal-code-suffix: "5555"
  # Nine consecutive digits: the first five are the prefix, the rest the suffix.
  - id: "Zip code without separator"
    type: postal-code
    input: "902105555"
    output:
      postal-code-prefix: "90210"
      postal-code-suffix: "5555"
2 changes: 2 additions & 0 deletions model/countries/global/global-descriptions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ short-descriptions:
country: 2-letter country code
country-name: Name of a country
postal-code: Postal code
postal-code-prefix: Postal code prefix
postal-code-suffix: Postal code suffix
admin-area1: Biggest type of admin area if a country has multiple levels
admin-area2: 2nd biggest type of admin area if a country has multiple levels
admin-area3: 3rd biggest type of admin area if a country has multiple levels
Expand Down
5 changes: 5 additions & 0 deletions model/countries/global/global-formatting-rules.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,8 @@ formatting-rules:
- tel-local-prefix
- separator: ""
- tel-local-suffix

# Compose postal-code from its prefix and suffix, joined by a hyphen.
postal-code:
- postal-code-prefix
- separator: "-"
- postal-code-suffix
4 changes: 3 additions & 1 deletion model/countries/global/global-model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,9 @@ concepts:
- admin-area2
- admin-area3
- admin-area4
- postal-code
- postal-code:
- postal-code-prefix
- postal-code-suffix
- country
- country-name
# Company related
Expand Down
Loading