22authority_id : icao
33id : 9303
44language : iso-639-2:mul
5+ supported_languages : [iso-639-2:rus, iso-639-2:bel, iso-639-2:ukr, iso-639-2:mkd, iso-639-2:srb ]
56source_script : Cyrl
67destination_script : Latn
78name : " Doc 9303: Machine Readable Travel Documents, Part 3: Specifications Common to all MRTDs, Seventh Edition, 2015"
@@ -20,9 +21,171 @@ description: |
2021
2122 This document defines the transliteration mappings used to produce
2223 this transcription or transliteration.
24+
2325tests :
26+ - source : Бабрыковіч Аляксандр
27+ expected : Babrykovich Aliaksandr
28+ language : iso-639-2:bel
29+ - source : Міховіч Марыя
30+ expected : Mikhovich Maryia
31+ language : iso-639-2:bel
32+ - source : Максім
33+ expected : Maksim
34+ language : iso-639-2:bel
35+ - source : Іван
36+ expected : Ivan
37+ language : iso-639-2:bel
38+ - source : СВЯТЛАНА
39+ expected : SVIATLANA
40+ language : iso-639-2:bel
41+ - source : Ігар
42+ expected : Ihar
43+ language : iso-639-2:bel
44+ - source : Палто Алена
45+ expected : Palto Alena
46+ language : iso-639-2:bel
47+ - source : Мікалай
48+ expected : Mikalai
49+ language : iso-639-2:bel
50+ # https://en.wikipedia.org/wiki/Machine-readable_passport#Names
51+ - source : Горбачёв
52+ expected : Gorbachev
53+ language : iso-639-2:rus
54+ - source : Горбачёв
55+ expected : Horbachiov
56+ language : iso-639-2:bel
57+ - source : Алексей
58+ expected : Aleksei
59+ language : iso-639-2:rus
60+ - source : Академика Королёва
61+ expected : Akademika Koroleva
62+ language : iso-639-2:rus
63+ - source : улица Бирюлёвская
64+ expected : ulitsa Biriulevskaia
65+ language : iso-639-2:rus
66+ - source : Врубеля Улица
67+ expected : Vrubelia Ulitsa
68+ language : iso-639-2:rus
69+ - source : Люблинская
70+ expected : Liublinskaia
71+ language : iso-639-2:rus
72+ # https://news.tut.by/society/650761.html
73+ - source : Мария Рудь
74+ expected : Mariia Rud
75+ language : iso-639-2:rus
76+ - source : Мария Рудь
77+ expected : Mariia Rud
78+ language : iso-639-2:bel
79+ # https://pasport.org.ua/ru/vazhno/transliteratsiya
80+ - source : Олександр
81+ expected : Oleksandr
82+ language : iso-639-2:ukr
2483
2584map :
85+ rules :
86+ - pattern : \u0401
87+ result : IO
88+ language : [ iso-639-2:bel ]
89+ - pattern : (?<!\b\u2019)\b\u0404
90+ result : YE
91+ language : [ iso-639-2:ukr ]
92+ - pattern : (?<!\b\u2019)\b\u0407
93+ result : YI
94+ language : [ iso-639-2:ukr ]
95+ - pattern : \u040C
96+ result : KJ
97+ language : [ iso-639-2:mkd ]
98+ - pattern : \u040F
99+ result : DJ
100+ language : [ iso-639-2:mkd ]
101+ - pattern : \u0413
102+ result : H
103+ language : [ iso-639-2:bel, iso-639-2:srb, iso-639-2:ukr ]
104+ - pattern : \u0416
105+ result : Z
106+ language : [ iso-639-2:srb ]
107+ - pattern : \u0418
108+ result : Y
109+ language : [ iso-639-2:ukr ]
110+ - pattern : (?<!\b\u2019)\b\u0419
111+ result : Y
112+ language : [ iso-639-2:ukr ]
113+ - pattern : \u0425
114+ result : H
115+ language : [ iso-639-2:srb, iso-639-2:mkd ]
116+ - pattern : \u0426
117+ result : C
118+ language : [ iso-639-2:srb, iso-639-2:mkd ]
119+ - pattern : \u0427
120+ result : C
121+ language : [ iso-639-2:srb ]
122+ - pattern : \u0428
123+ result : S
124+ language : [ iso-639-2:srb ]
125+ - pattern : \u0429
126+ result : SHT
127+ language : [ iso-639-2:bul ]
128+ - pattern : (?<!\b\u2019)\b\u042E
129+ result : YU
130+ language : [ iso-639-2:ukr ]
131+ - pattern : (?<!\b\u2019)\b\u042F
132+ result : YA
133+ language : [ iso-639-2:ukr ]
134+ - pattern : \u0492
135+ result : GJ
136+ language : [ iso-639-2:mkd ]
137+ - pattern : \u0451
138+ result : io
139+ language : [ iso-639-2:bel ]
140+ - pattern : (?<!\b\u2019)\b\u0454
141+ result : ye
142+ language : [ iso-639-2:ukr ]
143+ - pattern : (?<!\b\u2019)\b\u0457
144+ result : yi
145+ language : [ iso-639-2:ukr ]
146+ - pattern : \u045C
147+ result : kj
148+ language : [ iso-639-2:mkd ]
149+ - pattern : \u045F
150+ result : dj
151+ language : [ iso-639-2:mkd ]
152+ - pattern : \u0433
153+ result : h
154+ language : [ iso-639-2:bel, iso-639-2:srb, iso-639-2:ukr ]
155+ - pattern : \u0436
156+ result : z
157+ language : [ iso-639-2:srb ]
158+ - pattern : \u0438
159+ result : y
160+ language : [ iso-639-2:ukr ]
161+ - pattern : (?<!\b\u2019)\b\u0439
162+ result : y
163+ language : [ iso-639-2:ukr ]
164+ - pattern : \u0445
165+ result : h
166+ language : [ iso-639-2:srb, iso-639-2:mkd ]
167+ - pattern : \u0446
168+ result : c
169+ language : [ iso-639-2:srb, iso-639-2:mkd ]
170+ - pattern : \u0447
171+ result : c
172+ language : [ iso-639-2:srb ]
173+ - pattern : \u0448
174+ result : s
175+ language : [ iso-639-2:srb ]
176+ - pattern : \u0449
177+ result : sht
178+ language : [ iso-639-2:bul ]
179+ - pattern : (?<!\b\u2019)\b\u044E
180+ result : yu
181+ language : [ iso-639-2:ukr ]
182+ - pattern : (?<!\b\u2019)\b\u044F
183+ result : ya
184+ language : [ iso-639-2:ukr ]
185+ - pattern : \u0493
186+ result : gj
187+ language : [ iso-639-2:mkd ]
188+
26189 characters :
27190 # A. Transliteration of Multinational Latin-based Characters
28191 " \u00C0 " : " A" # À
81244 " \u012C " : " I" # Ĭ
82245 " \u012E " : " I" # Į
83246 " \u0130 " : " I" # İ
84- " \u0049 " : " I" # I
85247 " \u0132 " : " IJ" # IJ
86248 " \u0134 " : " J" # Ĵ
87249 " \u0136 " : " K" # Ķ
@@ -173,52 +335,51 @@ map:
173335 " \u0125 " : " h" # ĥ
174336 " \u0127 " : " h" # ħ
175337 " \u0129 " : " i" # ĩ
176- " \u012B " : " I" # ī
177- " \u012D " : " I" # ĭ
178- " \u012F " : " I" # į
179- " \u0069 " : " I" # i̇
180- " \u0131 " : " I" # i
181- " \u0133 " : " IJ" # ij
182- " \u0135 " : " J" # ĵ
183- " \u0137 " : " K" # ķ
184- " \u013A " : " L" # ĺ
185- " \u013C " : " L" # ļ
186- " \u013E " : " L" # ľ
187- " \u0140 " : " L" # ŀ
188- " \u0142 " : " L" # ł
189- " \u0144 " : " N" # ń
190- " \u0146 " : " N" # ņ
191- " \u0148 " : " N" # ň
192- " \u014B " : " N" # ŋ
193- " \u014D " : " O" # ō
194- " \u014F " : " O" # ŏ
195- " \u0151 " : " O" # ő
196- " \u0153 " : " OE" # œ
197- " \u0155 " : " R" # ŕ
198- " \u0157 " : " R" # ŗ
199- " \u0159 " : " R" # ř
200- " \u015B " : " S" # ś
201- " \u015D " : " S" # ŝ
202- " \u015F " : " S" # ş
203- " \u0161 " : " S" # š
204- " \u0163 " : " T" # ţ
205- " \u0165 " : " T" # ť
206- " \u0167 " : " T" # ŧ
207- " \u0169 " : " U" # ũ
208- " \u016B " : " U" # ū
209- " \u016D " : " U" # ŭ
210- " \u016F " : " U" # ů
211- " \u0171 " : " U" # ű
212- " \u0173 " : " U" # ų
213- " \u0175 " : " W" # ŵ
214- " \u0177 " : " Y" # ŷ
215- " \u00FF " : " Y" # ÿ
216- " \u017A " : " Z" # ź
217- " \u017C " : " Z" # ż
218- " \u017E " : " Z" # ž
338+ " \u012B " : " i" # ī
339+ " \u012D " : " i" # ĭ
340+ " \u012F " : " i" # į
341+ " \u0131 " : " i" # ı
342+ " \u0133 " : " ij" # ij
343+ " \u0135 " : " j" # ĵ
344+ " \u0137 " : " k" # ķ
345+ " \u013A " : " l" # ĺ
346+ " \u013C " : " l" # ļ
347+ " \u013E " : " l" # ľ
348+ " \u0140 " : " l" # ŀ
349+ " \u0142 " : " l" # ł
350+ " \u0144 " : " n" # ń
351+ " \u0146 " : " n" # ņ
352+ " \u0148 " : " n" # ň
353+ " \u014B " : " n" # ŋ
354+ " \u014D " : " o" # ō
355+ " \u014F " : " o" # ŏ
356+ " \u0151 " : " o" # ő
357+ " \u0153 " : " oe" # œ
358+ " \u0155 " : " r" # ŕ
359+ " \u0157 " : " r" # ŗ
360+ " \u0159 " : " r" # ř
361+ " \u015B " : " s" # ś
362+ " \u015D " : " s" # ŝ
363+ " \u015F " : " s" # ş
364+ " \u0161 " : " s" # š
365+ " \u0163 " : " t" # ţ
366+ " \u0165 " : " t" # ť
367+ " \u0167 " : " t" # ŧ
368+ " \u0169 " : " u" # ũ
369+ " \u016B " : " u" # ū
370+ " \u016D " : " u" # ŭ
371+ " \u016F " : " u" # ů
372+ " \u0171 " : " u" # ű
373+ " \u0173 " : " u" # ų
374+ " \u0175 " : " w" # ŵ
375+ " \u0177 " : " y" # ŷ
376+ " \u00FF " : " y" # ÿ
377+ " \u017A " : " z" # ź
378+ " \u017C " : " z" # ż
379+ " \u017E " : " z" # ž
219380
220381 # B. Transliteration of Cyrillic Characters
221- " \u0401 " : " E" # Ё (except Belorussian = IO)
382+ " \u0401 " : " E" # Ё (except Belorussian = IO)
222383 " \u0402 " : " D" # Ђ
223384 " \u0404 " : " IE" # Є (except if Ukrainian first character, then = YE)
224385 " \u0405 " : " DZ" # Ѕ
@@ -229,11 +390,11 @@ map:
229390 " \u040A " : " NJ" # Њ
230391 " \u040C " : " K" # Ќ (except in the language spoken in the former Yugoslav Republic of Macedonia = KJ)
231392 " \u040E " : " U" # Ў
232- " \u040F " : " DZ" # Џ (except in the language spoken in the former Yugoslav Republic of Macedonia = DJ)
393+ " \u040F " : " DZ" # Џ (except in the language spoken in the former Yugoslav Republic of Macedonia = DJ)
233394 " \u0410 " : " A" # А
234395 " \u0411 " : " B" # Б
235396 " \u0412 " : " V" # В
236- " \u0413 " : " G" # Г (except Belorussian, Serbian, and Ukrainian = H)
397+ " \u0413 " : " G" # Г (except Belorussian, Serbian, and Ukrainian = H)
237398 " \u0414 " : " D" # Д
238399 " \u0415 " : " E" # Е
239400 " \u0416 " : " ZH" # Ж (except Serbian = Z)
279440 " \u045C " : " k" # ќ (except in the language spoken in the former Yugoslav Republic of Macedonia = kj)
280441 " \u045E " : " u" # ў
281442 " \u045F " : " dz" # џ (except in the language spoken in the former Yugoslav Republic of Macedonia = dj)
282- " \u0410 " : " a" # а
443+ " \u0430 " : " a" # а
283444 " \u0431 " : " b" # б
284445 " \u0432 " : " v" # в
285446 " \u0433 " : " g" # г (except Belorussian, Serbian, and Ukrainian = h)
315476 " \u0491 " : " g" # ґ
316477 " \u0493 " : " g" # ғ (except in the language spoken in the former Yugoslav Republic of Macedonia = gj)
317478 " \u04BB " : " c" # һ
479+
480+ # Soft sign transliteration is not defined by the standard, so it is skipped
481+ # https://ru.wikipedia.org/wiki/Транслитерация_русского_алфавита_латиницей#cite_note-tt12-19
482+ " \u042C " : " " # Ь
483+ " \u044C " : " " # ь
0 commit comments