Skip to content

Commit 474ea12

Browse files
chaaklau authored and ronaldtse committed
Fix mapping rule issue when there are multiple matches
1 parent f5c3508 commit 474ea12

File tree

2 files changed

+29
-13
lines changed

2 files changed

+29
-13
lines changed

lib/interscript.rb

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,19 @@ def transliterate(system_code, string)
3131
output = string.clone
3232
offsets = Array.new string.to_s.size, 1
3333

34+
# mapping.rules.each do |r|
35+
# string.to_s.scan(/#{r['pattern']}/) do |matches|
36+
# match = Regexp.last_match
37+
# pos = match.offset(0).first
38+
# result = r['result'].clone
39+
# matches.each.with_index { |v, i| result.sub!(/\\#{i + 1}/, v) } if matches.is_a? Array
40+
# result.upcase! if up_case_around?(string, pos)
41+
# output[offsets[0...pos].sum, match[0].size] = result
42+
# offsets[pos] += result.size - match[0].size
43+
# end
44+
# end
3445
mapping.rules.each do |r|
35-
string.to_s.scan(/#{r['pattern']}/) do |matches|
36-
match = Regexp.last_match
37-
pos = match.offset(0).first
38-
result = r['result'].clone
39-
matches.each.with_index { |v, i| result.sub!(/\\#{i + 1}/, v) } if matches.is_a? Array
40-
result.upcase! if up_case_around?(string, pos)
41-
output[offsets[0...pos].sum, match[0].size] = result
42-
offsets[pos] += result.size - match[0].size
43-
end
46+
output.gsub!(/#{r['pattern']}/, r['result'])
4447
end
4548

4649
charmap.each do |k, v|

maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,13 @@ tests:
188188
map:
189189
rules:
190190
- pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0415 # Е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
191-
result: Ye
191+
result: "Y\u0415"
192192
- pattern: \b\u0415 # Е initially
193-
result: Ye
193+
result: "Y\u0435"
194194
- pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0435 # е after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
195-
result: ye
195+
result: "y\u0435"
196196
- pattern: \b\u0435 # е initially
197-
result: ye
197+
result: "y\u0435"
198198
- pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯяЙйЪъЬь])\u0401 # Ё after a, e, ё, и, о, у, ы, э, ю, я, й, ъ, ь
199199
result: "Y\u00eb"
200200
- pattern: \b\u0401
@@ -207,6 +207,10 @@ map:
207207
result: "Y\u00b7"
208208
- pattern: \u0439(?=[АаУуЫыЭэ]) # й before а, у, ы, or э
209209
result: "y\u00b7"
210+
- pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯя])\u042b(?=[АаУуЫыЭэ]) # Ы after any vowel character and before а, у, ы, or э
211+
result: "\u00b7Y\u00b7"
212+
- pattern: (?<=[АаЕеЁёИиОоУуЫыЭэЮюЯя])\u044b(?=[АаУуЫыЭэ]) # ы after any vowel character and before а, у, ы, or э
213+
result: "\u00b7y\u00b7"
210214
- pattern: \u042b(?=[АаУуЫыЭэ]) # Ы before а, у, ы, or э
211215
result: "Y\u00b7"
212216
- pattern: \u044b(?=[АаУуЫыЭэ]) # ы before а, у, ы, or э
@@ -224,6 +228,15 @@ map:
224228
- pattern: (Ш|ш)(Ч|ч) # шч => sh·ch
225229
result: "\\1\u00b7\\2"
226230

231+
postrules:
232+
233+
#YE
234+
- pattern: "((?<=[A-ZË])Ye(?=[A-ZË])?|(?<=[A-ZË])?Ye(?=[A-ZË]))"
235+
result: "YE"
236+
#
237+
- pattern: "((?<=[A-ZË])Yë(?=[A-ZË])?|(?<=[A-ZË])?Yë(?=[A-ZË]))"
238+
result: ""
239+
227240
characters:
228241
"\u0410": "A"
229242
"\u0411": "B"

0 commit comments

Comments
 (0)