From 398c51fd3a1c3569425f2c167c78da2f215933c4 Mon Sep 17 00:00:00 2001 From: obadx Date: Wed, 11 Feb 2026 20:40:12 +0200 Subject: [PATCH 1/4] add: intial support for Tajweed rules --- .../phonetics/conv_base_operation.py | 48 +++++- src/quran_transcript/phonetics/operations.py | 7 + .../phonetics/tajweed_rulses.py | 18 +- tests/test_explain_error_api.py | 29 ++-- tests/test_sub_with_mapping.py | 5 +- tests/test_sub_with_mapping_pytest.py | 161 +++++++++++++++--- 6 files changed, 220 insertions(+), 48 deletions(-) diff --git a/src/quran_transcript/phonetics/conv_base_operation.py b/src/quran_transcript/phonetics/conv_base_operation.py index ff8cedd..1ab4812 100644 --- a/src/quran_transcript/phonetics/conv_base_operation.py +++ b/src/quran_transcript/phonetics/conv_base_operation.py @@ -35,7 +35,9 @@ class MappingPos: pos: tuple[int, int] # start, (pythonic exlusive end) tajweed_rules: list[TajweedRule] | None = None - def add_tajweed_rule(self, new_tajweed_rule: TajweedRule | None) -> None: + def add_tajweed_rule( + self, new_tajweed_rules: TajweedRule | list[TajweedRule] | None + ) -> None: """Add a tajweed rule to this mapping position. 
Appends the new tajweed rule to the existing list of rules if both the @@ -49,8 +51,16 @@ def add_tajweed_rule(self, new_tajweed_rule: TajweedRule | None) -> None: >>> # This will add the rule if tajweed_rules exists and rule is not None >>> # mapping.add_tajweed_rule(some_rule) """ - if self.tajweed_rules is not None and new_tajweed_rule is not None: - self.tajweed_rules.append(new_tajweed_rule) + if isinstance(new_tajweed_rules, TajweedRule): + new_tajweed_rules = [new_tajweed_rules] + elif new_tajweed_rules is None: + new_tajweed_rules = [] + + if new_tajweed_rules != []: + if self.tajweed_rules is None: + self.tajweed_rules = [] + for rule in new_tajweed_rules: + self.tajweed_rules.append(rule) MappingListType: TypeAlias = list[MappingPos | None] @@ -76,6 +86,10 @@ def merge_mappings( Merged position mappings maintaining the relationship between the original text and the final substituted text. The length matches the original mappings length. + Raises: + ValueError: if `new_mappings` is an empty list + + Logic: - For each non-None old mapping, searches its position range in new_mappings - Finds the first and last non-None mapping in that range @@ -102,30 +116,50 @@ def merge_mappings( >>> result = merge_mappings(old, new) # result: [MappingPos(pos=(0, 5))] # spans first to last non-None """ + if mappings is None: return new_mappings + if new_mappings == []: + raise ValueError("`new_mappings` should not be an empty list") + # TODO: add Tajweed rules depencance - merged_mappings = [None for _ in range(len(mappings))] + merged_mappings: MappingListType = [None for _ in range(len(mappings))] for idx, old_map in enumerate(mappings): if old_map is not None: start_map = None + new_start_idx = 0 end_map = None - for start_map in new_mappings[old_map.pos[0] : old_map.pos[1]]: + new_end_idx = 0 + for new_start_idx in range(old_map.pos[0], old_map.pos[1]): + start_map = new_mappings[new_start_idx] if start_map is not None: break - for end_map in 
new_mappings[old_map.pos[0] : old_map.pos[1]][::-1]: + for new_end_idx in range(old_map.pos[1] - 1, old_map.pos[0] - 1, -1): + end_map = new_mappings[new_end_idx] if end_map is not None: break if start_map is not None and end_map is not None: + # Avoid copying by reference (instead copying by values) + # Avoiding changes mapping with refrenrece by other uninteded code merged_mappings[idx] = MappingPos( pos=(start_map.pos[0], end_map.pos[1]) ) + merged_mappings[idx].add_tajweed_rule(old_map.tajweed_rules) + for new_idx in range(new_start_idx, new_end_idx + 1): + if new_mappings[new_idx] is not None: + merged_mappings[idx].add_tajweed_rule( + new_mappings[new_idx].tajweed_rules + ) elif start_map is not None: + # Single start mapping merged_mappings[idx] = MappingPos(pos=start_map.pos) + merged_mappings[idx].add_tajweed_rule(start_map.tajweed_rules) elif end_map is not None: + # Single End mapping merged_mappings[idx] = MappingPos(pos=end_map.pos) + merged_mappings[idx].add_tajweed_rule(end_map.tajweed_rules) return merged_mappings @@ -493,7 +527,7 @@ def sub_with_mapping( # Apply the regex substitution new_text = re.sub(pattern, repl, text) - new_mappings = get_mappings(text, new_text, mappings) + new_mappings = get_mappings(text, new_text, mappings, tajweed_rule=tajweed_rule) return new_text, new_mappings diff --git a/src/quran_transcript/phonetics/operations.py b/src/quran_transcript/phonetics/operations.py index a3fa682..83a6701 100644 --- a/src/quran_transcript/phonetics/operations.py +++ b/src/quran_transcript/phonetics/operations.py @@ -1,6 +1,7 @@ from dataclasses import dataclass, field import re + from .conv_base_operation import ( ConversionOperation, sub_with_mapping, @@ -11,6 +12,7 @@ from .moshaf_attributes import MoshafAttributes from ..alphabet import uthmani as uth from ..alphabet import phonetics as ph +from .tajweed_rulses import NormalMaddRule @dataclass @@ -722,6 +724,7 @@ class MaddPattern: pattern: str target: str madd: str + name: str 
@dataclass @@ -741,16 +744,19 @@ class Madd(ConversionOperation): pattern=f"({uth.fatha}){uth.alif}", target=ph.alif, madd=uth.alif, + name="alif", ), "dam": MaddPattern( pattern=f"({uth.dama}){uth.waw}", target=ph.waw_madd, madd=uth.waw, + name="waw", ), "kasr": MaddPattern( pattern=f"({uth.kasra}){uth.yaa}", target=ph.yaa_madd, madd=uth.yaa, + name="yaa", ), } ) @@ -854,6 +860,7 @@ def forward( r"\1" + 2 * madd_patt.target, text, mappings, + tajweed_rule=NormalMaddRule(tag=madd_patt.name), ) return text, mappings diff --git a/src/quran_transcript/phonetics/tajweed_rulses.py b/src/quran_transcript/phonetics/tajweed_rulses.py index 8e946f6..4b36822 100644 --- a/src/quran_transcript/phonetics/tajweed_rulses.py +++ b/src/quran_transcript/phonetics/tajweed_rulses.py @@ -23,7 +23,6 @@ class TajweedRule(ABC): correctness_type: Literal["match", "count"] tag: Optional[str] | None = None available_tags: Optional[set] | None = None - to_overwrite_rule: Optional[list["TajweedRule"]] | None = None def __post_init__(self): if self.tag is not None and self.available_tags is not None: @@ -49,6 +48,21 @@ def get_relvant_rule(self, ph_str: str) -> Optional["TajweedRule"]: return self +@dataclass +class Qalqalah(TajweedRule): + name: LangName = field(default_factory=lambda: LangName(ar="قلقة", en="Qalqalah")) + golden_len: int = 0 + correctness_type: Literal["match", "count"] = "match" + + def is_ph_str_in(self, ph_str: str) -> bool: + """Whether the phonetic script is assoicated with this Tajweed rule or not""" + return True + + def get_relvant_rule(self, ph_str: str) -> Optional["TajweedRule"]: + """Returs a Tajweed rule that is assocaited with the input ph_str""" + return self + + @dataclass class MaddRule(TajweedRule): name: LangName @@ -90,7 +104,7 @@ def get_relvant_rule(self, ph_str: str) -> Optional["TajweedRule"]: @dataclass class NormalMaddRule(MaddRule): name: LangName = field( - default_factory=LangName(ar="المد الطبيعي", en="Normal Madd") + default_factory=lambda: 
LangName(ar="المد الطبيعي", en="Normal Madd") ) golden_len: int = 2 diff --git a/tests/test_explain_error_api.py b/tests/test_explain_error_api.py index 51270b5..1992555 100644 --- a/tests/test_explain_error_api.py +++ b/tests/test_explain_error_api.py @@ -1,20 +1,19 @@ -from quran_transcript.phonetics.tajweed_rulses import NormalMaddRule from quran_transcript.phonetics.error_explainer import ReciterError, explain_error -from quran_transcript.phonetics.conv_base_operation import MappingPos + +from quran_transcript import quran_phonetizer, MoshafAttributes if __name__ == "__main__": - normal_madd_alif = NormalMaddRule(tag="alif") - normal_madd_waw = NormalMaddRule(tag="waw") - uthmani_text = "قالوا" - ref_mapping = [ - MappingPos(pos=(0, 1)), - MappingPos(pos=(1, 3), tajweed_rules=[normal_madd_alif]), - MappingPos(pos=(3, 4)), - MappingPos(pos=(4, 6), tajweed_rules=[normal_madd_waw]), - None, - ] - ref_ph_text = "قاالۥۥ" + uthmani_text = "قَالُوٓا۟" + + moshaf = MoshafAttributes( + rewaya="hafs", + madd_monfasel_len=4, + madd_mottasel_len=4, + madd_mottasel_waqf=4, + madd_aared_len=4, + ) + ref_ph_out = quran_phonetizer(uthmani_text, moshaf) predicted_text = "كالۥۥ" predicted_text = "فكالۥۥ" @@ -22,9 +21,9 @@ errors = explain_error( uthmani_text=uthmani_text, - ref_ph_text=ref_ph_text, + ref_ph_text=ref_ph_out.phonemes, predicted_ph_text=predicted_text, - mappings=ref_mapping, + mappings=ref_ph_out.mappings, ) for err in errors: print(err) diff --git a/tests/test_sub_with_mapping.py b/tests/test_sub_with_mapping.py index 05363a1..d046a3e 100644 --- a/tests/test_sub_with_mapping.py +++ b/tests/test_sub_with_mapping.py @@ -12,12 +12,13 @@ ) aya = Aya() aya = Aya(1, 1) + aya = Aya(12, 1) # aya = Aya(2, 1) # aya = Aya(75, 27) - # aya = Aya(2, 6) + aya = Aya(2, 6) # aya = Aya(3, 1) # aya = Aya(30, 28) - aya = Aya(2, 9) + # aya = Aya(2, 9) uth_text = aya.get().uthmani # uth_text = aya.get_by_imlaey_words(start=7, window=2).uthmani diff --git 
a/tests/test_sub_with_mapping_pytest.py b/tests/test_sub_with_mapping_pytest.py index ecf36d2..425167b 100644 --- a/tests/test_sub_with_mapping_pytest.py +++ b/tests/test_sub_with_mapping_pytest.py @@ -13,6 +13,7 @@ MappingListType, ) +from quran_transcript.phonetics.tajweed_rulses import NormalMaddRule, Qalqalah # Import the sub_with_mapping function from the existing test file @@ -205,7 +206,7 @@ def test_sub_with_mapping_operations( MappingPos(pos=(7, 8), tajweed_rules=None), None, MappingPos(pos=(8, 9), tajweed_rules=None), - MappingPos(pos=(9, 11), tajweed_rules=None), + MappingPos(pos=(9, 11), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(11, 12), tajweed_rules=None), MappingPos(pos=(12, 13), tajweed_rules=None), MappingPos(pos=(13, 14), tajweed_rules=None), @@ -246,7 +247,7 @@ def test_sub_with_mapping_operations( MappingPos(pos=(7, 8), tajweed_rules=None), None, MappingPos(pos=(8, 9), tajweed_rules=None), - MappingPos(pos=(9, 12), tajweed_rules=None), + MappingPos(pos=(9, 12), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(12, 13), tajweed_rules=None), MappingPos(pos=(13, 14), tajweed_rules=None), MappingPos(pos=(14, 15), tajweed_rules=None), @@ -258,9 +259,9 @@ def test_sub_with_mapping_operations( MappingPos(pos=(18, 19), tajweed_rules=None), MappingPos(pos=(19, 20), tajweed_rules=None), MappingPos(pos=(20, 21), tajweed_rules=None), - MappingPos(pos=(21, 22), tajweed_rules=None), # here the cavet + MappingPos(pos=(21, 22), tajweed_rules=None), None, - MappingPos(pos=(22, 24), tajweed_rules=None), # here the cavet + MappingPos(pos=(22, 24), tajweed_rules=None), MappingPos(pos=(24, 25), tajweed_rules=None), MappingPos(pos=(25, 26), tajweed_rules=None), MappingPos(pos=(26, 27), tajweed_rules=None), @@ -479,6 +480,72 @@ def test_merge_mappings_contraction(self): None, ], # Should span first to last non-None ), + # Test start and eend in between + tajweed rules (input) + ( + [ + MappingPos(pos=(0, 3), 
tajweed_rules=[NormalMaddRule(tag="alif")]), + MappingPos(pos=(3, 5)), + MappingPos(pos=(5, 6)), + ], + [ + None, + MappingPos(pos=(0, 1)), + None, + MappingPos(pos=(2, 3)), + MappingPos(pos=(3, 6)), + None, + ], + [ + MappingPos(pos=(0, 1), tajweed_rules=[NormalMaddRule(tag="alif")]), + MappingPos(pos=(2, 6)), + None, + ], # Should span first to last non-None + ), + # Test start and eend in between + tajweed rules (output) + ( + [ + MappingPos(pos=(0, 3)), + MappingPos(pos=(3, 5)), + MappingPos(pos=(5, 6)), + ], + [ + None, + MappingPos(pos=(0, 1)), + None, + MappingPos(pos=(2, 3), tajweed_rules=[NormalMaddRule(tag="alif")]), + MappingPos(pos=(3, 6)), + None, + ], + [ + MappingPos(pos=(0, 1)), + MappingPos(pos=(2, 6), tajweed_rules=[NormalMaddRule(tag="alif")]), + None, + ], # Should span first to last non-None + ), + # Test start and eend in between + tajweed rules (both) + ( + [ + MappingPos(pos=(0, 3)), + MappingPos(pos=(3, 5), tajweed_rules=[Qalqalah()]), + MappingPos(pos=(5, 6)), + ], + [ + None, + MappingPos(pos=(0, 1)), + None, + MappingPos(pos=(2, 3), tajweed_rules=[NormalMaddRule(tag="alif")]), + MappingPos(pos=(3, 6)), + None, + ], + [ + MappingPos(pos=(0, 1)), + MappingPos( + pos=(2, 6), + tajweed_rules=[Qalqalah(), NormalMaddRule(tag="alif")], + ), + None, + ], # Should span first to last non-None + ), # Test partial None mappings - some positions in range are None ( [MappingPos(pos=(0, 3))], @@ -517,8 +584,8 @@ def test_merge_mappings_partial_none(self, mappings, new_mappings, expected): def test_merge_mappings_empty_lists(self): """Test edge cases with empty lists.""" # Both empty - result = merge_mappings([], []) - assert result == [] + with pytest.raises(ValueError): + result = merge_mappings([], []) # Empty mappings, non-empty new_mappings new_mappings = [MappingPos(pos=(0, 1))] @@ -526,9 +593,9 @@ def test_merge_mappings_empty_lists(self): assert result == [] # Non-empty mappings, empty new_mappings - mappings = [MappingPos(pos=(0, 1))] - result 
= merge_mappings(mappings, []) - assert result == [None] + with pytest.raises(ValueError): + mappings = [MappingPos(pos=(0, 1))] + result = merge_mappings(mappings, []) def test_merge_mappings_complex_range(self): """Test complex scenario with multiple overlapping ranges.""" @@ -567,7 +634,14 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(0, 6), tajweed_rules=None), MappingPos(pos=(6, 16), tajweed_rules=None), None, - MappingPos(pos=(16, 20), tajweed_rules=None), + MappingPos( + pos=(16, 20), + tajweed_rules=[ + NormalMaddRule( + tag="alif", + ) + ], + ), MappingPos(pos=(20, 21), tajweed_rules=None), MappingPos(pos=(21, 22), tajweed_rules=None), MappingPos(pos=(22, 23), tajweed_rules=None), @@ -578,11 +652,25 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(26, 27), tajweed_rules=None), MappingPos(pos=(27, 28), tajweed_rules=None), MappingPos(pos=(28, 29), tajweed_rules=None), - MappingPos(pos=(29, 31), tajweed_rules=None), + MappingPos( + pos=(29, 31), + tajweed_rules=[ + NormalMaddRule( + tag="alif", + ) + ], + ), MappingPos(pos=(31, 32), tajweed_rules=None), MappingPos(pos=(32, 33), tajweed_rules=None), None, - MappingPos(pos=(33, 35), tajweed_rules=None), + MappingPos( + pos=(33, 35), + tajweed_rules=[ + NormalMaddRule( + tag="alif", + ) + ], + ), MappingPos(pos=(35, 36), tajweed_rules=None), MappingPos(pos=(36, 37), tajweed_rules=None), MappingPos(pos=(37, 38), tajweed_rules=None), @@ -594,7 +682,14 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(41, 42), tajweed_rules=None), MappingPos(pos=(42, 43), tajweed_rules=None), None, - MappingPos(pos=(43, 45), tajweed_rules=None), + MappingPos( + pos=(43, 45), + tajweed_rules=[ + NormalMaddRule( + tag="alif", + ) + ], + ), MappingPos(pos=(45, 46), tajweed_rules=None), MappingPos(pos=(46, 47), tajweed_rules=None), MappingPos(pos=(47, 48), tajweed_rules=None), @@ -625,7 +720,7 @@ def test_merge_mappings_complex_range(self): None, MappingPos(pos=(6, 7), 
tajweed_rules=None), MappingPos(pos=(7, 8), tajweed_rules=None), - MappingPos(pos=(8, 11), tajweed_rules=None), + MappingPos(pos=(8, 11), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(11, 12), tajweed_rules=None), MappingPos(pos=(12, 13), tajweed_rules=None), MappingPos(pos=(13, 14), tajweed_rules=None), @@ -639,7 +734,7 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(18, 19), tajweed_rules=None), MappingPos(pos=(19, 20), tajweed_rules=None), None, - MappingPos(pos=(20, 22), tajweed_rules=None), + MappingPos(pos=(20, 22), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(22, 23), tajweed_rules=None), MappingPos(pos=(23, 24), tajweed_rules=None), MappingPos(pos=(24, 25), tajweed_rules=None), @@ -671,7 +766,14 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(10, 11), tajweed_rules=None), MappingPos(pos=(11, 12), tajweed_rules=None), MappingPos(pos=(12, 13), tajweed_rules=None), - MappingPos(pos=(13, 15), tajweed_rules=None), + MappingPos( + pos=(13, 15), + tajweed_rules=[ + NormalMaddRule( + tag="yaa", + ) + ], + ), MappingPos(pos=(15, 16), tajweed_rules=None), MappingPos(pos=(16, 17), tajweed_rules=None), MappingPos(pos=(17, 18), tajweed_rules=None), @@ -681,7 +783,14 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(21, 22), tajweed_rules=None), MappingPos(pos=(22, 23), tajweed_rules=None), MappingPos(pos=(23, 24), tajweed_rules=None), - MappingPos(pos=(24, 26), tajweed_rules=None), + MappingPos( + pos=(24, 26), + tajweed_rules=[ + NormalMaddRule( + tag="waw", + ) + ], + ), None, None, MappingPos(pos=(26, 27), tajweed_rules=None), @@ -745,7 +854,14 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(83, 84), tajweed_rules=None), MappingPos(pos=(84, 85), tajweed_rules=None), MappingPos(pos=(85, 86), tajweed_rules=None), - MappingPos(pos=(86, 88), tajweed_rules=None), + MappingPos( + pos=(86, 88), + tajweed_rules=[ + NormalMaddRule( + tag="alif", + ) + ], + ), MappingPos(pos=(88, 89), 
tajweed_rules=None), MappingPos(pos=(89, 90), tajweed_rules=None), MappingPos(pos=(90, 91), tajweed_rules=None), @@ -772,7 +888,7 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(2, 6), tajweed_rules=None), None, MappingPos(pos=(6, 7), tajweed_rules=None), - MappingPos(pos=(7, 9), tajweed_rules=None), + MappingPos(pos=(7, 9), tajweed_rules=[NormalMaddRule(tag="alif")]), ], ), ( @@ -788,7 +904,7 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(4, 8), tajweed_rules=None), None, MappingPos(pos=(8, 9), tajweed_rules=None), - MappingPos(pos=(9, 11), tajweed_rules=None), + MappingPos(pos=(9, 11), tajweed_rules=[NormalMaddRule(tag="alif")]), ], ), ], @@ -804,11 +920,12 @@ def test_phonetizer_with_mappings( madd_aared_len=4, ) ph_out = quran_phonetizer(uth_text, moshaf) - assert ph_out.phonemes == ph_text - assert exp_mappings == ph_out.mappings print(uth_text) print(ph_out.phonemes) print(ph_out.mappings) + + assert ph_out.phonemes == ph_text + assert exp_mappings == ph_out.mappings for idx, uth_c in enumerate(uth_text): print(f"UTH_IDX: `{idx}`, SPAN: `{ph_out.mappings[idx]}`") ph_c = "" From 77a2a9c0a753c912b9d1e2053c9f9da3b40b2e57 Mon Sep 17 00:00:00 2001 From: obadx Date: Thu, 12 Feb 2026 15:01:14 +0200 Subject: [PATCH 2/4] add: complete refactory to code 2x faster mapping --- pyproject.toml | 3 + .../phonetics/conv_base_operation.py | 134 ++++++----- src/quran_transcript/phonetics/operations.py | 41 ++-- tests/test_sub_with_mapping.py | 12 +- tests/test_sub_with_mapping_pytest.py | 224 ++++++++++-------- uv.lock | 69 +++++- 6 files changed, 289 insertions(+), 194 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d2ef76e..89a9a00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,9 @@ classifiers = [ test = [ "pytest", ] +profile = [ + "pyinstrument>=5.1.2", +] [project.urls] Homepage = "https://github.com/obadx/quran-transcript" diff --git a/src/quran_transcript/phonetics/conv_base_operation.py 
b/src/quran_transcript/phonetics/conv_base_operation.py index 1ab4812..c3af53c 100644 --- a/src/quran_transcript/phonetics/conv_base_operation.py +++ b/src/quran_transcript/phonetics/conv_base_operation.py @@ -34,6 +34,7 @@ class MappingPos: pos: tuple[int, int] # start, (pythonic exlusive end) tajweed_rules: list[TajweedRule] | None = None + deleted: bool = False def add_tajweed_rule( self, new_tajweed_rules: TajweedRule | list[TajweedRule] | None @@ -51,19 +52,18 @@ def add_tajweed_rule( >>> # This will add the rule if tajweed_rules exists and rule is not None >>> # mapping.add_tajweed_rule(some_rule) """ + if not new_tajweed_rules: # covers None and [] + return + if self.tajweed_rules is None: + self.tajweed_rules = [] + # if new_tajweed_rules is a single rule, make it a list if isinstance(new_tajweed_rules, TajweedRule): - new_tajweed_rules = [new_tajweed_rules] - elif new_tajweed_rules is None: - new_tajweed_rules = [] - - if new_tajweed_rules != []: - if self.tajweed_rules is None: - self.tajweed_rules = [] - for rule in new_tajweed_rules: - self.tajweed_rules.append(rule) + self.tajweed_rules.append(new_tajweed_rules) + else: + self.tajweed_rules.extend(new_tajweed_rules) -MappingListType: TypeAlias = list[MappingPos | None] +MappingListType: TypeAlias = list[MappingPos] def merge_mappings( @@ -75,6 +75,11 @@ def merge_mappings( * This function maintains the relationship between character positions in the original text and their corresponding positions after one or more regex substitution operations. + **Important:** This function **mutates** the input `mappings` list in‑place. + It updates each `MappingPos` object with a new `pos` tuple and a new `deleted` + flag, and also merges the tajweed rules from the corresponding entries in + `new_mappings`. The length of the list remains unchanged. + Args: mappings: Previous character position mappings from original text. Each MappingPos represents a span (start, end) in the output text. 
`None` values indicate @@ -123,45 +128,38 @@ def merge_mappings( if new_mappings == []: raise ValueError("`new_mappings` should not be an empty list") - # TODO: add Tajweed rules depencance - merged_mappings: MappingListType = [None for _ in range(len(mappings))] - for idx, old_map in enumerate(mappings): - if old_map is not None: - start_map = None - new_start_idx = 0 - end_map = None - new_end_idx = 0 - for new_start_idx in range(old_map.pos[0], old_map.pos[1]): - start_map = new_mappings[new_start_idx] - if start_map is not None: - break - for new_end_idx in range(old_map.pos[1] - 1, old_map.pos[0] - 1, -1): - end_map = new_mappings[new_end_idx] - if end_map is not None: - break - - if start_map is not None and end_map is not None: - # Avoid copying by reference (instead copying by values) - # Avoiding changes mapping with refrenrece by other uninteded code - merged_mappings[idx] = MappingPos( - pos=(start_map.pos[0], end_map.pos[1]) + for idx in range(len(mappings)): + old_map = mappings[idx] + if old_map is None: + raise ValueError() + start = old_map.pos[0] + end = old_map.pos[1] + + if old_map.deleted: + if start < len(new_mappings): + mappings[idx].pos = ( + new_mappings[start].pos[0], + new_mappings[start].pos[0], ) - merged_mappings[idx].add_tajweed_rule(old_map.tajweed_rules) - for new_idx in range(new_start_idx, new_end_idx + 1): - if new_mappings[new_idx] is not None: - merged_mappings[idx].add_tajweed_rule( - new_mappings[new_idx].tajweed_rules - ) - elif start_map is not None: - # Single start mapping - merged_mappings[idx] = MappingPos(pos=start_map.pos) - merged_mappings[idx].add_tajweed_rule(start_map.tajweed_rules) - elif end_map is not None: - # Single End mapping - merged_mappings[idx] = MappingPos(pos=end_map.pos) - merged_mappings[idx].add_tajweed_rule(end_map.tajweed_rules) + else: + mappings[idx].pos = ( + new_mappings[-1].pos[1], + new_mappings[-1].pos[1], + ) + + else: + mappings[idx].pos = ( + new_mappings[start].pos[0], + 
new_mappings[end - 1].pos[1], + ) - return merged_mappings + deleted = True + for new_idx in range(start, end): + mappings[idx].add_tajweed_rule(new_mappings[new_idx].tajweed_rules) + deleted = deleted and new_mappings[new_idx].deleted + mappings[idx].deleted = deleted + + return mappings def get_mappings( @@ -182,6 +180,11 @@ def get_mappings( transcription. It can associate tajweed rules with affected character spans and merge with existing mappings from previous transformations. + **Important:** This function **mutates** the input `mappings` list in‑place. + It updates each `MappingPos` object with a new `pos` tuple and a new `deleted` + flag, and also merges the tajweed rules from the corresponding entries in + `new_mappings`. The length of the list remains unchanged. + Args: text: Original input text before transformation. new_text: Transformed text after operation (e.g., regex substitution). @@ -244,11 +247,11 @@ def get_mappings( >>> new_text = "abc" >>> mappings = get_mappings(text, new_text) >>> mappings[-1] # Last character deleted - None + MappingsPos(pos=(3,3), deleted=True) Note: - Character positions use Python-style slice notation (inclusive start, exclusive end) - - None values in mappings indicate deleted characters + - `MappingPos(pos=(x, x), deleted=True)` values in mappings indicate deleted characters - Tajweed rules are associated with affected character spans when provided - The function validates mapping continuity and raises errors on inconsistencies - Special handling exists for Quranic orthographic patterns like shadda @@ -256,13 +259,6 @@ def get_mappings( if text == "": return [] - # rev_mapings = {} - # if mappings: - # for m_idx, map_pos in enumerate(mappings): - # if map_pos is not None: - # for idx in range(map_pos.pos[0], map_pos.pos[1]): - # rev_mapings[idx] = m_idx - # NOTE: Opcoes operation order is: equal, insert, replace ops = opcodes(text, new_text) """ @@ -323,7 +319,9 @@ def get_mappings( ) # increae the end pos to append 
the insert for old_idx in range(next_op[1], next_op[2]): - new_mappings[old_idx] = None + new_mappings[old_idx] = MappingPos( + pos=(next_op[4], next_op[4]), deleted=True + ) to_del_poses.add(old_idx) # insert + replace else: @@ -337,7 +335,9 @@ def get_mappings( new_mappings[next_op[1]] = new_map_pos # assignign the rest to None for old_idx in range(next_op[1] + 1, next_op[2]): - new_mappings[old_idx] = None + new_mappings[old_idx] = MappingPos( + pos=(next_op[4], next_op[4]), deleted=True + ) to_del_poses.add(old_idx) # equal only @@ -391,11 +391,18 @@ def get_mappings( if new_mappings[old_idx] is None: new_mappings[old_idx] = MappingPos(pos=(new_idx, new_idx + 1)) elif curr_op[0] == "delete": - ... + for old_idx in range(curr_op[1], curr_op[2]): + new_mappings[old_idx] = MappingPos( + pos=(curr_op[3], curr_op[3]), deleted=True + ) + new_mappings[old_idx].add_tajweed_rule(tajweed_rule) last_op = curr_op curr_op = next_op + # TODO: remove this + assert all(m is not None for m in new_mappings) + # This not optimal but in case of إدغام كامل we want to delete the first letter and leave the next one # for example: # لكم ما @@ -407,10 +414,15 @@ def get_mappings( ): first = re_out.span()[0] second = re_out.span()[1] - 2 - if new_mappings[first] is not None and new_mappings[second] is None: + if (not new_mappings[first].deleted) and new_mappings[second].deleted: # Swapping new_mappings[second] = new_mappings[first] - new_mappings[first] = None + # first and space if exists + for idx in range(first, second): + new_mappings[idx] = MappingPos( + pos=(new_mappings[second].pos[0], new_mappings[second].pos[0]), + deleted=True, + ) new_mappings = merge_mappings(mappings, new_mappings) diff --git a/src/quran_transcript/phonetics/operations.py b/src/quran_transcript/phonetics/operations.py index 83a6701..fbf5d57 100644 --- a/src/quran_transcript/phonetics/operations.py +++ b/src/quran_transcript/phonetics/operations.py @@ -78,17 +78,13 @@ def _process_mappings( last_pos = 0 # 
Adding mapping offsets for idx in range(start_idx, end_idx): - if disc_map[ptr] is None: - mappings[idx] = None - else: - # Avoiding copyiing object by refrence - mappings[idx] = MappingPos( - pos=( - disc_map[ptr].pos[0] + start_offset, - disc_map[ptr].pos[1] + start_offset, - ) - ) - last_pos = mappings[idx].pos[1] + # Avoiding copyiing object by refrence + mappings[idx].pos = ( + disc_map[ptr].pos[0] + start_offset, + disc_map[ptr].pos[1] + start_offset, + ) + mappings[idx].deleted = disc_map[ptr].deleted + last_pos = mappings[idx].pos[1] ptr += 1 end = ( @@ -96,20 +92,15 @@ def _process_mappings( if (re_idx + 1) == len(re_outs) else re_outs[idx + 1].span()[0] ) - # Shifting the rest of position to the rigth + # Shifting the rest of position to the right offset = None for idx in range(end_idx, end): - if (offset is not None) and (mappings[idx] is not None): - mappings[idx].pos = ( - mappings[idx].pos[0] + offset, - mappings[idx].pos[1] + offset, - ) - elif mappings[idx] is not None: + if offset is None: offset = last_pos - mappings[idx].pos[0] - mappings[idx].pos = ( - mappings[idx].pos[0] + offset, - mappings[idx].pos[1] + offset, - ) + mappings[idx].pos = ( + mappings[idx].pos[0] + offset, + mappings[idx].pos[1] + offset, + ) return mappings @@ -129,7 +120,11 @@ def _get_single_word_mapping(self, uth_word: str, rep: str) -> MappingListType: mappings.append(MappingPos(pos=(ph_start, ph_end))) # Deleting madd sign for c in chars[1:]: - mappings.append(None) + mappings.append( + MappingPos( + pos=(mappings[-1].pos[1], mappings[-1].pos[1]), deleted=True + ) + ) uth_start += len(chars) diff --git a/tests/test_sub_with_mapping.py b/tests/test_sub_with_mapping.py index d046a3e..b29d140 100644 --- a/tests/test_sub_with_mapping.py +++ b/tests/test_sub_with_mapping.py @@ -1,5 +1,6 @@ from quran_transcript.phonetics.phonetizer import quran_phonetizer from quran_transcript import Aya, MoshafAttributes +from pyinstrument import Profiler if __name__ == "__main__": @@ -12,10 
+13,11 @@ ) aya = Aya() aya = Aya(1, 1) - aya = Aya(12, 1) - # aya = Aya(2, 1) + # aya = Aya(12, 1) + aya = Aya(2, 1) + aya = Aya(19, 1) # aya = Aya(75, 27) - aya = Aya(2, 6) + # aya = Aya(2, 6) # aya = Aya(3, 1) # aya = Aya(30, 28) # aya = Aya(2, 9) @@ -24,7 +26,10 @@ # uth_text = aya.get_by_imlaey_words(start=7, window=2).uthmani # uth_text = "لَكُم مَّا" + profiler = Profiler() + profiler.start() ph_out = quran_phonetizer(uth_text, moshaf) + profiler.stop() ph_text = ph_out.phonemes print(uth_text) print(ph_out.phonemes) @@ -39,6 +44,7 @@ print("-" * 40) + print(profiler.output_text(unicode=True, color=True)) """ * meem moshaddah * Lam Ism Allah should be deleted at [8] [Not the best thing but works] diff --git a/tests/test_sub_with_mapping_pytest.py b/tests/test_sub_with_mapping_pytest.py index 425167b..b89546e 100644 --- a/tests/test_sub_with_mapping_pytest.py +++ b/tests/test_sub_with_mapping_pytest.py @@ -54,7 +54,7 @@ "aaaacd", [ MappingPos(pos=(0, 4), tajweed_rules=None), - None, + MappingPos(pos=(4, 4), deleted=True), MappingPos(pos=(4, 5), tajweed_rules=None), MappingPos(pos=(5, 6), tajweed_rules=None), ], @@ -67,7 +67,7 @@ "mmmmcd", [ MappingPos(pos=(0, 4), tajweed_rules=None), - None, + MappingPos(pos=(4, 4), deleted=True), MappingPos(pos=(4, 5), tajweed_rules=None), MappingPos(pos=(5, 6), tajweed_rules=None), ], @@ -80,7 +80,7 @@ "acd", [ MappingPos(pos=(0, 1), tajweed_rules=None), - None, + MappingPos(pos=(1, 1), deleted=True), MappingPos(pos=(1, 2), tajweed_rules=None), MappingPos(pos=(2, 3), tajweed_rules=None), ], @@ -119,9 +119,9 @@ "aaaac", [ MappingPos(pos=(0, 4), tajweed_rules=None), - None, + MappingPos(pos=(4, 4), deleted=True), MappingPos(pos=(4, 5), tajweed_rules=None), - None, + MappingPos(pos=(5, 5), deleted=True), ], ), # Test case 9: equal + insert + equal @@ -202,16 +202,16 @@ def test_sub_with_mapping_operations( MappingPos(pos=(4, 5), tajweed_rules=None), MappingPos(pos=(5, 6), tajweed_rules=None), MappingPos(pos=(6, 7), 
tajweed_rules=None), - None, + MappingPos(pos=(7, 7), deleted=True), MappingPos(pos=(7, 8), tajweed_rules=None), - None, + MappingPos(pos=(8, 8), deleted=True), MappingPos(pos=(8, 9), tajweed_rules=None), MappingPos(pos=(9, 11), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(11, 12), tajweed_rules=None), MappingPos(pos=(12, 13), tajweed_rules=None), MappingPos(pos=(13, 14), tajweed_rules=None), - None, - None, + MappingPos(pos=(14, 14), deleted=True), + MappingPos(pos=(14, 14), deleted=True), MappingPos(pos=(14, 15), tajweed_rules=None), MappingPos(pos=(15, 16), tajweed_rules=None), MappingPos(pos=(16, 17), tajweed_rules=None), @@ -219,13 +219,13 @@ def test_sub_with_mapping_operations( MappingPos(pos=(18, 19), tajweed_rules=None), MappingPos(pos=(19, 20), tajweed_rules=None), MappingPos(pos=(20, 21), tajweed_rules=None), - None, + MappingPos(pos=(21, 21), deleted=True), MappingPos(pos=(21, 22), tajweed_rules=None), MappingPos(pos=(22, 23), tajweed_rules=None), MappingPos(pos=(23, 24), tajweed_rules=None), MappingPos(pos=(24, 25), tajweed_rules=None), - None, - None, + MappingPos(pos=(25, 25), deleted=True), + MappingPos(pos=(25, 25), deleted=True), MappingPos(pos=(25, 26), tajweed_rules=None), MappingPos(pos=(26, 27), tajweed_rules=None), MappingPos(pos=(27, 28), tajweed_rules=None), @@ -233,7 +233,7 @@ def test_sub_with_mapping_operations( MappingPos(pos=(29, 30), tajweed_rules=None), MappingPos(pos=(30, 34), tajweed_rules=None), MappingPos(pos=(34, 35), tajweed_rules=None), - None, + MappingPos(pos=(35, 35), deleted=True), ], [ MappingPos(pos=(0, 1), tajweed_rules=None), @@ -243,16 +243,16 @@ def test_sub_with_mapping_operations( MappingPos(pos=(4, 5), tajweed_rules=None), MappingPos(pos=(5, 6), tajweed_rules=None), MappingPos(pos=(6, 7), tajweed_rules=None), - None, + MappingPos(pos=(7, 7), deleted=True), MappingPos(pos=(7, 8), tajweed_rules=None), - None, + MappingPos(pos=(8, 8), deleted=True), MappingPos(pos=(8, 9), tajweed_rules=None), 
MappingPos(pos=(9, 12), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(12, 13), tajweed_rules=None), MappingPos(pos=(13, 14), tajweed_rules=None), MappingPos(pos=(14, 15), tajweed_rules=None), - None, - None, + MappingPos(pos=(15, 15), deleted=True), + MappingPos(pos=(15, 15), deleted=True), MappingPos(pos=(15, 16), tajweed_rules=None), MappingPos(pos=(16, 17), tajweed_rules=None), MappingPos(pos=(17, 18), tajweed_rules=None), @@ -260,13 +260,13 @@ def test_sub_with_mapping_operations( MappingPos(pos=(19, 20), tajweed_rules=None), MappingPos(pos=(20, 21), tajweed_rules=None), MappingPos(pos=(21, 22), tajweed_rules=None), - None, + MappingPos(pos=(22, 22), deleted=True), MappingPos(pos=(22, 24), tajweed_rules=None), MappingPos(pos=(24, 25), tajweed_rules=None), MappingPos(pos=(25, 26), tajweed_rules=None), MappingPos(pos=(26, 27), tajweed_rules=None), - None, - None, + MappingPos(pos=(27, 27), deleted=True), + MappingPos(pos=(27, 27), deleted=True), MappingPos(pos=(27, 28), tajweed_rules=None), MappingPos(pos=(28, 29), tajweed_rules=None), MappingPos(pos=(29, 30), tajweed_rules=None), @@ -274,7 +274,7 @@ def test_sub_with_mapping_operations( MappingPos(pos=(31, 32), tajweed_rules=None), MappingPos(pos=(32, 36), tajweed_rules=None), MappingPos(pos=(36, 37), tajweed_rules=None), - None, + MappingPos(pos=(37, 37), deleted=True), ], ), ( @@ -298,8 +298,8 @@ def test_sub_with_mapping_operations( MappingPos(pos=(16, 17), tajweed_rules=None), MappingPos(pos=(17, 21), tajweed_rules=None), MappingPos(pos=(21, 22), tajweed_rules=None), - None, - None, + MappingPos(pos=(22, 22), deleted=True), + MappingPos(pos=(22, 22), deleted=True), ], [ MappingPos(pos=(0, 1), tajweed_rules=None), @@ -319,8 +319,8 @@ def test_sub_with_mapping_operations( MappingPos(pos=(16, 17), tajweed_rules=None), MappingPos(pos=(17, 21), tajweed_rules=None), MappingPos(pos=(21, 23), tajweed_rules=None), - None, - None, + MappingPos(pos=(23, 23), deleted=True), + MappingPos(pos=(23, 23), 
deleted=True), ], ), ], @@ -354,8 +354,7 @@ def test_sub_with_mapping_edge_cases(): assert result_text == "abcd" assert len(result_mappings) == 4 for i, mapping in enumerate(result_mappings): - if mapping is not None: - assert mapping.pos == (i, i + 1) + assert mapping.pos == (i, i + 1) def test_mapping_pos_consistency(): @@ -376,7 +375,7 @@ class TestMergeMappings: def test_merge_mappings_none_input(self): """Test when first parameter is None - should return new_mappings.""" - new_mappings: list[MappingPos | None] = [ + new_mappings: MappingListType = [ MappingPos(pos=(0, 1)), MappingPos(pos=(1, 2)), MappingPos(pos=(2, 3)), @@ -387,19 +386,19 @@ def test_merge_mappings_none_input(self): def test_merge_mappings_identity(self): """Test simple identity mapping - should preserve positions.""" - mappings: list[MappingPos | None] = [ + mappings: MappingListType = [ MappingPos(pos=(0, 1)), MappingPos(pos=(1, 2)), MappingPos(pos=(2, 3)), ] - new_mappings: list[MappingPos | None] = [ + new_mappings: MappingListType = [ MappingPos(pos=(0, 1)), MappingPos(pos=(1, 2)), MappingPos(pos=(2, 3)), ] - expected: list[MappingPos | None] = [ + expected: MappingListType = [ MappingPos(pos=(0, 1)), MappingPos(pos=(1, 2)), MappingPos(pos=(2, 3)), @@ -440,14 +439,14 @@ def test_merge_mappings_contraction(self): new_mappings = [ MappingPos(pos=(0, 1)), # Contracted range - None, # Missing position + MappingPos(pos=(1, 1), deleted=True), # Missing position MappingPos(pos=(1, 2)), - None, + MappingPos(pos=(2, 2), deleted=True), ] expected = [ MappingPos(pos=(0, 2)), # Should span from first to last non-None - None, + MappingPos(pos=(2, 2), deleted=True), ] result = merge_mappings(mappings, new_mappings) @@ -467,17 +466,17 @@ def test_merge_mappings_contraction(self): MappingPos(pos=(5, 6)), ], [ - None, + MappingPos(pos=(0, 0), deleted=True), MappingPos(pos=(0, 1)), - None, - MappingPos(pos=(2, 3)), + MappingPos(pos=(1, 1), deleted=True), + MappingPos(pos=(1, 3)), MappingPos(pos=(3, 6)), - 
None, + MappingPos(pos=(6, 6), deleted=True), ], [ MappingPos(pos=(0, 1)), - MappingPos(pos=(2, 6)), - None, + MappingPos(pos=(1, 6)), + MappingPos(pos=(6, 6), deleted=True), ], # Should span first to last non-None ), # Test start and eend in between + tajweed rules (input) @@ -488,17 +487,17 @@ def test_merge_mappings_contraction(self): MappingPos(pos=(5, 6)), ], [ - None, + MappingPos(pos=(0, 0), deleted=True), MappingPos(pos=(0, 1)), - None, - MappingPos(pos=(2, 3)), + MappingPos(pos=(1, 1), deleted=True), + MappingPos(pos=(1, 3)), MappingPos(pos=(3, 6)), - None, + MappingPos(pos=(6, 6), deleted=True), ], [ MappingPos(pos=(0, 1), tajweed_rules=[NormalMaddRule(tag="alif")]), - MappingPos(pos=(2, 6)), - None, + MappingPos(pos=(1, 6)), + MappingPos(pos=(6, 6), deleted=True), ], # Should span first to last non-None ), # Test start and eend in between + tajweed rules (output) @@ -509,17 +508,17 @@ def test_merge_mappings_contraction(self): MappingPos(pos=(5, 6)), ], [ - None, + MappingPos(pos=(0, 0), deleted=True), MappingPos(pos=(0, 1)), - None, - MappingPos(pos=(2, 3), tajweed_rules=[NormalMaddRule(tag="alif")]), + MappingPos(pos=(1, 1), deleted=True), + MappingPos(pos=(1, 3), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(3, 6)), - None, + MappingPos(pos=(6, 6), deleted=True), ], [ MappingPos(pos=(0, 1)), - MappingPos(pos=(2, 6), tajweed_rules=[NormalMaddRule(tag="alif")]), - None, + MappingPos(pos=(1, 6), tajweed_rules=[NormalMaddRule(tag="alif")]), + MappingPos(pos=(6, 6), deleted=True), ], # Should span first to last non-None ), # Test start and eend in between + tajweed rules (both) @@ -530,33 +529,37 @@ def test_merge_mappings_contraction(self): MappingPos(pos=(5, 6)), ], [ - None, + MappingPos(pos=(0, 0), deleted=True), MappingPos(pos=(0, 1)), - None, - MappingPos(pos=(2, 3), tajweed_rules=[NormalMaddRule(tag="alif")]), + MappingPos(pos=(1, 1), deleted=True), + MappingPos(pos=(1, 3), tajweed_rules=[NormalMaddRule(tag="alif")]), 
MappingPos(pos=(3, 6)), - None, + MappingPos(pos=(6, 6), deleted=True), ], [ MappingPos(pos=(0, 1)), MappingPos( - pos=(2, 6), + pos=(1, 6), tajweed_rules=[Qalqalah(), NormalMaddRule(tag="alif")], ), - None, + MappingPos(pos=(6, 6), deleted=True), ], # Should span first to last non-None ), # Test partial None mappings - some positions in range are None ( [MappingPos(pos=(0, 3))], - [MappingPos(pos=(0, 1)), None, MappingPos(pos=(2, 3))], + [ + MappingPos(pos=(0, 1)), + MappingPos(pos=(1, 1), deleted=True), + MappingPos(pos=(2, 3)), + ], [MappingPos(pos=(0, 3))], # Should span first to last non-None ), # Test single mapping to multiple new mappings ( [MappingPos(pos=(1, 2))], - [None, MappingPos(pos=(1, 2))], - [MappingPos(pos=(1, 2))], # Should get the non-None range + [MappingPos(pos=(0, 0), deleted=True), MappingPos(pos=(0, 2))], + [MappingPos(pos=(0, 2))], # Should get the non-None range ), # Test edge case with only start mapping ( @@ -567,8 +570,8 @@ def test_merge_mappings_contraction(self): # Test edge case with only end mapping ( [MappingPos(pos=(0, 1))], - [None], - [None], # Should use end position only + [MappingPos(pos=(0, 0), deleted=True)], + [MappingPos(pos=(0, 0), deleted=True)], # Should use end position only ), ], ) @@ -608,7 +611,7 @@ def test_merge_mappings_complex_range(self): new_mappings = [ MappingPos(pos=(0, 2)), # Expanded first range MappingPos(pos=(2, 4)), - None, # Gap + MappingPos(pos=(4, 4), deleted=True), # Gap MappingPos(pos=(4, 7)), # Expanded second range MappingPos(pos=(7, 8)), MappingPos(pos=(8, 9)), # Expanded third range @@ -633,7 +636,7 @@ def test_merge_mappings_complex_range(self): [ MappingPos(pos=(0, 6), tajweed_rules=None), MappingPos(pos=(6, 16), tajweed_rules=None), - None, + MappingPos(pos=(16, 16), deleted=True), MappingPos( pos=(16, 20), tajweed_rules=[ @@ -646,7 +649,7 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(21, 22), tajweed_rules=None), MappingPos(pos=(22, 23), tajweed_rules=None), 
MappingPos(pos=(23, 24), tajweed_rules=None), - None, + MappingPos(pos=(24, 24), deleted=True), MappingPos(pos=(24, 25), tajweed_rules=None), MappingPos(pos=(25, 26), tajweed_rules=None), MappingPos(pos=(26, 27), tajweed_rules=None), @@ -662,7 +665,7 @@ def test_merge_mappings_complex_range(self): ), MappingPos(pos=(31, 32), tajweed_rules=None), MappingPos(pos=(32, 33), tajweed_rules=None), - None, + MappingPos(pos=(33, 33), deleted=True), MappingPos( pos=(33, 35), tajweed_rules=[ @@ -674,14 +677,14 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(35, 36), tajweed_rules=None), MappingPos(pos=(36, 37), tajweed_rules=None), MappingPos(pos=(37, 38), tajweed_rules=None), - None, + MappingPos(pos=(38, 38), deleted=True), MappingPos(pos=(38, 39), tajweed_rules=None), - None, + MappingPos(pos=(39, 39), deleted=True), MappingPos(pos=(39, 40), tajweed_rules=None), MappingPos(pos=(40, 41), tajweed_rules=None), MappingPos(pos=(41, 42), tajweed_rules=None), MappingPos(pos=(42, 43), tajweed_rules=None), - None, + MappingPos(pos=(43, 43), deleted=True), MappingPos( pos=(43, 45), tajweed_rules=[ @@ -693,16 +696,16 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(45, 46), tajweed_rules=None), MappingPos(pos=(46, 47), tajweed_rules=None), MappingPos(pos=(47, 48), tajweed_rules=None), - None, + MappingPos(pos=(48, 48), deleted=True), MappingPos(pos=(48, 49), tajweed_rules=None), - None, + MappingPos(pos=(49, 49), deleted=True), MappingPos(pos=(49, 50), tajweed_rules=None), MappingPos(pos=(50, 51), tajweed_rules=None), MappingPos(pos=(51, 52), tajweed_rules=None), MappingPos(pos=(52, 53), tajweed_rules=None), MappingPos(pos=(53, 57), tajweed_rules=None), MappingPos(pos=(57, 58), tajweed_rules=None), - None, + MappingPos(pos=(58, 58), deleted=True), ], ), ( @@ -712,34 +715,34 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(0, 1), tajweed_rules=None), MappingPos(pos=(1, 2), tajweed_rules=None), MappingPos(pos=(2, 3), tajweed_rules=None), - 
None, + MappingPos(pos=(3, 3), deleted=True), MappingPos(pos=(3, 4), tajweed_rules=None), MappingPos(pos=(4, 5), tajweed_rules=None), MappingPos(pos=(5, 6), tajweed_rules=None), - None, - None, + MappingPos(pos=(6, 6), deleted=True), + MappingPos(pos=(6, 6), deleted=True), MappingPos(pos=(6, 7), tajweed_rules=None), MappingPos(pos=(7, 8), tajweed_rules=None), MappingPos(pos=(8, 11), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(11, 12), tajweed_rules=None), MappingPos(pos=(12, 13), tajweed_rules=None), MappingPos(pos=(13, 14), tajweed_rules=None), - None, - None, + MappingPos(pos=(14, 14), deleted=True), + MappingPos(pos=(14, 14), deleted=True), MappingPos(pos=(14, 15), tajweed_rules=None), MappingPos(pos=(15, 16), tajweed_rules=None), MappingPos(pos=(16, 17), tajweed_rules=None), MappingPos(pos=(17, 18), tajweed_rules=None), - None, + MappingPos(pos=(18, 18), deleted=True), MappingPos(pos=(18, 19), tajweed_rules=None), MappingPos(pos=(19, 20), tajweed_rules=None), - None, + MappingPos(pos=(20, 20), deleted=True), MappingPos(pos=(20, 22), tajweed_rules=[NormalMaddRule(tag="alif")]), MappingPos(pos=(22, 23), tajweed_rules=None), MappingPos(pos=(23, 24), tajweed_rules=None), MappingPos(pos=(24, 25), tajweed_rules=None), - None, - None, + MappingPos(pos=(25, 25), deleted=True), + MappingPos(pos=(25, 25), deleted=True), MappingPos(pos=(25, 26), tajweed_rules=None), MappingPos(pos=(26, 27), tajweed_rules=None), MappingPos(pos=(27, 28), tajweed_rules=None), @@ -747,7 +750,7 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(29, 30), tajweed_rules=None), MappingPos(pos=(30, 34), tajweed_rules=None), MappingPos(pos=(34, 35), tajweed_rules=None), - None, + MappingPos(pos=(35, 35), deleted=True), ], ), ( @@ -757,10 +760,10 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(0, 1), tajweed_rules=None), MappingPos(pos=(1, 2), tajweed_rules=None), MappingPos(pos=(2, 6), tajweed_rules=None), - None, + MappingPos(pos=(6, 6), deleted=True), 
MappingPos(pos=(6, 7), tajweed_rules=None), MappingPos(pos=(7, 8), tajweed_rules=None), - None, + MappingPos(pos=(8, 8), deleted=True), MappingPos(pos=(8, 9), tajweed_rules=None), MappingPos(pos=(9, 10), tajweed_rules=None), MappingPos(pos=(10, 11), tajweed_rules=None), @@ -791,15 +794,15 @@ def test_merge_mappings_complex_range(self): ) ], ), - None, - None, + MappingPos(pos=(26, 26), deleted=True), + MappingPos(pos=(26, 26), deleted=True), MappingPos(pos=(26, 27), tajweed_rules=None), MappingPos(pos=(27, 28), tajweed_rules=None), MappingPos(pos=(28, 29), tajweed_rules=None), MappingPos(pos=(29, 30), tajweed_rules=None), MappingPos(pos=(30, 31), tajweed_rules=None), MappingPos(pos=(31, 35), tajweed_rules=None), - None, + MappingPos(pos=(35, 35), deleted=True), MappingPos(pos=(35, 36), tajweed_rules=None), MappingPos(pos=(36, 38), tajweed_rules=None), MappingPos(pos=(38, 39), tajweed_rules=None), @@ -808,11 +811,11 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(41, 42), tajweed_rules=None), MappingPos(pos=(42, 43), tajweed_rules=None), MappingPos(pos=(43, 44), tajweed_rules=None), - None, + MappingPos(pos=(44, 44), deleted=True), MappingPos(pos=(44, 45), tajweed_rules=None), MappingPos(pos=(45, 46), tajweed_rules=None), MappingPos(pos=(46, 47), tajweed_rules=None), - None, + MappingPos(pos=(47, 47), deleted=True), MappingPos(pos=(47, 48), tajweed_rules=None), MappingPos(pos=(48, 49), tajweed_rules=None), MappingPos(pos=(49, 50), tajweed_rules=None), @@ -822,23 +825,23 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(55, 56), tajweed_rules=None), MappingPos(pos=(56, 57), tajweed_rules=None), MappingPos(pos=(57, 58), tajweed_rules=None), - None, + MappingPos(pos=(58, 58), deleted=True), MappingPos(pos=(58, 59), tajweed_rules=None), MappingPos(pos=(59, 60), tajweed_rules=None), MappingPos(pos=(60, 61), tajweed_rules=None), MappingPos(pos=(61, 62), tajweed_rules=None), MappingPos(pos=(62, 63), tajweed_rules=None), - None, + 
MappingPos(pos=(63, 63), deleted=True), MappingPos(pos=(63, 64), tajweed_rules=None), MappingPos(pos=(64, 65), tajweed_rules=None), MappingPos(pos=(65, 66), tajweed_rules=None), MappingPos(pos=(66, 67), tajweed_rules=None), - None, + MappingPos(pos=(67, 67), deleted=True), MappingPos(pos=(67, 68), tajweed_rules=None), MappingPos(pos=(68, 69), tajweed_rules=None), MappingPos(pos=(69, 70), tajweed_rules=None), MappingPos(pos=(70, 71), tajweed_rules=None), - None, + MappingPos(pos=(71, 71), deleted=True), MappingPos(pos=(71, 72), tajweed_rules=None), MappingPos(pos=(72, 73), tajweed_rules=None), MappingPos(pos=(73, 74), tajweed_rules=None), @@ -846,11 +849,11 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(77, 78), tajweed_rules=None), MappingPos(pos=(78, 79), tajweed_rules=None), MappingPos(pos=(79, 80), tajweed_rules=None), - None, + MappingPos(pos=(80, 80), deleted=True), MappingPos(pos=(80, 81), tajweed_rules=None), MappingPos(pos=(81, 82), tajweed_rules=None), MappingPos(pos=(82, 83), tajweed_rules=None), - None, + MappingPos(pos=(83, 83), deleted=True), MappingPos(pos=(83, 84), tajweed_rules=None), MappingPos(pos=(84, 85), tajweed_rules=None), MappingPos(pos=(85, 86), tajweed_rules=None), @@ -866,14 +869,14 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(89, 90), tajweed_rules=None), MappingPos(pos=(90, 91), tajweed_rules=None), MappingPos(pos=(91, 92), tajweed_rules=None), - None, + MappingPos(pos=(92, 92), deleted=True), MappingPos(pos=(92, 93), tajweed_rules=None), MappingPos(pos=(93, 94), tajweed_rules=None), MappingPos(pos=(94, 95), tajweed_rules=None), MappingPos(pos=(95, 96), tajweed_rules=None), MappingPos(pos=(96, 100), tajweed_rules=None), MappingPos(pos=(100, 101), tajweed_rules=None), - None, + MappingPos(pos=(101, 101), deleted=True), ], ), ( @@ -881,12 +884,12 @@ def test_merge_mappings_complex_range(self): "مِممممَاا", [ MappingPos(pos=(0, 1), tajweed_rules=None), - None, + MappingPos(pos=(1, 1), deleted=True), 
MappingPos(pos=(1, 2), tajweed_rules=None), - None, - None, + MappingPos(pos=(2, 2), deleted=True), + MappingPos(pos=(2, 2), deleted=True), MappingPos(pos=(2, 6), tajweed_rules=None), - None, + MappingPos(pos=(6, 6), deleted=True), MappingPos(pos=(6, 7), tajweed_rules=None), MappingPos(pos=(7, 9), tajweed_rules=[NormalMaddRule(tag="alif")]), ], @@ -899,14 +902,25 @@ def test_merge_mappings_complex_range(self): MappingPos(pos=(1, 2), tajweed_rules=None), MappingPos(pos=(2, 3), tajweed_rules=None), MappingPos(pos=(3, 4), tajweed_rules=None), - None, - None, + MappingPos(pos=(4, 4), deleted=True), + MappingPos(pos=(4, 4), deleted=True), MappingPos(pos=(4, 8), tajweed_rules=None), - None, + MappingPos(pos=(8, 8), deleted=True), MappingPos(pos=(8, 9), tajweed_rules=None), MappingPos(pos=(9, 11), tajweed_rules=[NormalMaddRule(tag="alif")]), ], ), + ( + "الٓمٓ", + "ءَلِف لَااااااممممِۦۦۦۦۦۦم", + [ + MappingPos(pos=(0, 6), tajweed_rules=None, deleted=False), + MappingPos(pos=(6, 14), tajweed_rules=None, deleted=False), + MappingPos(pos=(14, 14), tajweed_rules=None, deleted=True), + MappingPos(pos=(14, 26), tajweed_rules=None, deleted=False), + MappingPos(pos=(26, 26), tajweed_rules=None, deleted=True), + ], + ), ], ) def test_phonetizer_with_mappings( diff --git a/uv.lock b/uv.lock index 310fa11..d202f6e 100644 --- a/uv.lock +++ b/uv.lock @@ -1,6 +1,11 @@ version = 1 revision = 3 requires-python = ">=3.10" +resolution-markers = [ + "python_full_version >= '3.14'", + "python_full_version >= '3.11' and python_full_version < '3.14'", + "python_full_version < '3.11'", +] [[package]] name = "annotated-types" @@ -25,7 +30,7 @@ name = "exceptiongroup" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/0b/9f/a65090624ecf468cdca03533906e7c69ed7588582240cfe7cc9e770b50eb/exceptiongroup-1.3.0.tar.gz", hash = "sha256:b241f5885f560bc56a59ee63ca4c6a8bfa46ae4ad651af316d4e81817bb9fd88", size = 29749, upload-time = "2025-05-10T17:42:51.123Z" } wheels = [ @@ -284,6 +289,62 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, ] +[[package]] +name = "pyinstrument" +version = "5.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/32/7f/d3c4ef7c43f3294bd5a475dfa6f295a9fee5243c292d5c8122044fa83bcb/pyinstrument-5.1.2.tar.gz", hash = "sha256:af149d672da9493fa37334a1cc68f7b80c3e6cb9fd99b9e426c447db5c650bf0", size = 266889, upload-time = "2026-01-04T18:38:58.464Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/43/74/c66e1bf3565600d78f53195efb6f8fd31610f85a58aa3fee39c56bf71d1b/pyinstrument-5.1.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f224fe80ba288a00980af298d3808219f9d246fd95b4f91729c9c33a0dc54fe6", size = 131470, upload-time = "2026-01-04T18:37:22.536Z" }, + { url = "https://files.pythonhosted.org/packages/1a/6b/606c5bfa311b5be74f58ef505c678216dda2be3b76a2ac770c2b0fccff77/pyinstrument-5.1.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7df09fc0d5b72daf48b73cdf07738761bff7f656c81aff686b3ccdd7d2abe236", size = 124567, upload-time = "2026-01-04T18:37:24.161Z" }, + { url = "https://files.pythonhosted.org/packages/15/70/c8a88defb77873513971f590549c48ceb70f7ef10f30a689762ef36dd877/pyinstrument-5.1.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:75a7e17377d4405666bbaf126b1fd7bbb7e206d7246e6db3d62864d3d4790ae3", size = 149205, upload-time = 
"2026-01-04T18:37:25.696Z" }, + { url = "https://files.pythonhosted.org/packages/8f/4b/0e64fefb939af472c3fbc63ab45224766447bde73f51579f3ecc335b0a49/pyinstrument-5.1.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5381cc6583d26e04d9298acded4242f4fe71986f1472c8aee6992c6816f0cac5", size = 147900, upload-time = "2026-01-04T18:37:27.343Z" }, + { url = "https://files.pythonhosted.org/packages/38/6e/b4209711c61176acfeb6c351e9f88a37ed3d3bc3b749c374c0a655ee8f50/pyinstrument-5.1.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ec08a530bef8d3492d31d8b0b12d0cfde09539f2a1c4b9678662ebc3c843e478", size = 148133, upload-time = "2026-01-04T18:37:29.047Z" }, + { url = "https://files.pythonhosted.org/packages/26/28/f323b70789833baf0628af7b9f797b8c1a13b695bd8aa582b1312f14b602/pyinstrument-5.1.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d671168508129b472be570bc9aee361190ba917b997c703bd134bb4de445ce7", size = 147652, upload-time = "2026-01-04T18:37:30.682Z" }, + { url = "https://files.pythonhosted.org/packages/16/cd/9b0af0307a3a2cffb48ca76275c50b8bec3f85ca6e7b996e2e6cfbda1207/pyinstrument-5.1.2-cp310-cp310-win32.whl", hash = "sha256:5957a94f84564b374a7f856d1b322345d600964280b0d687b8ddcc483f21e576", size = 125793, upload-time = "2026-01-04T18:37:31.906Z" }, + { url = "https://files.pythonhosted.org/packages/05/89/fe4c650c252aefb8064bfdff6c0a020d33d15c55dc22abfa1f352dcc2dd1/pyinstrument-5.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:38a2180a7801c51610b50e5d423674b21872efd019ccf05a11b7f9016cb1dcfc", size = 126679, upload-time = "2026-01-04T18:37:33.59Z" }, + { url = "https://files.pythonhosted.org/packages/79/ef/0288edd620fb0cf2074d8c8e3567007a6bac66307b839d99988563de4eb8/pyinstrument-5.1.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3739a05583ea6312c385eb59fe985cd20d9048e95f9eeeb6a2f6c35202e2d36e", size = 131284, upload-time = "2026-01-04T18:37:35.01Z" }, + { url = 
"https://files.pythonhosted.org/packages/0b/4e/2a90a6997d9f7a39a6998d56de72e52673ebf5a9169a1c39dbf173e95105/pyinstrument-5.1.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c9ee05dc75ac5fb18498c311e624f77f7f321f7ff325b251aa09e52e46f1d6a", size = 124468, upload-time = "2026-01-04T18:37:36.628Z" }, + { url = "https://files.pythonhosted.org/packages/04/74/7bfd403e81f9b5ec523f60cced8f516ee52312752bb2e0fafabfd90bbd78/pyinstrument-5.1.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a49a55ca5b75218767e29cacbe515d0b66fc18cb48a937bca0f77b8dafc7202", size = 148057, upload-time = "2026-01-04T18:37:37.998Z" }, + { url = "https://files.pythonhosted.org/packages/50/3a/7205d7c199947d18edcd013af4ddf4d3cca85c5488fbe493050035947f7c/pyinstrument-5.1.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0c45c14974ff04b1bfdc6c2a448627c6da7409c7800d0eb7bd03fb435dcb41d7", size = 146526, upload-time = "2026-01-04T18:37:39.642Z" }, + { url = "https://files.pythonhosted.org/packages/24/e8/f6864172e7ebe4bc5209bafbc574a619b4c511b9506b941789b11441be7c/pyinstrument-5.1.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:22b9c04b3982c41c04b1c5ed05d1bc3a2ba26533450084058119f6dc160e70a3", size = 147179, upload-time = "2026-01-04T18:37:41.332Z" }, + { url = "https://files.pythonhosted.org/packages/6d/04/89ef2d1c34767bfdbcc74ab0c7e0d021d7fac5e79873239e4ca26e97d6da/pyinstrument-5.1.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:5c4995ee0774801790c138f0dfec17d4e7a7ef09a6d56d53cbcbf0578a711021", size = 146354, upload-time = "2026-01-04T18:37:42.808Z" }, + { url = "https://files.pythonhosted.org/packages/2e/d4/64441547ec12391b92c739a3b0685059e7dfa088d928df8364676ef7abc7/pyinstrument-5.1.2-cp311-cp311-win32.whl", hash = "sha256:fe449e4a8ee60a2a27cf509350a584670f4c3704649601be7937598f09dbe7ca", size = 125790, upload-time = "2026-01-04T18:37:44.141Z" }, + { url = 
"https://files.pythonhosted.org/packages/4d/8b/0a5f6b239294decb0ecd932711f3470bfbd42fc2e08a94cd5c1f4f6da7f1/pyinstrument-5.1.2-cp311-cp311-win_amd64.whl", hash = "sha256:3fb839429671a42bf349335af4c1ce5cf83386ac11f04df0bc40720d4cb7d77d", size = 126578, upload-time = "2026-01-04T18:37:45.423Z" }, + { url = "https://files.pythonhosted.org/packages/26/d9/8fa5571ddd21b2b7189bd8b0bb4e90be1659a54dda5af51c7f6bf2b5666f/pyinstrument-5.1.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2519865d4bf58936f2506c1c46a82d29a20f3239aa50c941df1ca9618c7da5f0", size = 131419, upload-time = "2026-01-04T18:37:46.843Z" }, + { url = "https://files.pythonhosted.org/packages/6f/50/0512adb83cadfeaa1a215dc9784defff5043c5aa052d15015e3d8013af75/pyinstrument-5.1.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:059442106b8b5de29ae5ac1bdc20d044fed4da534b8caba434b6ffb119037bf5", size = 124446, upload-time = "2026-01-04T18:37:48.572Z" }, + { url = "https://files.pythonhosted.org/packages/9b/78/c45f0b668fb3c8c0d32058a451a8e1d34737cd7586387982185e12df1977/pyinstrument-5.1.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cd51f2d54fc39a4cfd73ba6be27cd0187123132ce3f445b639bff5e1b23d7e26", size = 149694, upload-time = "2026-01-04T18:37:49.876Z" }, + { url = "https://files.pythonhosted.org/packages/91/4d/2ca3ca9906ce6e05070f431c54d54ccbaf57a980cfa58032d35b0b0ac1f8/pyinstrument-5.1.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:12af1e83795b6c640d657d339014dd1ff718b182dec736d7d1f1d8a97534eb53", size = 148461, upload-time = "2026-01-04T18:37:51.544Z" }, + { url = "https://files.pythonhosted.org/packages/18/d2/bfe84a4326172ef68655b65b49fd041eeb94c8e59ee47258589b8b79dd3b/pyinstrument-5.1.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:2565513658e742c5eb691a779cb29d19d01bc9ee951d0eb76482e9f343c38c2e", size = 148560, upload-time = "2026-01-04T18:37:52.931Z" }, + { url = 
"https://files.pythonhosted.org/packages/d0/00/db7f5def351e869230b0165828c4edacbf3fdda8d66aff30dd73a62082c2/pyinstrument-5.1.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5afd0ba788a1d112da49fb77966918e01df1f9e7d62e72894d82f7acb0996c2d", size = 148178, upload-time = "2026-01-04T18:37:54.278Z" }, + { url = "https://files.pythonhosted.org/packages/5e/bc/aea3329576e20b987d205027b8e6442ece845d681b9f9d8682d5404f81f3/pyinstrument-5.1.2-cp312-cp312-win32.whl", hash = "sha256:554077b031b278593cb2301f0057be771ea62a729878c69aaf29fcdfb7b71281", size = 125927, upload-time = "2026-01-04T18:37:55.615Z" }, + { url = "https://files.pythonhosted.org/packages/14/e2/d928434ec3a840478e95fd0d73b0dfc0b8060a07b06f4b45e9df30444e9a/pyinstrument-5.1.2-cp312-cp312-win_amd64.whl", hash = "sha256:55a905384ba43efc924b8863aa6cfd276f029e4aa70c4a0e3b7389e27b191e45", size = 126675, upload-time = "2026-01-04T18:37:57.278Z" }, + { url = "https://files.pythonhosted.org/packages/b4/8e/b9aea969eec67c129652000446384d550a0df45c297adc9fd74da2f8482c/pyinstrument-5.1.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:7b8bab2334bf1d4c9e92d61db574300b914b594588a6b6dd67c45450152dfc29", size = 131418, upload-time = "2026-01-04T18:37:58.642Z" }, + { url = "https://files.pythonhosted.org/packages/8f/62/76418eb29b5591f3e5500369a6777ce928135c3aa6ccdb0c861a9c6ca93b/pyinstrument-5.1.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:13dcc138a61298ef4994b7aebff509d2c06db89dfd6e2021f0b9cd96aaa44ec3", size = 124448, upload-time = "2026-01-04T18:37:59.95Z" }, + { url = "https://files.pythonhosted.org/packages/07/73/874bccc04bcf6f4babc3de1a9568e209e7e40998563974f5030b0fb4d3e0/pyinstrument-5.1.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c8abd4a7ffa2e7f9e00039a5e549e8eebc80d7ca8d43f0fb51a50ff2b117ce4a", size = 149853, upload-time = "2026-01-04T18:38:01.405Z" }, + { url = 
"https://files.pythonhosted.org/packages/cf/85/268446c4388d77ff4abdeaff202356e1527b3ff9576f5587443a24980bec/pyinstrument-5.1.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:eb3a05108edebc30f31e2c69c904576042f1158b2513ab80adc08f7848a7a8f0", size = 148641, upload-time = "2026-01-04T18:38:03.086Z" }, + { url = "https://files.pythonhosted.org/packages/fc/15/4f8dea3381483e68d00582a9b823a21a088acfe77a847a7991a1a8feed76/pyinstrument-5.1.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f70d588b53f3f35829d1d1ddfa05e07fcebf1434b3b1509d542ca317d8e9a2a5", size = 148674, upload-time = "2026-01-04T18:38:04.805Z" }, + { url = "https://files.pythonhosted.org/packages/fa/61/72c180454b6511d5b90166f8828e1bab3b45d0489952a1fe48c5c585233d/pyinstrument-5.1.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b007327e0d6a6a01d5064883dd27c19996f044ce7488d507826fee7884e6a32e", size = 148315, upload-time = "2026-01-04T18:38:06.114Z" }, + { url = "https://files.pythonhosted.org/packages/2c/f0/4c27cebddf22a8840bd8b419366bb321ce41f921ca1893e309c932ab28bf/pyinstrument-5.1.2-cp313-cp313-win32.whl", hash = "sha256:9ba0e6b17a7e86c3dc02d208e4c25506e8f914d9964ae89449f1f37f0b70abc0", size = 125926, upload-time = "2026-01-04T18:38:07.507Z" }, + { url = "https://files.pythonhosted.org/packages/6c/20/6b1bee88ddef065b0df3a3ba4ba60ed8a9ca443d5cded7152a8a9750914f/pyinstrument-5.1.2-cp313-cp313-win_amd64.whl", hash = "sha256:660d7fc486a839814db0b2f716bc13d8b99b9c780aaeb47f74a70a34adc02a7b", size = 126678, upload-time = "2026-01-04T18:38:08.826Z" }, + { url = "https://files.pythonhosted.org/packages/66/0f/7d5154c92904bdf25be067a7fe4cad4ba48919f16ccbb51bb953d9ae1a20/pyinstrument-5.1.2-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:0baed297beee2bb9897e737bbd89e3b9d45a2fbbea9f1ad4e809007d780a9b1e", size = 131388, upload-time = "2026-01-04T18:38:10.491Z" }, + { url = 
"https://files.pythonhosted.org/packages/17/28/bf83231a3f951e11b4dfaf160e1eeba1ce29377eab30e3d2eb6ee22ff3ba/pyinstrument-5.1.2-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ebb910a32a45bde6c3fc30c578efc28a54517990e11e94b5e48a0d5479728568", size = 124456, upload-time = "2026-01-04T18:38:11.792Z" }, + { url = "https://files.pythonhosted.org/packages/ac/98/762cf10896d907268629e1db08a48f128984a53e8d92b99ea96f862597e5/pyinstrument-5.1.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bad403c157f9c6dba7f731a6fca5bfcd8ca2701a39bcc717dcc6e0b10055ffc4", size = 149594, upload-time = "2026-01-04T18:38:13.434Z" }, + { url = "https://files.pythonhosted.org/packages/1a/1b/48580e16e623d89af58b89c552c95a2ae65f70a1f4fab1d97879f34791db/pyinstrument-5.1.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2f456cabdb95fd343c798a7f2a56688b028f981522e283c5f59bd59195b66df5", size = 148339, upload-time = "2026-01-04T18:38:14.767Z" }, + { url = "https://files.pythonhosted.org/packages/62/7e/38157a8a6ec67789d8ee109fd09877ea3340df44e1a7add8f249e30a8ade/pyinstrument-5.1.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:4e9c4dcc1f2c4a0cd6b576e3604abc37496a7868243c9a1443ad3b9db69d590f", size = 148485, upload-time = "2026-01-04T18:38:16.121Z" }, + { url = "https://files.pythonhosted.org/packages/4b/34/31ee72b19cfc48a82801024b5d653f07982154a11381a3ae65bbfdbf2c7b/pyinstrument-5.1.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:acf93b128328c6d80fdb85431068ac17508f0f7845e89505b0ea6130dead5ca6", size = 148106, upload-time = "2026-01-04T18:38:17.623Z" }, + { url = "https://files.pythonhosted.org/packages/3b/b4/7ab20243187262d66ab062778b1ccac4ca55090752f32a83f603f4e5e3a2/pyinstrument-5.1.2-cp314-cp314-win32.whl", hash = "sha256:9c7f0167903ecff8b1d744f7e37b2bd4918e05a69cca724cb112f5ed59d1e41b", size = 126593, upload-time = "2026-01-04T18:38:18.968Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/a0/db6a8ae3182546227f5a043b1be29b8d5f98bf973e20d922981ef206de85/pyinstrument-5.1.2-cp314-cp314-win_amd64.whl", hash = "sha256:ce3f6b1f9a2b5d74819ecc07d631eadececf915f551474a75ad65ac580ec5a0e", size = 127358, upload-time = "2026-01-04T18:38:20.28Z" }, + { url = "https://files.pythonhosted.org/packages/59/d2/719f439972b3f80e35fb5b1bcd888c3218d60dbc91957b99ffafd7ac9221/pyinstrument-5.1.2-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:af8651b239049accbeecd389d35823233f649446f76f47fd005316b05d08cef2", size = 132317, upload-time = "2026-01-04T18:38:21.669Z" }, + { url = "https://files.pythonhosted.org/packages/e2/1c/0ebfef69ae926665fae635424c5647411235c3689c9a9ad69fd68de6cae2/pyinstrument-5.1.2-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c6082f1c3e43e1d22834e91ba8975f0080186df4018a04b4dd29f9623c59df1d", size = 124917, upload-time = "2026-01-04T18:38:23.385Z" }, + { url = "https://files.pythonhosted.org/packages/a6/ee/5599f769f515a0f1c97443edc7394fe2b9829bf39f404c046499c1a62378/pyinstrument-5.1.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c031eb066ddc16425e1e2f56aad5c1ce1e27b2432a70329e5385b85e812decee", size = 157407, upload-time = "2026-01-04T18:38:24.774Z" }, + { url = "https://files.pythonhosted.org/packages/fd/40/32aa865252288caef301237488ee309bd6701125888bf453d23ab764e357/pyinstrument-5.1.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f447ec391cad30667ba412dce41607aaa20d4a2496a7ab867e0c199f0fe3ae3d", size = 155068, upload-time = "2026-01-04T18:38:26.112Z" }, + { url = "https://files.pythonhosted.org/packages/91/68/0b56a1540fe1c357dfcda82d4f5b52c87fada5962cbf18703ea39ccbbe69/pyinstrument-5.1.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:50299bddfc1fe0039898f895b10ef12f9db08acffb4d85326fad589cda24d2ee", size = 155186, upload-time = "2026-01-04T18:38:27.914Z" }, + { url = 
"https://files.pythonhosted.org/packages/7a/48/7ef84abfc3e41148cf993095214f104e75ecff585e94c6e8be001e672573/pyinstrument-5.1.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a193ff08825ece115ececa136832acb14c491c77ab1e6b6a361905df8753d5c6", size = 153979, upload-time = "2026-01-04T18:38:29.236Z" }, + { url = "https://files.pythonhosted.org/packages/8f/cf/a28ad117d58b33c1d74bcdfbbcf1603b67346883800ac7d510cff8d3bcee/pyinstrument-5.1.2-cp314-cp314t-win32.whl", hash = "sha256:de887ba19e1057bd2d86e6584f17788516a890ae6fe1b7eed9927873f416b4d8", size = 127267, upload-time = "2026-01-04T18:38:30.619Z" }, + { url = "https://files.pythonhosted.org/packages/8e/97/03635143a12a5d941f545548b00f8ac39d35565321a2effb4154ed267338/pyinstrument-5.1.2-cp314-cp314t-win_amd64.whl", hash = "sha256:b6a71f5e7f53c86c9b476b30cf19509463a63581ef17ddbd8680fee37ae509db", size = 128164, upload-time = "2026-01-04T18:38:32.281Z" }, +] + [[package]] name = "pytest" version = "8.4.1" @@ -314,6 +375,9 @@ dependencies = [ ] [package.optional-dependencies] +profile = [ + { name = "pyinstrument" }, +] test = [ { name = "pytest" }, ] @@ -322,11 +386,12 @@ test = [ requires-dist = [ { name = "levenshtein", specifier = ">=0.27.1" }, { name = "pydantic", specifier = ">=2.12.5" }, + { name = "pyinstrument", marker = "extra == 'profile'", specifier = ">=5.1.2" }, { name = "pytest", marker = "extra == 'test'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'", specifier = ">=4.0.0" }, { name = "xmltodict" }, ] -provides-extras = ["test"] +provides-extras = ["test", "profile"] [[package]] name = "rapidfuzz" From 4e089f7ec636aad285492016b9cb7de560d1f11a Mon Sep 17 00:00:00 2001 From: obadx Date: Thu, 12 Feb 2026 16:29:32 +0200 Subject: [PATCH 3/4] add: version 0.3.0 --- pyproject.toml | 2 +- .../phonetics/conv_base_operation.py | 33 ++++++++----------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 89a9a00..e37a55e 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ build-backend = "setuptools.build_meta" [project] license = "MIT" name = "quran-transcript" -version = "0.2.1" +version = "0.3.0" authors = [ { name="Abdullah", email="abdullahamlyossef@gmail.com" }, ] diff --git a/src/quran_transcript/phonetics/conv_base_operation.py b/src/quran_transcript/phonetics/conv_base_operation.py index c3af53c..69df654 100644 --- a/src/quran_transcript/phonetics/conv_base_operation.py +++ b/src/quran_transcript/phonetics/conv_base_operation.py @@ -22,6 +22,7 @@ class MappingPos: inclusive and the end is exclusive (Python-style slice notation). tajweed_rules: List of TajweedRule objects that apply to this character span. None indicates no tajweed rules are associated with this mapping. + deleted(bool): Whether this location is deleted or not. If deleted, pos[0] == pos[1] Example: >>> mapping = MappingPos(pos=(0, 3), tajweed_rules=[]) @@ -95,12 +96,6 @@ def merge_mappings( ValueError: if `new_mappings` is an empty list - Logic: - - For each non-None old mapping, searches its position range in new_mappings - - Finds the first and last non-None mapping in that range - - Creates a new MappingPos spanning from the first to last non-None position - - If only start or end found, uses that position alone - - Preserves None values for deleted characters Examples: # Identity mapping - no change @@ -117,9 +112,9 @@ def merge_mappings( # Contraction with deletions - range contracts with None values >>> old = [MappingPos(pos=(0, 3))] - >>> new = [MappingPos(pos=(0, 1)), None, MappingPos(pos=(2, 5))] + >>> new = [MappingPos(pos=(0, 1)), MappingPos(pos=(1, 1), deleted=True), MappingPos(pos=(1, 5))] >>> result = merge_mappings(old, new) - # result: [MappingPos(pos=(0, 5))] # spans first to last non-None + # result: [MappingPos(pos=(0, 5))] # spans first to last """ if mappings is None: @@ -170,11 +165,6 @@ def get_mappings( ) -> MappingListType: """Generate character position mappings between original
and transformed text. - This is the core mapping engine that analyzes character-level transformations using - Levenshtein opcodes to create precise position mappings. It tracks how each character - in the original text maps to its position in the transformed text, handling complex - operations like insertions, replacements, deletions, and their combinations. - The function is essential for maintaining character-level precision in Quranic text processing, particularly when converting between Uthmani script and phonetic transcription. It can associate tajweed rules with affected character spans and @@ -196,11 +186,12 @@ def get_mappings( Returns: List of MappingPos objects tracking character positions from original to - transformed text. Length matches original text length. None values indicate - deleted characters, while MappingPos objects contain position spans and + transformed text. Length matches original text length. `MappingPos(pos=(x, x), deleted=True)` + values indicate deleted characters, while MappingPos objects contain position spans and associated tajweed rules. Raises: + AssertionError: if one of the generated mappings is `None` ValueError: If mapping continuity validation fails (detected gaps in position mappings that should be contiguous). 
@@ -225,13 +216,13 @@ def get_mappings( (3, 4) Quranic text transformation with alif elongation: - >>> text = "بِسْمِ لَّاهِ" - >>> new_text = "بِسْمِ لَّااهِ" + >>> text = "بِسْمِ لَّاهِ" # len 13 + >>> new_text = "بِسْمِ لَّااهِ" # len 14 >>> mappings = get_mappings(text, new_text) >>> len(mappings) # Same length as original text - 9 - >>> mappings[8].pos # Final character position - (26, 27) + 13 + >>> mappings[10].pos # 2 beats madd + (10, 12) Complex transformation with existing mappings and tajweed rule: >>> existing_mappings = [MappingPos(pos=(0, 1)), MappingPos(pos=(1, 2))] @@ -241,6 +232,8 @@ def get_mappings( >>> mappings = get_mappings(text, new_text, existing_mappings, tajweed_rule) >>> mappings[0].pos # First 'a' maps to (0, 2) with tajweed rule (0, 2) + >>> mappings[0].tajweed_rules + NormalMadd() Character deletion: >>> text = "abcd" From 3328b140644acd4fe15822fe5de6967f8a748a5c Mon Sep 17 00:00:00 2001 From: obadx Date: Thu, 12 Feb 2026 16:48:10 +0200 Subject: [PATCH 4/4] =?UTF-8?q?=D8=AA=D9=88=D8=AB=D9=8A=D9=82=20=D8=A7?= =?UTF-8?q?=D9=84=D8=A5=D8=B5=D8=AF=D8=A7=D8=B1=20=D8=A7=D9=84=D8=AC=D8=AF?= =?UTF-8?q?=D9=8A=D8=AF=20v0.3.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 22 ++++++++++++++++------ tests/test_sub_with_mapping.py | 4 ++-- uv.lock | 2 +- 3 files changed, 19 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index b7f81a4..5f955d8 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,13 @@ # `quran-transcript` package +## 🆕 ما الجديد في الإصدار 0.3.0 (What's New in Version 0.3.0) + +### 📍 خرائط المواقع الجديدة (New Position Mappings) +- أسرع مرتان في عملية إنشاء الرسم الصوتي (2x faster phonetization) +- تمثيل الحروف المحذوفة ب `MappingPos(pos=(x, x), deleted=True)` - حيث `x` رقم صحيح أكبر من الصفر - بدلا من `None` (Represent deleted characters with `MappingPos(pos=(x, x), deleted=True)` -where `x` is an integer >=0- instead of `None`) + + ## 🆕 ما الجديد في
الإصدار 0.2.0 (What's New in Version 0.2.0) ### 📍 خرائط المواقع الجديدة (New Position Mappings) @@ -131,24 +138,27 @@ phonetic_script = quran_phonetizer(uthmani_script, moshaf) print('\n' * 2) print(f"الرسم الصوتي:\n{phonetic_script.phonemes}") +# print(f"صفات الحروف:\n{phonetic_script.sifat}") +print('\n' * 2) +print("صفات الحروف:") +for sifa in phonetic_script.sifat: + print(json.dumps(sifa.model_dump(), ensure_ascii=False, indent=4)) + print() + # جديد: عرض خرائط المواقع print('\n' * 2) print("خرائط المواقع:") for idx, (uth_char, mapping) in enumerate(zip(uthmani_script, phonetic_script.mappings)): - if mapping is not None: + if not mapping.deleted: # استخراج الصوت لهذا الحرف phoneme = phonetic_script.phonemes[mapping.pos[0]:mapping.pos[1]] print(f"حرف: '{uth_char}' -> صوت: '{phoneme}' (موقع: {mapping.pos})") else: print(f"حرف: '{uth_char}' -> محذوف") -print('\n' * 2) -print("صفات الحروف:") -for sifa in phonetic_script.sifat: - print(json.dumps(sifa.model_dump(), ensure_ascii=False, indent=4)) - print() ``` + > 📘 For more information on `MoshafAttributes`, refer to the [Quran Dataset Documentation](https://github.com/obadx/prepare-quran-dataset?tab=readme-ov-file#moshaf-attributes-docs). 
diff --git a/tests/test_sub_with_mapping.py b/tests/test_sub_with_mapping.py index b29d140..30f745d 100644 --- a/tests/test_sub_with_mapping.py +++ b/tests/test_sub_with_mapping.py @@ -17,7 +17,7 @@ aya = Aya(2, 1) aya = Aya(19, 1) # aya = Aya(75, 27) - # aya = Aya(2, 6) + aya = Aya(2, 6) # aya = Aya(3, 1) # aya = Aya(30, 28) # aya = Aya(2, 9) @@ -44,7 +44,7 @@ print("-" * 40) - print(profiler.output_text(unicode=True, color=True)) + print(profiler.output_text(unicode=True, color=True, show_all=True)) """ * meem moshaddah * Lam Ism Allah should be deleted at [8] [Not the best thing but works] diff --git a/uv.lock b/uv.lock index d202f6e..c115ade 100644 --- a/uv.lock +++ b/uv.lock @@ -365,7 +365,7 @@ wheels = [ [[package]] name = "quran-transcript" -version = "0.2.1" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "levenshtein" },