API Reference

This reference is automatically generated from the source code docstrings.

Arudh Processor

pyarud.processor.ArudhProcessor

The main engine for Arabic prosody analysis.

This class handles:

1. Converting Arabic text to Arudi writing (phonetic representation).
2. Converting Arudi text to binary patterns (1s and 0s).
3. Detecting the poetic meter (Bahr) from a list of meters.
4. Performing granular, foot-by-foot analysis to identify defects (Zihaf/Ellah).
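
A minimal end-to-end sketch (the verse is the opening of Imru' al-Qays's Mu'allaqa, used purely as an illustration; the import path follows the module name above):

from pyarud.processor import ArudhProcessor

processor = ArudhProcessor()

# Each verse is a (sadr, ajuz) pair of hemistichs.
verses = [
    ("قِفَا نَبْكِ مِنْ ذِكْرَى حَبِيبٍ وَمَنْزِلِ",
     "بِسِقْطِ اللِّوَى بَيْنَ الدَّخُولِ فَحَوْمَلِ"),
]

result = processor.process_poem(verses)  # auto-detects the meter
print(result["meter"])                   # detected meter name
for verse in result["verses"]:
    print(verse["score"])                # compatibility score, 0.0 - 1.0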

Source code in pyarud/processor.py
class ArudhProcessor:
    """
    The main engine for Arabic prosody analysis.

    This class handles:
    1. Converting Arabic text to Arudi writing (phonetic representation).
    2. Converting Arudi text to binary patterns (1s and 0s).
    3. Detecting the poetic meter (Bahr) from a list of meters.
    4. Performing granular, foot-by-foot analysis to identify defects (Zihaf/Ellah).
    """
    def __init__(self):
        self.converter = ArudiConverter()
        self.meter_classes = get_all_meters()
        self.precomputed_patterns = {}
        self._precompute_patterns()

    def _precompute_patterns(self):
        """
        Generates structured valid patterns for each meter using the detailed_patterns engine.
        """
        for name, bahr_cls in self.meter_classes.items():
            bahr_instance = bahr_cls()
            # detailed_patterns returns {'sadr': [...], 'ajuz': [...], 'pairs': set()}
            self.precomputed_patterns[name] = bahr_instance.detailed_patterns

    def _get_similarity(self, a, b):
        # Raise the ratio to the 6th power to penalize small mismatches more
        # heavily: a 0.95 raw ratio becomes ~0.74, increasing separation significantly.
        return math.pow(SequenceMatcher(None, a, b).ratio(), 6)

    def process_poem(self, verses, meter_name=None):
        """
        Analyzes a list of verses to detect the meter and evaluate prosodic correctness.

        Args:
            verses (list[tuple[str, str]]): A list of tuples, where each tuple contains
                the Sadr (first hemistich) and Ajuz (second hemistich) of a verse.
            meter_name (str, optional): The name of a specific meter to force the analysis against.
                If provided, auto-detection is skipped. Defaults to None.

        Returns:
            dict: A dictionary containing:
                - `meter` (str): The name of the detected or forced meter.
                - `verses` (list[dict]): A list of analysis results for each verse, including:
                    - `score` (float): Compatibility score (0.0 - 1.0).
                    - `sadr_analysis` (list[dict]): Detailed foot-by-foot analysis of the Sadr.
                    - `ajuz_analysis` (list[dict]): Detailed foot-by-foot analysis of the Ajuz.
        """
        detected_counts = Counter()
        temp_results = []

        # 1. Detect Meter for each verse (if not forced)
        for i, (sadr, ajuz) in enumerate(verses):
            # Convert text to pattern
            sadr_arudi, sadr_pattern = self.converter.prepare_text(sadr)
            ajuz_arudi, ajuz_pattern = self.converter.prepare_text(ajuz)

            # Handle single shatr input if needed (future proofing)
            if not ajuz:
                ajuz_pattern = ""

            match_info = None
            if not meter_name:
                # Auto-detect
                candidates = self._find_best_meter(sadr_pattern, ajuz_pattern)
                if candidates:
                    best_match = candidates[0]
                    detected_counts[best_match["meter"]] += 1
                    match_info = best_match
            else:
                # Forced meter: skip detection; the verse is analyzed against
                # meter_name in step 2 below.
                pass

            temp_results.append(
                {
                    "index": i,
                    "sadr": {"text": sadr, "pattern": sadr_pattern, "arudi": sadr_arudi},
                    "ajuz": {"text": ajuz, "pattern": ajuz_pattern, "arudi": ajuz_arudi},
                    "match": match_info,
                }
            )

        if meter_name:
            global_meter = meter_name
        elif detected_counts:
            global_meter = detected_counts.most_common(1)[0][0]
        else:
            return {"error": "Could not detect any valid meter."}

        # 2. Analyze against Global Meter
        final_analysis = []
        for res in temp_results:
            analysis = self._analyze_verse(res, global_meter)
            final_analysis.append(analysis)

        return {"meter": global_meter, "verses": final_analysis}

    def _find_best_meter(self, sadr_pattern, ajuz_pattern):
        METER_PRIORITY = {
            "rajaz": 20,
            "kamel": 10,
            "hazaj": 20,
            "wafer": 10,
            "saree": 20,
            "munsareh": 10,
            "baseet": 10,
            "ramal": 15,
            "mutadarak": 15,
            "mutakareb": 15,
        }

        candidates = []

        for name, patterns in self.precomputed_patterns.items():
            # 1. Score Sadr
            best_sadr = self._find_best_component_match(sadr_pattern, patterns["sadr"])

            # 2. Score Ajuz (if exists)
            best_ajuz = None
            if ajuz_pattern:
                best_ajuz = self._find_best_component_match(ajuz_pattern, patterns["ajuz"])

            # 3. Calculate Combined Score
            # If single shatr meter, ajuz score is irrelevant (or 0)
            s_score = best_sadr["score"]
            a_score = best_ajuz["score"] if best_ajuz else 0

            # Compatibility Check
            is_valid_pair = False
            if best_sadr["ref"] and (not ajuz_pattern or best_ajuz["ref"]):
                s_pat = best_sadr["ref"]["pattern"]
                a_pat = best_ajuz["ref"]["pattern"] if best_ajuz else ""
                if (s_pat, a_pat) in patterns["pairs"]:
                    is_valid_pair = True

            # Average the two hemistich scores when both are present.
            if ajuz_pattern:
                total_score = (s_score + a_score) / 2
            else:
                total_score = s_score

            candidates.append({
                "meter": name,
                "score": total_score,
                "sadr_match": best_sadr,
                "ajuz_match": best_ajuz,
                "valid_pair": is_valid_pair
            })

        # Sort candidates
        # Priority: Score -> Validity -> Priority Map
        candidates.sort(key=lambda x: (
            round(x["score"], 3),
            x["valid_pair"],
            METER_PRIORITY.get(x["meter"], 0)
        ), reverse=True)

        if not candidates:
            return []

        return candidates

    def _find_best_component_match(self, input_pattern, component_patterns):
        best_score = -1
        best_ref = None

        for item in component_patterns:
            ref_pat = item["pattern"]
            score = self._get_similarity(ref_pat, input_pattern)
            if score > best_score:
                best_score = score
                best_ref = item

        return {"score": best_score, "ref": best_ref}

    def _analyze_verse(self, res, meter_name):
        # Re-run match against specific meter to get details
        patterns = self.precomputed_patterns.get(meter_name)
        if not patterns:
            return {"error": "Meter data not found"}

        sadr_match = self._find_best_component_match(res["sadr"]["pattern"], patterns["sadr"])
        ajuz_match = None
        if res["ajuz"]["pattern"]:
            ajuz_match = self._find_best_component_match(res["ajuz"]["pattern"], patterns["ajuz"])

        # Get allowed feet for this meter for greedy analysis
        bahr_cls = self.meter_classes.get(meter_name)
        allowed_sadr = []
        allowed_ajuz = []
        if bahr_cls:
            inst = bahr_cls()
            allowed_sadr = inst.get_allowed_feet_patterns(0)
            allowed_ajuz = inst.get_allowed_feet_patterns(1)

        # Analyze Sadr Feet
        sadr_analysis = self._analyze_feet(res["sadr"]["pattern"], allowed_sadr, sadr_match["ref"])

        ajuz_analysis = None
        if res["ajuz"]["pattern"]:
            ajuz_analysis = self._analyze_feet(res["ajuz"]["pattern"], allowed_ajuz, ajuz_match["ref"])

        return {
            "verse_index": res["index"],
            "sadr_text": res["sadr"]["text"],
            "ajuz_text": res["ajuz"]["text"],
            "input_pattern": res["sadr"]["pattern"] + res["ajuz"]["pattern"],
            "best_ref_pattern": (sadr_match["ref"]["pattern"] if sadr_match["ref"] else "") + 
                                (ajuz_match["ref"]["pattern"] if ajuz_match and ajuz_match["ref"] else ""),
            "score": round(
                (sadr_match["score"] + (ajuz_match["score"] if ajuz_match else 0)) / (2 if ajuz_match else 1), 2
            ),
            "sadr_analysis": sadr_analysis,
            "ajuz_analysis": ajuz_analysis
        }

    def _analyze_feet(self, input_pattern, allowed_feet_list, best_ref):
        """
        Maps input bits to feet using greedy matching against ALLOWED forms.
        This prevents one broken foot from misaligning the rest if they are valid.
        """
        analysis = []
        current_idx = 0

        # Fallback to best_ref feet if allowed_feet_list is not provided (should not happen)
        ref_feet_backup = best_ref["feet"] if best_ref else []

        # Determine number of feet to analyze
        num_feet = len(allowed_feet_list) if allowed_feet_list else len(ref_feet_backup)

        for i in range(num_feet):
            # 1. Get valid candidates for this foot position
            if allowed_feet_list:
                candidates = allowed_feet_list[i]
            elif i < len(ref_feet_backup):
                candidates = [ref_feet_backup[i]]
            else:
                candidates = []

            # Sort candidates by length descending to try longest match first
            candidates = sorted(candidates, key=len, reverse=True)

            best_local_match = None
            best_local_score = -1

            # Try to find best fit at current_idx
            # We look ahead by len(cand)
            for cand in candidates:
                cand_len = len(cand)
                # Get segment of equal length (or truncated if at end)
                segment = input_pattern[current_idx : current_idx + cand_len]

                if not segment:
                    break  # No more input

                score = self._get_similarity(cand, segment)

                # A full-length perfect match wins outright; take it immediately.
                if len(segment) == cand_len and score == 1.0:
                    best_local_match = cand
                    best_local_score = 1.0
                    break

                if score > best_local_score:
                    best_local_score = score
                    best_local_match = cand

            # If nothing scored but candidates exist, fall back to the
            # first (longest) candidate.
            if not best_local_match and candidates:
                best_local_match = candidates[0]

            # If there is still no match (no candidates at all), record the
            # foot as missing and move on.
            if not best_local_match:
                analysis.append({
                    "foot_index": i,
                    "expected_pattern": candidates[0] if candidates else "?",
                    "actual_segment": "MISSING",
                    "score": 0.0,
                    "status": "missing"
                })
                continue

            # Consume the length of the EXPECTED pattern rather than the raw
            # input: assuming the poet intended this foot, fixed-length
            # consumption keeps the remaining feet aligned even when this one is broken.

            consume_len = len(best_local_match)
            # Clamp to input length
            end_idx = min(current_idx + consume_len, len(input_pattern))
            actual_segment = input_pattern[current_idx : end_idx]

            # Recalculate score on the final decided segment
            final_score = self._get_similarity(best_local_match, actual_segment)

            status = "ok" if final_score == 1.0 else "broken"
            if not actual_segment:
                status = "missing"

            analysis.append({
                "foot_index": i,
                "expected_pattern": best_local_match,
                "actual_segment": actual_segment,
                "score": round(final_score, 2),
                "status": status
            })

            current_idx = end_idx

        # Check for extra bits
        if current_idx < len(input_pattern):
            extra = input_pattern[current_idx:]
            analysis.append({
                "foot_index": num_feet,
                "expected_pattern": "",
                "actual_segment": extra,
                "score": 0,
                "status": "extra_bits"
            })

        return analysis

process_poem(verses, meter_name=None)

Analyzes a list of verses to detect the meter and evaluate prosodic correctness.

Parameters:

    verses (list[tuple[str, str]], required):
        A list of tuples, where each tuple contains the Sadr (first hemistich)
        and Ajuz (second hemistich) of a verse.
    meter_name (str, optional, default None):
        The name of a specific meter to force the analysis against. If provided,
        auto-detection is skipped.

Returns:

    dict: A dictionary containing:
        - meter (str): The name of the detected or forced meter.
        - verses (list[dict]): A list of analysis results for each verse, including:
            - score (float): Compatibility score (0.0 - 1.0).
            - sadr_analysis (list[dict]): Detailed foot-by-foot analysis of the Sadr.
            - ajuz_analysis (list[dict]): Detailed foot-by-foot analysis of the Ajuz.
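
A sketch of walking the returned structure, reusing processor and verses from the first example. The per-foot fields (foot_index, expected_pattern, actual_segment, score, status) follow the dictionaries built in _analyze_feet in the class source above; the forced meter name is illustrative:

result = processor.process_poem(verses, meter_name="kamel")
for verse in result["verses"]:
    print(verse["sadr_text"], "->", verse["score"])
    for foot in verse["sadr_analysis"]:
        if foot["status"] != "ok":  # "broken", "missing", or "extra_bits"
            print(f'  foot {foot["foot_index"]}: expected {foot["expected_pattern"]},'
                  f' got {foot["actual_segment"]}')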


Bahr (Meters)

pyarud.bahr.Bahr

Base class for defining poetic meters (Buhur).

Subclasses define the standard feet (tafeelat), valid Arudh/Dharb combinations, and disallowed variations (Zihaf) for specific positions.
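
The concrete meter classes are not documented individually here, but they can be reached through the processor's registry. A sketch, assuming the registry keys match the names seen in the processor's METER_PRIORITY map (e.g. "kamel"):

from pyarud.processor import ArudhProcessor

bahr = ArudhProcessor().meter_classes["kamel"]()  # meter name -> Bahr subclass

patterns = bahr.detailed_patterns
print(len(patterns["sadr"]), "valid Sadr patterns")
print(next(iter(patterns["pairs"])))  # one valid (sadr, ajuz) pattern pair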

Source code in pyarud/bahr.py
class Bahr:
    """
    Base class for defining poetic meters (Buhur).

    Subclasses define the standard feet (tafeelat), valid Arudh/Dharb combinations,
    and disallowed variations (Zihaf) for specific positions.
    """
    tafeelat: tuple[type[Tafeela], ...] = ()
    arod_dharbs_map: dict[type[BaseEllahZehaf], tuple[type[BaseEllahZehaf], ...]] | set[type[BaseEllahZehaf]] = {}
    sub_bahrs: tuple[type["Bahr"], ...] = ()
    only_one_shatr = False
    disallowed_zehafs_for_hashw: dict[int, tuple[list[type[BaseEllahZehaf]], ...]] = {}

    @property
    def last_tafeela(self):
        return self.tafeelat[-1]()

    def get_shatr_hashw_combinations(self, shatr_index=0):
        combinations = []
        # Hashw is everything except the last tafeela (Arudh/Dharb)
        for i, tafeela_class in enumerate(self.tafeelat[:-1]):
            tafeela = tafeela_class()
            forms = tafeela.all_zehaf_tafeela_forms()

            # Filter disallowed zehafs
            if shatr_index in self.disallowed_zehafs_for_hashw:
                disallowed = self.disallowed_zehafs_for_hashw[shatr_index]
                if i < len(disallowed):
                    forms = [f for f in forms if f.applied_ella_zehaf_class not in disallowed[i]]

            combinations.append(forms)
        return combinations

    def get_allowed_feet_patterns(self, shatr_index=0):
        """
        Returns a list of lists, where index i contains all valid binary strings for foot i.
        Used for granular analysis to align input to valid feet.
        """
        allowed_per_index = []

        # Hashw feet
        hashw_combs = self.get_shatr_hashw_combinations(shatr_index)
        for forms in hashw_combs:
            allowed_per_index.append([str(f) for f in forms])

        # Last foot (Arudh/Dharb)
        last_feet = set()
        if self.only_one_shatr:
            # Treat every entry in the map as an allowed ending; the set form
            # and the dict form (which iterates its keys) behave identically.
            for z_cls in self.arod_dharbs_map:
                try:
                    last_feet.add(str(z_cls(self.last_tafeela).modified_tafeela))
                except AssertionError:
                    continue
        else:
            if shatr_index == 0:  # Sadr -> Arudh
                for z_cls in self.arod_dharbs_map.keys():
                    try:
                        last_feet.add(str(z_cls(self.last_tafeela).modified_tafeela))
                    except AssertionError:
                        continue
            else:  # Ajuz -> Dharb
                for d_list in self.arod_dharbs_map.values():
                    for z_cls in d_list:
                        try:
                            last_feet.add(str(z_cls(self.last_tafeela).modified_tafeela))
                        except AssertionError:
                            continue

        allowed_per_index.append(list(last_feet))
        return allowed_per_index

    @property
    def detailed_patterns(self):
        """
        Returns structured patterns for Sadr and Ajuz separately.
        """
        patterns = {
            "sadr": [],
            "ajuz": [],
            "pairs": set() # Set of (sadr_pattern_str, ajuz_pattern_str) for validation
        }

        if self.only_one_shatr:
             # Single shatr meters (Mashtoor/Manhook)
             # We treat them as Sadr only
             hashw = self.get_shatr_hashw_combinations()

             # For single shatr, the "Arudh" is the end of the line

             # Collect all allowed endings from the map. For single-shatr
             # meters, arod_dharbs_map is usually a set of Ellah/Zihaf classes;
             # the dict form iterates the same way (its keys are the endings).
             endings = []
             for z_cls in self.arod_dharbs_map:
                 try:
                     endings.append(z_cls(self.last_tafeela).modified_tafeela)
                 except AssertionError:
                     continue

             permutations = list(itertools.product(*hashw, endings))
             for p in permutations:
                 # p is a tuple of Tafeela objects
                 feet_strs = [str(t) for t in p]
                 full_str = "".join(feet_strs)
                 patterns["sadr"].append({
                     "pattern": full_str,
                     "feet": feet_strs,
                     "type": "single_shatr"
                 })
                 # Register with an empty Ajuz so pair validation stays uniform.
                 patterns["pairs"].add((full_str, ""))

        else:
            # Two shatrs
            sadr_hashw = self.get_shatr_hashw_combinations(0)
            ajuz_hashw = self.get_shatr_hashw_combinations(1)

            for arudh_z_cls, dharb_z_list in self.arod_dharbs_map.items():
                # 1. Generate Arudh (End of Sadr)
                try:
                    arudh_obj = arudh_z_cls(self.last_tafeela).modified_tafeela
                except AssertionError:
                    continue

                arudh_str = str(arudh_obj)

                # 2. Generate Sadr variations for this Arudh
                sadr_perms = list(itertools.product(*sadr_hashw, [arudh_obj]))

                for sp in sadr_perms:
                    feet_strs = [str(t) for t in sp]
                    full_sadr = "".join(feet_strs)

                    patterns["sadr"].append({
                        "pattern": full_sadr,
                        "feet": feet_strs,
                        "arudh_foot": arudh_str,
                        "arudh_class": arudh_z_cls.__name__
                    })

                    # 3. Generate compatible Dharbs (End of Ajuz)
                    # dharb_z_list is tuple of allowed classes for this Arudh
                    compatible_dharbs = []
                    for d_z in dharb_z_list:
                        try:
                            dharb_obj = d_z(self.last_tafeela).modified_tafeela
                            compatible_dharbs.append(dharb_obj)
                        except AssertionError:
                            continue

                    if not compatible_dharbs:
                        continue

                    # 4. Generate Ajuz variations for these Dharbs
                    ajuz_perms = list(itertools.product(*ajuz_hashw, compatible_dharbs))

                    for ap in ajuz_perms:
                        feet_strs_a = [str(t) for t in ap]
                        full_ajuz = "".join(feet_strs_a)

                        patterns["ajuz"].append({
                            "pattern": full_ajuz,
                            "feet": feet_strs_a,
                            "dharb_foot": feet_strs_a[-1],
                            "allowed_arudhs": [arudh_str] # Valid only if Sadr ended with this
                        })

                        # Register valid pair
                        patterns["pairs"].add((full_sadr, full_ajuz))

        # Different Arudh classes can yield the same pattern, so these lists
        # may contain duplicates; the processor's matching tolerates that.

        # Add sub-bahrs
        for sub in self.sub_bahrs:
            sub_p = sub().detailed_patterns
            patterns["sadr"].extend(sub_p["sadr"])
            patterns["ajuz"].extend(sub_p["ajuz"])
            patterns["pairs"].update(sub_p["pairs"])

        return patterns

    @property
    def bait_combinations(self):
        # Deprecated wrapper for backward compatibility
        # Returns flattened list of full lines
        p = self.detailed_patterns
        if self.only_one_shatr:
            return sorted(list(set(x["pattern"] for x in p["sadr"])), key=len)

        # Reconstruct full lines from pairs
        return sorted([s+a for s,a in p["pairs"]], key=len)

detailed_patterns property

Returns structured patterns for Sadr and Ajuz separately.
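
Each entry in the "sadr" and "ajuz" lists pairs the concatenated binary pattern with the per-foot strings it was built from. A small sketch, reusing the bahr instance from the example before the source listing:

entry = bahr.detailed_patterns["sadr"][0]
print(entry["pattern"])          # full binary string for the hemistich
print(entry["feet"])             # the per-foot binary strings it concatenates
print(entry.get("arudh_class"))  # Ellah/Zihaf class name (two-shatr meters only)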

get_allowed_feet_patterns(shatr_index=0)

Returns a list of lists, where index i contains all valid binary strings for foot i. Used for granular analysis to align input to valid feet.
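
A sketch of listing the admissible forms per foot position, again reusing bahr from above (shatr_index 0 selects the Sadr, 1 the Ajuz):

allowed = bahr.get_allowed_feet_patterns(0)
for i, forms in enumerate(allowed):
    print(f"foot {i}: {sorted(set(forms))}")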


Arudi Converter

pyarud.arudi.ArudiConverter

Source code in pyarud/arudi.py
class ArudiConverter:
    def __init__(self):
        self.harakat = [KASRA, FATHA, DAMMA]  # kasra, fatha, damma
        self.sukun = [SUKUN]  # sukun
        self.mostly_saken = [ALEF, WAW, ALEF_MAKSURA, YEH]  # alef, waw, alef maqsurah, ya'a
        self.tnween_chars = [DAMMATAN, KASRATAN, FATHATAN]  # damm, kasra, fatha tanween
        self.shadda_chars = [SHADDA]
        self.all_chars = list(LETTERS + " ")
        self.prem_chars = (
            self.harakat + self.sukun + self.mostly_saken + self.tnween_chars + self.shadda_chars + self.all_chars
        )

        # Word replacements for Arudi writing
        self.CHANGE_LST = {
            "هذا": "هَاذَا",
            "هذه": "هَاذِه",
            "هذان": "هَاذَان",
            "هذين": "هَاذَين",
            "هؤلاء": "هَاؤُلَاء",
            "ذلك": "ذَالِك",
            "ذلكما": "ذَالِكُمَا",
            "ذلكم": "ذَالِكُم",
            "أولئك": "أُلَائِك",
            "أولئكم": "أُلَائِكُم",
            "الله": "اللَّاه",
            "اللهم": "اللَّاهُمّ",
            "إله": "إِلَاه",
            "الإله": "الإِلَاه",
            "إلهي": "إِلَاهي",
            "إلهنا": "إِلَاهنا",
            "إلهكم": "إِلَاهكم",
            "إلههم": "إِلَاههم",
            "إلههن": "إِلَاههن",
            "رحمن": "رَحمَان",
            "الرحمن": "الرَّحمَان",
            "طاوس": "طَاوُوس",
            "داود": "دَاوُود",
            "لكن": "لَاكِن",
            "لكنّ": "لَاكِنّ",
            "لكنه": "لَاكِنّهُ",
            "طه": "طَاهَا",
            "يس": "يَاسِين",
        }

    def register_custom_spelling(self, word, replacement):
        """
        Register a custom Arudi spelling for a specific word.

        Args:
            word (str): The word (without diacritics) to replace (e.g., 'لكن').
            replacement (str): The phonetic Arudi spelling (e.g., 'لَاكِن').
        """
        self.CHANGE_LST[word] = replacement

    def _normalize_shadda(self, text):
        # Ensure Shadda comes before Harakat/Tanween
        harakat_all = "".join(self.harakat + self.tnween_chars)
        shadda = "".join(self.shadda_chars)
        return re.sub(f"([{harakat_all}])([{shadda}])", r"\2\1", text)

    def _handle_space(self, plain_chars):
        # Drop the most recently emitted character; if that character is a
        # space, drop it together with the letter before it. Used when an
        # already-emitted letter must be elided.
        if not plain_chars:
            return plain_chars

        if plain_chars[-1] == " ":
            return plain_chars[:-2]
        else:
            return plain_chars[:-1]

    def _remove_extra_harakat(self, text):
        # Drop any haraka that is immediately followed by another haraka,
        # keeping only the last of each run.
        out = ""
        i = 0
        while i < len(text):
            if i < len(text) - 1:
                if text[i] in self.harakat and text[i + 1] in self.harakat:
                    i += 1
                    continue
            out += text[i]
            i += 1
        return out

    def _process_specials_before(self, bait):
        # Handle specific starting Alif cases
        if bait and bait[0] == "ا":
            # Heuristic: Bohour chose the vocalization at random; for
            # deterministic output we always rewrite a bare leading Alif
            # as Hamza with Fatha.
            bait = "أَ" + bait[1:]

        bait = bait.replace("وا ", "و ")
        if bait.endswith("وا"):
            bait = bait[:-1]

        bait = bait.replace("وْا", "و")
        if bait.endswith("وْا"):
            bait = bait[:-2] + "و"

        # Common substitutions
        bait = bait.replace("الله", "اللاه")
        bait = bait.replace("اللّه", "الله")
        bait = bait.replace("إلَّا", "إِلّا")
        bait = bait.replace("نْ ال", "نَ ال")
        bait = bait.replace("لْ ال", "لِ ال")
        bait = bait.replace("إلَى", "إِلَى")
        bait = bait.replace("إذَا", "إِذَا")
        bait = bait.replace("ك ", "كَ ")
        bait = bait.replace(" ال ", " الْ ")
        bait = bait.replace("ْ ال", "ِ ال")
        bait = bait.replace("عَمْرٍو", "عَمْرٍ")
        bait = bait.replace("عَمْرُو", "عَمْرُ")

        # Shorten long vowels before Al (Hamzat Wasl + Lam Qamariya/Shamsiya)
        # This handles "Iltiqa al-Sakinayn" (meeting of two sakins) by dropping the first long vowel
        # e.g., 'إِلَى الْ' -> 'إِلَ الْ', 'فِي الْ' -> 'فِ الْ', 'ذَا الْ' -> 'ذَ الْ'
        bait = re.sub(r"([^\s])([اىيو])\s+ال", r"\1 ال", bait)

        # Word replacements from CHANGE_LST
        out = []
        valid_prefixes = ["و", "ف", "ك", "ب", "ل", "وب", "فك", "ول", "فل"]

        for word in bait.split(" "):
            cleaned_word = strip_tashkeel(word)
            found = False

            # 1. Exact match check
            for key, replacement in self.CHANGE_LST.items():
                if cleaned_word == key:
                    out.append(replacement)
                    found = True
                    break

            # 2. Prefix check if not found
            if not found:
                for key, replacement in self.CHANGE_LST.items():
                    if cleaned_word.endswith(key):
                        prefix = cleaned_word[:-len(key)]
                        if prefix in valid_prefixes:
                            # Prefixed match (e.g. "وهذا" = "و" + "هذا").
                            # The original word's diacritics make it unreliable
                            # to slice the prefix off the original string, so
                            # rebuild it from the cleaned prefix letters, giving
                            # each its conventional haraka: و/ف/ك take Fatha,
                            # ب/ل take Kasra.
                            prefix_harakat = {
                                "و": "وَ", "ف": "فَ", "ك": "كَ", "ب": "بِ", "ل": "لِ"
                            }

                            # Construct new word
                            new_prefix = ""
                            for p_char in prefix:
                                new_prefix += prefix_harakat.get(p_char, p_char) # Default to char if no mapping

                            out.append(new_prefix + replacement)
                            found = True
                            break

            if not found:
                out.append(word)

        bait = " ".join(out)

        # If the first letter carries no haraka (the second character is a
        # bare letter), give it a default Fatha.
        if len(bait) > 1 and bait[1] in self.all_chars:
            bait = bait[0] + self.harakat[1] + bait[1:]

        # Filter trailing alif after tanween
        final_chars = []
        i = 0
        while i < len(bait):
            if bait[i] == "ا" and i > 0 and bait[i - 1] in self.tnween_chars:
                i += 1
                # skip following harakat if any
                if i < len(bait) and bait[i] in self.harakat + self.sukun + self.tnween_chars + self.shadda_chars:
                    i += 1
                continue
            final_chars.append(bait[i])
            i += 1

        return "".join(final_chars)

    def _process_specials_after(self, bait):
        bait = bait.replace("ةن", "تن")
        return bait

    def _extract_pattern(self, text):
        """
        Core logic to extract binary pattern and arudi text.
        Based on Bohour's extract_tf3eelav3.
        """
        text = self._remove_extra_harakat(text)
        chars = list(text.replace(ALEF_MADDA, "ءَا").strip())  # Replace Madda
        chars = [c for c in chars if c in self.prem_chars]
        chars = list(re.sub(" +", " ", "".join(chars).strip()))

        out_pattern = ""
        plain_chars = ""

        i = 0
        while i < len(chars) - 1:
            char = chars[i]

            if char in self.all_chars:
                if char == " ":
                    plain_chars += char
                    i += 1
                    continue

                # Lookahead
                next_char = chars[i + 1]
                if next_char == " " and i + 2 < len(chars):
                    next_char = chars[i + 2]

                next_next_char = None
                if i < len(chars) - 2:
                    next_next_char = chars[i + 2]

                prev_digit = out_pattern[-1] if len(out_pattern) > 0 else ""

                # Logic
                if next_char in self.harakat:
                    out_pattern += "1"
                    plain_chars += char

                elif next_char in self.sukun:
                    if prev_digit != "0":
                        out_pattern += "0"
                        plain_chars += char
                    elif (i + 1) == len(chars) - 1:
                        # End of line sukun handling
                        out_pattern = out_pattern[:-1] + "10"
                        plain_chars += char
                    else:
                        plain_chars = self._handle_space(plain_chars) + char

                elif next_char in self.tnween_chars:
                    if char != "ا":
                        plain_chars += char
                    plain_chars += "ن"
                    out_pattern += "10"

                elif next_char in self.shadda_chars:
                    if prev_digit != "0":
                        plain_chars += char + char
                        out_pattern += "01"
                    else:
                        plain_chars = self._handle_space(plain_chars) + char + char
                        out_pattern += "1"

                    # Check what follows the Shadda.
                    if i + 2 < len(chars):
                        if chars[i + 2] in self.harakat:
                            # The haraka is already covered by the "1" emitted
                            # for the doubled letter; skip it.
                            i += 1
                        elif chars[i + 2] in self.tnween_chars:
                            # The doubled letter emitted "01" (or "1" after a
                            # sakin) above; append the tanween's closing "0".
                            i += 1
                            plain_chars += "ن"
                            out_pattern += "0"

                elif next_char == "ا":
                    out_pattern += "10"
                    plain_chars += char + "ا"

                elif next_char in self.all_chars:
                    # Letter followed by a bare letter: treat the first as
                    # carrying an implicit sukun.
                    if prev_digit != "0":
                        out_pattern += "0"
                        plain_chars += char
                    elif prev_digit == "0" and i + 1 < len(chars) and chars[i + 1] == " ":
                        # Special case from Bohour
                        out_pattern += "1"
                        plain_chars += char
                    else:
                        plain_chars = self._handle_space(plain_chars) + char
                    # next_char is a letter, not a diacritic, so step back one:
                    # the unconditional i += 2 below then advances past the
                    # current letter only.
                    i -= 1

                # Ha' al-Gha'ib (He) handling
                if next_next_char == " ":
                    if char == "ه":
                        if next_char == self.harakat[0]:  # Kasra
                            plain_chars += "ي"
                            out_pattern += "0"
                        if next_char == self.harakat[2]:  # Damma
                            plain_chars += "و"
                            out_pattern += "0"

                i += 2  # Advance past char and its diacritic/follower
            elif char == "ا":
                # Alef encountered as 'char' (e.g. after a diacritic consumed the previous letter)
                out_pattern += "0"
                plain_chars += char
                i += 1
            else:
                i += 1

        # Finalize
        if out_pattern and out_pattern[-1] != "0":
            out_pattern += "0"  # Always end with sukun (Qafiyah)

        # Ashba' (Saturation) of last letter
        if chars:
            last_char = chars[-1]
            if last_char == self.harakat[0]:  # Kasra
                plain_chars += "ي"
            elif last_char == self.tnween_chars[1]:  # Kasr Tanween
                plain_chars = plain_chars[:-1] + "ي"
            elif last_char == self.harakat[1]:  # Fatha
                plain_chars += "ا"
            elif last_char == self.harakat[2]:  # Damma
                plain_chars += "و"
            elif last_char == self.tnween_chars[0]:  # Damm Tanween
                plain_chars = plain_chars[:-1] + "و"
            elif last_char in self.mostly_saken and len(chars) > 1 and chars[-2] not in self.tnween_chars:
                plain_chars += last_char

        return plain_chars, out_pattern

    def prepare_text(self, text):
        """
        Converts standard Arabic text into Arudi style and extracts the binary pattern.

        Args:
            text (str): The input Arabic text (hemistich or line).

        Returns:
            tuple[str, str]: A tuple containing:
                - `arudi_style` (str): The phonetic Arudi representation (e.g., "مُسْتَفْعِلُنْ").
                - `pattern` (str): The binary pattern string (e.g., "1010110").
        """
        text = text.strip()
        if not text:
            return "", ""

        text = self._normalize_shadda(text)
        preprocessed = self._process_specials_before(text)
        arudi_style, pattern = self._extract_pattern(preprocessed)
        arudi_style = self._process_specials_after(arudi_style)

        return arudi_style, pattern
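
The register_custom_spelling method above extends the CHANGE_LST table at runtime. A short sketch; the pair shown mirrors an entry that already ships as a built-in, purely for illustration:

from pyarud.arudi import ArudiConverter

converter = ArudiConverter()
converter.register_custom_spelling("لكن", "لَاكِن")  # word without diacritics -> Arudi spelling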

prepare_text(text)

Converts standard Arabic text into Arudi style and extracts the binary pattern.

Parameters:

    text (str, required):
        The input Arabic text (hemistich or line).

Returns:

    tuple[str, str]: A tuple containing:
        - arudi_style (str): The phonetic Arudi representation (e.g., "مُسْتَفْعِلُنْ").
        - pattern (str): The binary pattern string (e.g., "1010110").
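
A sketch of the conversion step in isolation (the hemistich is illustrative):

from pyarud.arudi import ArudiConverter

converter = ArudiConverter()
arudi, pattern = converter.prepare_text("قِفَا نَبْكِ مِنْ ذِكْرَى حَبِيبٍ وَمَنْزِلِ")
print(arudi)    # phonetic Arudi rewriting of the hemistich
print(pattern)  # "1"/"0" string of mutaharrik/sakin positions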
