-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrepresentations.py
More file actions
2269 lines (1994 loc) · 103 KB
/
representations.py
File metadata and controls
2269 lines (1994 loc) · 103 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
import copy
import re
import itertools
class Split(object):
"""A container object for a simple split.
Used mainly to promote consistency with special kinds of splits."""
__slots__ = ('splitloc', 'forced')
def __init__(self, splitloc=0, forced=False):
"""Initializes by storing splitloc; default value is 0 (no split)"""
self.splitloc = splitloc
self.forced = forced
def __repr__(self):
"""Provides a string representation of the object"""
cls_name = type(self).__name__
if self.forced:
cls_name = "Forced" + cls_name
splitloc = self.splitloc
other_attr = ", ".join("{}={}".format(attr, self.__getattribute__(attr)) for cls in type(self).mro() for attr in
getattr(cls, '__slots__', ()) if
(attr not in ["splitloc", "forced"] and self.__getattribute__(attr) is not None))
if other_attr:
other_attr = ", " + other_attr
return "{}({}{})".format(cls_name, splitloc, other_attr)
def __bool__(self):
"""Return False iff splitloc == 0"""
return self.splitloc != 0
def __lt__(self, other):
"""For sorting, use the splitloc"""
return self.splitloc < other.splitloc
def __gt__(self, other):
"""For comparisons, use the splitloc"""
return self.splitloc > other.splitloc
def __eq__(self, other):
"""For object equality, compare the attributes"""
return type(self) == type(other) and all(
self.__getattribute__(attr) == other.__getattribute__(attr) for cls in type(self).mro() for attr in
set(getattr(cls, '__slots__', ())) - {"forced", "new_badsplits"})
def __hash__(self):
"""For hashing, use the splitloc"""
return hash(self.splitloc)
def __copy__(self):
"""A method for copying, to improve speed"""
return Split(self.splitloc, self.forced)
def reindex(self, other_split):
"""Reindexes by subtracting the splitloc of the other split.
Returns a new Split object
Arguments:
other_split: split with location to be reindexed to"""
new_split = copy.copy(self)
new_split.splitloc -= other_split.splitloc
return new_split
def is_left_of(self, other_split):
"""Checks whether a Split is left of another"""
return isinstance(other_split, Split) and self < other_split
def is_right_of(self, other_split):
"""Checks whether a Split is right of another"""
return isinstance(other_split, Split) and self > other_split
def trim_R_newform(self, other_split):
"""A placeholder for a method that is only valid for ChangeSplits"""
return self
class ChangeSplit(Split):
"""A container for a split that introduces a change to the form
of its left and/or right child"""
__slots__ = ('L_newform', 'R_newform')
def __init__(self, splitloc, L_newform=None, R_newform=None, forced=False):
"""Initializes by storing data
Arguments:
splitloc: split location
L_newform: form of left child (default = None)
R_newform: form of right child (default = None)
forced: forced (? split) (default = False)
"""
# Store splitloc based on split
super().__init__(splitloc, forced=forced)
# Store form changes
self.L_newform = L_newform
self.R_newform = R_newform
def __copy__(self):
"""A method for copying, to improve speed"""
return ChangeSplit(self.splitloc, self.L_newform, self.R_newform, self.forced)
def reindex(self, other_split):
"""Reindexes the current split by subtracting the splitloc of the
provided other_split, and trimming the non-RED L_newform.
Arguments:
other_split: split with location to be trimmed at
"""
new_split = super().reindex(other_split)
if not (new_split.L_newform is None or isinstance(new_split.L_newform, Reduplicant)):
new_split.L_newform = new_split.L_newform[other_split.splitloc:]
return new_split
def trim_R_newform(self, other_split):
"""Trims a non-RED R_newform, if it exists, to remove any
atoms that are to the right of other_split.splitloc
Arguments:
other_split: split with location to remove right-side atoms"""
new_split = copy.copy(self)
if not (new_split.R_newform is None or isinstance(new_split.R_newform, Reduplicant)):
new_split.R_newform = new_split.R_newform[:other_split.splitloc - new_split.splitloc]
return new_split
class RedSplit(ChangeSplit):
"""A container for a split that introduces reduplication"""
__slots__ = ('red_edge', 'minbase_edge', 'new_badsplits')
def __init__(self, splitloc, red_edge, minbase_edge, new_badsplits, L_newform=None, R_newform=None, forced=False):
"""Initializes by storing data.
new_badsplits is a tuple of splitlocs (ints) to be avoided
Arguments:
splitloc: split location
red_edge: reduplication edge location
minbase_edge: minbase edge location
new_badsplits: tuple of splitlocs (ints) to be avoided
L_newform: left new form (default = None)
R_newform: right new form (default = None)
forced: forced (? split) (default = False)
"""
# Store splitloc and provided changed forms
super().__init__(splitloc, L_newform=L_newform, R_newform=R_newform, forced=forced)
# Store RED-specific info
self.red_edge = red_edge
self.minbase_edge = minbase_edge
self.new_badsplits = new_badsplits
def __copy__(self):
"""A method for copying, to improve speed"""
return RedSplit(self.splitloc, self.red_edge, self.minbase_edge, self.new_badsplits, self.L_newform, self.R_newform, self.forced)
@property
def red_start(self):
"""Get the index of the start of RED"""
return min(self.splitloc, self.red_edge)
@property
def red_end(self):
"""Get the index of the end of RED"""
return max(self.splitloc, self.red_edge)
@property
def attachment(self):
"""Gets the side to which RED attaches: L or R"""
if self.red_edge < self.splitloc:
return ("L")
elif self.red_edge > self.splitloc:
return ("R")
def reindex(self, other_split):
"""Reindexes by subtracting the splitloc of the other split,
also updating the non-RED L_newform, red_edge and new_badsplits.
Returns a new RedSplit object
Arguments:
other_split: split to set new split location for red_edge"""
new_split = super().reindex(other_split)
new_split.red_edge -= other_split.splitloc
new_split.new_badsplits = tuple(badsplit - other_split.splitloc for badsplit in new_split.new_badsplits)
return new_split
def is_left_of(self, other_split):
"""Checks whether a RedSplit is left of another Split"""
return isinstance(other_split, Split) and self < other_split and self.red_edge <= other_split.splitloc
def is_right_of(self, other_split):
"""Checks whether a RedSplit is right of another Split"""
return isinstance(other_split, Split) and self > other_split and self.red_edge >= other_split.splitloc
def is_at_edge_of(self, construction):
"""Checks whether a RedSplit is at the edge of a Construction"""
return self.red_edge == 0 or self.red_edge == len(construction)
def has_base_at_edge_of(self, construction):
"""Checks whether the minbase of the RedSplit is at the edge
of a Construction"""
base_edge = self.minbase_edge
return base_edge == 0 or base_edge == len(construction)
class SplitStore(tuple):
"""A container for storing the splits associated with a construction.
The underlying data structure is a tuple."""
__slots__ = ()
def get(self, splitloc):
"""Gets the Split object associated with a given splitloc.
If there is no such Split, returns None
Arguments:
splitloc: split location to retrieve associated Split object
"""
for split in self:
if split.splitloc == splitloc:
return split
return None
@property
def splitlocs(self):
"""Returns a tuple of the splitlocs of all Splits in the store"""
return tuple(split.splitloc for split in self)
def reindex(self, other_split):
"""Returns a new SplitStore in which all of the splits
have been reindexed by subtracting the splitloc of other_split.
Arguments:
other_split: split with location to be reindexed to
"""
return SplitStore(split.reindex(other_split) for split in self)
def slice(self, start_split, end_split=None):
"""Returns a new SplitStore containing all of the Splits that fall
entirely within the range spanned by start_split.splitloc and
end_split.splitloc (defaults to end of construction), reindexed
according to start_split (and with any non-RED R_newforms trimmed
according to end_split).
Only includes RedSplits if the start or end splits are not in their
new badsplits.
Arguments:
start_split: split with location to begin slice at
end_split: split with location to end slice at (default = None)
"""
# If there are no splits, return an empty SplitStore
if not self:
return SplitStore()
# Otherwise, slice as required
sliced_splits = (split for split in self if split.is_right_of(start_split) and not (
isinstance(split, RedSplit) and start_split.splitloc in split.new_badsplits))
if end_split is not None:
sliced_splits = (split.trim_R_newform(end_split) for split in sliced_splits if
split.is_left_of(end_split) and not (
isinstance(split, RedSplit) and end_split.splitloc in split.new_badsplits))
modified_splits = SplitStore(sliced_splits).reindex(start_split)
return modified_splits
def partition(self, other_split):
"""Returns a pair of SplitStores, (L_splitlist, R_splitlist),
containing all of the splits to the left and right of the provided
other_split, respectively. Splits to the right are reindexed,
and any ChangeSplits to the left have their non-RED R_newforms
shortened.
Arguments:
other_split: split with location to partition
"""
L_splitlist = SplitStore(split.trim_R_newform(other_split) for split in self if split.is_left_of(other_split))
R_splitlist = SplitStore(split.reindex(other_split) for split in self if split.is_right_of(other_split))
return (L_splitlist, R_splitlist)
def to_splittree(self, red_delayed=False):
"""Converts a SplitStore into a SplitTree, assuming primarily
right-branching structure, except that RedSplits are constrained
to be made after their corresponding edge splits, so that each
instance of RED is introduced at the edge of a construction.
If red_delayed is True, RedSplits are additionally delayed so
that they introduce terminal nodes, i.e. so that the red + base
combination forms its own construction.
Note: this will not retrieve splits from a construction store; only the
splits in the existing splitlist for this construction will be used.
Arguments:
red_delayed: boolean to additional delay RedSplits so that
they introduce terminal nodes (default = False)
"""
splittree = SplitTree(terminal=True)
# Build bottom-up
for split in sorted(self, reverse=True):
splittree = SplitTree(split=split, R_subtree=splittree)
# Make structural enforcements
if red_delayed:
splittree = splittree.enforce_delayed_red()
else:
splittree = splittree.enforce_edge_red()
return splittree
class SplitTree(object):
"""A container for storing a tree of binary Splits associated with
a construction. Splits are indexed to the construction as a whole,
not to its parts. Behaves like a namedtuple, with attributes split,
L_subtree, and R_subtree."""
__slots__ = ('split', 'L_subtree', 'R_subtree')
def __init__(self, split=None, L_subtree=None, R_subtree=None, terminal=False):
"""Creates a new SplitTree object
Arguments:
split = split object
L_subtree = SplitTree object left of new SplitTree (default = None)
R_subtree = SplitTree object right of new SplitTree (default = None)
terminal = boolean to indicate final SplitTree object (?) (default = False)
"""
if not terminal:
if L_subtree is None:
L_subtree = SplitTree(terminal=True)
if R_subtree is None:
R_subtree = SplitTree(terminal=True)
self.split = split
self.L_subtree = L_subtree
self.R_subtree = R_subtree
def __iter__(self):
"""Iterate over subtrees"""
if self.is_branching:
yield self.L_subtree
yield self.R_subtree
def __repr__(self):
"""String representation: like a namedtuple"""
if self.is_branching:
return "SplitTree({}, L={}, R={})".format(self.split, repr(self.L_subtree), repr(self.R_subtree))
else:
return "SplitTree()"
def __str__(self):
"""Pretty printing: indented"""
return self._pretty_print()
def __eq__(self, other_tree):
"""Two trees are equal if they have the same splits and same children"""
if not isinstance(other_tree, SplitTree):
return False
if self.is_branching:
return (self.split == other_tree.split and
self.L_subtree == other_tree.L_subtree and
self.R_subtree == other_tree.R_subtree)
else:
return not other_tree.is_branching
def __hash__(self):
"""For hashing, use the corresponding splitlist"""
return hash(self.to_splitlist())
def _pretty_print(self, indent_level=0):
"""Pretting printing: indented"""
indent = " " * indent_level
if self.is_branching:
return "{}\n{}{}\n{}".format(self.R_subtree._pretty_print(indent_level=indent_level + 1),
indent, "{}({})".format(type(self.split).__name__[0], self.split.splitloc),
self.L_subtree._pretty_print(indent_level=indent_level + 1))
else:
return "{}xxx".format(indent)
@property
def is_branching(self):
"""Checks if the tree is branching"""
return self.split is not None
@property
def _contained_splits(self):
"""Returns a generator over all of the Split objects contained in
the SplitTree; that is, the split itself, plus the splits of the
left and right subtrees."""
if self.is_branching:
yield from self.L_subtree._contained_splits
yield self.split
yield from self.R_subtree._contained_splits
def is_ancestor(self, split1, split2):
"""Checks if split1 is ancestor of split2 in this SplitTree.
split1 is an ancestor if split2 is contained within the subtree
corresponding to split1.
Note: this uses the Splits from the tree that have the same splitloc
as split1 and split2, not split1 and split2 themselves.
Arguments:
split1: Split to check if is an ancestor to split2
split2: Split to check if is in split1 subtree
"""
subtree = self.get(split1.splitloc)
split2 = self.to_splitlist().get(split2.splitloc)
return split2 in subtree._contained_splits
def get_terminal_parents(self):
"""Returns a generator over the parent of each terminal node,
paired with the side of the subtree that is the terminal node"""
if self.is_branching:
if self.L_subtree.is_branching:
yield from self.L_subtree.get_terminal_parents()
else:
yield (self, "L")
if self.R_subtree.is_branching:
yield from self.R_subtree.get_terminal_parents()
else:
yield (self, "R")
def _get_subtree(self, side):
"""Gets the subtree on the given side
Arguments:
side: side to retrieve subtree
"""
if side == "L":
return self.L_subtree
elif side == "R":
return self.R_subtree
def _set_subtree(self, side, new_subtree):
"""Sets the subtree on the given side
Arguments:
side: side to set subtree
new_subtree: subtree to be set
"""
if side == "L":
self.L_subtree = new_subtree
elif side == "R":
self.R_subtree = new_subtree
def reindex(self, split):
"""Reindex all splits in the SplitTree by the given Split.
Arguments:
split: Split to be used to reindex splits in SplitTree"""
if self.is_branching:
return SplitTree(split=self.split.reindex(split),
L_subtree=self.L_subtree.reindex(split),
R_subtree=self.R_subtree.reindex(split))
else:
return SplitTree(terminal=True)
def to_splitlist(self):
"""Converts a tree into a flat splitlist"""
return SplitStore(self._contained_splits)
def replace(self, target_split, replacement_subtree):
"""Returns a new SplitTree object, in which the subtree introduced by
a split at the splitloc of a provided Split is replaced by a new subtree
Arguments:
target_split: Split with location to replace subtree
replacement_subtree: replacement subtree to replace
introduced subtree with
"""
# If the current split matches, make the swap
if self.split.splitloc == target_split.splitloc:
new_tree = replacement_subtree
# Otherwise, recurse
else:
if self.split < target_split:
# Go down the right branch
new_tree = SplitTree(split=self.split, L_subtree=self.L_subtree,
R_subtree=self.R_subtree.replace(target_split, replacement_subtree))
elif self.split > target_split:
# Go down the left branch
new_tree = SplitTree(split=self.split,
L_subtree=self.L_subtree.replace(target_split, replacement_subtree),
R_subtree=self.R_subtree)
return new_tree
def to_brackets(self, brackets, first_split=True):
"""Converts a SplitTree into a list of brackets to be inserted
into a word string
Arguments:
brackets: list of brackets to be inserted into a word string
first_split: boolean to indicate if split is first in subtree (?)
(default = True)
"""
if first_split:
brackets = copy.copy(brackets)
if self.is_branching:
splitloc = self.split.splitloc
# Insert brackets for left component
brackets[0] += "["
brackets[splitloc] = "]" + brackets[splitloc]
# Insert brackets for right component
brackets[splitloc] += "["
brackets[-1] = "]" + brackets[-1]
# Add angled brackets to flag RED
if isinstance(self.split, RedSplit):
brackets[self.split.red_start] += "<"
brackets[self.split.red_end] = ">" + brackets[self.split.red_end]
# Recurse
Lbrackets = self.L_subtree.to_brackets(brackets[:splitloc] + [brackets[splitloc].strip("[<")],
first_split=False)
Rbrackets = self.R_subtree.reindex(self.split).to_brackets(
[brackets[splitloc].strip("]>")] + brackets[splitloc + 1:], first_split=False)
brackets[:splitloc] = Lbrackets[:-1]
brackets[splitloc] = Lbrackets[-1] + Rbrackets[0]
brackets[splitloc + 1:] = Rbrackets[1:]
# Add external brackets to the entire word if there is internal structure
if first_split:
brackets[0] = "[" + brackets[0]
brackets[-1] += "]"
return brackets
def get(self, splitloc):
"""Extracts the subtree of a splittree
that is introduced by a split at a given splitloc.
If there is no such Split, returns None.
Arguments:
splitloc: location of splittree to extract subtree"""
# First check the split is actually present
split = self.to_splitlist().get(splitloc)
if split is None:
return None
# Now get the subtree
current_tree = self
while current_tree.split != split:
if split < current_tree.split:
current_tree = current_tree.L_subtree
else:
current_tree = current_tree.R_subtree
return current_tree
def update_split_type(self, new_split):
"""Returns a new tree that has a Split replaced with new_split
of the same splitloc.
Arguments:
new_split: new Split to replace with"""
subtree = self.get(new_split.splitloc)
old_split = subtree.split
subtree.split = new_split
return self.replace(old_split, subtree)
def enforce_edge_red(self):
"""Return a restructured SplitTree where all reduplicants are at the edge
of the construction when the corresponding RedSplit is made.
Thus, the RedSplit for an L-attaching RED is made to have a
terminal L_subtree, and the RedSplit for an R-attaching RED is
made to have a terminal R_subtree, while the Split at the red_edge
is moved to be parent of the RedSplit."""
splits = SplitStore(self._contained_splits)
redsplits = [split for split in splits if isinstance(split, RedSplit)]
edgesplits = [splits.get(redsplit.red_edge) for redsplit in redsplits]
# Now enforce the edge-alignment
modified_splittree = self
for redsplit, edgesplit in zip(redsplits, edgesplits):
# We can skip edgesplits of None, assuming that RED extends to the construction edge
if edgesplit is not None:
attach_side = redsplit.attachment
other_side = "L" if attach_side == "R" else "R"
# We are only concerned with the cases where redsplit is an ancestor of edgesplit
if modified_splittree.is_ancestor(redsplit, edgesplit):
# Let R represent the RedSplit subtree and E the EdgeSplit subtree
E = modified_splittree.get(edgesplit.splitloc)
# Replace E in the tree with its subtree from the attachment side
modified_splittree = modified_splittree.replace(edgesplit, E._get_subtree(attach_side))
# Get R from the modified tree
R = modified_splittree.get(redsplit.splitloc)
# Modify E to have the subtree of R on the attachment side
E._set_subtree(attach_side, R._get_subtree(attach_side))
# Modify R to have an empty subtree on the attachment side
R._set_subtree(attach_side, SplitTree(terminal=True))
# Modify E to have R as its subtree on the other side
E._set_subtree(other_side, R)
# Replace R in the tree with E
modified_splittree = modified_splittree.replace(redsplit, E)
return modified_splittree
def enforce_delayed_red(self):
"""Restructure the SplitTree so that any instances of reduplication
are as low/late as possible, so the RedSplit introduces no subtrees,
and the split between RED and the base is terminal.
This also enforces that all instances of RED be at the edge of their
constructions."""
# First make sure that all REDs are at the edge
modified_splittree = self.enforce_edge_red()
# Now get the redsplits and their corresponding base splits
splits = SplitStore(modified_splittree._contained_splits)
redsplits = [split for split in splits if isinstance(split, RedSplit)]
basesplits = []
for redsplit in redsplits:
basesplit = None # Leftover Nones represent bases that go to the construction edge
if redsplit.attachment == "L":
following_splits = [split for split in splits if split > redsplit]
if following_splits: basesplit = min(following_splits)
elif redsplit.attachment == "R":
previous_splits = [split for split in splits if split < redsplit]
if previous_splits: basesplit = max(previous_splits)
basesplits.append(basesplit)
for redsplit, basesplit in zip(redsplits, basesplits):
# We can skip cases where there is no basesplit,
# since then the edge of the construction is the basesplit,
# and the redsplit is terminal due to enforcing edge-alignment
if basesplit is not None:
attach_side = redsplit.attachment
other_side = "L" if attach_side == "R" else "R"
# We are only concerned with cases where redsplit is an ancestor of basesplit
if modified_splittree.is_ancestor(redsplit, basesplit):
# Let R represent the RedSplit subtree and B the basesplit subtree
R = modified_splittree.get(redsplit.splitloc)
B = modified_splittree.get(basesplit.splitloc)
# First replace R in the tree with its subtree from the other side
modified_splittree = modified_splittree.replace(redsplit, R._get_subtree(other_side))
# Then remove the other subtree of R
R._set_subtree(other_side, SplitTree(terminal=True))
# Set R to be the subtree of B on the attachment side
B._set_subtree(attach_side, R)
# Finally, update B in the tree
modified_splittree = modified_splittree.replace(basesplit, B)
return modified_splittree
def get_recapitulated_redsplits(self, construction):
"""Gets the potential RedSplits of a reference Construction
that are recapitulated in this tree
Arguments:
construction: Construction object to get recapitulated RedSplits of
"""
splits = self.to_splitlist()
splitlocs = splits.splitlocs
possible_redsplits = [redspan.to_redsplit() for redspan in construction.redspans]
# Recapitulated redsplits have the same splitloc as some existing split,
# have a split at their red_edge, and have no splits at their badsplits
recapitulated_redsplits = [redsplit for redsplit in possible_redsplits if (
redsplit not in splits and
redsplit.splitloc in splitlocs and
(redsplit.red_edge in splitlocs or
redsplit.red_edge == 0 or
redsplit.red_edge == len(construction)) and
not set(redsplit.new_badsplits).intersection(splitlocs))]
# Make sure forcing remains the same
for redsplit in recapitulated_redsplits:
redsplit.forced = splits.get(redsplit.splitloc).forced
return recapitulated_redsplits
def get_subtrees(self):
"""Returns a tuple of the two subtrees, with R_subtree reindexed
according to the current split"""
if self.is_branching:
R_subtree = None if self.R_subtree is None else self.R_subtree.reindex(self.split)
return (self.L_subtree, R_subtree)
class RedSpan(object):
"""Data class for a potential instance of reduplication"""
__slots__ = ('red_start', 'red_end', 'minbase_edge', 'reduplicant', 'minbase', 'red_label')
def __init__(self, source, red_start, red_end, minbase_edge, label_by_kind=False):
"""Initialize the object.
From the endpoints of RED and the minimum permissible base,
calculate all other required attributes.
Arguments:
source: source of potential instance of reduplication
red_start: location of reduplication beginning
red_end: location of reduplication endpoint
minbase_edge: minimum permissible base
label_by_kind: label for RedSpan (default = False)
"""
self.red_start = red_start
self.red_end = red_end
self.minbase_edge = minbase_edge
# If generated from RedSpan source, inherit reduplicant, base, and label
if isinstance(source, RedSpan):
self.reduplicant = source.reduplicant
self.minbase = source.minbase
self.red_label = source.red_label
# Otherwise, calculate properties from source construction
else:
self.reduplicant = self._get_reduplicant(source)
self.minbase = self._get_minbase(source)
if label_by_kind:
self.red_label = self.kind
else:
self.red_label = "<RED>"
def __str__(self):
"""Pretty-printing function"""
return 'RedSpan(red_start={}, red_end={}, minbase_edge={})'.format(
*(self.__getattribute__(attr) for attr in ["red_start", "red_end", "minbase_edge"]))
def __repr__(self):
"""Full-printing function"""
return 'RedSpan({})'.format(
", ".join("{}={}".format(attr, self.__getattribute__(attr)) for attr in self.__slots__))
def __eq__(self, other):
"""For equality, use red_start, reduplicant and minbase"""
return (type(self) == type(other) and
(self.red_start, self.reduplicant, self.minbase) ==
(other.red_start, other.reduplicant, other.minbase))
def __hash__(self):
"""For hashing, use red_start, reduplicant and minbase"""
return hash((self.red_start, self.reduplicant, self.minbase))
@property
def attachment(self):
"""Gets the side to which RED attaches: L or R"""
if self.minbase_edge > self.red_end:
return ("L")
elif self.minbase_edge < self.red_start:
return ("R")
@property
def red_edge(self):
"""Gets the far edge of the redspan from the base"""
if self.attachment == "L":
return self.red_start
elif self.attachment == "R":
return self.red_end
@property
def left_edge(self):
"""Gets the left edge of the redspan"""
return min(self.red_start, self.minbase_edge)
@property
def right_edge(self):
"""Gets the right edge of the redspan"""
return max(self.red_end, self.minbase_edge)
@property
def splitloc(self):
"""Gets the location of the split associated with this redspan.
For L-attaching RED, splitloc is at red_end;
for R-attaching RED, splitloc is at red_start."""
if self.attachment == "L":
return self.red_end
elif self.attachment == "R":
return self.red_start
@property
def base_portion(self):
"""Gets the portion of the base that the reduplicant is assumed
to have copied."""
if self.attachment == "L":
return self.minbase[:len(self.reduplicant)]
elif self.attachment == "R":
return self.minbase[-len(self.reduplicant):]
def _get_reduplicant(self, construction):
"""Gets the part of the construction representing the reduplicant"""
return construction[self.red_start:self.red_end]
def _get_minbase(self, construction):
"""Gets the part of the construction that is the minimal base"""
if self.attachment == "L":
return construction[self.red_end:self.minbase_edge]
elif self.attachment == "R":
return construction[self.minbase_edge:self.red_start]
def reindex(self, newsplit):
"""Returns a new RedSpan, reindexed by subtracting a given newsplit from the RED and base indices.
Validity checking of reindexing is assumed to occur elsewhere (so this can give negative indices)."""
newsplitloc = newsplit.splitloc
return RedSpan(self, self.red_start - newsplitloc, self.red_end - newsplitloc, self.minbase_edge - newsplitloc)
def is_left_of(self, newsplit):
"""Checks whether the RedSpan is entirely left of a provided newsplit."""
newsplitloc = newsplit.splitloc
return self.right_edge <= newsplitloc
def is_right_of(self, newsplit):
"""Checks whether the RedSpan is entirely right of a provided newsplit."""
newsplitloc = newsplit.splitloc
return self.left_edge >= newsplitloc
def is_at_edge_of(self, construction):
"""Checks whether RED in the redspan is at the edge of the construction
that is consistent with its attachment side (L-attaching RED at the left
edge, R-attaching RED at the right edge)"""
red_edge = self.red_edge
return red_edge == 0 or red_edge == len(construction)
def has_base_at_edge_of(self, construction):
"""Checks whether the minbase of the redspan is at the edge
of the construction"""
base_edge = self.minbase_edge
return base_edge == 0 or base_edge == len(construction)
@property
def badsplits(self):
"""Gets the badsplits associated with the current redspan.
These are all the integer splits falling between left_edge and right_edge,
exclusive, and excluding the current splitloc"""
return (tuple(range(self.left_edge + 1, self.splitloc)) +
tuple(range(self.splitloc + 1, self.right_edge)))
def to_redsplit(self, base_newform=None, forced=False):
"""Converts a RedSpan object into a RedSplit object.
If label_by_kind is True, gives the label of RED as the
kind of reduplication; otherwise, gives it as <RED>.
Changes to the form of the base (e.g. initial lengthening)
are represented in base_newform.
Arguments:
base_newform: newform representing changes to the form of the base (?)
forced: boolean indicating if split is forced (?) (default = False)
"""
# Get the new forms of children
if self.attachment == "L":
L_newform = Reduplicant(self.kind, self.red_label)
R_newform = base_newform
elif self.attachment == "R":
L_newform = base_newform
R_newform = Reduplicant(self.kind, self.red_label)
return RedSplit(self.splitloc, self.red_edge, self.minbase_edge, self.badsplits,
L_newform=L_newform, R_newform=R_newform, forced=forced)
@property
def kind(self):
"""Gets a string representation of the kind of RED.
e.g. <RED-2-R> is right-reduplication of a bimoraic base portion,
<RED-1+-L> is left-reduplication of a monomoraic base portion, with lengthening"""
weight = self.base_portion.weight
lengthened = (self.reduplicant != self.base_portion and
self.base_portion.lengthen_initial() == self.reduplicant)
return "<RED-{}{}-{}>".format(weight, "+" if lengthened else "", self.attachment)
class AssociativeStore(dict):
"""Class for storing objects (redspans or badsplits) in association
with a motivating construction.
The basic structure is a dict mapping from object to motivators,
where motivators are represented as strings.
All of the public methods of this class return new instances,
rather than modifying the existing instance in-situ."""
__slots__ = ()
def _motivators(self, obj):
"""Get a set of motivators for the given object"""
return self.get(obj, set())
def add(self, new_objs, motivator):
"""Creates a new AssociativeStore in which a provided tuple of new_objs
has been added, with its corresponding motivator.
If motivator is a Construction, it is converted to str
Arguments:
new_objs: tuple of objects
motivator: associated motivating construction (?) (are all motivators constructions?)
"""
other_store = self.__class__((obj, {motivator.label}) for obj in new_objs)
return self + other_store
def remove(self, motivator):
"""Creates a new AssociativeStore in which the objects associated with
the provided motivator have been removed"""
other_store = self.__class__((obj, {motivator.label}) for obj in self)
return self - other_store
def __add__(self, other_store):
"""Creates a new AssociativeStore that has objs from both the current
store and a provided other store"""
modified_store = self.__class__()
for obj in set(self).union(other_store):
modified_store[obj] = self._motivators(obj).union(other_store._motivators(obj))
return modified_store
def __sub__(self, other_store):
"""Creates a new AssociativeStore that has all of the original objs, except
those contained in the other_store object"""
modified_store = self.__class__()
for obj in self:
modified_store[obj] = self[obj] - other_store._motivators(obj)
modified_store._trim_empty()
return modified_store
def _trim_empty(self):
"""Removes entries for objs that no longer have any motivators"""
unmotivated = [obj for obj, motivators in self.items() if not motivators]
for obj in unmotivated:
del self[obj]
class RedSpanStore(AssociativeStore):
"""Class for storing all RedSpans associated with a construction,
and methods for reindexing and partitioning these RedSpans.
The basic structure is a dict, mapping from RedSpan objects to
a set of strings representing surface ancestral constructions where
the redspans were originally found."""
__slots__ = ()
def reindex(self, split):
"""Returns a new RedSpanStore in which all of the redspans
have been reindexed by subtracting the splitloc of the given Split."""
modified_redspans = RedSpanStore(
(redspan.reindex(split), ancestors) for redspan, ancestors in self.items())
return modified_redspans
def slice(self, start_split, end_split=None):
"""Creates a new RedSpanStore that is a slice of the current one,
including all redspans that are right of start_split and left of
end_split, reindexed according to start_split
Arguments:
start_split: Split to begin slice
end_split: Split to end slice (default = None)
"""
# If there are no redspans, return an empty RedSpanStore
if not self:
return RedSpanStore()
# Otherwise, slice as required
sliced_redspans = ((redspan, ancestors) for redspan, ancestors in self.items() if
redspan.is_right_of(start_split))
if end_split is not None:
sliced_redspans = ((redspan, ancestors) for redspan, ancestors in sliced_redspans if
redspan.is_left_of(end_split))
modified_redspans = RedSpanStore(sliced_redspans).reindex(start_split)
return modified_redspans
def partition(self, split):
"""Partition the redspans according to a provided split.
Returns a tuple of RedSpanStore objects (L_redspans, R_redspans),
where a given RedSpan will be in L_redspans if it falls entirely
left of the splitloc and in R_redspans if it falls entirely right.
The redspans in R_redspans will be reindexed by split"""
L_redspans = self.slice(Split(0), end_split=split)
R_redspans = self.slice(split)
return (L_redspans, R_redspans)
class BadSplitStore(AssociativeStore):
"""Class for storing the badsplits and their motivations for a particular
ConstrNode, as well as methods for updating these structures.
The basic structure is a dict mapping from a badsplit index to the set
of motivators for that badsplit."""
__slots__ = ()
def reindex(self, split):
"""Creates a new BadSplits object that is a copy of the current one,
except with splits reindexed by subtracting the splitloc of a Split"""
modified_badsplits = BadSplitStore(
(badsplitloc - split.splitloc, motivators) for badsplitloc, motivators in self.items())
return modified_badsplits
def slice(self, start_split, end_split=None):
"""Creates a new BadSplits object that is a slice of the current
badsplits occurring between start_split and end_split,
reindexed according to start_split
Arguments:
start_split: Split to begin slice
end_split: Split to end slice (default = None)
"""
# If there are no badsplits, return an empty BadSplitStore
if not self:
return BadSplitStore()
# Otherwise, slice as required
sliced_badsplits = ((badsplitloc, motivators) for badsplitloc, motivators in self.items() if
badsplitloc > start_split.splitloc)
if end_split is not None:
sliced_badsplits = ((badsplitloc, motivators) for badsplitloc, motivators in sliced_badsplits if
badsplitloc < end_split.splitloc)
modified_badsplits = BadSplitStore(sliced_badsplits).reindex(start_split)
return modified_badsplits
def partition(self, split, construction, add_new=True):
"""Partitions the badsplits based on splitloc from a Split object.
If add_new is True, new badpslits introduced by the Split are added.
Returns a tuple of BadSplits objects, (L_badsplits, R_badsplits)
L inherits badsplits s.t. badsplit < splitloc
R inherits badsplits s.t. badsplit > splitloc, reindexed by splitloc
Arguments:
split: Split object with splitloc to base partition on
construction: construction object (?)
add_new: boolean to indicate if new badsplits introduced
by the Split are added (default = True)
"""
# Set up master badsplits for inheritance
master_badsplits = self
# Add new badsplits to master if they exist and add_new is True
if add_new and hasattr(split, "new_badsplits") and split.new_badsplits:
master_badsplits = master_badsplits.add(split.new_badsplits, construction)
L_badsplits = master_badsplits.slice(Split(0), end_split=split)
R_badsplits = master_badsplits.slice(split)
return (L_badsplits, R_badsplits)
class ForcedSplitlocStore(AssociativeStore):
"""Class for storing all forced splitlocs associated with a construction,
and methods for reindexing and partitioning these splitlocs.