Legisign · ThomasKluiters · Aug 24, 2023 · Aug 24, 2023 · Aug 24, 2023 · Aug 24, 2023
diff --git a/.gitignore b/.gitignore
@@ -111,3 +111,6 @@ test/
 
 # Praat textgrids
 *.*TextGrid
+
+# PyCharm
+.idea
diff --git a/textgrids/__init__.py b/textgrids/__init__.py
@@ -130,7 +130,7 @@ def __add__(self, tier):
             raise TypeError('tier types differ')
         # Do not add a tier at the end which begins before this one ends.
         if self.xmax > tier.xmin:
-            raise ValueError('Cannot extend a tier with one that begins before this tier ends: {max} > {min}', 
-                self.xmax, tier.xmin)
+            raise ValueError('Cannot extend a tier with one that begins before this tier ends: {max} > {min}'
+                .format(max=self.xmax, min=tier.xmin))
         return Tier(super().__add__(tier))
 
@@ -186,9 +186,12 @@ def tier_type(self):
 class TextGrid(OrderedDict):
     '''TextGrid is a dict of tier names (keys) and Tiers (values).'''
 
-    def __init__(self, filename=None, xmin=0.0):
+    def __init__(self, filename=None, xmin=0.0, coding=None):
         self.xmin = self.xmax = xmin
         self.filename = filename
+        # Encoding kept for later reads; UTF-8 when none is given
+        self.coding = "utf-8" if coding is None else coding
+        # Only read when a (truthy) filename was supplied
         if self.filename:
             self.read(self.filename)
 
@@ -346,15 +349,16 @@ def offset_time(self, offset):
         for tier in tiers:
             self[tier].offset_time(offset)
 
-    def parse(self, data):
+    def parse(self, data, coding=None):
         '''Parse textgrid data.
 
-        Obligatory argument "data" is bytes.
+        "data" must be bytes; "coding" overrides self.coding for text files.
         '''
         if not isinstance(data, bytes):
             raise TypeError
         binary = b'ooBinaryFile\x08TextGrid'
-        text = ['File type = "ooTextFile"', 'Object class = "TextGrid"', '']
+        headers = (['File type = "ooTextFile"', 'Object class = "TextGrid"', ''],
+                   ['File type = "ooTextFile short"', '"TextGrid"', ''])
         # Check and then discard binary header
         if data[:len(binary)] == binary:
             buff = io.BytesIO(data[len(binary):])
@@ -363,22 +367,25 @@ def parse(self, data):
             except (IndexError, ValueError):
                 raise BinaryError
         else:
-            coding = 'utf-8'
+            coding = coding or self.coding
             # Note and then discard BOM
             if data[:2] == b'\xfe\xff':
                 coding = 'utf-16-be'
                 data = data[2:]
             # Now convert to a text buffer
             buff = [s.strip() for s in data.decode(coding).split('\n')]
-            # Check and then discard header
-            if buff[:len(text)] != text:
-                raise TypeError
-            buff = buff[len(text):]
-            # If the next line starts with a number, this is a short textgrid
-            if buff[0][0] in '-0123456789':
-                self._parse_short(buff)
-            else:
-                self._parse_long(buff)
+            # Discard the first known header; parse once, not per header
+            for header in headers:
+                if buff[:len(header)] == header:
+                    buff = buff[len(header):]
+                    break
+            else:
+                raise TypeError  # no known header: not a text TextGrid
+            # If the next line starts with a number, this is a short textgrid
+            if buff[0][0] in '-0123456789':
+                self._parse_short(buff)
+            else:
+                self._parse_long(buff)
 
     def _parse_binary(self, data):
         '''Parse BINARY textgrid files. Not intended to be used directly.'''
@@ -496,8 +503,18 @@ def read(self, filename):
         "filename" is the name of the file.
         '''
         self.filename = filename
-        with open(self.filename, 'rb') as infile:
-            data = infile.read()
+        # "filename" may also carry the TextGrid data itself (bytes or
+        # io.BytesIO). Path-like objects such as pathlib.Path are still
+        # opened from disk, as they were when open() took any argument.
+        if isinstance(self.filename, str) or hasattr(self.filename, '__fspath__'):
+            with open(self.filename, 'rb') as infile:
+                data = infile.read()
+        elif isinstance(self.filename, bytes):
+            data = self.filename
+        elif isinstance(self.filename, io.BytesIO):
+            data = self.filename.read()
+        else:
+            raise TypeError('Filename must be any of str, path-like, bytes or BytesIO')
         self.parse(data)
 
     def tier_from_csv(self, tier_name, filename):