From 85a37d78370c77be08016310ca03001297859bc2 Mon Sep 17 00:00:00 2001
From: thomaskluiters <thomas.kluiters@gmail.com>
Date: Thu, 24 Aug 2023 10:12:15 +0200
Subject: [PATCH 1/6] Add PyCharm files to .gitignore

---
 .gitignore | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.gitignore b/.gitignore
index 36d4101..0a086e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -111,3 +111,6 @@ test/
 
 # Praat textgrids
 *.*TextGrid
+
+# PyCharm
+.idea

From 4ee2cd5a968240f0dc3a7cd98e2712bd1ec69fce Mon Sep 17 00:00:00 2001
From: thomaskluiters <thomas.kluiters@gmail.com>
Date: Thu, 24 Aug 2023 10:16:11 +0200
Subject: [PATCH 2/6] Make encoding configurable and allow filename to be bytes or BytesIO

---
 textgrids/__init__.py | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/textgrids/__init__.py b/textgrids/__init__.py
index 036adf5..9e70564 100755
--- a/textgrids/__init__.py
+++ b/textgrids/__init__.py
@@ -130,7 +130,7 @@ def __add__(self, tier):
             raise TypeError('tier types differ')
         # Do not add a tier at the end which begins before this one ends.
         if self.xmax > tier.xmin:
-            raise ValueError('Cannot extend a tier with one that begins before this tier ends: {max} > {min}', 
+            raise ValueError('Cannot extend a tier with one that begins before this tier ends: {max} > {min}',
                 self.xmax, tier.xmin)
         return Tier(super().__add__(tier))
 
@@ -186,9 +186,12 @@ def tier_type(self):
 class TextGrid(OrderedDict):
     '''TextGrid is a dict of tier names (keys) and Tiers (values).'''
 
-    def __init__(self, filename=None, xmin=0.0):
+    def __init__(self, filename=None, xmin=0.0, coding=None):
         self.xmin = self.xmax = xmin
         self.filename = filename
+        self.coding = coding
+        if self.coding is None:
+            self.coding = "utf-8"
         if self.filename:
             self.read(self.filename)
 
@@ -346,7 +349,7 @@ def offset_time(self, offset):
         for tier in tiers:
             self[tier].offset_time(offset)
 
-    def parse(self, data):
+    def parse(self, data, coding = None):
         '''Parse textgrid data.
 
         Obligatory argument "data" is bytes.
@@ -363,7 +366,7 @@ def parse(self, data):
             except (IndexError, ValueError):
                 raise BinaryError
         else:
-            coding = 'utf-8'
+            coding = self.coding
             # Note and then discard BOM
             if data[:2] == b'\xfe\xff':
                 coding = 'utf-16-be'
@@ -496,8 +499,16 @@ def read(self, filename):
         "filename" is the name of the file.
         '''
         self.filename = filename
-        with open(self.filename, 'rb') as infile:
-            data = infile.read()
+        data = None
+        if isinstance(self.filename, str):
+            with open(self.filename, 'rb') as infile:
+                data = infile.read()
+        if isinstance(self.filename, bytes):
+            data = self.filename
+        if isinstance(self.filename, io.BytesIO):
+            data = self.filename.read()
+        if data is None:
+            raise TypeError("Filename must be any of str, bytes or ByteIO")
         self.parse(data)
 
     def tier_from_csv(self, tier_name, filename):

From fef3f463acd928742bdd09f792e7b4f692805211 Mon Sep 17 00:00:00 2001
From: thomaskluiters <thomas.kluiters@gmail.com>
Date: Thu, 24 Aug 2023 10:45:19 +0200
Subject: [PATCH 3/6] Iterate over headers when parsing

---
 textgrids/__init__.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/textgrids/__init__.py b/textgrids/__init__.py
index 9e70564..e1222c9 100755
--- a/textgrids/__init__.py
+++ b/textgrids/__init__.py
@@ -373,15 +373,19 @@ def parse(self, data, coding = None):
                 data = data[2:]
             # Now convert to a text buffer
             buff = [s.strip() for s in data.decode(coding).split('\n')]
-            # Check and then discard header
-            if buff[:len(text)] != text:
-                raise TypeError
-            buff = buff[len(text):]
-            # If the next line starts with a number, this is a short textgrid
-            if buff[0][0] in '-0123456789':
-                self._parse_short(buff)
+            for header in text:
+                # Check and then discard header
+                if buff[:len(header)] != header:
+                    continue
+                buff = buff[len(header):]
+                # If the next line starts with a number, this is a short textgrid
+                if buff[0][0] in '-0123456789':
+                    self._parse_short(buff)
+                else:
+                    self._parse_long(buff)
+                break
             else:
-                self._parse_long(buff)
+                raise TypeError("No valid header seen in text")
 
     def _parse_binary(self, data):
         '''Parse BINARY textgrid files. Not intended to be used directly.'''

From 2cab4c7273695f2480c9160a7190b7e889a07385 Mon Sep 17 00:00:00 2001
From: thomaskluiters <thomas.kluiters@gmail.com>
Date: Thu, 24 Aug 2023 10:48:20 +0200
Subject: [PATCH 4/6] Revert "Iterate over headers when parsing"

This reverts commit fef3f463acd928742bdd09f792e7b4f692805211.
---
 textgrids/__init__.py | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/textgrids/__init__.py b/textgrids/__init__.py
index e1222c9..9e70564 100755
--- a/textgrids/__init__.py
+++ b/textgrids/__init__.py
@@ -373,19 +373,15 @@ def parse(self, data, coding = None):
                 data = data[2:]
             # Now convert to a text buffer
             buff = [s.strip() for s in data.decode(coding).split('\n')]
-            for header in text:
-                # Check and then discard header
-                if buff[:len(header)] != header:
-                    continue
-                buff = buff[len(header):]
-                # If the next line starts with a number, this is a short textgrid
-                if buff[0][0] in '-0123456789':
-                    self._parse_short(buff)
-                else:
-                    self._parse_long(buff)
-                break
+            # Check and then discard header
+            if buff[:len(text)] != text:
+                raise TypeError
+            buff = buff[len(text):]
+            # If the next line starts with a number, this is a short textgrid
+            if buff[0][0] in '-0123456789':
+                self._parse_short(buff)
             else:
-                raise TypeError("No valid header seen in text")
+                self._parse_long(buff)
 
     def _parse_binary(self, data):
         '''Parse BINARY textgrid files. Not intended to be used directly.'''

From 14af41db05014d76f60af7798889e7de735e2f3f Mon Sep 17 00:00:00 2001
From: thomaskluiters <thomas.kluiters@gmail.com>
Date: Thu, 24 Aug 2023 10:52:37 +0200
Subject: [PATCH 5/6] Add list of header values encountered in the 'wild'

---
 textgrids/__init__.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/textgrids/__init__.py b/textgrids/__init__.py
index 9e70564..d33558a 100755
--- a/textgrids/__init__.py
+++ b/textgrids/__init__.py
@@ -357,7 +357,10 @@ def parse(self, data, coding = None):
         if not isinstance(data, bytes):
             raise TypeError
         binary = b'ooBinaryFile\x08TextGrid'
-        text = ['File type = "ooTextFile"', 'Object class = "TextGrid"', '']
+        headers = [
+            ['File type = "ooTextFile"', 'Object class = "TextGrid"', ''],
+            ['File type = "ooTextFile short"', '"TextGrid"', ''],
+        ]
         # Check and then discard binary header
         if data[:len(binary)] == binary:
             buff = io.BytesIO(data[len(binary):])
@@ -373,15 +376,16 @@ def parse(self, data, coding = None):
                 data = data[2:]
             # Now convert to a text buffer
             buff = [s.strip() for s in data.decode(coding).split('\n')]
-            # Check and then discard header
-            if buff[:len(text)] != text:
-                raise TypeError
-            buff = buff[len(text):]
-            # If the next line starts with a number, this is a short textgrid
-            if buff[0][0] in '-0123456789':
-                self._parse_short(buff)
-            else:
-                self._parse_long(buff)
+            for header in headers:
+                # Check and then discard header
+                if buff[:len(header)] != header:
+                    raise TypeError
+                buff = buff[len(header):]
+                # If the next line starts with a number, this is a short textgrid
+                if buff[0][0] in '-0123456789':
+                    self._parse_short(buff)
+                else:
+                    self._parse_long(buff)
 
     def _parse_binary(self, data):
         '''Parse BINARY textgrid files. Not intended to be used directly.'''

From 76e1e911f1a4d4baafd8547bc9d776fd71baf42e Mon Sep 17 00:00:00 2001
From: thomaskluiters <thomas.kluiters@gmail.com>
Date: Wed, 6 Sep 2023 13:17:36 +0200
Subject: [PATCH 6/6] Don't throw a type error if no match could be found

---
 textgrids/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/textgrids/__init__.py b/textgrids/__init__.py
index d33558a..532f3cf 100755
--- a/textgrids/__init__.py
+++ b/textgrids/__init__.py
@@ -379,7 +379,7 @@ def parse(self, data, coding = None):
             for header in headers:
                 # Check and then discard header
                 if buff[:len(header)] != header:
-                    raise TypeError
+                    continue
                 buff = buff[len(header):]
                 # If the next line starts with a number, this is a short textgrid
                 if buff[0][0] in '-0123456789':