From 675edf90bad3d618b8958d6f86df5fabaf10c1e0 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Mon, 15 Feb 2021 09:01:23 -0700
Subject: [PATCH 01/18] Structured cuneification of line

---
 cuneify_interface.py | 68 +++++++++++++++++++++++++++++---------------
 1 file changed, 45 insertions(+), 23 deletions(-)

diff --git a/cuneify_interface.py b/cuneify_interface.py
index 309771e..a8ae757 100644
--- a/cuneify_interface.py
+++ b/cuneify_interface.py
@@ -1,3 +1,5 @@
+# -*- coding: utf8 -*-
+
 import itertools
 import os
 import pickle
@@ -147,6 +149,7 @@ def _get_cuneiform_bytes(self, transliteration):
             return b''
         if transliteration not in self.transliteration_to_cuneiform:
             raise UnrecognisedSymbol(transliteration)
+            # return transliteration.encode('utf8')
         return self.transliteration_to_cuneiform[transliteration]
 
     def get_stripped_transliteration(self, transliteration):
@@ -291,38 +294,57 @@ def __exit__(self, type_, value, traceback):
         self._connection.close()
 
 
-
-def cuneify_line(cache, transliteration, show_transliteration):
-    ''' Take a line of transliteration and display the output, nicely formatted, on the terminal.
-        Should be used whilst in the context of cache.
+def cuneify_line_structured(cache, transliteration):
+    ''' Take a line of transliteration and return structured data of
+        - tokens, eg `tok1`
+        - separators, eg `.`
+        - symbols, eg `𒌉`
+        - unrecognized tokens, eg `bob`
     '''
     transliteration = transliteration.strip()
     # Split using alphanumeric characters (\w)
     tokens = re.split(TOKEN_REGEX, transliteration)
 
-    # It's a much easier code path if we just show the cuneiform
-    if not show_transliteration:
-        return ' '.join(cache.get_cuneiform(token) for token in tokens)
-
-    # Otherwise format something like this:
-    #
-    # tok1.tok2  tok3-tok4-5-   6
-    # A    BBBBB CC   DDD  EEEE F
     separators = re.findall(TOKEN_REGEX, transliteration)
     separators.append('')
-
     line_original = ''
     line_cuneiform = ''
-    for token, separator in zip(tokens, separators):
-        symbol = cache.get_cuneiform(token)
-        # FIXME -- take into account separator length (could be more than one
-        # character
-        n_spaces_after_symbol = 1 + max(len(separator) + len(token) - len(symbol), 0)
-        n_spaces_after_token_separator = 1 + max(len(symbol) - len(token), 0)
-        line_original += token + separator + ' ' * n_spaces_after_token_separator
-        line_cuneiform += symbol + ' ' * n_spaces_after_symbol
-
-    return '{}\n{}'.format(line_original, line_cuneiform)
+    symbols = []
+    unrecognized_tokens = []
+    for token in tokens:
+        try:
+            symbol = cache.get_cuneiform(token)
+        except (UnrecognisedSymbol, TransliterationNotUnderstood):
+            symbol = None
+            unrecognized_tokens.append(token)
+        symbols.append(symbol)
+
+    return (tokens, separators, symbols, unrecognized_tokens)
+
+def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indicator="?"):
+    ''' Take a line of transliteration and display the output, nicely formatted, on the terminal.
+        Should be used whilst in the context of cache.
+        unrecognized_indicator : String to display if token not recognized. If empty string,
+          the token will be returned as-is.
+    '''
+
+    (tokens, separators, raw_symbols, unrecognized_tokens) = cuneify_line_structured(cache, transliteration)
+
+    # Substitute chosen string for unrecognized tokens
+    symbols = [s if s is not None else t if unrecognized_indicator=="" else unrecognized_indicator for (t, s) in zip(tokens, raw_symbols)]
+
+    if show_transliteration:
+        line_original = ""
+        line_cuneiform = ""
+        for token, separator, symbol in zip(tokens, separators, symbols):
+            if symbol is None:
+                symbol = token if unrecognized_indicator=="" else unrecognized_indicator
+            width = max(len(token + separator), len(symbol))
+            line_original += (token + separator).ljust(width)
+            line_cuneiform += symbol.ljust(width)
+        return '{}\n{}'.format(line_original, line_cuneiform)
+    else:
+        return " ".join(symbols)
 
 
 def cuneify_file(cache, file_name, show_transliteration):

From 5a4ba60e509105d1507feaf74f4e588efcb97b33 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Mon, 15 Feb 2021 11:15:16 -0700
Subject: [PATCH 02/18] Fix local server to work. Show form on results page.

---
 environment.py | 20 +++++++++++++++++++-
 wsgi.py        | 51 +++++++++++++++++++++++---------------------------
 2 files changed, 42 insertions(+), 29 deletions(-)

diff --git a/environment.py b/environment.py
index 98b8136..a239afd 100644
--- a/environment.py
+++ b/environment.py
@@ -6,6 +6,7 @@
 from cuneify_interface import FileCuneiformCache, MySQLCuneiformCache
 
 
+
 if 'mws' in socket.gethostname().lower():
     # Running in MWS
 
@@ -30,7 +31,8 @@ def get_cache(environ):
         return FileCuneiformCache(cache_file_path=cache_file_path, read_only=True)
 
 
-else:
+elif 'openshift' in socket.gethostname().lower():
+
     # Running on OpenShift
 
     MY_URL = 'https://cuneifyplus-puffin.rhcloud.com'
@@ -45,3 +47,19 @@ def get_cache(environ):
         # We use a cache in the data directory. This isn't touched by the deployment process
         cache_file_path = os.path.normpath(os.path.join(environ['OPENSHIFT_DATA_DIR'], 'cuneiform_cache.pickle'))
         return FileCuneiformCache(cache_file_path=cache_file_path)
+
+else:
+
+    # Running locally?
+    MY_URL = ''
+
+    DEPRECATED = False
+
+    def get_font_directory(environ):
+        return 'fonts'
+
+    def get_cache(environ):
+        ''' Return the standard cuneiform cache '''
+        # When running locally, use a cache file in the current working directory
+        cache_file_path = 'cuneiform_cache.pickle'
+        return FileCuneiformCache(cache_file_path=cache_file_path)
diff --git a/wsgi.py b/wsgi.py
index 890f0cf..a4dc736 100644
--- a/wsgi.py
+++ b/wsgi.py
@@ -9,7 +9,7 @@
 from traceback import format_exc
 from urllib.parse import quote
 
-from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, 
+from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line,
         ordered_symbol_to_transliterations)
 from environment import DEPRECATED, MY_URL, get_cache, get_font_directory
 
@@ -29,20 +29,19 @@
 FONTS_PATH_NAME = '/fonts'
 
 
-def _get_input_form(initial='Enter transliteration here...'):
+def _get_input_form(initial=''):
     ''' Return a form that the user can use to enter some transliterated text '''
-    font_name_selection = ''.join(['<option value="{0}">{1} (font: {0})</option>'.format(name, description) 
+    font_name_selection = ''.join(['<option value="{0}">{1} (font: {0})</option>'.format(name, description)
                                    for name, description in FONT_NAMES.items()])
     body = '''
-    <form action="{}/cuneify" method="post">
-    <textarea rows="10" cols="80" name="input"></textarea>
+    <form action="{}" method="post">
+    <textarea rows="10" cols="80" name="input">{}</textarea>
     <br /> <br />
     <input type="checkbox" name="show_transliteration">Show transliteration with output<br /><br />
     <select name="font_name">{}</select>
     <input type="submit" name="action" value="Cuneify">
     <input type="submit" name="action" value="Create sign list">
-    </form>'''.format(MY_URL, font_name_selection)
-    # TODO Use 'initial' when it can be made to disappear on entry into widget
+    </form>'''.format(MY_URL, initial, font_name_selection)
     return body
 
 
@@ -65,9 +64,6 @@ def _get_cuneify_body(environ, transliteration, show_transliteration, font_name)
             except TransliterationNotUnderstood:
                 body += '<font color="red">Possible formatting error in "{}"</font><br />'.format(line)
 
-    # TODO will need javascript to re-populate the text area, I believe
-    # body += '<br /><br /><a href="{}?input={}">Go back</a><br />'.format(MY_URL, quote(transliteration))
-    body += '<br /><br /><a href="{}">Go back</a><br />'.format(MY_URL)
     # TODO this can probably be neatened up a little bit
     return body
 
@@ -85,10 +81,6 @@ def _get_symbol_list_body(environ, transliteration, font_name):
             # Print out unrecognised tokens if there are any
             body += '<br /><font color="red">These tokens were unrecognised: {}</font><br />'.format(', '.join(unrecognised_tokens))
 
-    # TODO will need javascript to re-populate the text area, I believe
-    # body += '<br /><br /><a href="{}?input={}">Go back</a><br />'.format(MY_URL, quote(transliteration))
-    body += '<br /><br /><a href="{}">Go back</a><br />'.format(MY_URL)
-    # TODO this can probably be neatened up a little bit
     return body
 
 
@@ -105,7 +97,7 @@ def construct_font_response(environ, start_response, path_info):
     # TODO we could cache this in memory if reading the font is slow
     with open(font_path, 'rb') as f:
         response_body = f.read()
-     
+
     status = '200 OK'
     if font_path.endswith('.woff'):
         ctype = 'application/x-font-woff'
@@ -124,16 +116,16 @@ def application(environ, start_response):
     # Use the appropriate behaviour here
     path_info = environ['PATH_INFO']
     form = cgi.FieldStorage(fp=environ['wsgi.input'], environ=environ, keep_blank_values=True)
+    cuneiform_output = ""
+    transliteration = ""
     if path_info.startswith(FONTS_PATH_NAME):
         # Return the static font file
         return construct_font_response(environ, start_response, path_info)
-    elif path_info == '/cuneify':
+    # elif path_info == '/cuneify':
+    elif environ['REQUEST_METHOD']=="POST":
 
         # Whatever else happens, we always need a non-empty transliteration
         transliteration = form.getvalue('input')
-        if transliteration is None or transliteration == '':
-            # There is no transliteration, so show the input form again
-            body = _get_input_form()
 
         # Get the values of the other form inputs
         show_transliteration_value = form.getvalue('show_transliteration')
@@ -144,14 +136,14 @@ def application(environ, start_response):
         # The type of form submission we make determines what we do now
         if action_value == 'Cuneify':
             # We do a transliteration and show the output
-            body = _get_cuneify_body(environ, transliteration, show_transliteration, font_name)
+            cuneiform_output += _get_cuneify_body(environ, transliteration, show_transliteration, font_name)
         elif action_value == 'Create sign list':
             # Make a symbol list!
-            body = _get_symbol_list_body(environ, transliteration, font_name)
+            cuneiform_output += _get_symbol_list_body(environ, transliteration, font_name)
         else:
             raise RuntimeError("Unrecognised action value {}".format(action_value))
-    else:
-        body =  _get_input_form()
+
+    body =  _get_input_form(initial=transliteration)
 
     # TODO remove temporary workaround
     if DEPRECATED:
@@ -176,11 +168,14 @@ def application(environ, start_response):
 <style>''' + font_info + '''</style>
 </head>
 <body>
+<div>
+    {}
+</div>
 {}
 <br />
 <hr>
 <br />
-Using most browsers, the cuneiform should appear on your screen, as the fonts are embedded in the website.  
+Using most browsers, the cuneiform should appear on your screen, as the fonts are embedded in the website.
 However, if you wish to copy-and-paste (e.g. into a Word document), you may need to install the fonts in order for the
 characters to display correctly.  To install the fonts, follow the links below:
 <br />
@@ -200,11 +195,10 @@ def application(environ, start_response):
 </body></html>'''
 
 
-    response_body = response_body.format(body)
+    response_body = response_body.format(cuneiform_output, body)
     response_body = response_body.encode('utf-8')
 
     status = '200 OK'
-    # ctype = 'text/plain'
     ctype = 'text/html'
     response_headers = [('Content-Type', ctype), ('Content-Length', str(len(response_body)))]
     start_response(status, response_headers)
@@ -214,8 +208,9 @@ def application(environ, start_response):
 # Below for testing only
 #
 if __name__ == '__main__':
+    MY_URL=""
     from wsgiref.simple_server import make_server
     httpd = make_server('localhost', 8051, application)
     # Wait for a single request, serve it and quit.
-    httpd.handle_request()
-
+    while 1:
+        httpd.handle_request()

From 726cf324423aabf5c0cd9c163f623f4327b8ce67 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Mon, 15 Feb 2021 11:15:34 -0700
Subject: [PATCH 03/18] option to parse atf file

---
 cuneify_interface.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/cuneify_interface.py b/cuneify_interface.py
index a8ae757..9914683 100644
--- a/cuneify_interface.py
+++ b/cuneify_interface.py
@@ -347,18 +347,30 @@ def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indi
         return " ".join(symbols)
 
 
-def cuneify_file(cache, file_name, show_transliteration):
+def cuneify_file(cache, file_name, show_transliteration, parse_atf=True):
     ''' Given a text file with one or more lines of transliterated text, print out the corresponding
         version in cuneiform
     '''
     output = ''
-    with open(file_name) as input_file:
-        for line in input_file:
-            output += cuneify_line(cache, line, show_transliteration)
-            output += '\n'
-            # If also showing transliteration then an extra blank line aids legibility
-            if show_transliteration:
+    if parse_atf:
+        with open(file_name) as input_file:
+            for line in input_file:
+                atf_line_parts = re.search('^([0-9]+\.)([ \t]*)(.*)', line)
+                if atf_line_parts:
+                    transliteration = atf_line_parts.group(3)
+                    output += line
+                    output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, show_transliteration) + "\n"
+                else:
+                    output += "Y" + line
+
+    else:
+        with open(file_name) as input_file:
+            for line in input_file:
+                output += cuneify_line(cache, line, show_transliteration)
                 output += '\n'
+                # If also showing transliteration then an extra blank line aids legibility
+                if show_transliteration:
+                    output += '\n'
     return output
 
 

From 90551fc24dcf91f94e0b0f5c006e6cc45f90d2cb Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Mon, 15 Feb 2021 11:31:13 -0700
Subject: [PATCH 04/18] handle input as file from cgi

---
 cuneify_interface.py | 42 ++++++++++++++++++++++--------------------
 wsgi.py              | 23 ++++++++---------------
 2 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/cuneify_interface.py b/cuneify_interface.py
index 9914683..8cbe381 100644
--- a/cuneify_interface.py
+++ b/cuneify_interface.py
@@ -347,33 +347,35 @@ def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indi
         return " ".join(symbols)
 
 
-def cuneify_file(cache, file_name, show_transliteration, parse_atf=True):
-    ''' Given a text file with one or more lines of transliterated text, print out the corresponding
-        version in cuneiform
-    '''
+def cuneify_interator(cache, iterator, show_transliteration, parse_atf=True):
     output = ''
     if parse_atf:
-        with open(file_name) as input_file:
-            for line in input_file:
-                atf_line_parts = re.search('^([0-9]+\.)([ \t]*)(.*)', line)
-                if atf_line_parts:
-                    transliteration = atf_line_parts.group(3)
-                    output += line
-                    output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, show_transliteration) + "\n"
-                else:
-                    output += "Y" + line
-
+        for line in iterator:
+            atf_line_parts = re.search('^([0-9]+\.)([ \t]*)(.*)', line)
+            if atf_line_parts:
+                transliteration = atf_line_parts.group(3)
+                output += line
+                output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, show_transliteration) + "\n"
+            else:
+                output += "Y" + line
     else:
-        with open(file_name) as input_file:
-            for line in input_file:
-                output += cuneify_line(cache, line, show_transliteration)
+        for line in iterator:
+            output += cuneify_line(cache, line, show_transliteration)
+            output += '\n'
+            # If also showing transliteration then an extra blank line aids legibility
+            if show_transliteration:
                 output += '\n'
-                # If also showing transliteration then an extra blank line aids legibility
-                if show_transliteration:
-                    output += '\n'
     return output
 
 
+def cuneify_file(cache, file_name, show_transliteration, parse_atf=True):
+    ''' Given a text file with one or more lines of transliterated text, print out the corresponding
+        version in cuneiform
+    '''
+    with open(file_name) as iterator:
+        return cuneify_interator(cache, iterator, show_transliteration, parse_atf=parse_atf)
+
+
 def ordered_symbol_to_transliterations(cache, transliteration, return_unrecognised=False):
     ''' Given a transliteration, which might be a multi-line input, grab all tokens and build up a symbol list.
         This will be an OrderedDict mapping symbol to transliteration tokens, in the order of appearance
diff --git a/wsgi.py b/wsgi.py
index a4dc736..14dba49 100644
--- a/wsgi.py
+++ b/wsgi.py
@@ -9,7 +9,7 @@
 from traceback import format_exc
 from urllib.parse import quote
 
-from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line,
+from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, cuneify_interator,
         ordered_symbol_to_transliterations)
 from environment import DEPRECATED, MY_URL, get_cache, get_font_directory
 
@@ -49,20 +49,13 @@ def _get_cuneify_body(environ, transliteration, show_transliteration, font_name)
     ''' Return the HTML body contents when we've been given a transliteration, and show in the specified font '''
     body = ''
     with get_cache(environ) as cache:
-        for line in transliteration.split('\n'):
-            # Make empty lines appear as breaks in the output
-            line = line.strip()
-            if line == '':
-                body += '<br />'
-                continue
-
-            try:
-                body += '<span class="{}">{}</span><br />'.format(font_name.lower(), cuneify_line(cache, line, show_transliteration).replace('\n', '<br />'))
-                # body += '{}<br />'.format(cuneify_line(cache, line, show_transliteration).replace('\n', '<br />'))
-            except UnrecognisedSymbol as exception:
-                body += '<font color="red">Unknown symbol "{}" in "{}"</font><br />'.format(exception.transliteration, line)
-            except TransliterationNotUnderstood:
-                body += '<font color="red">Possible formatting error in "{}"</font><br />'.format(line)
+        try:
+            body += '<span class="{}">{}</span><br />'.format(font_name.lower(), cuneify_interator(cache, iter(transliteration.splitlines()), show_transliteration, parse_atf=False).replace('\n', '<br />'))
+            # body += '{}<br />'.format(cuneify_line(cache, line, show_transliteration).replace('\n', '<br />'))
+        except UnrecognisedSymbol as exception:
+            body += '<font color="red">Unknown symbol "{}" in "{}"</font><br />'.format(exception.transliteration, line)
+        except TransliterationNotUnderstood:
+            body += '<font color="red">Possible formatting error in "{}"</font><br />'.format(line)
 
     # TODO this can probably be neatened up a little bit
     return body

From c83023dd68e25e7075827561f2ffaf29e47e3404 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Mon, 15 Feb 2021 20:04:16 -0700
Subject: [PATCH 05/18] procfile and requirements for heroku

---
 Procfile         | 1 +
 requirements.txt | 1 +
 2 files changed, 2 insertions(+)
 create mode 100644 Procfile

diff --git a/Procfile b/Procfile
new file mode 100644
index 0000000..908eb48
--- /dev/null
+++ b/Procfile
@@ -0,0 +1 @@
+web: gunicorn wsgi:application
diff --git a/requirements.txt b/requirements.txt
index e69de29..3540093 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1 @@
+gunicorn==20.0.4

From f8140043b2c16ced6b81a47011c5562fe8731b7e Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 08:52:32 -0700
Subject: [PATCH 06/18] gitignore

---
 .gitignore | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..13fa123
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+.vscode/*
+__pycache__/*
+bin/*
+lib/*

From 1f5b7a8f4e090505edabe8d1c66359e50b34375e Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 08:53:13 -0700
Subject: [PATCH 07/18] gitignore

---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 13fa123..6c784e5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 __pycache__/*
 bin/*
 lib/*
+pyvenv.cfg

From 087d5ca36571b99fc604a5b82d182fb0defd9511 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 08:58:01 -0700
Subject: [PATCH 08/18] typo and cleanup

---
 cuneify_interface.py | 4 ++--
 wsgi.py              | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cuneify_interface.py b/cuneify_interface.py
index 8cbe381..685256b 100644
--- a/cuneify_interface.py
+++ b/cuneify_interface.py
@@ -347,7 +347,7 @@ def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indi
         return " ".join(symbols)
 
 
-def cuneify_interator(cache, iterator, show_transliteration, parse_atf=True):
+def cuneify_iterator(cache, iterator, show_transliteration, parse_atf=True):
     output = ''
     if parse_atf:
         for line in iterator:
@@ -373,7 +373,7 @@ def cuneify_file(cache, file_name, show_transliteration, parse_atf=True):
         version in cuneiform
     '''
     with open(file_name) as iterator:
-        return cuneify_interator(cache, iterator, show_transliteration, parse_atf=parse_atf)
+        return cuneify_iterator(cache, iterator, show_transliteration, parse_atf=parse_atf)
 
 
 def ordered_symbol_to_transliterations(cache, transliteration, return_unrecognised=False):
diff --git a/wsgi.py b/wsgi.py
index 14dba49..e8c1d54 100644
--- a/wsgi.py
+++ b/wsgi.py
@@ -9,7 +9,7 @@
 from traceback import format_exc
 from urllib.parse import quote
 
-from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, cuneify_interator,
+from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, cuneify_iterator,
         ordered_symbol_to_transliterations)
 from environment import DEPRECATED, MY_URL, get_cache, get_font_directory
 
@@ -50,7 +50,7 @@ def _get_cuneify_body(environ, transliteration, show_transliteration, font_name)
     body = ''
     with get_cache(environ) as cache:
         try:
-            body += '<span class="{}">{}</span><br />'.format(font_name.lower(), cuneify_interator(cache, iter(transliteration.splitlines()), show_transliteration, parse_atf=False).replace('\n', '<br />'))
+            body += '<span class="{}">{}</span><br />'.format(font_name.lower(), cuneify_iterator(cache, iter(transliteration.splitlines()), show_transliteration, parse_atf=False).replace('\n', '<br />'))
             # body += '{}<br />'.format(cuneify_line(cache, line, show_transliteration).replace('\n', '<br />'))
         except UnrecognisedSymbol as exception:
             body += '<font color="red">Unknown symbol "{}" in "{}"</font><br />'.format(exception.transliteration, line)
@@ -204,6 +204,6 @@ def application(environ, start_response):
     MY_URL=""
     from wsgiref.simple_server import make_server
     httpd = make_server('localhost', 8051, application)
-    # Wait for a single request, serve it and quit.
+    print("Serving on http://localhost:8051")
     while 1:
         httpd.handle_request()

From 85c6cef6a038247ed66900f7f249144304ecb342 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 09:20:24 -0700
Subject: [PATCH 09/18] remove debugging

---
 cuneify_interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuneify_interface.py b/cuneify_interface.py
index 685256b..b826474 100644
--- a/cuneify_interface.py
+++ b/cuneify_interface.py
@@ -357,7 +357,7 @@ def cuneify_iterator(cache, iterator, show_transliteration, parse_atf=True):
                 output += line
                 output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, show_transliteration) + "\n"
             else:
-                output += "Y" + line
+                output += line
     else:
         for line in iterator:
             output += cuneify_line(cache, line, show_transliteration)

From bf482367df08e5455ab75b4b0530e61f53cb4f70 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 09:22:39 -0700
Subject: [PATCH 10/18] detect atf format

---
 wsgi.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/wsgi.py b/wsgi.py
index e8c1d54..ac24c27 100644
--- a/wsgi.py
+++ b/wsgi.py
@@ -48,10 +48,11 @@ def _get_input_form(initial=''):
 def _get_cuneify_body(environ, transliteration, show_transliteration, font_name):
     ''' Return the HTML body contents when we've been given a transliteration, and show in the specified font '''
     body = ''
+    is_atf = "\n2." in transliteration # Hacky way to decide if this is an atf formatted file
     with get_cache(environ) as cache:
+        cuneified = cuneify_iterator(cache, iter(transliteration.splitlines(True)), show_transliteration, parse_atf=is_atf)
         try:
-            body += '<span class="{}">{}</span><br />'.format(font_name.lower(), cuneify_iterator(cache, iter(transliteration.splitlines()), show_transliteration, parse_atf=False).replace('\n', '<br />'))
-            # body += '{}<br />'.format(cuneify_line(cache, line, show_transliteration).replace('\n', '<br />'))
+            body += '<pre class="{}">{}</pre><br />'.format(font_name.lower(), cuneified)
         except UnrecognisedSymbol as exception:
             body += '<font color="red">Unknown symbol "{}" in "{}"</font><br />'.format(exception.transliteration, line)
         except TransliterationNotUnderstood:

From bf54372a4d900b6427aa611eda3c96045220c484 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 09:30:50 -0700
Subject: [PATCH 11/18] add to readme, atf test file

---
 README.md     | 42 +++++++++++++++++++++++++++++++++++++
 test_file.atf | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 test_file.atf

diff --git a/README.md b/README.md
index 208b608..e023b51 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,48 @@
+# cuneifyplus
+
 This is the source code for http://cuneifyplus.arch.cam.ac.uk
 
 It is a wrapper around Steve Tinney's 'cuneify' tool, which allows for more complex conversion of transliterated
 Babylonian and Akkadian into various cuneiform fonts.
 
+## Server
+
+Can be deployed to e.g. Heroku as wsgi app
+
+https://cuneify.herokuapp.com/
+
+
+## Commannd line usage
+
+```bash
+$python3 cuneify_interface.py test_file.txt
+𒇻 𒀠 𒌨 𒆠 𒆗 𒄯 𒃻 𒀀 𒉌
+𒁕
+𒃮
+```
 
+```bash
+$python3 cuneify_interface.py --parse-atf test_file.atf
+&P232701 = RIME 3/1.01.07.031, ex. 117
+#atf: lang sux
+# reconstruction
+@object cone
+@surface a
+1. {d}nin-dar-a
+# 𒀭 𒊩𒌆 𒁯 𒀀
+2. lugal uru16
+# 𒈗 𒂗
+3. lugal-a-ni
+# 𒈗 𒀀 𒉌
+4. gu3-de2-a
+# 𒅗 𒌤 𒀀
+5. ensi2
+# 𒉺𒋼𒋛
+6. lagasz{ki}-ke4
+# 𒉢𒁓𒆷 𒆠 𒆤
+7. e2 gir2-su{ki}-ka-ni
+# 𒂍 𒄈 𒋢 𒆠 𒅗 𒉌
+8. mu-na-du3
+# 𒈬 𒈾 𒆕
+...
+```
diff --git a/test_file.atf b/test_file.atf
new file mode 100644
index 0000000..2b8f7eb
--- /dev/null
+++ b/test_file.atf
@@ -0,0 +1,58 @@
+&P232701 = RIME 3/1.01.07.031, ex. 117
+#atf: lang sux
+# reconstruction
+@object cone
+@surface a
+1. {d}nin-dar-a
+2. lugal uru16
+3. lugal-a-ni
+4. gu3-de2-a
+5. ensi2
+6. lagasz{ki}-ke4
+7. e2 gir2-su{ki}-ka-ni
+8. mu-na-du3
+
+
+&P232702 = RIME 3/1.01.07.031, ex. 118
+#atf: lang sux
+# reconstruction
+@object cone
+@surface a
+1. {d}nin-dar-a
+2. lugal uru16
+3. lugal-a-ni
+4. gu3-de2-a
+5. ensi2
+6. lagasz{ki}-ke4
+7. e2 gir2-su{ki}-ka-ni
+8. mu-na-du3
+
+
+&P232703 = RIME 3/1.01.07.031, ex. 119
+#atf: lang sux
+# reconstruction
+@object cone
+@surface a
+1. {d}nin-dar-a
+2. lugal uru16
+3. lugal-a-ni
+4. gu3-de2-a
+5. ensi2
+6. lagasz{ki}-ke4
+7. e2 gir2-su{ki}-ka-ni
+8. mu-na-du3
+
+
+&P232704 = RIME 3/1.01.07.031, ex. 120
+#atf: lang sux
+# reconstruction
+@object cone
+@surface a
+1. {d}nin-dar-a
+2. lugal uru16
+3. lugal-a-ni
+4. gu3-de2-a
+5. ensi2
+6. lagasz{ki}-ke4
+7. e2 gir2-su{ki}-ka-ni
+8. mu-na-du3

From f8b78af4b530c56f416f68ebf23c7d0dbe4906ac Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 09:40:01 -0700
Subject: [PATCH 12/18] typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e023b51..479ce70 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ Can be deployed to e.g. Heroku as wsgi app
 https://cuneify.herokuapp.com/
 
 
-## Commannd line usage
+## Command line usage
 
 ```bash
 $python3 cuneify_interface.py test_file.txt

From 65731e691dc8cdeec721e53fab27d475ca22374f Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 11:31:28 -0700
Subject: [PATCH 13/18] Parse ATF CLI argument. Readme update.

---
 README.md            | 25 +++++++++++++++++++++----
 cuneify_interface.py |  4 +++-
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 479ce70..9b9d84a 100644
--- a/README.md
+++ b/README.md
@@ -7,22 +7,39 @@ Babylonian and Akkadian into various cuneiform fonts.
 
 ## Server
 
-Can be deployed to e.g. Heroku as wsgi app
+Can be deployed to e.g. Heroku as wsgi app.
 
-https://cuneify.herokuapp.com/
+Example: https://cuneify.herokuapp.com/
 
+```bash
+$ heroku create cuneify
+Creating app... done, ⬢ mystic-wind-83
+Created http://cuneify.herokuapp.com/ | git@heroku.com:cuneify.git
+
+$ git push heroku master
+Enumerating objects: 5, done.
+Counting objects: 100% (5/5), done.
+Delta compression using up to 16 threads
+Compressing objects: 100% (3/3), done.
+Writing objects: 100% (3/3), 287 bytes | 287.00 KiB/s, done.
+Total 3 (delta 2), reused 0 (delta 0), pack-reused 0
+.
+.
+.
+remote:        https://cuneify.herokuapp.com/ deployed to Heroku
+```
 
 ## Command line usage
 
 ```bash
-$python3 cuneify_interface.py test_file.txt
+$ python3 cuneify_interface.py test_file.txt
 𒇻 𒀠 𒌨 𒆠 𒆗 𒄯 𒃻 𒀀 𒉌
 𒁕
 𒃮
 ```
 
 ```bash
-$python3 cuneify_interface.py --parse-atf test_file.atf
+$ python3 cuneify_interface.py --parse-atf test_file.atf
 &P232701 = RIME 3/1.01.07.031, ex. 117
 #atf: lang sux
 # reconstruction
diff --git a/cuneify_interface.py b/cuneify_interface.py
index b826474..ba58556 100644
--- a/cuneify_interface.py
+++ b/cuneify_interface.py
@@ -421,6 +421,8 @@ def main():
     parser.add_argument('--show-transliteration', action='store_true',
                         help='By default just show cuneiform. If this is set, '
                              'also display original transliteration')
+    parser.add_argument('--parse-atf', action='store_true',
+                        help='If this is set parse file as .atf formatted')
     parser.add_argument('--symbol-list', action='store_true',
         help='If this is set, show a mapping between the transliterated symbols and cuneiform.')
     parser.add_argument('--cache', help='Use specified cache file',
@@ -440,7 +442,7 @@ def main():
                 print('Unrecognised symbols:')
                 print(unrecognised_tokens)
         else:
-            print(cuneify_file(cache, args.input_file, args.show_transliteration))
+            print(cuneify_file(cache, args.input_file, args.show_transliteration, args.parse_atf))
 
 
 if __name__ == '__main__':

From c2b7520a22b858c754272b55f594b0309d6eeef2 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 11:33:11 -0700
Subject: [PATCH 14/18] readme typo

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9b9d84a..f127f41 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@ Example: https://cuneify.herokuapp.com/
 
 ```bash
 $ heroku create cuneify
-Creating app... done, ⬢ mystic-wind-83
+Creating app... done, ⬢ cuneify
 Created http://cuneify.herokuapp.com/ | git@heroku.com:cuneify.git
 
 $ git push heroku master

From 25f8afb3c7eccd03a7a4a34bda549e133f497d03 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Tue, 16 Feb 2021 11:36:08 -0700
Subject: [PATCH 15/18] Change default for unrecognized tokens to pass through

---
 cuneify_interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cuneify_interface.py b/cuneify_interface.py
index ba58556..dbfa0b0 100644
--- a/cuneify_interface.py
+++ b/cuneify_interface.py
@@ -321,7 +321,7 @@ def cuneify_line_structured(cache, transliteration):
 
     return (tokens, separators, symbols, unrecognized_tokens)
 
-def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indicator="?"):
+def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indicator=""):
     ''' Take a line of transliteration and display the output, nicely formatted, on the terminal.
         Should be used whilst in the context of cache.
         unrecognized_indicator : String to display if token not recognized. If empty string,

From e4f713b0255c86b8422cc2160afbf519220d039c Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Sun, 21 Feb 2021 20:48:35 -0700
Subject: [PATCH 16/18] readme local server

---
 README.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index f127f41..9079c07 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,16 @@ This is the source code for http://cuneifyplus.arch.cam.ac.uk
 It is a wrapper around Steve Tinney's 'cuneify' tool, which allows for more complex conversion of transliterated
 Babylonian and Akkadian into various cuneiform fonts.
 
-## Server
+## Local Server
+
+```bash
+$ python3 wsgi.py
+Serving on http://localhost:8051
+```
+
+Open the URL in a browser to use the HTML interface.
+
+## Remote Server
 
 Can be deployed to e.g. Heroku as wsgi app.
 

From acc6bfd4fc463a9e65fb631e5f1a963e68109d88 Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Sun, 21 Feb 2021 20:54:23 -0700
Subject: [PATCH 17/18] fix quotes to double

---
 wsgi.py | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/wsgi.py b/wsgi.py
index 1510af9..f54bdbf 100644
--- a/wsgi.py
+++ b/wsgi.py
@@ -35,12 +35,12 @@
     ]
 )
 
-FONTS_PATH_NAME = '/fonts'
+FONTS_PATH_NAME = "/fonts"
 
 
-def _get_input_form(initial=''):
+def _get_input_form(initial=""):
     """ Return a form that the user can use to enter some transliterated text """
-    font_name_selection = ''.join(['<option value="{0}">{1} (font: {0})</option>'.format(name, description)
+    font_name_selection = "".join(["<option value=\"{0}\">{1} (font: {0})</option>".format(name, description)
                                    for name, description in FONT_NAMES.items()])
     body = """
     <form action="{}" method="post">
@@ -57,17 +57,17 @@ def _get_input_form(initial=''):
 
 
 def _get_cuneify_body(environ, transliteration, show_transliteration, font_name):
-    ''' Return the HTML body contents when we've been given a transliteration, and show in the specified font '''
-    body = ''
+    """ Return the HTML body contents when we've been given a transliteration, and show in the specified font """
+    body = ""
     is_atf = "\n2." in transliteration # Hacky way to decide if this is an atf formatted file
     with get_cache(environ) as cache:
         cuneified = cuneify_iterator(cache, iter(transliteration.splitlines(True)), show_transliteration, parse_atf=is_atf)
         try:
-            body += '<pre class="{}">{}</pre><br />'.format(font_name.lower(), cuneified)
+            body += "<pre class=\"{}\">{}</pre><br />".format(font_name.lower(), cuneified)
         except UnrecognisedSymbol as exception:
-            body += '<font color="red">Unknown symbol "{}" in "{}"</font><br />'.format(exception.transliteration, line)
+            body += "<font color=\"red\">Unknown symbol \"{}\" in \"{}\"</font><br />".format(exception.transliteration, line)
         except TransliterationNotUnderstood:
-            body += '<font color="red">Possible formatting error in "{}"</font><br />'.format(line)
+            body += "<font color=\"red\">Possible formatting error in \"{}\"</font><br />".format(line)
 
     # TODO this can probably be neatened up a little bit
     return body
@@ -84,14 +84,14 @@ def _get_symbol_list_body(environ, transliteration, font_name):
             cache, transliteration, return_unrecognised=True
         )
         for cuneiform_symbol, transliterations in symbol_to_transliterations.items():
-            line = '<span class="{}">{}</span>: {}<br />'.format(
+            line = "<span class=\"{}\">{}</span>: {}<br />".format(
                 font_name.lower(), cuneiform_symbol, ", ".join(transliterations)
             )
             body += line
 
         if len(unrecognised_tokens) > 0:
             # Print out unrecognised tokens if there are any
-            body += '<br /><font color="red">These tokens were unrecognised: {}</font><br />'.format(
+            body += "<br /><font color=\"red\">These tokens were unrecognised: {}</font><br />".format(
                 ", ".join(unrecognised_tokens)
             )
 
@@ -144,10 +144,10 @@ def application(environ, start_response):
     if path_info.startswith(FONTS_PATH_NAME):
         # Return the static font file
         return construct_font_response(environ, start_response, path_info)
-    elif environ['REQUEST_METHOD'] == "POST":
+    elif environ["REQUEST_METHOD"] == "POST":
 
         # Whatever else happens, we always need a non-empty transliteration
-        transliteration = form.getvalue('input')
+        transliteration = form.getvalue("input")
 
         # Get the values of the other form inputs
         show_transliteration_value = form.getvalue("show_transliteration")
@@ -164,7 +164,7 @@ def application(environ, start_response):
             cuneiform_output += _get_cuneify_body(
                 environ, transliteration, show_transliteration, font_name
             )
-        elif action_value == 'Create sign list':
+        elif action_value == "Create sign list":
             # Make a symbol list!
             cuneiform_output += _get_symbol_list_body(environ, transliteration, font_name)
         else:
@@ -176,9 +176,9 @@ def application(environ, start_response):
         [
             """@font-face {{{{
     font-family: {1};
-    src: url(fonts/{1}.woff) format('woff'),
-         url(fonts/{1}.eot) format('embedded-opentype'),
-         url(fonts/{1}.ttf) format('truetype');
+    src: url(fonts/{1}.woff) format("woff"),
+         url(fonts/{1}.eot) format("embedded-opentype"),
+         url(fonts/{1}.ttf) format("truetype");
 }}}}
 .{0} {{{{
     font-family: {1};
@@ -228,8 +228,8 @@ def application(environ, start_response):
     response_body = response_body.format(cuneiform_output, body)
     response_body = response_body.encode("utf-8")
 
-    status = '200 OK'
-    ctype = 'text/html'
+    status = "200 OK"
+    ctype = "text/html"
     response_headers = [
         ("Content-Type", ctype),
         ("Content-Length", str(len(response_body))),
@@ -240,7 +240,7 @@ def application(environ, start_response):
 
 # Below for testing only
 #
-if __name__ == '__main__':
+if __name__ == "__main__":
     MY_URL=""
     from wsgiref.simple_server import make_server
     httpd = make_server("localhost", 8051, application)

From 7562449ee09a9490dec70e4f27738af51b06dfac Mon Sep 17 00:00:00 2001
From: Stan James <wanderingstan@fb.com>
Date: Wed, 24 Feb 2021 14:53:46 -0700
Subject: [PATCH 18/18] escape html in input (redundant, but that's ok)

Make extra sure we never echo user-supplied HTML back into the input form unescaped.
Also remove a stray double space after `body =`.
---
 wsgi.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/wsgi.py b/wsgi.py
index f54bdbf..850916f 100644
--- a/wsgi.py
+++ b/wsgi.py
@@ -51,7 +51,7 @@ def _get_input_form(initial=""):
     <input type="submit" name="action" value="Cuneify">
     <input type="submit" name="action" value="Create sign list">
     </form>""".format(
-        MY_URL, initial, font_name_selection
+        MY_URL, cgi.escape(initial), font_name_selection
     )
     return body
 
@@ -169,7 +169,7 @@ def application(environ, start_response):
             cuneiform_output += _get_symbol_list_body(environ, transliteration, font_name)
         else:
             raise RuntimeError("Unrecognised action value {}".format(action_value))
-    body =  _get_input_form(initial=transliteration)
+    body = _get_input_form(initial=transliteration)
 
     # All the CSS representing font classes
     font_info = "\n".join(