From 675edf90bad3d618b8958d6f86df5fabaf10c1e0 Mon Sep 17 00:00:00 2001 From: Stan James Date: Mon, 15 Feb 2021 09:01:23 -0700 Subject: [PATCH 01/18] Structured cuneification of line --- cuneify_interface.py | 68 +++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 23 deletions(-) diff --git a/cuneify_interface.py b/cuneify_interface.py index 309771e..a8ae757 100644 --- a/cuneify_interface.py +++ b/cuneify_interface.py @@ -1,3 +1,5 @@ +# -*- coding: utf8 -*- + import itertools import os import pickle @@ -147,6 +149,7 @@ def _get_cuneiform_bytes(self, transliteration): return b'' if transliteration not in self.transliteration_to_cuneiform: raise UnrecognisedSymbol(transliteration) + # return transliteration.encode('utf8') return self.transliteration_to_cuneiform[transliteration] def get_stripped_transliteration(self, transliteration): @@ -291,38 +294,57 @@ def __exit__(self, type_, value, traceback): self._connection.close() - -def cuneify_line(cache, transliteration, show_transliteration): - ''' Take a line of transliteration and display the output, nicely formatted, on the terminal. - Should be used whilst in the context of cache. 
+def cuneify_line_structured(cache, transliteration): + ''' Take a line of transliteration and return structured data of + - tokens, eg `tok1` + - separators, eg `.` + - symbols, eg `𒌉` + - unrecognized tokens, eg `bob` ''' transliteration = transliteration.strip() # Split using alphanumeric characters (\w) tokens = re.split(TOKEN_REGEX, transliteration) - # It's a much easier code path if we just show the cuneiform - if not show_transliteration: - return ' '.join(cache.get_cuneiform(token) for token in tokens) - - # Otherwise format something like this: - # - # tok1.tok2 tok3-tok4-5- 6 - # A BBBBB CC DDD EEEE F separators = re.findall(TOKEN_REGEX, transliteration) separators.append('') - line_original = '' line_cuneiform = '' - for token, separator in zip(tokens, separators): - symbol = cache.get_cuneiform(token) - # FIXME -- take into account separator length (could be more than one - # character - n_spaces_after_symbol = 1 + max(len(separator) + len(token) - len(symbol), 0) - n_spaces_after_token_separator = 1 + max(len(symbol) - len(token), 0) - line_original += token + separator + ' ' * n_spaces_after_token_separator - line_cuneiform += symbol + ' ' * n_spaces_after_symbol - - return '{}\n{}'.format(line_original, line_cuneiform) + symbols = [] + unrecognized_tokens = [] + for token in tokens: + try: + symbol = cache.get_cuneiform(token) + except (UnrecognisedSymbol, TransliterationNotUnderstood): + symbol = None + unrecognized_tokens.append(token) + symbols.append(symbol) + + return (tokens, separators, symbols, unrecognized_tokens) + +def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indicator="?"): + ''' Take a line of transliteration and display the output, nicely formatted, on the terminal. + Should be used whilst in the context of cache. + unrecognized_indicator : String to display if token not recognized. If empty string, + the token will be returned as-is. 
+ ''' + + (tokens, separators, raw_symbols, unrecognized_tokens) = cuneify_line_structured(cache, transliteration) + + # Substitute chosen string for unrecognized tokens + symbols = [s if s is not None else t if unrecognized_indicator=="" else unrecognized_indicator for (t, s) in zip(tokens, raw_symbols)] + + if show_transliteration: + line_original = "" + line_cuneiform = "" + for token, separator, symbol in zip(tokens, separators, symbols): + if symbol is None: + symbol = token if unrecognized_indicator=="" else unrecognized_indicator + width = max(len(token + separator), len(symbol)) + line_original += (token + separator).ljust(width) + line_cuneiform += symbol.ljust(width) + return '{}\n{}'.format(line_original, line_cuneiform) + else: + return " ".join(symbols) def cuneify_file(cache, file_name, show_transliteration): From 5a4ba60e509105d1507feaf74f4e588efcb97b33 Mon Sep 17 00:00:00 2001 From: Stan James Date: Mon, 15 Feb 2021 11:15:16 -0700 Subject: [PATCH 02/18] Fix local server to work. Show form on results page. --- environment.py | 20 +++++++++++++++++++- wsgi.py | 51 +++++++++++++++++++++++--------------------------- 2 files changed, 42 insertions(+), 29 deletions(-) diff --git a/environment.py b/environment.py index 98b8136..a239afd 100644 --- a/environment.py +++ b/environment.py @@ -6,6 +6,7 @@ from cuneify_interface import FileCuneiformCache, MySQLCuneiformCache + if 'mws' in socket.gethostname().lower(): # Running in MWS @@ -30,7 +31,8 @@ def get_cache(environ): return FileCuneiformCache(cache_file_path=cache_file_path, read_only=True) -else: +elif 'openshift' in socket.gethostname().lower(): + # Running on OpenShift MY_URL = 'https://cuneifyplus-puffin.rhcloud.com' @@ -45,3 +47,19 @@ def get_cache(environ): # We use a cache in the data directory. 
This isn't touched by the deployment process cache_file_path = os.path.normpath(os.path.join(environ['OPENSHIFT_DATA_DIR'], 'cuneiform_cache.pickle')) return FileCuneiformCache(cache_file_path=cache_file_path) + +else: + + # Running locally? + MY_URL = '' + + DEPRECATED = False + + def get_font_directory(environ): + return 'fonts' + + def get_cache(environ): + ''' Return the standard cuneiform cache ''' + # We use a cache in the data directory. This isn't touched by the deployment process + cache_file_path = 'cuneiform_cache.pickle' + return FileCuneiformCache(cache_file_path=cache_file_path) diff --git a/wsgi.py b/wsgi.py index 890f0cf..a4dc736 100644 --- a/wsgi.py +++ b/wsgi.py @@ -9,7 +9,7 @@ from traceback import format_exc from urllib.parse import quote -from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, +from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, ordered_symbol_to_transliterations) from environment import DEPRECATED, MY_URL, get_cache, get_font_directory @@ -29,20 +29,19 @@ FONTS_PATH_NAME = '/fonts' -def _get_input_form(initial='Enter transliteration here...'): +def _get_input_form(initial=''): ''' Return a form that the user can use to enter some transliterated text ''' - font_name_selection = ''.join([''.format(name, description) + font_name_selection = ''.join([''.format(name, description) for name, description in FONT_NAMES.items()]) body = ''' -
- + +

Show transliteration with output

-
'''.format(MY_URL, font_name_selection) - # TODO Use 'initial' when it can be made to disappear on entry into widget + '''.format(MY_URL, initial, font_name_selection) return body @@ -65,9 +64,6 @@ def _get_cuneify_body(environ, transliteration, show_transliteration, font_name) except TransliterationNotUnderstood: body += 'Possible formatting error in "{}"
'.format(line) - # TODO will need javascript to re-populate the text area, I believe - # body += '

Go back
'.format(MY_URL, quote(transliteration)) - body += '

Go back
'.format(MY_URL) # TODO this can probably be neatened up a little bit return body @@ -85,10 +81,6 @@ def _get_symbol_list_body(environ, transliteration, font_name): # Print out unrecognised tokens if there are any body += '
These tokens were unrecognised: {}
'.format(', '.join(unrecognised_tokens)) - # TODO will need javascript to re-populate the text area, I believe - # body += '

Go back
'.format(MY_URL, quote(transliteration)) - body += '

Go back
'.format(MY_URL) - # TODO this can probably be neatened up a little bit return body @@ -105,7 +97,7 @@ def construct_font_response(environ, start_response, path_info): # TODO we could cache this in memory if reading the font is slow with open(font_path, 'rb') as f: response_body = f.read() - + status = '200 OK' if font_path.endswith('.woff'): ctype = 'application/x-font-woff' @@ -124,16 +116,16 @@ def application(environ, start_response): # Use the appropriate behaviour here path_info = environ['PATH_INFO'] form = cgi.FieldStorage(fp=environ['wsgi.input'], environ=environ, keep_blank_values=True) + cuneiform_output = "" + transliteration = "" if path_info.startswith(FONTS_PATH_NAME): # Return the static font file return construct_font_response(environ, start_response, path_info) - elif path_info == '/cuneify': + # elif path_info == '/cuneify': + elif environ['REQUEST_METHOD']=="POST": # Whatever else happens, we always need a non-empty transliteration transliteration = form.getvalue('input') - if transliteration is None or transliteration == '': - # There is no transliteration, so show the input form again - body = _get_input_form() # Get the values of the other form inputs show_transliteration_value = form.getvalue('show_transliteration') @@ -144,14 +136,14 @@ def application(environ, start_response): # The type of form submission we make determines what we do now if action_value == 'Cuneify': # We do a transliteration and show the output - body = _get_cuneify_body(environ, transliteration, show_transliteration, font_name) + cuneiform_output += _get_cuneify_body(environ, transliteration, show_transliteration, font_name) elif action_value == 'Create sign list': # Make a symbol list! 
- body = _get_symbol_list_body(environ, transliteration, font_name) + cuneiform_output += _get_symbol_list_body(environ, transliteration, font_name) else: raise RuntimeError("Unrecognised action value {}".format(action_value)) - else: - body = _get_input_form() + + body = _get_input_form(initial=transliteration) # TODO remove temporary workaround if DEPRECATED: @@ -176,11 +168,14 @@ def application(environ, start_response): +
+ {} +
{}


-Using most browsers, the cuneiform should appear on your screen, as the fonts are embedded in the website. +Using most browsers, the cuneiform should appear on your screen, as the fonts are embedded in the website. However, if you wish to copy-and-paste (e.g. into a Word document), you may need to install the fonts in order for the characters to display correctly. To install the fonts, follow the links below:
@@ -200,11 +195,10 @@ def application(environ, start_response): ''' - response_body = response_body.format(body) + response_body = response_body.format(cuneiform_output, body) response_body = response_body.encode('utf-8') status = '200 OK' - # ctype = 'text/plain' ctype = 'text/html' response_headers = [('Content-Type', ctype), ('Content-Length', str(len(response_body)))] start_response(status, response_headers) @@ -214,8 +208,9 @@ def application(environ, start_response): # Below for testing only # if __name__ == '__main__': + MY_URL="" from wsgiref.simple_server import make_server httpd = make_server('localhost', 8051, application) # Wait for a single request, serve it and quit. - httpd.handle_request() - + while 1: + httpd.handle_request() From 726cf324423aabf5c0cd9c163f623f4327b8ce67 Mon Sep 17 00:00:00 2001 From: Stan James Date: Mon, 15 Feb 2021 11:15:34 -0700 Subject: [PATCH 03/18] option to parse atf file --- cuneify_interface.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/cuneify_interface.py b/cuneify_interface.py index a8ae757..9914683 100644 --- a/cuneify_interface.py +++ b/cuneify_interface.py @@ -347,18 +347,30 @@ def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indi return " ".join(symbols) -def cuneify_file(cache, file_name, show_transliteration): +def cuneify_file(cache, file_name, show_transliteration, parse_atf=True): ''' Given a text file with one or more lines of transliterated text, print out the corresponding version in cuneiform ''' output = '' - with open(file_name) as input_file: - for line in input_file: - output += cuneify_line(cache, line, show_transliteration) - output += '\n' - # If also showing transliteration then an extra blank line aids legibility - if show_transliteration: + if parse_atf: + with open(file_name) as input_file: + for line in input_file: + atf_line_parts = re.search('^([0-9]+\.)([ \t]*)(.*)', line) + if atf_line_parts: + transliteration = 
atf_line_parts.group(3) + output += line + output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, show_transliteration) + "\n" + else: + output += "Y" + line + + else: + with open(file_name) as input_file: + for line in input_file: + output += cuneify_line(cache, line, show_transliteration) output += '\n' + # If also showing transliteration then an extra blank line aids legibility + if show_transliteration: + output += '\n' return output From 90551fc24dcf91f94e0b0f5c006e6cc45f90d2cb Mon Sep 17 00:00:00 2001 From: Stan James Date: Mon, 15 Feb 2021 11:31:13 -0700 Subject: [PATCH 04/18] handle input as file from cgi --- cuneify_interface.py | 42 ++++++++++++++++++++++-------------------- wsgi.py | 23 ++++++++--------------- 2 files changed, 30 insertions(+), 35 deletions(-) diff --git a/cuneify_interface.py b/cuneify_interface.py index 9914683..8cbe381 100644 --- a/cuneify_interface.py +++ b/cuneify_interface.py @@ -347,33 +347,35 @@ def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indi return " ".join(symbols) -def cuneify_file(cache, file_name, show_transliteration, parse_atf=True): - ''' Given a text file with one or more lines of transliterated text, print out the corresponding - version in cuneiform - ''' +def cuneify_interator(cache, iterator, show_transliteration, parse_atf=True): output = '' if parse_atf: - with open(file_name) as input_file: - for line in input_file: - atf_line_parts = re.search('^([0-9]+\.)([ \t]*)(.*)', line) - if atf_line_parts: - transliteration = atf_line_parts.group(3) - output += line - output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, show_transliteration) + "\n" - else: - output += "Y" + line - + for line in iterator: + atf_line_parts = re.search('^([0-9]+\.)([ \t]*)(.*)', line) + if atf_line_parts: + transliteration = atf_line_parts.group(3) + output += line + output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, 
show_transliteration) + "\n" + else: + output += "Y" + line else: - with open(file_name) as input_file: - for line in input_file: - output += cuneify_line(cache, line, show_transliteration) + for line in iterator: + output += cuneify_line(cache, line, show_transliteration) + output += '\n' + # If also showing transliteration then an extra blank line aids legibility + if show_transliteration: output += '\n' - # If also showing transliteration then an extra blank line aids legibility - if show_transliteration: - output += '\n' return output +def cuneify_file(cache, file_name, show_transliteration, parse_atf=True): + ''' Given a text file with one or more lines of transliterated text, print out the corresponding + version in cuneiform + ''' + with open(file_name) as iterator: + return cuneify_interator(cache, iterator, show_transliteration, parse_atf=parse_atf) + + def ordered_symbol_to_transliterations(cache, transliteration, return_unrecognised=False): ''' Given a transliteration, which might be a multi-line input, grab all tokens and build up a symbol list. 
This will be an OrderedDict mapping symbol to transliteration tokens, in the order of appearance diff --git a/wsgi.py b/wsgi.py index a4dc736..14dba49 100644 --- a/wsgi.py +++ b/wsgi.py @@ -9,7 +9,7 @@ from traceback import format_exc from urllib.parse import quote -from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, +from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, cuneify_interator, ordered_symbol_to_transliterations) from environment import DEPRECATED, MY_URL, get_cache, get_font_directory @@ -49,20 +49,13 @@ def _get_cuneify_body(environ, transliteration, show_transliteration, font_name) ''' Return the HTML body contents when we've been given a transliteration, and show in the specified font ''' body = '' with get_cache(environ) as cache: - for line in transliteration.split('\n'): - # Make empty lines appear as breaks in the output - line = line.strip() - if line == '': - body += '
' - continue - - try: - body += '{}
'.format(font_name.lower(), cuneify_line(cache, line, show_transliteration).replace('\n', '
')) - # body += '{}
'.format(cuneify_line(cache, line, show_transliteration).replace('\n', '
')) - except UnrecognisedSymbol as exception: - body += 'Unknown symbol "{}" in "{}"
'.format(exception.transliteration, line) - except TransliterationNotUnderstood: - body += 'Possible formatting error in "{}"
'.format(line) + try: + body += '{}
'.format(font_name.lower(), cuneify_interator(cache, iter(transliteration.splitlines()), show_transliteration, parse_atf=False).replace('\n', '
')) + # body += '{}
'.format(cuneify_line(cache, line, show_transliteration).replace('\n', '
')) + except UnrecognisedSymbol as exception: + body += 'Unknown symbol "{}" in "{}"
'.format(exception.transliteration, line) + except TransliterationNotUnderstood: + body += 'Possible formatting error in "{}"
'.format(line) # TODO this can probably be neatened up a little bit return body From c83023dd68e25e7075827561f2ffaf29e47e3404 Mon Sep 17 00:00:00 2001 From: Stan James Date: Mon, 15 Feb 2021 20:04:16 -0700 Subject: [PATCH 05/18] procfile and requirements for heroku --- Procfile | 1 + requirements.txt | 1 + 2 files changed, 2 insertions(+) create mode 100644 Procfile diff --git a/Procfile b/Procfile new file mode 100644 index 0000000..908eb48 --- /dev/null +++ b/Procfile @@ -0,0 +1 @@ +web: gunicorn wsgi:application diff --git a/requirements.txt b/requirements.txt index e69de29..3540093 100644 --- a/requirements.txt +++ b/requirements.txt @@ -0,0 +1 @@ +gunicorn==20.0.4 From f8140043b2c16ced6b81a47011c5562fe8731b7e Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 08:52:32 -0700 Subject: [PATCH 06/18] gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..13fa123 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.vscode/* +__pycache__/* +bin/* +lib/* From 1f5b7a8f4e090505edabe8d1c66359e50b34375e Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 08:53:13 -0700 Subject: [PATCH 07/18] gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 13fa123..6c784e5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ __pycache__/* bin/* lib/* +pyvenv.cfg From 087d5ca36571b99fc604a5b82d182fb0defd9511 Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 08:58:01 -0700 Subject: [PATCH 08/18] typo and cleanup --- cuneify_interface.py | 4 ++-- wsgi.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cuneify_interface.py b/cuneify_interface.py index 8cbe381..685256b 100644 --- a/cuneify_interface.py +++ b/cuneify_interface.py @@ -347,7 +347,7 @@ def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indi return " ".join(symbols) -def 
cuneify_interator(cache, iterator, show_transliteration, parse_atf=True): +def cuneify_iterator(cache, iterator, show_transliteration, parse_atf=True): output = '' if parse_atf: for line in iterator: @@ -373,7 +373,7 @@ def cuneify_file(cache, file_name, show_transliteration, parse_atf=True): version in cuneiform ''' with open(file_name) as iterator: - return cuneify_interator(cache, iterator, show_transliteration, parse_atf=parse_atf) + return cuneify_iterator(cache, iterator, show_transliteration, parse_atf=parse_atf) def ordered_symbol_to_transliterations(cache, transliteration, return_unrecognised=False): diff --git a/wsgi.py b/wsgi.py index 14dba49..e8c1d54 100644 --- a/wsgi.py +++ b/wsgi.py @@ -9,7 +9,7 @@ from traceback import format_exc from urllib.parse import quote -from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, cuneify_interator, +from cuneify_interface import (TransliterationNotUnderstood, UnrecognisedSymbol, cuneify_line, cuneify_iterator, ordered_symbol_to_transliterations) from environment import DEPRECATED, MY_URL, get_cache, get_font_directory @@ -50,7 +50,7 @@ def _get_cuneify_body(environ, transliteration, show_transliteration, font_name) body = '' with get_cache(environ) as cache: try: - body += '{}
'.format(font_name.lower(), cuneify_interator(cache, iter(transliteration.splitlines()), show_transliteration, parse_atf=False).replace('\n', '
')) + body += '{}
'.format(font_name.lower(), cuneify_iterator(cache, iter(transliteration.splitlines()), show_transliteration, parse_atf=False).replace('\n', '
')) # body += '{}
'.format(cuneify_line(cache, line, show_transliteration).replace('\n', '
')) except UnrecognisedSymbol as exception: body += 'Unknown symbol "{}" in "{}"
'.format(exception.transliteration, line) @@ -204,6 +204,6 @@ def application(environ, start_response): MY_URL="" from wsgiref.simple_server import make_server httpd = make_server('localhost', 8051, application) - # Wait for a single request, serve it and quit. + print("Serving on http://localhost:8051") while 1: httpd.handle_request() From 85c6cef6a038247ed66900f7f249144304ecb342 Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 09:20:24 -0700 Subject: [PATCH 09/18] remove debugging --- cuneify_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuneify_interface.py b/cuneify_interface.py index 685256b..b826474 100644 --- a/cuneify_interface.py +++ b/cuneify_interface.py @@ -357,7 +357,7 @@ def cuneify_iterator(cache, iterator, show_transliteration, parse_atf=True): output += line output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, show_transliteration) + "\n" else: - output += "Y" + line + output += line else: for line in iterator: output += cuneify_line(cache, line, show_transliteration) From bf482367df08e5455ab75b4b0530e61f53cb4f70 Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 09:22:39 -0700 Subject: [PATCH 10/18] detect atf format --- wsgi.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/wsgi.py b/wsgi.py index e8c1d54..ac24c27 100644 --- a/wsgi.py +++ b/wsgi.py @@ -48,10 +48,11 @@ def _get_input_form(initial=''): def _get_cuneify_body(environ, transliteration, show_transliteration, font_name): ''' Return the HTML body contents when we've been given a transliteration, and show in the specified font ''' body = '' + is_atf = "\n2." in transliteration # Hacky way to decide if this is an atf formatted file with get_cache(environ) as cache: + cuneified = cuneify_iterator(cache, iter(transliteration.splitlines(True)), show_transliteration, parse_atf=is_atf) try: - body += '{}
'.format(font_name.lower(), cuneify_iterator(cache, iter(transliteration.splitlines()), show_transliteration, parse_atf=False).replace('\n', '
')) - # body += '{}
'.format(cuneify_line(cache, line, show_transliteration).replace('\n', '
')) + body += '
{}

'.format(font_name.lower(), cuneified) except UnrecognisedSymbol as exception: body += 'Unknown symbol "{}" in "{}"
'.format(exception.transliteration, line) except TransliterationNotUnderstood: From bf54372a4d900b6427aa611eda3c96045220c484 Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 09:30:50 -0700 Subject: [PATCH 11/18] add to readme, atf test file --- README.md | 42 +++++++++++++++++++++++++++++++++++++ test_file.atf | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 test_file.atf diff --git a/README.md b/README.md index 208b608..e023b51 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,48 @@ +# cuneifyplus + This is the source code for http://cuneifyplus.arch.cam.ac.uk It is a wrapper around Steve Tinney's 'cuneify' tool, which allows for more complex conversion of transliterated Babylonian and Akkadian into various cuneiform fonts. +## Server + +Can be deployed to e.g. Heroku as wsgi app + +https://cuneify.herokuapp.com/ + + +## Commannd line usage + +```bash +$python3 cuneify_interface.py test_file.txt +𒇻 𒀠 𒌨 𒆠 𒆗 𒄯 𒃻 𒀀 𒉌 +𒁕 +𒃮 +``` +```bash +$python3 cuneify_interface.py --parse-atf test_file.atf +&P232701 = RIME 3/1.01.07.031, ex. 117 +#atf: lang sux +# reconstruction +@object cone +@surface a +1. {d}nin-dar-a +# 𒀭 𒊩𒌆 𒁯 𒀀 +2. lugal uru16 +# 𒈗 𒂗 +3. lugal-a-ni +# 𒈗 𒀀 𒉌 +4. gu3-de2-a +# 𒅗 𒌤 𒀀 +5. ensi2 +# 𒉺𒋼𒋛 +6. lagasz{ki}-ke4 +# 𒉢𒁓𒆷 𒆠 𒆤 +7. e2 gir2-su{ki}-ka-ni +# 𒂍 𒄈 𒋢 𒆠 𒅗 𒉌 +8. mu-na-du3 +# 𒈬 𒈾 𒆕 +... +``` diff --git a/test_file.atf b/test_file.atf new file mode 100644 index 0000000..2b8f7eb --- /dev/null +++ b/test_file.atf @@ -0,0 +1,58 @@ +&P232701 = RIME 3/1.01.07.031, ex. 117 +#atf: lang sux +# reconstruction +@object cone +@surface a +1. {d}nin-dar-a +2. lugal uru16 +3. lugal-a-ni +4. gu3-de2-a +5. ensi2 +6. lagasz{ki}-ke4 +7. e2 gir2-su{ki}-ka-ni +8. mu-na-du3 + + +&P232702 = RIME 3/1.01.07.031, ex. 118 +#atf: lang sux +# reconstruction +@object cone +@surface a +1. {d}nin-dar-a +2. lugal uru16 +3. lugal-a-ni +4. gu3-de2-a +5. ensi2 +6. lagasz{ki}-ke4 +7. e2 gir2-su{ki}-ka-ni +8. 
mu-na-du3 + + +&P232703 = RIME 3/1.01.07.031, ex. 119 +#atf: lang sux +# reconstruction +@object cone +@surface a +1. {d}nin-dar-a +2. lugal uru16 +3. lugal-a-ni +4. gu3-de2-a +5. ensi2 +6. lagasz{ki}-ke4 +7. e2 gir2-su{ki}-ka-ni +8. mu-na-du3 + + +&P232704 = RIME 3/1.01.07.031, ex. 120 +#atf: lang sux +# reconstruction +@object cone +@surface a +1. {d}nin-dar-a +2. lugal uru16 +3. lugal-a-ni +4. gu3-de2-a +5. ensi2 +6. lagasz{ki}-ke4 +7. e2 gir2-su{ki}-ka-ni +8. mu-na-du3 From f8b78af4b530c56f416f68ebf23c7d0dbe4906ac Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 09:40:01 -0700 Subject: [PATCH 12/18] typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e023b51..479ce70 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Can be deployed to e.g. Heroku as wsgi app https://cuneify.herokuapp.com/ -## Commannd line usage +## Command line usage ```bash $python3 cuneify_interface.py test_file.txt From 65731e691dc8cdeec721e53fab27d475ca22374f Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 11:31:28 -0700 Subject: [PATCH 13/18] Parse ATF CLI argument. Readme update. --- README.md | 25 +++++++++++++++++++++---- cuneify_interface.py | 4 +++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 479ce70..9b9d84a 100644 --- a/README.md +++ b/README.md @@ -7,22 +7,39 @@ Babylonian and Akkadian into various cuneiform fonts. ## Server -Can be deployed to e.g. Heroku as wsgi app +Can be deployed to e.g. Heroku as wsgi app. -https://cuneify.herokuapp.com/ +Example: https://cuneify.herokuapp.com/ +```bash +$ heroku create cuneify +Creating app... done, ⬢ mystic-wind-83 +Created http://cuneify.herokuapp.com/ | git@heroku.com:cuneify.git + +$ git push heroku master +Enumerating objects: 5, done. +Counting objects: 100% (5/5), done. +Delta compression using up to 16 threads +Compressing objects: 100% (3/3), done. 
+Writing objects: 100% (3/3), 287 bytes | 287.00 KiB/s, done. +Total 3 (delta 2), reused 0 (delta 0), pack-reused 0 +. +. +. +remote: https://cuneify.herokuapp.com/ deployed to Heroku +``` ## Command line usage ```bash -$python3 cuneify_interface.py test_file.txt +$ python3 cuneify_interface.py test_file.txt 𒇻 𒀠 𒌨 𒆠 𒆗 𒄯 𒃻 𒀀 𒉌 𒁕 𒃮 ``` ```bash -$python3 cuneify_interface.py --parse-atf test_file.atf +$ python3 cuneify_interface.py --parse-atf test_file.atf &P232701 = RIME 3/1.01.07.031, ex. 117 #atf: lang sux # reconstruction diff --git a/cuneify_interface.py b/cuneify_interface.py index b826474..ba58556 100644 --- a/cuneify_interface.py +++ b/cuneify_interface.py @@ -421,6 +421,8 @@ def main(): parser.add_argument('--show-transliteration', action='store_true', help='By default just show cuneiform. If this is set, ' 'also display original transliteration') + parser.add_argument('--parse-atf', action='store_true', + help='If this is set parse file as .atf formatted') parser.add_argument('--symbol-list', action='store_true', help='If this is set, show a mapping between the transliterated symbols and cuneiform.') parser.add_argument('--cache', help='Use specified cache file', @@ -440,7 +442,7 @@ def main(): print('Unrecognised symbols:') print(unrecognised_tokens) else: - print(cuneify_file(cache, args.input_file, args.show_transliteration)) + print(cuneify_file(cache, args.input_file, args.show_transliteration, args.parse_atf)) if __name__ == '__main__': From c2b7520a22b858c754272b55f594b0309d6eeef2 Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 11:33:11 -0700 Subject: [PATCH 14/18] readme typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9b9d84a..f127f41 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Example: https://cuneify.herokuapp.com/ ```bash $ heroku create cuneify -Creating app... done, ⬢ mystic-wind-83 +Creating app... 
done, ⬢ cuneify Created http://cuneify.herokuapp.com/ | git@heroku.com:cuneify.git $ git push heroku master From 25f8afb3c7eccd03a7a4a34bda549e133f497d03 Mon Sep 17 00:00:00 2001 From: Stan James Date: Tue, 16 Feb 2021 11:36:08 -0700 Subject: [PATCH 15/18] Change default for unrecognized tokens to pass through --- cuneify_interface.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cuneify_interface.py b/cuneify_interface.py index ba58556..dbfa0b0 100644 --- a/cuneify_interface.py +++ b/cuneify_interface.py @@ -321,7 +321,7 @@ def cuneify_line_structured(cache, transliteration): return (tokens, separators, symbols, unrecognized_tokens) -def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indicator="?"): +def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indicator=""): ''' Take a line of transliteration and display the output, nicely formatted, on the terminal. Should be used whilst in the context of cache. unrecognized_indicator : String to display if token not recognized. If empty string, From e4f713b0255c86b8422cc2160afbf519220d039c Mon Sep 17 00:00:00 2001 From: Stan James Date: Sun, 21 Feb 2021 20:48:35 -0700 Subject: [PATCH 16/18] readme local server --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f127f41..9079c07 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,16 @@ This is the source code for http://cuneifyplus.arch.cam.ac.uk It is a wrapper around Steve Tinney's 'cuneify' tool, which allows for more complex conversion of transliterated Babylonian and Akkadian into various cuneiform fonts. -## Server +## Local Server + +```bash +$ python3 wsgi.py +Serving on http://localhost:8051 +``` + +Open the URL in browser to use the HTML interface. + +## Remote Server Can be deployed to e.g. Heroku as wsgi app. 
From acc6bfd4fc463a9e65fb631e5f1a963e68109d88 Mon Sep 17 00:00:00 2001 From: Stan James Date: Sun, 21 Feb 2021 20:54:23 -0700 Subject: [PATCH 17/18] fix quotes to double --- wsgi.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/wsgi.py b/wsgi.py index 1510af9..f54bdbf 100644 --- a/wsgi.py +++ b/wsgi.py @@ -35,12 +35,12 @@ ] ) -FONTS_PATH_NAME = '/fonts' +FONTS_PATH_NAME = "/fonts" -def _get_input_form(initial=''): +def _get_input_form(initial=""): """ Return a form that the user can use to enter some transliterated text """ - font_name_selection = ''.join([''.format(name, description) + font_name_selection = "".join(["".format(name, description) for name, description in FONT_NAMES.items()]) body = """
@@ -57,17 +57,17 @@ def _get_input_form(initial=''): def _get_cuneify_body(environ, transliteration, show_transliteration, font_name): - ''' Return the HTML body contents when we've been given a transliteration, and show in the specified font ''' - body = '' + """ Return the HTML body contents when we've been given a transliteration, and show in the specified font """ + body = "" is_atf = "\n2." in transliteration # Hacky way to decide if this is an atf formatted file with get_cache(environ) as cache: cuneified = cuneify_iterator(cache, iter(transliteration.splitlines(True)), show_transliteration, parse_atf=is_atf) try: - body += '
{}

'.format(font_name.lower(), cuneified) + body += "
{}

".format(font_name.lower(), cuneified) except UnrecognisedSymbol as exception: - body += 'Unknown symbol "{}" in "{}"
'.format(exception.transliteration, line) + body += "Unknown symbol \"{}\" in \"{}\"
".format(exception.transliteration, line) except TransliterationNotUnderstood: - body += 'Possible formatting error in "{}"
'.format(line) + body += "Possible formatting error in \"{}\"
".format(line) # TODO this can probably be neatened up a little bit return body @@ -84,14 +84,14 @@ def _get_symbol_list_body(environ, transliteration, font_name): cache, transliteration, return_unrecognised=True ) for cuneiform_symbol, transliterations in symbol_to_transliterations.items(): - line = '{}: {}
'.format( + line = "{}: {}
".format( font_name.lower(), cuneiform_symbol, ", ".join(transliterations) ) body += line if len(unrecognised_tokens) > 0: # Print out unrecognised tokens if there are any - body += '
These tokens were unrecognised: {}
'.format( + body += "
These tokens were unrecognised: {}
".format( ", ".join(unrecognised_tokens) ) @@ -144,10 +144,10 @@ def application(environ, start_response): if path_info.startswith(FONTS_PATH_NAME): # Return the static font file return construct_font_response(environ, start_response, path_info) - elif environ['REQUEST_METHOD'] == "POST": + elif environ["REQUEST_METHOD"] == "POST": # Whatever else happens, we always need a non-empty transliteration - transliteration = form.getvalue('input') + transliteration = form.getvalue("input") # Get the values of the other form inputs show_transliteration_value = form.getvalue("show_transliteration") @@ -164,7 +164,7 @@ def application(environ, start_response): cuneiform_output += _get_cuneify_body( environ, transliteration, show_transliteration, font_name ) - elif action_value == 'Create sign list': + elif action_value == "Create sign list": # Make a symbol list! cuneiform_output += _get_symbol_list_body(environ, transliteration, font_name) else: @@ -176,9 +176,9 @@ def application(environ, start_response): [ """@font-face {{{{ font-family: {1}; - src: url(fonts/{1}.woff) format('woff'), - url(fonts/{1}.eot) format('embedded-opentype'), - url(fonts/{1}.ttf) format('truetype'); + src: url(fonts/{1}.woff) format("woff"), + url(fonts/{1}.eot) format("embedded-opentype"), + url(fonts/{1}.ttf) format("truetype"); }}}} .{0} {{{{ font-family: {1}; @@ -228,8 +228,8 @@ def application(environ, start_response): response_body = response_body.format(cuneiform_output, body) response_body = response_body.encode("utf-8") - status = '200 OK' - ctype = 'text/html' + status = "200 OK" + ctype = "text/html" response_headers = [ ("Content-Type", ctype), ("Content-Length", str(len(response_body))), @@ -240,7 +240,7 @@ def application(environ, start_response): # Below for testing only # -if __name__ == '__main__': +if __name__ == "__main__": MY_URL="" from wsgiref.simple_server import make_server httpd = make_server("localhost", 8051, application) From 7562449ee09a9490dec70e4f27738af51b06dfac Mon 
Sep 17 00:00:00 2001 From: Stan James Date: Wed, 24 Feb 2021 14:53:46 -0700 Subject: [PATCH 18/18] escape html in input (redundant, but that's ok) Make extra sure we don't ever echo user-inputted HTML. Plus remove whitespace. --- wsgi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wsgi.py b/wsgi.py index f54bdbf..850916f 100644 --- a/wsgi.py +++ b/wsgi.py @@ -51,7 +51,7 @@ def _get_input_form(initial=""): """.format( - MY_URL, initial, font_name_selection + MY_URL, cgi.escape(initial), font_name_selection ) return body @@ -169,7 +169,7 @@ def application(environ, start_response): cuneiform_output += _get_symbol_list_body(environ, transliteration, font_name) else: raise RuntimeError("Unrecognised action value {}".format(action_value)) - body = _get_input_form(initial=transliteration) + body = _get_input_form(initial=transliteration) # All the CSS representing font classes font_info = "\n".join(