Skip to content
This repository was archived by the owner on Oct 14, 2024. It is now read-only.
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.vscode/*
__pycache__/*
bin/*
lib/*
pyvenv.cfg
1 change: 1 addition & 0 deletions Procfile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
web: gunicorn wsgi:application
68 changes: 68 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,74 @@
# cuneifyplus

This is the source code for http://cuneifyplus.arch.cam.ac.uk

It is a wrapper around Steve Tinney's 'cuneify' tool, which allows for more complex conversion of transliterated
Babylonian and Akkadian into various cuneiform fonts.

## Local Server

```bash
$ python3 wsgi.py
Serving on http://localhost:8051
```

Open the URL in a browser to use the HTML interface.

## Remote Server

The app can be deployed as a WSGI application, e.g. to Heroku.

Example: https://cuneify.herokuapp.com/

```bash
$ heroku create cuneify
Creating app... done, ⬢ cuneify
Created http://cuneify.herokuapp.com/ | git@heroku.com:cuneify.git

$ git push heroku master
Enumerating objects: 5, done.
Counting objects: 100% (5/5), done.
Delta compression using up to 16 threads
Compressing objects: 100% (3/3), done.
Writing objects: 100% (3/3), 287 bytes | 287.00 KiB/s, done.
Total 3 (delta 2), reused 0 (delta 0), pack-reused 0
.
.
.
remote: https://cuneify.herokuapp.com/ deployed to Heroku
```

## Command line usage

```bash
$ python3 cuneify_interface.py test_file.txt
𒇻 𒀠 𒌨 𒆠 𒆗 𒄯 𒃻 𒀀 𒉌
𒁕
𒃮
```

```bash
$ python3 cuneify_interface.py --parse-atf test_file.atf
&P232701 = RIME 3/1.01.07.031, ex. 117
#atf: lang sux
# reconstruction
@object cone
@surface a
1. {d}nin-dar-a
# 𒀭 𒊩𒌆 𒁯 𒀀
2. lugal uru16
# 𒈗 𒂗
3. lugal-a-ni
# 𒈗 𒀀 𒉌
4. gu3-de2-a
# 𒅗 𒌤 𒀀
5. ensi2
# 𒉺𒋼𒋛
6. lagasz{ki}-ke4
# 𒉢𒁓𒆷 𒆠 𒆤
7. e2 gir2-su{ki}-ka-ni
# 𒂍 𒄈 𒋢 𒆠 𒅗 𒉌
8. mu-na-du3
# 𒈬 𒈾 𒆕
...
```
102 changes: 74 additions & 28 deletions cuneify_interface.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# -*- coding: utf8 -*-

import itertools
import os
import pickle
Expand Down Expand Up @@ -140,8 +142,8 @@ def __enter__(self):

@abstractmethod
def __exit__(self, type_, value, traceback):
"""Update the cache with the current transliteration, cuneiform pairs.
"""Update the cache with the current transliteration, cuneiform pairs.

It will overwrite the given values if present
"""

Expand Down Expand Up @@ -253,46 +255,79 @@ def _write_cache_file(self):
self._cache_modified = False


def cuneify_line(cache, transliteration, show_transliteration):
""" Take a line of transliteration and display the output, nicely formatted, on the terminal.
Should be used whilst in the context of cache.
def cuneify_line_structured(cache, transliteration):
    """ Break a line of transliteration into its parts and look up every token.

    Returns a 4-tuple of lists:
    - tokens, eg `tok1`
    - separators, eg `.` (padded with a trailing empty string)
    - cuneiform symbols, eg `𒌉` (None wherever the lookup failed)
    - unrecognized tokens, eg `Howdy`
    """
    stripped = transliteration.strip()

    # Tokens are the text between TOKEN_REGEX matches; separators are the matches.
    # NOTE(review): assumes TOKEN_REGEX has no capturing groups, otherwise
    # re.split / re.findall would not pair up one-to-one -- TODO confirm.
    tokens = re.split(TOKEN_REGEX, stripped)
    # The final token has no separator after it, so pad with an empty one
    separators = re.findall(TOKEN_REGEX, stripped) + [""]

    symbols = []
    unrecognized_tokens = []
    for token in tokens:
        try:
            symbols.append(cache.get_cuneiform(token))
        except (UnrecognisedSymbol, TransliterationNotUnderstood):
            # Keep the lists aligned: a failed lookup is recorded as None
            unrecognized_tokens.append(token)
            symbols.append(None)

    return tokens, separators, symbols, unrecognized_tokens

def cuneify_line(cache, transliteration, show_transliteration, unrecognized_indicator=""):
    """ Take a line of transliteration and return the output, nicely formatted for the terminal.

    Should be used whilst in the context of cache.

    cache : cuneiform cache, passed through to cuneify_line_structured.
    transliteration : one line of transliterated text.
    show_transliteration : If true, return two lines separated by a newline: the original
        tokens with their separators, then the cuneiform, with each token/symbol pair
        padded to a common width. If false, return only the cuneiform symbols joined
        by single spaces.
    unrecognized_indicator : String to display if token not recognized. If empty string,
        the token will be returned as-is.
    """
    tokens, separators, raw_symbols, _ = cuneify_line_structured(cache, transliteration)

    # Substitute the chosen string for unrecognized tokens, which arrive as None
    symbols = []
    for token, symbol in zip(tokens, raw_symbols):
        if symbol is None:
            symbol = token if unrecognized_indicator == "" else unrecognized_indicator
        symbols.append(symbol)

    if not show_transliteration:
        return " ".join(symbols)

    # Format cuneiform under transliteration, something like this:
    #
    # tok1.tok2 tok3
    # A    BB   C
    original_columns = []
    cuneiform_columns = []
    for token, separator, symbol in zip(tokens, separators, symbols):
        # symbols no longer contains None, so no second substitution is needed here
        text = token + separator
        width = max(len(text), len(symbol))
        original_columns.append(text.ljust(width))
        cuneiform_columns.append(symbol.ljust(width))
    return "{}\n{}".format("".join(original_columns), "".join(cuneiform_columns))


def cuneify_file(cache, file_name, show_transliteration):
""" Given a text file with one or more lines of transliterated text, print out the corresponding
def cuneify_iterator(cache, iterator, show_transliteration, parse_atf=True):
""" Given a iterator object that yields lines of transliterated text, return the corresponding
version in cuneiform
"""
output = ""
with open(file_name) as input_file:
for line in input_file:
if parse_atf:
for line in iterator:
atf_line_parts = re.search("^([0-9]+\.)([ \t]*)(.*)", line)
if atf_line_parts:
transliteration = atf_line_parts.group(3)
output += line
output += "#" + atf_line_parts.group(2) + cuneify_line(cache, transliteration, show_transliteration) + "\n"
else:
output += line
else:
for line in iterator:
output += cuneify_line(cache, line, show_transliteration)
output += "\n"
# If also showing transliteration then an extra blank line aids legibility
Expand All @@ -301,6 +336,14 @@ def cuneify_file(cache, file_name, show_transliteration):
return output


def cuneify_file(cache, file_name, show_transliteration, parse_atf=True):
    """ Read transliterated text from file_name and return it converted to cuneiform.

    The open file is handed to cuneify_iterator, which consumes it line by line;
    cache, show_transliteration and parse_atf are passed through unchanged.
    """
    with open(file_name) as input_file:
        converted = cuneify_iterator(
            cache, input_file, show_transliteration, parse_atf=parse_atf
        )
    return converted


def ordered_symbol_to_transliterations(
cache, transliteration, return_unrecognised=False
):
Expand Down Expand Up @@ -355,6 +398,9 @@ def main():
help="By default just show cuneiform. If this is set, "
"also display original transliteration",
)
parser.add_argument(
"--parse-atf", action="store_true",
help="If this is set parse file as .atf formatted")
parser.add_argument(
"--symbol-list",
action="store_true",
Expand All @@ -380,7 +426,7 @@ def main():
print("Unrecognised symbols:")
print(unrecognised_tokens)
else:
print(cuneify_file(cache, args.input_file, args.show_transliteration))
print(cuneify_file(cache, args.input_file, args.show_transliteration, args.parse_atf))


if __name__ == "__main__":
Expand Down
35 changes: 22 additions & 13 deletions environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,31 @@
from cuneify_interface import FileCuneiformCache


# Deployment-specific settings. A host whose name contains "mws" serves everything from
# beneath DOCUMENT_ROOT; any other host is treated as a local run and uses relative paths.
if "mws" not in socket.gethostname().lower():

    # Running locally
    MY_URL = ""

    def get_cache(environ):
        """ Return the standard cuneiform cache (environ is not consulted locally) """
        # read_only is left at FileCuneiformCache's default here
        return FileCuneiformCache(cache_file_path="cuneiform_cache.pickle")

    def get_font_directory(environ):
        """ Return the relative font directory (environ is not consulted locally) """
        return "fonts"

else:

    # Running in MWS
    MY_URL = "http://cuneifyplus.arch.cam.ac.uk"

    def get_cache(environ):
        """ Return the standard cuneiform cache, opened read-only """
        document_root = environ["DOCUMENT_ROOT"]
        pickle_path = os.path.join(document_root, "cuneifyplus", "cuneiform_cache.pickle")
        return FileCuneiformCache(
            cache_file_path=os.path.normpath(pickle_path), read_only=True
        )

    def get_font_directory(environ):
        """ Return the font directory beneath the document root """
        return os.path.join(environ["DOCUMENT_ROOT"], "cuneifyplus", "fonts")
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
gunicorn==20.0.4
58 changes: 58 additions & 0 deletions test_file.atf
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
&P232701 = RIME 3/1.01.07.031, ex. 117
#atf: lang sux
# reconstruction
@object cone
@surface a
1. {d}nin-dar-a
2. lugal uru16
3. lugal-a-ni
4. gu3-de2-a
5. ensi2
6. lagasz{ki}-ke4
7. e2 gir2-su{ki}-ka-ni
8. mu-na-du3


&P232702 = RIME 3/1.01.07.031, ex. 118
#atf: lang sux
# reconstruction
@object cone
@surface a
1. {d}nin-dar-a
2. lugal uru16
3. lugal-a-ni
4. gu3-de2-a
5. ensi2
6. lagasz{ki}-ke4
7. e2 gir2-su{ki}-ka-ni
8. mu-na-du3


&P232703 = RIME 3/1.01.07.031, ex. 119
#atf: lang sux
# reconstruction
@object cone
@surface a
1. {d}nin-dar-a
2. lugal uru16
3. lugal-a-ni
4. gu3-de2-a
5. ensi2
6. lagasz{ki}-ke4
7. e2 gir2-su{ki}-ka-ni
8. mu-na-du3


&P232704 = RIME 3/1.01.07.031, ex. 120
#atf: lang sux
# reconstruction
@object cone
@surface a
1. {d}nin-dar-a
2. lugal uru16
3. lugal-a-ni
4. gu3-de2-a
5. ensi2
6. lagasz{ki}-ke4
7. e2 gir2-su{ki}-ka-ni
8. mu-na-du3
Loading