diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 2bcce81..ba45b2c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -12,12 +12,12 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.9, '3.10', 3.11, 3.12] + python-version: [3.9, '3.10', 3.11, 3.12, 3.13, 3.14] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/CHANGES.md b/CHANGES.md index 69991d6..b0abf22 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,17 @@ # Changes +## 4.0 + +- Removed dependence on `attrs` and `python-dateutil`. +- Removed deprecated functionality. + + +### Breaking changes + +Due to removing dependence on `attrs`, some previously `attr.s` decorated classes are now simple +dataclasses. Thus, subclassing them may break or behave differently than before. + + ## 3.24.4 - Fixed bug introduced by supporting escaped brackets in markdown link labels ... diff --git a/RELEASING.md b/RELEASING.md index 546f8df..20b39da 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -13,6 +13,11 @@ tox -r ```shell flake8 src/clldutils ``` +- +- Make sure pylint passes with a score of 10: +```shell +pylint src/clldutils +``` - Make sure the docs render: ```shell diff --git a/docs/attrlib.rst b/docs/attrlib.rst deleted file mode 100644 index 87f8da5..0000000 --- a/docs/attrlib.rst +++ /dev/null @@ -1,7 +0,0 @@ - -Tools to use with the `attrs` package -------------------------------------- - -.. automodule:: clldutils.attrlib - :members: - diff --git a/docs/index.rst b/docs/index.rst index cbe721a..5db91dc 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -21,7 +21,6 @@ Follow the links below for documentation of the clldutils Python API. 
:caption: Contents: apilib - attrlib clilib color coordinates diff --git a/pylintrc b/pylintrc new file mode 100644 index 0000000..2465270 --- /dev/null +++ b/pylintrc @@ -0,0 +1,657 @@ +[MAIN] + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + +# Clear in-memory caches upon conclusion of linting. Useful if running pylint +# in a server-like mode. +clear-cache-post-run=no + +# Load and enable all available extensions. Use --list-extensions to see a list +# all available extensions. +#enable-all-extensions= + +# In error mode, messages with a category besides ERROR or FATAL are +# suppressed, and no reports are done by default. Error mode is compatible with +# disabling specific errors. +#errors-only= + +# Always return a 0 (non-error) status code, even if lint errors are found. +# This is primarily useful in continuous integration scripts. +#exit-zero= + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. +extension-pkg-allow-list=lxml + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code. (This is an alternative name to extension-pkg-allow-list +# for backward compatibility.) +extension-pkg-whitelist= + +# Return non-zero exit code if any of these messages/categories are detected, +# even if score is above --fail-under value. Syntax same as enable. Messages +# specified are enabled, while categories only check already-enabled messages. +fail-on= + +# Specify a score threshold under which the program will exit with error. 
+fail-under=8 + +# Interpret the stdin as a python script, whose filename needs to be passed as +# the module_or_package argument. +#from-stdin= + +# Files or directories to be skipped. They should be base names, not paths. +ignore=CVS + +# Add files or directories matching the regular expressions patterns to the +# ignore-list. The regex matches against paths and can be in Posix or Windows +# format. Because '\\' represents the directory delimiter on Windows systems, +# it can't be used as an escape character. +ignore-paths= + +# Files or directories matching the regular expression patterns are skipped. +# The regex matches against base names, not paths. The default value ignores +# Emacs file locks +ignore-patterns=^\.# + +# List of module names for which member attributes should not be checked and +# will not be imported (useful for modules/projects where namespaces are +# manipulated during runtime and thus existing member attributes cannot be +# deduced by static analysis). It supports qualified module names, as well as +# Unix pattern matching. +ignored-modules= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the +# number of processors available to use, and will cap the count on Windows to +# avoid hangs. +jobs=1 + +# Control the amount of potential inferred values when inferring a single +# object. This can help the performance when dealing with large functions or +# complex, nested conditions. +limit-inference-results=100 + +# List of plugins (as comma separated values of python module names) to load, +# usually to register additional checkers. +load-plugins= + +# Pickle collected data for later comparisons. +persistent=yes + +# Resolve imports to .pyi stubs if available. May reduce no-member messages and +# increase not-an-iterable messages. +prefer-stubs=no + +# Minimum Python version to use for version dependent checks. 
Will default to +# the version used to run pylint. +py-version=3.12 + +# Discover python modules and packages in the file system subtree. +recursive=no + +# Add paths to the list of the source roots. Supports globbing patterns. The +# source root is an absolute path or a path relative to the current working +# directory used to determine a package namespace for modules located under the +# source root. +source-roots= + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# In verbose mode, extra non-checker-related info will be displayed. +#verbose= + + +[BASIC] + +# Naming style matching correct argument names. +argument-naming-style=snake_case + +# Regular expression matching correct argument names. Overrides argument- +# naming-style. If left empty, argument names will be checked with the set +# naming style. +#argument-rgx= + +# Naming style matching correct attribute names. +attr-naming-style=snake_case + +# Regular expression matching correct attribute names. Overrides attr-naming- +# style. If left empty, attribute names will be checked with the set naming +# style. +#attr-rgx= + +# Bad variable names which should always be refused, separated by a comma. +bad-names=foo, + bar, + baz, + toto, + tutu, + tata + +# Bad variable names regexes, separated by a comma. If names match any regex, +# they will always be refused +bad-names-rgxs= + +# Naming style matching correct class attribute names. +class-attribute-naming-style=any + +# Regular expression matching correct class attribute names. Overrides class- +# attribute-naming-style. If left empty, class attribute names will be checked +# with the set naming style. +#class-attribute-rgx= + +# Naming style matching correct class constant names. +class-const-naming-style=UPPER_CASE + +# Regular expression matching correct class constant names. Overrides class- +# const-naming-style. 
If left empty, class constant names will be checked with +# the set naming style. +#class-const-rgx= + +# Naming style matching correct class names. +class-naming-style=PascalCase + +# Regular expression matching correct class names. Overrides class-naming- +# style. If left empty, class names will be checked with the set naming style. +#class-rgx= + +# Naming style matching correct constant names. +const-naming-style=UPPER_CASE + +# Regular expression matching correct constant names. Overrides const-naming- +# style. If left empty, constant names will be checked with the set naming +# style. +#const-rgx= + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# Naming style matching correct function names. +function-naming-style=snake_case + +# Regular expression matching correct function names. Overrides function- +# naming-style. If left empty, function names will be checked with the set +# naming style. +#function-rgx= + +# Good variable names which should always be accepted, separated by a comma. +good-names=i, + j, + k, + ex, + Run, + _ + +# Good variable names regexes, separated by a comma. If names match any regex, +# they will always be accepted +good-names-rgxs= + +# Include a hint for the correct naming format with invalid-name. +include-naming-hint=no + +# Naming style matching correct inline iteration names. +inlinevar-naming-style=any + +# Regular expression matching correct inline iteration names. Overrides +# inlinevar-naming-style. If left empty, inline iteration names will be checked +# with the set naming style. +#inlinevar-rgx= + +# Naming style matching correct method names. +method-naming-style=snake_case + +# Regular expression matching correct method names. Overrides method-naming- +# style. If left empty, method names will be checked with the set naming style. +#method-rgx= + +# Naming style matching correct module names. 
+module-naming-style=snake_case + +# Regular expression matching correct module names. Overrides module-naming- +# style. If left empty, module names will be checked with the set naming style. +#module-rgx= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# Regular expression matching correct parameter specification variable names. +# If left empty, parameter specification variable names will be checked with +# the set naming style. +#paramspec-rgx= + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +# These decorators are taken in consideration only for invalid-name. +property-classes=abc.abstractproperty + +# Regular expression matching correct type alias names. If left empty, type +# alias names will be checked with the set naming style. +#typealias-rgx= + +# Regular expression matching correct type variable names. If left empty, type +# variable names will be checked with the set naming style. +#typevar-rgx= + +# Regular expression matching correct type variable tuple names. If left empty, +# type variable tuple names will be checked with the set naming style. +#typevartuple-rgx= + +# Naming style matching correct variable names. +variable-naming-style=snake_case + +# Regular expression matching correct variable names. Overrides variable- +# naming-style. If left empty, variable names will be checked with the set +# naming style. +#variable-rgx= + + +[CLASSES] + +# Warn about protected attribute access inside special methods +check-protected-access-in-special-methods=no + +# List of method names used to declare (i.e. assign) instance attributes. 
+defining-attr-methods=__init__, + __new__, + setUp, + asyncSetUp, + __post_init__ + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make,os._exit + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[DESIGN] + +# List of regular expressions of class ancestor names to ignore when counting +# public methods (see R0903) +exclude-too-few-public-methods= + +# List of qualified class names to ignore when counting class parents (see +# R0901) +ignored-parents= + +# Maximum number of arguments for function / method. +max-args=5 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Maximum number of boolean expressions in an if statement (see R0916). +max-bool-expr=5 + +# Maximum number of branch for function / method body. +max-branches=12 + +# Maximum number of locals for function / method body. +max-locals=15 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of positional arguments for function / method. +max-positional-arguments=5 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of return / yield for function / method body. +max-returns=6 + +# Maximum number of statements in function / method body. +max-statements=50 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when caught. +overgeneral-exceptions=builtins.BaseException,builtins.Exception + + +[FORMAT] + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + +# Regexp for a line that is allowed to be longer than the limit. 
+ignore-long-lines=^\s*(# )?<?https?://\S+>?$ + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string='    ' + +# Maximum number of characters on a single line. Pylint's default of 100 is +# based on PEP 8's guidance that teams may choose line lengths up to 99 +# characters. +max-line-length=100 + +# Maximum number of lines in a module. +max-module-lines=1000 + +# Allow the body of a class to be on the same line as the declaration if body +# contains single statement. +single-line-class-stmt=no + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + + +[IMPORTS] + +# List of modules that can be imported at any level, not just the top level +# one. +allow-any-import-level= + +# Allow explicit reexports by alias from a package __init__. +allow-reexport-from-package=no + +# Allow wildcard imports from modules that define __all__. +allow-wildcard-with-all=no + +# Deprecated modules which should not be used, separated by a comma. +deprecated-modules= + +# Output a graph (.gv or any supported image format) of external dependencies +# to the given file (report RP0402 must not be disabled). +ext-import-graph= + +# Output a graph (.gv or any supported image format) of all (i.e. internal and +# external) dependencies to the given file (report RP0402 must not be +# disabled). +import-graph= + +# Output a graph (.gv or any supported image format) of internal dependencies +# to the given file (report RP0402 must not be disabled). +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Couples of modules and preferred modules, separated by a comma. 
+preferred-modules= + + +[LOGGING] + +# The type of string formatting that logging methods do. `old` means using % +# formatting, `new` is for `{}` formatting. +logging-format-style=old + +# Logging modules to check that the string format arguments are in logging +# function parameter format. +logging-modules=logging + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, CONTROL_FLOW, INFERENCE, INFERENCE_FAILURE, +# UNDEFINED. +confidence=HIGH, + CONTROL_FLOW, + INFERENCE, + INFERENCE_FAILURE, + UNDEFINED + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once). You can also use "--disable=all" to +# disable everything first and then re-enable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use "--disable=all --enable=classes +# --disable=W". +disable=raw-checker-failed, + bad-inline-option, + locally-disabled, + file-ignored, + suppressed-message, + useless-suppression, + deprecated-pragma, + use-symbolic-message-instead, + use-implicit-booleaness-not-comparison-to-string, + use-implicit-booleaness-not-comparison-to-zero + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +enable= + + +[METHOD_ARGS] + +# List of qualified names (i.e., library.method) which require a timeout +# parameter e.g. 
'requests.api.get,requests.api.post' +timeout-methods=requests.api.delete,requests.api.get,requests.api.head,requests.api.options,requests.api.patch,requests.api.post,requests.api.put,requests.api.request + + +[MISCELLANEOUS] + +# Whether or not to search for fixme's in docstrings. +check-fixme-in-docstring=no + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME, + XXX, + TODO + +# Regular expression of note tags to take in consideration. +notes-rgx= + + +[REFACTORING] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + +# Complete name of functions that never returns. When checking for +# inconsistent-return-statements if a never returning function is called then +# it will be considered as an explicit return statement and no message will be +# printed. +never-returning-functions=sys.exit,argparse.parse_error + +# Let 'consider-using-join' be raised when the separator to join on would be +# non-empty (resulting in expected fixes of the type: ``"- " + " - +# ".join(items)``) +suggest-join-with-non-empty-separator=yes + + +[REPORTS] + +# Python expression which should return a score less than or equal to 10. You +# have access to the variables 'fatal', 'error', 'warning', 'refactor', +# 'convention', and 'info' which contain the number of messages in each +# category, as well as 'statement' which is the total number of statements +# analyzed. This score is used by the global evaluation report (RP0004). +evaluation=max(0, 0 if fatal else 10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details. +msg-template= + +# Set the output format. Available formats are: 'text', 'parseable', +# 'colorized', 'json2' (improved json format), 'json' (old json format), msvs +# (visual studio) and 'github' (GitHub actions). 
You can also give a reporter +# class, e.g. mypackage.mymodule.MyReporterClass. +#output-format= + +# Tells whether to display a full report or only the messages. +reports=no + +# Activate the evaluation score. +score=yes + + +[SIMILARITIES] + +# Comments are removed from the similarity computation +ignore-comments=yes + +# Docstrings are removed from the similarity computation +ignore-docstrings=yes + +# Imports are removed from the similarity computation +ignore-imports=yes + +# Signatures are removed from the similarity computation +ignore-signatures=yes + +# Minimum lines number of a similarity. +min-similarity-lines=6 + + +[SPELLING] + +# Limits count of emitted suggestions for spelling mistakes. +max-spelling-suggestions=4 + +# Spelling dictionary name. No available dictionaries : You need to install +# both the python package and the system dependency for enchant to work. +spelling-dict= + +# List of comma separated words that should be considered directives if they +# appear at the beginning of a comment and should not be checked. +spelling-ignore-comment-directives=fmt: on,fmt: off,noqa:,noqa,nosec,isort:skip,mypy: + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains the private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to the private dictionary (see the +# --spelling-private-dict-file option) instead of raising a message. +spelling-store-unknown-words=no + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=no + +# This flag controls whether the implicit-str-concat should generate a warning +# on implicit string concatenation in sequences defined over several lines. 
+check-str-concat-over-line-jumps=no + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# Tells whether to warn about missing members when the owner of the attribute +# is inferred to be None. +ignore-none=yes + +# This flag controls whether pylint should warn about no-member and similar +# checks whenever an opaque object is returned when inferring. The inference +# can return multiple potential results while evaluating a Python object, but +# some branches might not be evaluated, which results in partial inference. In +# that case, it might be useful to still emit no-member and other checks for +# the rest of the inferred objects. +ignore-on-opaque-inference=yes + +# List of symbolic message names to ignore for Mixin members. +ignored-checks-for-mixins=no-member, + not-async-context-manager, + not-context-manager, + attribute-defined-outside-init + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local,argparse.Namespace + +# Show a hint with possible names when a member name was not found. The aspect +# of finding the hint is based on edit distance. +missing-member-hint=yes + +# The maximum edit distance a name should have in order to be considered a +# similar match for a missing member name. +missing-member-hint-distance=1 + +# The total number of similar names that should be taken in consideration when +# showing a hint for a missing member. 
+missing-member-max-choices=1 + +# Regex pattern to define which classes are considered mixins. +mixin-class-rgx=.*[Mm]ixin + +# List of decorators that change the signature of a decorated function. +signature-mutators= + + +[VARIABLES] + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid defining new builtins when possible. +additional-builtins= + +# Tells whether unused global variables should be treated as a violation. +allow-global-unused-variables=yes + +# List of names allowed to shadow builtins +allowed-redefined-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_, + _cb + +# A regular expression matching the name of dummy variables (i.e. expected to +# not be used). +dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ + +# Argument names that match this expression will be ignored. +ignored-argument-names=_.*|^ignored_|^unused_ + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# List of qualified module names which can have objects that can redefine +# builtins. 
+redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io diff --git a/setup.cfg b/setup.cfg index 273740f..4ecd253 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,12 +19,12 @@ classifiers = Natural Language :: English Operating System :: OS Independent Programming Language :: Python :: 3 - Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 Programming Language :: Python :: 3.12 Programming Language :: Python :: 3.13 + Programming Language :: Python :: 3.14 Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy License :: OSI Approved :: Apache Software License @@ -34,12 +34,10 @@ zip_safe = False packages = find: package_dir = = src -python_requires = >=3.8 +python_requires = >=3.9 install_requires = - python-dateutil - tabulate>=0.7.7 + prettytable colorlog - attrs>=18.1.0 bibtexparser>=2.0.0b4 pylatexenc markdown @@ -100,7 +98,7 @@ show_missing = true skip_covered = True [tox:tox] -envlist = py3.8, py39, py310, py311, py312, py313 +envlist = py39, py310, py311, py312, py313, py314 isolated_build = true skip_missing_interpreter = true diff --git a/src/clldutils/__init__.py b/src/clldutils/__init__.py index eebc3ef..d1d2384 100644 --- a/src/clldutils/__init__.py +++ b/src/clldutils/__init__.py @@ -1 +1,4 @@ +""" +Utilities useful for managing/publishing research data. +""" __version__ = '3.24.5.dev0' diff --git a/src/clldutils/_compat.py b/src/clldutils/_compat.py new file mode 100644 index 0000000..c161e75 --- /dev/null +++ b/src/clldutils/_compat.py @@ -0,0 +1,26 @@ +""" +This module collects functionality required to support older python versions. 
+""" +import re +import sys +import datetime + + +if (sys.version_info.major, sys.version_info.minor) >= (3, 11): # pragma: no cover + fromisoformat = datetime.datetime.fromisoformat +else: + def fromisoformat(s: str) -> datetime.datetime: # pragma: no cover + """Somewhat hacky backport of the more full-fledged date parsing support in py3.11.""" + s = s.replace('Z', '+00:00') + s = re.sub(r'\.[0-9]+', '', s) + return datetime.datetime.fromisoformat(s) + +if (sys.version_info.major, sys.version_info.minor) >= (3, 10): # pragma: no cover + def entry_points_select(eps, group): + """ + Staring with Python 3.10, `importlib.metadata.entry_points` returns `EntryPoints`.""" + return eps.select(group=group) +else: + def entry_points_select(eps, group): # pragma: no cover + """In Python 3.9, `importlib.metadata.entry_points` returns a `dict`.""" + return eps.get(group, []) diff --git a/src/clldutils/apilib.py b/src/clldutils/apilib.py index 62636f8..64be524 100644 --- a/src/clldutils/apilib.py +++ b/src/clldutils/apilib.py @@ -2,16 +2,11 @@ Support for accessing data in a repository with some "known locations" via an `API` object. 
""" import re -import json import pathlib import functools import webbrowser -import attr - -from clldutils.misc import lazyproperty from clldutils.path import git_describe -from clldutils.attrlib import valid_range from clldutils.metadata import Metadata from clldutils.jsonlib import load @@ -19,54 +14,14 @@ r'v(?P(?P[0-9]+)\.(?P[0-9]+)(\.(?P[0-9]+))?)$') -# -# Common attributes of data objects -# -def latitude(): - return attr.ib( - converter=lambda s: None if s is None or s == '' else float(s), - validator=valid_range(-90, 90, nullable=True)) - - -def longitude(): - return attr.ib( - converter=lambda s: None if s is None or s == '' else float(s), - validator=valid_range(-180, 180, nullable=True)) - - -def value_ascsv(v): - if v is None: - return '' - elif isinstance(v, float): - return "{0:.5f}".format(v) - elif isinstance(v, dict): - return json.dumps(v) - elif isinstance(v, list): - return ';'.join(v) - return "{0}".format(v) - - -@attr.s -class DataObject(object): - - @classmethod - def fieldnames(cls): - return [f.name for f in attr.fields(cls)] - - def ascsv(self): - res = [] - for f, v in zip(attr.fields(self.__class__), attr.astuple(self)): - res.append((f.metadata.get('ascsv') or value_ascsv)(v)) - return res - - -def assert_release(repos): +def assert_release(repos) -> str: + """Make sure a git repository is checked out to a release tag.""" match = VERSION_NUMBER_PATTERN.match(git_describe(repos)) assert match, 'Repository is not checked out to a valid release tag' return match.group('number') # pragma: no cover -class API(object): +class API: """ An API base class to provide programmatic access to data in a git repository. 
@@ -102,8 +57,7 @@ def __init__(self, repos=None): def __str__(self): name = self.repos.resolve().name if self.repos.exists() else self.repos.name - return '<{0} repository {1} at {2}>'.format( - name, git_describe(self.repos), self.repos) + return f'<{name} repository {git_describe(self.repos)} at {self.repos}>' def path(self, *comps: str) -> pathlib.Path: """ @@ -113,7 +67,7 @@ def path(self, *comps: str) -> pathlib.Path: """ return self.repos.joinpath(*comps) - @lazyproperty + @functools.cached_property def dataset_metadata(self) -> Metadata: """ If a repository provides metadata about the dataset curated there as JSON-LD file called @@ -124,19 +78,20 @@ def dataset_metadata(self) -> Metadata: return Metadata.from_jsonld( load(mdp) if mdp.exists() else {}, defaults=self.__default_metadata__) - def assert_release(self): + def assert_release(self): # pylint: disable=C0116 return assert_release(self.repos) @property - def appdir(self) -> pathlib.Path: + def appdir(self) -> pathlib.Path: # pylint: disable=C0116 return self.path('app') @property - def appdatadir(self) -> pathlib.Path: + def appdatadir(self) -> pathlib.Path: # pylint: disable=C0116 return self.appdir.joinpath('data') @classmethod def app_wrapper(cls, func): + """Recreate appdata if requested, open app index.html in browser.""" @functools.wraps(func) def wrapper(args): if isinstance(args.repos, cls): diff --git a/src/clldutils/attrlib.py b/src/clldutils/attrlib.py deleted file mode 100644 index 232a656..0000000 --- a/src/clldutils/attrlib.py +++ /dev/null @@ -1,118 +0,0 @@ -""" -Data curation can profit a lot from a transparent data model and documented structure. This can be -achieved using the `attrs` library, - -- defining core objects of the data as `@attr.s` decorated classes -- using `attrs` validation and conversion functionality, to observe the principle of locality - \ - i.e. 
have data cleanup defined close to the objects, while accessing clean data through the \ - objects elsewhere in the code base. -""" -import re -import functools -import collections - -import attr - -from clldutils.text import PATTERN_TYPE -from clldutils.misc import deprecated - -__all__ = ['asdict', 'valid_range', 'valid_re', 'valid_enum_member', 'cmp_off'] - -# Avoid deprecation warnings for "cmp=False" -# See https://www.attrs.org/en/stable/api.html#deprecated-apis -if getattr(attr, "__version_info__", (0,)) >= (19, 2): - cmp_off = {"eq": False} -else: # pragma: no cover - cmp_off = {"cmp": False} - - -def defaults(cls): - res = collections.OrderedDict() - for field in attr.fields(cls): - default = field.default - if isinstance(default, attr.Factory): - default = default.factory() - res[field.name] = default - return res - - -def asdict(obj, omit_defaults=True, omit_private=True): - """ - Enhanced version of `attr.asdict`. - - :param omit_defaults: If `True`, only attribute values which differ from the default will be \ - added. - :param omit_private: If `True`, values of private attributes (i.e. attributes with names \ - starting with `_`) will not be added. - - .. code-block:: python - - >>> @attr.s - ... class Bag: - ... _private = attr.ib() - ... with_default = attr.ib(default=7) - ... 
- >>> asdict(Bag('x')) - OrderedDict() - >>> asdict(Bag('x'), omit_defaults=False, omit_private=False) - OrderedDict([('_private', 'x'), ('with_default', 7)]) - >>> attr.asdict(Bag('x')) - {'_private': 'x', 'with_default': 7} - - """ - defs = defaults(obj.__class__) - res = collections.OrderedDict() - for field in attr.fields(obj.__class__): - if not (omit_private and field.name.startswith('_')): - value = getattr(obj, field.name) - if not (omit_defaults and value == defs[field.name]): - if hasattr(value, 'asdict'): - value = value.asdict(omit_defaults=True) - res[field.name] = value - return res - - -def _valid_enum_member(choices, instance, attribute, value, nullable=False): - if not (nullable and value is None) and value not in choices: - raise ValueError('{0} is not a valid {1}'.format(value, attribute.name)) - - -def valid_enum_member(choices, nullable=False): - """ - .. deprecated:: 3.9 - Use `attr.validators.in_` instead. - """ - deprecated('Use `attr.validators.in_` instead.') - return functools.partial(_valid_enum_member, choices, nullable=nullable) - - -def _valid_range(min_, max_, instance, attribute, value, nullable=False): - if not (nullable and value is None) and ( - (min_ is not None and value < min_) or (max_ is not None and value > max_)): - raise ValueError('{0} is not a valid {1}'.format(value, attribute.name)) - - -def valid_range(min_, max_, nullable=False): - """ - A validator that raises a `ValueError` if the provided value that is not in the range defined - by `min_` and `max_`. - """ - return functools.partial(_valid_range, min_, max_, nullable=nullable) - - -def _valid_re(regex, instance, attribute, value, nullable=False): - if nullable and value is None: - return - if not isinstance(regex, PATTERN_TYPE): - regex = re.compile(regex) - if not regex.match(value): - raise ValueError('{0} is not a valid {1}'.format(value, attribute.name)) - - -def valid_re(regex, nullable=False): - """ - .. 
deprecated:: 3.9 - Use `attr.validators.matches_re` instead. - """ - deprecated('Use `attr.validators.matches_re` instead.') - return functools.partial(_valid_re, regex, nullable=nullable) diff --git a/src/clldutils/badge.py b/src/clldutils/badge.py deleted file mode 100644 index d23f0e1..0000000 --- a/src/clldutils/badge.py +++ /dev/null @@ -1,38 +0,0 @@ -"""Badges for inclusion in markdown docs, etc. - -.. seealso:: http://shields.io/ -""" -from urllib.parse import urlencode, quote - -__all__ = ['Colors', 'badge'] - - -class Colors(object): - """ - Colors available for shields.io badges. - """ - brightgreen = 'brightgreen' - green = 'green' - yellowgreen = 'yellowgreen' - yellow = 'yellow' - orange = 'orange' - red = 'red' - lightgrey = 'lightgrey' - blue = 'blue' - - -def badge(subject, status, color, fmt='svg', markdown=True, label=None, **kw) -> str: - """ - URL for (or markdown markup to include) a badge from shields.io - - :param str subject: Text for the left side of the badge - :param str status: Text for the right side of the badge - :param str color: Color for the right side of the badge - :param str fmt: `'svg'` or `'json'` - :param bool markdown: If `True`, return a markdown image link, else return a URL - :param str|None label: Link label, if `markdown==True` - """ - label = label or ': '.join([subject, status]) - url = 'https://img.shields.io/badge/{0}-{1}-{2}.{3}{4}'.format( - quote(subject), quote(status), color, fmt, '?' 
+ urlencode(kw) if kw else '') - return '![{0}]({1} "{0}")'.format(label, url) if markdown else url diff --git a/src/clldutils/clilib.py b/src/clldutils/clilib.py index 7d202ea..00e3935 100644 --- a/src/clldutils/clilib.py +++ b/src/clldutils/clilib.py @@ -68,133 +68,35 @@ def run(args): """ import csv import random -import typing +from typing import Optional, Any import logging import pkgutil import pathlib import argparse import warnings import importlib -import collections import importlib.metadata +import collections -import tabulate - -from clldutils.loglib import Logging, get_colorlog -from clldutils.misc import deprecated +from clldutils.loglib import get_colorlog from clldutils import markup +from ._compat import entry_points_select __all__ = [ - 'ParserError', 'Command', 'command', 'ArgumentParser', 'ArgumentParserWithLogging', + 'ParserError', 'get_parser_and_subparsers', 'register_subcommands', 'PathType', 'add_format', 'Table', 'add_csv_field_size_limit', 'add_random_seed', 'confirm', ] -def get_entrypoints(group): +def get_entrypoints(group: str) -> list[importlib.metadata.EntryPoint]: + """Returns entry points for a group.""" eps = importlib.metadata.entry_points() - return eps.select(group=group) if hasattr(eps, 'select') else eps.get(group, []) + return entry_points_select(eps, group=group) class ParserError(Exception): - pass - - -# Global registry for commands. -# Note: This registry is global so it can only be used for one ArgumentParser instance. -# Otherwise, different ArgumentParsers will share the same sub-commands which will rarely -# be intended. 
-_COMMANDS = [] - - -class Command(object): - def __init__(self, func, name=None, usage=None): - self.func = func - self.name = name or func.__name__ - self.usage = usage - - @property - def doc(self): - return self.usage or self.func.__doc__ - - def __call__(self, args): - return self.func(args) - - -def command(name=None, usage=None): - def wrap(f): - _COMMANDS.append(Command(f, name=name, usage=usage)) - return f - return wrap - - -def _attr(obj, attr): - return getattr(obj, attr, getattr(obj, '__{0}__'.format(attr), None)) - - -class ArgumentParser(argparse.ArgumentParser): - def __init_subclass__(cls, **kwargs): - if cls.__name__ != 'ArgumentParserWithLogging': - deprecated( - '{} inherits from clldutils.clilib.ArgumentParser which is deprecated.'.format( - cls.__name__ - )) - super().__init_subclass__(**kwargs) - - def __init__(self, pkg_name, *commands, **kw): - commands = commands or _COMMANDS - kw.setdefault( - 'description', "Main command line interface of the %s package." % pkg_name) - kw.setdefault( - 'epilog', "Use '%(prog)s help ' to get help about individual commands.") - super(ArgumentParser, self).__init__(**kw) - self.commands = collections.OrderedDict((_attr(cmd, 'name'), cmd) for cmd in commands) - self.pkg_name = pkg_name - self.add_argument("--verbosity", help="increase output verbosity") - self.add_argument('command', help=' | '.join(self.commands)) - self.add_argument('args', nargs=argparse.REMAINDER) - - def main(self, args=None, catch_all=False, parsed_args=None): - args = parsed_args or self.parse_args(args=args) - if args.command == 'help' and len(args.args): - # As help text for individual commands we simply re-use the docstrings of the - # callables registered for the command: - print(_attr(self.commands[args.args[0]], 'doc')) - else: - if args.command not in self.commands: - print('invalid command') - self.print_help() - return 64 - try: - self.commands[args.command](args) - except ParserError as e: - print(e) - 
print(_attr(self.commands[args.command], 'doc')) - return 64 - except Exception as e: - if catch_all: - print(e) - return 1 - raise - return 0 - - -class ArgumentParserWithLogging(ArgumentParser): - - def __init__(self, pkg_name, *commands, **kw): - super(ArgumentParserWithLogging, self).__init__(pkg_name, *commands, **kw) - self.add_argument('--log', default=get_colorlog(pkg_name), help=argparse.SUPPRESS) - self.add_argument( - '--log-level', - default=logging.INFO, - help='log level [ERROR|WARN|INFO|DEBUG]', - type=lambda x: getattr(logging, x)) - - def main(self, args=None, catch_all=False, parsed_args=None): - args = parsed_args or self.parse_args(args=args) - with Logging(args.log, level=args.log_level): - return super(ArgumentParserWithLogging, self).main( - catch_all=catch_all, parsed_args=args) + """Exception to signal errors during cli input validation.""" def confirm(question: str, default=True) -> bool: @@ -212,11 +114,11 @@ def confirm(question: str, default=True) -> bool: class Formatter(argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter): - pass + """Help formatter.""" def get_parser_and_subparsers(prog: str, with_defaults_help: bool = True, with_log: bool = True)\ - -> typing.Tuple[argparse.ArgumentParser, typing.Any]: + -> tuple[argparse.ArgumentParser, Any]: """ Get an `argparse.ArgumentParser` instance and associated subparsers. @@ -226,7 +128,7 @@ def get_parser_and_subparsers(prog: str, with_defaults_help: bool = True, with_l :param with_defaults_help: Whether defaults should be displayed in the help message. :param with_log: Whether a global option to select log levels should be available. 
""" - kw = dict(prog=prog) + kw = {'prog': prog} if with_defaults_help: kw.update(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser = argparse.ArgumentParser(**kw) @@ -258,14 +160,14 @@ def iter_modules(pkg): modname = ".".join([pkg.__name__, name]) try: yield name, importlib.import_module(modname) - except Exception as e: # pragma: no cover - warnings.warn('{0} {1}'.format(e, modname)) + except Exception as e: # pragma: no cover # pylint: disable=W0718 + warnings.warn(f'{e} {modname}') def register_subcommands( subparsers, pkg: str, - entry_point: typing.Optional[str] = None, + entry_point: Optional[str] = None, formatter_class: argparse.ArgumentDefaultsHelpFormatter = Formatter, skip_invalid: bool = False): """ @@ -288,7 +190,7 @@ def register_subcommands( try: pkg = ep.load() except ImportError: - warnings.warn('ImportError loading entry point {0.name}'.format(ep)) + warnings.warn(f'ImportError loading entry point {ep.name}') continue _cmds.update( [('.'.join([ep.name, name]), mod) for name, mod in iter_modules(pkg)]) @@ -298,11 +200,11 @@ def register_subcommands( if not mod.__doc__: if skip_invalid: continue - raise ValueError('Command \"{0}\" is missing a docstring.'.format(name)) + raise ValueError(f'Command \"{name}\" is missing a docstring.') if not getattr(mod, 'run', None): # pragma: no cover if skip_invalid: continue - raise ValueError('Command \"{0}\" is missing a run function.'.format(name)) + raise ValueError(f'Command \"{name}\" is missing a run function.') valid[name] = mod subparser = subparsers.add_parser( @@ -339,7 +241,7 @@ def register(parser): ) -def add_random_seed(parser, default: typing.Optional[int] = None): +def add_random_seed(parser, default: Optional[int] = None): """ Command line tools may want to fix Python's `random.seed` to ensure reproducible results. 
@@ -362,8 +264,12 @@ def add_format(parser, default: str = 'pipe'): """ parser.add_argument( "--format", - default=default, - choices=tabulate.tabulate_formats, + default=markup.TableFormat.get(default), + type=markup.TableFormat.get, + # We can only use choices for validation. For the help message we must "abuse" metavar. + # See https://docs.python.org/3/library/argparse.html#choices + metavar=f'{{{",".join(e.name for e in markup.TableFormat)}}}', + choices=markup.TableFormat, help="Format of tabular output.") @@ -393,7 +299,7 @@ def __init__(self, args: argparse.Namespace, *cols, **kw): super().__init__(*cols, **kw) -class PathType(object): +class PathType: # pylint: disable=R0903 """ A type to parse `pathlib.Path` instances from the command line. @@ -409,15 +315,18 @@ def register(parser): def run(args): assert args.input.exists() """ - def __init__(self, must_exist: bool = True, type: typing.Optional[str] = None): + def __init__( + self, + must_exist: bool = True, + type: Optional[str] = None): # pylint: disable=W0622 assert type in (None, 'dir', 'file') self._must_exist = must_exist self._type = type - def __call__(self, string): + def __call__(self, string: str): p = pathlib.Path(string) if self._must_exist and not p.exists(): - raise argparse.ArgumentTypeError('Path {0} does not exist!'.format(string)) + raise argparse.ArgumentTypeError(f'Path {string} does not exist!') if p.exists() and self._type and not getattr(p, 'is_' + self._type)(): - raise argparse.ArgumentTypeError('Path {0} is not a {1}!'.format(string, self._type)) + raise argparse.ArgumentTypeError(f'Path {string} is not a {self._type}!') return p diff --git a/src/clldutils/color.py b/src/clldutils/color.py index e3750da..87c686d 100644 --- a/src/clldutils/color.py +++ b/src/clldutils/color.py @@ -13,10 +13,11 @@ values - but will use different ways to create the scheme depending on the number of values. 
""" import math -import typing +from typing import Union, Optional import colorsys import fractions import itertools +from collections.abc import Sequence __all__ = [ 'diverging_colors', @@ -27,8 +28,10 @@ 'rgb_as_hex', ] +ColorType = Union[str, Sequence[float]] -def _to_rgb(s: typing.Union[str, list, tuple]) -> tuple: + +def _to_rgb(s: ColorType) -> tuple: def f2i(d): assert 0 <= d <= 1 res = int(math.floor(d * 256)) @@ -41,6 +44,7 @@ def f2i(d): if isinstance(s[0], (float, fractions.Fraction)): s = [f2i(d) for d in s] return s + assert isinstance(s, str) if s.startswith('#'): s = s[1:] @@ -50,24 +54,24 @@ def f2i(d): return tuple(int(c, 16) for c in [s[i:i + 2] for i in range(0, 6, 2)]) -def rgb_as_hex(s: typing.Union[str, list, tuple]) -> str: +def rgb_as_hex(s: ColorType) -> str: """ Convert a RGB triple to a `HEX triplet `_ """ - return '#{0:02X}{1:02X}{2:02X}'.format(*_to_rgb(s)) + return '#{0:02X}{1:02X}{2:02X}'.format(*_to_rgb(s)) # pylint: disable=C0209 -def brightness(color: typing.Union[str, list, tuple]) -> float: +def brightness(color: ColorType) -> float: """ Compute the brightness of a color specified as RGB triple (or Hex triplet). .. seealso:: ``_ """ - R, G, B = _to_rgb(color) + R, G, B = _to_rgb(color) # pylint: disable=invalid-name return 0.299 * R + 0.587 * G + 0.114 * B -def is_bright(color: typing.Union[str, list, tuple]) -> bool: +def is_bright(color: ColorType) -> bool: """ Compute whether a color is considered bright or not. @@ -79,7 +83,7 @@ def is_bright(color: typing.Union[str, list, tuple]) -> bool: return brightness(color) > 125 -def qualitative_colors(n: int, set: str = typing.Optional[str]) -> typing.List[str]: +def qualitative_colors(n: int, set: str = Optional[str]) -> list[str]: # pylint: disable=W0622 """ Choses `n` distinct colors suitable for visualizing categorical variables. 
@@ -204,7 +208,7 @@ def gethsvs(): itertools.islice((colorsys.hsv_to_rgb(*x) for x in gethsvs()), n)] -def sequential_colors(n): +def sequential_colors(n: int) -> list[str]: """ Between 3 and 9 sequential colors. @@ -226,7 +230,7 @@ def sequential_colors(n): return [cols[ix] for ix in indices[n - 3]] -def diverging_colors(n): +def diverging_colors(n: int) -> list[str]: """ Between 3 and 11 diverging colors diff --git a/src/clldutils/coordinates.py b/src/clldutils/coordinates.py index a02cf06..4eab950 100644 --- a/src/clldutils/coordinates.py +++ b/src/clldutils/coordinates.py @@ -5,50 +5,123 @@ Language Structures, e.g. (12d10N, 92d49E), to floating point latitude and longitude values. """ import re +import enum import math +from typing import Union, Optional +import dataclasses __all__ = ['Coordinates', 'dec2degminsec', 'degminsec2dec', 'degminsec'] -import typing - DEGREES = "°" MINUTES = "\u2032" SECONDS = "\u2033" +DimensionType = Union[str, int, float] +DEGMINSEC_FMT = (r'(?P\d+)\s*' + DEGREES + r'\s*' + r'((?P\d+)\s*' + MINUTES + r'\s*)?' 
+                 r'((?P<sec>[\d.]+)\s*' + SECONDS + r'\s*)?')
 PATTERNS = {
     'lat_alnum': re.compile(r"(?P<deg>\d+)d(?P<min>[0-9]+)?(?P<sec>'\d+'')?(?P<hem>S|N)"),
     'lon_alnum': re.compile(r"(?P<deg>\d+)d(?P<min>\d+)?(?P<sec>'\d+'')?(?P<hem>E|W)"),
-    'lat_degminsec': re.compile(
-        r'(?P<deg>\d+)\s*%s\s*((?P<min>\d+)\s*%s\s*)?((?P<sec>[\d.]+)\s*%s\s*)?(?P<hem>S|N)' % (
-            DEGREES, MINUTES, SECONDS)),
-    'lon_degminsec': re.compile(
-        r'(?P<deg>\d+)\s*%s\s*((?P<min>\d+)\s*%s\s*)?((?P<sec>[\d.]+)\s*%s\s*)?(?P<hem>E|W)' % (
-            DEGREES, MINUTES, SECONDS)),
+    'lat_degminsec': re.compile(DEGMINSEC_FMT + r'(?P<hem>S|N)'),
+    'lon_degminsec': re.compile(DEGMINSEC_FMT + r'(?P<hem>E|W)'),
 }
 
 
+class CoordinateFormat(enum.Enum):
+    """Formatting options for coordinates."""
+    alnum = enum.auto()  # pylint: disable=invalid-name
+    ascii = enum.auto()  # pylint: disable=invalid-name
+    degminsec = enum.auto()  # pylint: disable=invalid-name
+
+
+def get_format(what: Union[str, CoordinateFormat]) -> CoordinateFormat:
+    """Allow retrieving a CoordinateFormat by name."""
+    if isinstance(what, str):
+        return getattr(CoordinateFormat, what)
+    return what
+
+
+CoordinateFormatType = Union[CoordinateFormat, str]
+
+
+@dataclasses.dataclass
+class DegMinSec:
+    """A coordinate datum as triple."""
+    degrees: int
+    minutes: int
+    seconds: float
+
+    @classmethod
+    def from_match(cls, m: re.Match) -> 'DegMinSec':
+        """Use the groups of a pattern as defined in PATTERNS to create an instance."""
+        return cls(int(m.group('deg') or 0), int(m.group('min') or 0), float(m.group('sec') or 0.0))
+
+    def as_string(
+            self,
+            hemisphere: str,
+            format: CoordinateFormatType,  # pylint: disable=redefined-builtin
+    ) -> str:
+        """Format as string."""
+        degrees, minutes, seconds = self.degrees, self.minutes, self.seconds
+        seconds = int(round(seconds))
+        if seconds == 60:
+            minutes += 1
+            seconds = 0
+
+        if 120 > minutes >= 60:  # pragma: no cover
+            # This case cannot really happen, because we only ever feed the results of
+            # dec2degminsec into this method.
+ degrees += 1 + minutes -= 60 + + format = get_format(format) + if format == CoordinateFormat.alnum: + res = f"{degrees}d" + if minutes: + res += f"{minutes:02}" + res += hemisphere + return res + + if format == CoordinateFormat.ascii: + res = f"{degrees}°" + if minutes: + res += f"{minutes:0>2d}'" + if seconds: + res += f'{seconds:0>2f}"' + res += hemisphere + return res + + res = f"{degrees}{DEGREES}" + + if minutes: + res += f" {minutes}{MINUTES}" + + if seconds: + res += f" {seconds}{SECONDS}" + res += f" {hemisphere}" + return res + + def degminsec(dec, hemispheres: str, no_seconds: bool = False) -> str: """ .. code-block:: python >>> degminsec(2.4, 'NS') "2°24'N" + >>> degminsec(2.43, 'NS') + '2°25\'48.000000"N' >>> degminsec(1.249, 'NS', no_seconds=True) "1°15'N" """ if 'N' in hemispheres: - return Coordinates(dec, 0).lat_to_string(format='ascii', no_seconds=no_seconds) - return Coordinates(0, dec).lon_to_string(format='ascii', no_seconds=no_seconds) - + return Coordinates(dec, 0).lat_to_string( + format=CoordinateFormat.ascii, no_seconds=no_seconds) + return Coordinates(0, dec).lon_to_string( + format=CoordinateFormat.ascii, no_seconds=no_seconds) -def dec2degminsec(dec, no_seconds: bool = False) -> typing.Tuple[float, float, float]: - """ - convert a floating point number of degrees to a triple (int degrees, int minutes, float seconds) - .. 
code-block:: python - - >>> assert dec2degminsec(30.50) == (30, 30, 0.0) - """ +def _dec2degminsec(dec: float, no_seconds: bool = False) -> DegMinSec: degrees = int(math.floor(dec)) dec = (dec - int(math.floor(dec))) * 60 minutes = int(math.floor(dec)) @@ -62,10 +135,30 @@ def dec2degminsec(dec, no_seconds: bool = False) -> typing.Tuple[float, float, f minutes = 0 degrees += 1 seconds = 0 - return degrees, minutes, seconds + return DegMinSec(degrees, minutes, seconds) -def degminsec2dec(degrees, minutes, seconds) -> float: +def dec2degminsec(dec: float, no_seconds: bool = False) -> tuple[int, int, float]: + """ + convert a floating point number of degrees to a triple (int degrees, int minutes, float seconds) + + .. code-block:: python + + >>> assert dec2degminsec(30.50) == (30, 30, 0.0) + """ + return dataclasses.astuple(_dec2degminsec(dec, no_seconds=no_seconds)) + + +def _degminsec2dec(d: DegMinSec) -> float: + dec = float(d.degrees) + if d.minutes: + dec += float(d.minutes) / 60 + if d.seconds: + dec += float(d.seconds) / 3600 + return dec + + +def degminsec2dec(degrees: int, minutes: int, seconds: float) -> float: """ convert a triple (int degrees, int minutes, float seconds) to a floating point number of degrees @@ -73,15 +166,10 @@ def degminsec2dec(degrees, minutes, seconds) -> float: >>> assert dec2degminsec(degminsec2dec(30,30,0.0)) == (30,30,0.0) """ - dec = float(degrees) - if minutes: - dec += float(minutes) / 60 - if seconds: - dec += float(seconds) / 3600 - return dec + return _degminsec2dec(DegMinSec(degrees, minutes, seconds)) -class Coordinates(object): +class Coordinates: """ A (lat, lon) pair, that can be represented in various formats. 
@@ -100,7 +188,7 @@ class Coordinates(object): >>> assert c.lat_to_string() == '12d10S' >>> c.lat_to_string(format=None) '12° 10′ 12″ S' - >>> c.lat_to_string(format='ascii') + >>> c.lat_to_string(format=CoordinateFormat.ascii) '12°10\'12.000000"S' >>> assert c.lon_to_string() == '92d49W' >>> lat, lon = '12d30N', '60d30E' @@ -109,7 +197,13 @@ class Coordinates(object): >>> assert c.lon_to_string() == lon """ - def __init__(self, lat, lon, format='alnum'): + def __init__( + self, + lat: DimensionType, + lon: DimensionType, + format: CoordinateFormatType = CoordinateFormat.alnum): # pylint: disable=W0622 + format = get_format(format or CoordinateFormat.alnum) + if isinstance(lat, float): self.latitude = lat elif isinstance(lat, int): @@ -124,86 +218,70 @@ def __init__(self, lat, lon, format='alnum'): else: self.longitude = self.lon_from_string(lon, format) - def _match(self, string, type, format): + def _match( + self, string: Union[str, bytes], + type: str, # pylint: disable=W0622 + format: CoordinateFormat, # pylint: disable=W0622 + ) -> re.Match: if isinstance(string, bytes): string = string.decode('utf8') - if type + '_' + format in PATTERNS: - p = PATTERNS[type + '_' + format] + if type + '_' + format.name in PATTERNS: + p = PATTERNS[type + '_' + format.name] else: - p = PATTERNS[type + '_alnum'] + p = PATTERNS[type + '_alnum'] # pragma: no cover m = p.match(string) if not m: raise ValueError(string) return m - def lat_from_string(self, lat, format='alnum'): + def lat_from_string( + self, + lat: str, + format: CoordinateFormat = CoordinateFormat.alnum, # pylint: disable=W0622 + ) -> float: + """Parse a latitude value.""" m = self._match(lat, 'lat', format) - dec = degminsec2dec(m.group('deg'), m.group('min'), m.group('sec')) + dec = _degminsec2dec(DegMinSec.from_match(m)) if m.group('hem') == 'S': dec = -dec return dec - def lon_from_string(self, lon, format='alnum'): + def lon_from_string( + self, + lon: str, + format: CoordinateFormat = CoordinateFormat.alnum, 
# pylint: disable=W0622 + ) -> float: + """Parse a longitude value.""" m = self._match(lon, 'lon', format) - dec = degminsec2dec(m.group('deg'), m.group('min'), m.group('sec')) + dec = _degminsec2dec(DegMinSec.from_match(m)) if m.group('hem') == 'W': dec = -dec return dec - def _format(self, degrees, minutes, seconds, hemisphere, format): - seconds = int(round(seconds)) - if seconds == 60: - minutes += 1 - seconds = 0 - - if 120 > minutes >= 60: # pragma: no cover - # This case cannot really happen, because we only ever feed the results of - # dec2degminsec into this method. - degrees += 1 - minutes -= 60 - - if format == 'alnum': - res = "%sd" % degrees - if minutes: - res += "%02d" % minutes - res += hemisphere - return res - - if format == 'ascii': - res = "%s°" % degrees - if minutes: - res += "{0:0>2d}'".format(minutes) - if seconds: - res += '{0:0>2f}"'.format(seconds) - res += hemisphere - return res - - res = "%s%s" % (degrees, DEGREES) - - if minutes: - res += " %s%s" % (minutes, MINUTES) - - if seconds: - res += " %s%s" % (seconds, SECONDS) - res += " %s" % hemisphere - return res - def lat_to_string( - self, format: typing.Union[str, None] = 'alnum', no_seconds: bool = False) -> str: + self, + format: Optional[CoordinateFormat] = CoordinateFormat.alnum, # pylint: disable=W0622 + no_seconds: bool = False, + ) -> str: + """A latitude value represented as string.""" if self.latitude < 0: hemisphere = 'S' else: hemisphere = 'N' - degrees, minutes, seconds = dec2degminsec(abs(self.latitude), no_seconds=no_seconds) - return self._format(degrees, minutes, seconds, hemisphere, format) + d = _dec2degminsec(abs(self.latitude), no_seconds=no_seconds) + return d.as_string(hemisphere, format) def lon_to_string( - self, format: typing.Union[str, None] = 'alnum', no_seconds: bool = False) -> str: + self, + format: Optional[CoordinateFormat] = CoordinateFormat.alnum, # pylint: disable=W0622 + no_seconds: bool = False, + ) -> str: + """A longitude value represented as 
string.""" if self.longitude < 0: hemisphere = 'W' else: hemisphere = 'E' - degrees, minutes, seconds = dec2degminsec(abs(self.longitude), no_seconds=no_seconds) - return self._format(degrees, minutes, seconds, hemisphere, format) + d = _dec2degminsec(abs(self.longitude), no_seconds=no_seconds) + return d.as_string(hemisphere, format) diff --git a/src/clldutils/db.py b/src/clldutils/db.py index 6525996..30baf2d 100644 --- a/src/clldutils/db.py +++ b/src/clldutils/db.py @@ -2,10 +2,11 @@ This module provides tools to create/drop and use databases specified by DB URL. This module only handles SQLite and PostgreSQL, but abstracts the differences between the two. """ -import typing +from typing import Optional import logging import pathlib import sqlite3 +import contextlib import subprocess import urllib.parse @@ -28,7 +29,7 @@ class DB: """ settings_key = 'sqlalchemy.url' - def __init__(self, url: str, log: typing.Optional[logging.Logger] = None): + def __init__(self, url: str, log: Optional[logging.Logger] = None): self.log = log self.components = urllib.parse.urlparse(url) if self.dialect not in ['sqlite', 'postgresql']: @@ -50,10 +51,12 @@ def from_settings(cls, settings: dict, log=None): @property def dialect(self) -> str: + """The database dialect""" return str(self.components.scheme.split('+')[0]) @property - def name(self): + def name(self) -> str: + """The database name.""" assert self.components.path.startswith('/') return self.components.path[1:].split('?')[0] @@ -75,13 +78,14 @@ def create(self): :raises ValueError: If the database already exists. 
""" if self.log: - self.log.info('creating {0}'.format(self)) + self.log.info(f'creating {self}') if self.dialect == 'postgresql': subprocess.check_call([CREATEDB, self.name]) else: # self.dialect == 'sqlite' if self.exists(): raise ValueError('db exists!') - sqlite3.connect(self.name) + with contextlib.closing(sqlite3.connect(self.name)): + pass def drop(self): """ @@ -89,7 +93,7 @@ def drop(self): """ if self.exists(): if self.log: - self.log.info('dropping {0}'.format(self)) + self.log.info(f'dropping {self}') if self.dialect == 'postgresql': subprocess.check_call([DROPDB, self.name]) else: diff --git a/src/clldutils/declenum.py b/src/clldutils/declenum.py index 6c0437e..524353e 100644 --- a/src/clldutils/declenum.py +++ b/src/clldutils/declenum.py @@ -8,7 +8,7 @@ @functools.total_ordering -class EnumSymbol(object): +class EnumSymbol: """Define a fixed symbol tied to a parent class.""" def __init__(self, cls_, name, value, description, *args): @@ -26,18 +26,18 @@ def __iter__(self): return iter([self.value, self.description]) def __repr__(self): - return "<%s>" % self.name + return f"<{self.name}>" def __hash__(self): return self.value def __str__(self): - return '{0}'.format(self.value) + return f'{self.value}' def __lt__(self, other): return self.value < getattr(other, 'value', None) - def __json__(self, *args, **kw): + def __json__(self, *args, **kw): # pylint: disable=W0613 return self.value @@ -64,20 +64,23 @@ class DeclEnum(metaclass=EnumMeta): @classmethod def from_string(cls, value): + """Look up a symbol by name.""" try: return cls._reg[value] - except KeyError: - raise ValueError("Invalid value for %r: %r" % (cls.__name__, value)) + except KeyError as e: + raise ValueError(f"Invalid value for {cls.__name__}: {repr(value)}") from e @classmethod def get(cls, item): + """Flexible getter for a symbol.""" if item in iter(cls): return item for li in cls: - if li.name == item or li.value == item: + if item in (li.name, li.value): return li raise 
ValueError(item) @classmethod def values(cls): + """All values.""" return list(cls._reg) diff --git a/src/clldutils/fonts.py b/src/clldutils/fonts.py index 8767c4d..1076b46 100644 --- a/src/clldutils/fonts.py +++ b/src/clldutils/fonts.py @@ -11,7 +11,7 @@ """ import pathlib -from clldutils.html import HTML, literal +from clldutils.html import HTML, literal, HTMLBuilder __all__ = ['FONTS_DIR', 'charis_font_spec_css', 'charis_font_spec_html'] @@ -29,31 +29,32 @@ def charis_font_spec_css() -> str: `link_callback `_ with `pisa.CreatePDF`, make sure to return unhandled `src_attr` arguments as is. """ - return """ + fdir = FONTS_DIR.resolve() + return f""" @font-face {{ font-family: 'charissil'; - src: url('{0}/CharisSIL-Regular.ttf'); + src: url('{fdir}/CharisSIL-Regular.ttf'); }} @font-face {{ font-family: 'charissil'; font-style: italic; - src: url('{0}/CharisSIL-Italic.ttf'); + src: url('{fdir}/CharisSIL-Italic.ttf'); }} @font-face {{ font-family: 'charissil'; font-weight: bold; - src: url('{0}/CharisSIL-Bold.ttf'); + src: url('{fdir}/CharisSIL-Bold.ttf'); }} @font-face {{ font-family: 'charissil'; font-weight: bold; font-style: italic; - src: url('{0}/CharisSIL-BoldItalic.ttf'); + src: url('{fdir}/CharisSIL-BoldItalic.ttf'); }} -""".format(FONTS_DIR.resolve()) +""" -def charis_font_spec_html() -> HTML: +def charis_font_spec_html() -> HTMLBuilder: """ Charis SIL font specification inside an HTML style tag. """ diff --git a/src/clldutils/html.py b/src/clldutils/html.py index 9d659cf..0226cd9 100644 --- a/src/clldutils/html.py +++ b/src/clldutils/html.py @@ -48,7 +48,7 @@ __all__ = ["HTML", "escape", "literal"] -class literal(markupsafe.Markup): +class literal(markupsafe.Markup): # pylint: disable=invalid-name """An HTML literal string, which will not be further escaped. 
I'm a subclass of ``markupsafe.Markup``, which itself is a subclass @@ -98,7 +98,7 @@ def __new__(cls, base="", encoding=None, errors="strict"): return super(literal, cls).__new__(cls, base, encoding, errors) @classmethod - def escape(cls, s): + def escape(cls, s: str) -> 'literal': # pylint: disable=W0221 """Escape the argument and return a literal. This is a *class* method. The result depends on the argument type: @@ -132,7 +132,7 @@ def escape(cls, s): """ if s is None: return EMPTY - return super(literal, cls).escape(s) + return super().escape(s) def lit_join(self, iterable): """Like the ``.join`` string method but don't escape elements in the iterable.""" @@ -146,7 +146,7 @@ def lit_join(self, iterable): EMPTY = literal("") -class HTMLBuilder(object): +class HTMLBuilder: """An HTML tag generator.""" literal = literal @@ -230,8 +230,8 @@ class HTMLBuilder(object): _comment_tag = literal("") def __call__(self, *args, **kw): - - """Escape the string args, concatenate them, and return a literal. + """ + Escape the string args, concatenate them, and return a literal. This is the same as ``literal.escape(s)`` but accepts multiple strings. Multiple arguments are useful when mixing child tags @@ -247,11 +247,10 @@ def __call__(self, *args, **kw): ``lit`` If true, don't escape the arguments. (Default False.) """ - nl = kw.pop("nl", False) lit = kw.pop("lit", False) if kw: - raise TypeError("unknown keyword args: {0}".format(sorted(kw))) + raise TypeError(f"unknown keyword args: {sorted(kw)}") if not lit: args = map(escape, args) if nl: @@ -261,8 +260,8 @@ def __call__(self, *args, **kw): return ret def tag(self, tag, *args, **kw): - - """Create an HTML tag. + """ + Create an HTML tag. ``tag`` is the tag name. The other positional arguments become the content for the tag, and are escaped and concatenated. 
@@ -358,7 +357,6 @@ def tag(self, tag, *args, **kw): >>> HTML.tag("/div", _closed=False) literal(u'') """ - if "c" in kw: assert not args, "The special 'c' keyword argument cannot be used " \ "in conjunction with non-keyword arguments" @@ -466,7 +464,7 @@ def render_attrs(self, attrs): return EMPTY.join(strings) # Private methods - def optimize_attrs(self, attrs, boolean_attrs=None): + def optimize_attrs(self, attrs, boolean_attrs=None): # pylint: disable=R0912 """Perform various transformations on an HTML attributes dict. diff --git a/src/clldutils/inifile.py b/src/clldutils/inifile.py index 3e24cda..b663bda 100644 --- a/src/clldutils/inifile.py +++ b/src/clldutils/inifile.py @@ -6,7 +6,11 @@ import io import re import pathlib +from typing import Union, Any import configparser +from collections.abc import Iterable + +DOT = '.' class INI(configparser.ConfigParser): @@ -14,22 +18,27 @@ class INI(configparser.ConfigParser): An enhanced `ConfigParser` with better support for list-valued options and multiline text. """ @staticmethod - def format_list(items): + def format_list(items: Iterable[str]) -> str: + """Concatenate items as INI style list.""" return ''.join('\n' + item for item in items) @classmethod - def from_file(cls, fname, encoding='utf-8', **kw) -> 'INI': + def from_file(cls, fname: Union[str, pathlib.Path], encoding='utf-8', **kw) -> 'INI': + """ + `kw` are passed through to `ConfigParser.__init__`. 
+ """ obj = cls(**kw) obj.read(str(fname), encoding=encoding) return obj def write_string(self, **kw) -> str: + """Write the INI prefixed with an encoding comment suitable for emacs.""" res = io.StringIO() res.write('# -*- coding: utf-8 -*-\n') - super(INI, self).write(res, **kw) + super().write(res, **kw) return res.getvalue() - def set(self, section, option, value=None): + def set(self, section: str, option: str, value: Union[None, list, tuple, Any] = None): """ Enhances `ConfigParser.set` by @@ -44,13 +53,14 @@ def set(self, section, option, value=None): if isinstance(value, (list, tuple)): value = self.format_list(value) elif not isinstance(value, str): - value = '%s' % value - super(INI, self).set(section, option, value) + value = f'{value}' + super().set(section, option, value) - def getlist(self, section, option) -> list: + def getlist(self, section: str, option: str) -> list: + """Get section content as list.""" return self.get(section, option, fallback='').strip().splitlines() - def gettext(self, section, option, whitespace_preserving_prefix='.'): + def gettext(self, section, option, whitespace_preserving_prefix=DOT) -> str: """ While configparser supports multiline values, it does this at the expense of stripping leading whitespace for each line in such a value. Sometimes we want @@ -66,15 +76,20 @@ def gettext(self, section, option, whitespace_preserving_prefix='.'): lines.append(line) return '\n'.join(lines) - def settext(self, section, option, value, whitespace_preserving_prefix='.'): + def settext(self, section: str, option: str, value: str, whitespace_preserving_prefix=DOT): + """ + Set a text option, preserving newlines. + """ lines = [] for line in value.splitlines(): if re.match(r'\s+', line): + # The line starts with whitespace, so we have to add a non-whitespace char to + # preserve it. 
line = whitespace_preserving_prefix + line lines.append(line) self.set(section, option, '\n'.join(lines)) - def write(self, fname, **kw): + def write(self, fname, **kw): # pylint: disable=arguments-differ """ Write an INI file. """ diff --git a/src/clldutils/iso_639_3.py b/src/clldutils/iso_639_3.py index bf10ab8..8e6b7cd 100644 --- a/src/clldutils/iso_639_3.py +++ b/src/clldutils/iso_639_3.py @@ -9,12 +9,13 @@ import re import csv import string -import typing +from typing import Union, Protocol, Optional import pathlib import datetime import functools import collections import urllib.request +from collections.abc import Generator, Iterable from clldutils.path import TemporaryDirectory from clldutils.ziparchive import ZipArchive @@ -36,12 +37,18 @@ } +class HasSplitlines(Protocol): # pylint: disable=too-few-public-methods,missing-class-docstring + def splitlines(self) -> Iterable[str]: # pylint: disable=C0116 + ... # pragma: no cover + + def _open(path): return urllib.request.urlopen( urllib.request.Request(BASE_URL + path, headers={'User-Agent': USER_AGENT})) -def iterrows(lines): +def iterrows(lines: Iterable[str]) -> Generator[collections.OrderedDict[str, str], None, None]: + """Parse CSV lines into row dicts.""" header = None for i, row in enumerate(csv.reader(io.StringIO('\n'.join(lines)), delimiter='\t')): if i == 0: @@ -51,8 +58,8 @@ def iterrows(lines): class Table(list): - - def __init__(self, name_and_date, date, fp): + """A code table.""" + def __init__(self, name_and_date: str, date: str, fp: HasSplitlines): parts = name_and_date.split('_') # The ISO 639-3 code tables from 2020-05-15 contain a table with a # malformed name - having an excess "0" in the date stamp. @@ -66,12 +73,10 @@ def __init__(self, name_and_date, date, fp): if not name: name = 'Codes' self.name = name - super(Table, self).__init__(list(iterrows( - [line for line in fp.splitlines() if line.strip()], # strip malformed lines. 
- ))) + super().__init__(list(iterrows(line for line in fp.splitlines() if line.strip()))) -def download_tables(outdir=None) -> pathlib.Path: +def download_tables(outdir: Optional[Union[str, pathlib.Path]] = None) -> pathlib.Path: """ Download the zipped ISO tables to `outdir` or cwd. """ @@ -84,7 +89,8 @@ def download_tables(outdir=None) -> pathlib.Path: return target -def iter_tables(zippath=None): +def iter_tables(zippath: Optional[str] = None) -> Generator[Table, None, None]: + """Yield tables from a code tables zip archive.""" with TemporaryDirectory() as tmp: if not zippath: zippath = download_tables(tmp) @@ -99,7 +105,7 @@ def iter_tables(zippath=None): @functools.total_ordering -class Code(object): +class Code: """ Represents one ISO 639-3 code and its associated metadata. @@ -107,7 +113,7 @@ class Code(object): :ivar str name: The language name """ _code_pattern = re.compile(r'\[([a-z]{3})]') - _scope_map = { + _scope_map = { # Scopes for items from table Codes. 'I': 'Individual', 'M': 'Macrolanguage', 'S': 'Special', @@ -128,10 +134,10 @@ class Code(object): 'M': 'merge', } - def __init__(self, item, tablename, registry): + def __init__(self, item: dict[str, str], tablename: str, registry: 'ISO'): code = item['Id'] self._change_to = [] - self.retired = False + self.retired: Union[bool, datetime.date] = False if tablename == 'Codes': self._scope = self._scope_map[item['Scope']] self._type = self._type_map[item['Language_Type']] @@ -155,8 +161,8 @@ def __init__(self, item, tablename, registry): else: raise ValueError(tablename) # pragma: no cover - self.code = code - self.name = item['Ref_Name'] + self.code: str = code + self.name: str = item['Ref_Name'] self._registry = registry @property @@ -164,7 +170,7 @@ def type(self) -> str: """ The type of the code formatted as pair "scope/type" """ - return '{}/{}'.format(self._scope, self._type) + return f'{self._scope}/{self._type}' @property def is_retired(self) -> bool: @@ -174,7 +180,7 @@ def is_retired(self) 
-> bool: return bool(self.retired) @property - def change_to(self) -> typing.List['Code']: + def change_to(self) -> list['Code']: """ List of codes that supersede a retired code. """ @@ -195,17 +201,19 @@ def is_local(self) -> bool: return self._scope == 'Local' @property - def is_macrolanguage(self) -> bool: + def is_macrolanguage(self) -> bool: # pylint: disable=C0116 return self._scope == 'Macrolanguage' @property - def extension(self) -> typing.List['Code']: + def extension(self) -> list['Code']: """ The codes subsumed by a macrolanguage code. """ - if self.is_macrolanguage: - return [self._registry[c] for c in self._registry._macrolanguage[self.code]] - return [] + if not self.is_macrolanguage: + return [] + return [ + self._registry[c] # pylint: disable=W0212 + for c in self._registry._macrolanguage[self.code]] # pylint: disable=W0212 def __hash__(self): return hash(self.code) @@ -217,10 +225,10 @@ def __lt__(self, other): return self.code < other.code def __repr__(self): - return ''.format(self.code, self.type) + return f'' def __str__(self): - return '{0} [{1}]'.format(self.name, self.code) + return f'{self.name} [{self.code}]' class ISO(collections.OrderedDict): @@ -241,7 +249,7 @@ class ISO(collections.OrderedDict): >>> iso.retirements[0].change_to [] """ - def __init__(self, zippath: typing.Optional[typing.Union[str, pathlib.Path]] = None): + def __init__(self, zippath: Optional[Union[str, pathlib.Path]] = None): """ :param zippath: Path to a local copy of the "Complete Set of Tables" (UTF-8). If `None`, \ the tables will be retrieved from the web. 
@@ -250,13 +258,13 @@ def __init__(self, zippath: typing.Optional[typing.Union[str, pathlib.Path]] = N self._tables = {t.name: t for t in iter_tables(zippath=zippath)} if zippath and DATESTAMP_PATTERN.search(zippath.name): digits = map(int, DATESTAMP_PATTERN.search(zippath.name).groups()) - self.date = datetime.date(*digits) + self.date: datetime.date = datetime.date(*digits) else: - self.date = max(t.date for t in self._tables.values()) + self.date: datetime.date = max(t.date for t in self._tables.values()) self._macrolanguage = collections.defaultdict(list) for item in self._tables['macrolanguages']: self._macrolanguage[item['M_Id']].append(item['I_Id']) - super(ISO, self).__init__() + super().__init__() for tablename in ['Codes', 'Retirements']: for item in self._tables[tablename]: if item['Id'] not in self: @@ -267,72 +275,74 @@ def __init__(self, zippath: typing.Optional[typing.Union[str, pathlib.Path]] = N for code in ['q' + x + y for x in string.ascii_lowercase[:string.ascii_lowercase.index('t') + 1] for y in string.ascii_lowercase]: - self[code] = Code(dict(Id=code, Ref_Name=None), 'Local', self) + # Codes in the local use area. 
+ self[code] = Code({'Id': code, 'Ref_Name': None}, 'Local', self) def __str__(self): - return 'ISO 639-3 code tables from {0}'.format(self.date) + return f'ISO 639-3 code tables from {self.date}' - def by_type(self, type_) -> typing.List[Code]: - return [c for c in self.values() if c._type == type_] + def by_type(self, type_) -> list[Code]: + """Return codes by type.""" + return [c for c in self.values() if c._type == type_] # pylint: disable=protected-access @property - def living(self) -> typing.List[Code]: + def living(self) -> list[Code]: """ All codes categorized as "Living" """ return self.by_type('Living') @property - def extinct(self) -> typing.List[Code]: + def extinct(self) -> list[Code]: """ All codes categorized as "Extinct" """ return self.by_type('Extinct') @property - def ancient(self) -> typing.List[Code]: + def ancient(self) -> list[Code]: """ All codes categorized as "Ancient" """ return self.by_type('Ancient') @property - def historical(self) -> typing.List[Code]: + def historical(self) -> list[Code]: """ All codes categorized as "Historical" """ return self.by_type('Historical') @property - def constructed(self) -> typing.List[Code]: + def constructed(self) -> list[Code]: """ All codes categorized as "Constructed" """ return self.by_type('Constructed') @property - def special(self) -> typing.List[Code]: + def special(self) -> list[Code]: """ All codes categorized as "Special" """ return self.by_type('Special') @property - def retirements(self) -> typing.List[Code]: + def retirements(self) -> list[Code]: """ All retired codes """ return [c for c in self.values() if c.is_retired] @property - def macrolanguages(self) -> typing.List[Code]: + def macrolanguages(self) -> list[Code]: """ All macrolanguage codes """ return [c for c in self.values() if c.is_macrolanguage] @property - def languages(self) -> typing.List[Code]: + def languages(self) -> list[Code]: """ All active language codes """ diff --git a/src/clldutils/jsonlib.py 
b/src/clldutils/jsonlib.py index 2d33357..2c4638c 100644 --- a/src/clldutils/jsonlib.py +++ b/src/clldutils/jsonlib.py @@ -25,9 +25,9 @@ import datetime import contextlib import collections -import typing +from typing import Union, TextIO -import dateutil.parser +from ._compat import fromisoformat __all__ = ['parse', 'format', 'dump', 'load', 'update', 'update_ordered'] @@ -52,19 +52,19 @@ def parse(d: dict) -> dict: res = {} for k, v in d.items(): if isinstance(v, str) and DATETIME_ISO_FORMAT.match(v): - v = dateutil.parser.parse(v) + v = fromisoformat(v) elif isinstance(v, dict): v = parse(v) elif isinstance(v, list): v = [ - dateutil.parser.parse(vv) + fromisoformat(vv) if isinstance(vv, str) and DATETIME_ISO_FORMAT.match(vv) else vv for vv in v] res[k] = v return res -def format(value): +def format(value): # pylint: disable=redefined-builtin """ Format a value as ISO timestamp if it is a datetime.date(time) instance, otherwise return it unchanged. @@ -74,7 +74,7 @@ def format(value): return value -def dump(obj, path: typing.Union[typing.TextIO, str, pathlib.Path], **kw): +def dump(obj, path: Union[TextIO, str, pathlib.Path], **kw): """`json.dump` which understands filenames. :param obj: The object to be dumped. @@ -87,7 +87,7 @@ def dump(obj, path: typing.Union[typing.TextIO, str, pathlib.Path], **kw): return json.dump(obj, path, **kw) -def load(path: typing.Union[typing.TextIO, str, pathlib.Path], **kw): +def load(path: Union[TextIO, str, pathlib.Path], **kw): """`json.load` which understands filenames. 
:param kw: Keyword parameters are passed to json.load @@ -124,8 +124,9 @@ def update(path, default=None, load_kw=None, **kw): def update_ordered(path, **kw): + """The update functionality with ordered dicts.""" return update( path, default=collections.OrderedDict(), - load_kw=dict(object_pairs_hook=collections.OrderedDict), + load_kw=dict(object_pairs_hook=collections.OrderedDict), # pylint: disable=R1735 **kw) diff --git a/src/clldutils/lgr.py b/src/clldutils/lgr.py index 469040d..922e6dc 100644 --- a/src/clldutils/lgr.py +++ b/src/clldutils/lgr.py @@ -397,7 +397,9 @@ """ import re +from typing import Optional, Callable import itertools +from collections.abc import Iterable, Mapping PERSONS = { "1": "first person", @@ -490,14 +492,36 @@ } -def pattern(custom=None): +def pattern(custom: Optional[Iterable[str]] = None) -> re.Pattern: + """ + A regex pattern to search for LGR categories. + + .. code-block:: python + + >>> pattern().search('a.1SG.b').groupdict() + {'pre': '.', 'person': '1', 'abbr': 'SG'} + """ return re.compile( - '(?P
<pre>^|[^A-Z1-3])(?P<person>{0})?(?P<abbr>{1})(?=$|[^A-Z1-3])'.format(
+        '(?P<pre>^|[^A-Z1-3])'  # pylint: disable=C0209
+        '(?P<person>{0})?'
+        '(?P<abbr>{1})(?=$|[^A-Z1-3])'.format(
             '|'.join(re.escape(k) for k in PERSONS),
             '|'.join(re.escape(k) for k in itertools.chain(ABBRS, custom or {}))))
 
 
-def replace(string, repl=None, custom=None):
+def replace(
+        string: str,
+        repl: Optional[Callable[[re.Match], str]] = None,
+        custom: Optional[Mapping[str, str]] = None,
+) -> str:
+    """
+    .. code-block:: python
+
+        >>> replace('a-1SG-b', custom={'SG': 'OTHER'})
+        'a-<first person OTHER>-b'
+        >>> replace('a-1SG-b', repl=lambda m: f'.{m.group("abbr")}.')
+        'a.SG.-b'
+    """
     custom = custom or {}
     if repl is None:
         def repl(m):
@@ -505,6 +529,6 @@ def repl(m):
             if m.group('person'):
                 res += PERSONS[m.group('person')] + ' '
             res += custom.get(m.group('abbr'), ABBRS.get(m.group('abbr')))
-            return '{0}<{1}>'.format(m.group('pre'), res)
+            return f"{m.group('pre')}<{res}>"
 
     return pattern(custom).sub(repl, string)
diff --git a/src/clldutils/licenses.py b/src/clldutils/licenses.py
index 58971cc..7bb5977 100644
--- a/src/clldutils/licenses.py
+++ b/src/clldutils/licenses.py
@@ -1,6 +1,10 @@
+# pylint: disable=C0302
+"""
+Licenses suitable for data licensing.
+"""
+from typing import Optional
 import pathlib
-
-import attr
+import dataclasses
 
 _LICENSES = {
     "Glide": {
@@ -1086,23 +1090,27 @@
 }
 
 
-@attr.s
-class License(object):
-    id = attr.ib()
-    name = attr.ib()
-    url = attr.ib()
+@dataclasses.dataclass
+class License:
+    """A license."""
+    id: str
+    name: str
+    url: str
 
     @property
-    def legalcode(self):
+    def legalcode(self) -> Optional[str]:
+        """Return the license text."""
         p = pathlib.Path(__file__).parent / 'legalcode' / self.id
         if p.exists():
             return p.read_text(encoding='utf8')
+        return None
 
 
 _LICENSES = [License(id_, l['name'], l['url']) for id_, l in _LICENSES.items()]
 
 
-def find(q):
+def find(q: str) -> Optional[License]:
+    """Flexibly retrieve a license."""
     for license_ in _LICENSES:
         if q.lower() == license_.id.lower() or q == license_.name or q == license_.url:
             return license_
@@ -1111,3 +1119,4 @@ def find(q):
             u2 = q.split('://')[1]
             if u1.startswith(u2) or u2.startswith(u1):
                 return license_
+    return None
diff --git a/src/clldutils/loglib.py b/src/clldutils/loglib.py
index 77e040e..1e14321 100644
--- a/src/clldutils/loglib.py
+++ b/src/clldutils/loglib.py
@@ -22,7 +22,7 @@
 __all__ = ['get_colorlog', 'Logging']
 
 
-def get_colorlog(name, stream=None, level=logging.INFO) -> logging.Logger:
+def get_colorlog(name: str, stream=None, level=logging.INFO) -> logging.Logger:
     """
     Get a logger set up with `colorlog`'s formatter.
     """
@@ -37,7 +37,7 @@ def get_colorlog(name, stream=None, level=logging.INFO) -> logging.Logger:
     return log
 
 
-class Logging(object):
+class Logging:
     """
     A context manager to execute a block of code at a specific logging level.
     """
diff --git a/src/clldutils/markup.py b/src/clldutils/markup.py
index 787a739..dd21c96 100644
--- a/src/clldutils/markup.py
+++ b/src/clldutils/markup.py
@@ -1,12 +1,17 @@
+"""
+Functionality for marking up text, mostly using Markdown.
+"""
 import io
 import re
 import csv
 import sys
-import typing
+import enum
+from typing import Union, Optional, Callable, Any, IO
+import dataclasses
 import urllib.parse
+from collections.abc import Generator, Sequence, Iterable
 
-import attr
-from tabulate import tabulate
+from prettytable import PrettyTable, TableStyle
 from markdown import markdown
 from lxml import etree
 
@@ -14,11 +19,48 @@
 from clldutils.text import replace_pattern
 
 __all__ = [
-    'Table',
+    'Table', 'TableFormat',
     'iter_markdown_tables', 'iter_markdown_sections', 'add_markdown_text',
     'MarkdownLink', 'MarkdownImageLink']
 
 
+class TableFormat(enum.Enum):
+    """Available formatting options for tables."""
+    pipe = enum.auto()  # pylint: disable=invalid-name
+    simple = enum.auto()  # pylint: disable=invalid-name
+    tsv = enum.auto()  # pylint: disable=invalid-name
+    csv = enum.auto()  # pylint: disable=invalid-name
+    ascii = enum.auto()  # pylint: disable=invalid-name
+
+    @classmethod
+    def get(cls, s: Union[None, str, 'TableFormat']):
+        """Factory method, allowing selection of a format by name."""
+        if s is None:
+            return cls.pipe
+        if isinstance(s, str):
+            return getattr(cls, s)
+        assert isinstance(s, cls), s
+        return s
+
+
+def _padded_row(row, num_rows: int, fill: str = '') -> list[Any]:
+    row = list(row)
+    while len(row) < num_rows:
+        row.append(fill)
+    return row
+
+
+def _dedup_cols(ocols: Sequence[str]) -> list[str]:
+    cols = []
+    for col in ocols:
+        i = 1
+        while col in cols:
+            i += 1
+            col = f'{col}_{i}'
+        cols.append(col)
+    return cols
+
+
 class Table(list):
     """
     A context manager to
@@ -31,9 +73,9 @@ class Table(list):
         >>> with Table('col1', 'col2', tablefmt='simple') as t:
         ...     t.append(['v1', 'v2'])
         ...
-        col1    col2
-        ------  ------
-        v1      v2
+         col1   col2
+        ------ ------
+         v1     v2
 
     For more control of the table rendering, a `Table` can be used without a `with` statement,
     calling :meth:`Table.render` instead:
@@ -43,37 +85,55 @@ class Table(list):
         >>> t = Table('col1', 'col2')
         >>> t.extend([['z', 1], ['a', 2]])
         >>> print(t.render(sortkey=lambda r: r[0], tablefmt='simple'))
-        col1      col2
-        ------  ------
-        a            2
-        z            1
+         col1   col2
+        ------ ------
+         a      2
+         z      1
     """
-    def __init__(self, *cols: str, **kw):
+    def __init__(
+            self,
+            *cols: str,
+            rows: Optional[Sequence[Sequence[Any]]] = None,
+            file: Optional[IO] = None,
+            tablefmt: Optional[Union[str, TableFormat]] = None,
+            floatfmt: Optional[str] = '.2',
+    ):
+        """
+        :param cols: Column names of the table.
+        """
         self.columns = list(cols)
-        super(Table, self).__init__(kw.pop('rows', []))
-        self._file = kw.pop('file', sys.stdout)
-        self._kw = kw
-
-    def render(self,
-               sortkey=None,
-               condensed=True,
-               verbose=False,
-               reverse=False,
-               **kw):
+        super().__init__(rows or [])
+        self._file = file or sys.stdout
+        self._tablefmt = TableFormat.get(tablefmt)
+        self._floatfmt = floatfmt
+
+    def render(  # pylint: disable=R0913,R0917
+            self,
+            sortkey: Callable[[Any], Any] = None,
+            condensed: bool = True,
+            verbose: bool = False,
+            reverse: bool = False,
+            tablefmt: Optional[Union[str, TableFormat]] = None,
+            floatfmt: Optional[str] = None,
+    ) -> str:
         """
         :param sortkey: A callable which can be used as key when sorting the rows.
         :param condensed: Flag signalling whether whitespace padding should be collapsed.
         :param verbose: Flag signalling whether to output additional info.
         :param reverse: Flag signalling whether we should sort in reverse order.
-        :param kw: Additional keyword arguments are passed to the `tabulate` function.
         :return: String representation of the table in the chosen format.
         """
-        tab_kw = dict(tablefmt='pipe', headers=self.columns, floatfmt='.2f')
-        tab_kw.update(self._kw)
-        tab_kw.update(kw)
-        if tab_kw['tablefmt'] == 'tsv':
+        if not self.columns and not self:
+            return ''
+
+        tablefmt = self._tablefmt if tablefmt is None else TableFormat.get(tablefmt)
+
+        if floatfmt is None:
+            floatfmt = self._floatfmt
+
+        if tablefmt in (TableFormat.tsv, TableFormat.csv):
             res = io.StringIO()
-            w = csv.writer(res, delimiter='\t')
+            w = csv.writer(res, delimiter='\t' if tablefmt == TableFormat.tsv else ',')
             w.writerow(self.columns)
             for row in (sorted(self, key=sortkey, reverse=reverse) if sortkey else self):
                 w.writerow(row)
@@ -82,15 +142,37 @@ def render(self,
             if res.endswith('\r\n'):
                 res = res[:-2]
             return res
-        res = tabulate(
-            sorted(self, key=sortkey, reverse=reverse) if sortkey else self, **tab_kw)
-        if tab_kw['tablefmt'] == 'pipe':
+
+        table = PrettyTable()
+        table.field_names = _dedup_cols(self.columns)
+        rows = sorted(self, key=sortkey, reverse=reverse) if sortkey else self
+        if self.columns:
+            rows = [_padded_row(row, len(self.columns)) for row in rows]
+        if not rows:
+            # Force backwards compatible behaviour:
+            # tabulate also displayed headers for empty tables.
+            rows = [_padded_row([], len(self.columns))]
+        table.add_rows(rows)
+
+        if tablefmt == TableFormat.pipe:
+            table.set_style(TableStyle.MARKDOWN)
+        elif tablefmt == TableFormat.simple:
+            table.border = False
+            table.preserve_internal_border = True
+            table.align = 'l'
+            table.vertical_char = ' '
+            table.junction_char = ' '
+
+        table.float_format = floatfmt
+        res = str(table)
+
+        if tablefmt == TableFormat.pipe:
             if condensed:
                 # remove whitespace padding around column content:
                 res = re.sub(r'\|[ ]+', '| ', res)
                 res = re.sub(r'[ ]+\|', ' |', res)
             if verbose:
-                res += '\n\n(%s rows)\n\n' % len(self)
+                res += f'\n\n({len(self)} rows)\n\n'
         return res
 
     def __enter__(self):
@@ -100,8 +182,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         print(self.render(), file=self._file)
 
 
-def iter_markdown_tables(text) -> \
-        typing.Generator[typing.Tuple[typing.List[str], typing.List[typing.List[str]]], None, None]:
+def iter_markdown_tables(text: str) -> Generator[tuple[list[str], list[list[str]]], None, None]:
     """
     Parse tables from a markdown formatted text.
 
@@ -109,7 +190,7 @@ def iter_markdown_tables(text) -> \
     :return: generator of (header, rows) pairs, where "header" is a `list` of column names and \
     rows is a list of lists of row values.
     """
-    def split_row(line, outer_pipes):
+    def split_row(line: str, outer_pipes: bool) -> list[str]:
         line = line.strip()
         if outer_pipes:
             assert line.startswith('|') and line.endswith('|'), 'inconsistent table formatting'
@@ -120,11 +201,11 @@ def split_row(line, outer_pipes):
         yield split_row(header, outer_pipes), [split_row(row, outer_pipes) for row in rows]
 
 
-def _iter_table_blocks(lines):
+def _iter_table_blocks(lines: Iterable[str]) -> Generator[tuple[str, list[str], bool], None, None]:
     # Tables are detected by
     # 1. A header line, i.e. a line with at least one `|`
     # 2. A line separating header and body of the form below
-    SEP = re.compile(r'\s*\|?\s*:?--(-)+:?\s*(\|\s*:?--(-)+:?\s*)+\|?\s*')
+    sep = re.compile(r'\s*\|?\s*:?-(-)*:?\s*(\|\s*:?-(-)*:?\s*)+\|?\s*')
 
     lines = list(lines)
     header, table, outer_pipes = None, [], False
@@ -135,17 +216,17 @@ def _iter_table_blocks(lines):
                     yield header, table, outer_pipes
                 header, table, outer_pipes = None, [], False
             else:
-                if not SEP.fullmatch(line):
+                if not sep.fullmatch(line):
                     table.append(line)
         else:
-            if '|' in line and len(lines) > i + 1 and SEP.fullmatch(lines[i + 1]):
+            if '|' in line and len(lines) > i + 1 and sep.fullmatch(lines[i + 1]):
                 header = line
                 outer_pipes = lines[i + 1].strip().startswith('|')
     if table:
         yield header, table, outer_pipes
 
 
-def iter_markdown_sections(text) -> typing.Generator[typing.Tuple[int, str, str], None, None]:
+def iter_markdown_sections(text) -> Generator[tuple[int, str, str], None, None]:
     """
     Parse sections from a markdown formatted text.
 
@@ -170,9 +251,11 @@ def iter_markdown_sections(text) -> typing.Generator[typing.Tuple[int, str, str]
         yield level, header, ''.join(lines)
 
 
-def add_markdown_text(text: str,
-                      new: str,
-                      section: typing.Optional[typing.Union[typing.Callable, str]] = None) -> str:
+def add_markdown_text(
+        text: str,
+        new: str,
+        section: Optional[Union[Callable[[str], bool], str]] = None,
+) -> str:
     """
     Append markdown text to a (specific section of a) markdown document.
 
@@ -187,7 +270,7 @@ def add_markdown_text(text: str,
     :raises ValueError: The specified section was not encountered.
     """
     res = []
-    for level, header, content in iter_markdown_sections(text):
+    for _, header, content in iter_markdown_sections(text):
         if header:
             res.append(header)
         res.append(content)
@@ -206,7 +289,7 @@ def add_markdown_text(text: str,
     return res
 
 
-@attr.s
+@dataclasses.dataclass
 class MarkdownLink:
     """
     Functionality to detect and manipulate links in markdown text.
@@ -224,36 +307,40 @@ class MarkdownLink:
+        >>> MarkdownLink.replace('[l](http://example.com)', lambda ml: ml.update_url(scheme='https'))
         '[l](https://example.com)'
     """
-    label = attr.ib()
-    url = attr.ib()
+    label: str
+    url: str
     # Link starts with "[" if not preceeded by "!" or escaped with "\".
     # We match up to the next unescaped "]" and only optionally match the href enclosed in "()".
     # This is we don't force parsing up to the next "]("!
-    pattern = re.compile(r'(?<![!\\])\[(?P<label>.*?)(?<!\\)](\((?P<url>[^)]+)\))?')
-    html_link = ('a', 'href')
+    pattern: re.Pattern = re.compile(r'(?<![!\\])\[(?P<label>.*?)(?<!\\)](\((?P<url>[^)]+)\))?')
+    html_link: tuple[str, str] = ('a', 'href')
 
     @classmethod
-    def from_string(cls, s):
+    def from_string(cls, s) -> 'MarkdownLink':
+        """Create an instance from a Markdown formatted string, i.e. [...](...)."""
         try:
             return cls.from_match(cls.pattern.search(s))
-        except AttributeError:
-            raise ValueError('No markdown link found')
+        except AttributeError as e:
+            raise ValueError('No markdown link found') from e
 
     @classmethod
-    def from_match(cls, match):
+    def from_match(cls, match) -> 'MarkdownLink':
+        """Create an instance from a match object as returned e.g. by .pattern.search."""
         if match.groupdict()['url'] is None:
             raise AttributeError
         return cls(**match.groupdict())
 
     @property
-    def parsed_url(self):
+    def parsed_url(self) -> urllib.parse.ParseResult:
+        """Parsed components of the link's HREF value."""
         return urllib.parse.urlparse(self.url)
 
     @property
-    def parsed_url_query(self):
+    def parsed_url_query(self) -> dict[str, list[str]]:
+        """The query of the link's HREF value."""
         return urllib.parse.parse_qs(self.parsed_url.query, keep_blank_values=True)
 
-    def update_url(self, **comps):
+    def update_url(self, **comps) -> 'MarkdownLink':
         """
         Updates the `MarkdownLink.url` according to `comps`.
 
@@ -272,14 +359,16 @@ def update_url(self, **comps):
         return self
 
     def __str__(self):
-        return '[{0.label}]({0.url})'.format(self)
+        return f'[{self.label}]({self.url})'
 
     @classmethod
-    def replace(cls,
-                md: str,
-                repl: typing.Callable,
-                simple: bool = True,
-                markdown_kw: typing.Optional[dict] = None) -> str:
+    def replace(
+            cls,
+            md: str,
+            repl: Callable[['MarkdownLink'], Any],
+            simple: bool = True,
+            markdown_kw: Optional[dict] = None,
+    ) -> str:
         """
         Replace links in a markdown document.
 
@@ -363,7 +452,7 @@ def replace(cls,
                     [label](xyz)
 
                 [label](url)
-       """
+        """
         links = []
         if not simple:
             # We convert the markdown text to HTML and extract the links:
@@ -373,7 +462,7 @@ def replace(cls,
                 links.append((slug(''.join(node.itertext())), node.get(attrib)))
             links = list(reversed(links))
 
-        def repl_wrapper(m):
+        def repl_wrapper(m: re.Match) -> Generator[str, None, None]:
             if m.groupdict()['url'] is None:
                 yield m.string[m.start():m.end()]
                 return
@@ -398,10 +487,11 @@ def repl_wrapper(m):
         return replace_pattern(cls.pattern, repl_wrapper, md)
 
 
-@attr.s
+@dataclasses.dataclass
 class MarkdownImageLink(MarkdownLink):
-    pattern = re.compile(r'!\[(?P