diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..b42ca8c --- /dev/null +++ b/.editorconfig @@ -0,0 +1,15 @@ +root = true + +[*] +charset = utf-8 +end_of_line = lf +indent_size = 4 +indent_style = space +insert_final_newline = true +trim_trailing_whitespace = true + +[*.py] +max_line_length = 79 + +[*.yml] +indent_size = 2 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..f49b137 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,107 @@ +name: build + +on: [push, pull_request] + +env: + PYTEST_ADDOPTS: "--color=yes" + +jobs: + test: + name: Test + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7, 3.8] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Set up Pip cache + uses: actions/cache@v2 + id: pip-cache + with: + path: ~/.cache/pip + key: pip-${{ hashFiles('**/pyproject.toml') }} + - name: Upgrade Pip + run: python -m pip install -U pip + - name: Install Poetry + run: python -m pip install poetry + - name: Set up Poetry cache + uses: actions/cache@v2 + id: poetry-cache + with: + path: ~/.cache/pypoetry/virtualenvs + key: poetry-${{ hashFiles('**/poetry.lock') }} + - name: Install dependencies + run: | + poetry run pip install -U pip + poetry install + - name: Install pandoc + run: | + wget https://github.com/jgm/pandoc/releases/download/2.11.0.2/pandoc-2.11.0.2-1-amd64.deb + sudo dpkg -i pandoc-2.11.0.2-1-amd64.deb + - name: Run tests + run: poetry run invoke tests + + + lint: + name: Lint + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Set Poetry cache + uses: actions/cache@v2 + id: poetry-cache + with: + path: ~/.cache/pypoetry/virtualenvs + key: poetry-${{ hashFiles('**/poetry.lock') }} + - name: 
Upgrade Pip + run: python -m pip install -U pip + - name: Install Poetry + run: python -m pip install poetry + - name: Install dependencies + run: | + poetry run pip install -U pip + poetry install + - name: Run linters + run: poetry run invoke lint + + + deploy: + name: Deploy + needs: [test, lint] + runs-on: ubuntu-latest + if: ${{ github.ref=='refs/heads/main' && github.event_name!='pull_request' }} + + steps: + - uses: actions/checkout@v2 + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + - name: Check release + id: check_release + run: | + python -m pip install pip --upgrade + python -m pip install poetry githubrelease autopub + echo "##[set-output name=release;]$(autopub check)" + - name: Publish + if: ${{ steps.check_release.outputs.release=='' }} + env: + GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} + PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} + run: | + git remote set-url origin https://$GITHUB_TOKEN@github.com/${{ github.repository }} + autopub prepare + poetry build + autopub commit + autopub githubrelease + poetry publish -u __token__ -p $PYPI_PASSWORD diff --git a/.gitignore b/.gitignore index c5a9333..6435045 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,55 @@ -*.pyc -.idea -*~ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +.DS_Store + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# VSCode Project Settings +.vscode diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..1cf1dd8 --- /dev/null +++ 
b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +# See https://pre-commit.com/hooks.html for info on hooks +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: check-added-large-files + - id: check-ast + - id: check-toml + - id: check-yaml + - id: debug-statements + - id: detect-private-key + - id: end-of-file-fixer + - id: trailing-whitespace + + - repo: https://github.com/psf/black + rev: 19.10b0 + hooks: + - id: black + + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.3 + hooks: + - id: flake8 + args: [--max-line-length=88] + language_version: python3.7 + + - repo: https://github.com/timothycrosley/isort + rev: 5.4.2 + hooks: + - id: isort diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..aee7097 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,9 @@ +Contributing +============ + +Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues][]. + +To start contributing to this plugin, review the [Contributing to Pelican][] documentation, beginning with the **Contributing Code** section. + +[existing issues]: https://github.com/pelican-plugins/pandoc-reader/issues +[Contributing to Pelican]: https://docs.getpelican.com/en/latest/contribute.html diff --git a/README.md b/README.md index 08055fc..ca8674d 100644 --- a/README.md +++ b/README.md @@ -1,52 +1,261 @@ -pandoc_reader -============= +# Pandoc Reader -A pandoc [markdown] reader plugin for [pelican] +Pandoc Reader is a [Pelican](http://getpelican.com) plugin that converts documents written in [Pandoc's Markdown](https://pandoc.org/MANUAL.html#pandocs-markdown) into HTML 5. 
+## Prerequisites -Requirements ------------- +The plugin has a number of dependencies: - - [pandoc] in $PATH +* Python >= 3.7 +* Pelican >= 4.5.1 +* Pandoc >= 2.11.0 +* PyYAML >= 5.3.1 +* Markdown Word Count >= 0.0.1 +All five **must** be installed locally on your machine or webserver. -Installation ------------- +To find out how to install Python please see [here](https://wiki.python.org/moin/BeginnersGuide/Download) -Instructions for installation of pelican plugins can be obtained from the [pelican plugin manual](https://github.com/getpelican/pelican-plugins/blob/master/Readme.rst). +To install Pandoc follow these [instructions](https://pandoc.org/installing.html). +Pelican, [PyYAML](https://pypi.org/project/PyYAML/) and the [Markdown Word Count](https://github.com/gandreadis/markdown-word-count) packages can be installed using [pip](https://pip.pypa.io/en/stable/installing/) as shown below: -Configuration -------------- +```bash +pip install pelican +pip install PyYAML +pip install markdown-word-count +``` -Additional command line parameters can be passed to pandoc via the PANDOC_ARGS parameter. +The plugin should function correctly on newer versions of the above dependencies as well. - PANDOC_ARGS = [ - '--mathjax', - '--smart', - '--toc', - '--toc-depth=2', - '--number-sections', - ] +## Installation -Pandoc's markdown extensions can be enabled or disabled via the -PANDOC_EXTENSIONS parameter. +To install the plugin execute the following command: - PANDOC_EXTENSIONS = [ - '+hard_line_breaks', - '-citations' - ] +```bash +python -m pip install pelican-pandoc-reader +``` -Contributing ------------- +## Usage -1. Fork it -2. Create your feature branch (`git checkout -b my-new-feature`) -3. Commit your changes (`git commit -am 'Add some feature'`) -4. Push to the branch (`git push origin my-new-feature`) -5. Create new Pull Request +This plugin converts Pandoc's Markdown into HTML 5. 
Conversion from other flavours of Markdown is supported but requires the use of a default file as described [here](https://github.com/nandac/pandoc-reader#method-two-using-pandoc-defaults-files). +Converting to formats other than HTML 5 is not supported. -[markdown]: http://daringfireball.net/projects/markdown/ -[pandoc]: http://johnmacfarlane.net/pandoc/ -[pelican]: http://getpelican.com +### Specifying File Metadata + +The plugin expects all Markdown files to start with a YAML block as shown below. + +```yaml +--- +title: "" +author: "" +date: "" +--- +``` + +or + +```yaml +... +title: "" +author: "" +date: "" +... +``` + +**Note: The YAML block shown above is Pandoc's syntax for specifying file metadata. This is different to Pelican's format. You may need to rewrite the metadata in your files, in Pelican's format, if you stop using this plugin.** + +YAML blocks that define more than one level, such as YAML lists, are not supported, although they are supported by Pandoc. This is due to metadata processing limitations. In cases where you would normally add a YAML list, use a comma-separated string instead. + +More information on Pandoc's YAML metadata blocks is available [here](https://pandoc.org/MANUAL.html#metadata-blocks). + +Information about Pelican's predefined metadata is available [here](https://docs.getpelican.com/en/stable/content.html#file-metadata). + +### Specifying Pandoc Options + +The plugin supports two **mutually exclusive** methods to pass options to Pandoc. 
+ +#### Method One: Using Settings in `pelicanconf.py` + +The first method involves configuring two settings in your `pelicanconf.py` file: + +* `PANDOC_ARGS` +* `PANDOC_EXTENSIONS` + +In the `PANDOC_ARGS` parameter you may specify any argument supported by Pandoc as shown below: + +```python +PANDOC_ARGS = [ + '--mathjax', + '--citeproc' +] +``` + +Then in the `PANDOC_EXTENSIONS` parameter you may enable/disable any number of the supported [Pandoc extensions](https://pandoc.org/MANUAL.html#extensions): + +```python +PANDOC_EXTENSIONS = [ + '+footnotes', # Enabled extension + '-pipe_tables' # Disabled extension +] +``` + +#### Method Two: Using Pandoc Default Files + +The second method involves specifying the path(s) to one or more Pandoc default file(s), with all your preferences written in YAML format. + +These paths should be set in your `pelicanconf.py` file by using the setting `PANDOC_DEFAULT_FILES`. The paths may be absolute or relative but we recommend using relative paths as they are more portable. + +```python +PANDOC_DEFAULT_FILES = [ + '', + '' +] +``` + +Here is a minimal example of content that should be available in a Pandoc default file: + +```yaml +reader: markdown +writer: html5 +``` + +Using default files has the added benefit of allowing you to use other Markdown flavors supported by Pandoc, such as [CommonMark](https://commonmark.org/) and [GitHub-Flavored Markdown](https://docs.github.com/en/free-pro-team@latest/github/writing-on-github). + +Please see [Pandoc Default files](https://pandoc.org/MANUAL.html#default-files) for a more complete example. 
+ +**Note: In both methods specifying the arguments `--standalone` or `--self-contained` is not supported and will result in an error.** + +### Generating a Table of Contents + +If you desire to create a Table of Contents for posts or pages, you may do so by specifying the `--toc` or `--table-of-contents` argument in the `PANDOC_ARGS` setting as shown: + +```python +PANDOC_ARGS = [ + '--toc' +] +``` + +or + +```python +PANDOC_ARGS = [ + '--table-of-contents' +] +``` + +To set this in a Pandoc default file use the syntax below: + +```yaml +table-of-contents: true +``` + +The table of contents will be available for use in templates using the `{{ article.toc }}` or `{{ page.toc }}` Jinja template variables. + +### Enabling Citations + +You may enable citations by specifying the `citations` extension and the `-C` or `--citeproc` option. + +Set the `PANDOC_ARGS` and `PANDOC_EXTENSIONS` in `pelicanconf.py` as shown below: + +```python +PANDOC_ARGS = [ + '--citeproc' +] +``` + +or + +```python +PANDOC_ARGS = [ + '-C' +] +``` + +and + +```python +PANDOC_EXTENSIONS = [ + '+citations' +] +``` + +If you are using a Pandoc default file you need the following as a bare minimum to enable citations: + +```yaml +reader: markdown+citations +writer: html5 + +citeproc: true +``` + +Without these settings citations will not be processed by the plugin. + +You may write your bibliography in any format supported by Pandoc with the appropriate extensions specified. However, you **must** name the bibliography file the same as your blog. + +For example, a blog with the file name `my-blog.md` should have a bibliography file called `my-blog.bib`, `my-blog.json`, `my-blog.yaml` or `my-blog.bibtex` in the same directory as your blog, or in a subdirectory of the directory that your blog resides in. Failure to do so will mean that the references will not be picked up. 
+ +#### Known Issues with Citations + +If enabling citations with a specific style, you need to specify a CSL (Citation Style Language) file, available from the [Zotero Style Repository](https://www.zotero.org/styles). For example, if you are using the `ieee-with-url` style file, it may be specified in your `pelicanconf.py` as shown: + +```python +PANDOC_ARGS = [ + '--csl=https://www.zotero.org/styles/ieee-with-url' +] +``` + +Or in a Pandoc default file like so: + +```yaml +csl: "https://www.zotero.org/styles/ieee-with-url" +``` + +Specifying a remote CSL file as shown above dramatically increases the time taken to process the Markdown content. + +To improve processing speed, it is highly recommended that you use a local copy of the CSL file downloaded from Zotero. + +You may then reference it in `pelicanconf.py` as shown below: + +```python +PANDOC_ARGS = [ + '--csl=path/to/file/ieee-with-url.csl' +] +``` + +Or in a Pandoc default file like so: + +```yaml +csl: "path/to/file/ieee-with-url.csl" +``` + +### Calculating and Displaying Reading Time + +The plugin may be used to calculate the reading time of articles and pages by setting `CALCULATE_READING_TIME` to `True` in your `pelicanconf.py` file: + +```python +CALCULATE_READING_TIME = True +``` + +You may display the reading time using the `{{ article.reading_time }}` or `{{ page.reading_time }}` template variables. The unit of time will be displayed as minute for reading times less than or equal to one minute, or minutes for those greater than one minute. + +The reading time is calculated by dividing the number of words by the reading speed, which is the average number of words read in a minute. + +The default value for reading speed is set to 200 words per minute, but may be customized by setting `READING_SPEED` to the desired words per minute value in `pelicanconf.py`: + +```python +READING_SPEED = <words per minute> +``` + +The number of words in a document is calculated using the Markdown Word Count Python package. 
+ +## Contributing + +Contributions are welcome and much appreciated. Every little bit helps. You can contribute by improving the documentation, adding missing features, and fixing bugs. You can also help out by reviewing and commenting on [existing issues](https://github.com/pelican-plugins/pandoc-reader/issues). + +To start contributing to this plugin, review the [Contributing to Pelican](https://docs.getpelican.com/en/latest/contribute.html) documentation, beginning with the **Contributing Code** section. + +## Credits + +Originally authored by [Hinrich B. Winther](https://github.com/liob) in December 2014, which was subsequently forked and completely redesigned and rewritten by [Nandakumar Chandrasekhar](https://www.linkedin.com/in/nandakumar-chandrasekhar-a400b45b/) in October 2020. diff --git a/__init__.py b/__init__.py deleted file mode 100644 index 31d0856..0000000 --- a/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .pandoc_reader import * diff --git a/pandoc_reader.py b/pandoc_reader.py deleted file mode 100644 index 87c7735..0000000 --- a/pandoc_reader.py +++ /dev/null @@ -1,48 +0,0 @@ -import subprocess -from pelican import signals -from pelican.readers import BaseReader -from pelican.utils import pelican_open - -class PandocReader(BaseReader): - enabled = True - file_extensions = ['md', 'markdown', 'mkd', 'mdown'] - - def read(self, filename): - with pelican_open(filename) as fp: - text = list(fp.splitlines()) - - metadata = {} - for i, line in enumerate(text): - kv = line.split(':', 1) - if len(kv) == 2: - name, value = kv[0].lower(), kv[1].strip() - metadata[name] = self.process_metadata(name, value) - else: - content = "\n".join(text[i:]) - break - - extra_args = self.settings.get('PANDOC_ARGS', []) - extensions = self.settings.get('PANDOC_EXTENSIONS', '') - if isinstance(extensions, list): - extensions = ''.join(extensions) - - pandoc_cmd = ["pandoc", "--from=markdown" + extensions, "--to=html5"] - pandoc_cmd.extend(extra_args) - - proc = 
subprocess.Popen(pandoc_cmd, - stdin = subprocess.PIPE, - stdout = subprocess.PIPE) - - output = proc.communicate(content.encode('utf-8'))[0].decode('utf-8') - status = proc.wait() - if status: - raise subprocess.CalledProcessError(status, pandoc_cmd) - - return output, metadata - -def add_reader(readers): - for ext in PandocReader.file_extensions: - readers.reader_classes[ext] = PandocReader - -def register(): - signals.readers_init.connect(add_reader) diff --git a/pelican/plugins/pandoc_reader/__init__.py b/pelican/plugins/pandoc_reader/__init__.py new file mode 100644 index 0000000..124ec0b --- /dev/null +++ b/pelican/plugins/pandoc_reader/__init__.py @@ -0,0 +1,2 @@ +"""Importing pandoc_reader package.""" +from .pandoc_reader import * # NOQA diff --git a/pelican/plugins/pandoc_reader/pandoc_reader.py b/pelican/plugins/pandoc_reader/pandoc_reader.py new file mode 100644 index 0000000..e44cc3b --- /dev/null +++ b/pelican/plugins/pandoc_reader/pandoc_reader.py @@ -0,0 +1,368 @@ +"""Reader that processes Pandoc Markdown and returns HTML 5.""" +import math +import os +import shutil +import subprocess + +from yaml import safe_load + +from mwc.counter import count_words_in_markdown +from pelican import signals +from pelican.readers import BaseReader +from pelican.utils import pelican_open + +DIR_PATH = os.path.dirname(__file__) +TEMPLATES_PATH = os.path.abspath(os.path.join(DIR_PATH, "templates")) +TOC_TEMPLATE = "toc-template.html" +DEFAULT_READING_SPEED = 200 # Words per minute + +ENCODED_LINKS_TO_RAW_LINKS_MAP = { + "%7Bstatic%7D": "{static}", + "%7Battach%7D": "{attach}", + "%7Bfilename%7D": "{filename}", +} + +VALID_INPUT_FORMATS = ("markdown", "commonmark", "gfm") +VALID_OUTPUT_FORMATS = ("html", "html5") +UNSUPPORTED_ARGUMENTS = ("--standalone", "--self-contained") +VALID_BIB_EXTENSIONS = ["json", "yaml", "bibtex", "bib"] +FILE_EXTENSIONS = ["md", "markdown", "mkd", "mdown"] + + +class PandocReader(BaseReader): + """Convert files written in Pandoc Markdown to 
HTML 5.""" + + enabled = True + file_extensions = FILE_EXTENSIONS + + def read(self, source_path): + """Parse Pandoc Markdown and return HTML5 markup and metadata.""" + # Check if pandoc is installed and is executable + if not shutil.which("pandoc"): + raise Exception("Could not find Pandoc. Please install.") + + # Open markdown file and read content + content = "" + with pelican_open(source_path) as file_content: + content = file_content + + # Retrieve HTML content and metadata + output, metadata = self._create_html(source_path, content) + + return output, metadata + + def _create_html(self, source_path, content): + """Create HTML5 content.""" + # Get settings set in pelicanconf.py + default_files = self.settings.get("PANDOC_DEFAULT_FILES", []) + arguments = self.settings.get("PANDOC_ARGS", []) + extensions = self.settings.get("PANDOC_EXTENSIONS", []) + + if isinstance(extensions, list): + extensions = "".join(extensions) + + # Check validity of arguments or default files + table_of_contents, citations = self._validate_fields( + default_files, arguments, extensions + ) + + # Construct preliminary pandoc command + pandoc_cmd = self._construct_pandoc_command( + default_files, arguments, extensions + ) + + # Find and add bibliography if citations are specified + if citations: + for bib_file in self._find_bibs(source_path): + pandoc_cmd.append("--bibliography={0}".format(bib_file)) + + # Create HTML content + output = self._run_pandoc(pandoc_cmd, content) + + # Replace all occurrences of %7Bstatic%7D to {static}, + # %7Battach%7D to {attach} and %7Bfilename%7D to {filename} + # so that static links are resolvable by pelican + for encoded_str, raw_str in ENCODED_LINKS_TO_RAW_LINKS_MAP.items(): + output = output.replace(encoded_str, raw_str) + + metadata = {} + if table_of_contents: + # Create table of contents and add to metadata + metadata["toc"] = self.process_metadata( + "toc", self._create_toc(pandoc_cmd, content) + ) + + if 
self.settings.get("CALCULATE_READING_TIME", []): + # Calculate reading time and add to metadata + metadata["reading_time"] = self.process_metadata( + "reading_time", self._calculate_reading_time(content) + ) + + # Parse YAML metadata placed in the document's header + metadata = self._process_header_metadata( + list(content.splitlines()), metadata, pandoc_cmd + ) + + return output, metadata + + def _validate_fields(self, default_files, arguments, extensions): + """Validate fields and return citations and ToC request values.""" + # If default_files is empty then validate the argument and extensions + if not default_files: + # Validate the arguments to see that they are supported + # by the plugin + self._check_arguments(arguments) + + # Check if citations have been requested + citations = self._check_if_citations(arguments, extensions) + + # Check if table of contents has been requested + table_of_contents = self._check_if_toc(arguments) + else: + # Validate default files and get the citations + # abd table of contents request value + citations, table_of_contents = self._check_defaults(default_files) + return table_of_contents, citations + + def _check_defaults(self, default_files): + """Check if the given Pandoc defaults file has valid values.""" + citations = False + table_of_contents = False + for default_file in default_files: + defaults = {} + + # Convert YAML data to a Python dictionary + with open(default_file) as file_handle: + defaults = safe_load(file_handle) + + self._check_if_unsupported_settings(defaults) + reader = self._check_input_format(defaults) + self._check_output_format(defaults) + + if not citations: + if defaults.get("citeproc", "") and "+citations" in reader: + citations = True + + if not table_of_contents: + if defaults.get("table-of-contents", ""): + table_of_contents = True + + return citations, table_of_contents + + def _create_toc(self, pandoc_cmd, content): + """Generate table of contents.""" + toc_args = [ + "--standalone", + 
"--template", + os.path.join(TEMPLATES_PATH, TOC_TEMPLATE), + ] + + pandoc_cmd = pandoc_cmd + toc_args + table_of_contents = self._run_pandoc(pandoc_cmd, content) + return table_of_contents + + def _calculate_reading_time(self, content): + """Calculate time taken to read content.""" + reading_speed = self.settings.get( + "READING_SPEED", DEFAULT_READING_SPEED + ) + wordcount = count_words_in_markdown(content) + + time_unit = "minutes" + try: + reading_time = math.ceil(float(wordcount) / float(reading_speed)) + if reading_time == 1: + time_unit = "minute" + reading_time = "{} {}".format(str(reading_time), time_unit) + except ValueError as words_per_minute_nan: + raise ValueError( + "READING_SPEED setting must be a number." + ) from words_per_minute_nan + + return reading_time + + def _process_header_metadata(self, content, metadata, pandoc_cmd): + """Process YAML metadata and export.""" + # Check that the given text is not empty + if not content: + raise Exception("Could not find metadata. File is empty.") + + # Check that the first line of the file starts with a YAML header + if content[0].strip() not in ["---", "..."]: + raise Exception("Could not find metadata header '...' 
or '---'.") + + # Find the end of the YAML block + lines = content[1:] + yaml_end = "" + for line_num, line in enumerate(lines): + if line.strip() in ["---", "..."]: + yaml_end = line_num + break + + # Check if the end of the YAML block was found + if not yaml_end: + raise Exception("Could not find end of metadata block.") + + # Process the YAML block + for line in lines[:yaml_end]: + metalist = line.split(":", 1) + if len(metalist) == 2: + key, value = ( + metalist[0].lower(), + metalist[1].strip().strip('"'), + ) + # Takes care of metadata that should be converted to HTML + if key in self.settings["FORMATTED_FIELDS"]: + value = self._run_pandoc(pandoc_cmd, value) + metadata[key] = self.process_metadata(key, value) + return metadata + + @staticmethod + def _construct_pandoc_command(default_files, arguments, extensions): + """Construct Pandoc command for content.""" + pandoc_cmd = [] + if not default_files: + pandoc_cmd = [ + "pandoc", + "--from", + "markdown" + extensions, + "--to", + "html5", + ] + pandoc_cmd.extend(arguments) + else: + pandoc_cmd = ["pandoc"] + for default_file in default_files: + pandoc_cmd.append("--defaults={0}".format(default_file)) + return pandoc_cmd + + @staticmethod + def _run_pandoc(pandoc_cmd, content): + """Execute the given pandoc command and return output.""" + output = subprocess.run( + pandoc_cmd, + input=content, + capture_output=True, + encoding="utf-8", + check=True, + ) + return output.stdout + + @staticmethod + def _check_if_citations(arguments, extensions): + """Check if citations are specified.""" + citations = False + if arguments and extensions: + if ( + "--citeproc" in arguments or "-C" in arguments + ) and "+citations" in extensions: + citations = True + return citations + + @staticmethod + def _check_if_toc(arguments): + """Check if a table of contents should be generated.""" + table_of_contents = False + if arguments: + if "--toc" in arguments or "--table-of-contents" in arguments: + table_of_contents = True + return 
table_of_contents + + @staticmethod + def _find_bibs(source_path): + """Find bibliographies recursively in the sourcepath given.""" + bib_files = [] + filename = os.path.splitext(os.path.basename(source_path))[0] + directory_path = os.path.dirname(os.path.abspath(source_path)) + for root, _, files in os.walk(directory_path): + for extension in VALID_BIB_EXTENSIONS: + bib_name = ".".join([filename, extension]) + if bib_name in files: + bib_files.append(os.path.join(root, bib_name)) + return bib_files + + @staticmethod + def _check_arguments(arguments): + """Check to see that only supported arguments have been passed.""" + for arg in arguments: + if arg in UNSUPPORTED_ARGUMENTS: + raise ValueError("Argument {0} is not supported.".format(arg)) + + @staticmethod + def _check_if_unsupported_settings(defaults): + """Check if unsupported settings are specified in the defaults.""" + for arg in UNSUPPORTED_ARGUMENTS: + arg = arg[2:] + if defaults.get(arg, ""): + raise ValueError( + "The default {} should be set to false.".format(arg) + ) + + @staticmethod + def _check_input_format(defaults): + """Check if the input format given is a Markdown variant.""" + reader = "" + reader_input = defaults.get("reader", "") + from_input = defaults.get("from", "") + + # Case where no input format is specified + if not reader_input and not from_input: + raise ValueError("No input format specified.") + + # Case where both reader and from are specified which is not supported + if reader_input and from_input: + raise ValueError( + ( + "Specifying both from and reader is not supported." + " Please specify just one." 
+ ) + ) + + if reader_input or from_input: + if reader_input: + reader = reader_input + elif from_input: + reader = from_input + + reader_prefix = reader.replace("+", "-").split("-")[0] + + # Check to see if the reader_prefix matches a valid input + if not reader_prefix.startswith(VALID_INPUT_FORMATS): + raise ValueError("Input type has to be a markdown variant.") + return reader + + @staticmethod + def _check_output_format(defaults): + """Check if the output format is HTML or HTML5.""" + writer_output = defaults.get("writer", "") + to_output = defaults.get("to", "") + + # Case where both writer and to are specified which is not supported + if writer_output and to_output: + raise ValueError( + ( + "Specifying both to and writer is not supported." + " Please specify just one." + ) + ) + + # Case where neither writer nor to value is set to html + if ( + writer_output not in VALID_OUTPUT_FORMATS + and to_output not in VALID_OUTPUT_FORMATS + ): + output_formats = " or ".join(VALID_OUTPUT_FORMATS) + raise ValueError( + "Output format type must be either {}.".format(output_formats) + ) + + +def add_reader(readers): + """Add the PandocReader as the reader for all Pandoc Markdown files.""" + for ext in PandocReader.file_extensions: + readers.reader_classes[ext] = PandocReader + + +def register(): + """Register the PandocReader.""" + signals.readers_init.connect(add_reader) diff --git a/pelican/plugins/pandoc_reader/templates/toc-template.html b/pelican/plugins/pandoc_reader/templates/toc-template.html new file mode 100644 index 0000000..ceb3efe --- /dev/null +++ b/pelican/plugins/pandoc_reader/templates/toc-template.html @@ -0,0 +1,8 @@ +$if(toc)$ + +$endif$ diff --git a/pelican/plugins/pandoc_reader/test_content/empty.md b/pelican/plugins/pandoc_reader/test_content/empty.md new file mode 100644 index 0000000..e69de29 diff --git a/pelican/plugins/pandoc_reader/test_content/mathjax_content.md b/pelican/plugins/pandoc_reader/test_content/mathjax_content.md new file mode 
100644 index 0000000..f982084 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/mathjax_content.md @@ -0,0 +1,8 @@ +--- +title: "MathJax Content" +author: "My Author" +date: "2020-10-16" +--- +$$ +e^{i\theta} = \cos\theta + i \sin\theta. +$$ diff --git a/pelican/plugins/pandoc_reader/test_content/no_metadata.md b/pelican/plugins/pandoc_reader/test_content/no_metadata.md new file mode 100644 index 0000000..72aa6f1 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/no_metadata.md @@ -0,0 +1,3 @@ +# File that does not contain any metadata + +This is a file that does not have any metadata. diff --git a/pelican/plugins/pandoc_reader/test_content/no_metadata_end.md b/pelican/plugins/pandoc_reader/test_content/no_metadata_end.md new file mode 100644 index 0000000..195442c --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/no_metadata_end.md @@ -0,0 +1,4 @@ +--- +title: "No Metadata End" +author: "My Author" +date: "2020-10-16" diff --git a/pelican/plugins/pandoc_reader/test_content/no_title_in_metadata.md b/pelican/plugins/pandoc_reader/test_content/no_title_in_metadata.md new file mode 100644 index 0000000..31da45b --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/no_title_in_metadata.md @@ -0,0 +1,5 @@ +--- +author: "My Author" +date: "2020-10-16" +--- +This is some valid content that should pass. If it does not pass we will know something is wrong. diff --git a/pelican/plugins/pandoc_reader/test_content/reading_time_content.md b/pelican/plugins/pandoc_reader/test_content/reading_time_content.md new file mode 100644 index 0000000..4610dba --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/reading_time_content.md @@ -0,0 +1,9 @@ +--- +title: "Reading time Content" +author: "My Author" +date: "2020-10-16" +--- + +## What is Lorem Ipsum + +Lorem Ipsum is simply dummy text of the printing and typesetting industry. 
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum. diff --git a/pelican/plugins/pandoc_reader/test_content/valid_content.md b/pelican/plugins/pandoc_reader/test_content/valid_content.md new file mode 100644 index 0000000..7cfa55b --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/valid_content.md @@ -0,0 +1,6 @@ +--- +title: "Valid Content" +author: "My Author" +date: "2020-10-16" +--- +This is some valid content that should pass. If it does not pass we will know something is wrong. diff --git a/pelican/plugins/pandoc_reader/test_content/valid_content_with_citation.bib b/pelican/plugins/pandoc_reader/test_content/valid_content_with_citation.bib new file mode 100644 index 0000000..fded15a --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/valid_content_with_citation.bib @@ -0,0 +1,59 @@ +@online{castelvecchi2016, +url = {https://www.nature.com/news/feuding-physicists-turn-to-philosophy-for-help-1.19076}, +title = {{Feuding physicists turn to philosophy for help}}, +titleaddon = {String theory is at the heart of a debate over the integrity of the scientific method itself}, +author = {Davide Castelvecchi}, +date = {2016-01-05}, +urldate = {2020-11-12} +} + +@online{francis2019, +url = {https://www.scientificamerican.com/article/is-string-theory-science/}, +title = {{Falsifiability and physics}}, +titleaddon = {Can a theory that isn’t completely testable still be useful to physics?}, +author = {Matthew R Francis}, +date = {2019-04-23}, +urldate = {2020-11-12} +} + +@online{alves2017, +url = 
{https://metafact.io/factchecks/30-is-string-theory-falsifiable}, +title = {{Is String theory falsifiable?}}, +titleaddon = {Can a theory that isn’t completely testable still be useful to physics?}, +author = {Rafael Alves Batista and Joel Primack}, +date = {circa 2017}, +urldate = {2020-11-12} +} + +@online{siegel2015, +url = {https://www.forbes.com/sites/startswithabang/2015/12/23/why-string-theory-is-not-science/}, +title = {{Why String Theory Is Not A Scientific Theory}}, +author = {Ethan Siegel}, +date = {2015-12-23}, +urldate = {2020-11-12} +} + +@online{mann2019, +url = {https://www.livescience.com/65033-what-is-string-theory.html}, +title = {{What Is String Theory?}}, +author = {Adam Mann}, +date = {2019-03-20}, +urldate = {2020-11-12} +} + +@online{wood2019, +url = {https://www.space.com/17594-string-theory.html}, +title = {{What Is String Theory?}}, +titleaddon = {Reference Article: A simplified explanation and brief history of string theory}, +author = {Charlie Wood}, +date = {2019-07-11}, +urldate = {2020-11-12} +} + +@online{jones2020, +url = {https://www.thoughtco.com/what-is-string-theory-2699363}, +title = {{The Basics of String Theory}}, +author = {Andrew Zimmerman Jones}, +date = {2019-03-02}, +urldate = {2020-11-12} +} diff --git a/pelican/plugins/pandoc_reader/test_content/valid_content_with_citation.md b/pelican/plugins/pandoc_reader/test_content/valid_content_with_citation.md new file mode 100644 index 0000000..82e9969 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/valid_content_with_citation.md @@ -0,0 +1,9 @@ +--- +title: Valid Content With Citation +author: My Author +date: 2020-10-16 +summary: "But this foundational principle of science has now been called into question by [String Theory](https://www.britannica.com/science/string-theory)." 
+--- +## String Theory + +But this foundational principle of science has now been called into question by [String Theory](https://www.britannica.com/science/string-theory), which is a relative newcomer to theoretical physics, but one that has captured the common imagination, judging by the popular explanations that abound on the Web [@mann2019; @wood2019; @jones2020]. And whether string theory is or is not science, Popper notwithstanding, is an issue that is still up for debate [@siegel2015; @castelvecchi2016; @alves2017; @francis2019]. diff --git a/pelican/plugins/pandoc_reader/test_content/valid_content_with_raw_paths.md b/pelican/plugins/pandoc_reader/test_content/valid_content_with_raw_paths.md new file mode 100644 index 0000000..95e701f --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/valid_content_with_raw_paths.md @@ -0,0 +1,12 @@ +--- +title: "Valid Content with Fictitious Raw Paths" +author: "My Author" +date: "2020-10-16" +--- +This is some valid content that should pass. If it does not pass we will know something is wrong. + +Our fictitious internal files are available [at]({filename}/path/to/file): + +Our fictitious static files are available [at]({static}/path/to/file): + +Our fictitious attachments are available [at]({attach}path/to/file): diff --git a/pelican/plugins/pandoc_reader/test_content/valid_content_with_toc.md b/pelican/plugins/pandoc_reader/test_content/valid_content_with_toc.md new file mode 100644 index 0000000..03aaa04 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/valid_content_with_toc.md @@ -0,0 +1,22 @@ +--- +title: "Valid Content with Table of Contents" +author: "My Author" +date: "2020-10-16" +--- +This is some valid content that should pass. If it does not pass we will know something is wrong. + +## First Heading + +This should be the first heading in my table of contents. + +## Second Heading + +This should be the second heading in my table of contents. 
+ +### First Subheading + +This is a subsection that should be shown as such in the table of contents. + +### Second Subheading + +This is another subsection that should be shown as such in the table of contents. diff --git a/pelican/plugins/pandoc_reader/test_content/wrong_metadata_end.md b/pelican/plugins/pandoc_reader/test_content/wrong_metadata_end.md new file mode 100644 index 0000000..1e46a70 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_content/wrong_metadata_end.md @@ -0,0 +1,5 @@ +--- +title: "No Metadata End" +author: "My Author" +date: "2020-10-16" +~~~ diff --git a/pelican/plugins/pandoc_reader/test_default_files/from_reader_both_given.yaml b/pelican/plugins/pandoc_reader/test_default_files/from_reader_both_given.yaml new file mode 100644 index 0000000..de711a9 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/from_reader_both_given.yaml @@ -0,0 +1,11 @@ +# from_reader_both_given.yaml +# +# A test default file that sets both reader and from +# +reader: markdown+smart+citations+implicit_figures +from: markdown+smart+citations+implicit_figures +writer: html5 + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/invalid_from_input_format.yaml b/pelican/plugins/pandoc_reader/test_default_files/invalid_from_input_format.yaml new file mode 100644 index 0000000..90b74a6 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/invalid_from_input_format.yaml @@ -0,0 +1,10 @@ +# invalid_from_input_format.yaml +# +# A test default file that sets from to an invalid input format +# +from: myinputformat +to: html5 + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/invalid_reader_input_format.yaml b/pelican/plugins/pandoc_reader/test_default_files/invalid_reader_input_format.yaml new file mode 
100644 index 0000000..0cde43d --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/invalid_reader_input_format.yaml @@ -0,0 +1,10 @@ +# invalid_reader_input_format.yaml +# +# A test default file that sets reader to an invalid input format +# +reader: myinputformat +writer: html5 + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/invalid_to_output_format.yaml b/pelican/plugins/pandoc_reader/test_default_files/invalid_to_output_format.yaml new file mode 100644 index 0000000..95e5be2 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/invalid_to_output_format.yaml @@ -0,0 +1,10 @@ +# invalid_to_output_format.yaml +# +# A test default file that sets to an invalid output format +# +from: markdown+smart+citations+implicit_figures +to: myoutputformat + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/invalid_writer_output_format.yaml b/pelican/plugins/pandoc_reader/test_default_files/invalid_writer_output_format.yaml new file mode 100644 index 0000000..2e1bc08 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/invalid_writer_output_format.yaml @@ -0,0 +1,10 @@ +# invalid_writer_output_format.yaml +# +# A test default file that sets writer to an invalid output format +# +reader: markdown+smart+citations+implicit_figures +writer: myoutputformat + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/no_input_format.yaml b/pelican/plugins/pandoc_reader/test_default_files/no_input_format.yaml new file mode 100644 index 0000000..ae4d890 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/no_input_format.yaml @@ -0,0 +1,9 @@ +# no_input_format.yaml +# +# A test 
default file that specifies no input format +# +writer: html5 + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/no_output_format.yaml b/pelican/plugins/pandoc_reader/test_default_files/no_output_format.yaml new file mode 100644 index 0000000..cc65a82 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/no_output_format.yaml @@ -0,0 +1,9 @@ +# no_output_format.yaml +# +# A test default file that specifies no output format +# +reader: markdown + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/selfcontained_true.yaml b/pelican/plugins/pandoc_reader/test_default_files/selfcontained_true.yaml new file mode 100644 index 0000000..5b98857 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/selfcontained_true.yaml @@ -0,0 +1,11 @@ +# selfcontained_true.yaml +# +# A test default file that sets self-contained to true +# +reader: markdown+smart+citations+implicit_figures +writer: html5 +self-contained: true + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/standalone_true.yaml b/pelican/plugins/pandoc_reader/test_default_files/standalone_true.yaml new file mode 100644 index 0000000..5a9fda3 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/standalone_true.yaml @@ -0,0 +1,11 @@ +# standalone_true.yaml +# +# A test default file that sets standalone to true +# +reader: markdown+smart+citations+implicit_figures +writer: html5 +standalone: true + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/to_writer_both_given.yaml 
b/pelican/plugins/pandoc_reader/test_default_files/to_writer_both_given.yaml new file mode 100644 index 0000000..81989b4 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/to_writer_both_given.yaml @@ -0,0 +1,11 @@ +# to_writer_both_given.yaml +# +# A test default file that sets both to and writer +# +reader: markdown+smart+citations+implicit_figures +to: html5 +writer: html5 + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/valid_defaults.yaml b/pelican/plugins/pandoc_reader/test_default_files/valid_defaults.yaml new file mode 100644 index 0000000..0ae12ce --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/valid_defaults.yaml @@ -0,0 +1,10 @@ +# valid_defaults.yaml +# +# A test default file that is valid +# +reader: markdown+smart+citations+implicit_figures +writer: html5 + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/valid_defaults_with_toc.yaml b/pelican/plugins/pandoc_reader/test_default_files/valid_defaults_with_toc.yaml new file mode 100644 index 0000000..8db4ba4 --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_default_files/valid_defaults_with_toc.yaml @@ -0,0 +1,12 @@ +# valid_defaults_with_toc.yaml +# +# A test default file that is valid and sets table_of_contents to true +# +reader: markdown+smart+citations+implicit_figures +writer: html5 + +table-of-contents: true + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" diff --git a/pelican/plugins/pandoc_reader/test_default_files/valid_defaults_with_toc_and_citations.yaml new file mode 100644 index 0000000..203cd59 --- /dev/null +++
b/pelican/plugins/pandoc_reader/test_default_files/valid_defaults_with_toc_and_citations.yaml @@ -0,0 +1,19 @@ +# valid_defaults_with_toc_and_citations.yml +# +# A test default file that is valid and sets table_of_contents to true +# +reader: markdown+smart+citations+implicit_figures +writer: html5 + +table-of-contents: true + +html-math-method: + method: mathjax + url: "https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js" + +citeproc: true +csl: "https://www.zotero.org/styles/ieee-with-url" + +metadata: + link-citations: false + reference-section-title: References diff --git a/pelican/plugins/pandoc_reader/test_pandoc_reader.py b/pelican/plugins/pandoc_reader/test_pandoc_reader.py new file mode 100644 index 0000000..f04012b --- /dev/null +++ b/pelican/plugins/pandoc_reader/test_pandoc_reader.py @@ -0,0 +1,1216 @@ +"""Tests for pandoc-reader plugin.""" +# pylint: disable=too-many-lines +import os +import shutil +import unittest + +from pelican.tests.support import get_settings + +from pandoc_reader import PandocReader + +DIR_PATH = os.path.dirname(__file__) +TEST_CONTENT_PATH = os.path.abspath(os.path.join(DIR_PATH, "test_content")) +TEST_DEFAULT_FILES_PATH = os.path.abspath( + os.path.join(DIR_PATH, "test_default_files") +) + +# Test settings that will be set in pelicanconf.py by plugin users +PANDOC_ARGS = ["--mathjax"] +PANDOC_EXTENSIONS = ["+smart", "+implicit_figures"] +CALCULATE_READING_TIME = True +FORMATTED_FIELDS = ["summary"] + + +class TestGeneralTestCases(unittest.TestCase): + """Test installation of Pandoc.""" + + # Test using pelicanconf settings variables + def test_pandoc_installed(self): + """Check if Pandoc is installed.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "empty.md") + + if not shutil.which("pandoc"): + # Case where pandoc is not installed + with self.assertRaises(Exception) as 
context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("Could not find Pandoc. Please install.", message) + else: + # Case where pandoc is installed + message = "Pandoc is installed." + self.assertEqual("Pandoc is installed.", message) + + def test_default_wpm_reading_time(self): + """Check if 200 words per minute give us reading time of 1 minute.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, + PANDOC_ARGS=PANDOC_ARGS, + CALCULATE_READING_TIME=CALCULATE_READING_TIME, + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join( + TEST_CONTENT_PATH, "reading_time_content.md" + ) + + _, metadata = pandoc_reader.read(source_path) + + self.assertEqual("1 minute", str(metadata["reading_time"])) + + def test_user_defined_wpm_reading_time(self): + """Check if 100 words per minute user defined gives us 2 minutes.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, + PANDOC_ARGS=PANDOC_ARGS, + CALCULATE_READING_TIME=CALCULATE_READING_TIME, + READING_SPEED=100, + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join( + TEST_CONTENT_PATH, "reading_time_content.md" + ) + + _, metadata = pandoc_reader.read(source_path) + + self.assertEqual("2 minutes", str(metadata["reading_time"])) + + def test_invalid_user_defined_wpm(self): + """Check if exception is raised if words per minute is not a number.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, + PANDOC_ARGS=PANDOC_ARGS, + CALCULATE_READING_TIME=CALCULATE_READING_TIME, + READING_SPEED="my words per minute", + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join( + TEST_CONTENT_PATH, "reading_time_content.md" + ) + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("READING_SPEED setting must be a number.", message) + + def test_summary(self): + """Check 
if summary output is valid.""" + + pandoc_default_files = [ + os.path.join( + TEST_DEFAULT_FILES_PATH, + "valid_defaults_with_toc_and_citations.yaml", + ) + ] + + settings = get_settings( + PANDOC_DEFAULT_FILES=pandoc_default_files, + FORMATTED_FIELDS=FORMATTED_FIELDS, + ) + pandoc_reader = PandocReader(settings) + + source_path = os.path.join( + TEST_CONTENT_PATH, "valid_content_with_citation.md" + ) + + _, metadata = pandoc_reader.read(source_path) + + self.assertEqual( + ( + "

But this foundational principle of science has now been" + " called into question by" + ' ' + "String Theory.

\n" + ), + str(metadata["summary"]), + ) + + +class TestInvalidCasesWithArguments(unittest.TestCase): + """Invalid test cases using Pandoc arguments and extensions.""" + + def test_empty_file(self): + """Check if a file is empty.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "empty.md") + + # If the file is empty retrieval of metadata should fail + with self.assertRaises(Exception) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("Could not find metadata. File is empty.", message) + + def test_non_empty_file_no_metadata(self): + """Check if a file has no metadata.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "no_metadata.md") + + # If the file is not empty but has no metadata it should fail + with self.assertRaises(Exception) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + "Could not find metadata header '...' or '---'.", message + ) + + def test_no_metadata_block_end(self): + """Check if the metadata block ends.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "no_metadata_end.md") + + # Metadata blocks should end with '---' or '...'
if not it should fail + with self.assertRaises(Exception) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("Could not find end of metadata block.", message) + + def test_invalid_metadata_block_end(self): + """Check if the metadata block end is wrong.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "no_metadata_end.md") + + # Metadata blocks should end with '___' or '...' if not it should fail + with self.assertRaises(Exception) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("Could not find end of metadata block.", message) + + def test_invalid_standalone_argument(self): + """Check that specifying --standalone raises an exception.""" + pandoc_arguments = ["--standalone"] + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=pandoc_arguments + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("Argument --standalone is not supported.", message) + + def test_invalid_self_contained_argument(self): + """Check that specifying --self-contained raises an exception.""" + pandoc_arguments = ["--self-contained"] + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=pandoc_arguments + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + "Argument --self-contained is not supported.", message + ) + + +class 
TestValidCasesWithArguments(unittest.TestCase): + """Valid test cases using Pandoc arguments and extensions.""" + + def test_valid_file(self): + """Check if we get the appropriate output for valid input.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + output, metadata = pandoc_reader.read(source_path) + + self.assertEqual( + ( + "

This is some valid content that should pass." + " If it does not pass we" + " will know something is wrong.

\n" + ), + output, + ) + + self.assertEqual("Valid Content", str(metadata["title"])) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + + def test_mathjax_content(self): + """Check if mathematics is rendered correctly.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "mathjax_content.md") + output, metadata = pandoc_reader.read(source_path) + + self.assertEqual( + ( + '

\\[\ne^{i\\theta} = ' + "\\cos\\theta + i \\sin\\theta.\n\\]

\n" + ), + output, + ) + + self.assertEqual("MathJax Content", str(metadata["title"])) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + + def test_encoded_to_raw_conversion(self): + """Check if raw paths are left untouched in output returned""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, PANDOC_ARGS=PANDOC_ARGS + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join( + TEST_CONTENT_PATH, "valid_content_with_raw_paths.md" + ) + output, metadata = pandoc_reader.read(source_path) + + # Setting this so that assert is able to execute the difference + self.maxDiff = None # pylint: disable=invalid-name + + self.assertEqual( + ( + "

This is some valid content that should pass." + " If it does not pass we will know something is wrong.

\n" + "

Our fictitious internal files are available" + ' at:

\n' + "

Our fictitious static files are available" + ' at:

\n' + "

Our fictitious attachments are available" + ' at:

\n' + ), + output, + ) + + self.assertEqual( + "Valid Content with Fictitious Raw Paths", str(metadata["title"]) + ) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + + def test_valid_content_with_toc(self): + """Check if output returned is valid and table of contents is valid.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, + PANDOC_ARGS=PANDOC_ARGS + ["--toc"], + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join( + TEST_CONTENT_PATH, "valid_content_with_toc.md" + ) + output, metadata = pandoc_reader.read(source_path) + + # Setting this so that assert is able to execute the difference + self.maxDiff = None + + self.assertEqual( + ( + "

This is some valid content that should pass." + " If it does not pass we will know something is wrong.

\n" + '

First Heading

\n' + "

This should be the first heading in my" + " table of contents.

\n" + '

Second Heading

\n' + "

This should be the second heading in my" + " table of contents.

\n" + '

First Subheading

\n' + "

This is a subsection that should be shown as such" + " in the table of contents.

\n" + '

Second Subheading

\n' + "

This is another subsection that should be shown as" + " such in the table of contents.

\n" + ), + output, + ) + self.assertEqual( + "Valid Content with Table of Contents", str(metadata["title"]) + ) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + self.assertEqual( + '\n", + str(metadata["toc"]), + ) + + def test_valid_content_with_toc_2(self): + """Check if output returned is valid and table of contents is valid.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS, + PANDOC_ARGS=PANDOC_ARGS + ["--table-of-contents"], + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join( + TEST_CONTENT_PATH, "valid_content_with_toc.md" + ) + output, metadata = pandoc_reader.read(source_path) + + # Setting this so that assert is able to execute the difference + self.maxDiff = None + + self.assertEqual( + ( + "

This is some valid content that should pass." + " If it does not pass we will know something is wrong.

\n" + '

First Heading

\n' + "

This should be the first heading in my" + " table of contents.

\n" + '

Second Heading

\n' + "

This should be the second heading in my" + " table of contents.

\n" + '

First Subheading

\n' + "

This is a subsection that should be shown as such" + " in the table of contents.

\n" + '

Second Subheading

\n' + "

This is another subsection that should be shown as" + " such in the table of contents.

\n" + ), + output, + ) + self.assertEqual( + "Valid Content with Table of Contents", str(metadata["title"]) + ) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + self.assertEqual( + '\n", + str(metadata["toc"]), + ) + + def test_citations_and_toc(self): + """Check if output, citations and table of contents CLI are valid.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS + ["+citations"], + PANDOC_ARGS=PANDOC_ARGS + + [ + "--toc", + "-C", + "--csl=https://www.zotero.org/styles/ieee-with-url", + "--metadata=link-citations:false", + "--metadata=reference-section-title:References", + ], + FORMATTED_FIELDS=FORMATTED_FIELDS, + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join( + TEST_CONTENT_PATH, "valid_content_with_citation.md" + ) + output, metadata = pandoc_reader.read(source_path) + self.maxDiff = None + + self.assertEqual( + ( + '

String Theory

\n' + "

But this foundational principle of science has" + " now been called into question by" + ' String Theory,' + " which is a relative newcomer to theoretical physics, but one" + " that has captured the common imagination, judging by" + " the popular explanations that abound on the Web" + ' [1]–[3].' + " And whether string theory is or is not science, Popper" + " notwithstanding, is an issue that is still up for debate" + " [4]–[7].

\n' + '

References

\n' + '
\n' + '
\n' + '
[1]' + '
A. Mann,' + " What Is String Theory?" + " 20-Mar-2019. [Online]." + ' Available: ' + "https://www.livescience.com/" + "65033-what-is-string-theory.html." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[2]
' + '
' + "C. Wood, What Is String Theory?." + " Reference article:" + " A simplified explanation and brief history of string" + " theory,” 11-Jul-2019." + ' [Online]. Available: ' + "https://www.space.com/17594-string-theory.html." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[3]' + '
' + 'A. Z. Jones, The Basics of String' + " Theory,” 02-Mar-2019. [Online]. Available:" + ' ' + "https://www.thoughtco.com/what-is-string-theory-2699363." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[4]' + '
' + "E. Siegel, Why String Theory Is Not A Scientific" + " Theory,” 23-Dec-2015. [Online]. Available:" + " https://www.forbes.com/' + "sites/startswithabang/2015/12/23/" + "why-string-theory-is-not-science/." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[5]' + '
' + 'D. Castelvecchi, ' + "Feuding physicists turn" + " to philosophy for help. String theory is at the" + " heart of a debate over the integrity of the scientific" + " method itself,” 05-Jan-2016. [Online]. Available:" + ' ' + "https://www.nature.com/news/" + "feuding-physicists-turn-to-philosophy-for-help-1.19076." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[6]
' + '
' + 'R. A. Batista and J. Primack, ' + "Is String theory falsifiable?. Can a theory that isn’t" + " completely testable still be useful to physics?”" + " [Online]." + ' Available: ' + "https://metafact.io/factchecks/" + "30-is-string-theory-falsifiable." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[7]' + '
' + 'M. R. Francis, Falsifiability and' + " physics. Can a theory that isn’t completely testable" + " still be useful to physics?” 23-Apr-2019." + " [Online]. Available:" + ' ' + "https://www.scientificamerican.com/article/is-" + "string-theory-science/. [Accessed: 12-Nov-2020]
\n" + "
\n" + "
\n" + ), + output, + ) + + self.assertEqual("Valid Content With Citation", str(metadata["title"])) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + self.assertEqual( + ( + '\n" + ), + str(metadata["toc"]), + ) + self.assertEqual( + ( + "

But this foundational principle of science has now been" + " called into question by" + ' ' + "String Theory.

\n" + ), + str(metadata["summary"]), + ) + + def test_citations_and_toc_2(self): + """Check if output, citations and table of contents CLI are valid.""" + settings = get_settings( + PANDOC_EXTENSIONS=PANDOC_EXTENSIONS + ["+citations"], + PANDOC_ARGS=PANDOC_ARGS + + [ + "--table-of-contents", + "--citeproc", + "--csl=https://www.zotero.org/styles/ieee-with-url", + "--metadata=link-citations:false", + "--metadata=reference-section-title:References", + ], + FORMATTED_FIELDS=FORMATTED_FIELDS, + ) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join( + TEST_CONTENT_PATH, "valid_content_with_citation.md" + ) + output, metadata = pandoc_reader.read(source_path) + self.maxDiff = None + + self.assertEqual( + ( + '

String Theory

\n' + "

But this foundational principle of science has" + " now been called into question by" + ' String Theory,' + " which is a relative newcomer to theoretical physics, but one" + " that has captured the common imagination, judging by" + " the popular explanations that abound on the Web" + ' [1]–[3].' + " And whether string theory is or is not science, Popper" + " notwithstanding, is an issue that is still up for debate" + " [4]–[7].

\n' + '

References

\n' + '
\n' + '
\n' + '
[1]' + '
A. Mann,' + " What Is String Theory?" + " 20-Mar-2019. [Online]." + ' Available: ' + "https://www.livescience.com/" + "65033-what-is-string-theory.html." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[2]
' + '
' + "C. Wood, What Is String Theory?." + " Reference article:" + " A simplified explanation and brief history of string" + " theory,” 11-Jul-2019." + ' [Online]. Available: ' + "https://www.space.com/17594-string-theory.html." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[3]' + '
' + 'A. Z. Jones, The Basics of String' + " Theory,” 02-Mar-2019. [Online]. Available:" + ' ' + "https://www.thoughtco.com/what-is-string-theory-2699363." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[4]' + '
' + "E. Siegel, Why String Theory Is Not A Scientific" + " Theory,” 23-Dec-2015. [Online]. Available:" + " https://www.forbes.com/' + "sites/startswithabang/2015/12/23/" + "why-string-theory-is-not-science/." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[5]' + '
' + 'D. Castelvecchi, ' + "Feuding physicists turn" + " to philosophy for help. String theory is at the" + " heart of a debate over the integrity of the scientific" + " method itself,” 05-Jan-2016. [Online]. Available:" + ' ' + "https://www.nature.com/news/" + "feuding-physicists-turn-to-philosophy-for-help-1.19076." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[6]
' + '
' + 'R. A. Batista and J. Primack, ' + "Is String theory falsifiable?. Can a theory that isn’t" + " completely testable still be useful to physics?”" + " [Online]." + ' Available: ' + "https://metafact.io/factchecks/" + "30-is-string-theory-falsifiable." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[7]' + '
' + 'M. R. Francis, Falsifiability and' + " physics. Can a theory that isn’t completely testable" + " still be useful to physics?” 23-Apr-2019." + " [Online]. Available:" + ' ' + "https://www.scientificamerican.com/article/is-" + "string-theory-science/. [Accessed: 12-Nov-2020]
\n" + "
\n" + "
\n" + ), + output, + ) + + self.assertEqual("Valid Content With Citation", str(metadata["title"])) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + self.assertEqual( + ( + '\n" + ), + str(metadata["toc"]), + ) + + +class TestInvalidCasesWithDefaultFiles(unittest.TestCase): + """Invalid test cases using default files.""" + + def test_invalid_standalone(self): + """Check if exception is raised if standalone is true.""" + pandoc_default_files = [ + os.path.join(TEST_DEFAULT_FILES_PATH, "standalone_true.yaml") + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + "The default standalone should be set to false.", message + ) + + def test_invalid_self_contained(self): + """Check if exception is raised if self-contained is true.""" + pandoc_default_files = [ + os.path.join(TEST_DEFAULT_FILES_PATH, "selfcontained_true.yaml") + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + "The default self-contained should be set to false.", message + ) + + def test_no_input_format(self): + """Check if exception is raised if no input format is specified.""" + pandoc_default_files = [ + os.path.join(TEST_DEFAULT_FILES_PATH, "no_input_format.yaml") + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with 
self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("No input format specified.", message) + + def test_invalid_reader_input_format(self): + """Check if exception is raised if reader input format is invalid.""" + pandoc_default_files = [ + os.path.join( + TEST_DEFAULT_FILES_PATH, "invalid_reader_input_format.yaml" + ) + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("Input type has to be a markdown variant.", message) + + def test_invalid_from_input_format(self): + """Check if exception is raised if from input format is invalid.""" + pandoc_default_files = [ + os.path.join( + TEST_DEFAULT_FILES_PATH, "invalid_from_input_format.yaml" + ) + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual("Input type has to be a markdown variant.", message) + + def test_from_reader_both_given(self): + """Check if exception is raised if from and reader are both given.""" + pandoc_default_files = [ + os.path.join( + TEST_DEFAULT_FILES_PATH, "from_reader_both_given.yaml" + ) + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + ( + 
"Specifying both from and reader is not supported." + " Please specify just one." + ), + message, + ) + + def test_to_writer_both_given(self): + """Check if exception is raised if to and writer are both given.""" + pandoc_default_files = [ + os.path.join(TEST_DEFAULT_FILES_PATH, "to_writer_both_given.yaml") + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + ( + "Specifying both to and writer is not supported." + " Please specify just one." + ), + message, + ) + + def test_no_output_format(self): + """Check if exception is raised if no output format is specified.""" + pandoc_default_files = [ + os.path.join(TEST_DEFAULT_FILES_PATH, "no_output_format.yaml") + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + "Output format type must be either html or html5.", message + ) + + def test_invalid_writer_output_format(self): + """Check if exception is raised if writer output format is invalid.""" + pandoc_default_files = [ + os.path.join( + TEST_DEFAULT_FILES_PATH, "invalid_writer_output_format.yaml" + ) + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + "Output format type must be either html or html5.", 
message + ) + + def test_invalid_to_output_format(self): + """Check if exception is raised if to output format is invalid.""" + pandoc_default_files = [ + os.path.join( + TEST_DEFAULT_FILES_PATH, "invalid_to_output_format.yaml" + ) + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + + with self.assertRaises(ValueError) as context_manager: + pandoc_reader.read(source_path) + + message = str(context_manager.exception) + self.assertEqual( + "Output format type must be either html or html5.", message + ) + + +class TestValidCasesWithDefaultFiles(unittest.TestCase): + """Valid test cases using default files.""" + + def test_valid_file_with_valid_defaults(self): + """Check if we get the appropriate output specifying defaults.""" + pandoc_default_files = [ + os.path.join(TEST_DEFAULT_FILES_PATH, "valid_defaults.yaml") + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + + source_path = os.path.join(TEST_CONTENT_PATH, "valid_content.md") + output, metadata = pandoc_reader.read(source_path) + + self.assertEqual( + ( + "

This is some valid content that should pass." + " If it does not pass we will know something is wrong.

\n" + ), + output, + ) + + self.assertEqual("Valid Content", str(metadata["title"])) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + + def test_mathjax_with_valid_defaults(self): + """Check if mathematics is rendered correctly with defaults.""" + pandoc_default_files = [ + os.path.join(TEST_DEFAULT_FILES_PATH, "valid_defaults.yaml") + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + + pandoc_reader = PandocReader(settings) + + source_path = os.path.join(TEST_CONTENT_PATH, "mathjax_content.md") + output, metadata = pandoc_reader.read(source_path) + + self.assertEqual( + ( + '

\\[\ne^{i\\theta} = ' + "\\cos\\theta + i \\sin\\theta.\n\\]

\n" + ), + output, + ) + + self.assertEqual("MathJax Content", str(metadata["title"])) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + + def test_toc_with_valid_defaults(self): + """Check if output and table of contents are valid with defaults.""" + pandoc_default_files = [ + os.path.join( + TEST_DEFAULT_FILES_PATH, "valid_defaults_with_toc.yaml" + ) + ] + + settings = get_settings(PANDOC_DEFAULT_FILES=pandoc_default_files) + pandoc_reader = PandocReader(settings) + + source_path = os.path.join( + TEST_CONTENT_PATH, "valid_content_with_toc.md" + ) + output, metadata = pandoc_reader.read(source_path) + + self.assertEqual( + ( + "

This is some valid content that should pass." + " If it does not pass we will know something is wrong.

\n" + '

First Heading

\n' + "

This should be the first heading in my" + " table of contents.

\n" + '

Second Heading

\n' + "

This should be the second heading in my" + " table of contents.

\n" + '

First Subheading

\n' + "

This is a subsection that should be shown as such" + " in the table of contents.

\n" + '

Second Subheading

\n' + "

This is another subsection that should be shown as" + " such in the table of contents.

\n" + ), + output, + ) + self.assertEqual( + "Valid Content with Table of Contents", str(metadata["title"]) + ) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + self.assertEqual( + '\n", + str(metadata["toc"]), + ) + + def test_citations_and_toc_with_valid_defaults(self): + """Check if output, citations and table of contents are valid.""" + + pandoc_default_files = [ + os.path.join( + TEST_DEFAULT_FILES_PATH, + "valid_defaults_with_toc_and_citations.yaml", + ) + ] + + settings = get_settings( + PANDOC_DEFAULT_FILES=pandoc_default_files, + FORMATTED_FIELDS=FORMATTED_FIELDS, + ) + pandoc_reader = PandocReader(settings) + + source_path = os.path.join( + TEST_CONTENT_PATH, "valid_content_with_citation.md" + ) + output, metadata = pandoc_reader.read(source_path) + self.maxDiff = None # pylint: disable=invalid-name + + self.assertEqual( + ( + '

String Theory

\n' + "

But this foundational principle of science has" + " now been called into question by" + ' String Theory,' + " which is a relative newcomer to theoretical physics, but one" + " that has captured the common imagination, judging by" + " the popular explanations that abound on the Web" + ' [1]–[3].' + " And whether string theory is or is not science, Popper" + " notwithstanding, is an issue that is still up for debate" + " [4]–[7].

\n' + '

References

\n' + '
\n' + '
\n' + '
[1]' + '
A. Mann,' + " What Is String Theory?" + " 20-Mar-2019. [Online]." + ' Available: ' + "https://www.livescience.com/" + "65033-what-is-string-theory.html." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[2]
' + '
' + "C. Wood, What Is String Theory?." + " Reference article:" + " A simplified explanation and brief history of string" + " theory,” 11-Jul-2019." + ' [Online]. Available: ' + "https://www.space.com/17594-string-theory.html." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[3]' + '
' + 'A. Z. Jones, The Basics of String' + " Theory,” 02-Mar-2019. [Online]. Available:" + ' ' + "https://www.thoughtco.com/what-is-string-theory-2699363." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[4]' + '
' + "E. Siegel, Why String Theory Is Not A Scientific" + " Theory,” 23-Dec-2015. [Online]. Available:" + " https://www.forbes.com/' + "sites/startswithabang/2015/12/23/" + "why-string-theory-is-not-science/." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[5]' + '
' + 'D. Castelvecchi, ' + "Feuding physicists turn" + " to philosophy for help. String theory is at the" + " heart of a debate over the integrity of the scientific" + " method itself,” 05-Jan-2016. [Online]. Available:" + ' ' + "https://www.nature.com/news/" + "feuding-physicists-turn-to-philosophy-for-help-1.19076." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[6]
' + '
' + 'R. A. Batista and J. Primack, ' + "Is String theory falsifiable?. Can a theory that isn’t" + " completely testable still be useful to physics?”" + " [Online]." + ' Available: ' + "https://metafact.io/factchecks/" + "30-is-string-theory-falsifiable." + " [Accessed: 12-Nov-2020]
\n" + "
\n" + '
\n' + '
[7]' + '
' + 'M. R. Francis, Falsifiability and' + " physics. Can a theory that isn’t completely testable" + " still be useful to physics?” 23-Apr-2019." + " [Online]. Available:" + ' ' + "https://www.scientificamerican.com/article/is-" + "string-theory-science/. [Accessed: 12-Nov-2020]
\n" + "
\n" + "
\n" + ), + output, + ) + + self.assertEqual("Valid Content With Citation", str(metadata["title"])) + self.assertEqual("My Author", str(metadata["author"])) + self.assertEqual("2020-10-16 00:00:00", str(metadata["date"])) + self.assertEqual( + ( + '\n" + ), + str(metadata["toc"]), + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..725a2a3 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,79 @@ +[tool.poetry] +name = "pelican-pandoc-reader" +version = "0.0.1" +description = "Pelican plugin to convert Pandoc Markdown files to HTML5." +authors = ["Nandakumar Chandrasekhar "] +license = "AGPL-3.0" +readme = "README.md" +keywords = ["pelican", "plugin", "pandoc"] +repository = "https://github.com/pelican-plugins/pandoc-reader" +documentation = "https://docs.getpelican.com" +packages = [ + { include = "pelican" }, +] + +classifiers = [ + "Development Status :: 4 - Beta", + "Environment :: Console", + "Framework :: Pelican", + "Framework :: Pelican :: Plugins", + "Intended Audience :: End Users/Desktop", + "Operating System :: OS Independent", + "Topic :: Internet :: WWW/HTTP", + "Topic :: Software Development :: Libraries :: Python Modules", +] + +[tool.poetry.urls] +"Funding" = "https://donate.getpelican.com/" +"Issue Tracker" = "https://github.com/pelican-plugins/pandoc-reader/issues" + +[tool.poetry.dependencies] +python = "^3.6" +pelican = "^4.5" +markdown = {version = "^3.2.2", optional = true} +pyyaml = "^5.3.1" +markdown-word-count = "^0.0.1" + + +[tool.poetry.dev-dependencies] +black = {version = "^19.10b0", allow-prereleases = true} +flake8 = "^3.8" +flake8-black = "^0.1.0" +invoke = "^1.3" +isort = "^5.4" +livereload = "^2.6" +markdown = "^3.2.2" +pytest = "^6.0" +pytest-cov = "^2.7" +pytest-pythonpath = "^0.7.3" +pytest-sugar = "^0.9.4" +Werkzeug = "^1.0" + +[tool.poetry.extras] +markdown = ["markdown"] + +[tool.autopub] +project-name = "Pandoc Reader" +git-username = "botpub" 
+git-email = "botpub@autopub.rocks" + +[tool.isort] +sections = ['FUTURE', 'STDLIB', 'YAML', 'THIRDPARTY', 'FIRSTPARTY', 'LOCALFOLDER'] +known_yaml = ['yaml'] +known_third_party = 'pelican' +known_first_party = 'pandoc_reader' +combine_as_imports = true +force_grid_wrap = 0 +include_trailing_comma = true +line_length = 79 +multi_line_output = 3 + +# Sort imports within their section independent of the import type +force_sort_within_sections = true + +[tool.black] +line_length = 79 + +[build-system] +requires = ["poetry>=1.0"] +build-backend = "poetry.masonry.api" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..42a02f9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,13 @@ +blinker==1.4 +docutils==0.16 +feedgenerator==1.9.1 +Jinja2==2.11.2 +MarkupSafe==1.1.1 +pelican==4.5.1 +Pygments==2.6.1 +python-dateutil==2.8.1 +pytz==2020.1 +PyYAML==5.3.1 +six==1.15.0 +Unidecode==1.1.1 +markdown-word-count==0.0.1 diff --git a/tasks.py b/tasks.py new file mode 100644 index 0000000..0c7cc45 --- /dev/null +++ b/tasks.py @@ -0,0 +1,86 @@ +import os +from pathlib import Path +from shutil import which + +from invoke import task + +PKG_NAME = "pandoc_reader" +PKG_PATH = Path(f"pelican/plugins/{PKG_NAME}") +ACTIVE_VENV = os.environ.get("VIRTUAL_ENV", None) +VENV_HOME = Path(os.environ.get("WORKON_HOME", "~/.local/share/virtualenvs")) +VENV_PATH = Path(ACTIVE_VENV) if ACTIVE_VENV else (VENV_HOME / PKG_NAME) +VENV = str(VENV_PATH.expanduser()) + +TOOLS = ["poetry", "pre-commit"] +POETRY = ( + which("poetry") if which("poetry") else (VENV / Path("bin") / "poetry") +) +PRECOMMIT = ( + which("pre-commit") + if which("pre-commit") + else (VENV / Path("bin") / "pre-commit") +) + + +@task +def tests(c): + """Run the test suite""" + c.run(f"{VENV}/bin/pytest", pty=True) + + +@task +def black(c, check=False, diff=False): + """Run Black auto-formatter, optionally with --check or --diff""" + check_flag, diff_flag = "", "" + if check: + check_flag = "--check" + if 
diff: + diff_flag = "--diff" + line_length = "--line-length 79" + c.run( + f"{VENV}/bin/black {check_flag} {diff_flag} {line_length} {PKG_PATH} tasks.py" + ) + + +@task +def isort(c, check=False, diff=False): + check_flag, diff_flag = "", "" + if check: + check_flag = "-c" + if diff: + diff_flag = "--diff" + c.run(f"{VENV}/bin/isort {check_flag} {diff_flag} .") + + +@task +def flake8(c): + c.run(f"{VENV}/bin/flake8 {PKG_PATH} tasks.py") + + +@task +def lint(c): + isort(c, check=True) + black(c, check=True) + flake8(c) + + +@task +def tools(c): + """Install tools in the virtual environment if not already on PATH""" + for tool in TOOLS: + if not which(tool): + c.run(f"{VENV}/bin/pip install {tool}") + + +@task +def precommit(c): + """Install pre-commit hooks to .git/hooks/pre-commit""" + c.run(f"{PRECOMMIT} install") + + +@task +def setup(c): + c.run(f"{VENV}/bin/pip install -U pip") + tools(c) + c.run(f"{POETRY} install") + precommit(c) diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..82598e1 --- /dev/null +++ b/tox.ini @@ -0,0 +1,3 @@ +[flake8] +max-line-length = 79 +ignore = E203, E266, E501, W503