From 71384263fbf76970cff8c8f6810596326beda45e Mon Sep 17 00:00:00 2001 From: anchal7299 Date: Mon, 5 Jul 2021 19:39:39 +0530 Subject: [PATCH 1/3] Fix new line inside emphasis --- html2text/__init__.py | 37 ++++++++++++++++++++++----- test/new_line_in_emphasis.html | 1 + test/new_line_in_emphasis.md | 3 +++ test/test_new_line_inside_emphasis.py | 8 ++++++ 4 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 test/new_line_in_emphasis.html create mode 100644 test/new_line_in_emphasis.md create mode 100644 test/test_new_line_inside_emphasis.py diff --git a/html2text/__init__.py b/html2text/__init__.py index 7e1a279..0184858 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -86,7 +86,7 @@ def __init__( self.tag_callback = None self.open_quote = config.OPEN_QUOTE # covered in cli self.close_quote = config.CLOSE_QUOTE # covered in cli - + if out is None: self.out = self.outtextf else: @@ -120,6 +120,8 @@ def __init__( self.tag_stack = ( [] ) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]] + self.emphasis_tag_stack = {} + self.remove_space = False self.emphasis = 0 self.drop_white_space = 0 self.inheader = False @@ -302,10 +304,19 @@ def handle_tag( ) -> None: self.current_tag = tag + if tag in ["b","em","i","u"]: + if start: + if tag in self.emphasis_tag_stack: + self.emphasis_tag_stack[tag] += 1 + else: + self.emphasis_tag_stack[tag] = 1 + elif list(self.emphasis_tag_stack.keys()): + self.emphasis_tag_stack.popitem() + if self.tag_callback is not None: if self.tag_callback(self, tag, attrs, start) is True: return - + # first thing inside the anchor tag is another tag # that produces some output if ( @@ -372,10 +383,24 @@ def handle_tag( self.p() if tag == "br" and start: + for key in list(self.emphasis_tag_stack.keys())[::-1]: + if(key == "b"): + self.o(self.strong_mark) + elif key in ["em","i","u"]: + self.o(self.emphasis_mark) + if self.blockquote > 0: self.o(" \n> ") else: self.o(" \n") + + for key in 
list(self.emphasis_tag_stack.keys()): + if(key == "b"): + self.o(self.strong_mark) + elif key in ["em","i","u"]: + self.o(self.emphasis_mark) + self.remove_space = True + self.drop_white_space = 1 if tag == "hr" and start: self.p() @@ -640,11 +665,11 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None: # https://spec.commonmark.org/0.28/#motivation # TODO: line up
  1. s > 9 correctly. parent_list = None - for list in self.list: + for item in self.list: self.o( - " " if parent_list == "ol" and list.name == "ul" else " " + " " if parent_list == "ol" and item.name == "ul" else " " ) - parent_list = list.name + parent_list = item.name if li.name == "ul": self.o(self.ul_item_mark + " ") @@ -743,7 +768,7 @@ def o( self.abbr_data += data if not self.quiet: - if self.google_doc: + if self.google_doc or self.remove_space: # prevent white space immediately after 'begin emphasis' # marks ('**' and '_') lstripped_data = data.lstrip() diff --git a/test/new_line_in_emphasis.html b/test/new_line_in_emphasis.html new file mode 100644 index 0000000..4290ada --- /dev/null +++ b/test/new_line_in_emphasis.html @@ -0,0 +1 @@ +Our multiline
    bold text
    \ No newline at end of file diff --git a/test/new_line_in_emphasis.md b/test/new_line_in_emphasis.md new file mode 100644 index 0000000..df3a7ae --- /dev/null +++ b/test/new_line_in_emphasis.md @@ -0,0 +1,3 @@ +**Our multiline** +**bold text** + diff --git a/test/test_new_line_inside_emphasis.py b/test/test_new_line_inside_emphasis.py new file mode 100644 index 0000000..b5d69a4 --- /dev/null +++ b/test/test_new_line_inside_emphasis.py @@ -0,0 +1,8 @@ +import html2text + +def test_emphasis_with_new_line(): + h = html2text.HTML2Text() + html = "Our multiline
    bold text
    " + result = h.handle(html) + assert result == '**Our multiline** \n**bold text**\n\n' + \ No newline at end of file From 8c4f446c97e7b74641af0243e02dde6ac995f1fb Mon Sep 17 00:00:00 2001 From: Alireza Savand <591113+Alir3z4@users.noreply.github.com> Date: Fri, 22 Dec 2023 23:23:12 +0300 Subject: [PATCH 2/3] Using Github Actions instead of Travis CI (#358) * Using Github Actions instead of Travis. * Deprecated support for Python 3.5 (DEPRECATION: Python 3.5 reached the end of its life on September 13th, 2020. Please upgrade your Python as Python 3.5 is no longer maintained. pip 21.0 will drop support for Python 3.5 in January 2021. pip 21.0 will remove support for this functionality.). Removed travis.yml. * Revert "Merge pull request #373 from mborsetti/br_in_a" This reverts commit a3ed67b69160746b03a5861bf5c1b7a5ba76a4e1, reversing changes made to 2ee4495f33c793a110e5fa918b3d8f94b891b24a. --------- Co-authored-by: mborsetti --- .github/workflows/main.yml | 95 ++++++++++++++++++++++++++++++++++++++ .gitignore | 1 + .travis.yml | 33 ------------- ChangeLog.rst | 3 ++ html2text/__init__.py | 4 +- setup.cfg | 7 ++- test/br_inside_a.html | 1 - test/br_inside_a.md | 1 - tox.ini | 10 ++-- 9 files changed, 108 insertions(+), 47 deletions(-) create mode 100644 .github/workflows/main.yml delete mode 100644 .travis.yml delete mode 100644 test/br_inside_a.html delete mode 100644 test/br_inside_a.md diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..f392b03 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,95 @@ +name: CI + +# Controls when the action will run. 
+on: + # Triggers the workflow on push or pull request events but only for the master branch + push: + branches: [ master ] + pull_request: + branches: [ master ] + + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + code-quality: + runs-on: ubuntu-latest + strategy: + matrix: + toxenv: + - black + - flake8 + - mypy + - isort + env: + TOXENV: ${{ matrix.toxenv }} + + name: "Tox ${{ matrix.toxenv }}" + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + with: + fetch-depth: 0 + + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: '3.9' + + - name: Install Requirements [${{ matrix.toxenv }}] + run: pip install tox + + - name: Tox-${{ matrix.toxenv }} + run: tox + # This workflow contains a single job called "build" + test: + # The type of runner that the job will run on + runs-on: ubuntu-latest + strategy: + matrix: + toxenv: + - py38 + - py39 + - py310 + include: + - toxenv: py38 + python-version: '3.8' + - toxenv: py39 + python-version: '3.9' + - toxenv: py310 + python-version: '3.10' + - toxenv: py311 + python-version: '3.11' + - toxenv: py312 + python-version: '3.12' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + TOXENV: ${{ matrix.toxenv }} + name: "Python ${{ matrix.python-version }} | Tox ${{ matrix.toxenv }}" + + # Steps represent a sequence of tasks that will be executed as part of the job + steps: + # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it + - uses: actions/checkout@v2 + with: + fetch-depth: 2 + + - name: setup python + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Requirements [Python-${{ matrix.python-version }}] + run: pip install tox + + - name: Tox-${{ matrix.toxenv }} + run: tox + + - name: Upload coverage to Codecov + # see https://github.com/codecov/codecov-action/blob/master/README.md + uses: 
codecov/codecov-action@v2 + with: + flags: unittests-${{ matrix.python-version }} + fail_ci_if_error: true # default = false + os: toxenv + verbose: true # default = false diff --git a/.gitignore b/.gitignore index 047a2c1..889669a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ dist .idea .coverage .coverage.* +coverage.xml env/ .c9/ .vscode diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 26c3b4c..0000000 --- a/.travis.yml +++ /dev/null @@ -1,33 +0,0 @@ -language: python -cache: pip - -matrix: - include: - - python: "3.7" - env: TOXENV=black - - python: "3.7" - env: TOXENV=flake8 - - python: "3.7" - env: TOXENV=mypy - - python: "3.7" - env: TOXENV=isort - - python: "3.5" - env: TOXENV=py35 - - python: "3.6" - env: TOXENV=py36 - - python: "3.7" - env: TOXENV=py37 - - python: 3.8 - env: TOXENV=py38 - - python: 3.9 - env: TOXENV=py39 - - python: "pypy3" - env: TOXENV=pypy3 - -install: - - pip install tox -script: - - tox -after_success: - - pip install coveralls - - coveralls diff --git a/ChangeLog.rst b/ChangeLog.rst index 3c642f9..caf2f78 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -9,10 +9,13 @@ UNRELEASED * Fix #344: indent ``
      `` inside ``
        `` three spaces instead of two to comply with CommonMark, GFM, etc. * Fix #324: unnecessary spaces around ````, ````, and ``strike`` tags. * Don't wrap tables by default and add a ``--wrap-tables`` config option +* Remove support for Python ≤ 3.6. Now requires Python 3.7+. +* Support for Python 3.10. * Fix #320 padding empty tables and tables with no tags. * Add ``ignore_mailto_links`` config option to ignore ``mailto:`` style links. + 2020.1.16 ========= ---- diff --git a/html2text/__init__.py b/html2text/__init__.py index c59ae16..7e1a279 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -372,9 +372,7 @@ def handle_tag( self.p() if tag == "br" and start: - if self.astack: - self.space = True - elif self.blockquote > 0: + if self.blockquote > 0: self.o(" \n> ") else: self.o(" \n") diff --git a/setup.cfg b/setup.cfg index 6ba62eb..e03cc99 100644 --- a/setup.cfg +++ b/setup.cfg @@ -17,11 +17,10 @@ classifiers = Operating System :: OS Independent Programming Language :: Python Programming Language :: Python :: 3 - Programming Language :: Python :: 3.5 - Programming Language :: Python :: 3.6 Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 + Programming Language :: Python :: 3.10 Programming Language :: Python :: 3 :: Only Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy @@ -30,7 +29,7 @@ platform = OS Independent [options] zip_safe = False packages = html2text -python_requires = >=3.5 +python_requires = >=3.7 [options.entry_points] console_scripts = @@ -48,4 +47,4 @@ combine_as_imports = True profile = black [mypy] -python_version = 3.5 +python_version = 3.7 diff --git a/test/br_inside_a.html b/test/br_inside_a.html deleted file mode 100644 index b8ad150..0000000 --- a/test/br_inside_a.html +++ /dev/null @@ -1 +0,0 @@ -This is a
        test
        diff --git a/test/br_inside_a.md b/test/br_inside_a.md deleted file mode 100644 index 8997d9b..0000000 --- a/test/br_inside_a.md +++ /dev/null @@ -1 +0,0 @@ -[This is a test](https://example.com) diff --git a/tox.ini b/tox.ini index baaf18f..0971292 100644 --- a/tox.ini +++ b/tox.ini @@ -4,12 +4,12 @@ envlist = flake8 isort mypy - py{35,36,37,38,py3} -minversion = 1.9 + py{38,39,310,311,312} +minversion = 3.24 [testenv] commands = - pytest --cov=html2text {posargs} + pytest --cov=./ --cov-report=xml {posargs} deps = pytest pytest-cov @@ -17,7 +17,7 @@ deps = [testenv:black] basepython = python3 commands = - black --target-version py35 --check --diff . + black --target-version py311 --check --diff . deps = black skip_install = true @@ -35,7 +35,7 @@ basepython = python3 commands = isort --check-only --diff . deps = - isort >= 5.0.1 + isort >= 5.10.1 skip_install = true [testenv:mypy] From 099c4b8bfeea09d640e18324bb1d44f051371940 Mon Sep 17 00:00:00 2001 From: Arjoonn Date: Sat, 23 Dec 2023 03:45:09 +0530 Subject: [PATCH 3/3] Fixes #163: Extra newlines added at start of markdown on second (and higher) invocation --- html2text/__init__.py | 1 + test/test_newlines_on_multiple_calls.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 test/test_newlines_on_multiple_calls.py diff --git a/html2text/__init__.py b/html2text/__init__.py index 7e1a279..a4115d5 100644 --- a/html2text/__init__.py +++ b/html2text/__init__.py @@ -142,6 +142,7 @@ def feed(self, data: str) -> None: super().feed(data) def handle(self, data: str) -> str: + self.start = True self.feed(data) self.feed("") markdown = self.optwrap(self.finish()) diff --git a/test/test_newlines_on_multiple_calls.py b/test/test_newlines_on_multiple_calls.py new file mode 100644 index 0000000..9c493ed --- /dev/null +++ b/test/test_newlines_on_multiple_calls.py @@ -0,0 +1,12 @@ +import html2text + +# See https://github.com/Alir3z4/html2text/issues/163 for more information. 
+ + +def test_newline_on_multiple_calls(): + h = html2text.HTML2Text() + html = "

        test

        " + md1 = h.handle(html) + md2 = h.handle(html) + md3 = h.handle(html) + assert md1 == md2 == md3