diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
new file mode 100644
index 0000000..f392b03
--- /dev/null
+++ b/.github/workflows/main.yml
@@ -0,0 +1,95 @@
+name: CI
+
+# Controls when the action will run.
+on:
+ # Triggers the workflow on push or pull request events but only for the master branch
+ push:
+ branches: [ master ]
+ pull_request:
+ branches: [ master ]
+
+ # Allows you to run this workflow manually from the Actions tab
+ workflow_dispatch:
+
+jobs:
+ code-quality:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ toxenv:
+ - black
+ - flake8
+ - mypy
+ - isort
+ env:
+ TOXENV: ${{ matrix.toxenv }}
+
+ name: "Tox ${{ matrix.toxenv }}"
+ steps:
+ # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+ - uses: actions/checkout@v2
+ with:
+ fetch-depth: 0
+
+ - name: setup python
+ uses: actions/setup-python@v2
+ with:
+ python-version: '3.9'
+
+ - name: Install Requirements [${{ matrix.toxenv }}]
+ run: pip install tox
+
+ - name: Tox-${{ matrix.toxenv }}
+ run: tox
+ # Runs the test suite through tox, once per supported Python version
+ test:
+ # The type of runner that the job will run on
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ toxenv:
+ - py38
+ - py39
+ - py310
+ include:
+ - toxenv: py38
+ python-version: '3.8'
+ - toxenv: py39
+ python-version: '3.9'
+ - toxenv: py310
+ python-version: '3.10'
+ - toxenv: py311
+ python-version: '3.11'
+ - toxenv: py312
+ python-version: '3.12'
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ TOXENV: ${{ matrix.toxenv }}
+ name: "Python ${{ matrix.python-version }} | Tox ${{ matrix.toxenv }}"
+
+ # Steps represent a sequence of tasks that will be executed as part of the job
+ steps:
+ # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
+ - uses: actions/checkout@v2
+ with:
+ fetch-depth: 2
+
+ - name: setup python
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install Requirements [Python-${{ matrix.python-version }}]
+ run: pip install tox
+
+ - name: Tox-${{ matrix.toxenv }}
+ run: tox
+
+ - name: Upload coverage to Codecov
+ # Uploads the coverage report produced by the tox run; see https://github.com/codecov/codecov-action/blob/master/README.md
+ uses: codecov/codecov-action@v2
+ with:
+ flags: unittests-${{ matrix.python-version }}
+ fail_ci_if_error: true # default = false
+ os: linux # platform override must be a real OS name; matches runs-on: ubuntu-latest
+ verbose: true # default = false
diff --git a/.gitignore b/.gitignore
index 047a2c1..889669a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@ dist
.idea
.coverage
.coverage.*
+coverage.xml
env/
.c9/
.vscode
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 26c3b4c..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,33 +0,0 @@
-language: python
-cache: pip
-
-matrix:
- include:
- - python: "3.7"
- env: TOXENV=black
- - python: "3.7"
- env: TOXENV=flake8
- - python: "3.7"
- env: TOXENV=mypy
- - python: "3.7"
- env: TOXENV=isort
- - python: "3.5"
- env: TOXENV=py35
- - python: "3.6"
- env: TOXENV=py36
- - python: "3.7"
- env: TOXENV=py37
- - python: 3.8
- env: TOXENV=py38
- - python: 3.9
- env: TOXENV=py39
- - python: "pypy3"
- env: TOXENV=pypy3
-
-install:
- - pip install tox
-script:
- - tox
-after_success:
- - pip install coveralls
- - coveralls
diff --git a/ChangeLog.rst b/ChangeLog.rst
index 3c642f9..caf2f78 100644
--- a/ChangeLog.rst
+++ b/ChangeLog.rst
@@ -9,10 +9,13 @@ UNRELEASED
* Fix #344: indent ``<ul>`` inside ``<ol>`` three spaces instead of two to comply with CommonMark, GFM, etc.
* Fix #324: unnecessary spaces around ``<b>``, ``<em>``, and ``strike`` tags.
* Don't wrap tables by default and add a ``--wrap-tables`` config option
+* Remove support for Python ≤ 3.6. Now requires Python 3.7+.
+* Add support for Python 3.10.
* Fix #320 padding empty tables and tables with no tags.
* Add ``ignore_mailto_links`` config option to ignore ``mailto:`` style links.
+
2020.1.16
=========
----
diff --git a/html2text/__init__.py b/html2text/__init__.py
index c59ae16..e8222ba 100644
--- a/html2text/__init__.py
+++ b/html2text/__init__.py
@@ -86,7 +86,7 @@ def __init__(
self.tag_callback = None
self.open_quote = config.OPEN_QUOTE # covered in cli
self.close_quote = config.CLOSE_QUOTE # covered in cli
-
+
if out is None:
self.out = self.outtextf
else:
@@ -120,6 +120,8 @@ def __init__(
self.tag_stack = (
[]
) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
+ self.emphasis_tag_stack = {}
+ self.remove_space = False
self.emphasis = 0
self.drop_white_space = 0
self.inheader = False
@@ -142,6 +144,7 @@ def feed(self, data: str) -> None:
super().feed(data)
def handle(self, data: str) -> str:
+ self.start = True
self.feed(data)
self.feed("")
markdown = self.optwrap(self.finish())
@@ -302,10 +305,19 @@ def handle_tag(
) -> None:
self.current_tag = tag
+ if tag in ["b","em","i","u"]:
+ if start:
+ if tag in self.emphasis_tag_stack:
+ self.emphasis_tag_stack[tag] += 1
+ else:
+ self.emphasis_tag_stack[tag] = 1
+ elif list(self.emphasis_tag_stack.keys()):
+ self.emphasis_tag_stack.popitem()
+
if self.tag_callback is not None:
if self.tag_callback(self, tag, attrs, start) is True:
return
-
+
# first thing inside the anchor tag is another tag
# that produces some output
if (
@@ -372,12 +384,24 @@ def handle_tag(
self.p()
if tag == "br" and start:
- if self.astack:
- self.space = True
- elif self.blockquote > 0:
+ for key in list(self.emphasis_tag_stack.keys())[::-1]:
+ if(key == "b"):
+ self.o(self.strong_mark)
+ elif key in ["em","i","u"]:
+ self.o(self.emphasis_mark)
+
+ if self.blockquote > 0:
self.o(" \n> ")
else:
self.o(" \n")
+
+ for key in list(self.emphasis_tag_stack.keys()):
+ if(key == "b"):
+ self.o(self.strong_mark)
+ elif key in ["em","i","u"]:
+ self.o(self.emphasis_mark)
+ self.remove_space = True
+ self.drop_white_space = 1
if tag == "hr" and start:
self.p()
@@ -642,11 +666,11 @@ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
# https://spec.commonmark.org/0.28/#motivation
# TODO: line up - s > 9 correctly.
parent_list = None
- for list in self.list:
+ for item in self.list:
self.o(
- " " if parent_list == "ol" and list.name == "ul" else " "
+ " " if parent_list == "ol" and item.name == "ul" else " "
)
- parent_list = list.name
+ parent_list = item.name
if li.name == "ul":
self.o(self.ul_item_mark + " ")
@@ -745,7 +769,7 @@ def o(
self.abbr_data += data
if not self.quiet:
- if self.google_doc:
+ if self.google_doc or self.remove_space:
# prevent white space immediately after 'begin emphasis'
# marks ('**' and '_')
lstripped_data = data.lstrip()
diff --git a/setup.cfg b/setup.cfg
index 6ba62eb..e03cc99 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -17,11 +17,10 @@ classifiers =
Operating System :: OS Independent
Programming Language :: Python
Programming Language :: Python :: 3
- Programming Language :: Python :: 3.5
- Programming Language :: Python :: 3.6
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
+ Programming Language :: Python :: 3.10
Programming Language :: Python :: 3 :: Only
Programming Language :: Python :: Implementation :: CPython
Programming Language :: Python :: Implementation :: PyPy
@@ -30,7 +29,7 @@ platform = OS Independent
[options]
zip_safe = False
packages = html2text
-python_requires = >=3.5
+python_requires = >=3.7
[options.entry_points]
console_scripts =
@@ -48,4 +47,4 @@ combine_as_imports = True
profile = black
[mypy]
-python_version = 3.5
+python_version = 3.7
diff --git a/test/br_inside_a.html b/test/br_inside_a.html
deleted file mode 100644
index b8ad150..0000000
--- a/test/br_inside_a.html
+++ /dev/null
@@ -1 +0,0 @@
-<a href="https://example.com">This is a<br>test</a>
diff --git a/test/br_inside_a.md b/test/br_inside_a.md
deleted file mode 100644
index 8997d9b..0000000
--- a/test/br_inside_a.md
+++ /dev/null
@@ -1 +0,0 @@
-[This is a test](https://example.com)
diff --git a/test/new_line_in_emphasis.html b/test/new_line_in_emphasis.html
new file mode 100644
index 0000000..4290ada
--- /dev/null
+++ b/test/new_line_in_emphasis.html
@@ -0,0 +1 @@
+<b>Our multiline<br>bold text</b>
\ No newline at end of file
diff --git a/test/new_line_in_emphasis.md b/test/new_line_in_emphasis.md
new file mode 100644
index 0000000..df3a7ae
--- /dev/null
+++ b/test/new_line_in_emphasis.md
@@ -0,0 +1,3 @@
+**Our multiline**
+**bold text**
+
diff --git a/test/test_new_line_inside_emphasis.py b/test/test_new_line_inside_emphasis.py
new file mode 100644
index 0000000..b5d69a4
--- /dev/null
+++ b/test/test_new_line_inside_emphasis.py
@@ -0,0 +1,8 @@
+import html2text
+
+def test_emphasis_with_new_line():
+ h = html2text.HTML2Text()
+ html = "Our multiline
bold text"
+ result = h.handle(html)
+ assert result == '**Our multiline** \n**bold text**\n\n'
+
\ No newline at end of file
diff --git a/test/test_newlines_on_multiple_calls.py b/test/test_newlines_on_multiple_calls.py
new file mode 100644
index 0000000..9c493ed
--- /dev/null
+++ b/test/test_newlines_on_multiple_calls.py
@@ -0,0 +1,12 @@
+import html2text
+
+# See https://github.com/Alir3z4/html2text/issues/163 for more information.
+
+
+def test_newline_on_multiple_calls():
+ h = html2text.HTML2Text()
+ html = "test
"
+ md1 = h.handle(html)
+ md2 = h.handle(html)
+ md3 = h.handle(html)
+ assert md1 == md2 == md3
diff --git a/tox.ini b/tox.ini
index baaf18f..0971292 100644
--- a/tox.ini
+++ b/tox.ini
@@ -4,12 +4,12 @@ envlist =
flake8
isort
mypy
- py{35,36,37,38,py3}
-minversion = 1.9
+ py{38,39,310,311,312}
+minversion = 3.24
[testenv]
commands =
- pytest --cov=html2text {posargs}
+ pytest --cov=./ --cov-report=xml {posargs}
deps =
pytest
pytest-cov
@@ -17,7 +17,7 @@ deps =
[testenv:black]
basepython = python3
commands =
- black --target-version py35 --check --diff .
+ black --target-version py311 --check --diff .
deps =
black
skip_install = true
@@ -35,7 +35,7 @@ basepython = python3
commands =
isort --check-only --diff .
deps =
- isort >= 5.0.1
+ isort >= 5.10.1
skip_install = true
[testenv:mypy]