From 50b95d5deac45fcb6dff08f5e9d8484ab389b18c Mon Sep 17 00:00:00 2001 From: patchwright <292882882+patchwright@users.noreply.github.com> Date: Sun, 14 Jun 2026 20:13:58 +0200 Subject: [PATCH] Fix leading space in surnames after capitalize() with empty middle name capitalize() split each attribute with str.split(' '), which returns [''] (not []) for an empty string. cap_piece() returns '' for an empty part, so an empty middle name produced middle_list = [''], which leaked into surnames_list (middle_list + last_list) and yielded a leading space in the surnames property: >>> hn = HumanName('john doe'); hn.capitalize(); hn.surnames ' Doe' # leading space (should be 'Doe') The same spurious '' element also appeared in title_list/first_list/last_list for empty attributes. Using str.split() instead returns [] for empty strings and is equivalent for the already-whitespace-collapsed pieces cap_piece() returns. The suffix split (', ') is intentionally left unchanged. Added a regression test in HumanNameCapitalizationTestCase. 
--- nameparser/parser.py | 8 ++++---- tests.py | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/nameparser/parser.py b/nameparser/parser.py index ee361de..8eaa1ac 100644 --- a/nameparser/parser.py +++ b/nameparser/parser.py @@ -980,10 +980,10 @@ def capitalize(self, force: bool | None = None) -> None: if not force and not (name == name.upper() or name == name.lower()): return - self.title_list = self.cap_piece(self.title, 'title').split(' ') - self.first_list = self.cap_piece(self.first, 'first').split(' ') - self.middle_list = self.cap_piece(self.middle, 'middle').split(' ') - self.last_list = self.cap_piece(self.last, 'last').split(' ') + self.title_list = self.cap_piece(self.title, 'title').split() + self.first_list = self.cap_piece(self.first, 'first').split() + self.middle_list = self.cap_piece(self.middle, 'middle').split() + self.last_list = self.cap_piece(self.last, 'last').split() self.suffix_list = self.cap_piece(self.suffix, 'suffix').split(', ') def handle_capitalization(self) -> None: diff --git a/tests.py b/tests.py index c137ab4..b77aba2 100644 --- a/tests.py +++ b/tests.py @@ -2124,6 +2124,28 @@ def test_capitization_middle_initial_is_also_a_conjunction(self) -> None: hn.capitalize() self.m(str(hn), 'Scott E. Werner', hn) + def test_capitalize_empty_name_part_has_no_leading_space_in_surnames(self) -> None: + # capitalize() used str.split(' ') on each attribute, which returns + # [''] (rather than []) for an empty string. That spurious element + # leaked into surnames_list (middle_list + last_list) and produced a + # leading space in the surnames property, e.g. ' Doe' instead of 'Doe'. 
+ hn = HumanName('john doe') + hn.capitalize() + self.m(hn.surnames, 'Doe', hn) + self.assertEqual(hn.middle_list, []) + self.assertEqual(hn.surnames_list, ['Doe']) + + # force=True on a mixed-case name hits the same code path + hn = HumanName('Jane Doe') + hn.capitalize(force=True) + self.m(hn.surnames, 'Doe', hn) + self.assertEqual(hn.middle_list, []) + + # other empty attribute lists are also free of the spurious '' element + self.assertEqual(hn.title_list, []) + self.assertEqual(hn.first_list, ['Jane']) + self.assertEqual(hn.last_list, ['Doe']) + # Leaving already-capitalized names alone def test_no_change_to_mixed_chase(self) -> None: hn = HumanName('Shirley Maclaine')