From d95fcddaedc6b6f8622a92e72d46c19de141f112 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Fri, 16 Jan 2026 10:53:20 +0200 Subject: [PATCH 1/2] gh-143897: Remove the isxidstart() and isxidcontinue() methods of unicodedata.ucd_3_2_0 They are now only exposed as functions of the unicodedata module. --- Lib/test/test_unicodedata.py | 27 +++++++------- ...-01-16-10-53-17.gh-issue-143897.hWJBHN.rst | 3 ++ Modules/clinic/unicodedata.c.h | 30 ++++++++-------- Modules/unicodedata.c | 36 +++++-------------- 4 files changed, 39 insertions(+), 57 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py index 07aa992de6d706..f9c0cd20438174 100644 --- a/Lib/test/test_unicodedata.py +++ b/Lib/test/test_unicodedata.py @@ -83,15 +83,7 @@ def test_method_checksum(self): self.assertEqual(result, self.expectedchecksum) -class UnicodeFunctionsTest(unittest.TestCase): - db = unicodedata - old = False - - # Update this if the database changes. Make sure to do a full rebuild - # (e.g. 'make distclean && make') to get the correct checksum. - expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349' - if quicktest else - '65670ae03a324c5f9e826a4de3e25bae4d73c9b7') +class BaseUnicodeFunctionsTest: def test_function_checksum(self): db = self.db @@ -589,6 +581,16 @@ def test_east_asian_width_unassigned(self): self.assertEqual(eaw(char), 'A') self.assertIs(self.db.name(char, None), None) +class UnicodeFunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest): + db = unicodedata + old = False + + # Update this if the database changes. Make sure to do a full rebuild + # (e.g. 'make distclean && make') to get the correct checksum. 
+ expectedchecksum = ('83cc43a2fbb779185832b4c049217d80b05bf349' + if quicktest else + '65670ae03a324c5f9e826a4de3e25bae4d73c9b7') + def test_isxidstart(self): self.assertTrue(self.db.isxidstart('S')) self.assertTrue(self.db.isxidstart('\u0AD0')) # GUJARATI OM @@ -832,18 +834,13 @@ def graphemes(*args): ['a', '\U0001F1FA\U0001F1E6', '\U0001F1FA\U0001F1F3']) -class Unicode_3_2_0_FunctionsTest(UnicodeFunctionsTest): +class Unicode_3_2_0_FunctionsTest(unittest.TestCase, BaseUnicodeFunctionsTest): db = unicodedata.ucd_3_2_0 old = True expectedchecksum = ('f4526159891a4b766dd48045646547178737ba09' if quicktest else 'f217b8688d7bdff31db4207e078a96702f091597') - test_grapheme_cluster_break = None - test_indic_conjunct_break = None - test_extended_pictographic = None - test_grapheme_break = None - class UnicodeMiscTest(unittest.TestCase): db = unicodedata diff --git a/Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst b/Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst new file mode 100644 index 00000000000000..9047ae130ae6ab --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst @@ -0,0 +1,3 @@ +Remove the :meth:`!isxidstart` and :meth:`!isxidcontinue` methods of +:data:`unicodedata.ucd_3_2_0`. They are now only exposed as the +:mod:`unicodedata` function. 
diff --git a/Modules/clinic/unicodedata.c.h b/Modules/clinic/unicodedata.c.h index c0497cf45f6cff..8e2dd7a0ce5663 100644 --- a/Modules/clinic/unicodedata.c.h +++ b/Modules/clinic/unicodedata.c.h @@ -519,20 +519,20 @@ unicodedata_UCD_name(PyObject *self, PyObject *const *args, Py_ssize_t nargs) return return_value; } -PyDoc_STRVAR(unicodedata_UCD_isxidstart__doc__, -"isxidstart($self, chr, /)\n" +PyDoc_STRVAR(unicodedata_isxidstart__doc__, +"isxidstart($module, chr, /)\n" "--\n" "\n" "Return True if the character has the XID_Start property, else False."); -#define UNICODEDATA_UCD_ISXIDSTART_METHODDEF \ - {"isxidstart", (PyCFunction)unicodedata_UCD_isxidstart, METH_O, unicodedata_UCD_isxidstart__doc__}, +#define UNICODEDATA_ISXIDSTART_METHODDEF \ + {"isxidstart", (PyCFunction)unicodedata_isxidstart, METH_O, unicodedata_isxidstart__doc__}, static PyObject * -unicodedata_UCD_isxidstart_impl(PyObject *self, int chr); +unicodedata_isxidstart_impl(PyObject *module, int chr); static PyObject * -unicodedata_UCD_isxidstart(PyObject *self, PyObject *arg) +unicodedata_isxidstart(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; int chr; @@ -549,26 +549,26 @@ unicodedata_UCD_isxidstart(PyObject *self, PyObject *arg) goto exit; } chr = PyUnicode_READ_CHAR(arg, 0); - return_value = unicodedata_UCD_isxidstart_impl(self, chr); + return_value = unicodedata_isxidstart_impl(module, chr); exit: return return_value; } -PyDoc_STRVAR(unicodedata_UCD_isxidcontinue__doc__, -"isxidcontinue($self, chr, /)\n" +PyDoc_STRVAR(unicodedata_isxidcontinue__doc__, +"isxidcontinue($module, chr, /)\n" "--\n" "\n" "Return True if the character has the XID_Continue property, else False."); -#define UNICODEDATA_UCD_ISXIDCONTINUE_METHODDEF \ - {"isxidcontinue", (PyCFunction)unicodedata_UCD_isxidcontinue, METH_O, unicodedata_UCD_isxidcontinue__doc__}, +#define UNICODEDATA_ISXIDCONTINUE_METHODDEF \ + {"isxidcontinue", (PyCFunction)unicodedata_isxidcontinue, METH_O, 
unicodedata_isxidcontinue__doc__}, static PyObject * -unicodedata_UCD_isxidcontinue_impl(PyObject *self, int chr); +unicodedata_isxidcontinue_impl(PyObject *module, int chr); static PyObject * -unicodedata_UCD_isxidcontinue(PyObject *self, PyObject *arg) +unicodedata_isxidcontinue(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; int chr; @@ -585,7 +585,7 @@ unicodedata_UCD_isxidcontinue(PyObject *self, PyObject *arg) goto exit; } chr = PyUnicode_READ_CHAR(arg, 0); - return_value = unicodedata_UCD_isxidcontinue_impl(self, chr); + return_value = unicodedata_isxidcontinue_impl(module, chr); exit: return return_value; @@ -798,4 +798,4 @@ unicodedata_extended_pictographic(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=6991246310e3f2aa input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0f09cc90f06ace76 input=a9049054013a1b77]*/ diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c index 6904ee14811d48..586ce8d36dd46f 100644 --- a/Modules/unicodedata.c +++ b/Modules/unicodedata.c @@ -1565,9 +1565,8 @@ unicodedata_UCD_name_impl(PyObject *self, int chr, PyObject *default_value) } /*[clinic input] -unicodedata.UCD.isxidstart +unicodedata.isxidstart - self: self chr: int(accept={str}) / @@ -1576,24 +1575,15 @@ Return True if the character has the XID_Start property, else False. 
[clinic start generated code]*/ static PyObject * -unicodedata_UCD_isxidstart_impl(PyObject *self, int chr) -/*[clinic end generated code: output=944005823c72c3ef input=9353f88d709c21fb]*/ +unicodedata_isxidstart_impl(PyObject *module, int chr) +/*[clinic end generated code: output=7ae0e1a3915aa031 input=3812717f3a6bfc56]*/ { - if (UCD_Check(self)) { - const change_record *old = get_old_record(self, chr); - if (old->category_changed == 0) { - /* unassigned */ - Py_RETURN_FALSE; - } - } - return PyBool_FromLong(_PyUnicode_IsXidStart(chr)); } /*[clinic input] -unicodedata.UCD.isxidcontinue +unicodedata.isxidcontinue - self: self chr: int(accept={str}) / @@ -1602,17 +1592,9 @@ Return True if the character has the XID_Continue property, else False. [clinic start generated code]*/ static PyObject * -unicodedata_UCD_isxidcontinue_impl(PyObject *self, int chr) -/*[clinic end generated code: output=9438dcbff5ca3e41 input=bbb8dd3ac0d2d709]*/ +unicodedata_isxidcontinue_impl(PyObject *module, int chr) +/*[clinic end generated code: output=517caa8b38c73aed input=a971ed6e57cac374]*/ { - if (UCD_Check(self)) { - const change_record *old = get_old_record(self, chr); - if (old->category_changed == 0) { - /* unassigned */ - Py_RETURN_FALSE; - } - } - return PyBool_FromLong(_PyUnicode_IsXidContinue(chr)); } @@ -2128,10 +2110,12 @@ static PyMethodDef unicodedata_functions[] = { UNICODEDATA_INDIC_CONJUNCT_BREAK_METHODDEF UNICODEDATA_EXTENDED_PICTOGRAPHIC_METHODDEF UNICODEDATA_ITER_GRAPHEMES_METHODDEF + UNICODEDATA_ISXIDSTART_METHODDEF + UNICODEDATA_ISXIDCONTINUE_METHODDEF // The following definitions are shared between the module // and the UCD class. 
-#define DB_methods (unicodedata_functions + 4) +#define DB_methods (unicodedata_functions + 6) UNICODEDATA_UCD_DECIMAL_METHODDEF UNICODEDATA_UCD_DIGIT_METHODDEF @@ -2143,8 +2127,6 @@ static PyMethodDef unicodedata_functions[] = { UNICODEDATA_UCD_EAST_ASIAN_WIDTH_METHODDEF UNICODEDATA_UCD_DECOMPOSITION_METHODDEF UNICODEDATA_UCD_NAME_METHODDEF - UNICODEDATA_UCD_ISXIDSTART_METHODDEF - UNICODEDATA_UCD_ISXIDCONTINUE_METHODDEF UNICODEDATA_UCD_LOOKUP_METHODDEF UNICODEDATA_UCD_IS_NORMALIZED_METHODDEF UNICODEDATA_UCD_NORMALIZE_METHODDEF From 6ebd6c37de537f7d5c25e01b85e8710304f7d9b1 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Sat, 17 Jan 2026 15:20:25 +0200 Subject: [PATCH 2/2] Update Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com> --- .../Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst b/Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst index 9047ae130ae6ab..d53eac0bd356ea 100644 --- a/Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst +++ b/Misc/NEWS.d/next/Library/2026-01-16-10-53-17.gh-issue-143897.hWJBHN.rst @@ -1,3 +1,3 @@ Remove the :meth:`!isxidstart` and :meth:`!isxidcontinue` methods of -:data:`unicodedata.ucd_3_2_0`. They are now only exposed as the -:mod:`unicodedata` function. +:data:`unicodedata.ucd_3_2_0`. They are now only exposed as +:func:`unicodedata.isxidstart` and :func:`unicodedata.isxidcontinue`.