From 4794c8cf4adcfe094f360bce31a4c568a7f2731b Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Sun, 17 Mar 2024 17:10:41 -0400 Subject: [PATCH 1/8] Switching from pybind11 to nanobind. --- CMakeLists.txt | 16 ++++++++++++++++ src/binding.cpp | 48 +++++++++++++++++++++++++++--------------------- 2 files changed, 43 insertions(+), 21 deletions(-) create mode 100644 CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..f1feeaa --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.15...3.27) +project(can_ada) +find_package(Python 3.8 COMPONENTS Interpreter Development.Module REQUIRED) + +if (NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build." FORCE) + set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") +endif() + +execute_process( + COMMAND "${Python_EXECUTABLE}" -m nanobind --cmake_dir + OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE NB_DIR) +list(APPEND CMAKE_PREFIX_PATH "${NB_DIR}") +find_package(nanobind CONFIG REQUIRED) + +nanobind_add_module(can_ada src/binding.cpp src/ada.cpp) \ No newline at end of file diff --git a/src/binding.cpp b/src/binding.cpp index 38dc87f..384ed09 100644 --- a/src/binding.cpp +++ b/src/binding.cpp @@ -1,9 +1,10 @@ -#include +#include +#include #include "ada.h" -namespace py = pybind11; +namespace py = nanobind; -PYBIND11_MODULE(can_ada, m) { +NB_MODULE(can_ada, m) { #ifdef VERSION_INFO m.attr("__version__") = Py_STRINGIFY(VERSION_INFO); #else @@ -13,21 +14,21 @@ PYBIND11_MODULE(can_ada, m) { m.def("can_parse", &ada::can_parse, py::arg("input"), - py::arg("base_input") = static_cast(nullptr)); + py::arg("base_input") = static_cast(nullptr)); py::class_(m, "URL") - .def_property("hash", &ada::url_aggregator::get_hash, &ada::url_aggregator::set_hash) - .def_property("host", &ada::url_aggregator::get_host, &ada::url_aggregator::set_host) - .def_property("hostname", &ada::url_aggregator::get_hostname, &ada::url_aggregator::set_hostname) - .def_property("href", &ada::url_aggregator::get_href, &ada::url_aggregator::set_href) - .def_property("origin", &ada::url_aggregator::get_origin, nullptr) - .def_property("password", &ada::url_aggregator::get_password, &ada::url_aggregator::set_password) - .def_property("pathname", &ada::url_aggregator::get_pathname, &ada::url_aggregator::set_pathname) - .def_property("pathname_length", &ada::url_aggregator::get_pathname_length, nullptr) - .def_property("port", &ada::url_aggregator::get_port, &ada::url_aggregator::set_port) - .def_property("protocol", &ada::url_aggregator::get_protocol, &ada::url_aggregator::set_protocol) - .def_property("search", &ada::url_aggregator::get_search, &ada::url_aggregator::set_search) - .def_property("username", &ada::url_aggregator::get_username, &ada::url_aggregator::set_username) + .def_prop_rw("hash", &ada::url_aggregator::get_hash, &ada::url_aggregator::set_hash) + .def_prop_rw("host", &ada::url_aggregator::get_host, &ada::url_aggregator::set_host) + .def_prop_rw("hostname", &ada::url_aggregator::get_hostname, &ada::url_aggregator::set_hostname) + .def_prop_rw("href", &ada::url_aggregator::get_href, &ada::url_aggregator::set_href) + .def_prop_ro("origin", &ada::url_aggregator::get_origin) + .def_prop_rw("password", &ada::url_aggregator::get_password, &ada::url_aggregator::set_password) + .def_prop_rw("pathname", &ada::url_aggregator::get_pathname, &ada::url_aggregator::set_pathname) + .def_prop_ro("pathname_length", &ada::url_aggregator::get_pathname_length) + .def_prop_rw("port", &ada::url_aggregator::get_port, &ada::url_aggregator::set_port) + .def_prop_rw("protocol", &ada::url_aggregator::get_protocol, &ada::url_aggregator::set_protocol) + .def_prop_rw("search", &ada::url_aggregator::get_search, &ada::url_aggregator::set_search) + .def_prop_rw("username", &ada::url_aggregator::get_username, &ada::url_aggregator::set_username) .def("has_credentials", &ada::url_aggregator::has_credentials) .def("has_empty_hostname", &ada::url_aggregator::has_empty_hostname) @@ -44,15 +45,20 @@ PYBIND11_MODULE(can_ada, m) { .def("__str__", &ada::url_aggregator::get_href) .def("validate", &ada::url_aggregator::validate); - m.def("idna_decode", &ada::idna::to_unicode); - m.def("idna_encode", [](std::string input) -> py::bytes { - return py::bytes(ada::idna::to_ascii(input)); + m.def("idna_decode", [](py::bytes input) -> py::str { + auto result = ada::idna::to_unicode(input.c_str()); + return py::str(result.c_str()); }); - m.def("parse", [](std::string_view input) { + m.def("idna_encode", [](const std::string_view input) -> py::bytes { + auto result = ada::idna::to_ascii(input); + return py::bytes(result.c_str(), result.size()); + }); + + m.def("parse", [](const std::string_view input) { ada::result url = ada::parse(input); if (!url) { - throw pybind11::value_error("URL could not be parsed."); + throw py::value_error("URL could not be parsed."); } return url.value(); }); From 106d420ea2f862526dfc4b2aafcc6375af730c13 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Mon, 18 Mar 2024 12:24:13 -0400 Subject: [PATCH 2/8] Closes #4, closes #5. Patch version bump. --- LICENSE | 43 +++++++++++++++++++++++++++++++++++++++++++ setup.py | 5 +++-- 2 files changed, 46 insertions(+), 2 deletions(-) create mode 100644 LICENSE diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..c91373b --- /dev/null +++ b/LICENSE @@ -0,0 +1,43 @@ +-------------------------------------------------------------------------------- +The can_ada binding is available under the following licenses: + +Copyright 2024 Tyler Kennedy + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- +The included Ada project is used under the following license: + +Copyright 2023 Yagiz Nizipli and Daniel Lemire + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. \ No newline at end of file diff --git a/setup.py b/setup.py index 9cb3788..c1808f9 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup from pybind11.setup_helpers import Pybind11Extension -__version__ = "1.1.1" +__version__ = "1.1.2" setup( name="can_ada", @@ -10,8 +10,9 @@ author_email="tk@tkte.ch", long_description_content_type="text/markdown", long_description=open("README.md").read(), - url="https://github.com/tktech/can-ada", + url="https://github.com/tktech/can_ada", description="Ada is a fast spec-compliant url parser", + license="MIT", ext_modules=[ Pybind11Extension( "can_ada", From 4ace025e8ac4e722b7360c5ea7c5d817390f6d04 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Wed, 28 Jan 2026 00:10:57 -0500 Subject: [PATCH 3/8] Bumping Ada to v3.4.1 --- src/ada.cpp | 151 +++++++++++++++++++++++----------------------------- src/ada.h | 85 +++++++++-------------------- 2 files changed, 93 insertions(+), 143 deletions(-) diff --git a/src/ada.cpp b/src/ada.cpp index d7f9b3a..f7709ae 100644 --- a/src/ada.cpp +++ b/src/ada.cpp @@ -1,4 +1,4 @@ -/* auto-generated on 2024-09-02 20:07:32 -0400. Do not edit! */ +/* auto-generated on 2024-06-18 17:01:23 -0400. Do not edit! */ /* begin file src/ada.cpp */ #include "ada.h" /* begin file src/checkers.cpp */ @@ -9795,10 +9795,6 @@ ADA_POP_DISABLE_WARNINGS namespace ada::unicode { -constexpr bool is_tabs_or_newline(char c) noexcept { - return c == '\r' || c == '\n' || c == '\t'; -} - constexpr uint64_t broadcast(uint8_t v) noexcept { return 0x101010101010101ull * v; } @@ -9833,8 +9829,13 @@ ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept { // first check for short strings in which case we do it naively. if (user_input.size() < 16) { // slow path - return std::any_of(user_input.begin(), user_input.end(), - is_tabs_or_newline); + for (size_t i = 0; i < user_input.size(); i++) { + if (user_input[i] == '\r' || user_input[i] == '\n' || + user_input[i] == '\t') { + return true; + } + } + return false; } // fast path for long strings (expected to be common) size_t i = 0; @@ -9872,8 +9873,13 @@ ada_really_inline bool has_tabs_or_newline( std::string_view user_input) noexcept { // first check for short strings in which case we do it naively. if (user_input.size() < 16) { // slow path - return std::any_of(user_input.begin(), user_input.end(), - is_tabs_or_newline); + for (size_t i = 0; i < user_input.size(); i++) { + if (user_input[i] == '\r' || user_input[i] == '\n' || + user_input[i] == '\t') { + return true; + } + } + return false; } // fast path for long strings (expected to be common) size_t i = 0; @@ -10258,6 +10264,10 @@ std::string percent_encode(const std::string_view input, return out; } +std::string to_unicode(std::string_view input) { + return ada::idna::to_unicode(input); +} + } // namespace ada::unicode /* end file src/unicode.cpp */ /* begin file src/serializers.cpp */ @@ -11553,21 +11563,21 @@ ada_really_inline bool url::parse_scheme(const std::string_view input) { // If url's scheme is not a special scheme and buffer is a special scheme, // then return. if (is_special() != is_input_special) { - return false; + return true; } // If url includes credentials or has a non-null port, and buffer is // "file", then return. if ((has_credentials() || port.has_value()) && parsed_type == ada::scheme::type::FILE) { - return false; + return true; } // If url's scheme is "file" and its host is an empty host, then return. // An empty host is the empty string. if (type == ada::scheme::type::FILE && host.has_value() && host.value().empty()) { - return false; + return true; } } @@ -12177,7 +12187,7 @@ result_type parse_url_impl(std::string_view user_input, if (!url.is_valid) { return url; } - if constexpr (result_type_is_ada_url_aggregator && store_values) { + if constexpr (result_type_is_ada_url_aggregator) { // Most of the time, we just need user_input.size(). // In some instances, we may need a bit more. /////////////////////////// @@ -12194,6 +12204,9 @@ result_type parse_url_impl(std::string_view user_input, helpers::leading_zeroes(uint32_t(1 | user_input.size()))) + 1; url.reserve(reserve_capacity); + // + // + // } std::string tmp_buffer; std::string_view internal_input; @@ -12416,36 +12429,32 @@ result_type parse_url_impl(std::string_view user_input, password_token_seen = password_token_location != std::string_view::npos; - if constexpr (store_values) { - if (!password_token_seen) { - if constexpr (result_type_is_ada_url) { - url.username += unicode::percent_encode( - authority_view, - character_sets::USERINFO_PERCENT_ENCODE); - } else { - url.append_base_username(unicode::percent_encode( - authority_view, - character_sets::USERINFO_PERCENT_ENCODE)); - } + if (!password_token_seen) { + if constexpr (result_type_is_ada_url) { + url.username += unicode::percent_encode( + authority_view, character_sets::USERINFO_PERCENT_ENCODE); } else { - if constexpr (result_type_is_ada_url) { - url.username += unicode::percent_encode( - authority_view.substr(0, password_token_location), - character_sets::USERINFO_PERCENT_ENCODE); - url.password += unicode::percent_encode( - authority_view.substr(password_token_location + 1), - character_sets::USERINFO_PERCENT_ENCODE); - } else { - url.append_base_username(unicode::percent_encode( - authority_view.substr(0, password_token_location), - character_sets::USERINFO_PERCENT_ENCODE)); - url.append_base_password(unicode::percent_encode( - authority_view.substr(password_token_location + 1), - character_sets::USERINFO_PERCENT_ENCODE)); - } + url.append_base_username(unicode::percent_encode( + authority_view, character_sets::USERINFO_PERCENT_ENCODE)); + } + } else { + if constexpr (result_type_is_ada_url) { + url.username += unicode::percent_encode( + authority_view.substr(0, password_token_location), + character_sets::USERINFO_PERCENT_ENCODE); + url.password += unicode::percent_encode( + authority_view.substr(password_token_location + 1), + character_sets::USERINFO_PERCENT_ENCODE); + } else { + url.append_base_username(unicode::percent_encode( + authority_view.substr(0, password_token_location), + character_sets::USERINFO_PERCENT_ENCODE)); + url.append_base_password(unicode::percent_encode( + authority_view.substr(password_token_location + 1), + character_sets::USERINFO_PERCENT_ENCODE)); } } - } else if constexpr (store_values) { + } else { if constexpr (result_type_is_ada_url) { url.password += unicode::percent_encode( authority_view, character_sets::USERINFO_PERCENT_ENCODE); @@ -12472,10 +12481,8 @@ result_type parse_url_impl(std::string_view user_input, break; } if (end_of_authority == input_size) { - if constexpr (store_values) { - if (fragment.has_value()) { - url.update_unencoded_base_hash(*fragment); - } + if (fragment.has_value()) { + url.update_unencoded_base_hash(*fragment); } return url; } @@ -12790,11 +12797,9 @@ result_type parse_url_impl(std::string_view user_input, // Optimization: Avoiding going into PATH state improves the // performance of urls ending with /. if (input_position == input_size) { - if constexpr (store_values) { - url.update_base_pathname("/"); - if (fragment.has_value()) { - url.update_unencoded_base_hash(*fragment); - } + url.update_base_pathname("/"); + if (fragment.has_value()) { + url.update_unencoded_base_hash(*fragment); } return url; } @@ -13040,10 +13045,8 @@ result_type parse_url_impl(std::string_view user_input, ada::unreachable(); } } - if constexpr (store_values) { - if (fragment.has_value()) { - url.update_unencoded_base_hash(*fragment); - } + if (fragment.has_value()) { + url.update_unencoded_base_hash(*fragment); } return url; } @@ -13215,21 +13218,21 @@ template // If url's scheme is not a special scheme and buffer is a special scheme, // then return. if (is_special() != is_input_special) { - return false; + return true; } // If url includes credentials or has a non-null port, and buffer is // "file", then return. if ((has_credentials() || components.port != url_components::omitted) && parsed_type == ada::scheme::type::FILE) { - return false; + return true; } // If url's scheme is "file" and its host is an empty host, then return. // An empty host is the empty string. if (type == ada::scheme::type::FILE && components.host_start == components.host_end) { - return false; + return true; } } @@ -13830,8 +13833,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return "null"; } -[[nodiscard]] std::string_view url_aggregator::get_username() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_username() const noexcept { ada_log("url_aggregator::get_username"); if (has_non_empty_username()) { return helpers::substring(buffer, components.protocol_end + 2, @@ -13840,8 +13842,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return ""; } -[[nodiscard]] std::string_view url_aggregator::get_password() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_password() const noexcept { ada_log("url_aggregator::get_password"); if (has_non_empty_password()) { return helpers::substring(buffer, components.username_end + 1, @@ -13850,8 +13851,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return ""; } -[[nodiscard]] std::string_view url_aggregator::get_port() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_port() const noexcept { ada_log("url_aggregator::get_port"); if (components.port == url_components::omitted) { return ""; @@ -13860,8 +13860,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { components.pathname_start); } -[[nodiscard]] std::string_view url_aggregator::get_hash() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_hash() const noexcept { ada_log("url_aggregator::get_hash"); // If this's URL's fragment is either null or the empty string, then return // the empty string. Return U+0023 (#), followed by this's URL's fragment. @@ -13874,8 +13873,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, components.hash_start); } -[[nodiscard]] std::string_view url_aggregator::get_host() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_host() const noexcept { ada_log("url_aggregator::get_host"); // Technically, we should check if there is a hostname, but // the code below works even if there isn't. @@ -13893,8 +13891,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, start, components.pathname_start); } -[[nodiscard]] std::string_view url_aggregator::get_hostname() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_hostname() const noexcept { ada_log("url_aggregator::get_hostname"); // Technically, we should check if there is a hostname, but // the code below works even if there isn't. @@ -13908,8 +13905,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, start, components.host_end); } -[[nodiscard]] std::string_view url_aggregator::get_pathname() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_pathname() const noexcept { ada_log("url_aggregator::get_pathname pathname_start = ", components.pathname_start, " buffer.size() = ", buffer.size(), " components.search_start = ", components.search_start, @@ -13923,8 +13919,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, components.pathname_start, ending_index); } -[[nodiscard]] std::string_view url_aggregator::get_search() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_search() const noexcept { ada_log("url_aggregator::get_search"); // If this's URL's query is either null or the empty string, then return the // empty string. Return U+003F (?), followed by this's URL's query. @@ -13941,8 +13936,7 @@ bool url_aggregator::set_hostname(const std::string_view input) { return helpers::substring(buffer, components.search_start, ending_index); } -[[nodiscard]] std::string_view url_aggregator::get_protocol() const noexcept - ada_lifetime_bound { +[[nodiscard]] std::string_view url_aggregator::get_protocol() const noexcept { ada_log("url_aggregator::get_protocol"); return helpers::substring(buffer, 0, components.protocol_end); } @@ -15427,15 +15421,6 @@ void ada_search_params_sort(ada_url_search_params result) { } } -void ada_search_params_reset(ada_url_search_params result, const char* input, - size_t length) { - ada::result& r = - *(ada::result*)result; - if (r) { - r->reset(std::string_view(input, length)); - } -} - void ada_search_params_append(ada_url_search_params result, const char* key, size_t key_length, const char* value, size_t value_length) { diff --git a/src/ada.h b/src/ada.h index 4b00198..f5e6acd 100644 --- a/src/ada.h +++ b/src/ada.h @@ -1,4 +1,4 @@ -/* auto-generated on 2024-09-02 20:07:32 -0400. Do not edit! */ +/* auto-generated on 2024-06-18 17:01:23 -0400. Do not edit! */ /* begin file include/ada.h */ /** * @file ada.h @@ -479,18 +479,6 @@ namespace ada { #define ADA_NEON 1 #endif -#ifndef __has_cpp_attribute -#define ada_lifetime_bound -#elif __has_cpp_attribute(msvc::lifetimebound) -#define ada_lifetime_bound [[msvc::lifetimebound]] -#elif __has_cpp_attribute(clang::lifetimebound) -#define ada_lifetime_bound [[clang::lifetimebound]] -#elif __has_cpp_attribute(lifetimebound) -#define ada_lifetime_bound [[lifetimebound]] -#else -#define ada_lifetime_bound -#endif - #endif // ADA_COMMON_DEFS_H /* end file include/ada/common_defs.h */ #include @@ -4632,6 +4620,12 @@ namespace ada::unicode { bool to_ascii(std::optional& out, std::string_view plain, size_t first_percent); +/** + * @private + * @see https://www.unicode.org/reports/tr46/#ToUnicode + */ +std::string to_unicode(std::string_view input); + /** * @private * Checks if the input has tab or newline characters. @@ -4857,38 +4851,35 @@ struct url_aggregator : url_base { * @see https://url.spec.whatwg.org/#dom-url-href * @see https://url.spec.whatwg.org/#concept-url-serializer */ - [[nodiscard]] inline std::string_view get_href() const noexcept - ada_lifetime_bound; + [[nodiscard]] inline std::string_view get_href() const noexcept; /** * The username getter steps are to return this's URL's username. * This function does not allocate memory. * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-username */ - [[nodiscard]] std::string_view get_username() const noexcept - ada_lifetime_bound; + [[nodiscard]] std::string_view get_username() const noexcept; /** * The password getter steps are to return this's URL's password. * This function does not allocate memory. * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-password */ - [[nodiscard]] std::string_view get_password() const noexcept - ada_lifetime_bound; + [[nodiscard]] std::string_view get_password() const noexcept; /** * Return this's URL's port, serialized. * This function does not allocate memory. * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-port */ - [[nodiscard]] std::string_view get_port() const noexcept ada_lifetime_bound; + [[nodiscard]] std::string_view get_port() const noexcept; /** * Return U+0023 (#), followed by this's URL's fragment. * This function does not allocate memory. * @return a lightweight std::string_view.. * @see https://url.spec.whatwg.org/#dom-url-hash */ - [[nodiscard]] std::string_view get_hash() const noexcept ada_lifetime_bound; + [[nodiscard]] std::string_view get_hash() const noexcept; /** * Return url's host, serialized, followed by U+003A (:) and url's port, * serialized. @@ -4897,7 +4888,7 @@ struct url_aggregator : url_base { * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-host */ - [[nodiscard]] std::string_view get_host() const noexcept ada_lifetime_bound; + [[nodiscard]] std::string_view get_host() const noexcept; /** * Return this's URL's host, serialized. * This function does not allocate memory. @@ -4905,8 +4896,7 @@ struct url_aggregator : url_base { * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-hostname */ - [[nodiscard]] std::string_view get_hostname() const noexcept - ada_lifetime_bound; + [[nodiscard]] std::string_view get_hostname() const noexcept; /** * The pathname getter steps are to return the result of URL path serializing * this's URL. @@ -4914,8 +4904,7 @@ struct url_aggregator : url_base { * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-pathname */ - [[nodiscard]] std::string_view get_pathname() const noexcept - ada_lifetime_bound; + [[nodiscard]] std::string_view get_pathname() const noexcept; /** * Compute the pathname length in bytes without instantiating a view or a * string. @@ -4929,7 +4918,7 @@ struct url_aggregator : url_base { * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-search */ - [[nodiscard]] std::string_view get_search() const noexcept ada_lifetime_bound; + [[nodiscard]] std::string_view get_search() const noexcept; /** * The protocol getter steps are to return this's URL's scheme, followed by * U+003A (:). @@ -4937,8 +4926,7 @@ struct url_aggregator : url_base { * @return a lightweight std::string_view. * @see https://url.spec.whatwg.org/#dom-url-protocol */ - [[nodiscard]] std::string_view get_protocol() const noexcept - ada_lifetime_bound; + [[nodiscard]] std::string_view get_protocol() const noexcept; /** * A URL includes credentials if its username or password is not the empty @@ -5846,7 +5834,7 @@ inline void url::set_scheme(std::string &&new_scheme) noexcept { type = ada::scheme::get_scheme_type(new_scheme); // We only move the 'scheme' if it is non-special. if (!is_special()) { - non_special_scheme = std::move(new_scheme); + non_special_scheme = new_scheme; } } @@ -5895,15 +5883,10 @@ inline void url::copy_scheme(const ada::url &u) { ada_really_inline size_t url::parse_port(std::string_view view, bool check_trailing_content) noexcept { ada_log("parse_port('", view, "') ", view.size()); - if (!view.empty() && view[0] == '-') { - ada_log("parse_port: view[0] == '0' && view.size() > 1"); - is_valid = false; - return 0; - } uint16_t parsed_port{}; auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port); if (r.ec == std::errc::result_out_of_range) { - ada_log("parse_port: r.ec == std::errc::result_out_of_range"); + ada_log("parse_port: std::errc::result_out_of_range"); is_valid = false; return 0; } @@ -6799,8 +6782,8 @@ inline bool url_aggregator::has_port() const noexcept { buffer[components.host_end + 1] == '.'; } -[[nodiscard]] inline std::string_view url_aggregator::get_href() const noexcept - ada_lifetime_bound { +[[nodiscard]] inline std::string_view url_aggregator::get_href() + const noexcept { ada_log("url_aggregator::get_href"); return buffer; } @@ -6808,15 +6791,10 @@ inline bool url_aggregator::has_port() const noexcept { ada_really_inline size_t url_aggregator::parse_port( std::string_view view, bool check_trailing_content) noexcept { ada_log("url_aggregator::parse_port('", view, "') ", view.size()); - if (!view.empty() && view[0] == '-') { - ada_log("parse_port: view[0] == '0' && view.size() > 1"); - is_valid = false; - return 0; - } uint16_t parsed_port{}; auto r = std::from_chars(view.data(), view.data() + view.size(), parsed_port); if (r.ec == std::errc::result_out_of_range) { - ada_log("parse_port: r.ec == std::errc::result_out_of_range"); + ada_log("parse_port: std::errc::result_out_of_range"); is_valid = false; return 0; } @@ -7015,14 +6993,6 @@ struct url_search_params { inline auto back() const { return params.back(); } inline auto operator[](size_t index) const { return params[index]; } - /** - * @private - * Used to reset the search params to a new input. - * Used primarily for C API. - * @param input - */ - void reset(std::string_view input); - private: typedef std::pair key_value_pair; std::vector params{}; @@ -7093,11 +7063,6 @@ namespace ada { template url_search_params url_search_params_iter::EMPTY; -inline void url_search_params::reset(std::string_view input) { - params.clear(); - initialize(input); -} - inline void url_search_params::initialize(std::string_view input) { if (!input.empty() && input.front() == '?') { input.remove_prefix(1); @@ -7307,14 +7272,14 @@ url_search_params_entries_iter::next() { #ifndef ADA_ADA_VERSION_H #define ADA_ADA_VERSION_H -#define ADA_VERSION "2.9.2" +#define ADA_VERSION "2.8.0" namespace ada { enum { ADA_VERSION_MAJOR = 2, - ADA_VERSION_MINOR = 9, - ADA_VERSION_REVISION = 2, + ADA_VERSION_MINOR = 8, + ADA_VERSION_REVISION = 0, }; } // namespace ada From 6dda8e4e8bce9653592684fdc1d69a152b61b2d6 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Wed, 28 Jan 2026 00:46:34 -0500 Subject: [PATCH 4/8] --amend --- src/binding.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/binding.cpp b/src/binding.cpp index 8daec1e..eac2a68 100644 --- a/src/binding.cpp +++ b/src/binding.cpp @@ -16,12 +16,10 @@ NB_MODULE(can_ada, m) { m.attr("__version__") = "dev"; #endif - // Fast path without optional base - avoids std::optional overhead m.def("can_parse", [](std::string_view input) { return ada::can_parse(input); }, py::arg("input")); - // Overload with base URL m.def("can_parse", [](std::string_view input, std::string_view base_input) { return ada::can_parse(input, &base_input); }, py::arg("input"), py::arg("base_input")); From 5e5d983fb3f2723a8bbd3f80f56194c4457fbc63 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Wed, 28 Jan 2026 01:15:26 -0500 Subject: [PATCH 5/8] Adding 3.14 and bumping action versions --- .github/workflows/release.yml | 26 +++++++++++++------------- .github/workflows/test.yml | 6 +++--- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 809dd64..b96f76f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -13,12 +13,12 @@ jobs: name: Creating source release runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v6.0.2 - name: Setting up Python - uses: actions/setup-python@v5.4.0 + uses: actions/setup-python@v6.2.0 with: - python-version: 3.9 + python-version: "3.9" - name: Installing python build dependencies run: | @@ -35,7 +35,7 @@ jobs: run: | pytest - - uses: actions/upload-artifact@v4.3.1 + - uses: actions/upload-artifact@v6.0.0 with: name: dist-sdist path: dist/*.tar.gz @@ -51,31 +51,31 @@ jobs: fail-fast: true matrix: os: [ubuntu-22.04, windows-2022, macos-13] - py: ["cp39", "cp310", "cp311", "cp312", "cp313"] + py: ["cp39", "cp310", "cp311", "cp312", "cp313", "cp314"] steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v6.0.2 - - uses: actions/setup-python@v5.4.0 + - uses: actions/setup-python@v6.2.0 name: Setting up Python with: - python-version: '3.9' + python-version: "3.9" - name: Set up QEMU if: runner.os == 'Linux' - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v3.7.0 with: platforms: all - name: Build & test wheels - uses: pypa/cibuildwheel@v2.22.0 + uses: pypa/cibuildwheel@v3.3.1 env: CIBW_ARCHS_LINUX: auto aarch64 ppc64le s390x CIBW_ARCHS_MACOS: x86_64 arm64 universal2 CIBW_BUILD: "${{ matrix.py }}-*" CIBW_TEST_SKIP: "*_arm64 *_universal2:arm64" - - uses: actions/upload-artifact@v4.3.1 + - uses: actions/upload-artifact@v6.0.0 with: name: dist-${{ matrix.os }}-${{ matrix.py }} path: ./wheelhouse/*.whl @@ -85,13 +85,13 @@ jobs: name: Uploading built packages to pypi for release. runs-on: ubuntu-latest steps: - - uses: actions/download-artifact@v4.1.4 + - uses: actions/download-artifact@v7.0.0 with: pattern: dist-* merge-multiple: true path: dist - - uses: pypa/gh-action-pypi-publish@v1.8.14 + - uses: pypa/gh-action-pypi-publish@v1.13.0 with: user: ${{ secrets.PYPI_USERNAME }} password: ${{ secrets.PYPI_PASSWORD }} diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4b82544..ef09ab8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -16,13 +16,13 @@ jobs: fail-fast: false matrix: os: [ubuntu-22.04, macos-13, windows-2022] - python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: - - uses: actions/checkout@v4.1.1 + - uses: actions/checkout@v6.0.2 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5.0.0 + uses: actions/setup-python@v6.2.0 with: python-version: ${{ matrix.python-version }} From cb041a2bd62cea96c35d14814d7e33f05bb42874 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Wed, 28 Jan 2026 01:15:59 -0500 Subject: [PATCH 6/8] Version bump --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7775ee1..f3172d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build" [project] name = "can_ada" -version = "2.0.0" +version = "3.0.0" description = "Ada is a fast spec-compliant url parser" readme = "README.md" license = "MIT" From 2f757adb31a777f5d62d836d2af1ef88924463a5 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Wed, 28 Jan 2026 01:17:16 -0500 Subject: [PATCH 7/8] macos-13 got retired, bump to 15 --- .github/workflows/release.yml | 2 +- .github/workflows/test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b96f76f..95c59e1 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -50,7 +50,7 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-22.04, windows-2022, macos-13] + os: [ubuntu-22.04, windows-2022, macos-15] py: ["cp39", "cp310", "cp311", "cp312", "cp313", "cp314"] steps: diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index ef09ab8..0bc6c17 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,7 +15,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-22.04, macos-13, windows-2022] + os: [ubuntu-22.04, macos-15, windows-2022] python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] steps: From 8f28e02e854a2ebf6f6be27427a2d8f39cef9687 Mon Sep 17 00:00:00 2001 From: Tyler Kennedy Date: Wed, 28 Jan 2026 02:06:02 -0500 Subject: [PATCH 8/8] Add yarl and update README --- README.md | 20 +++++++++++--------- pyproject.toml | 2 +- tests/test_benchmark.py | 16 ++++++++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 793a3e0..b96d071 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,8 @@ C++17-or-greater compiler will be required to build the underlying Ada library. ## WHATWG URL compliance -Unlike the standard library's `urllib.parse` module, this library is compliant with the WHATWG URL specification. +Unlike the standard library's `urllib.parse` module, this library is compliant +with the WHATWG URL specification. ```python import can_ada @@ -82,13 +83,14 @@ print(params.values()) # ["usa", "off", "2", "3"] We find that `can_ada` is typically ~4x faster than urllib: ``` ---------------------------------------------------------------------------------- -Name (time in ms) Min Max Mean ---------------------------------------------------------------------------------- -test_can_ada_parse 54.1304 (1.0) 54.6734 (1.0) 54.3699 (1.0) -test_ada_python_parse 107.5653 (1.99) 108.1666 (1.98) 107.7817 (1.98) -test_urllib_parse 251.5167 (4.65) 255.1327 (4.67) 253.2407 (4.66) ---------------------------------------------------------------------------------- +------------------------------------------------------------------------------------- benchmark: 4 tests ------------------------------------------------------------------------------------ +Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_can_ada_parse 36.7565 (1.0) 40.3057 (1.0) 37.1606 (1.0) 0.6789 (1.0) 36.9869 (1.0) 0.2526 (1.0) 2;3 26.9102 (1.0) 27 1 +test_ada_python_parse 134.0627 (3.65) 143.6443 (3.56) 135.8992 (3.66) 3.1977 (4.71) 134.7860 (3.64) 1.2441 (4.92) 1;1 7.3584 (0.27) 8 1 +test_urllib_parse 208.8403 (5.68) 212.9208 (5.28) 211.2021 (5.68) 1.7273 (2.54) 211.3141 (5.71) 2.9319 (11.60) 1;0 4.7348 (0.18) 5 1 +test_yarl_parse 238.6351 (6.49) 246.4206 (6.11) 242.4351 (6.52) 3.4108 (5.02) 241.8302 (6.54) 6.1566 (24.37) 2;0 4.1248 (0.15) 5 1 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` To run the benchmarks locally, use: @@ -97,4 +99,4 @@ To run the benchmarks locally, use: pytest --runslow ``` -[Ada]: https://ada-url.com/ \ No newline at end of file +[Ada]: https://ada-url.com/ diff --git a/pyproject.toml b/pyproject.toml index f3172d7..cd53457 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ requires-python = ">=3.9" Homepage = "https://github.com/tktech/can_ada" [project.optional-dependencies] -test = ["pytest", "pytest-benchmark", "ada_url"] +test = ["pytest", "pytest-benchmark", "ada_url", "yarl"] [tool.scikit-build] wheel.packages = ["can_ada-stubs"] diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 448221f..639e0d3 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -6,6 +6,7 @@ ada_url = pytest.importorskip("ada_url") can_ada = pytest.importorskip("can_ada") +yarl = pytest.importorskip("yarl") @lru_cache @@ -40,6 +41,16 @@ def can_ada_parse(): pass +def yarl_parse(): + for line in data(): + try: + yarl.URL(line) + except ValueError: + # There are a small number of URLs in the sample data that are + # not valid WHATWG URLs. + pass + + @pytest.mark.slow def test_urllib_parse(benchmark): benchmark(urllib_parse) @@ -53,3 +64,8 @@ def test_ada_python_parse(benchmark): @pytest.mark.slow def test_can_ada_parse(benchmark): benchmark(can_ada_parse) + + +@pytest.mark.slow +def test_yarl_parse(benchmark): + benchmark(yarl_parse)