From c5b8b06498e067db3b8feafc385c9b4c49119c63 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Sun, 3 Mar 2024 22:41:14 -0700 Subject: [PATCH 01/14] update readme for ulid --- HISTORY.md | 4 ++++ README.md | 22 ++++++++++++++++++++-- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index bcb3813..aaa6a4d 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,4 +1,8 @@ # Torid Changelog +## Version 1.4.0 - 2024-03-XX + +* Add Torid::ULID to implement the [ulid specification](https://github.com/ulid/spec) + ## Version 1.3.0 - 2017-02-17 * Add Torid::UUID#node_id_s to allow access to just the node portion diff --git a/README.md b/README.md index 63ab138..f59891f 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,8 @@ ## DESCRIPTION -Temporally Ordered IDs. Generate universally unique identifiers (UUID) -that sort lexically in time order. +Temporally Ordered IDs. Generate Universally Unique Lexicographically Sortable +Identifiers (ULID) and (UUID) that sort lexically in time order. ## DETAILS @@ -24,11 +24,27 @@ events that are entering a system with the following criteria: 5. Eventually stored in a UUID field in a database. So 128bit ids are totally fine. +Torid generates these id's in 2 different algorithms, you may choose which one +you want. + +### Original algorithm + The IDs that Torid generates are 128bit IDs made up of 2, 64bit parts. * 64bit microsecond level UNIX timestamp * 64bit hash of the system hostname, process id and a random value. 
+### ULID + +[Universally Unique Lexicographically Sortable Identifier](https://github.com/ulid/spec) + +These ID's are 128bit ID's that are: + +* 128bit combatible with UUID +* Canonically encoded as 26 character strings +* Case insensitive +* URL Safe + ## EXAMPLES #### Using the defaults @@ -60,8 +76,10 @@ The vast majority of the credit and research stems from: * [jondot's](https://github.com/jondot) blog post on [Fast ID Generation](http://blog.paracode.com/2012/04/16/fast-id-generation-part-1/) served to solidify my thoughts on the criteria I needed in an ID generation system. * This let me to [Boundary's Flake](http://boundary.com/blog/2012/01/12/flake-a-decentralized-k-ordered-unique-id-generator-in-erlang/) * [James Golick's](https://github.com/jamesgolick) [lexical_uuid](https://github.com/jamesgolick/lexical_uuid), which if I had found a day earlier, I might be using instead of creating this. +* [ulid specification](https://github.com/ulid/spec) You could consider Torid to be a reimplementation of [lexical_uuid](https://github.com/jamesgolick/lexical_uuid). It definately steals some code from it and [simple_uuid](https://github.com/cassandra-rb/simple_uuid) +and an implementation of [ulid](https://github.com/ulid/spec). 
Blog posts around ID generation: From d5de97c6197adf5affebd2e27fc3d9d6f32367b5 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Mon, 4 Mar 2024 00:55:57 -0700 Subject: [PATCH 02/14] Add in a crockford base 32 encoding library --- lib/torid/crockford.rb | 66 ++++++++++++++++++++++++++++++++++++++++++ test/test_crockford.rb | 64 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 lib/torid/crockford.rb create mode 100644 test/test_crockford.rb diff --git a/lib/torid/crockford.rb b/lib/torid/crockford.rb new file mode 100644 index 0000000..65ca9d9 --- /dev/null +++ b/lib/torid/crockford.rb @@ -0,0 +1,66 @@ +module Torid + # Internal: + # + # An implementation of Crockford's Base32 encoding and decoding, without the + # check digit. + # + # https://www.crockford.com/base32.html + module Crockford + ENCODING = "0123456789ABCDEFGHJKMNPQRSTVWXYZ".freeze + DASH = "-".freeze + + BITS_PER_CODE = 5 + BASE_RADIX = (1 << BITS_PER_CODE) + + # Precalculate the string to integer mapping for the decoding to account for + # case insensitivity and confusing characters + # + TO_0 = "0Oo".freeze TO_1 = "1IiLl".freeze + + DECODING = Hash.new.tap do |h| + ENCODING.chars.each_with_index do |char, value| + h[char] = value + h[char.downcase] = value + end + TO_0.each_char do |char| + h[char] = 0 + end + TO_1.each_char do |char| + h[char] = 1 + end + end.freeze + + # Internal: Encode an integer into a Crockford Base32 string + # + # Example: + # + # Crockkford.encode( 1234567890 ) # => "14SC0PJ" + # + def self.encode( value ) + case value + when Integer + value.digits(BASE_RADIX).map { |i| ENCODING[i] }.reverse.join + else + raise ArgumentError, "#{int} must be an Integer" + end + end + + # Internal: Decode a Crockford Base32 string into an integer + # + # Example: + # Crockford.decode( "14SC0PJ" ) # => 1234567890 + # Crockford.decode( "14SCoPK" ) # => 1234567890 + # Crockford.decode( "14SCOPK" ) # => 1234567890 + # Crockford.decode( "l4SCOPK" ) 
# => 1234567890 + # Crockford.decode( "L4SCOPK" ) # => 1234567890 + # Crockford.decode( "L4S-COPK" ) # => 1234567890 + # + def self.decode( str ) + str.chars + .reject { |c| c == DASH } + .reduce(0) do |acc, c| + acc = acc * BASE_RADIX + DECODING[c] + end + end + end +end diff --git a/test/test_crockford.rb b/test/test_crockford.rb new file mode 100644 index 0000000..8c4eebd --- /dev/null +++ b/test/test_crockford.rb @@ -0,0 +1,64 @@ +require 'test_helper' +require 'torid/crockford' + +module Torid + class CrockfordTest < ::Minitest::Test + ENCODE_TESTS = { + 822354 => "S32J", + 822355 => "S32K", + 1234 => "16J", + 32 => "10", + 33 => "11", + 1234567890 => "14SC0PJ", + 1234567891 => "14SC0PK", + 1e20.to_i => "2PQHTY5NHH0000", + (1e20.to_i + 1) =>"2PQHTY5NHH0001", + 973113317 => "X011Z5", + } + + def test_encodings + ENCODE_TESTS.each do |int, str| + assert_equal( str, Crockford.encode( int ) ) + end + end + + def test_decodings + ENCODE_TESTS.each do |int, str| + assert_equal( int, Crockford.decode( str ) ) + end + end + + def test_decodings_with_mixed_case + ENCODE_TESTS.each do |int, str| + str = str.downcase + assert_equal( int, Crockford.decode( str ) ) + end + end + + def test_decodings_with_confusing_oO0 + ENCODE_TESTS.each do |int, str| + Torid::Crockford::TO_0.each_char do |char| + str = str.tr("0", char) + assert_equal( int, Crockford.decode( str ) ) + end + end + end + + def test_decodings_with_confusing_iIlL + ENCODE_TESTS.each do |int, str| + Torid::Crockford::TO_1.each_char do |char| + str = str.tr("1", char) + assert_equal( int, Crockford.decode( str ) ) + end + end + end + + require 'debug' + def test_decodings_with_hyphens + ENCODE_TESTS.each do |int, in_str| + str = in_str.chars.each_slice(5).map { |a| a.join("") }.join("-") + assert_equal( int, Crockford.decode( str ) ) + end + end + end +end From 7ad14c8151d333083da9870406ef44b1930435c6 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Tue, 5 Mar 2024 21:14:27 -0700 Subject: [PATCH 03/14] 
update benchmark --- test/bench_base32.rb | 68 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 test/bench_base32.rb diff --git a/test/bench_base32.rb b/test/bench_base32.rb new file mode 100644 index 0000000..26c41d9 --- /dev/null +++ b/test/bench_base32.rb @@ -0,0 +1,68 @@ +require 'benchmark/ips' + +# This is a quick bemchmark to compare the performance of ways to convert +# an integer to a base32 string, we want to comare a low-level way to do it +# that uses bit shifting and a high-level way that uses the Integer#digits +# +# Turns out using the digits(32) method is faster than using bit shifting... +# unless you turn on --yjit in which case the bit shifting loop is better +# +# Here are the results on my machine: +# ----------------------------------------------------------------------------- +# % ruby bench_base32.rb +# ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] +# Warming up -------------------------------------- +# digits(32) 51.362k i/100ms +# shifty 50.049k i/100ms +# Calculating ------------------------------------- +# digits(32) 516.402k (± 0.7%) i/s - 5.188M in 10.046042s +# shifty 499.623k (± 1.7%) i/s - 5.005M in 10.020259s +# +# Comparison: +# digits(32): 516402.3 i/s +# shifty: 499623.3 i/s - 1.03x slower +# +# Shared at: https://ips.fastruby.io/7NL +# +# ----------------------------------------------------------------------------- +# % ruby --yjit bench_base32.rb +# ruby 3.3.0 (2023-12-25 revision 5124f9ac75) +YJIT [arm64-darwin22] +# Warming up -------------------------------------- +# digits(32) 55.593k i/100ms +# shifty 79.186k i/100ms +# Calculating ------------------------------------- +# digits(32) 557.021k (± 3.5%) i/s - 5.615M in 10.094148s +# shifty 806.013k (± 2.7%) i/s - 8.077M in 10.028949s +# +# Comparison: +# shifty: 806013.3 i/s +# digits(32): 557021.4 i/s - 1.45x slower +# +# Shared at: https://ips.fastruby.io/7NM +# +Benchmark.ips do |x| + x.config(:time => 10, 
:warmup => 2) + + ENCODING = "0123456789ABCDEFGHJKMNPQRSTVWXYZ".freeze + NUMBER = (1e20.to_i + 1) + BITS_PER_CODE = 5 + CODE_MASK = (1 << BITS_PER_CODE) - 1 + + x.report("digits(32)") do + value = NUMBER + value.digits(32).map { |i| ENCODING[i] }.join + end + + x.report("shifty") do + chars = [] + value = NUMBER + while value > 0 + offset = value & CODE_MASK + chars.unshift(ENCODING[offset]) + value >>= BITS_PER_CODE + end + chars.join + end + + x.compare! +end From 2d94124f9d3ee81596339c6db4113959c0885467 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 00:09:10 -0700 Subject: [PATCH 04/14] fix layout --- lib/torid/crockford.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/torid/crockford.rb b/lib/torid/crockford.rb index 65ca9d9..8fb68cf 100644 --- a/lib/torid/crockford.rb +++ b/lib/torid/crockford.rb @@ -15,7 +15,8 @@ module Crockford # Precalculate the string to integer mapping for the decoding to account for # case insensitivity and confusing characters # - TO_0 = "0Oo".freeze TO_1 = "1IiLl".freeze + TO_0 = "0Oo".freeze + TO_1 = "1IiLl".freeze DECODING = Hash.new.tap do |h| ENCODING.chars.each_with_index do |char, value| From 3c3f8be1f0a6e572d940155040a700135f7a3c4e Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 00:09:19 -0700 Subject: [PATCH 05/14] speling --- lib/torid/generator.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/torid/generator.rb b/lib/torid/generator.rb index d5b11bd..43489a5 100644 --- a/lib/torid/generator.rb +++ b/lib/torid/generator.rb @@ -63,7 +63,7 @@ def node_id # # http://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function # - # This method is copeid from + # This method is copied from # https://github.com/jamesgolick/lexical_uuid/blob/master/lib/lexical_uuid.rb#L14 # with the random bytes added by me. 
# From 69c4ea6ac1e4e915065473479b5ef1f67776d95b Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 00:10:56 -0700 Subject: [PATCH 06/14] cleanroom implementation of fnv --- HISTORY.md | 1 + Rakefile | 3 +- lib/torid.rb | 1 + lib/torid/fnv.rb | 220 ++++++++++++++++++++++++++++++++++++++++++++++ tasks/custom.rake | 115 ++++++++++++++++++++++++ test/test_fnv.rb | 17 ++++ torid.gemspec | 1 - 7 files changed, 355 insertions(+), 3 deletions(-) create mode 100644 lib/torid/fnv.rb create mode 100644 tasks/custom.rake create mode 100644 test/test_fnv.rb diff --git a/HISTORY.md b/HISTORY.md index aaa6a4d..3b8aa2f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,7 @@ ## Version 1.4.0 - 2024-03-XX * Add Torid::ULID to implement the [ulid specification](https://github.com/ulid/spec) +* Remove dependency on very old fnv gem and reimplment fnv algorithm internally ## Version 1.3.0 - 2017-02-17 diff --git a/Rakefile b/Rakefile index 009d020..2e7d0ca 100644 --- a/Rakefile +++ b/Rakefile @@ -7,8 +7,6 @@ This.email = "jeremy@copiousfreetime.org" This.homepage = "http://github.com/copiousfreetime/#{ This.name }" This.ruby_gemspec do |spec| - spec.add_dependency( 'fnv', '~> 0.2' ) - spec.add_development_dependency( 'rake', '~> 13.0') spec.add_development_dependency( 'minitest', '~> 5.21' ) spec.add_development_dependency( 'minitest-junit', '~> 1.1' ) @@ -27,3 +25,4 @@ This.ruby_gemspec do |spec| end load 'tasks/default.rake' +load 'tasks/custom.rake' diff --git a/lib/torid.rb b/lib/torid.rb index f7ab3cb..98a008c 100644 --- a/lib/torid.rb +++ b/lib/torid.rb @@ -18,5 +18,6 @@ def self.uuid end end require 'torid/clock' +require 'torid/fnv' require 'torid/uuid' require 'torid/generator' diff --git a/lib/torid/fnv.rb b/lib/torid/fnv.rb new file mode 100644 index 0000000..34f11ef --- /dev/null +++ b/lib/torid/fnv.rb @@ -0,0 +1,220 @@ +# DO NOT EDIT: +# +# This file is generated from the original prime configuration of the +# FNV Algorithm and is generated for runtime 
performance. See the +# rake task that generates this file to edit. +# +module Torid + module Fnv + + #-------------------------------------------------------------------------- + # Constants and code for the 32 bit fnv1 and fnv1a functions + #-------------------------------------------------------------------------- + N32_PRIME = 0x1000193 + N32_BASIS = 0x811c9dc5 + N32_MASK = 0xffffffff + + # Public: Return the 32 hash of the data using the fnv1 algorithm + # + # Example: + # + # Fnv.fnv1_32(data) => >>32 bit number<< + # + def self.fnv1_32(data) + hash = N32_BASIS + data.each_byte do |byte| + hash = ((hash * N32_PRIME) ^ byte) & N32_MASK + end + hash + end + + # Public: Return the 32 hash of the data using the fnv1a algorithm + # + # Example: + # + # Fnv.fnv1a_32(data) => >>32 bit number<< + # + def self.fnv1a_32(data) + hash = N32_BASIS + data.each_byte do |byte| + hash = ((hash ^ byte) * N32_PRIME) & N32_MASK + end + hash + end + + #-------------------------------------------------------------------------- + # Constants and code for the 64 bit fnv1 and fnv1a functions + #-------------------------------------------------------------------------- + N64_PRIME = 0x100000001b3 + N64_BASIS = 0xcbf29ce484222325 + N64_MASK = 0xffffffffffffffff + + # Public: Return the 64 hash of the data using the fnv1 algorithm + # + # Example: + # + # Fnv.fnv1_64(data) => >>64 bit number<< + # + def self.fnv1_64(data) + hash = N64_BASIS + data.each_byte do |byte| + hash = ((hash * N64_PRIME) ^ byte) & N64_MASK + end + hash + end + + # Public: Return the 64 hash of the data using the fnv1a algorithm + # + # Example: + # + # Fnv.fnv1a_64(data) => >>64 bit number<< + # + def self.fnv1a_64(data) + hash = N64_BASIS + data.each_byte do |byte| + hash = ((hash ^ byte) * N64_PRIME) & N64_MASK + end + hash + end + + #-------------------------------------------------------------------------- + # Constants and code for the 128 bit fnv1 and fnv1a functions + 
#-------------------------------------------------------------------------- + N128_PRIME = 0x1000000000000000000013b + N128_BASIS = 0x6c62272e07bb014262b821756295c58d + N128_MASK = 0xffffffffffffffffffffffffffffffff + + # Public: Return the 128 hash of the data using the fnv1 algorithm + # + # Example: + # + # Fnv.fnv1_128(data) => >>128 bit number<< + # + def self.fnv1_128(data) + hash = N128_BASIS + data.each_byte do |byte| + hash = ((hash * N128_PRIME) ^ byte) & N128_MASK + end + hash + end + + # Public: Return the 128 hash of the data using the fnv1a algorithm + # + # Example: + # + # Fnv.fnv1a_128(data) => >>128 bit number<< + # + def self.fnv1a_128(data) + hash = N128_BASIS + data.each_byte do |byte| + hash = ((hash ^ byte) * N128_PRIME) & N128_MASK + end + hash + end + + #-------------------------------------------------------------------------- + # Constants and code for the 256 bit fnv1 and fnv1a functions + #-------------------------------------------------------------------------- + N256_PRIME = 0x1000000000000000000000000000000000000000163 + N256_BASIS = 0xdd268dbcaac550362d98c384c4e576ccc8b1536847b6bbb31023b4c8caee0535 + N256_MASK = 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + + # Public: Return the 256 hash of the data using the fnv1 algorithm + # + # Example: + # + # Fnv.fnv1_256(data) => >>256 bit number<< + # + def self.fnv1_256(data) + hash = N256_BASIS + data.each_byte do |byte| + hash = ((hash * N256_PRIME) ^ byte) & N256_MASK + end + hash + end + + # Public: Return the 256 hash of the data using the fnv1a algorithm + # + # Example: + # + # Fnv.fnv1a_256(data) => >>256 bit number<< + # + def self.fnv1a_256(data) + hash = N256_BASIS + data.each_byte do |byte| + hash = ((hash ^ byte) * N256_PRIME) & N256_MASK + end + hash + end + + #-------------------------------------------------------------------------- + # Constants and code for the 512 bit fnv1 and fnv1a functions + 
#-------------------------------------------------------------------------- + N512_PRIME = 0x100000000000000000000000000000000000000000000000000000000000000000000000000000000000157 + N512_BASIS = 0xb86db0b1171f4416dca1e50f309990acac87d059c90000000000000000000d21e948f68a34c192f62ea79bc942dbe7ce182036415f56e34bac982aac4afe9fd9 + N512_MASK = 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + + # Public: Return the 512 hash of the data using the fnv1 algorithm + # + # Example: + # + # Fnv.fnv1_512(data) => >>512 bit number<< + # + def self.fnv1_512(data) + hash = N512_BASIS + data.each_byte do |byte| + hash = ((hash * N512_PRIME) ^ byte) & N512_MASK + end + hash + end + + # Public: Return the 512 hash of the data using the fnv1a algorithm + # + # Example: + # + # Fnv.fnv1a_512(data) => >>512 bit number<< + # + def self.fnv1a_512(data) + hash = N512_BASIS + data.each_byte do |byte| + hash = ((hash ^ byte) * N512_PRIME) & N512_MASK + end + hash + end + + #-------------------------------------------------------------------------- + # Constants and code for the 1024 bit fnv1 and fnv1a functions + #-------------------------------------------------------------------------- + N1024_PRIME = 0x10000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000018d + N1024_BASIS = 0x5f7a76758ecc4d32e56d5a591028b74b29fc4223fdada16c3bf34eda3674da9a21d9000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004c6d7eb6e73802734510a555f256cc005ae556bde8cc9c6a93b21aff4b16c71ee90b3 + N1024_MASK = 0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff + + # Public: 
Return the 1024 hash of the data using the fnv1 algorithm + # + # Example: + # + # Fnv.fnv1_1024(data) => >>1024 bit number<< + # + def self.fnv1_1024(data) + hash = N1024_BASIS + data.each_byte do |byte| + hash = ((hash * N1024_PRIME) ^ byte) & N1024_MASK + end + hash + end + + # Public: Return the 1024 hash of the data using the fnv1a algorithm + # + # Example: + # + # Fnv.fnv1a_1024(data) => >>1024 bit number<< + # + def self.fnv1a_1024(data) + hash = N1024_BASIS + data.each_byte do |byte| + hash = ((hash ^ byte) * N1024_PRIME) & N1024_MASK + end + hash + end + end +end diff --git a/tasks/custom.rake b/tasks/custom.rake new file mode 100644 index 0000000..6dd43c2 --- /dev/null +++ b/tasks/custom.rake @@ -0,0 +1,115 @@ +# Generate the constants used for the fuctions +# +desc "generate the fnv.rb file used for the FNV functions" +task :generate_fnv do |t| + OFFSET_SEED = "chongo /\\../\\" + PRIMES = { + # 32 bit FNV_prime = 2^24 + 2^8 + 0x93 + 32 => (1 << 24) + (1 << 8) + 0x93, + + # 64 bit FNV_prime = 2^40 + 2^8 + 0xb3 + 64 => (1 << 40) + (1 << 8) + 0xb3, + + # 128 bit FNV_prime = 2^88 + 2^8 + 0x3b + 128 => (1 << 88) + (1 << 8) + 0x3b, + + # 256 bit FNV_prime = 2^168 + 2^8 + 0x63 + 256 => (1 << 168) + (1 << 8) + 0x63, + + # 512 bit FNV_prime = 2^344 + 2^8 + 0x57 + 512 => (1 << 344) + (1 << 8) + 0x57, + + # 1024 bit FNV prime = 1024 bit FNV_prime = 2^680 + 2^8 + 0x8d + 1024 => (1 << 680) + (1 << 8) + 0x8d, + } + + def fnv0(data: OFFSET_SEED, prime:, mask:) + hash = 0 + data.each_byte do |byte| + hash = ((hash * prime) ^ byte) & mask + end + hash + end + + + + File.open("lib/torid/fnv.rb", "w+") do |f| + pre = <<~PRE + # DO NOT EDIT: + # + # This file is generated from the original prime configuration of the + # FNV Algorithm and is generated for runtime performance. See the + # rake task that generates this file to edit. 
+ # + module Torid + module Fnv + PRE + + f.puts(pre) + + indent = " " * 4 + + PRIMES.each do |bits, prime| + mask = (1 << bits) - 1 + basis = fnv0(prime:, mask:) + + prime_name = "N#{bits}_PRIME" + basis_name = "N#{bits}_BASIS" + mask_name = "N#{bits}_MASK" + + f.puts + code = <<~CODE + #-------------------------------------------------------------------------- + # Constants and code for the #{bits} bit fnv1 and fnv1a functions + #-------------------------------------------------------------------------- + N#{bits}_PRIME = 0x#{prime.to_s(16)} + N#{bits}_BASIS = 0x#{basis.to_s(16)} + N#{bits}_MASK = 0x#{mask.to_s(16)} + + # Public: Return the #{bits} hash of the data using the fnv1 algorithm + # + # Example: + # + # Fnv.fnv1_#{bits}(data) => >>#{bits} bit number<< + # + def self.fnv1_#{bits}(data) + hash = #{basis_name} + data.each_byte do |byte| + hash = ((hash * #{prime_name}) ^ byte) & #{mask_name} + end + hash + end + + # Public: Return the #{bits} hash of the data using the fnv1a algorithm + # + # Example: + # + # Fnv.fnv1a_#{bits}(data) => >>#{bits} bit number<< + # + def self.fnv1a_#{bits}(data) + hash = #{basis_name} + data.each_byte do |byte| + hash = ((hash ^ byte) * #{prime_name}) & #{mask_name} + end + hash + end + CODE + + #f.puts(code) + code.each_line do |line| + if line.strip.length.zero? 
then + f.puts + else + f.puts("#{indent}#{line.chomp}") + end + end + end + + f.puts(<<~POST) + end + end + POST + end +end + + diff --git a/test/test_fnv.rb b/test/test_fnv.rb new file mode 100644 index 0000000..d5bd41d --- /dev/null +++ b/test/test_fnv.rb @@ -0,0 +1,17 @@ +require 'test_helper' +require 'torid/fnv' +require 'debug' + +module Torid + class FnvTest < ::Minitest::Test + def test_fnv1a_32 + [ + ["", 0x811c9dc5], + ["a", 0xe40c292c], + ["foobar", 0xbf9cf968], + ].each do |data, result| + assert_equal(Torid::Fnv.fnv1a_32(data), result) + end + end + end +end diff --git a/torid.gemspec b/torid.gemspec index 5c4f414..200626e 100644 --- a/torid.gemspec +++ b/torid.gemspec @@ -25,7 +25,6 @@ Gem::Specification.new do |s| s.specification_version = 4 - s.add_runtime_dependency(%q.freeze, ["~> 0.2".freeze]) s.add_development_dependency(%q.freeze, ["~> 13.0".freeze]) s.add_development_dependency(%q.freeze, ["~> 5.21".freeze]) s.add_development_dependency(%q.freeze, ["~> 1.1".freeze]) From 8b671735817628bfb25fba081db9b7185c6456ec Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 00:22:42 -0700 Subject: [PATCH 07/14] documentation tweaks --- lib/torid/fnv.rb | 23 +++++++++++++++++++++-- tasks/custom.rake | 20 +++++++++++++++----- 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/lib/torid/fnv.rb b/lib/torid/fnv.rb index 34f11ef..a615d29 100644 --- a/lib/torid/fnv.rb +++ b/lib/torid/fnv.rb @@ -1,10 +1,17 @@ -# DO NOT EDIT: +# DO NOT EDIT - THIS IS A GENERATED FILE # # This file is generated from the original prime configuration of the # FNV Algorithm and is generated for runtime performance. See the -# rake task that generates this file to edit. +# rake task `generate_fnv` that generates this file. 
# module Torid + # Public: + # + # This is an imlementation of the FNV-1 and FNV-1a hash functions + # + # - https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function + # - http://isthe.com/chongo/tech/comp/fnv/index.html + # module Fnv #-------------------------------------------------------------------------- @@ -20,6 +27,7 @@ module Fnv # # Fnv.fnv1_32(data) => >>32 bit number<< # + # Returns a 32-bit Integer def self.fnv1_32(data) hash = N32_BASIS data.each_byte do |byte| @@ -34,6 +42,7 @@ def self.fnv1_32(data) # # Fnv.fnv1a_32(data) => >>32 bit number<< # + # Returns a 32-bit Integer def self.fnv1a_32(data) hash = N32_BASIS data.each_byte do |byte| @@ -55,6 +64,7 @@ def self.fnv1a_32(data) # # Fnv.fnv1_64(data) => >>64 bit number<< # + # Returns a 64-bit Integer def self.fnv1_64(data) hash = N64_BASIS data.each_byte do |byte| @@ -69,6 +79,7 @@ def self.fnv1_64(data) # # Fnv.fnv1a_64(data) => >>64 bit number<< # + # Returns a 64-bit Integer def self.fnv1a_64(data) hash = N64_BASIS data.each_byte do |byte| @@ -90,6 +101,7 @@ def self.fnv1a_64(data) # # Fnv.fnv1_128(data) => >>128 bit number<< # + # Returns a 128-bit Integer def self.fnv1_128(data) hash = N128_BASIS data.each_byte do |byte| @@ -104,6 +116,7 @@ def self.fnv1_128(data) # # Fnv.fnv1a_128(data) => >>128 bit number<< # + # Returns a 128-bit Integer def self.fnv1a_128(data) hash = N128_BASIS data.each_byte do |byte| @@ -125,6 +138,7 @@ def self.fnv1a_128(data) # # Fnv.fnv1_256(data) => >>256 bit number<< # + # Returns a 256-bit Integer def self.fnv1_256(data) hash = N256_BASIS data.each_byte do |byte| @@ -139,6 +153,7 @@ def self.fnv1_256(data) # # Fnv.fnv1a_256(data) => >>256 bit number<< # + # Returns a 256-bit Integer def self.fnv1a_256(data) hash = N256_BASIS data.each_byte do |byte| @@ -160,6 +175,7 @@ def self.fnv1a_256(data) # # Fnv.fnv1_512(data) => >>512 bit number<< # + # Returns a 512-bit Integer def self.fnv1_512(data) hash = N512_BASIS data.each_byte do |byte| @@ -174,6 
+190,7 @@ def self.fnv1_512(data) # # Fnv.fnv1a_512(data) => >>512 bit number<< # + # Returns a 512-bit Integer def self.fnv1a_512(data) hash = N512_BASIS data.each_byte do |byte| @@ -195,6 +212,7 @@ def self.fnv1a_512(data) # # Fnv.fnv1_1024(data) => >>1024 bit number<< # + # Returns a 1024-bit Integer def self.fnv1_1024(data) hash = N1024_BASIS data.each_byte do |byte| @@ -209,6 +227,7 @@ def self.fnv1_1024(data) # # Fnv.fnv1a_1024(data) => >>1024 bit number<< # + # Returns a 1024-bit Integer def self.fnv1a_1024(data) hash = N1024_BASIS data.each_byte do |byte| diff --git a/tasks/custom.rake b/tasks/custom.rake index 6dd43c2..078afe0 100644 --- a/tasks/custom.rake +++ b/tasks/custom.rake @@ -1,8 +1,11 @@ -# Generate the constants used for the fuctions +# Generate the FNV implementations # desc "generate the fnv.rb file used for the FNV functions" task :generate_fnv do |t| OFFSET_SEED = "chongo /\\../\\" + + # http://isthe.com/chongo/tech/comp/fnv/index.html#FNV-param + # PRIMES = { # 32 bit FNV_prime = 2^24 + 2^8 + 0x93 32 => (1 << 24) + (1 << 8) + 0x93, @@ -31,17 +34,22 @@ task :generate_fnv do |t| hash end - - File.open("lib/torid/fnv.rb", "w+") do |f| pre = <<~PRE - # DO NOT EDIT: + # DO NOT EDIT - THIS IS A GENERATED FILE # # This file is generated from the original prime configuration of the # FNV Algorithm and is generated for runtime performance. See the - # rake task that generates this file to edit. + # rake task `generate_fnv` that generates this file. 
# module Torid + # Public: + # + # This is an imlementation of the FNV-1 and FNV-1a hash functions + # + # - https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function + # - http://isthe.com/chongo/tech/comp/fnv/index.html + # module Fnv PRE @@ -72,6 +80,7 @@ task :generate_fnv do |t| # # Fnv.fnv1_#{bits}(data) => >>#{bits} bit number<< # + # Returns a #{bits}-bit Integer def self.fnv1_#{bits}(data) hash = #{basis_name} data.each_byte do |byte| @@ -86,6 +95,7 @@ task :generate_fnv do |t| # # Fnv.fnv1a_#{bits}(data) => >>#{bits} bit number<< # + # Returns a #{bits}-bit Integer def self.fnv1a_#{bits}(data) hash = #{basis_name} data.each_byte do |byte| From 4c7eafb46a3af4152b7eb3a08789bbb5c864ff30 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 21:33:19 -0700 Subject: [PATCH 08/14] rework the readme explaining where things are going --- README.md | 122 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 99 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index f59891f..f442dd6 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -## Torid +# Torid [![Build Status](https://copiousfreetime.semaphoreci.com/badges/torid/branches/main.svg)](https://copiousfreetime.semaphoreci.com/projects/torid) @@ -7,8 +7,8 @@ ## DESCRIPTION -Temporally Ordered IDs. Generate Universally Unique Lexicographically Sortable -Identifiers (ULID) and (UUID) that sort lexically in time order. +A library to generate UUID's that are temporally ordered when sorted +lexigraphically. Torid implements UUIDv6, UUIDv7, and UUIDv8. ## DETAILS @@ -24,26 +24,8 @@ events that are entering a system with the following criteria: 5. Eventually stored in a UUID field in a database. So 128bit ids are totally fine. -Torid generates these id's in 2 different algorithms, you may choose which one -you want. - -### Original algorithm - -The IDs that Torid generates are 128bit IDs made up of 2, 64bit parts. 
- -* 64bit microsecond level UNIX timestamp -* 64bit hash of the system hostname, process id and a random value. - -### ULID - -[Universally Unique Lexicographically Sortable Identifier](https://github.com/ulid/spec) - -These ID's are 128bit ID's that are: - -* 128bit combatible with UUID -* Canonically encoded as 26 character strings -* Case insensitive -* URL Safe +Torid generates these id's in multiple different algorithms, you may choose +which one you want. ## EXAMPLES @@ -52,9 +34,19 @@ These ID's are 128bit ID's that are: ```ruby require 'torid' +# Generate a UUIDv7 that defaults to UUID format uuid = Torid.uuid uuid.to_s # => "0004fda4-318e-f380-5a45-5321cd065b02" uuid.bytes # => "\x00\x04\xFD\xA41\x8E\xF3\x80ZES!\xCD\x06[\x02" +uuid.as_ulid # => +uuid.to_i # => + +# Generator a UUIDv7 that defaults to ULID display format +ulid = Torid.ulid +ulid.to_s # => +ulid.bytes # => "\x00\x04\xFD\xA41\x8E\xF3\x80ZES!\xCD\x06[\x02" +ulid.as_uuid # => "0004fda4-318e-f380-5a45-5321cd065b02" +ulid.to_i # => ``` #### Using your own instance of a Generator @@ -67,8 +59,91 @@ uuid = generator.next uuid.to_s # => "0004fda4-3f42-3d01-4731-5a4aa8ddd6c3" uuid.bytes # => "\x00\x04\xFD\xA4?B=\x01G1ZJ\xA8\xDD\xD6\xC3" +uuid.as_ulid # => ``` +#### Configure the Generator + +```ruby + +# Default uuidv7 generator +uuidv7_generator = Torid::Generator.new + +# Default uuidv7 generator that defaults to ulid string format +ulid_generator = Torid::Generator.new(format: :ulid) + +# uuidv8 generator that defaults to ulid string format +ulid_v8_generator = Torid::Generator.new(format: :ulid, algorithm: :v8) + +``` + +## UUID Representation + +All the UUID values generated by Torid may be displayed in any of the following +formats: + +* the standard 8-4-4-4-12 hexadecimal UUID format +* a base 32 crockford encoded string, also known as [ULID format](https://github.com/ulid/spec) +* an array of 16 bytes +* a 128 bit integer + +## Algorithms + +All the new UUID formats that are detailed in [New 
UUID Formats +RFC](https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format). + +The default algorithm used by Torid is the UUIDv7 algorithm. + +Torid's original custom algorithm is now the UUIDv8 implementation. + +### UUIDv6 + +This is a field-compatible version of UUIDv1, with the time bits reordered so +they sort lexically in time order. You probably don't want this unless you are +migrating from a UUIDv1 system. + +The UUIDv6 uses a 60 bit timestamp of the number of 100 nanosecond intervals +(0.1 microseconds) from 00:00:00.00 15 Oct 1585. + +* 48 most significant bits of a 60 bit timestamp +* 4 bits containing the UUIDv6 version (0110) +* 12 least significant bits of the 60 bit timestamp +* 2 bits (10) for the UUID variant +* 14 bit clock sequence value +* 48 bit node_id + +### UUIDv7 + +I am also calling this the [ULID implementation](https://github.com/ulid/spec) +and is aliased as such. + +* 48 bit timestamp of the number of milliseconds from UNIX Epoch +* 4 bits containing the UUIDv7 version (0111) +* 12 bit of pseudo random data +* 2 bit variant (10) +* 62 bits of pseudo random data + +### UUIDv8 + +Torid had an original temporally ordered uuid generation algorithm that existed +before the new UUIDv7/8/9 standard was proposed. That algorithm is now slightly +altered and is the UUIDv8 implementation. + +The IDs that Torid generated in version 1.x were 128bit IDs made up of 2, 64bit +parts. + +* 64bit microseconds since the UNIX epoch +* 64bit hash of the system hostname, process id and a random value. + +In version 2.x the custom algorithm is the same, but the bit fields are now: + +* 48 most significant bits of a 64bit microseconds since the UNIX epoch +* 4 bits containing the UUIDv8 version (1000) +* 12 middle significant bits of the 64bit microseconds since the UNIX epoch +* 2 bit variant (10) +* 4 least significant bits of the 64bit microseconds since the UNIX epoch +* 58 bits of the system hostname, process id and a random value. 
+ ## CREDITS / RESOURCES The vast majority of the credit and research stems from: @@ -77,6 +152,7 @@ The vast majority of the credit and research stems from: * This let me to [Boundary's Flake](http://boundary.com/blog/2012/01/12/flake-a-decentralized-k-ordered-unique-id-generator-in-erlang/) * [James Golick's](https://github.com/jamesgolick) [lexical_uuid](https://github.com/jamesgolick/lexical_uuid), which if I had found a day earlier, I might be using instead of creating this. * [ulid specification](https://github.com/ulid/spec) +* [draft-peabody-dispatch-new-uuid-format-04](https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format) You could consider Torid to be a reimplementation of [lexical_uuid](https://github.com/jamesgolick/lexical_uuid). It definately steals some code from it and [simple_uuid](https://github.com/cassandra-rb/simple_uuid) and an implementation of [ulid](https://github.com/ulid/spec). From 4cedf54d20e8b8472c4a4ba13a1f5bb59fc31f18 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 21:37:13 -0700 Subject: [PATCH 09/14] editing --- README.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index f442dd6..88d44ae 100644 --- a/README.md +++ b/README.md @@ -73,7 +73,7 @@ uuidv7_generator = Torid::Generator.new ulid_generator = Torid::Generator.new(format: :ulid) # uuidv8 generator that defaults to ulid string format -ulid_v8_generator = Torid::Generator.new(format: :ulid, algorithm: :v8) +ulid_v8_generator = Torid::Generator.new(format: :ulid, algorithm: :v8) ``` @@ -89,10 +89,10 @@ formats: ## Algorithms -All the new UUID formats that are detailed in [New UUID Formats +Torid implements all the UUID formats detailed in [New UUID Formats RFC](https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format). -The default algorithm used by Torid is the UUIDv7 algorithm. +The default algorithm for Torid is the UUIDv7 algorithm. 
Torid's original custom algorithm is now the UUIDv8 implementation. @@ -184,8 +184,7 @@ Copyright (c) 2014 Jeremy Hinegardner Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above -copyright notice -and this permission notice appear in all copies. +copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF From 0747911372b037885fc3afb3c7f522f02a2d3327 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 21:51:24 -0700 Subject: [PATCH 10/14] update version and history to 2.0 --- Gemfile.lock | 2 -- HISTORY.md | 5 +++-- lib/torid.rb | 3 ++- torid.gemspec | 10 +++++----- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 4d3d6c5..f498aa7 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,14 +2,12 @@ PATH remote: . specs: torid (1.3.0) - fnv (~> 0.2) GEM remote: https://rubygems.org/ specs: builder (3.2.4) docile (1.4.0) - fnv (0.2.0) jar-dependencies (0.4.1) minitest (5.22.2) minitest-focus (1.4.0) diff --git a/HISTORY.md b/HISTORY.md index 3b8aa2f..7cdc4eb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,8 +1,9 @@ # Torid Changelog -## Version 1.4.0 - 2024-03-XX +## Version 2.0.0 - 2024-03-XX -* Add Torid::ULID to implement the [ulid specification](https://github.com/ulid/spec) +* Refactor entire library to be a UUIDv6, UUIDv7, UUIDv8 implementation. * Remove dependency on very old fnv gem and reimplment fnv algorithm internally +* Implement the crockford base 32 algorithm for alternative display format ## Version 1.3.0 - 2017-02-17 diff --git a/lib/torid.rb b/lib/torid.rb index 98a008c..cf926c3 100644 --- a/lib/torid.rb +++ b/lib/torid.rb @@ -2,7 +2,7 @@ # that sort lexically in time order. 
module Torid # Public: The Version of the Torid library as a String - VERSION = "1.3.0" + VERSION = "2.0.0" # Public: return the next Torid::UUID from the default Generator # @@ -18,6 +18,7 @@ def self.uuid end end require 'torid/clock' +require 'torid/crockford' require 'torid/fnv' require 'torid/uuid' require 'torid/generator' diff --git a/torid.gemspec b/torid.gemspec index 200626e..526aa29 100644 --- a/torid.gemspec +++ b/torid.gemspec @@ -1,18 +1,18 @@ # DO NOT EDIT - This file is automatically generated # Make changes to Manifest.txt and/or Rakefile and regenerate # -*- encoding: utf-8 -*- -# stub: torid 1.3.0 ruby lib +# stub: torid 2.0.0 ruby lib Gem::Specification.new do |s| s.name = "torid".freeze - s.version = "1.3.0".freeze + s.version = "2.0.0".freeze s.required_rubygems_version = Gem::Requirement.new(">= 0".freeze) if s.respond_to? :required_rubygems_version= s.metadata = { "bug_tracker_uri" => "https://github.com/copiousfreetime/torid/issues", "changelog_uri" => "https://github.com/copiousfreetime/torid/blob/master/HISTORY.md", "homepage_uri" => "https://github.com/copiousfreetime/torid", "source_code_uri" => "https://github.com/copiousfreetime/torid" } if s.respond_to? :metadata= s.require_paths = ["lib".freeze] s.authors = ["Jeremy Hinegardner".freeze] - s.date = "2024-03-04" - s.description = "Temporally Ordered IDs. Generate universally unique identifiers (UUID) that sort lexically in time order.".freeze + s.date = "2024-03-07" + s.description = "A library to generate UUID's that are temporally ordered when sorted lexigraphically. 
Torid implements UUIDv6, UUIDv7, and UUIDv8.".freeze s.email = "jeremy@copiousfreetime.org".freeze s.extra_rdoc_files = ["CONTRIBUTING.md".freeze, "HISTORY.md".freeze, "Manifest.txt".freeze, "README.md".freeze] s.files = ["CONTRIBUTING.md".freeze, "HISTORY.md".freeze, "LICENSE".freeze, "Manifest.txt".freeze, "README.md".freeze, "Rakefile".freeze, "lib/torid.rb".freeze, "lib/torid/clock.rb".freeze, "lib/torid/generator.rb".freeze, "lib/torid/uuid.rb".freeze, "tasks/default.rake".freeze, "tasks/this.rb".freeze, "test/test_clock.rb".freeze, "test/test_generator.rb".freeze, "test/test_helper.rb".freeze, "test/test_torid.rb".freeze, "test/test_uuid.rb".freeze, "test/test_version.rb".freeze] @@ -21,7 +21,7 @@ Gem::Specification.new do |s| s.rdoc_options = ["--main".freeze, "README.md".freeze, "--markup".freeze, "tomdoc".freeze] s.required_ruby_version = Gem::Requirement.new(">= 2.3.0".freeze) s.rubygems_version = "3.5.3".freeze - s.summary = "Temporally Ordered IDs. Generate universally unique identifiers (UUID) that sort lexically in time order.".freeze + s.summary = "A library to generate UUID's that are temporally ordered when sorted lexigraphically. 
Torid implements UUIDv6, UUIDv7, and UUIDv8.".freeze s.specification_version = 4 From 635210afa9f5c426db3b96ab54150bacc7532f1d Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 23:12:52 -0700 Subject: [PATCH 11/14] parse a base32 string for uuid --- lib/torid/crockford.rb | 1 + lib/torid/uuid.rb | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/lib/torid/crockford.rb b/lib/torid/crockford.rb index 8fb68cf..b669b8b 100644 --- a/lib/torid/crockford.rb +++ b/lib/torid/crockford.rb @@ -8,6 +8,7 @@ module Torid module Crockford ENCODING = "0123456789ABCDEFGHJKMNPQRSTVWXYZ".freeze DASH = "-".freeze + REGEX = %r{([#{DASH}#{ENCODING}]+)}i BITS_PER_CODE = 5 BASE_RADIX = (1 << BITS_PER_CODE) diff --git a/lib/torid/uuid.rb b/lib/torid/uuid.rb index 0b8a24b..d24a8d5 100644 --- a/lib/torid/uuid.rb +++ b/lib/torid/uuid.rb @@ -54,6 +54,7 @@ def self.from( str ) return from_bytes( str ) if str.bytesize == 16 return from_string( str ) if UUID.match( str ) raise ArgumentError, "UUID can only be loaded from a 16 byte binary string or a 36 byte formatted UUID string." + return from_base32_string( str ) if Crockford::REGEX.match( str ) end # Internal: Create a new UUID from an existing string in the 8-4-4-4-12 format @@ -69,6 +70,17 @@ def self.from_string( str ) from_bytes( bytes ) end + # Internal: Create a new UUID from an existing string in the Crockford Base32 format + # + # str - The String from which to create the UUID. + # + # Returns a Torid::UUID + def self.from_base32_string( str ) + value = Crockford.decode( str ) + raise ArgumentError, "Crockfor base32 string must decode to a 128bit value" if value >= MAX.value + new( value ) + end + # Internal: Create a new UUID from an existing 16 byte String # # str - The String from which to create the UUID. 
From 75c29f5c5f5a0040829a05e0c006c098af9dced9 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 23:13:06 -0700 Subject: [PATCH 12/14] use the local fnv implementation --- lib/torid/generator.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/torid/generator.rb b/lib/torid/generator.rb index 43489a5..3fdb92d 100644 --- a/lib/torid/generator.rb +++ b/lib/torid/generator.rb @@ -1,8 +1,8 @@ -require 'fnv' require 'socket' require 'securerandom' -require 'torid/clock' -require 'torid/uuid' +require_relative './fnv' +require_relative './clock' +require_relative './uuid' module Torid # Public: A class that will generate unique identifiers. @@ -75,7 +75,7 @@ def node_id def self.create_node_id( pid = Process.pid ) hostname = Socket.gethostname random = SecureRandom.hex( 16 ) - FNV.new.fnv1a_64("#{hostname}-#{pid}-#{random}") + Fnv.fnv1a_64("#{hostname}-#{pid}-#{random}") end # Internal: The default generator used by the system. From 7a097259ef7d9f60bb65c4f93113fb9b9045a36b Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 23:13:14 -0700 Subject: [PATCH 13/14] require relative --- lib/torid.rb | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/torid.rb b/lib/torid.rb index cf926c3..f0ef5e8 100644 --- a/lib/torid.rb +++ b/lib/torid.rb @@ -17,8 +17,8 @@ def self.uuid Torid::Generator.next end end -require 'torid/clock' -require 'torid/crockford' -require 'torid/fnv' -require 'torid/uuid' -require 'torid/generator' +require_relative 'torid/clock' +require_relative 'torid/crockford' +require_relative 'torid/fnv' +require_relative 'torid/uuid' +require_relative 'torid/generator' From 07021a90a93152d1f6c7d2673664560097c8ab36 Mon Sep 17 00:00:00 2001 From: copiousfreetime Date: Wed, 6 Mar 2024 23:13:34 -0700 Subject: [PATCH 14/14] start converting to new uuid class --- lib/torid/uuid.rb | 98 +++++++++++++++++++++-------------------------- test/test_uuid.rb | 63 
+++++++++++++++--------------- 2 files changed, 75 insertions(+), 86 deletions(-) diff --git a/lib/torid/uuid.rb b/lib/torid/uuid.rb index d24a8d5..52efc4f 100644 --- a/lib/torid/uuid.rb +++ b/lib/torid/uuid.rb @@ -1,24 +1,26 @@ module Torid # Public: Represents a UUID generated by Torid # - # Torid::UUID wraps 2 64bit Integer values and can convert them back and forth - # between raw bytes and the canonical UUID form of 32 lowercase hexadecimal - # lowercase hexadecimal digits, displayed in five groups separated by hyphens, - # in the form 8-4-4-4-12 for a total of 36 characters (32 alphanumeric - # characters and four hyphens) + # Torid::UUID wraps a 128bit Integer value and can convert it back and forth + # between the following formats: # - # Since internally, Torid::UUID's represent a 64bit microsecond timestamp and - # a 'node_id', those data fields are also able to be returned as a Time - # instance or an Integer respectively. + # - 36 byte hexadecimal string in the standard UUID format 8-4-4-4-12 + # - a 26 character string in crockford's base32 + # - 16 byte binary string + # - 128bit Integer + # + # Internally, a Torid::UUID represents a 128bit Integer that includes + # some sort of timestamp. Depending on the version (UUIDv6, UUIDv7, or UUIDv8) + # there may be other accessors. # # Examples # # uuid = Torid.uuid - # uuid.to_s # => "0004fda3-8c73-5e0f-bae4-e9c86e3684a5" - # uuid.bytes # => "\x00\x04\xFD\xA3\x8Cs^\x0F\xBA\xE4\xE9\xC8n6\x84\xA5" + # uuid.to_s # => "0004fda3-8c73-5e0f-bae4-e9c86e3684a5" + # uuid.bytes # => "\x00\x04\xFD\xA3\x8Cs^\x0F\xBA\xE4\xE9\xC8n6\x84\xA5" + # uuid.to_i # => 1234567890 + # uuid.as_ulid # => "01ARZ3NDEKTSV4RRFFQ69G5FAV" # - # uuid.timestamp # => Time - # uuid.node_id # => Integer class UUID # Regular expression that matches the 36 byte 8-4-4-4-12 format @@ -51,10 +53,11 @@ def self.match( str ) # Returns a Torid::UUID # Raises ArgumentError if the String is not convertable to a UUID. 
def self.from( str ) + return new(str) if str.is_a?(Integer) return from_bytes( str ) if str.bytesize == 16 - return from_string( str ) if UUID.match( str ) - raise ArgumentError, "UUID can only be loaded from a 16 byte binary string or a 36 byte formatted UUID string." + return from_uuid_string( str ) if UUID.match( str ) return from_base32_string( str ) if Crockford::REGEX.match( str ) + raise ArgumentError, "UUID can only be loaded from a 16 byte binary string, 36 byte formatted UUID string, or a 26 character base32 string" end # Internal: Create a new UUID from an existing string in the 8-4-4-4-12 format @@ -64,7 +67,7 @@ def self.from( str ) # Copied from lexical_uuid # # Returns a Torid::UUID - def self.from_string( str ) + def self.from_uuid_string( str ) hex = str.split('-').join bytes = Array( hex ).pack("H32") from_bytes( bytes ) @@ -83,23 +86,21 @@ def self.from_base32_string( str ) # Internal: Create a new UUID from an existing 16 byte String # - # str - The String from which to create the UUID. - # - # Copied from lexical_uuid + # str - The binary string from which to create the UUID. # # Returns a Torid::UUID def self.from_bytes( bytes ) - time_high, time_low, node_high, node_low = bytes.unpack("NNNN") - timestamp = ( time_high << 32 ) | time_low - node_id = ( node_high << 32 ) | node_low - new( timestamp, node_id ) + a, b, c, d = bytes.unpack("NNNN") + value = ( a << 96 ) | ( b << 64 ) | ( c << 32 ) | d + new( value ) end - # Public: The 64bit microsecond UNIX timestamp + # Public: The timestamp value of the UUID attr_reader :timestamp - # Public: The 64bit node id - attr_reader :node_id + # Public: The 128bit integer value of the UUID + attr_reader :value + alias :to_i :value # Internal: Create a new UUID. # @@ -107,15 +108,10 @@ def self.from_bytes( bytes ) # generate id's. See `Torid.uuid` or `Torid::Generator.next`. This # constructor should not be called by users of this library. 
# - # timestamp - an Integer value representing UNIX timestamp in microseconds - # node_id - an Integer value representing the unique node id where this - # UUID is generatoed + # value - a 128bit Integer value # - def initialize( timestamp = nil, node_id = nil ) - @timestamp = timestamp - @node_id = node_id - @bytes = nil - @time = nil + def initialize( value ) + @value = value end # Public: Return the Time value the internal microsecond timestamp @@ -127,7 +123,7 @@ def initialize( timestamp = nil, node_id = nil ) # # Returns a Time instance def time - @time ||= Time.at( timestamp / 1_000_000.0 ) + raise NotImplementedError, "This method must be implemented in a subclass" end # Public: Return the UUID as 16 bytes of raw data. @@ -140,10 +136,12 @@ def time # # Returns a binary String def bytes - @bytes ||= [ @timestamp >> 32, - @timestamp & 0xFFFF_FFFF, - @node_id >> 32, - @node_id & 0xFFFF_FFFF ].pack("NNNN") + @bytes ||= ( + _bytes = @value.digits(256).reverse + _bytes.unshift(0) until _bytes.length == 16 + _bytes.pack("C*") + ) + end # Public: Return the hexadecimal UUID string representation. This is the @@ -157,22 +155,13 @@ def bytes # # Returns a String def to_s + debugger elements = bytes.unpack("NnnCCa6") - node = elements[-1].unpack('C*') - elements[-1] = '%02x%02x%02x%02x%02x%02x' % node + twelve = elements[-1].unpack('C*') + elements[-1] = '%02x%02x%02x%02x%02x%02x' % twelve "%08x-%04x-%04x-%02x%02x-%s" % elements end - # Public: Return the hexidcimal UUID string representation of just the - # node_id. 
This is just the last 2 parts - def node_id_s - node_bytes = [ @node_id >> 32, @node_id & 0xFFFF_FFFF].pack("NN") - elements = node_bytes.unpack("CCa6") - node = elements[-1].unpack('C*') - elements[-1] = '%02x%02x%02x%02x%02x%02x' % node - "%02x%02x-%s" % elements - end - # Public: Compare the equality of UUID's # # Examples @@ -182,14 +171,13 @@ def node_id_s # Returns true or false def ==(other) other.is_a?(::Torid::UUID) && - other.node_id == self.node_id && - other.timestamp == self.timestamp + other.value == self.value end alias :eql? :== # Public: Generate the hash of the UUID for ruby hash equality # - # This allows two UUID objects that have the same node_id and timestamp to + # This allows two UUID objects that have the value to # be considered the same object for keys in Hash. # # Examples @@ -198,8 +186,10 @@ def ==(other) # h = { one => "a value" } # h.has_key?( other ) # => true def hash - [node_id, timestamp, ::Torid::UUID].hash + [value, ::Torid::UUID].hash end + MIN = Torid::UUID.new(0) + MAX = Torid::UUID.new(0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF) end end diff --git a/test/test_uuid.rb b/test/test_uuid.rb index 0f241ef..082b1bd 100644 --- a/test/test_uuid.rb +++ b/test/test_uuid.rb @@ -5,12 +5,10 @@ module Torid class UUIDTest < ::Minitest::Test def setup - @timestamp = 1404617330909742 - @node_id = 42 - @bytes = [ @timestamp >> 32, @timestamp & 0xFFFF_FFFF, - @node_id >> 32, @node_id & 0XFFFF_FFFF ].pack("NNNN") - @guid = "0004fd7d-f50d-e22e-0000-00000000002a" - @node_id_s = "0000-00000000002a" + @bytes = "\x00\x04\xFD\xA3\x8Cs^\x0F\xBA\xE4\xE9\xC8n6\x84\xA5" + @guid = "0004fd7d-f50d-e22e-0000-00000000002a" + @value = 1234567890 + @ulid = "01ARZ3NDEKTSV4RRFFQ69G5FAV" end def test_uuid_regex_matches @@ -40,35 +38,35 @@ def test_round_trips_bytes assert_equal( @bytes, uuid.bytes ) end - def test_extracts_timestamp_from_bytes - uuid = ::Torid::UUID.from( @bytes) - assert_equal( @timestamp, uuid.timestamp ) - end + # def 
test_extracts_timestamp_from_bytes + # uuid = ::Torid::UUID.from( @bytes) + # assert_equal( @timestamp, uuid.timestamp ) + # end - def test_extracts_node_id_from_bytes - uuid = ::Torid::UUID.from( @bytes) - assert_equal( @node_id , uuid.node_id ) - end + # def test_extracts_node_id_from_bytes + # uuid = ::Torid::UUID.from( @bytes) + # assert_equal( @node_id , uuid.node_id ) + # end - def test_extracts_node_id_as_hex - uuid = ::Torid::UUID.from( @bytes) - assert_equal( @node_id_s, uuid.node_id_s ) - end + # def test_extracts_node_id_as_hex + # uuid = ::Torid::UUID.from( @bytes) + # assert_equal( @node_id_s, uuid.node_id_s ) + # end - def test_round_trips_uuid_string + def test_round_trips_guid_string uuid = ::Torid::UUID.from( @guid ) assert_equal( uuid.to_s, uuid.to_s ) end - def test_extracts_timestamp_from_uuid - uuid = ::Torid::UUID.from( @guid ) - assert_equal( @timestamp, uuid.timestamp ) - end + # def test_extracts_timestamp_from_uuid + # uuid = ::Torid::UUID.from( @guid ) + # assert_equal( @timestamp, uuid.timestamp ) + # end - def test_extracts_node_id_from_uuid - uuid = ::Torid::UUID.from( @guid ) - assert_equal( @node_id , uuid.node_id ) - end + # def test_extracts_node_id_from_uuid + # uuid = ::Torid::UUID.from( @guid ) + # assert_equal( @node_id , uuid.node_id ) + # end def test_create_from_raises_error assert_raises( ArgumentError ) do @@ -76,11 +74,11 @@ def test_create_from_raises_error end end - def test_creates_a_time_from_uuid - time = Time.at( @timestamp / 1_000_000.0 ) - uuid = ::Torid::UUID.from( @guid ) - assert_equal( time, uuid.time ) - end + # def test_creates_a_time_from_uuid + # time = Time.at( @timestamp / 1_000_000.0 ) + # uuid = ::Torid::UUID.from( @guid ) + # assert_equal( time, uuid.time ) + # end def test_standard_equality one = ::Torid::UUID.from( @guid ) @@ -97,6 +95,7 @@ def test_case_equality end def test_hash + debugger one = ::Torid::UUID.from( @guid ) other = ::Torid::UUID.from( @guid ) assert( one.hash == other.hash )