From 510750424b5cf7e10cf608cf7713a8b0c99da37c Mon Sep 17 00:00:00 2001
From: Joe Ardent
Date: Tue, 25 Jul 2023 12:04:38 -0700
Subject: [PATCH] first commit

---
 .gitignore    |   2 +
 .rustfmt.toml |   4 +
 Cargo.toml    |   8 ++
 src/base32.rs | 215 +++++++++++++++++++++++++++++++++++++
 src/julid.rs  | 285 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/lib.rs    |   5 +
 src/main.rs   |  23 ++++
 7 files changed, 542 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .rustfmt.toml
 create mode 100644 Cargo.toml
 create mode 100644 src/base32.rs
 create mode 100644 src/julid.rs
 create mode 100644 src/lib.rs
 create mode 100644 src/main.rs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4fffb2f
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+/target
+/Cargo.lock
diff --git a/.rustfmt.toml b/.rustfmt.toml
new file mode 100644
index 0000000..4c8d0e1
--- /dev/null
+++ b/.rustfmt.toml
@@ -0,0 +1,4 @@
+imports_granularity = "Crate"
+group_imports = "StdExternalCrate"
+wrap_comments = true
+edition = "2021"
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..d606eee
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,8 @@
+[package]
+name = "julid"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+bitfield = "0.14.0"
+rand = "0.8.5"
diff --git a/src/base32.rs b/src/base32.rs
new file mode 100644
index 0000000..e9a8c38
--- /dev/null
+++ b/src/base32.rs
@@ -0,0 +1,215 @@
+/*
+this code shamelessly mostly stolen from
+https://github.com/dylanhart/ulid-rs/blob/0b9295c2db2114cd87aa19abcc1fc00c16b272db/src/base32.rs
+and used under the terms of the MIT license:
+
+Copyright (c) 2017 Dylan Hart
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*/
+
+use core::fmt;
+
+/// Length of a string-encoded Ulid
+pub const ULID_LEN: usize = 26;
+
+/// The Crockford base32 alphabet, indexed by 5-bit digit value.
+const ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";
+
+/// Marker in [`LOOKUP`] for bytes that are not part of the alphabet.
+const NO_VALUE: u8 = 255;
+
+/// Maps an input byte to its 5-bit digit value, or [`NO_VALUE`]. Letters are
+/// accepted in either case; see `test_lookup_table` for how it is derived.
+const LOOKUP: [u8; 256] = [
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255,
+    255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17, 255, 18, 19, 255, 20, 21, 255, 22, 23, 24,
+    25, 26, 255, 27, 28, 29, 30, 31, 255, 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17,
+    255, 18, 19, 255, 20, 21, 255, 22, 23, 24, 25, 26, 255, 27, 28, 29, 30, 31, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+];
+
+/// Encode the given 128-bit number as a base32 string.
+pub fn encode(mut value: u128) -> String {
+    let mut buffer: [u8; ULID_LEN] = [0; ULID_LEN];
+    for i in 0..ULID_LEN {
+        buffer[ULID_LEN - 1 - i] = ALPHABET[(value & 0x1f) as usize];
+        value >>= 5;
+    }
+
+    String::from_utf8(buffer.to_vec()).expect("unexpected failure in base32 encode for ulid")
+}
+
+/// An error that can occur when decoding a base32 string
+#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
+pub enum DecodeError {
+    /// The length of the string does not match the expected length
+    InvalidLength(usize),
+    /// A non-base32 character was found, or the leading character is too large
+    /// for the decoded value to fit in 128 bits
+    InvalidChar(char),
+}
+
+impl std::error::Error for DecodeError {}
+
+impl fmt::Display for DecodeError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        // Write straight into the formatter instead of building a `String` first.
+        match *self {
+            DecodeError::InvalidLength(len) => write!(f, "invalid length: {len}"),
+            DecodeError::InvalidChar(c) => write!(f, "invalid character: {c}"),
+        }
+    }
+}
+
+/// Decode a 26-character Crockford base32 string into a 128-bit number.
+///
+/// Letters are accepted in upper or lower case.
+///
+/// # Errors
+///
+/// Returns [`DecodeError::InvalidLength`] if `encoded` is not exactly
+/// [`ULID_LEN`] bytes long, and [`DecodeError::InvalidChar`] if it contains a
+/// byte outside the alphabet or if its first character is greater than `7`
+/// (26 base32 digits carry 130 bits, so such a value cannot fit in a `u128`).
+pub const fn decode(encoded: &str) -> Result<u128, DecodeError> {
+    let len = encoded.len();
+    if len != ULID_LEN {
+        return Err(DecodeError::InvalidLength(len));
+    }
+
+    let mut value: u128 = 0;
+
+    let bytes = encoded.as_bytes();
+
+    // Manual for loop because Range::iter() isn't const
+    let mut i = 0;
+    while i < ULID_LEN {
+        let val = LOOKUP[bytes[i] as usize];
+        // The leading digit may only contribute 3 bits; anything above 7 would
+        // be silently shifted out of the u128 below.
+        if val == NO_VALUE || (i == 0 && val > 7) {
+            return Err(DecodeError::InvalidChar(bytes[i] as char));
+        }
+        value = (value << 5) | val as u128;
+        i += 1;
+    }
+
+    Ok(value)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_lookup_table() {
+        let mut lookup = [NO_VALUE; 256];
+        for (i, &c) in ALPHABET.iter().enumerate() {
+            lookup[c as usize] = i as u8;
+            if !(c as char).is_numeric() {
+                // lowercase
+                lookup[(c + 32) as usize] = i as u8;
+            }
+        }
+        assert_eq!(LOOKUP, lookup);
+    }
+
+    #[test]
+    fn test_valid() {
+        let val = 0x41414141414141414141414141414141;
+        assert_eq!(decode("21850M2GA1850M2GA1850M2GA1").unwrap(), val);
+        assert_eq!(encode(val), "21850M2GA1850M2GA1850M2GA1");
+
+        let val = 0x4d4e385051444a59454234335a413756;
+        let enc = "2D9RW50MA499CMAGHM6DD42DTP";
+        let lower = enc.to_lowercase();
+        assert_eq!(encode(val), enc);
+        assert_eq!(decode(enc).unwrap(), val);
+        assert_eq!(decode(&lower).unwrap(), val);
+    }
+
+    #[test]
+    fn test_length() {
+        assert_eq!(encode(0xffffffffffffffffffffffffffffffff).len(), ULID_LEN);
+        assert_eq!(encode(0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f).len(), ULID_LEN);
+        assert_eq!(encode(0x00000000000000000000000000000000).len(), ULID_LEN);
+
+        assert_eq!(decode(""), Err(DecodeError::InvalidLength(0)));
+        assert_eq!(
+            decode("2D9RW50MA499CMAGHM6DD42DT"),
+            Err(DecodeError::InvalidLength(25))
+        );
+        assert_eq!(
+            decode("2D9RW50MA499CMAGHM6DD42DTPP"),
+            Err(DecodeError::InvalidLength(27))
+        );
+    }
+
+    #[test]
+    fn test_chars() {
+        for ref c in encode(0xffffffffffffffffffffffffffffffff).bytes() {
+            assert!(ALPHABET.contains(c));
+        }
+        for ref c in encode(0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f).bytes() {
+            assert!(ALPHABET.contains(c));
+        }
+        for ref c in encode(0x00000000000000000000000000000000).bytes() {
+            assert!(ALPHABET.contains(c));
+        }
+
+        assert_eq!(
+            decode("2D9RW50[A499CMAGHM6DD42DTP"),
+            Err(DecodeError::InvalidChar('['))
+        );
+        assert_eq!(
+            decode("2D9RW50LA499CMAGHM6DD42DTP"),
+            Err(DecodeError::InvalidChar('L'))
+        );
+        assert_eq!(
+            decode("2D9RW50IA499CMAGHM6DD42DTP"),
+            Err(DecodeError::InvalidChar('I'))
+        );
+    }
+
+    #[test]
+    fn test_overflow() {
+        // The largest value that fits in 128 bits still round-trips...
+        assert_eq!(decode("7ZZZZZZZZZZZZZZZZZZZZZZZZZ"), Ok(u128::MAX));
+        // ...but a leading digit above 7 would overflow and is rejected.
+        assert_eq!(
+            decode("80000000000000000000000000"),
+            Err(DecodeError::InvalidChar('8'))
+        );
+        assert_eq!(
+            decode("ZZZZZZZZZZZZZZZZZZZZZZZZZZ"),
+            Err(DecodeError::InvalidChar('Z'))
+        );
+    }
+}
diff --git a/src/julid.rs b/src/julid.rs
new file mode 100644
index 0000000..f87f883
--- /dev/null
+++ b/src/julid.rs
@@ -0,0 +1,285 @@
+use core::{fmt, str::FromStr};
+use std::{sync::Mutex, time::Duration};
+
+use rand::random;
+
+use crate::base32::{self, DecodeError};
+
+/// This ID is used to ensure monotonicity for new IDs.
+static LAST_ID: Mutex<Julid> = Mutex::new(Julid::alpha());
+
+/// The number of bits in a Julid's time portion
+pub const TIME_BITS: u8 = 48;
+/// The number of bits in the monotonic counter for intra-millisecond IDs
+pub const MBITS: u8 = 16;
+/// The number of random bits + bits in the monotonic counter
+pub const UNIQUE_BITS: u8 = 80;
+/// The number of fully random bits
+pub const RANDOM_BITS: u8 = UNIQUE_BITS - MBITS;
+
+// Expands to a mask with the `$len` least-significant bits set. The integer
+// type is inferred from the surrounding expression.
+macro_rules! bitmask {
+    ($len:expr) => {
+        ((1 << $len) - 1)
+    };
+}
+
+/// A Julid is a unique 128-bit lexicographically sortable identifier,
+/// compatible with ULIDs.
+///
+/// Canonically, it is represented as a 26 character Crockford Base32 encoded
+/// string, or as a sequence of 16 bytes in big-endian order.
+///
+/// Of the 128-bits, the 48 most-significant are a unix timestamp in
+/// milliseconds. The next 16 bits are a monotonic counter for IDs created in
+/// the same millisecond. The remaining 64 least-significant bits are fully
+/// random.
+#[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash, Clone, Copy)]
+pub struct Julid(pub u128);
+
+impl Julid {
+    /// Return a new Julid. If a previous ID was generated in the same
+    /// millisecond, increment the monotonic counter, up to u16::MAX. The random
+    /// bits are always fresh, so once the monotonic counter is saturated,
+    /// subsequent IDs from the current millisecond will not have an
+    /// inherent ordering. See discussion at
+    /// <https://github.com/ahawker/ulid/issues/306#issuecomment-451850395>
+    ///
+    /// Blocks until the lock on the last-issued ID is available.
+    pub fn new() -> Self {
+        let lsb: u64 = random();
+        // Block on the lock instead of polling `try_lock`, which reports a
+        // poisoned mutex as an error on every call and would never make
+        // progress. The guarded value is a plain `Copy` value, so it is still
+        // usable after a poisoning panic.
+        let mut guard = LAST_ID.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
+        let ts = std::time::SystemTime::now()
+            .duration_since(std::time::UNIX_EPOCH)
+            .unwrap_or(Duration::ZERO)
+            .as_millis() as u64;
+        let ots = guard.timestamp();
+        let new = if ots < ts {
+            Julid::new_time(ts, lsb)
+        } else {
+            // Same millisecond as the last ID (or the clock went backwards):
+            // keep the previous timestamp and bump the counter.
+            let counter = guard.counter().saturating_add(1);
+            let tbits = ots & bitmask!(TIME_BITS);
+            let msb = (tbits << MBITS) + counter as u64;
+            Julid::from((msb, lsb))
+        };
+        *guard = new;
+        new
+    }
+
+    /// Builds a Julid from a millisecond timestamp and 64 random bits, with the
+    /// monotonic counter at zero.
+    fn new_time(time: u64, lsb: u64) -> Self {
+        let tbits = time & bitmask!(TIME_BITS);
+        let msb = tbits << MBITS;
+        (msb, lsb).into()
+    }
+
+    /// Creates a Julid from a Crockford Base32 encoded string
+    ///
+    /// A DecodeError will be returned when the given string is not formatted
+    /// properly.
+    ///
+    /// # Example
+    /// ```rust
+    /// use julid::julid::Julid;
+    /// let text = "01D39ZY06FGSCTVN4T2V9PKHFZ";
+    /// let result = Julid::from_string(text);
+    ///
+    /// assert!(result.is_ok());
+    /// assert_eq!(&result.unwrap().to_string(), text);
+    /// ```
+    pub const fn from_string(encoded: &str) -> Result<Julid, DecodeError> {
+        // A plain `match` because `Result::map` cannot be called in a const fn.
+        match base32::decode(encoded) {
+            Ok(int_val) => Ok(Julid(int_val)),
+            Err(err) => Err(err),
+        }
+    }
+
+    /// The 'Alpha Julid'.
+    ///
+    /// The Alpha Julid is a special form of Julid that is specified to have
+    /// all 128 bits set to zero.
+    pub const fn alpha() -> Julid {
+        Julid(0)
+    }
+
+    /// The 'Omega Julid': the Julid with all 128 bits set to one.
+    pub const fn omega() -> Self {
+        Julid(u128::MAX)
+    }
+
+    /// Gets the timestamp section of this Julid
+    pub const fn timestamp(&self) -> u64 {
+        (self.0 >> UNIQUE_BITS) as u64
+    }
+
+    /// Gets the monotonic counter section of this Julid
+    pub const fn counter(&self) -> u16 {
+        let mask = bitmask!(MBITS);
+        ((self.0 >> RANDOM_BITS) & mask) as u16
+    }
+
+    /// Gets the 64 most-significant bits of this Julid (timestamp + counter
+    /// bits).
+    pub const fn sortable(&self) -> u64 {
+        let mask = bitmask!(TIME_BITS + MBITS);
+        ((self.0 >> RANDOM_BITS) & mask) as u64
+    }
+
+    /// Gets the 64 least-significant, fully random bits of this Julid
+    pub const fn random(&self) -> u128 {
+        self.0 & bitmask!(RANDOM_BITS)
+    }
+
+    /// Gets the non-timestamp section of this Julid (random + counter bits).
+    pub const fn unique(&self) -> u128 {
+        self.0 & bitmask!(UNIQUE_BITS)
+    }
+
+    /// Creates a Crockford Base32 encoded string that represents this Julid
+    ///
+    /// # Example
+    /// ```rust
+    /// use julid::julid::Julid;
+    /// let text = "01D39ZY06FGSCTVN4T2V9PKHFZ";
+    /// let id = Julid::from_string(text).unwrap();
+    ///
+    /// assert_eq!(&id.to_string(), text);
+    /// ```
+    pub fn as_string(self) -> String {
+        base32::encode(self.0)
+    }
+
+    /// Test if the Julid is Alpha
+    pub const fn is_alpha(&self) -> bool {
+        self.0 == 0u128
+    }
+
+    /// Creates a Julid using the provided bytes array, assumed big-endian.
+    pub const fn from_bytes(bytes: [u8; 16]) -> Julid {
+        Self(u128::from_be_bytes(bytes))
+    }
+
+    /// Returns the bytes of the Julid in big-endian order.
+    pub const fn to_bytes(self) -> [u8; 16] {
+        self.0.to_be_bytes()
+    }
+}
+
+impl Default for Julid {
+    fn default() -> Self {
+        Julid::alpha()
+    }
+}
+
+impl From<Julid> for String {
+    fn from(ulid: Julid) -> String {
+        ulid.as_string()
+    }
+}
+
+impl From<(u64, u64)> for Julid {
+    fn from((msb, lsb): (u64, u64)) -> Self {
+        Julid(u128::from(msb) << 64 | u128::from(lsb))
+    }
+}
+
+impl From<Julid> for (u64, u64) {
+    fn from(ulid: Julid) -> (u64, u64) {
+        ((ulid.0 >> 64) as u64, (ulid.0 & bitmask!(64)) as u64)
+    }
+}
+
+impl From<u128> for Julid {
+    fn from(value: u128) -> Julid {
+        Julid(value)
+    }
+}
+
+impl From<Julid> for u128 {
+    fn from(ulid: Julid) -> u128 {
+        ulid.0
+    }
+}
+
+impl From<[u8; 16]> for Julid {
+    fn from(bytes: [u8; 16]) -> Self {
+        Self(u128::from_be_bytes(bytes))
+    }
+}
+
+impl From<Julid> for [u8; 16] {
+    fn from(ulid: Julid) -> Self {
+        ulid.0.to_be_bytes()
+    }
+}
+
+impl FromStr for Julid {
+    type Err = DecodeError;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        Julid::from_string(s)
+    }
+}
+
+impl fmt::Display for Julid {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
+        write!(f, "{}", self.as_string())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_static() {
+        let s = Julid(0x41414141414141414141414141414141).as_string();
+        let u = Julid::from_string(&s).unwrap();
+        assert_eq!(&s, "21850M2GA1850M2GA1850M2GA1");
+        assert_eq!(u.0, 0x41414141414141414141414141414141);
+    }
+
+    #[test]
+    fn can_into_thing() {
+        let ulid = Julid::from_str("01FKMG6GAG0PJANMWFN84TNXCD").unwrap();
+        let s: String = ulid.into();
+        let u: u128 = ulid.into();
+        let uu: (u64, u64) = ulid.into();
+        let bytes: [u8; 16] = ulid.into();
+
+        assert_eq!(Julid::from_str(&s).unwrap(), ulid);
+        assert_eq!(Julid::from(u), ulid);
+        assert_eq!(Julid::from(uu), ulid);
+        assert_eq!(Julid::from(bytes), ulid);
+    }
+
+    #[test]
+    fn default_is_nil() {
+        assert_eq!(Julid::default(), Julid::alpha());
+    }
+
+    #[test]
+    fn can_display_things() {
+        println!("{}", Julid::alpha());
+        println!("{}", DecodeError::InvalidLength(0));
+        println!("{}", DecodeError::InvalidChar('^'));
+    }
+
+    #[test]
+    fn can_increment() {
+        let mut max = 0;
+        for _ in 0..100 {
+            let id = Julid::new();
+            max = id.counter().max(max);
+        }
+        assert!(max > 0);
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..e00d4d7
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,5 @@
+mod base32;
+pub mod julid;
+
+pub use base32::DecodeError;
+pub use julid::Julid;
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..71a7930
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,23 @@
+use std::time::Instant;
+
+use julid::julid::Julid;
+
+fn main() {
+    let mut v = Vec::with_capacity(2000);
+    let start = Instant::now();
+    for _ in 0..2000 {
+        v.push(Julid::new());
+    }
+    let end = Instant::now();
+    let dur = (end - start).as_micros();
+
+    for id in v.iter() {
+        println!(
+            "{id}: {}ms and {} incs; sortable: {}",
+            id.timestamp(),
+            id.counter(),
+            id.sortable()
+        );
+    }
+    println!("2000 IDs generated in {dur}us");
+}