// julid-rs/src/base32.rs

use core::fmt;

/// Length of a string-encoded Julid
pub(crate) const JULID_STR_LEN: usize = 26;

/// The 32 output symbols, indexed by 5-bit digit value: the decimal digits
/// followed by the uppercase ASCII letters with `I`, `L`, `O` and `U` left out.
const ALPHABET: &[u8; 32] = b"0123456789ABCDEFGHJKMNPQRSTVWXYZ";

/// Marker stored in [`LOOKUP`] for input bytes that are not base32 symbols.
const NO_VALUE: u8 = 255;

/// Maps every possible input byte to its 5-bit digit value, or to [`NO_VALUE`]
/// when the byte is not a base32 symbol. Letters are mapped in both uppercase
/// and lowercase.
///
/// The table is derived from [`ALPHABET`] at compile time, so the encode and
/// decode sides cannot drift out of sync.
const LOOKUP: [u8; 256] = build_lookup();

/// Build the byte-to-digit decode table from [`ALPHABET`].
const fn build_lookup() -> [u8; 256] {
    let mut table = [NO_VALUE; 256];

    // Manual loop because iterators are not usable in a const fn
    let mut digit = 0;
    while digit < ALPHABET.len() {
        let symbol = ALPHABET[digit];
        // `digit` is always below 32 here, so narrowing to u8 cannot truncate.
        table[symbol as usize] = digit as u8;
        if symbol.is_ascii_uppercase() {
            // Accept the lowercase spelling of every letter as well.
            table[symbol.to_ascii_lowercase() as usize] = digit as u8;
        }
        digit += 1;
    }

    table
}

/// Render a 128-bit value as its fixed-width base32 text.
///
/// The result is always [`JULID_STR_LEN`] characters drawn from [`ALPHABET`],
/// most significant digit first, left-padded with `0` digits.
pub(crate) fn encode(mut value: u128) -> String {
    let mut digits = [0u8; JULID_STR_LEN];

    // Peel off the lowest five bits at a time, filling the buffer from its
    // last slot back to its first.
    for slot in digits.iter_mut().rev() {
        *slot = ALPHABET[(value & 0x1f) as usize];
        value >>= 5;
    }

    // Every byte written above comes from ALPHABET, which is pure ASCII.
    String::from_utf8(digits.to_vec()).expect("unexpected failure in base32 encode for ulid")
}

/// An error that can occur when decoding a base32 string into a Julid (invalid
/// length, or invalid character)
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
pub enum DecodeError {
    /// The length of the string does not match the expected length; carries
    /// the length that was actually supplied
    InvalidLength(usize),
    /// A non-base32 character was found; carries the offending character
    InvalidChar(char),
}

impl std::error::Error for DecodeError {}

impl fmt::Display for DecodeError {
    /// Write a short lowercase description of the failure, including the
    /// offending length or character.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Write straight into the formatter; no intermediate String is needed.
        match *self {
            Self::InvalidLength(len) => write!(f, "invalid length: {len}"),
            Self::InvalidChar(c) => write!(f, "invalid character: {c}"),
        }
    }
}

/// Decode a base32 string into the 128-bit value it represents.
///
/// Letters are accepted in either case. The most significant digit comes
/// first.
///
/// # Errors
///
/// * [`DecodeError::InvalidLength`], carrying the byte length of `encoded`,
///   when that length is not exactly [`JULID_STR_LEN`].
/// * [`DecodeError::InvalidChar`] when a byte is not a base32 symbol, or when
///   the first digit is greater than `7` and the value would therefore not fit
///   in 128 bits. The reported character is the offending byte converted with
///   `as char`, so for non-ASCII input it is that single byte's code point
///   rather than the original multi-byte character.
pub(crate) const fn decode(encoded: &str) -> Result<u128, DecodeError> {
    // 26 digits of 5 bits each hold 130 bits, two more than a u128. Only the
    // low three bits of the first digit may be set, otherwise the shifts
    // below would silently discard the excess.
    const MAX_LEADING_DIGIT: u8 = 0b111;

    let len = encoded.len();
    if len != JULID_STR_LEN {
        return Err(DecodeError::InvalidLength(len));
    }

    let bytes = encoded.as_bytes();
    let mut value: u128 = 0;

    // Manual for loop because Range::iter() isn't const
    let mut i = 0;
    while i < JULID_STR_LEN {
        let byte = bytes[i];
        let digit = LOOKUP[byte as usize];
        if digit == NO_VALUE || (i == 0 && digit > MAX_LEADING_DIGIT) {
            return Err(DecodeError::InvalidChar(byte as char));
        }
        value = (value << 5) | digit as u128;
        i += 1;
    }

    Ok(value)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Inputs covering the all-ones, alternating-nibble and all-zero bit
    /// patterns.
    const BIT_PATTERNS: [u128; 3] = [
        0xffffffffffffffffffffffffffffffff,
        0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f,
        0x00000000000000000000000000000000,
    ];

    /// Known value/text pairs convert in both directions, and decoding
    /// accepts lowercase input.
    #[test]
    fn test_valid() {
        let value: u128 = 0x41414141414141414141414141414141;
        let text = "21850M2GA1850M2GA1850M2GA1";
        assert_eq!(decode(text), Ok(value));
        assert_eq!(encode(value), text);

        let value: u128 = 0x4d4e385051444a59454234335a413756;
        let text = "2D9RW50MA499CMAGHM6DD42DTP";
        assert_eq!(encode(value), text);
        assert_eq!(decode(text), Ok(value));
        assert_eq!(decode(&text.to_lowercase()), Ok(value));
    }

    /// Encoding always yields `JULID_STR_LEN` characters, and decoding
    /// reports the supplied length when it is anything else.
    #[test]
    fn test_length() {
        for &value in BIT_PATTERNS.iter() {
            assert_eq!(encode(value).len(), JULID_STR_LEN);
        }

        let wrong_lengths = [
            ("", 0),
            ("2D9RW50MA499CMAGHM6DD42DT", 25),
            ("2D9RW50MA499CMAGHM6DD42DTPP", 27),
        ];
        for &(text, len) in wrong_lengths.iter() {
            assert_eq!(decode(text), Err(DecodeError::InvalidLength(len)));
        }
    }

    /// Encoding only emits alphabet symbols, and decoding names the first
    /// symbol that is not in the alphabet.
    #[test]
    fn test_chars() {
        for &value in BIT_PATTERNS.iter() {
            assert!(encode(value).bytes().all(|b| ALPHABET.contains(&b)));
        }

        let rejected = [
            ("2D9RW50[A499CMAGHM6DD42DTP", '['),
            ("2D9RW50LA499CMAGHM6DD42DTP", 'L'),
            ("2D9RW50IA499CMAGHM6DD42DTP", 'I'),
        ];
        for &(text, bad) in rejected.iter() {
            assert_eq!(decode(text), Err(DecodeError::InvalidChar(bad)));
        }
    }
}