base64url_nopad

base64url without padding library.
git clone https://git.philomathiclife.com/repos/base64url_nopad
Log | Files | Refs | README

commit 6d69f46c2910fc18e772aa5a889e83b8bca0fe92
parent 0858cccfdf2b69f61a389808e47202f01f3af2fb
Author: Zack Newman <zack@philomathiclife.com>
Date:   Thu, 30 Apr 2026 16:42:14 -0600

bump msrv. use chunk (en/de)coding. reduce unsafe

Diffstat:
M Cargo.toml | 4 ++--
M src/lib.rs | 1201 +++++++++++++++++++++++++++++++++----------------------------------------------
2 files changed, 505 insertions(+), 700 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -9,8 +9,8 @@ license = "MIT OR Apache-2.0" name = "base64url_nopad" readme = "README.md" repository = "https://git.philomathiclife.com/repos/base64url_nopad/" -rust-version = "1.93.1" -version = "0.1.4" +rust-version = "1.95.0" +version = "0.1.5" [lints.rust] deprecated-safe = { level = "deny", priority = -1 } diff --git a/src/lib.rs b/src/lib.rs @@ -73,264 +73,9 @@ use alloc::{collections::TryReserveError, string::String, vec::Vec}; use core::{ error::Error, fmt::{self, Display, Formatter, Write}, + hint::cold_path, mem, }; -/// `b'A'`. -const UPPER_A: u8 = b'A'; -/// `b'B'`. -const UPPER_B: u8 = b'B'; -/// `b'C'`. -const UPPER_C: u8 = b'C'; -/// `b'D'`. -const UPPER_D: u8 = b'D'; -/// `b'E'`. -const UPPER_E: u8 = b'E'; -/// `b'F'`. -const UPPER_F: u8 = b'F'; -/// `b'G'`. -const UPPER_G: u8 = b'G'; -/// `b'H'`. -const UPPER_H: u8 = b'H'; -/// `b'I'`. -const UPPER_I: u8 = b'I'; -/// `b'J'`. -const UPPER_J: u8 = b'J'; -/// `b'K'`. -const UPPER_K: u8 = b'K'; -/// `b'L'`. -const UPPER_L: u8 = b'L'; -/// `b'M'`. -const UPPER_M: u8 = b'M'; -/// `b'N'`. -const UPPER_N: u8 = b'N'; -/// `b'O'`. -const UPPER_O: u8 = b'O'; -/// `b'P'`. -const UPPER_P: u8 = b'P'; -/// `b'Q'`. -const UPPER_Q: u8 = b'Q'; -/// `b'R'`. -const UPPER_R: u8 = b'R'; -/// `b'S'`. -const UPPER_S: u8 = b'S'; -/// `b'T'`. -const UPPER_T: u8 = b'T'; -/// `b'U'`. -const UPPER_U: u8 = b'U'; -/// `b'V'`. -const UPPER_V: u8 = b'V'; -/// `b'W'`. -const UPPER_W: u8 = b'W'; -/// `b'X'`. -const UPPER_X: u8 = b'X'; -/// `b'Y'`. -const UPPER_Y: u8 = b'Y'; -/// `b'Z'`. -const UPPER_Z: u8 = b'Z'; -/// `b'a'`. -const LOWER_A: u8 = b'a'; -/// `b'b'`. -const LOWER_B: u8 = b'b'; -/// `b'c'`. -const LOWER_C: u8 = b'c'; -/// `b'd'`. -const LOWER_D: u8 = b'd'; -/// `b'e'`. -const LOWER_E: u8 = b'e'; -/// `b'f'`. -const LOWER_F: u8 = b'f'; -/// `b'g'`. -const LOWER_G: u8 = b'g'; -/// `b'h'`. -const LOWER_H: u8 = b'h'; -/// `b'i'`. -const LOWER_I: u8 = b'i'; -/// `b'j'`. 
-const LOWER_J: u8 = b'j'; -/// `b'k'`. -const LOWER_K: u8 = b'k'; -/// `b'l'`. -const LOWER_L: u8 = b'l'; -/// `b'm'`. -const LOWER_M: u8 = b'm'; -/// `b'n'`. -const LOWER_N: u8 = b'n'; -/// `b'o'`. -const LOWER_O: u8 = b'o'; -/// `b'p'`. -const LOWER_P: u8 = b'p'; -/// `b'q'`. -const LOWER_Q: u8 = b'q'; -/// `b'r'`. -const LOWER_R: u8 = b'r'; -/// `b's'`. -const LOWER_S: u8 = b's'; -/// `b't'`. -const LOWER_T: u8 = b't'; -/// `b'u'`. -const LOWER_U: u8 = b'u'; -/// `b'v'`. -const LOWER_V: u8 = b'v'; -/// `b'w'`. -const LOWER_W: u8 = b'w'; -/// `b'x'`. -const LOWER_X: u8 = b'x'; -/// `b'y'`. -const LOWER_Y: u8 = b'y'; -/// `b'z'`. -const LOWER_Z: u8 = b'z'; -/// `b'0'`. -const ZERO: u8 = b'0'; -/// `b'1'`. -const ONE: u8 = b'1'; -/// `b'2'`. -const TWO: u8 = b'2'; -/// `b'3'`. -const THREE: u8 = b'3'; -/// `b'4'`. -const FOUR: u8 = b'4'; -/// `b'5'`. -const FIVE: u8 = b'5'; -/// `b'6'`. -const SIX: u8 = b'6'; -/// `b'7'`. -const SEVEN: u8 = b'7'; -/// `b'8'`. -const EIGHT: u8 = b'8'; -/// `b'9'`. -const NINE: u8 = b'9'; -/// `b'-'`. -const HYPHEN: u8 = b'-'; -/// `b'_'`. -const UNDERSCORE: u8 = b'_'; -/// `'A'`. -const UPPER_A_CHAR: char = 'A'; -/// `'B'`. -const UPPER_B_CHAR: char = 'B'; -/// `'C'`. -const UPPER_C_CHAR: char = 'C'; -/// `'D'`. -const UPPER_D_CHAR: char = 'D'; -/// `'E'`. -const UPPER_E_CHAR: char = 'E'; -/// `'F'`. -const UPPER_F_CHAR: char = 'F'; -/// `'G'`. -const UPPER_G_CHAR: char = 'G'; -/// `'H'`. -const UPPER_H_CHAR: char = 'H'; -/// `'I'`. -const UPPER_I_CHAR: char = 'I'; -/// `'J'`. -const UPPER_J_CHAR: char = 'J'; -/// `'K'`. -const UPPER_K_CHAR: char = 'K'; -/// `'L'`. -const UPPER_L_CHAR: char = 'L'; -/// `'M'`. -const UPPER_M_CHAR: char = 'M'; -/// `'N'`. -const UPPER_N_CHAR: char = 'N'; -/// `'O'`. -const UPPER_O_CHAR: char = 'O'; -/// `'P'`. -const UPPER_P_CHAR: char = 'P'; -/// `'Q'`. -const UPPER_Q_CHAR: char = 'Q'; -/// `'R'`. -const UPPER_R_CHAR: char = 'R'; -/// `'S'`. -const UPPER_S_CHAR: char = 'S'; -/// `'T'`. 
-const UPPER_T_CHAR: char = 'T'; -/// `'U'`. -const UPPER_U_CHAR: char = 'U'; -/// `'V'`. -const UPPER_V_CHAR: char = 'V'; -/// `'W'`. -const UPPER_W_CHAR: char = 'W'; -/// `'X'`. -const UPPER_X_CHAR: char = 'X'; -/// `'Y'`. -const UPPER_Y_CHAR: char = 'Y'; -/// `'Z'`. -const UPPER_Z_CHAR: char = 'Z'; -/// `'a'`. -const LOWER_A_CHAR: char = 'a'; -/// `'b'`. -const LOWER_B_CHAR: char = 'b'; -/// `'c'`. -const LOWER_C_CHAR: char = 'c'; -/// `'d'`. -const LOWER_D_CHAR: char = 'd'; -/// `'e'`. -const LOWER_E_CHAR: char = 'e'; -/// `'f'`. -const LOWER_F_CHAR: char = 'f'; -/// `'g'`. -const LOWER_G_CHAR: char = 'g'; -/// `'h'`. -const LOWER_H_CHAR: char = 'h'; -/// `'i'`. -const LOWER_I_CHAR: char = 'i'; -/// `'j'`. -const LOWER_J_CHAR: char = 'j'; -/// `'k'`. -const LOWER_K_CHAR: char = 'k'; -/// `'l'`. -const LOWER_L_CHAR: char = 'l'; -/// `'m'`. -const LOWER_M_CHAR: char = 'm'; -/// `'n'`. -const LOWER_N_CHAR: char = 'n'; -/// `'o'`. -const LOWER_O_CHAR: char = 'o'; -/// `'p'`. -const LOWER_P_CHAR: char = 'p'; -/// `'q'`. -const LOWER_Q_CHAR: char = 'q'; -/// `'r'`. -const LOWER_R_CHAR: char = 'r'; -/// `'s'`. -const LOWER_S_CHAR: char = 's'; -/// `'t'`. -const LOWER_T_CHAR: char = 't'; -/// `'u'`. -const LOWER_U_CHAR: char = 'u'; -/// `'v'`. -const LOWER_V_CHAR: char = 'v'; -/// `'w'`. -const LOWER_W_CHAR: char = 'w'; -/// `'x'`. -const LOWER_X_CHAR: char = 'x'; -/// `'y'`. -const LOWER_Y_CHAR: char = 'y'; -/// `'z'`. -const LOWER_Z_CHAR: char = 'z'; -/// `'0'`. -const ZERO_CHAR: char = '0'; -/// `'1'`. -const ONE_CHAR: char = '1'; -/// `'2'`. -const TWO_CHAR: char = '2'; -/// `'3'`. -const THREE_CHAR: char = '3'; -/// `'4'`. -const FOUR_CHAR: char = '4'; -/// `'5'`. -const FIVE_CHAR: char = '5'; -/// `'6'`. -const SIX_CHAR: char = '6'; -/// `'7'`. -const SEVEN_CHAR: char = '7'; -/// `'8'`. -const EIGHT_CHAR: char = '8'; -/// `'9'`. -const NINE_CHAR: char = '9'; -/// `'-'`. -const HYPHEN_CHAR: char = '-'; -/// `'_'`. 
-const UNDERSCORE_CHAR: char = '_'; /// The base64url alphabet. #[expect( non_camel_case_types, @@ -469,6 +214,274 @@ pub enum Alphabet { /// _. Underscore, } +/// Sorted ASCII `u8`s for [`Alphabet`]. +const ASCII: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; +/// Sorted `char`s for [`Alphabet`]. +const CHARS: &[char; 64] = &[ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', + 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', + 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', + '5', '6', '7', '8', '9', '-', '_', +]; +/// [`Alphabet`] variants indexed based on the their ASCII representation. +const FROM_ASCII: &[Option<Alphabet>; 256] = &[ + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + Some(Alphabet::Hyphen), + None, + None, + Some(Alphabet::Zero), + Some(Alphabet::One), + Some(Alphabet::Two), + Some(Alphabet::Three), + Some(Alphabet::Four), + Some(Alphabet::Five), + Some(Alphabet::Six), + Some(Alphabet::Seven), + Some(Alphabet::Eight), + Some(Alphabet::Nine), + None, + None, + None, + None, + None, + None, + None, + Some(Alphabet::A), + Some(Alphabet::B), + Some(Alphabet::C), + Some(Alphabet::D), + Some(Alphabet::E), + Some(Alphabet::F), + Some(Alphabet::G), + Some(Alphabet::H), + Some(Alphabet::I), + Some(Alphabet::J), + Some(Alphabet::K), + Some(Alphabet::L), + Some(Alphabet::M), + Some(Alphabet::N), + Some(Alphabet::O), + Some(Alphabet::P), + Some(Alphabet::Q), + Some(Alphabet::R), + Some(Alphabet::S), + Some(Alphabet::T), + Some(Alphabet::U), + Some(Alphabet::V), + 
Some(Alphabet::W), + Some(Alphabet::X), + Some(Alphabet::Y), + Some(Alphabet::Z), + None, + None, + None, + None, + Some(Alphabet::Underscore), + None, + Some(Alphabet::a), + Some(Alphabet::b), + Some(Alphabet::c), + Some(Alphabet::d), + Some(Alphabet::e), + Some(Alphabet::f), + Some(Alphabet::g), + Some(Alphabet::h), + Some(Alphabet::i), + Some(Alphabet::j), + Some(Alphabet::k), + Some(Alphabet::l), + Some(Alphabet::m), + Some(Alphabet::n), + Some(Alphabet::o), + Some(Alphabet::p), + Some(Alphabet::q), + Some(Alphabet::r), + Some(Alphabet::s), + Some(Alphabet::t), + Some(Alphabet::u), + Some(Alphabet::v), + Some(Alphabet::w), + Some(Alphabet::x), + Some(Alphabet::y), + Some(Alphabet::z), + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, +]; impl Alphabet { /// Returns `Self` that corresponds to `b`. /// @@ -495,21 +508,12 @@ impl Alphabet { // represented by `Self::Underscore`. 
if b <= Self::Underscore as u8 { // SAFETY: - // Just checked that `b` is in-range - Some(unsafe { Self::from_u8_unchecked(b) }) + // Our safety precondition is that `b` is in-range. + Some(unsafe { mem::transmute::<u8, Self>(b) }) } else { None } } - /// # Safety: - /// - /// `b` must be in `0..=63`, or else this is UB. - #[expect(unsafe_code, reason = "comment justifies correctness")] - const unsafe fn from_u8_unchecked(b: u8) -> Self { - // SAFETY: - // Our safety precondition is that `b` is in-range. - unsafe { mem::transmute(b) } - } /// Returns the `u8` `self` represents. /// /// # Examples @@ -534,75 +538,17 @@ impl Alphabet { /// # use base64url_nopad::Alphabet; /// assert_eq!(Alphabet::c.to_ascii(), b'c'); /// ``` + #[expect( + clippy::as_conversions, + clippy::indexing_slicing, + reason = "comments justify correctness" + )] #[inline] #[must_use] pub const fn to_ascii(self) -> u8 { - match self { - Self::A => UPPER_A, - Self::B => UPPER_B, - Self::C => UPPER_C, - Self::D => UPPER_D, - Self::E => UPPER_E, - Self::F => UPPER_F, - Self::G => UPPER_G, - Self::H => UPPER_H, - Self::I => UPPER_I, - Self::J => UPPER_J, - Self::K => UPPER_K, - Self::L => UPPER_L, - Self::M => UPPER_M, - Self::N => UPPER_N, - Self::O => UPPER_O, - Self::P => UPPER_P, - Self::Q => UPPER_Q, - Self::R => UPPER_R, - Self::S => UPPER_S, - Self::T => UPPER_T, - Self::U => UPPER_U, - Self::V => UPPER_V, - Self::W => UPPER_W, - Self::X => UPPER_X, - Self::Y => UPPER_Y, - Self::Z => UPPER_Z, - Self::a => LOWER_A, - Self::b => LOWER_B, - Self::c => LOWER_C, - Self::d => LOWER_D, - Self::e => LOWER_E, - Self::f => LOWER_F, - Self::g => LOWER_G, - Self::h => LOWER_H, - Self::i => LOWER_I, - Self::j => LOWER_J, - Self::k => LOWER_K, - Self::l => LOWER_L, - Self::m => LOWER_M, - Self::n => LOWER_N, - Self::o => LOWER_O, - Self::p => LOWER_P, - Self::q => LOWER_Q, - Self::r => LOWER_R, - Self::s => LOWER_S, - Self::t => LOWER_T, - Self::u => LOWER_U, - Self::v => LOWER_V, - Self::w => LOWER_W, - 
Self::x => LOWER_X, - Self::y => LOWER_Y, - Self::z => LOWER_Z, - Self::Zero => ZERO, - Self::One => ONE, - Self::Two => TWO, - Self::Three => THREE, - Self::Four => FOUR, - Self::Five => FIVE, - Self::Six => SIX, - Self::Seven => SEVEN, - Self::Eight => EIGHT, - Self::Nine => NINE, - Self::Hyphen => HYPHEN, - Self::Underscore => UNDERSCORE, - } + // `u8 as usize` is always OK; and we want this to be `const` so can't rely on `usize::from`. + // `self.to_u8() < 64` and `ASCII.len() == 64`, so indexing can't `panic`. + ASCII[self.to_u8() as usize] } /// Returns `Some` iff `ascii` is the ASCII representation of `Self`. /// @@ -618,76 +564,17 @@ impl Alphabet { /// } /// } /// ``` + #[expect( + clippy::as_conversions, + clippy::indexing_slicing, + reason = "comments justify correctness" + )] #[inline] #[must_use] pub const fn from_ascii(ascii: u8) -> Option<Self> { - match ascii { - UPPER_A => Some(Self::A), - UPPER_B => Some(Self::B), - UPPER_C => Some(Self::C), - UPPER_D => Some(Self::D), - UPPER_E => Some(Self::E), - UPPER_F => Some(Self::F), - UPPER_G => Some(Self::G), - UPPER_H => Some(Self::H), - UPPER_I => Some(Self::I), - UPPER_J => Some(Self::J), - UPPER_K => Some(Self::K), - UPPER_L => Some(Self::L), - UPPER_M => Some(Self::M), - UPPER_N => Some(Self::N), - UPPER_O => Some(Self::O), - UPPER_P => Some(Self::P), - UPPER_Q => Some(Self::Q), - UPPER_R => Some(Self::R), - UPPER_S => Some(Self::S), - UPPER_T => Some(Self::T), - UPPER_U => Some(Self::U), - UPPER_V => Some(Self::V), - UPPER_W => Some(Self::W), - UPPER_X => Some(Self::X), - UPPER_Y => Some(Self::Y), - UPPER_Z => Some(Self::Z), - LOWER_A => Some(Self::a), - LOWER_B => Some(Self::b), - LOWER_C => Some(Self::c), - LOWER_D => Some(Self::d), - LOWER_E => Some(Self::e), - LOWER_F => Some(Self::f), - LOWER_G => Some(Self::g), - LOWER_H => Some(Self::h), - LOWER_I => Some(Self::i), - LOWER_J => Some(Self::j), - LOWER_K => Some(Self::k), - LOWER_L => Some(Self::l), - LOWER_M => Some(Self::m), - LOWER_N => 
Some(Self::n), - LOWER_O => Some(Self::o), - LOWER_P => Some(Self::p), - LOWER_Q => Some(Self::q), - LOWER_R => Some(Self::r), - LOWER_S => Some(Self::s), - LOWER_T => Some(Self::t), - LOWER_U => Some(Self::u), - LOWER_V => Some(Self::v), - LOWER_W => Some(Self::w), - LOWER_X => Some(Self::x), - LOWER_Y => Some(Self::y), - LOWER_Z => Some(Self::z), - ZERO => Some(Self::Zero), - ONE => Some(Self::One), - TWO => Some(Self::Two), - THREE => Some(Self::Three), - FOUR => Some(Self::Four), - FIVE => Some(Self::Five), - SIX => Some(Self::Six), - SEVEN => Some(Self::Seven), - EIGHT => Some(Self::Eight), - NINE => Some(Self::Nine), - HYPHEN => Some(Self::Hyphen), - UNDERSCORE => Some(Self::Underscore), - _ => None, - } + // `u8 as usize` is always OK; and we want this to be `const` so can't rely on `usize::from`. + // `FROM_ASCII` has length 256, so indexing can't `panic`. + FROM_ASCII[ascii as usize] } /// Same as [`Self::to_ascii`] except a `char` is returned. /// @@ -697,75 +584,17 @@ impl Alphabet { /// # use base64url_nopad::Alphabet; /// assert_eq!(Alphabet::J.to_char(), 'J'); /// ``` + #[expect( + clippy::as_conversions, + clippy::indexing_slicing, + reason = "comments justify correctness" + )] #[inline] #[must_use] pub const fn to_char(self) -> char { - match self { - Self::A => UPPER_A_CHAR, - Self::B => UPPER_B_CHAR, - Self::C => UPPER_C_CHAR, - Self::D => UPPER_D_CHAR, - Self::E => UPPER_E_CHAR, - Self::F => UPPER_F_CHAR, - Self::G => UPPER_G_CHAR, - Self::H => UPPER_H_CHAR, - Self::I => UPPER_I_CHAR, - Self::J => UPPER_J_CHAR, - Self::K => UPPER_K_CHAR, - Self::L => UPPER_L_CHAR, - Self::M => UPPER_M_CHAR, - Self::N => UPPER_N_CHAR, - Self::O => UPPER_O_CHAR, - Self::P => UPPER_P_CHAR, - Self::Q => UPPER_Q_CHAR, - Self::R => UPPER_R_CHAR, - Self::S => UPPER_S_CHAR, - Self::T => UPPER_T_CHAR, - Self::U => UPPER_U_CHAR, - Self::V => UPPER_V_CHAR, - Self::W => UPPER_W_CHAR, - Self::X => UPPER_X_CHAR, - Self::Y => UPPER_Y_CHAR, - Self::Z => UPPER_Z_CHAR, - Self::a 
=> LOWER_A_CHAR, - Self::b => LOWER_B_CHAR, - Self::c => LOWER_C_CHAR, - Self::d => LOWER_D_CHAR, - Self::e => LOWER_E_CHAR, - Self::f => LOWER_F_CHAR, - Self::g => LOWER_G_CHAR, - Self::h => LOWER_H_CHAR, - Self::i => LOWER_I_CHAR, - Self::j => LOWER_J_CHAR, - Self::k => LOWER_K_CHAR, - Self::l => LOWER_L_CHAR, - Self::m => LOWER_M_CHAR, - Self::n => LOWER_N_CHAR, - Self::o => LOWER_O_CHAR, - Self::p => LOWER_P_CHAR, - Self::q => LOWER_Q_CHAR, - Self::r => LOWER_R_CHAR, - Self::s => LOWER_S_CHAR, - Self::t => LOWER_T_CHAR, - Self::u => LOWER_U_CHAR, - Self::v => LOWER_V_CHAR, - Self::w => LOWER_W_CHAR, - Self::x => LOWER_X_CHAR, - Self::y => LOWER_Y_CHAR, - Self::z => LOWER_Z_CHAR, - Self::Zero => ZERO_CHAR, - Self::One => ONE_CHAR, - Self::Two => TWO_CHAR, - Self::Three => THREE_CHAR, - Self::Four => FOUR_CHAR, - Self::Five => FIVE_CHAR, - Self::Six => SIX_CHAR, - Self::Seven => SEVEN_CHAR, - Self::Eight => EIGHT_CHAR, - Self::Nine => NINE_CHAR, - Self::Hyphen => HYPHEN_CHAR, - Self::Underscore => UNDERSCORE_CHAR, - } + // `u8 as usize` is always OK; and we want this to be `const` so can't rely on `usize::from`. + // `self.to_u8() < 64` and `CHARS.len() == 64`, so indexing can't `panic`. + CHARS[self.to_u8() as usize] } /// Same as [`Self::from_ascii`] except the input is a `char`. 
/// @@ -781,75 +610,21 @@ impl Alphabet { /// } /// } /// ``` + #[expect( + clippy::as_conversions, + clippy::cast_possible_truncation, + reason = "comments justify correctness" + )] #[inline] #[must_use] pub const fn from_char(c: char) -> Option<Self> { - match c { - UPPER_A_CHAR => Some(Self::A), - UPPER_B_CHAR => Some(Self::B), - UPPER_C_CHAR => Some(Self::C), - UPPER_D_CHAR => Some(Self::D), - UPPER_E_CHAR => Some(Self::E), - UPPER_F_CHAR => Some(Self::F), - UPPER_G_CHAR => Some(Self::G), - UPPER_H_CHAR => Some(Self::H), - UPPER_I_CHAR => Some(Self::I), - UPPER_J_CHAR => Some(Self::J), - UPPER_K_CHAR => Some(Self::K), - UPPER_L_CHAR => Some(Self::L), - UPPER_M_CHAR => Some(Self::M), - UPPER_N_CHAR => Some(Self::N), - UPPER_O_CHAR => Some(Self::O), - UPPER_P_CHAR => Some(Self::P), - UPPER_Q_CHAR => Some(Self::Q), - UPPER_R_CHAR => Some(Self::R), - UPPER_S_CHAR => Some(Self::S), - UPPER_T_CHAR => Some(Self::T), - UPPER_U_CHAR => Some(Self::U), - UPPER_V_CHAR => Some(Self::V), - UPPER_W_CHAR => Some(Self::W), - UPPER_X_CHAR => Some(Self::X), - UPPER_Y_CHAR => Some(Self::Y), - UPPER_Z_CHAR => Some(Self::Z), - LOWER_A_CHAR => Some(Self::a), - LOWER_B_CHAR => Some(Self::b), - LOWER_C_CHAR => Some(Self::c), - LOWER_D_CHAR => Some(Self::d), - LOWER_E_CHAR => Some(Self::e), - LOWER_F_CHAR => Some(Self::f), - LOWER_G_CHAR => Some(Self::g), - LOWER_H_CHAR => Some(Self::h), - LOWER_I_CHAR => Some(Self::i), - LOWER_J_CHAR => Some(Self::j), - LOWER_K_CHAR => Some(Self::k), - LOWER_L_CHAR => Some(Self::l), - LOWER_M_CHAR => Some(Self::m), - LOWER_N_CHAR => Some(Self::n), - LOWER_O_CHAR => Some(Self::o), - LOWER_P_CHAR => Some(Self::p), - LOWER_Q_CHAR => Some(Self::q), - LOWER_R_CHAR => Some(Self::r), - LOWER_S_CHAR => Some(Self::s), - LOWER_T_CHAR => Some(Self::t), - LOWER_U_CHAR => Some(Self::u), - LOWER_V_CHAR => Some(Self::v), - LOWER_W_CHAR => Some(Self::w), - LOWER_X_CHAR => Some(Self::x), - LOWER_Y_CHAR => Some(Self::y), - LOWER_Z_CHAR => Some(Self::z), - ZERO_CHAR => 
Some(Self::Zero), - ONE_CHAR => Some(Self::One), - TWO_CHAR => Some(Self::Two), - THREE_CHAR => Some(Self::Three), - FOUR_CHAR => Some(Self::Four), - FIVE_CHAR => Some(Self::Five), - SIX_CHAR => Some(Self::Six), - SEVEN_CHAR => Some(Self::Seven), - EIGHT_CHAR => Some(Self::Eight), - NINE_CHAR => Some(Self::Nine), - HYPHEN_CHAR => Some(Self::Hyphen), - UNDERSCORE_CHAR => Some(Self::Underscore), - _ => None, + // `char as u32` is always OK. + let code_point = c as u32; + if code_point < 256 { + // We just verified `code_point` does not exceed `u8::MAX`, so `code_point as u8` is lossless. + Self::from_ascii(code_point as u8) + } else { + None } } } @@ -895,15 +670,6 @@ impl From<Alphabet> for char { value.to_char() } } -/// Ordinal numbers from first to third inclusively. -enum ThreeOrdinal { - /// First. - First, - /// Second. - Second, - /// Third. - Third, -} /// The maximum value [`encode_len_checked`] will accept before returning `None`. // This won't `panic` since `usize::MAX` ≢ 1 (mod 4). pub const MAX_ENCODE_INPUT_LEN: usize = decode_len(usize::MAX).unwrap(); @@ -972,16 +738,17 @@ pub const fn encode_len_checked(input_length: usize) -> Option<usize> { // `(4 * (n / 3)) + (4 * (n % 3)).div_ceil(3)` since none of the intermediate calculations suffer // from overflow. if input_length <= MAX_ENCODE_INPUT_LEN { - // (n / 3) << 2u8 <= m <= usize::MAX; thus the left operand of + is fine. + // (n / 3) << 2 <= m <= usize::MAX; thus the left operand of + is fine. // n % 3 <= 2 // <==> - // 4(n % 3) <= 8 < usize::MAX; thus (n % 3) << 2u8 is fine. + // 4(n % 3) <= 8 < usize::MAX; thus (n % 3) << 2 is fine. // <==> // ⌈4(n % 3)/3⌉ <= 4(n % 3), so the right operand of + is fine. // The sum is fine since - // m = ⌈4n/3⌉ = 4⌊n/3⌋ + ⌈4(n mod 3)/3⌉ = ((n / 3) << 2u8) + ((n % 3) << 2u8).div_ceil(3), and m <= usize::MAX. 
- Some(((input_length / 3) << 2u8) + ((input_length % 3) << 2u8).div_ceil(3)) + // m = ⌈4n/3⌉ = 4⌊n/3⌋ + ⌈4(n mod 3)/3⌉ = ((n / 3) << 2) + ((n % 3) << 2).div_ceil(3), and m <= usize::MAX. + Some(((input_length / 3) << 2) + ((input_length % 3) << 2).div_ceil(3)) } else { + cold_path(); None } } @@ -1048,78 +815,84 @@ pub const fn encode_len(input_length: usize) -> usize { /// ``` #[expect(unsafe_code, reason = "comments justify correctness")] #[expect( + clippy::missing_panics_doc, + clippy::panic, + reason = "false positive in that the panic is impossible modulo bugs" +)] +#[expect( + clippy::missing_asserts_for_indexing, + reason = "trust the compiler to already optimize since we match on the length" +)] +#[expect( clippy::arithmetic_side_effects, + clippy::as_conversions, clippy::indexing_slicing, reason = "comments justify correctness" )] #[inline] -pub const fn encode_buffer_checked<'a>( - mut input: &[u8], - output: &'a mut [u8], -) -> Option<&'a mut str> { +pub const fn encode_buffer_checked<'a>(input: &[u8], output: &'a mut [u8]) -> Option<&'a mut str> { // This won't `panic` since Rust guarantees that all memory allocations won't exceed `isize::MAX`. let final_len = encode_len(input.len()); if output.len() >= final_len { - // We increment this by `1` for each `u8` in `input`. On every third `u8`, we increment it an extra - // time since we use 4 base64url `u8`s for each `u8`. - // We also verified that `output.len()` large enough; thus all indexing operations - // using it are correct, and incrementing it never results in overflow. + let (mut chunks, rem) = input.as_chunks::<3>(); + let (mut fst, mut snd, mut third); let mut output_idx = 0; - let mut counter = ThreeOrdinal::First; - let mut trailing = 0; - let mut shift; - while let [first, ref rest @ ..] = *input { - match counter { - ThreeOrdinal::First => { - // We trim the last two bits and interpret `first` as a 6-bit integer. 
- shift = first >> 2; - // SAFETY: - // `shift <= 63` since we shifted at least two bits to the right. - output[output_idx] = unsafe { Alphabet::from_u8_unchecked(shift) }.to_ascii(); - // The two bits we trimmed are the first two bits of the next 6-bit integer. - trailing = (first & 3) << 4; - counter = ThreeOrdinal::Second; - } - ThreeOrdinal::Second => { - // We trim the last four bits and interpret `first` as a 6-bit integer. - // The first two bits are the trailing 2 bits from the previous value. - shift = trailing | (first >> 4); - // SAFETY: - // `shift <= 63` since `first` was shifted at least two bits to the right, and - // `trailing = (first & 3) << 4` which means its high two bits are 0 as well. - output[output_idx] = unsafe { Alphabet::from_u8_unchecked(shift) }.to_ascii(); - // The four bits we trimmed are the first four bits of the next 6-bit integer. - trailing = (first & 15) << 2; - counter = ThreeOrdinal::Third; - } - ThreeOrdinal::Third => { - // We trim the last six bits and interpret `first` as a 6-bit integer. - // The first four bits are the trailing 4 bits from the previous value. - shift = trailing | (first >> 6); - // SAFETY: - // `shift <= 63` since `first` was shifted at least two bits to the right, and - // `trailing = (first & 15) << 2` which means its high two bits are 0 as well. - output[output_idx] = unsafe { Alphabet::from_u8_unchecked(shift) }.to_ascii(); - // Every third `u8` corresponds to a fourth base64url `u8`. - output_idx += 1; - // We use the 6 bits we just trimmed. - shift = first & 63; - // SAFETY: - // `shift <= 63` since `first & 63` is. - output[output_idx] = unsafe { Alphabet::from_u8_unchecked(shift) }.to_ascii(); - counter = ThreeOrdinal::First; - } - } - input = rest; + // There is a _substantial_ boost in performance if we chunk encode. + while let [first, ref rest @ ..] = *chunks { + (fst, snd, third) = (first[0], first[1], first[2]); + // We trim the last two bits and interpret `fst` as a 6-bit integer. 
+ // `u8 as usize` is always OK; and we want this to be `const` so `usize::from` won't work. + // `ASCII.len() == 64 > fst >> 2`, so indexing won't `panic`. + output[output_idx] = ASCII[(fst >> 2) as usize]; + // The two bits we trimmed are the first two bits of the next 6-bit integer. + output_idx += 1; + // We trim the last four bits and interpret `snd` as a 6-bit integer. + // The first two bits are the trailing 2 bits from the previous value. + // `u8 as usize` is always OK; and we want this to be `const` so `usize::from` won't work. + // `ASCII.len() == 64 > ((fst & 3) << 4) | (snd >> 4)`, so indexing won't `panic`. + output[output_idx] = ASCII[(((fst & 3) << 4) | (snd >> 4)) as usize]; + output_idx += 1; + // We trim the last six bits and interpret `third` as a 6-bit integer. + // The first four bits are the trailing 4 bits from the previous value. + // `u8 as usize` is always OK; and we want this to be `const` so `usize::from` won't work. + // `ASCII.len() == 64 > ((snd & 15) << 2) | (third >> 6)`, so indexing won't `panic`. + output[output_idx] = ASCII[(((snd & 15) << 2) | (third >> 6)) as usize]; + // Every third `u8` corresponds to a fourth base64url `u8`. output_idx += 1; + // `u8 as usize` is always OK; and we want this to be `const` so `usize::from` won't work. + // `ASCII.len() == 64 > (third & 63)`, so indexing won't `panic`. + output[output_idx] = ASCII[(third & 63) as usize]; + output_idx += 1; + chunks = rest; } - if !matches!(counter, ThreeOrdinal::First) { - // `input.len()` is not a multiple of 3; thus we have to append a trailing base64url `u8` that - // is simply the current value of `trailing`. - // SAFETY: - // `trailing <= 63` since `trailing` is either `(first & 3) << 4` or `(first & 15) << 2` where - // `first` is any `u8`. This means the high two bits are guaranteed to be 0. 
- output[output_idx] = unsafe { Alphabet::from_u8_unchecked(trailing) }.to_ascii(); + match rem.len() { + 0 => {} + 1 => { + // `rem.len() == 1`, so indexing won't `panic`. + fst = rem[0]; + // `u8 as usize` is always OK; and we want this to be `const` so `usize::from` won't work. + // `ASCII.len() == 64 > fst >> 2`, so indexing won't `panic`. + output[output_idx] = ASCII[(fst >> 2) as usize]; + // `ASCII.len() == 64 > (fst & 3) << 4`, so indexing won't `panic`. + output[output_idx + 1] = ASCII[((fst & 3) << 4) as usize]; + } + 2 => { + // `rem.len() == 2`, so indexing won't `panic`. + (fst, snd) = (rem[0], rem[1]); + // `input.len()` is not a multiple of 3; thus we have to append a final `u8` containing the + // last bits. + // `u8 as usize` is always OK; and we want this to be `const` so `usize::from` won't work. + // `ASCII.len() == 64 > fst >> 2`, so indexing won't `panic`. + output[output_idx] = ASCII[(fst >> 2) as usize]; + // `ASCII.len() == 64 > ((fst & 3) << 4) | (snd >> 4)`, so indexing won't `panic`. + output[output_idx + 1] = ASCII[(((fst & 3) << 4) | (snd >> 4)) as usize]; + // `ASCII.len() == 64 > (snd & 15) << 2`, so indexing won't `panic`. + output[output_idx + 2] = ASCII[((snd & 15) << 2) as usize]; + } + _ => { + cold_path(); + panic!("there is a bug in core::slice::as_chunks"); + } } // SAFETY: // We verified `output.len() >= final_len`. @@ -1130,6 +903,7 @@ pub const fn encode_buffer_checked<'a>( // Note the above is vacuously true when `val` is empty. Some(unsafe { str::from_utf8_unchecked_mut(val) }) } else { + cold_path(); None } } @@ -1585,28 +1359,17 @@ pub const fn decode_len(input_length: usize) -> Option<usize> { if rem == 1 { None } else { - // 3 * (n >> 2u8) <= m < usize::MAX; thus the left operand of + is fine. + // 3 * (n >> 2) <= m < usize::MAX; thus the left operand of + is fine. // rem <= 3 // <==> // 3rem <= 9 < usize::MAX; thus 3 * rem is fine. // <==> // ⌊3rem/4⌋ <= 3rem, so the right operand of + is fine. 
// The sum is fine since - // m = ⌊3n/4⌋ = 3⌊n/4⌋ + ⌊3(n mod 4)/4⌋ = (3 * (n >> 2u8)) + ((3 * rem) >> 2u8), and m < usize::MAX. - Some((3 * (input_length >> 2u8)) + ((3 * rem) >> 2u8)) + // m = ⌊3n/4⌋ = 3⌊n/4⌋ + ⌊3(n mod 4)/4⌋ = (3 * (n >> 2)) + ((3 * rem) >> 2), and m < usize::MAX. + Some((3 * (input_length >> 2)) + ((3 * rem) >> 2)) } } -/// Ordinal numbers from first to fourth inclusively. -enum FourOrdinal { - /// First. - First, - /// Second. - Second, - /// Third. - Third, - /// Fourth. - Fourth, -} /// Error returned from [`decode_buffer`] and [`decode`]. /// /// Note when [`alloc`](./index.html#alloc) is not enabled, [`Copy`] is also implemented. @@ -1679,82 +1442,101 @@ impl Error for DecodeErr {} /// ``` #[expect(unsafe_code, reason = "comment justifies correctness")] #[expect( + clippy::missing_panics_doc, + clippy::panic, + clippy::panic_in_result_fn, + reason = "want to crash when there is a bug" +)] +#[expect( + clippy::missing_asserts_for_indexing, + reason = "trust the compiler to already optimize since we match on the length" +)] +#[expect( clippy::arithmetic_side_effects, clippy::indexing_slicing, reason = "comments justify correctness" )] +#[expect(clippy::redundant_else, reason = "prefer the elses")] #[inline] pub const fn decode_buffer<'a>( - mut input: &[u8], + input: &[u8], output: &'a mut [u8], ) -> Result<&'a mut [u8], DecodeErr> { - if let Some(output_len) = decode_len(input.len()) { + let len = input.len(); + if let Some(output_len) = decode_len(len) { if output.len() >= output_len { - // `input.len() % 4`. - let len = input.len() & 3; - // A trailing `Alphabet` is added iff the encode value is not a multiple of 4 (i.e., len % 4 != 0). - match len { + let mut output_idx = 0; + let (mut chunks, rem) = input.as_chunks::<4>(); + let (mut snd, mut third); + // There is a _substantial_ boost in performance if we chunk decode. + while let [first, ref rest @ ..] 
= *chunks { + if let Some(base64_fst) = Alphabet::from_ascii(first[0]) + && let Some(base64_snd) = Alphabet::from_ascii(first[1]) + && let Some(base64_third) = Alphabet::from_ascii(first[2]) + && let Some(base64_fourth) = Alphabet::from_ascii(first[3]) + { + (snd, third) = (base64_snd.to_u8(), base64_third.to_u8()); + output[output_idx] = (base64_fst.to_u8() << 2) | (snd >> 4); + output_idx += 1; + output[output_idx] = (snd << 4) | (third >> 2); + output_idx += 1; + output[output_idx] = (third << 6) | base64_fourth.to_u8(); + output_idx += 1; + chunks = rest; + } else { + return Err(DecodeErr::InvalidByte); + } + } + match rem.len() { + 0 => {} + 1 => { + cold_path(); + panic!("there is a bug in base64url_nopad::decode_len"); + } 2 => { - // We know `input` is not empty; otherwise `len % 3 == 0`. - if let Some(val) = Alphabet::from_ascii(input[input.len() - 1]) { - if val.to_u8().trailing_zeros() < 4 { + if let Some(base64_fst) = Alphabet::from_ascii(rem[0]) + && let Some(base64_snd) = Alphabet::from_ascii(rem[1]) + { + snd = base64_snd.to_u8(); + if snd.trailing_zeros() < 4 { + cold_path(); return Err(DecodeErr::TrailingBits); + } else { + output[output_idx] = (base64_fst.to_u8() << 2) | (snd >> 4); } } else { + cold_path(); return Err(DecodeErr::InvalidByte); } } 3 => { - // We know `input` is not empty; otherwise `len % 3 == 0`. 
- if let Some(val) = Alphabet::from_ascii(input[input.len() - 1]) { - if val.to_u8().trailing_zeros() < 2 { + if let Some(base64_fst) = Alphabet::from_ascii(rem[0]) + && let Some(base64_snd) = Alphabet::from_ascii(rem[1]) + && let Some(base64_third) = Alphabet::from_ascii(rem[2]) + { + (snd, third) = (base64_snd.to_u8(), base64_third.to_u8()); + if third.trailing_zeros() < 2 { + cold_path(); return Err(DecodeErr::TrailingBits); + } else { + output[output_idx] = (base64_fst.to_u8() << 2) | (snd >> 4); + output[output_idx + 1] = (snd << 4) | (third >> 2); } } else { + cold_path(); return Err(DecodeErr::InvalidByte); } } - // The only possible value is `0` since if `len` were `1`, `decode_len` would have failed. - _ => {} - } - let mut val = 0; - let mut output_idx = 0; - let mut counter = FourOrdinal::First; - while let [mut first, ref rest @ ..] = *input { - if let Some(base64) = Alphabet::from_ascii(first) { - first = base64.to_u8(); - match counter { - FourOrdinal::First => { - val = first << 2; - counter = FourOrdinal::Second; - } - FourOrdinal::Second => { - output[output_idx] = val | (first >> 4); - val = first << 4; - counter = FourOrdinal::Third; - output_idx += 1; - } - FourOrdinal::Third => { - output[output_idx] = val | (first >> 2); - val = first << 6; - counter = FourOrdinal::Fourth; - output_idx += 1; - } - FourOrdinal::Fourth => { - output[output_idx] = val | first; - counter = FourOrdinal::First; - output_idx += 1; - } - } - input = rest; - } else { - return Err(DecodeErr::InvalidByte); + _ => { + cold_path(); + panic!("there is a bug in core::slice::as_chunks"); } } // SAFETY: // `output.len() >= output_len`. 
Ok(unsafe { output.split_at_mut_unchecked(output_len) }.0) } else { + cold_path(); Err(DecodeErr::BufferLen) } } else { @@ -1842,50 +1624,73 @@ pub fn decode(input: &[u8]) -> Result<Vec<u8>, DecodeErr> { /// # Ok::<_, DecodeErr>(()) /// ``` #[expect( - clippy::arithmetic_side_effects, - clippy::indexing_slicing, - reason = "comments justify correctness" + clippy::missing_panics_doc, + clippy::panic, + clippy::panic_in_result_fn, + reason = "want to crash when there is a bug" +)] +#[expect( + clippy::missing_asserts_for_indexing, + reason = "trust the compiler to already optimize since we match on the length" )] +#[expect(clippy::indexing_slicing, reason = "comments justify correctness")] #[inline] -pub const fn validate_encoded_data(mut input: &[u8]) -> Result<(), DecodeErr> { - let len = input.len(); - // `len % 4`. - match len & 3 { - // `input.len()` is invalid iff it is equivalent to 1 modulo 4 per the proof in - // `decode_len`. - 1 => return Err(DecodeErr::EncodedLen), +pub const fn validate_encoded_data(input: &[u8]) -> Result<(), DecodeErr> { + let (mut chunks, rem) = input.as_chunks::<4>(); + // There is a _substantial_ boost in performance if we chunk decode. + while let [first, ref rest @ ..] = *chunks { + if Alphabet::from_ascii(first[0]).is_some() + && Alphabet::from_ascii(first[1]).is_some() + && Alphabet::from_ascii(first[2]).is_some() + && Alphabet::from_ascii(first[3]).is_some() + { + chunks = rest; + } else { + return Err(DecodeErr::InvalidByte); + } + } + match rem.len() { + 0 => Ok(()), + 1 => { + cold_path(); + Err(DecodeErr::EncodedLen) + } 2 => { - // We know `input` is not empty; otherwise `len % 4 == 0`. 
- if let Some(val) = Alphabet::from_ascii(input[len - 1]) { - if val.to_u8().trailing_zeros() < 4 { - return Err(DecodeErr::TrailingBits); + if Alphabet::from_ascii(rem[0]).is_some() + && let Some(base64_snd) = Alphabet::from_ascii(rem[1]) + { + if base64_snd.to_u8().trailing_zeros() < 4 { + cold_path(); + Err(DecodeErr::TrailingBits) + } else { + Ok(()) } } else { - return Err(DecodeErr::InvalidByte); + cold_path(); + Err(DecodeErr::InvalidByte) } } 3 => { - // We know `input` is not empty; otherwise `len % 4 == 0`. - if let Some(val) = Alphabet::from_ascii(input[len - 1]) { - if val.to_u8().trailing_zeros() < 2 { - return Err(DecodeErr::TrailingBits); + if Alphabet::from_ascii(rem[0]).is_some() + && Alphabet::from_ascii(rem[1]).is_some() + && let Some(base64_third) = Alphabet::from_ascii(rem[2]) + { + if base64_third.to_u8().trailing_zeros() < 2 { + cold_path(); + Err(DecodeErr::TrailingBits) + } else { + Ok(()) } } else { - return Err(DecodeErr::InvalidByte); + cold_path(); + Err(DecodeErr::InvalidByte) } } - // When the input has length that is a multple of 4, then no trailing bits were added and thus - // all values are possible. - _ => {} - } - while let [first, ref rest @ ..] = *input { - if Alphabet::from_ascii(first).is_some() { - input = rest; - } else { - return Err(DecodeErr::InvalidByte); + _ => { + cold_path(); + panic!("there is a bug in core::slice::as_chunks"); } } - Ok(()) } /// Same as [`encode_buffer`] except `output` must have the _exact_ length needed to encode `input`, and the /// encoded `str` is not returned. 
@@ -1943,7 +1748,7 @@ pub const fn encode_buffer_exact(input: &[u8], output: &mut [u8]) { /// DecodeErr::TrailingBits /// ); /// let mut buffer = [0; base64url_nopad::decode_len(b"C8Aa_A--91VZbx0".len()).unwrap()]; -/// base64url_nopad::decode_buffer(b"C8Aa_A--91VZbx0", &mut buffer)?; +/// base64url_nopad::decode_buffer_exact(b"C8Aa_A--91VZbx0", &mut buffer)?; /// assert_eq!(buffer, [0x0b, 0xc0, 0x1a, 0xfc, 0x0f, 0xbe, 0xf7, b'U', b'Y', b'o', 0x1d]); /// // Uncommenting below will cause a `panic` since a larger output buffer than necessary is _not_ OK. /// // base64url_nopad::decode_buffer_exact(b"C8Aa_A--91VZbx0", &mut [0; 128])?;