ascii_domain

Domains whose labels are only ASCII.
git clone https://git.philomathiclife.com/repos/ascii_domain
Log | Files | Refs | README

commit e0a00a4da03c17329c5a87755c3d9e5b7571a343
parent e3cb725644c383097ab3d5752548b3f7b56760dd
Author: Zack Newman <zack@philomathiclife.com>
Date:   Sun, 11 Feb 2024 10:08:24 -0700

improve LabelIter. add more from impls

Diffstat:
MCargo.toml | 2+-
MLICENSE-MIT | 2+-
Msrc/char_set.rs | 2+-
Msrc/dom.rs | 870+++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------
Msrc/serde.rs | 126++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
5 files changed, 702 insertions(+), 300 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0" name = "ascii_domain" readme = "README.md" repository = "https://git.philomathiclife.com/repos/ascii_domain/" -version = "0.4.0" +version = "0.5.0" [lib] name = "ascii_domain" diff --git a/LICENSE-MIT b/LICENSE-MIT @@ -1,4 +1,4 @@ -Copyright © 2023 Zack Newman +Copyright © 2024 Zack Newman Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/src/char_set.rs b/src/char_set.rs @@ -148,7 +148,7 @@ impl<T: AsMut<[u8]>> AllowedAscii<T> { /// assert!(AllowedAscii::try_from_unique_ascii(b"asdfghjkl".to_owned()).map_or(false, |ascii| ascii.contains(b'a') && !ascii.contains(b'A'))); /// assert!(AllowedAscii::try_from_unique_ascii(b"aa".to_owned()).map_or_else(|err| err == AsciiErr::Duplicate(b'a'), |_| false)); /// assert!(AllowedAscii::try_from_unique_ascii([255]).map_or_else(|err| err == AsciiErr::InvalidByte(255), |_| false)); - /// assert!(AllowedAscii::try_from_unique_ascii([0; 129]).map_or_else(|err| err == AsciiErr::CountTooLarge(129), |_| false)); + /// assert!(AllowedAscii::try_from_unique_ascii([0; 128]).map_or_else(|err| err == AsciiErr::CountTooLarge(128), |_| false)); /// assert!(AllowedAscii::try_from_unique_ascii([b'.']).map_or_else(|err| err == AsciiErr::Contains46, |_| false)); /// ``` #[inline] diff --git a/src/dom.rs b/src/dom.rs @@ -1,7 +1,7 @@ extern crate alloc; #[cfg(feature = "std")] extern crate std; -use crate::char_set::AllowedAscii; +use crate::char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS}; use alloc::{string::String, vec::Vec}; use core::{ borrow::Borrow, @@ -16,6 +16,8 @@ use core::{ }; #[cfg(feature = "std")] use std::error::Error; +/// The `AllowedAscii` used by `Rfc1123Domain`. +static RFC_CHARS: &AllowedAscii<[u8; 63]> = &ASCII_HYPHEN_DIGITS_LETTERS; /// Returned by [`Domain::cmp_by_domain_ordering`]. 
It is more informative than [`Ordering`] in that it /// distinguishes between a `Domain` that is greater than another `Domain` due to a [`Label`] being greater /// from a `Domain` that has the same `Label`s as another but simply more. @@ -85,92 +87,8 @@ pub struct Domain<T> { /// Guaranteed to only contain `b'.'` and the ASCII `u8`s in `allowed_ascii`. value: T, } -impl<'a> Domain<&'a [u8]> { - /// Same as [`Self::as_bytes`] except the lifetime is tied to the slice and not `self`. - /// - /// # Example - /// - /// ``` - /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; - /// assert!(Domain::try_from_bytes(b"example.com.".as_slice(), &ASCII_LOWERCASE).unwrap().domain_without_trailing_dot() == b"example.com"); - /// ``` - #[allow(clippy::as_conversions, clippy::indexing_slicing)] - #[inline] - #[must_use] - pub fn domain_without_trailing_dot(&self) -> &'a [u8] { - // `self.len().get() as usize` is fine since it's a positive `u8`. - // Indexing won't `panic` since `self.len()` is at most as long as `self.value`. - &self.value[..self.len().get() as usize] - } -} -impl<'a> Domain<&'a str> { - /// Same as [`Self::as_str`] except the lifetime is tied to the `str` and not `self`. - /// - /// # Example - /// - /// ``` - /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; - /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().domain_without_trailing_dot() == "example.com"); - /// ``` - #[allow(unsafe_code, clippy::as_conversions, clippy::indexing_slicing)] - #[inline] - #[must_use] - pub fn domain_without_trailing_dot(&self) -> &'a str { - // `self.len().get() as usize` is fine since it's a positive `u8`. - // Indexing won't `panic` since `self.len()` is at most as long as `self.value`. - let utf8 = &self.value.as_bytes()[..self.len().get() as usize]; - // SAFETY: - // Only ASCII is allowed, so this is fine. 
- unsafe { str::from_utf8_unchecked(utf8) } - } -} -impl From<Domain<Vec<u8>>> for Domain<String> { - #[allow(unsafe_code)] - #[inline] - fn from(value: Domain<Vec<u8>>) -> Self { - // SAFETY: - // We only allow ASCII, so this is fine. - let val = unsafe { String::from_utf8_unchecked(value.value) }; - Self { value: val } - } -} -impl From<Domain<String>> for Domain<Vec<u8>> { - #[inline] - fn from(value: Domain<String>) -> Self { - Self { - value: value.value.into_bytes(), - } - } -} -impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> { - #[allow(unsafe_code)] - #[inline] - fn from(value: Domain<&'a [u8]>) -> Self { - // SAFETY: - // We only allow ASCII, so this is fine. - let val = unsafe { str::from_utf8_unchecked(value.value) }; - Self { value: val } - } -} -impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> { - #[inline] - fn from(value: Domain<&'a str>) -> Self { - Self { - value: value.value.as_bytes(), - } - } -} impl<T> Domain<T> { - /// The maximum length of a `Domain` which is 253. - // SAFETY: 0 < 253 < 256. - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(253) }; - /// The minimum length of a `Domain` which is 1. - // SAFETY: 0 < 1 < 256. - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) }; - /// The inner `T`. This should be treated with caution since it will contain a trailing `b'.'` if there - /// is one as well as ASCII uppercase letters if there are any. + /// A reference to the contained `T`. 
/// /// # Example /// @@ -219,13 +137,9 @@ impl<T: AsRef<[u8]>> Domain<T> { /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; /// assert!(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap().as_str() == "Example.com"); /// ``` - #[allow(unsafe_code)] #[inline] pub fn as_str(&self) -> &str { - let input = self.as_bytes(); - // SAFETY: - // We only allow ASCII so this is safe. - unsafe { str::from_utf8_unchecked(input) } + <&str>::from(Domain::<&str>::from(Domain::<&[u8]>::from(self))) } /// The domain without a trailing `b'.'` if there was one. /// @@ -235,16 +149,12 @@ impl<T: AsRef<[u8]>> Domain<T> { /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; /// assert!(Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap().as_bytes() == b"Example.com"); /// ``` - #[allow(clippy::as_conversions, clippy::indexing_slicing)] #[inline] pub fn as_bytes(&self) -> &[u8] { - // This is correct so long as `Self::len` is correct. - &self.value.as_ref()[..self.len().get() as usize] + <&[u8]>::from(Domain::<&[u8]>::from(self)) } /// The length of the `Domain`. This does _not_ include the trailing `b'.'` if there was one. /// - /// Same as `self.as_str().len()` and `self.as_bytes().len()`. - /// /// # Example /// /// ``` @@ -291,7 +201,6 @@ impl<T: AsRef<[u8]>> Domain<T> { /// Returns [`DomainErr`] iff `v.as_ref()` is an invalid `Domain`. #[allow( clippy::arithmetic_side_effects, - clippy::as_conversions, clippy::indexing_slicing, clippy::into_iter_on_ref )] @@ -308,16 +217,18 @@ impl<T: AsRef<[u8]>> Domain<T> { if val.len() == 1 { return Err(DomainErr::RootDomain); } - // We know `val.len` is at least 1; otherwise `last` would have returned `None`. - // Therefore this won't underflow and indexing won't `panic`. - &val[..val.len() - 1] + // We know `val.len` is at least 2. + let len = val.len() - 1; + if val[len - 1] == b'.' { + return Err(DomainErr::EmptyLabel); + } + &val[..len] } else { val } } }; - // `MAX_LEN` is 253 so this is fine. 
- if value.len() > Self::MAX_LEN.get() as usize { + if value.len() > 253 { Err(DomainErr::LenExceeds253(value.len())) } else { value @@ -335,11 +246,7 @@ impl<T: AsRef<[u8]>> Domain<T> { Ok(label_len + 1) } }) - .and_then(|label_len| { - NonZeroU8::new(label_len) - .ok_or(DomainErr::EmptyLabel) - .map(|_| Self { value: v }) - }) + .map(|_| Self { value: v }) } } /// Returns an [`Iterator`] of [`Label`]s without consuming the `Domain`. @@ -347,11 +254,13 @@ impl<T: AsRef<[u8]>> Domain<T> { /// /// ``` /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; - /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().iter().next().unwrap().as_str() == "com"); + /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().unwrap().as_str() == "com"); /// ``` #[inline] - pub fn iter(&self) -> LabelIter<T> { - LabelIter::new(self) + pub fn iter(&self) -> LabelIter<'_> { + LabelIter { + domain: self.as_bytes(), + } } /// Returns `true` iff `self` and `right` are part of the same branch in the DNS hierarchy. /// @@ -553,7 +462,7 @@ impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> TryFrom<(T, &AllowedAscii<T2>)> for Domain impl<T: AsRef<[u8]>> Display for Domain<T> { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.write_str(self.as_str()) + f.write_str(self) } } impl<T: AsRef<[u8]>> AsRef<str> for Domain<T> { @@ -575,6 +484,135 @@ impl<T: AsRef<[u8]>> Deref for Domain<T> { self.as_str() } } +impl From<Domain<Vec<u8>>> for Domain<String> { + #[allow(unsafe_code)] + #[inline] + fn from(value: Domain<Vec<u8>>) -> Self { + // SAFETY: + // We only allow ASCII, so this is fine. 
+ let val = unsafe { String::from_utf8_unchecked(value.value) }; + Self { value: val } + } +} +impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Domain<T>> for Domain<&'b [u8]> { + #[inline] + fn from(value: &'a Domain<T>) -> Self { + Self { + value: value.value.as_ref(), + } + } +} +impl<'a: 'b, 'b, T: AsRef<str>> From<&'a Domain<T>> for Domain<&'b str> { + #[inline] + fn from(value: &'a Domain<T>) -> Self { + Self { + value: value.value.as_ref(), + } + } +} +impl From<Domain<String>> for Domain<Vec<u8>> { + #[inline] + fn from(value: Domain<String>) -> Self { + Self { + value: value.value.into_bytes(), + } + } +} +impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> { + #[allow(unsafe_code)] + #[inline] + fn from(value: Domain<&'a [u8]>) -> Self { + // SAFETY: + // We only allow ASCII, so this is fine. + let val = unsafe { str::from_utf8_unchecked(value.value) }; + Self { value: val } + } +} +impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> { + #[inline] + fn from(value: Domain<&'a str>) -> Self { + Self { + value: value.value.as_bytes(), + } + } +} +impl From<Domain<Self>> for String { + /// Returns the contained `String` _without_ a trailing `'.'` if there was one. + /// + /// # Example + /// + /// ``` + /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; + /// assert!(String::from(Domain::try_from_bytes(String::from("Example.com."), &ASCII_LETTERS).unwrap()).as_str() == "Example.com"); + /// ``` + #[inline] + fn from(value: Domain<Self>) -> Self { + if value.contains_trailing_dot() { + let mut val = value.value; + val.pop(); + val + } else { + value.value + } + } +} +impl<'a: 'b, 'b> From<Domain<&'a str>> for &'b str { + /// Returns the contained `str` _without_ a trailing `'.'` if there was one. 
+ /// + /// # Example + /// + /// ``` + /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; + /// assert!(<&str>::from(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap()) == "Example.com"); + /// ``` + #[allow(unsafe_code, clippy::as_conversions, clippy::indexing_slicing)] + #[inline] + fn from(value: Domain<&'a str>) -> Self { + // `value.len().get() as usize` is fine since it's a positive `u8`. + // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. + let utf8 = &value.value.as_bytes()[..value.len().get() as usize]; + // SAFETY: + // Only ASCII is allowed, so this is fine. + unsafe { str::from_utf8_unchecked(utf8) } + } +} +impl From<Domain<Self>> for Vec<u8> { + /// Returns the contained `Vec` _without_ a trailing `b'.'` if there was one. + /// + /// # Example + /// + /// ``` + /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; + /// assert!(Vec::from(Domain::try_from_bytes(vec![b'F', b'o', b'o', b'.', b'c', b'o', b'm'], &ASCII_LETTERS).unwrap()).as_slice() == b"Foo.com"); + /// ``` + #[inline] + fn from(value: Domain<Self>) -> Self { + if value.contains_trailing_dot() { + let mut val = value.value; + val.pop(); + val + } else { + value.value + } + } +} +impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for &'b [u8] { + /// Returns the contained slice _without_ a trailing `b'.'` if there was one. + /// + /// # Example + /// + /// ``` + /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; + /// assert!(<&[u8]>::from(Domain::try_from_bytes(b"Example.com.".as_slice(), &ASCII_LETTERS).unwrap()) == b"Example.com"); + /// ``` + #[allow(clippy::as_conversions, clippy::indexing_slicing)] + #[inline] + fn from(value: Domain<&'a [u8]>) -> Self { + // `value.len().get() as usize` is fine since it's a positive `u8`. + // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. + &value.value[..value.len().get() as usize] + } +} /// Error returned from [`Domain::try_from_bytes`]. 
#[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] @@ -622,14 +660,6 @@ pub struct Label<'a> { value: &'a str, } impl<'a> Label<'a> { - /// The maximum length of a `Label` which is 63. - // SAFETY: 0 < 63 < 256. - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(63) }; - /// The minimum length of a `Label` which is 1. - // SAFETY: 0 < 1 < 256. - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) }; /// The label. /// /// # Example @@ -739,6 +769,24 @@ impl<'a> Label<'a> { }) .is_ok() } + /// The length of the `Label`. This is inclusively between 1 and 63. + /// + /// # Example + /// + /// ``` + /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; + /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.len().get() == 3)); + /// ``` + #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)] + #[inline] + #[must_use] + pub const fn len(self) -> NonZeroU8 { + // The max length of a `Label` is 63. + let len = self.value.len() as u8; + // SAFETY: + // `Label`s are never empty. + unsafe { NonZeroU8::new_unchecked(len) } + } } impl PartialEq<Label<'_>> for Label<'_> { #[inline] @@ -804,155 +852,155 @@ impl<'a> Deref for Label<'a> { self.value } } -/// [`Iterator`] that iterates [`Label`]s from a borrowed [`Domain`] starting from the TLD down. +/// [`Iterator`] that iterates [`Label`]s from a [`Domain`] or [`Rfc1123Domain`] starting from the TLD down. /// /// This iterates `Label`s on demand; so if repeated iteration is desired, it may be better to collect the `Label`s -/// in a collection (e.g., [`Vec`]) than call [`Domain::iter`] again. -#[derive(Clone, Copy, Debug)] -pub struct LabelIter<'a, T> { - /// Domain that contains `Label`s to iterate. 
- domain: &'a Domain<T>, - /// This is one past the last index of the last character in the "first" `Label` remaining. - /// `idx == idx_back` iff there are no more `Label`s to iterate. - /// Since `idx` is associated with the "first" `Label` remaining and `idx_back` is associated with the - /// "last", `idx >= idx_back`. - /// - /// This means `idx - idx_back` is the total length of `domain` remaining _including_ separators. - idx: u8, - /// This is the index of the first character in the "last" `Label` remaining. - /// - /// Note since `LabelIter` iterates from the TLD down, "last" means "first" left-to-right. - idx_back: u8, -} -impl<'a, T: AsRef<[u8]>> LabelIter<'a, T> { - /// Helper function to construct an instance. - #[allow(clippy::arithmetic_side_effects)] - #[inline] - fn new(domain: &'a Domain<T>) -> Self { - Self { - idx: domain.len().get(), - idx_back: 0, - domain, - } - } -} -impl<'a, T: AsRef<[u8]>> Iterator for LabelIter<'a, T> { +/// in a collection (e.g., [`Vec`]) than create the iterator again. This is also why [`ExactSizeIterator`] is not +/// implemented. +#[derive(Clone, Debug)] +pub struct LabelIter<'a> { + /// Domain as ASCII. + domain: &'a [u8], +} +impl<'a> Iterator for LabelIter<'a> { type Item = Label<'a>; #[allow( unsafe_code, clippy::arithmetic_side_effects, - clippy::as_conversions, clippy::indexing_slicing, clippy::into_iter_on_ref )] #[inline] fn next(&mut self) -> Option<Self::Item> { - let bytes = self.domain.as_bytes(); - // 0 <= `self.idx_back` <= `self.idx` <= `bytes.len()`, so this won't `panic`. - bytes[self.idx_back as usize..self.idx as usize] - .into_iter() - .rev() - .try_fold(1, |count, byt| { - if *byt == b'.' { - let len = self.idx as usize; - // `count` <= `self.idx`, so this won't underflow. - self.idx -= count; - // `self.idx + 1` won't overflow since `self.idx < 255`. - // `self.idx + 1` <= `len` since `count` > 0 so this won't `panic`. 
- let ascii = &bytes[(self.idx + 1) as usize..len]; - // SAFETY: - // We only allow ASCII, so this is safe. - let value = unsafe { str::from_utf8_unchecked(ascii) }; - Err(Label { value }) - } else { - Ok(count + 1) - } - }) - .map_or_else(Some, |_| { - // `self.idx == self.idx_back` iff there are no more `Label`s to iterate. - (self.idx > self.idx_back).then(|| { - let len = self.idx as usize; - self.idx = self.idx_back; - let ascii = &bytes[self.idx_back as usize..len]; + (!self.domain.is_empty()).then(|| { + self.domain + .into_iter() + .rev() + .try_fold(1, |count, byt| { + if *byt == b'.' { + let len = self.domain.len(); + // `count` < `len` since there is at least one more `u8` before `b'.'`. + let idx = len - count; + // `idx + 1` < `len` since `count` is > 1 since `Label`s are never empty. + let ascii = &self.domain[idx + 1..len]; + // SAFETY: + // We only allow ASCII, so this is safe. + let value = unsafe { str::from_utf8_unchecked(ascii) }; + self.domain = &self.domain[..idx]; + Err(Label { value }) + } else { + Ok(count + 1) + } + }) + .map_or_else(convert::identity, |_| { // SAFETY: // We only allow ASCII, so this is safe. - let value = unsafe { str::from_utf8_unchecked(ascii) }; + let value = unsafe { str::from_utf8_unchecked(self.domain) }; + self.domain = &[]; Label { value } }) - }) + }) } #[inline] fn last(mut self) -> Option<Self::Item> where Self: Sized, { - let opt = self.next_back(); - self.idx_back = self.idx; - opt + self.next_back() + } + #[allow(clippy::integer_division)] + #[inline] + fn size_hint(&self) -> (usize, Option<usize>) { + if self.domain.is_empty() { + (0, Some(0)) + } else { + // The max size of a `Label` is 63; and all but the last have a `b'.'` that follow it. + // This means the fewest `Label`s possible is the floor of the length divided by 64 with + // the added requirement that it's at least one since we know the domain is not empty. + // The min size of a `Label` is 1; and all but the last have a `b'.'` that follow it. 
+ // This means the max number of `Label`s is the ceiling of the length divided by 2. + ( + (self.domain.len() / 64).max(1), + Some(self.domain.len().div_ceil(2)), + ) + } } } -impl<T: AsRef<[u8]>> FusedIterator for LabelIter<'_, T> {} -impl<T: AsRef<[u8]>> DoubleEndedIterator for LabelIter<'_, T> { +impl FusedIterator for LabelIter<'_> {} +impl DoubleEndedIterator for LabelIter<'_> { #[allow( unsafe_code, clippy::arithmetic_side_effects, - clippy::as_conversions, clippy::indexing_slicing, clippy::into_iter_on_ref )] #[inline] fn next_back(&mut self) -> Option<Self::Item> { - let bytes = self.domain.as_bytes(); - // 0 <= `self.idx_back` <= `self.idx` <= `bytes.len()`, so this won't `panic`. - bytes[self.idx_back as usize..self.idx as usize] - .into_iter() - .try_fold(1, |count, byt| { - if *byt == b'.' { - let start = self.idx_back as usize; - // The max value this can be is `self.idx`, so overflow is no concern. - self.idx_back += count; - // `self.idx_back - 1` won't underflow since `self.idx_back` > 0. - // `self.idx_back - 1` < `bytes.len()` since we know there `Label`s can't be empty - // and we are at `.` boundary. - // `start` <= `self.idx_back` since `count` >= 1. - // For those reasons, this won't `panic`. - let ascii = &bytes[start..self.idx_back as usize - 1]; - // SAFETY: - // We only allow ASCII, so this is safe. - let value = unsafe { str::from_utf8_unchecked(ascii) }; - Err(Label { value }) - } else { - Ok(count + 1) - } - }) - .map_or_else(Some, |_| { - // `self.idx == `self.idx_back` iff there are no more `Label`s to iterate. - (self.idx > self.idx_back).then(|| { - let start = self.idx_back as usize; - self.idx_back = self.idx; - let ascii = &bytes[start..self.idx as usize]; + (!self.domain.is_empty()).then(|| { + self.domain + .into_iter() + .try_fold(0, |count, byt| { + if *byt == b'.' { + // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s + // are not empty. 
+ let ascii = &self.domain[..count]; + // SAFETY: + // We only allow ASCII, so this is safe. + let value = unsafe { str::from_utf8_unchecked(ascii) }; + // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s + // are not empty. + self.domain = &self.domain[count + 1..]; + Err(Label { value }) + } else { + Ok(count + 1) + } + }) + .map_or_else(convert::identity, |_| { // SAFETY: // We only allow ASCII, so this is safe. - let value = unsafe { str::from_utf8_unchecked(ascii) }; + let value = unsafe { str::from_utf8_unchecked(self.domain) }; + self.domain = &[]; Label { value } }) - }) + }) } } impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Domain<T> { type Item = Label<'a>; - type IntoIter = LabelIter<'a, T>; + type IntoIter = LabelIter<'a>; #[inline] fn into_iter(self) -> Self::IntoIter { - LabelIter::new(self) + LabelIter { + domain: self.as_bytes(), + } } } -/// Error returned from [`Rfc1123Domain::try_from`]. +impl<'a> IntoIterator for Domain<&'a str> { + type Item = Label<'a>; + type IntoIter = LabelIter<'a>; + #[inline] + fn into_iter(self) -> Self::IntoIter { + LabelIter { + domain: <&str>::from(self).as_bytes(), + } + } +} +impl<'a> IntoIterator for Domain<&'a [u8]> { + type Item = Label<'a>; + type IntoIter = LabelIter<'a>; + #[inline] + fn into_iter(self) -> Self::IntoIter { + LabelIter { + domain: <&[u8]>::from(self), + } + } +} +/// Error returned from [`Rfc1123Domain::try_from`] and [`Rfc1123Domain::try_from_bytes`]. #[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rfc1123Err { - /// A [`Label`] of [`Domain`] contains ASCII not in [`crate::char_set::ASCII_HYPHEN_DIGITS_LETTERS`]. - InvalidAscii, + /// The inputs was not a valid [`Domain`]. + DomainErr(DomainErr), /// A [`Label`] of [`Domain`] starts with an ASCII hyphen. LabelStartsWithAHyphen, /// A [`Label`] of [`Domain`] ends with an ASCII hyphen. 
@@ -962,9 +1010,7 @@ impl Display for Rfc1123Err { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { - Self::InvalidAscii => f.write_str( - "a label in the domain contained ASCII besides hyphen, digits, and letters", - ), + Self::DomainErr(err) => err.fmt(f), Self::LabelStartsWithAHyphen => { f.write_str("a label in the domain starts with a hyphen") } @@ -1026,31 +1072,136 @@ pub struct Rfc1123Domain<T> { dom: Domain<T>, } impl<T> Rfc1123Domain<T> { - /// Returns the contained [`Domain`]. + /// Returns a reference to the contained [`Domain`]. /// /// # Example /// /// ``` - /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS}; - /// let dom = Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap(); - /// let dom2 = dom.clone(); - /// assert!(Rfc1123Domain::try_from(dom).unwrap().domain() == dom2); + /// use ascii_domain::dom::Rfc1123Domain; + /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().domain().len().get() == 11); /// ``` #[inline] pub const fn domain(&self) -> &Domain<T> { &self.dom } + /// Returns the contained [`Domain`] consuming `self`. + /// + /// # Example + /// + /// ``` + /// use ascii_domain::dom::Rfc1123Domain; + /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().into_domain().len().get() == 11); + /// ``` + #[inline] + pub fn into_domain(self) -> Domain<T> { + self.dom + } } impl<T: AsRef<[u8]>> Rfc1123Domain<T> { + /// Function that transforms `v` into an `Rfc1123Domain` by only allowing [`Label`]s to contain the ASCII `u8`s + /// in [`ASCII_HYPHEN_DIGITS_LETTERS`] with each `Label` not starting or ending with a `b'-'`. A trailing `b'.'` + /// is ignored. + /// + /// Unlike calling [`Domain::try_from_bytes`] then [`Rfc1123Domain::try_from`] which performs two traversals + /// of `v`, this performs a single traversal of `v`.
+ /// + /// # Examples + /// + /// ``` + /// use ascii_domain::dom::{Rfc1123Domain, Rfc1123Err}; + /// assert!(Rfc1123Domain::try_from_bytes("example.com").is_ok()); + /// assert!(Rfc1123Domain::try_from_bytes("a-.com").map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); + /// ``` + /// + /// # Errors + /// + /// Returns [`Rfc1123Err`] iff `v.as_ref()` is an invalid `Rfc1123Domain`. + #[allow( + clippy::arithmetic_side_effects, + clippy::indexing_slicing, + clippy::into_iter_on_ref, + clippy::redundant_else + )] + #[inline] + pub fn try_from_bytes(v: T) -> Result<Self, Rfc1123Err> { + // The easiest implementation would be redirecting to `Domain::try_from_bytes`; and upon success, + // verify each `Label` doesn't begin or end with a hyphen. That requires traversing `v` twice though. + // We opt to traverse just once. + let val = v.as_ref(); + let value = match val.last() { + None => return Err(Rfc1123Err::DomainErr(DomainErr::Empty)), + Some(byt) => { + let b = *byt; + if b == b'.' { + if val.len() == 1 { + return Err(Rfc1123Err::DomainErr(DomainErr::RootDomain)); + } + // We know `val.len` is at least 2. + let len = val.len() - 1; + let lst = val[len - 1]; + if lst == b'.' { + return Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel)); + } else if lst == b'-' { + return Err(Rfc1123Err::LabelEndsWithAHyphen); + } else { + &val[..len] + } + } else if b == b'-' { + return Err(Rfc1123Err::LabelEndsWithAHyphen); + } else { + val + } + } + }; + if value.len() > 253 { + Err(Rfc1123Err::DomainErr(DomainErr::LenExceeds253(value.len()))) + } else { + let mut count = 0; + value + .into_iter() + .try_fold(0, |label_len, byt| { + let b = *byt; + if b == b'.' { + NonZeroU8::new(label_len).map_or( + Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel)), + |_| { + // We verify the last character in the `Label` is not a hyphen. + // `count` > 0 since `label_len` > 0 and `count` < `value.len()` since + // it's the index of the `b'.'`. 
+ if value[count - 1] == b'-' { + Err(Rfc1123Err::LabelEndsWithAHyphen) + } else { + Ok(0) + } + }, + ) + } else if !RFC_CHARS.contains(b) { + Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(b))) + } else if b == b'-' && label_len == 0 { + Err(Rfc1123Err::LabelStartsWithAHyphen) + } else if label_len == 63 { + Err(Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63)) + } else { + // This caps at 253, so no overflow. + count += 1; + // This is less than 64 due to the above check, so this won't overflow; + Ok(label_len + 1) + } + }) + .map(|_| Self { + dom: Domain { value: v }, + }) + } + } /// Returns `true` iff the domain adheres to the literal interpretation of RFC 1123. For more information /// read the description of [`Rfc1123Domain`]. /// /// # Examples /// /// ``` - /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS}; - /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("example.commmm", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_literal_interpretation()); - /// assert!(!Rfc1123Domain::try_from(Domain::try_from_bytes("example.c1m", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_literal_interpretation()); + /// use ascii_domain::dom::Rfc1123Domain; + /// assert!(Rfc1123Domain::try_from_bytes("example.commmm").unwrap().is_literal_interpretation()); + /// assert!(!Rfc1123Domain::try_from_bytes("example.c1m").unwrap().is_literal_interpretation()); /// ``` #[inline] pub fn is_literal_interpretation(&self) -> bool { @@ -1062,14 +1213,14 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> { /// # Examples /// /// ``` - /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS}; - /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("example.Com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_strict_interpretation()); - /// assert!(!Rfc1123Domain::try_from(Domain::try_from_bytes("example.comm", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_strict_interpretation()); + 
/// use ascii_domain::dom::Rfc1123Domain; + /// assert!(Rfc1123Domain::try_from_bytes("example.Com").unwrap().is_strict_interpretation()); + /// assert!(!Rfc1123Domain::try_from_bytes("example.comm").unwrap().is_strict_interpretation()); /// ``` #[inline] pub fn is_strict_interpretation(&self) -> bool { let tld = self.dom.tld(); - (2..4).contains(&tld.len()) && tld.is_alphabetic() + (2..4).contains(&tld.len().get()) && tld.is_alphabetic() } /// Returns `true` iff the domain has the same format as an IPv4 address. /// @@ -1080,14 +1231,14 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> { /// # Example /// /// ``` - /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS}; - /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("1.2.3.4", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_ipv4()); + /// use ascii_domain::dom::Rfc1123Domain; + /// assert!(Rfc1123Domain::try_from_bytes("1.2.3.4").unwrap().is_ipv4()); /// ``` #[allow(clippy::arithmetic_side_effects, clippy::into_iter_on_ref)] #[inline] pub fn is_ipv4(&self) -> bool { // The min length of an IPv4 address is 7 and the max length is 15. - (7..=15).contains(&self.as_bytes().len()) + (7..=15).contains(&self.dom.len().get()) // We don't use `std::net::Ipv4Addr::from_str` since that does not consider octets with leading // 0s as valid. This means something like `0.0.0.01` is not considered an IPv4 address, but we // want to consider that as an IP. @@ -1097,7 +1248,7 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> { .try_fold(0u8, |count, label| { // If we have more than 4 `Label`s, it's not an IPv4 address. Similarly if a `Label` has // length greater than 3, it's not a valid IPv4 address octet. - if count < 4 && label.len() < 4 && label.as_str().parse::<u8>().is_ok() { + if count < 4 && label.len().get() < 4 && label.as_str().parse::<u8>().is_ok() { // Overflow is not possible since we know `count < 4`. 
Ok(count + 1) } else { @@ -1206,27 +1357,78 @@ impl<T> From<Rfc1123Domain<T>> for Domain<T> { value.dom } } +impl From<Rfc1123Domain<Vec<u8>>> for Rfc1123Domain<String> { + #[inline] + fn from(value: Rfc1123Domain<Vec<u8>>) -> Self { + Self { + dom: Domain::<String>::from(value.dom), + } + } +} +impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b [u8]> { + #[inline] + fn from(value: &'a Rfc1123Domain<T>) -> Self { + Self { + dom: Domain::<&'b [u8]>::from(&value.dom), + } + } +} +impl<'a: 'b, 'b, T: AsRef<str>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b str> { + #[inline] + fn from(value: &'a Rfc1123Domain<T>) -> Self { + Self { + dom: Domain::<&'b str>::from(&value.dom), + } + } +} +impl From<Rfc1123Domain<String>> for Rfc1123Domain<Vec<u8>> { + #[inline] + fn from(value: Rfc1123Domain<String>) -> Self { + Self { + dom: Domain::<Vec<u8>>::from(value.dom), + } + } +} +impl<'a: 'b, 'b> From<Rfc1123Domain<&'a [u8]>> for Rfc1123Domain<&'b str> { + #[inline] + fn from(value: Rfc1123Domain<&'a [u8]>) -> Self { + Self { + dom: Domain::<&'b str>::from(value.dom), + } + } +} +impl<'a: 'b, 'b> From<Rfc1123Domain<&'a str>> for Rfc1123Domain<&'b [u8]> { + #[inline] + fn from(value: Rfc1123Domain<&'a str>) -> Self { + Self { + dom: Domain::<&'b [u8]>::from(value.dom), + } + } +} impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> { type Error = Rfc1123Err; - #[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] + #[allow( + clippy::arithmetic_side_effects, + clippy::indexing_slicing, + clippy::into_iter_on_ref + )] #[inline] fn try_from(value: Domain<T>) -> Result<Self, Self::Error> { value .into_iter() .try_fold((), |(), label| { - if label.is_hyphen_or_alphanumeric() { - let bytes = label.value.as_bytes(); - // `Label`s are never empty, so the below indexing is fine. - // Underflow won't occur for the same reason. 
- if bytes[0] == b'-' { - Err(Rfc1123Err::LabelStartsWithAHyphen) - } else if bytes[bytes.len() - 1] == b'-' { - Err(Rfc1123Err::LabelEndsWithAHyphen) - } else { - Ok(()) - } + let bytes = label.value.as_bytes(); + // `Label`s are never empty, so the below indexing is fine. + // Underflow won't occur for the same reason. + if bytes[0] == b'-' { + Err(Rfc1123Err::LabelStartsWithAHyphen) + } else if bytes[bytes.len() - 1] == b'-' { + Err(Rfc1123Err::LabelEndsWithAHyphen) } else { - Err(Rfc1123Err::InvalidAscii) + bytes.into_iter().try_fold((), |(), byt| match *byt { + b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => Ok(()), + val => Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(val))), + }) } }) .map(|()| Self { dom: value }) @@ -1238,6 +1440,36 @@ impl<T: AsRef<[u8]>> Display for Rfc1123Domain<T> { self.dom.fmt(f) } } +impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Rfc1123Domain<T> { + type Item = Label<'a>; + type IntoIter = LabelIter<'a>; + #[inline] + fn into_iter(self) -> Self::IntoIter { + LabelIter { + domain: self.dom.as_bytes(), + } + } +} +impl<'a> IntoIterator for Rfc1123Domain<&'a str> { + type Item = Label<'a>; + type IntoIter = LabelIter<'a>; + #[inline] + fn into_iter(self) -> Self::IntoIter { + LabelIter { + domain: <&str>::from(self.dom).as_bytes(), + } + } +} +impl<'a> IntoIterator for Rfc1123Domain<&'a [u8]> { + type Item = Label<'a>; + type IntoIter = LabelIter<'a>; + #[inline] + fn into_iter(self) -> Self::IntoIter { + LabelIter { + domain: <&[u8]>::from(self.dom), + } + } +} #[cfg(test)] mod tests { extern crate alloc; @@ -1257,6 +1489,10 @@ mod tests { // Test empty label is error. 
assert!(Domain::try_from_bytes("a..com", &allowed_ascii) .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); + assert!(Domain::try_from_bytes("a..", &allowed_ascii) + .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); + assert!(Domain::try_from_bytes("..", &allowed_ascii) + .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); // Test label too long. let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; // 4 + 64 + 4 @@ -1279,6 +1515,8 @@ mod tests { assert!( Domain::try_from_bytes("com.", &allowed_ascii).map_or(false, |d| d.as_str() == "com") ); + // Test single label. + assert!(Domain::try_from_bytes("c", &allowed_ascii).map_or(false, |d| d.as_str() == "c")); // Test case-insensitivity. assert!( Domain::try_from_bytes("wwW.ExAMple.COm", &allowed_ascii).map_or(false, |d| { @@ -1321,8 +1559,8 @@ mod tests { let input2 = b"a."; assert!( Domain::try_from_bytes(input2, &allowed_ascii).map_or(false, |d| d - .as_str() .len() + .get() == 1 && d.value == input2) ) @@ -1338,7 +1576,7 @@ mod tests { let allowed_ascii = ASCII_FIREFOX; assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.iter(); + let mut iter = d.into_iter(); let Some(l) = iter.next() else { return false; }; @@ -1360,7 +1598,7 @@ mod tests { ); assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.iter(); + let mut iter = d.into_iter(); let Some(l) = iter.next_back() else { return false; }; @@ -1384,7 +1622,7 @@ mod tests { ); assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.iter(); + let mut iter = d.into_iter(); let Some(l) = iter.next_back() else { return false; }; @@ -1417,8 +1655,10 @@ mod tests { assert!( AllowedAscii::try_from_unique_ascii(b"exampl!co".to_owned()).map_or(false, |ascii| { Domain::try_from_bytes("exampl!e.com", &ascii).map_or(false, |dom| { - Rfc1123Domain::try_from(dom) - 
.map_or_else(|e| e == Rfc1123Err::InvalidAscii, |_| false) + Rfc1123Domain::try_from(dom).map_or_else( + |e| e == Rfc1123Err::DomainErr(DomainErr::InvalidByte(b'!')), + |_| false, + ) }) }) ); @@ -1478,6 +1718,11 @@ mod tests { .map_or(false, |dom| Rfc1123Domain::try_from(dom) .map_or(false, |rfc| !rfc.is_ipv4()))); assert!( + Domain::try_from_bytes("1.1.1.1.1", &ASCII_HYPHEN_DIGITS_LETTERS) + .map_or(false, |dom| Rfc1123Domain::try_from(dom) + .map_or(false, |rfc| !rfc.is_ipv4())) + ); + assert!( Domain::try_from_bytes("256.0.0.0", &ASCII_HYPHEN_DIGITS_LETTERS) .map_or(false, |dom| Rfc1123Domain::try_from(dom) .map_or(false, |rfc| !rfc.is_ipv4())) @@ -1515,4 +1760,109 @@ mod tests { .map_or(false, |dom| dom.tld().as_str() == "com",) ); } + #[test] + fn test_rfc1123_parse() { + // Test empty is error. + assert!(Rfc1123Domain::try_from_bytes("") + .map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::Empty), |_| false)); + // Test root domain. + assert!(Rfc1123Domain::try_from_bytes(".").map_or_else( + |e| e == Rfc1123Err::DomainErr(DomainErr::RootDomain), + |_| false + )); + // Test empty label is error. + assert!(Rfc1123Domain::try_from_bytes("a..com").map_or_else( + |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), + |_| false + )); + assert!(Rfc1123Domain::try_from_bytes("a..").map_or_else( + |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), + |_| false + )); + assert!(Rfc1123Domain::try_from_bytes("..").map_or_else( + |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), + |_| false + )); + // Test label too long. 
+ let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; + // 4 + 64 + 4 + assert!(val.len() == 72); + assert!(Rfc1123Domain::try_from_bytes(val).map_or_else( + |e| e == Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63), + |_| false + )); + assert!(Rfc1123Domain::try_from_bytes( + "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", + ) + .map_or(false, |d| d.len().get() == 71)); + // Test domain too long. + assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(254)), |_| false)); + assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or(false, |d| d.len().get() == 253 )); + // Test max labels. 
+ assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(255)), |_| false)); + assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253)); + assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.").map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253)); + // Test removal of trailing '.'. + assert!(Rfc1123Domain::try_from_bytes("com.").map_or(false, |d| d.as_str() == "com")); + // Test single label. + assert!(Rfc1123Domain::try_from_bytes("c").map_or(false, |d| d.as_str() == "c")); + // Test ends with hyphen. 
+ assert!(Rfc1123Domain::try_from_bytes("-") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); + assert!(Rfc1123Domain::try_from_bytes("-.") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); + assert!(Rfc1123Domain::try_from_bytes("a.com.-") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); + assert!(Rfc1123Domain::try_from_bytes("a.com-") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); + assert!(Rfc1123Domain::try_from_bytes("a-.com") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); + // Test starts with hyphen. + assert!(Rfc1123Domain::try_from_bytes("a.-com") + .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false)); + assert!(Rfc1123Domain::try_from_bytes("-a.com") + .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false)); + // Test case-insensitivity. + assert!( + Rfc1123Domain::try_from_bytes("wwW.ExAMple.COm").map_or(false, |d| { + Rfc1123Domain::try_from_bytes("www.example.com") + .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) + }) + ); + assert!( + Rfc1123Domain::try_from_bytes("ww-W.com").map_or(false, |d| { + Rfc1123Domain::try_from_bytes("Ww-w.com") + .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) + }) + ); + // Test valid bytes + let mut input; + let mut counter = 0; + for i in 0..=127 { + input = [i]; + match i { + b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => { + counter += 1; + assert!(Rfc1123Domain::try_from_bytes(input) + .map_or(false, |d| d.value.len() == 1 && d.value == input)) + } + b'-' => { + counter += 1; + let input2 = b"a-a"; + assert!(Rfc1123Domain::try_from_bytes(input2) + .map_or(false, |d| d.len().get() == 3 && d.value == input2)) + } + b'.' 
=> { + let input2 = b"a."; + assert!(Rfc1123Domain::try_from_bytes(input2) + .map_or(false, |d| d.len().get() == 1 && d.value == input2)) + } + _ => assert!(Rfc1123Domain::try_from_bytes(input).map_or_else( + |e| e == Rfc1123Err::DomainErr(DomainErr::InvalidByte(i)), + |_| false + )), + } + } + assert!(counter == 63); + } } diff --git a/src/serde.rs b/src/serde.rs @@ -1,18 +1,17 @@ #![cfg(feature = "serde")] extern crate alloc; use crate::{ - char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS, PRINTABLE_ASCII}, - dom::{Domain, Rfc1123Domain}, -}; -use alloc::{ - borrow::ToOwned, - string::{String, ToString}, + char_set::{AllowedAscii, PRINTABLE_ASCII}, + dom::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err}, }; +use alloc::{borrow::ToOwned, string::String}; use core::{fmt, marker::PhantomData}; use serde::{ - de::{self, Deserialize, Deserializer, Error, Unexpected, Visitor}, + de::{self, Deserialize, Deserializer, Unexpected, Visitor}, ser::{Serialize, Serializer}, }; +/// The "default" `AllowedAscii` that is used for `Domain`. +static DOMAIN_CHARS: &AllowedAscii<[u8; 92]> = &PRINTABLE_ASCII; impl<T: AsRef<[u8]>> Serialize for Domain<T> { /// Serializes `Domain` as a string. #[inline] @@ -62,6 +61,36 @@ pub struct DomainVisitor<'a, T, T2> { /// The character set the visitor will use when deserializing a string into a `Domain`. pub allowed_ascii: &'a AllowedAscii<T>, } +/// Converts `DomainErr` to a Serde `de::Error`. 
+#[allow(clippy::as_conversions, clippy::cast_lossless)] +#[inline] +fn dom_err_to_serde<E: de::Error>(value: DomainErr) -> E { + match value { + DomainErr::Empty => E::invalid_length( + 0, + &"a valid domain with length inclusively between 1 and 253", + ), + DomainErr::RootDomain => { + E::invalid_length(0, &"a valid domain with at least one non-root label") + } + DomainErr::LenExceeds253(len) => E::invalid_length( + len, + &"a valid domain with length inclusively between 1 and 253", + ), + DomainErr::LabelLenExceeds63 => E::invalid_length( + 64, + &"a valid domain containing labels of length inclusively between 1 and 63", + ), + DomainErr::EmptyLabel => E::invalid_length( + 0, + &"a valid domain containing labels of length inclusively between 1 and 63", + ), + DomainErr::InvalidByte(byt) => E::invalid_value( + Unexpected::Unsigned(byt as u64), + &"a valid domain containing only the supplied ASCII subset", + ), + } +} impl<'a, T, T2> DomainVisitor<'a, T, T2> { /// Returns `DomainVisitor` with [`Self::allowed_ascii`] set to `allowed_ascii`. 
/// @@ -90,8 +119,7 @@ impl<'de: 'b, 'a, 'b, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, &'b where E: de::Error, { - Self::Value::try_from_bytes(v, self.allowed_ascii) - .map_err(|err| E::invalid_value(Unexpected::Str(err.to_string().as_str()), &"a Domain")) + Self::Value::try_from_bytes(v, self.allowed_ascii).map_err(|err| dom_err_to_serde::<E>(err)) } } impl<'de, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, String> { @@ -105,8 +133,7 @@ impl<'de, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, String> { where E: de::Error, { - Self::Value::try_from_bytes(v, self.allowed_ascii) - .map_err(|err| E::invalid_value(Unexpected::Str(err.to_string().as_str()), &"a Domain")) + Self::Value::try_from_bytes(v, self.allowed_ascii).map_err(|err| dom_err_to_serde::<E>(err)) } #[inline] fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> @@ -123,7 +150,7 @@ impl<'de> Deserialize<'de> for Domain<String> { where D: Deserializer<'de>, { - deserializer.deserialize_string(DomainVisitor::<'_, _, String>::new(&PRINTABLE_ASCII)) + deserializer.deserialize_string(DomainVisitor::<'_, _, String>::new(DOMAIN_CHARS)) } } /// Deserializes `str`s into a `Domain` based on [`PRINTABLE_ASCII`]. @@ -133,7 +160,54 @@ impl<'de: 'a, 'a> Deserialize<'de> for Domain<&'a str> { where D: Deserializer<'de>, { - deserializer.deserialize_string(DomainVisitor::<'_, _, &str>::new(&PRINTABLE_ASCII)) + deserializer.deserialize_str(DomainVisitor::<'_, _, &str>::new(DOMAIN_CHARS)) + } +} +/// Converts `Rfc1123Err` to a Serde `de::Error`. +#[inline] +fn rfc_err_to_serde<E: de::Error>(value: Rfc1123Err) -> E { + match value { + Rfc1123Err::DomainErr(err) => dom_err_to_serde(err), + Rfc1123Err::LabelStartsWithAHyphen | Rfc1123Err::LabelEndsWithAHyphen => { + E::invalid_value(Unexpected::Str("-"), &"a valid domain conforming to RFC 1123 which mean all labels don't being or end with a '-'") + } + } +} +/// Serde [`Visitor`] that deserializes a string into an [`Rfc1123Domain`]. 
+struct Rfc1123Visitor<T>(PhantomData<fn() -> T>); +impl<'de: 'a, 'a> Visitor<'de> for Rfc1123Visitor<&'a str> { + type Value = Rfc1123Domain<&'a str>; + #[inline] + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Rfc1123Domain") + } + #[inline] + fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> + where + E: de::Error, + { + Self::Value::try_from_bytes(v).map_err(|err| rfc_err_to_serde(err)) + } +} +impl<'de> Visitor<'de> for Rfc1123Visitor<String> { + type Value = Rfc1123Domain<String>; + #[inline] + fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + formatter.write_str("Rfc1123Domain") + } + #[inline] + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: de::Error, + { + Self::Value::try_from_bytes(v).map_err(|err| rfc_err_to_serde(err)) + } + #[inline] + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: de::Error, + { + self.visit_string(v.to_owned()) } } impl<'de> Deserialize<'de> for Rfc1123Domain<String> { @@ -142,18 +216,7 @@ impl<'de> Deserialize<'de> for Rfc1123Domain<String> { where D: Deserializer<'de>, { - deserializer - .deserialize_string(DomainVisitor::<'_, _, String>::new( - &ASCII_HYPHEN_DIGITS_LETTERS, - )) - .and_then(|dom| { - Self::try_from(dom).map_err(|err| { - Error::invalid_value( - Unexpected::Str(err.to_string().as_str()), - &"an Rfc1123Domain", - ) - }) - }) + deserializer.deserialize_string(Rfc1123Visitor::<String>(PhantomData)) } } impl<'de: 'a, 'a> Deserialize<'de> for Rfc1123Domain<&'a str> { @@ -162,18 +225,7 @@ impl<'de: 'a, 'a> Deserialize<'de> for Rfc1123Domain<&'a str> { where D: Deserializer<'de>, { - deserializer - .deserialize_string(DomainVisitor::<'_, _, &str>::new( - &ASCII_HYPHEN_DIGITS_LETTERS, - )) - .and_then(|dom| { - Self::try_from(dom).map_err(|err| { - Error::invalid_value( - Unexpected::Str(err.to_string().as_str()), - &"an Rfc1123Domain", - ) - }) - }) + 
deserializer.deserialize_str(Rfc1123Visitor::<&'a str>(PhantomData)) } } #[cfg(test)]