commit e0a00a4da03c17329c5a87755c3d9e5b7571a343
parent e3cb725644c383097ab3d5752548b3f7b56760dd
Author: Zack Newman <zack@philomathiclife.com>
Date: Sun, 11 Feb 2024 10:08:24 -0700
improve LabelIter. add more from impls
Diffstat:
5 files changed, 702 insertions(+), 300 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0"
name = "ascii_domain"
readme = "README.md"
repository = "https://git.philomathiclife.com/repos/ascii_domain/"
-version = "0.4.0"
+version = "0.5.0"
[lib]
name = "ascii_domain"
diff --git a/LICENSE-MIT b/LICENSE-MIT
@@ -1,4 +1,4 @@
-Copyright © 2023 Zack Newman
+Copyright © 2024 Zack Newman
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
diff --git a/src/char_set.rs b/src/char_set.rs
@@ -148,7 +148,7 @@ impl<T: AsMut<[u8]>> AllowedAscii<T> {
/// assert!(AllowedAscii::try_from_unique_ascii(b"asdfghjkl".to_owned()).map_or(false, |ascii| ascii.contains(b'a') && !ascii.contains(b'A')));
/// assert!(AllowedAscii::try_from_unique_ascii(b"aa".to_owned()).map_or_else(|err| err == AsciiErr::Duplicate(b'a'), |_| false));
/// assert!(AllowedAscii::try_from_unique_ascii([255]).map_or_else(|err| err == AsciiErr::InvalidByte(255), |_| false));
- /// assert!(AllowedAscii::try_from_unique_ascii([0; 129]).map_or_else(|err| err == AsciiErr::CountTooLarge(129), |_| false));
+ /// assert!(AllowedAscii::try_from_unique_ascii([0; 128]).map_or_else(|err| err == AsciiErr::CountTooLarge(128), |_| false));
/// assert!(AllowedAscii::try_from_unique_ascii([b'.']).map_or_else(|err| err == AsciiErr::Contains46, |_| false));
/// ```
#[inline]
diff --git a/src/dom.rs b/src/dom.rs
@@ -1,7 +1,7 @@
extern crate alloc;
#[cfg(feature = "std")]
extern crate std;
-use crate::char_set::AllowedAscii;
+use crate::char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS};
use alloc::{string::String, vec::Vec};
use core::{
borrow::Borrow,
@@ -16,6 +16,8 @@ use core::{
};
#[cfg(feature = "std")]
use std::error::Error;
+/// The `AllowedAscii` used by `Rfc1123Domain`.
+static RFC_CHARS: &AllowedAscii<[u8; 63]> = &ASCII_HYPHEN_DIGITS_LETTERS;
/// Returned by [`Domain::cmp_by_domain_ordering`]. It is more informative than [`Ordering`] in that it
/// distinguishes between a `Domain` that is greater than another `Domain` due to a [`Label`] being greater
/// from a `Domain` that has the same `Label`s as another but simply more.
@@ -85,92 +87,8 @@ pub struct Domain<T> {
/// Guaranteed to only contain `b'.'` and the ASCII `u8`s in `allowed_ascii`.
value: T,
}
-impl<'a> Domain<&'a [u8]> {
- /// Same as [`Self::as_bytes`] except the lifetime is tied to the slice and not `self`.
- ///
- /// # Example
- ///
- /// ```
- /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
- /// assert!(Domain::try_from_bytes(b"example.com.".as_slice(), &ASCII_LOWERCASE).unwrap().domain_without_trailing_dot() == b"example.com");
- /// ```
- #[allow(clippy::as_conversions, clippy::indexing_slicing)]
- #[inline]
- #[must_use]
- pub fn domain_without_trailing_dot(&self) -> &'a [u8] {
- // `self.len().get() as usize` is fine since it's a positive `u8`.
- // Indexing won't `panic` since `self.len()` is at most as long as `self.value`.
- &self.value[..self.len().get() as usize]
- }
-}
-impl<'a> Domain<&'a str> {
- /// Same as [`Self::as_str`] except the lifetime is tied to the `str` and not `self`.
- ///
- /// # Example
- ///
- /// ```
- /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
- /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().domain_without_trailing_dot() == "example.com");
- /// ```
- #[allow(unsafe_code, clippy::as_conversions, clippy::indexing_slicing)]
- #[inline]
- #[must_use]
- pub fn domain_without_trailing_dot(&self) -> &'a str {
- // `self.len().get() as usize` is fine since it's a positive `u8`.
- // Indexing won't `panic` since `self.len()` is at most as long as `self.value`.
- let utf8 = &self.value.as_bytes()[..self.len().get() as usize];
- // SAFETY:
- // Only ASCII is allowed, so this is fine.
- unsafe { str::from_utf8_unchecked(utf8) }
- }
-}
-impl From<Domain<Vec<u8>>> for Domain<String> {
- #[allow(unsafe_code)]
- #[inline]
- fn from(value: Domain<Vec<u8>>) -> Self {
- // SAFETY:
- // We only allow ASCII, so this is fine.
- let val = unsafe { String::from_utf8_unchecked(value.value) };
- Self { value: val }
- }
-}
-impl From<Domain<String>> for Domain<Vec<u8>> {
- #[inline]
- fn from(value: Domain<String>) -> Self {
- Self {
- value: value.value.into_bytes(),
- }
- }
-}
-impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> {
- #[allow(unsafe_code)]
- #[inline]
- fn from(value: Domain<&'a [u8]>) -> Self {
- // SAFETY:
- // We only allow ASCII, so this is fine.
- let val = unsafe { str::from_utf8_unchecked(value.value) };
- Self { value: val }
- }
-}
-impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> {
- #[inline]
- fn from(value: Domain<&'a str>) -> Self {
- Self {
- value: value.value.as_bytes(),
- }
- }
-}
impl<T> Domain<T> {
- /// The maximum length of a `Domain` which is 253.
- // SAFETY: 0 < 253 < 256.
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
- pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(253) };
- /// The minimum length of a `Domain` which is 1.
- // SAFETY: 0 < 1 < 256.
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
- pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) };
- /// The inner `T`. This should be treated with caution since it will contain a trailing `b'.'` if there
- /// is one as well as ASCII uppercase letters if there are any.
+ /// A reference to the contained `T`.
///
/// # Example
///
@@ -219,13 +137,9 @@ impl<T: AsRef<[u8]>> Domain<T> {
/// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
/// assert!(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap().as_str() == "Example.com");
/// ```
- #[allow(unsafe_code)]
#[inline]
pub fn as_str(&self) -> &str {
- let input = self.as_bytes();
- // SAFETY:
- // We only allow ASCII so this is safe.
- unsafe { str::from_utf8_unchecked(input) }
+ <&str>::from(Domain::<&str>::from(Domain::<&[u8]>::from(self)))
}
/// The domain without a trailing `b'.'` if there was one.
///
@@ -235,16 +149,12 @@ impl<T: AsRef<[u8]>> Domain<T> {
/// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
/// assert!(Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap().as_bytes() == b"Example.com");
/// ```
- #[allow(clippy::as_conversions, clippy::indexing_slicing)]
#[inline]
pub fn as_bytes(&self) -> &[u8] {
- // This is correct so long as `Self::len` is correct.
- &self.value.as_ref()[..self.len().get() as usize]
+ <&[u8]>::from(Domain::<&[u8]>::from(self))
}
/// The length of the `Domain`. This does _not_ include the trailing `b'.'` if there was one.
///
- /// Same as `self.as_str().len()` and `self.as_bytes().len()`.
- ///
/// # Example
///
/// ```
@@ -291,7 +201,6 @@ impl<T: AsRef<[u8]>> Domain<T> {
/// Returns [`DomainErr`] iff `v.as_ref()` is an invalid `Domain`.
#[allow(
clippy::arithmetic_side_effects,
- clippy::as_conversions,
clippy::indexing_slicing,
clippy::into_iter_on_ref
)]
@@ -308,16 +217,18 @@ impl<T: AsRef<[u8]>> Domain<T> {
if val.len() == 1 {
return Err(DomainErr::RootDomain);
}
- // We know `val.len` is at least 1; otherwise `last` would have returned `None`.
- // Therefore this won't underflow and indexing won't `panic`.
- &val[..val.len() - 1]
+ // We know `val.len` is at least 2.
+ let len = val.len() - 1;
+ if val[len - 1] == b'.' {
+ return Err(DomainErr::EmptyLabel);
+ }
+ &val[..len]
} else {
val
}
}
};
- // `MAX_LEN` is 253 so this is fine.
- if value.len() > Self::MAX_LEN.get() as usize {
+ if value.len() > 253 {
Err(DomainErr::LenExceeds253(value.len()))
} else {
value
@@ -335,11 +246,7 @@ impl<T: AsRef<[u8]>> Domain<T> {
Ok(label_len + 1)
}
})
- .and_then(|label_len| {
- NonZeroU8::new(label_len)
- .ok_or(DomainErr::EmptyLabel)
- .map(|_| Self { value: v })
- })
+ .map(|_| Self { value: v })
}
}
/// Returns an [`Iterator`] of [`Label`]s without consuming the `Domain`.
@@ -347,11 +254,13 @@ impl<T: AsRef<[u8]>> Domain<T> {
///
/// ```
/// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
- /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().iter().next().unwrap().as_str() == "com");
+ /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().unwrap().as_str() == "com");
/// ```
#[inline]
- pub fn iter(&self) -> LabelIter<T> {
- LabelIter::new(self)
+ pub fn iter(&self) -> LabelIter<'_> {
+ LabelIter {
+ domain: self.as_bytes(),
+ }
}
/// Returns `true` iff `self` and `right` are part of the same branch in the DNS hierarchy.
///
@@ -553,7 +462,7 @@ impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> TryFrom<(T, &AllowedAscii<T2>)> for Domain
impl<T: AsRef<[u8]>> Display for Domain<T> {
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
- f.write_str(self.as_str())
+ f.write_str(self)
}
}
impl<T: AsRef<[u8]>> AsRef<str> for Domain<T> {
@@ -575,6 +484,135 @@ impl<T: AsRef<[u8]>> Deref for Domain<T> {
self.as_str()
}
}
+impl From<Domain<Vec<u8>>> for Domain<String> {
+ #[allow(unsafe_code)]
+ #[inline]
+ fn from(value: Domain<Vec<u8>>) -> Self {
+ // SAFETY:
+ // We only allow ASCII, so this is fine.
+ let val = unsafe { String::from_utf8_unchecked(value.value) };
+ Self { value: val }
+ }
+}
+impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Domain<T>> for Domain<&'b [u8]> {
+ #[inline]
+ fn from(value: &'a Domain<T>) -> Self {
+ Self {
+ value: value.value.as_ref(),
+ }
+ }
+}
+impl<'a: 'b, 'b, T: AsRef<str>> From<&'a Domain<T>> for Domain<&'b str> {
+ #[inline]
+ fn from(value: &'a Domain<T>) -> Self {
+ Self {
+ value: value.value.as_ref(),
+ }
+ }
+}
+impl From<Domain<String>> for Domain<Vec<u8>> {
+ #[inline]
+ fn from(value: Domain<String>) -> Self {
+ Self {
+ value: value.value.into_bytes(),
+ }
+ }
+}
+impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> {
+ #[allow(unsafe_code)]
+ #[inline]
+ fn from(value: Domain<&'a [u8]>) -> Self {
+ // SAFETY:
+ // We only allow ASCII, so this is fine.
+ let val = unsafe { str::from_utf8_unchecked(value.value) };
+ Self { value: val }
+ }
+}
+impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> {
+ #[inline]
+ fn from(value: Domain<&'a str>) -> Self {
+ Self {
+ value: value.value.as_bytes(),
+ }
+ }
+}
+impl From<Domain<Self>> for String {
+ /// Returns the contained `String` _without_ a trailing `'.'` if there was one.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
+ /// assert!(String::from(Domain::try_from_bytes(String::from("Example.com."), &ASCII_LETTERS).unwrap()).as_str() == "Example.com");
+ /// ```
+ #[inline]
+ fn from(value: Domain<Self>) -> Self {
+ if value.contains_trailing_dot() {
+ let mut val = value.value;
+ val.pop();
+ val
+ } else {
+ value.value
+ }
+ }
+}
+impl<'a: 'b, 'b> From<Domain<&'a str>> for &'b str {
+ /// Returns the contained `str` _without_ a trailing `'.'` if there was one.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
+ /// assert!(<&str>::from(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap()) == "Example.com");
+ /// ```
+ #[allow(unsafe_code, clippy::as_conversions, clippy::indexing_slicing)]
+ #[inline]
+ fn from(value: Domain<&'a str>) -> Self {
+ // `value.len().get() as usize` is fine since it's a positive `u8`.
+ // Indexing won't `panic` since `value.len()` is at most as long as `value.value`.
+ let utf8 = &value.value.as_bytes()[..value.len().get() as usize];
+ // SAFETY:
+ // Only ASCII is allowed, so this is fine.
+ unsafe { str::from_utf8_unchecked(utf8) }
+ }
+}
+impl From<Domain<Self>> for Vec<u8> {
+ /// Returns the contained `Vec` _without_ a trailing `b'.'` if there was one.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
+ /// assert!(Vec::from(Domain::try_from_bytes(vec![b'F', b'o', b'o', b'.', b'c', b'o', b'm'], &ASCII_LETTERS).unwrap()).as_slice() == b"Foo.com");
+ /// ```
+ #[inline]
+ fn from(value: Domain<Self>) -> Self {
+ if value.contains_trailing_dot() {
+ let mut val = value.value;
+ val.pop();
+ val
+ } else {
+ value.value
+ }
+ }
+}
+impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for &'b [u8] {
+ /// Returns the contained slice _without_ a trailing `b'.'` if there was one.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
+ /// assert!(<&[u8]>::from(Domain::try_from_bytes(b"Example.com.".as_slice(), &ASCII_LETTERS).unwrap()) == b"Example.com");
+ /// ```
+ #[allow(clippy::as_conversions, clippy::indexing_slicing)]
+ #[inline]
+ fn from(value: Domain<&'a [u8]>) -> Self {
+ // `value.len().get() as usize` is fine since it's a positive `u8`.
+ // Indexing won't `panic` since `value.len()` is at most as long as `value.value`.
+ &value.value[..value.len().get() as usize]
+ }
+}
/// Error returned from [`Domain::try_from_bytes`].
#[allow(clippy::exhaustive_enums)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
@@ -622,14 +660,6 @@ pub struct Label<'a> {
value: &'a str,
}
impl<'a> Label<'a> {
- /// The maximum length of a `Label` which is 63.
- // SAFETY: 0 < 63 < 256.
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
- pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(63) };
- /// The minimum length of a `Label` which is 1.
- // SAFETY: 0 < 1 < 256.
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
- pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) };
/// The label.
///
/// # Example
@@ -739,6 +769,24 @@ impl<'a> Label<'a> {
})
.is_ok()
}
+ /// The length of the `Label`. This is inclusively between 1 and 63.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.len().get() == 3));
+ /// ```
+ #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)]
+ #[inline]
+ #[must_use]
+ pub const fn len(self) -> NonZeroU8 {
+ // The max length of a `Label` is 63.
+ let len = self.value.len() as u8;
+ // SAFETY:
+ // `Label`s are never empty.
+ unsafe { NonZeroU8::new_unchecked(len) }
+ }
}
impl PartialEq<Label<'_>> for Label<'_> {
#[inline]
@@ -804,155 +852,155 @@ impl<'a> Deref for Label<'a> {
self.value
}
}
-/// [`Iterator`] that iterates [`Label`]s from a borrowed [`Domain`] starting from the TLD down.
+/// [`Iterator`] that iterates [`Label`]s from a [`Domain`] or [`Rfc1123Domain`] starting from the TLD down.
///
/// This iterates `Label`s on demand; so if repeated iteration is desired, it may be better to collect the `Label`s
-/// in a collection (e.g., [`Vec`]) than call [`Domain::iter`] again.
-#[derive(Clone, Copy, Debug)]
-pub struct LabelIter<'a, T> {
- /// Domain that contains `Label`s to iterate.
- domain: &'a Domain<T>,
- /// This is one past the last index of the last character in the "first" `Label` remaining.
- /// `idx == idx_back` iff there are no more `Label`s to iterate.
- /// Since `idx` is associated with the "first" `Label` remaining and `idx_back` is associated with the
- /// "last", `idx >= idx_back`.
- ///
- /// This means `idx - idx_back` is the total length of `domain` remaining _including_ separators.
- idx: u8,
- /// This is the index of the first character in the "last" `Label` remaining.
- ///
- /// Note since `LabelIter` iterates from the TLD down, "last" means "first" left-to-right.
- idx_back: u8,
-}
-impl<'a, T: AsRef<[u8]>> LabelIter<'a, T> {
- /// Helper function to construct an instance.
- #[allow(clippy::arithmetic_side_effects)]
- #[inline]
- fn new(domain: &'a Domain<T>) -> Self {
- Self {
- idx: domain.len().get(),
- idx_back: 0,
- domain,
- }
- }
-}
-impl<'a, T: AsRef<[u8]>> Iterator for LabelIter<'a, T> {
+/// in a collection (e.g., [`Vec`]) than create the iterator again. This is also why [`ExactSizeIterator`] is not
+/// implemented.
+#[derive(Clone, Debug)]
+pub struct LabelIter<'a> {
+ /// Domain as ASCII.
+ domain: &'a [u8],
+}
+impl<'a> Iterator for LabelIter<'a> {
type Item = Label<'a>;
#[allow(
unsafe_code,
clippy::arithmetic_side_effects,
- clippy::as_conversions,
clippy::indexing_slicing,
clippy::into_iter_on_ref
)]
#[inline]
fn next(&mut self) -> Option<Self::Item> {
- let bytes = self.domain.as_bytes();
- // 0 <= `self.idx_back` <= `self.idx` <= `bytes.len()`, so this won't `panic`.
- bytes[self.idx_back as usize..self.idx as usize]
- .into_iter()
- .rev()
- .try_fold(1, |count, byt| {
- if *byt == b'.' {
- let len = self.idx as usize;
- // `count` <= `self.idx`, so this won't underflow.
- self.idx -= count;
- // `self.idx + 1` won't overflow since `self.idx < 255`.
- // `self.idx + 1` <= `len` since `count` > 0 so this won't `panic`.
- let ascii = &bytes[(self.idx + 1) as usize..len];
- // SAFETY:
- // We only allow ASCII, so this is safe.
- let value = unsafe { str::from_utf8_unchecked(ascii) };
- Err(Label { value })
- } else {
- Ok(count + 1)
- }
- })
- .map_or_else(Some, |_| {
- // `self.idx == self.idx_back` iff there are no more `Label`s to iterate.
- (self.idx > self.idx_back).then(|| {
- let len = self.idx as usize;
- self.idx = self.idx_back;
- let ascii = &bytes[self.idx_back as usize..len];
+ (!self.domain.is_empty()).then(|| {
+ self.domain
+ .into_iter()
+ .rev()
+ .try_fold(1, |count, byt| {
+ if *byt == b'.' {
+ let len = self.domain.len();
+ // `count` < `len` since there is at least one more `u8` before `b'.'`.
+ let idx = len - count;
+ // `idx + 1` < `len` since `count` is > 1 since `Label`s are never empty.
+ let ascii = &self.domain[idx + 1..len];
+ // SAFETY:
+ // We only allow ASCII, so this is safe.
+ let value = unsafe { str::from_utf8_unchecked(ascii) };
+ self.domain = &self.domain[..idx];
+ Err(Label { value })
+ } else {
+ Ok(count + 1)
+ }
+ })
+ .map_or_else(convert::identity, |_| {
// SAFETY:
// We only allow ASCII, so this is safe.
- let value = unsafe { str::from_utf8_unchecked(ascii) };
+ let value = unsafe { str::from_utf8_unchecked(self.domain) };
+ self.domain = &[];
Label { value }
})
- })
+ })
}
#[inline]
fn last(mut self) -> Option<Self::Item>
where
Self: Sized,
{
- let opt = self.next_back();
- self.idx_back = self.idx;
- opt
+ self.next_back()
+ }
+ #[allow(clippy::integer_division)]
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ if self.domain.is_empty() {
+ (0, Some(0))
+ } else {
+ // The max size of a `Label` is 63, and all but the last are followed by a `b'.'`.
+ // This means a lower bound for the number of `Label`s is the floor of the length divided by 64
+ // with the added requirement that it's at least one since we know the domain is not empty.
+ // The min size of a `Label` is 1, and all but the last are followed by a `b'.'`.
+ // This means the max number of `Label`s is the ceiling of the length divided by 2.
+ (
+ (self.domain.len() / 64).max(1),
+ Some(self.domain.len().div_ceil(2)),
+ )
+ }
}
}
-impl<T: AsRef<[u8]>> FusedIterator for LabelIter<'_, T> {}
-impl<T: AsRef<[u8]>> DoubleEndedIterator for LabelIter<'_, T> {
+impl FusedIterator for LabelIter<'_> {}
+impl DoubleEndedIterator for LabelIter<'_> {
#[allow(
unsafe_code,
clippy::arithmetic_side_effects,
- clippy::as_conversions,
clippy::indexing_slicing,
clippy::into_iter_on_ref
)]
#[inline]
fn next_back(&mut self) -> Option<Self::Item> {
- let bytes = self.domain.as_bytes();
- // 0 <= `self.idx_back` <= `self.idx` <= `bytes.len()`, so this won't `panic`.
- bytes[self.idx_back as usize..self.idx as usize]
- .into_iter()
- .try_fold(1, |count, byt| {
- if *byt == b'.' {
- let start = self.idx_back as usize;
- // The max value this can be is `self.idx`, so overflow is no concern.
- self.idx_back += count;
- // `self.idx_back - 1` won't underflow since `self.idx_back` > 0.
- // `self.idx_back - 1` < `bytes.len()` since we know there `Label`s can't be empty
- // and we are at `.` boundary.
- // `start` <= `self.idx_back` since `count` >= 1.
- // For those reasons, this won't `panic`.
- let ascii = &bytes[start..self.idx_back as usize - 1];
- // SAFETY:
- // We only allow ASCII, so this is safe.
- let value = unsafe { str::from_utf8_unchecked(ascii) };
- Err(Label { value })
- } else {
- Ok(count + 1)
- }
- })
- .map_or_else(Some, |_| {
- // `self.idx == `self.idx_back` iff there are no more `Label`s to iterate.
- (self.idx > self.idx_back).then(|| {
- let start = self.idx_back as usize;
- self.idx_back = self.idx;
- let ascii = &bytes[start..self.idx as usize];
+ (!self.domain.is_empty()).then(|| {
+ self.domain
+ .into_iter()
+ .try_fold(0, |count, byt| {
+ if *byt == b'.' {
+ // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s
+ // are not empty.
+ let ascii = &self.domain[..count];
+ // SAFETY:
+ // We only allow ASCII, so this is safe.
+ let value = unsafe { str::from_utf8_unchecked(ascii) };
+ // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s
+ // are not empty.
+ self.domain = &self.domain[count + 1..];
+ Err(Label { value })
+ } else {
+ Ok(count + 1)
+ }
+ })
+ .map_or_else(convert::identity, |_| {
// SAFETY:
// We only allow ASCII, so this is safe.
- let value = unsafe { str::from_utf8_unchecked(ascii) };
+ let value = unsafe { str::from_utf8_unchecked(self.domain) };
+ self.domain = &[];
Label { value }
})
- })
+ })
}
}
impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Domain<T> {
type Item = Label<'a>;
- type IntoIter = LabelIter<'a, T>;
+ type IntoIter = LabelIter<'a>;
#[inline]
fn into_iter(self) -> Self::IntoIter {
- LabelIter::new(self)
+ LabelIter {
+ domain: self.as_bytes(),
+ }
}
}
-/// Error returned from [`Rfc1123Domain::try_from`].
+impl<'a> IntoIterator for Domain<&'a str> {
+ type Item = Label<'a>;
+ type IntoIter = LabelIter<'a>;
+ #[inline]
+ fn into_iter(self) -> Self::IntoIter {
+ LabelIter {
+ domain: <&str>::from(self).as_bytes(),
+ }
+ }
+}
+impl<'a> IntoIterator for Domain<&'a [u8]> {
+ type Item = Label<'a>;
+ type IntoIter = LabelIter<'a>;
+ #[inline]
+ fn into_iter(self) -> Self::IntoIter {
+ LabelIter {
+ domain: <&[u8]>::from(self),
+ }
+ }
+}
+/// Error returned from [`Rfc1123Domain::try_from`] and [`Rfc1123Domain::try_from_bytes`].
#[allow(clippy::exhaustive_enums)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum Rfc1123Err {
- /// A [`Label`] of [`Domain`] contains ASCII not in [`crate::char_set::ASCII_HYPHEN_DIGITS_LETTERS`].
- InvalidAscii,
+ /// The input was not a valid [`Domain`].
+ DomainErr(DomainErr),
/// A [`Label`] of [`Domain`] starts with an ASCII hyphen.
LabelStartsWithAHyphen,
/// A [`Label`] of [`Domain`] ends with an ASCII hyphen.
@@ -962,9 +1010,7 @@ impl Display for Rfc1123Err {
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
- Self::InvalidAscii => f.write_str(
- "a label in the domain contained ASCII besides hyphen, digits, and letters",
- ),
+ Self::DomainErr(err) => err.fmt(f),
Self::LabelStartsWithAHyphen => {
f.write_str("a label in the domain starts with a hyphen")
}
@@ -1026,31 +1072,136 @@ pub struct Rfc1123Domain<T> {
dom: Domain<T>,
}
impl<T> Rfc1123Domain<T> {
- /// Returns the contained [`Domain`].
+ /// Returns a reference to the contained [`Domain`].
///
/// # Example
///
/// ```
- /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS};
- /// let dom = Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap();
- /// let dom2 = dom.clone();
- /// assert!(Rfc1123Domain::try_from(dom).unwrap().domain() == dom2);
+ /// use ascii_domain::dom::Rfc1123Domain;
+ /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().domain().len().get() == 11);
/// ```
#[inline]
pub const fn domain(&self) -> &Domain<T> {
&self.dom
}
+ /// Returns the contained [`Domain`] consuming `self`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::dom::Rfc1123Domain;
+ /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().into_domain().len().get() == 11);
+ /// ```
+ #[inline]
+ pub fn into_domain(self) -> Domain<T> {
+ self.dom
+ }
}
impl<T: AsRef<[u8]>> Rfc1123Domain<T> {
+    /// Function that transforms `v` into an `Rfc1123Domain` by only allowing [`Label`]s to contain the ASCII `u8`s
+    /// in [`ASCII_HYPHEN_DIGITS_LETTERS`] with each `Label` not starting or ending with a `b'-'`. A trailing `b'.'`
+    /// is ignored.
+    ///
+    /// Unlike calling [`Domain::try_from_bytes`] then [`Rfc1123Domain::try_from`] which performs two traversals
+    /// of `v`, this performs a single traversal of `v`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use ascii_domain::dom::{Rfc1123Domain, Rfc1123Err};
+    /// assert!(Rfc1123Domain::try_from_bytes("example.com").is_ok());
+    /// assert!(Rfc1123Domain::try_from_bytes("a.b-c.com").is_ok());
+    /// assert!(Rfc1123Domain::try_from_bytes("a-.com").map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false));
+    /// assert!(Rfc1123Domain::try_from_bytes("a.b-.com").map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false));
+    /// ```
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Rfc1123Err`] iff `v.as_ref()` is an invalid `Rfc1123Domain`.
+    #[allow(
+        clippy::arithmetic_side_effects,
+        clippy::indexing_slicing,
+        clippy::into_iter_on_ref,
+        clippy::redundant_else
+    )]
+    #[inline]
+    pub fn try_from_bytes(v: T) -> Result<Self, Rfc1123Err> {
+        // Redirecting to `Domain::try_from_bytes` and then verifying the hyphen rules for each `Label` would
+        // traverse `v` twice, so everything is validated in a single pass instead.
+        let val = v.as_ref();
+        let value = match val.last() {
+            None => return Err(Rfc1123Err::DomainErr(DomainErr::Empty)),
+            Some(byt) => {
+                let b = *byt;
+                if b == b'.' {
+                    if val.len() == 1 {
+                        return Err(Rfc1123Err::DomainErr(DomainErr::RootDomain));
+                    }
+                    // We know `val.len` is at least 2.
+                    let len = val.len() - 1;
+                    let lst = val[len - 1];
+                    if lst == b'.' {
+                        return Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel));
+                    } else if lst == b'-' {
+                        return Err(Rfc1123Err::LabelEndsWithAHyphen);
+                    } else {
+                        &val[..len]
+                    }
+                } else if b == b'-' {
+                    return Err(Rfc1123Err::LabelEndsWithAHyphen);
+                } else {
+                    val
+                }
+            }
+        };
+        if value.len() > 253 {
+            Err(Rfc1123Err::DomainErr(DomainErr::LenExceeds253(value.len())))
+        } else {
+            // Last byte accepted into the current `Label`; the initial value is never read (see below).
+            let mut prev = b'.';
+            value
+                .into_iter()
+                .try_fold(0, |label_len, byt| {
+                    let b = *byt;
+                    if b == b'.' {
+                        NonZeroU8::new(label_len).map_or(
+                            Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel)),
+                            |_| {
+                                // `label_len` > 0, so `prev` is the last byte of the `Label` that just ended.
+                                if prev == b'-' {
+                                    Err(Rfc1123Err::LabelEndsWithAHyphen)
+                                } else {
+                                    Ok(0)
+                                }
+                            },
+                        )
+                    } else if !RFC_CHARS.contains(b) {
+                        Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(b)))
+                    } else if b == b'-' && label_len == 0 {
+                        Err(Rfc1123Err::LabelStartsWithAHyphen)
+                    } else if label_len == 63 {
+                        Err(Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63))
+                    } else {
+                        // Remember the byte so the next `b'.'` can reject a `Label` that ends with a hyphen.
+                        prev = b;
+                        // This is less than 64 due to the above check, so this won't overflow.
+                        Ok(label_len + 1)
+                    }
+                })
+                .map(|_| Self {
+                    dom: Domain { value: v },
+                })
+        }
+    }
/// Returns `true` iff the domain adheres to the literal interpretation of RFC 1123. For more information
/// read the description of [`Rfc1123Domain`].
///
/// # Examples
///
/// ```
- /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS};
- /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("example.commmm", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_literal_interpretation());
- /// assert!(!Rfc1123Domain::try_from(Domain::try_from_bytes("example.c1m", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_literal_interpretation());
+ /// use ascii_domain::dom::Rfc1123Domain;
+ /// assert!(Rfc1123Domain::try_from_bytes("example.commmm").unwrap().is_literal_interpretation());
+ /// assert!(!Rfc1123Domain::try_from_bytes("example.c1m").unwrap().is_literal_interpretation());
/// ```
#[inline]
pub fn is_literal_interpretation(&self) -> bool {
@@ -1062,14 +1213,14 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> {
/// # Examples
///
/// ```
- /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS};
- /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("example.Com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_strict_interpretation());
- /// assert!(!Rfc1123Domain::try_from(Domain::try_from_bytes("example.comm", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_strict_interpretation());
+ /// use ascii_domain::dom::Rfc1123Domain;
+ /// assert!(Rfc1123Domain::try_from_bytes("example.Com").unwrap().is_strict_interpretation());
+ /// assert!(!Rfc1123Domain::try_from_bytes("example.comm").unwrap().is_strict_interpretation());
/// ```
#[inline]
pub fn is_strict_interpretation(&self) -> bool {
let tld = self.dom.tld();
- (2..4).contains(&tld.len()) && tld.is_alphabetic()
+ (2..4).contains(&tld.len().get()) && tld.is_alphabetic()
}
/// Returns `true` iff the domain has the same format as an IPv4 address.
///
@@ -1080,14 +1231,14 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> {
/// # Example
///
/// ```
- /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS};
- /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("1.2.3.4", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_ipv4());
+ /// use ascii_domain::dom::Rfc1123Domain;
+ /// assert!(Rfc1123Domain::try_from_bytes("1.2.3.4").unwrap().is_ipv4());
/// ```
#[allow(clippy::arithmetic_side_effects, clippy::into_iter_on_ref)]
#[inline]
pub fn is_ipv4(&self) -> bool {
// The min length of an IPv4 address is 7 and the max length is 15.
- (7..=15).contains(&self.as_bytes().len())
+ (7..=15).contains(&self.dom.len().get())
// We don't use `std::net::Ipv4Addr::from_str` since that does not consider octets with leading
// 0s as valid. This means something like `0.0.0.01` is not considered an IPv4 address, but we
// want to consider that as an IP.
@@ -1097,7 +1248,7 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> {
.try_fold(0u8, |count, label| {
// If we have more than 4 `Label`s, it's not an IPv4 address. Similarly if a `Label` has
// length greater than 3, it's not a valid IPv4 address octet.
- if count < 4 && label.len() < 4 && label.as_str().parse::<u8>().is_ok() {
+ if count < 4 && label.len().get() < 4 && label.as_str().parse::<u8>().is_ok() {
// Overflow is not possible since we know `count < 4`.
Ok(count + 1)
} else {
@@ -1206,27 +1357,78 @@ impl<T> From<Rfc1123Domain<T>> for Domain<T> {
value.dom
}
}
+impl From<Rfc1123Domain<Vec<u8>>> for Rfc1123Domain<String> {
+ #[inline]
+ fn from(value: Rfc1123Domain<Vec<u8>>) -> Self {
+ Self {
+ dom: Domain::<String>::from(value.dom),
+ }
+ }
+}
+impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b [u8]> {
+ #[inline]
+ fn from(value: &'a Rfc1123Domain<T>) -> Self {
+ Self {
+ dom: Domain::<&'b [u8]>::from(&value.dom),
+ }
+ }
+}
+impl<'a: 'b, 'b, T: AsRef<str>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b str> {
+ #[inline]
+ fn from(value: &'a Rfc1123Domain<T>) -> Self {
+ Self {
+ dom: Domain::<&'b str>::from(&value.dom),
+ }
+ }
+}
+impl From<Rfc1123Domain<String>> for Rfc1123Domain<Vec<u8>> {
+ #[inline]
+ fn from(value: Rfc1123Domain<String>) -> Self {
+ Self {
+ dom: Domain::<Vec<u8>>::from(value.dom),
+ }
+ }
+}
+impl<'a: 'b, 'b> From<Rfc1123Domain<&'a [u8]>> for Rfc1123Domain<&'b str> {
+ #[inline]
+ fn from(value: Rfc1123Domain<&'a [u8]>) -> Self {
+ Self {
+ dom: Domain::<&'b str>::from(value.dom),
+ }
+ }
+}
+impl<'a: 'b, 'b> From<Rfc1123Domain<&'a str>> for Rfc1123Domain<&'b [u8]> {
+ #[inline]
+ fn from(value: Rfc1123Domain<&'a str>) -> Self {
+ Self {
+ dom: Domain::<&'b [u8]>::from(value.dom),
+ }
+ }
+}
impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> {
type Error = Rfc1123Err;
- #[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)]
+ #[allow(
+ clippy::arithmetic_side_effects,
+ clippy::indexing_slicing,
+ clippy::into_iter_on_ref
+ )]
#[inline]
fn try_from(value: Domain<T>) -> Result<Self, Self::Error> {
value
.into_iter()
.try_fold((), |(), label| {
- if label.is_hyphen_or_alphanumeric() {
- let bytes = label.value.as_bytes();
- // `Label`s are never empty, so the below indexing is fine.
- // Underflow won't occur for the same reason.
- if bytes[0] == b'-' {
- Err(Rfc1123Err::LabelStartsWithAHyphen)
- } else if bytes[bytes.len() - 1] == b'-' {
- Err(Rfc1123Err::LabelEndsWithAHyphen)
- } else {
- Ok(())
- }
+ let bytes = label.value.as_bytes();
+ // `Label`s are never empty, so the below indexing is fine.
+ // Underflow won't occur for the same reason.
+ if bytes[0] == b'-' {
+ Err(Rfc1123Err::LabelStartsWithAHyphen)
+ } else if bytes[bytes.len() - 1] == b'-' {
+ Err(Rfc1123Err::LabelEndsWithAHyphen)
} else {
- Err(Rfc1123Err::InvalidAscii)
+ bytes.into_iter().try_fold((), |(), byt| match *byt {
+ b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => Ok(()),
+ val => Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(val))),
+ })
}
})
.map(|()| Self { dom: value })
@@ -1238,6 +1440,36 @@ impl<T: AsRef<[u8]>> Display for Rfc1123Domain<T> {
self.dom.fmt(f)
}
}
+impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Rfc1123Domain<T> {
+ type Item = Label<'a>;
+ type IntoIter = LabelIter<'a>;
+ #[inline]
+ fn into_iter(self) -> Self::IntoIter {
+ LabelIter {
+ domain: self.dom.as_bytes(),
+ }
+ }
+}
+impl<'a> IntoIterator for Rfc1123Domain<&'a str> {
+ type Item = Label<'a>;
+ type IntoIter = LabelIter<'a>;
+ #[inline]
+ fn into_iter(self) -> Self::IntoIter {
+ LabelIter {
+ domain: <&str>::from(self.dom).as_bytes(),
+ }
+ }
+}
+impl<'a> IntoIterator for Rfc1123Domain<&'a [u8]> {
+ type Item = Label<'a>;
+ type IntoIter = LabelIter<'a>;
+ #[inline]
+ fn into_iter(self) -> Self::IntoIter {
+ LabelIter {
+ domain: <&[u8]>::from(self.dom),
+ }
+ }
+}
#[cfg(test)]
mod tests {
extern crate alloc;
@@ -1257,6 +1489,10 @@ mod tests {
// Test empty label is error.
assert!(Domain::try_from_bytes("a..com", &allowed_ascii)
.map_or_else(|e| e == DomainErr::EmptyLabel, |_| false));
+ assert!(Domain::try_from_bytes("a..", &allowed_ascii)
+ .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false));
+ assert!(Domain::try_from_bytes("..", &allowed_ascii)
+ .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false));
// Test label too long.
let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com";
// 4 + 64 + 4
@@ -1279,6 +1515,8 @@ mod tests {
assert!(
Domain::try_from_bytes("com.", &allowed_ascii).map_or(false, |d| d.as_str() == "com")
);
+ // Test single label.
+ assert!(Domain::try_from_bytes("c", &allowed_ascii).map_or(false, |d| d.as_str() == "c"));
// Test case-insensitivity.
assert!(
Domain::try_from_bytes("wwW.ExAMple.COm", &allowed_ascii).map_or(false, |d| {
@@ -1321,8 +1559,8 @@ mod tests {
let input2 = b"a.";
assert!(
Domain::try_from_bytes(input2, &allowed_ascii).map_or(false, |d| d
- .as_str()
.len()
+ .get()
== 1
&& d.value == input2)
)
@@ -1338,7 +1576,7 @@ mod tests {
let allowed_ascii = ASCII_FIREFOX;
assert!(
Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
- let mut iter = d.iter();
+ let mut iter = d.into_iter();
let Some(l) = iter.next() else {
return false;
};
@@ -1360,7 +1598,7 @@ mod tests {
);
assert!(
Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
- let mut iter = d.iter();
+ let mut iter = d.into_iter();
let Some(l) = iter.next_back() else {
return false;
};
@@ -1384,7 +1622,7 @@ mod tests {
);
assert!(
Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
- let mut iter = d.iter();
+ let mut iter = d.into_iter();
let Some(l) = iter.next_back() else {
return false;
};
@@ -1417,8 +1655,10 @@ mod tests {
assert!(
AllowedAscii::try_from_unique_ascii(b"exampl!co".to_owned()).map_or(false, |ascii| {
Domain::try_from_bytes("exampl!e.com", &ascii).map_or(false, |dom| {
- Rfc1123Domain::try_from(dom)
- .map_or_else(|e| e == Rfc1123Err::InvalidAscii, |_| false)
+ Rfc1123Domain::try_from(dom).map_or_else(
+ |e| e == Rfc1123Err::DomainErr(DomainErr::InvalidByte(b'!')),
+ |_| false,
+ )
})
})
);
@@ -1478,6 +1718,11 @@ mod tests {
.map_or(false, |dom| Rfc1123Domain::try_from(dom)
.map_or(false, |rfc| !rfc.is_ipv4())));
assert!(
+ Domain::try_from_bytes("1.1.1.1.1", &ASCII_HYPHEN_DIGITS_LETTERS)
+ .map_or(false, |dom| Rfc1123Domain::try_from(dom)
+ .map_or(false, |rfc| !rfc.is_ipv4()))
+ );
+ assert!(
Domain::try_from_bytes("256.0.0.0", &ASCII_HYPHEN_DIGITS_LETTERS)
.map_or(false, |dom| Rfc1123Domain::try_from(dom)
.map_or(false, |rfc| !rfc.is_ipv4()))
@@ -1515,4 +1760,109 @@ mod tests {
.map_or(false, |dom| dom.tld().as_str() == "com",)
);
}
+ #[test]
+ fn test_rfc1123_parse() {
+ // Test empty is error.
+ assert!(Rfc1123Domain::try_from_bytes("")
+ .map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::Empty), |_| false));
+ // Test root domain.
+ assert!(Rfc1123Domain::try_from_bytes(".").map_or_else(
+ |e| e == Rfc1123Err::DomainErr(DomainErr::RootDomain),
+ |_| false
+ ));
+ // Test empty label is error.
+ assert!(Rfc1123Domain::try_from_bytes("a..com").map_or_else(
+ |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel),
+ |_| false
+ ));
+ assert!(Rfc1123Domain::try_from_bytes("a..").map_or_else(
+ |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel),
+ |_| false
+ ));
+ assert!(Rfc1123Domain::try_from_bytes("..").map_or_else(
+ |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel),
+ |_| false
+ ));
+ // Test label too long.
+ let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com";
+ // 4 + 64 + 4
+ assert!(val.len() == 72);
+ assert!(Rfc1123Domain::try_from_bytes(val).map_or_else(
+ |e| e == Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63),
+ |_| false
+ ));
+ assert!(Rfc1123Domain::try_from_bytes(
+ "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com",
+ )
+ .map_or(false, |d| d.len().get() == 71));
+ // Test domain too long.
+ assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(254)), |_| false));
+ assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or(false, |d| d.len().get() == 253 ));
+ // Test max labels.
+ assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(255)), |_| false));
+ assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253));
+ assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.").map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253));
+ // Test removal of trailing '.'.
+ assert!(Rfc1123Domain::try_from_bytes("com.").map_or(false, |d| d.as_str() == "com"));
+ // Test single label.
+ assert!(Rfc1123Domain::try_from_bytes("c").map_or(false, |d| d.as_str() == "c"));
+ // Test ends with hyphen.
+ assert!(Rfc1123Domain::try_from_bytes("-")
+ .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false));
+ assert!(Rfc1123Domain::try_from_bytes("-.")
+ .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false));
+ assert!(Rfc1123Domain::try_from_bytes("a.com.-")
+ .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false));
+ assert!(Rfc1123Domain::try_from_bytes("a.com-")
+ .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false));
+ assert!(Rfc1123Domain::try_from_bytes("a-.com")
+ .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false));
+ // Test starts with hyphen.
+ assert!(Rfc1123Domain::try_from_bytes("a.-com")
+ .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false));
+ assert!(Rfc1123Domain::try_from_bytes("-a.com")
+ .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false));
+ // Test case-insensitivity.
+ assert!(
+ Rfc1123Domain::try_from_bytes("wwW.ExAMple.COm").map_or(false, |d| {
+ Rfc1123Domain::try_from_bytes("www.example.com")
+ .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal)
+ })
+ );
+ assert!(
+ Rfc1123Domain::try_from_bytes("ww-W.com").map_or(false, |d| {
+ Rfc1123Domain::try_from_bytes("Ww-w.com")
+ .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal)
+ })
+ );
+ // Test valid bytes.
+ let mut input;
+ let mut counter = 0;
+ for i in 0..=127 {
+ input = [i];
+ match i {
+ b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => {
+ counter += 1;
+ assert!(Rfc1123Domain::try_from_bytes(input)
+ .map_or(false, |d| d.value.len() == 1 && d.value == input))
+ }
+ b'-' => {
+ counter += 1;
+ let input2 = b"a-a";
+ assert!(Rfc1123Domain::try_from_bytes(input2)
+ .map_or(false, |d| d.len().get() == 3 && d.value == input2))
+ }
+ b'.' => {
+ let input2 = b"a.";
+ assert!(Rfc1123Domain::try_from_bytes(input2)
+ .map_or(false, |d| d.len().get() == 1 && d.value == input2))
+ }
+ _ => assert!(Rfc1123Domain::try_from_bytes(input).map_or_else(
+ |e| e == Rfc1123Err::DomainErr(DomainErr::InvalidByte(i)),
+ |_| false
+ )),
+ }
+ }
+ assert!(counter == 63);
+ }
}
diff --git a/src/serde.rs b/src/serde.rs
@@ -1,18 +1,17 @@
#![cfg(feature = "serde")]
extern crate alloc;
use crate::{
- char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS, PRINTABLE_ASCII},
- dom::{Domain, Rfc1123Domain},
-};
-use alloc::{
- borrow::ToOwned,
- string::{String, ToString},
+ char_set::{AllowedAscii, PRINTABLE_ASCII},
+ dom::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err},
};
+use alloc::{borrow::ToOwned, string::String};
use core::{fmt, marker::PhantomData};
use serde::{
- de::{self, Deserialize, Deserializer, Error, Unexpected, Visitor},
+ de::{self, Deserialize, Deserializer, Unexpected, Visitor},
ser::{Serialize, Serializer},
};
+/// The "default" `AllowedAscii` that is used for `Domain`.
+static DOMAIN_CHARS: &AllowedAscii<[u8; 92]> = &PRINTABLE_ASCII;
impl<T: AsRef<[u8]>> Serialize for Domain<T> {
/// Serializes `Domain` as a string.
#[inline]
@@ -62,6 +61,36 @@ pub struct DomainVisitor<'a, T, T2> {
/// The character set the visitor will use when deserializing a string into a `Domain`.
pub allowed_ascii: &'a AllowedAscii<T>,
}
+/// Converts `DomainErr` to a Serde `de::Error`.
+#[allow(clippy::as_conversions, clippy::cast_lossless)]
+#[inline]
+fn dom_err_to_serde<E: de::Error>(value: DomainErr) -> E {
+ match value {
+ DomainErr::Empty => E::invalid_length(
+ 0,
+ &"a valid domain with length inclusively between 1 and 253",
+ ),
+ DomainErr::RootDomain => {
+ E::invalid_length(0, &"a valid domain with at least one non-root label")
+ }
+ DomainErr::LenExceeds253(len) => E::invalid_length(
+ len,
+ &"a valid domain with length inclusively between 1 and 253",
+ ),
+ DomainErr::LabelLenExceeds63 => E::invalid_length(
+ 64,
+ &"a valid domain containing labels of length inclusively between 1 and 63",
+ ),
+ DomainErr::EmptyLabel => E::invalid_length(
+ 0,
+ &"a valid domain containing labels of length inclusively between 1 and 63",
+ ),
+ DomainErr::InvalidByte(byt) => E::invalid_value(
+ Unexpected::Unsigned(byt as u64),
+ &"a valid domain containing only the supplied ASCII subset",
+ ),
+ }
+}
impl<'a, T, T2> DomainVisitor<'a, T, T2> {
/// Returns `DomainVisitor` with [`Self::allowed_ascii`] set to `allowed_ascii`.
///
@@ -90,8 +119,7 @@ impl<'de: 'b, 'a, 'b, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, &'b
where
E: de::Error,
{
- Self::Value::try_from_bytes(v, self.allowed_ascii)
- .map_err(|err| E::invalid_value(Unexpected::Str(err.to_string().as_str()), &"a Domain"))
+ Self::Value::try_from_bytes(v, self.allowed_ascii).map_err(|err| dom_err_to_serde::<E>(err))
}
}
impl<'de, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, String> {
@@ -105,8 +133,7 @@ impl<'de, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, String> {
where
E: de::Error,
{
- Self::Value::try_from_bytes(v, self.allowed_ascii)
- .map_err(|err| E::invalid_value(Unexpected::Str(err.to_string().as_str()), &"a Domain"))
+ Self::Value::try_from_bytes(v, self.allowed_ascii).map_err(|err| dom_err_to_serde::<E>(err))
}
#[inline]
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
@@ -123,7 +150,7 @@ impl<'de> Deserialize<'de> for Domain<String> {
where
D: Deserializer<'de>,
{
- deserializer.deserialize_string(DomainVisitor::<'_, _, String>::new(&PRINTABLE_ASCII))
+ deserializer.deserialize_string(DomainVisitor::<'_, _, String>::new(DOMAIN_CHARS))
}
}
/// Deserializes `str`s into a `Domain` based on [`PRINTABLE_ASCII`].
@@ -133,7 +160,54 @@ impl<'de: 'a, 'a> Deserialize<'de> for Domain<&'a str> {
where
D: Deserializer<'de>,
{
- deserializer.deserialize_string(DomainVisitor::<'_, _, &str>::new(&PRINTABLE_ASCII))
+ deserializer.deserialize_str(DomainVisitor::<'_, _, &str>::new(DOMAIN_CHARS))
+ }
+}
+/// Converts `Rfc1123Err` to a Serde `de::Error`.
+#[inline]
+fn rfc_err_to_serde<E: de::Error>(value: Rfc1123Err) -> E {
+ match value {
+ Rfc1123Err::DomainErr(err) => dom_err_to_serde(err),
+ Rfc1123Err::LabelStartsWithAHyphen | Rfc1123Err::LabelEndsWithAHyphen => {
+ E::invalid_value(Unexpected::Str("-"), &"a valid domain conforming to RFC 1123 which means all labels don't begin or end with a '-'")
+ }
+ }
+}
+/// Serde [`Visitor`] that deserializes a string into an [`Rfc1123Domain`].
+struct Rfc1123Visitor<T>(PhantomData<fn() -> T>);
+impl<'de: 'a, 'a> Visitor<'de> for Rfc1123Visitor<&'a str> {
+ type Value = Rfc1123Domain<&'a str>;
+ #[inline]
+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ formatter.write_str("Rfc1123Domain")
+ }
+ #[inline]
+ fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
+ where
+ E: de::Error,
+ {
+ Self::Value::try_from_bytes(v).map_err(|err| rfc_err_to_serde(err))
+ }
+}
+impl<'de> Visitor<'de> for Rfc1123Visitor<String> {
+ type Value = Rfc1123Domain<String>;
+ #[inline]
+ fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
+ formatter.write_str("Rfc1123Domain")
+ }
+ #[inline]
+ fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
+ where
+ E: de::Error,
+ {
+ Self::Value::try_from_bytes(v).map_err(|err| rfc_err_to_serde(err))
+ }
+ #[inline]
+ fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+ where
+ E: de::Error,
+ {
+ self.visit_string(v.to_owned())
}
}
impl<'de> Deserialize<'de> for Rfc1123Domain<String> {
@@ -142,18 +216,7 @@ impl<'de> Deserialize<'de> for Rfc1123Domain<String> {
where
D: Deserializer<'de>,
{
- deserializer
- .deserialize_string(DomainVisitor::<'_, _, String>::new(
- &ASCII_HYPHEN_DIGITS_LETTERS,
- ))
- .and_then(|dom| {
- Self::try_from(dom).map_err(|err| {
- Error::invalid_value(
- Unexpected::Str(err.to_string().as_str()),
- &"an Rfc1123Domain",
- )
- })
- })
+ deserializer.deserialize_string(Rfc1123Visitor::<String>(PhantomData))
}
}
impl<'de: 'a, 'a> Deserialize<'de> for Rfc1123Domain<&'a str> {
@@ -162,18 +225,7 @@ impl<'de: 'a, 'a> Deserialize<'de> for Rfc1123Domain<&'a str> {
where
D: Deserializer<'de>,
{
- deserializer
- .deserialize_string(DomainVisitor::<'_, _, &str>::new(
- &ASCII_HYPHEN_DIGITS_LETTERS,
- ))
- .and_then(|dom| {
- Self::try_from(dom).map_err(|err| {
- Error::invalid_value(
- Unexpected::Str(err.to_string().as_str()),
- &"an Rfc1123Domain",
- )
- })
- })
+ deserializer.deserialize_str(Rfc1123Visitor::<&'a str>(PhantomData))
}
}
#[cfg(test)]