ascii_domain

Domains whose labels are only ASCII.
git clone https://git.philomathiclife.com/repos/ascii_domain
Log | Files | Refs | README

commit e3cb725644c383097ab3d5752548b3f7b56760dd
parent f3b6ea7df4cce7a861a7a77fa574a07b10b64ea5
Author: Zack Newman <zack@philomathiclife.com>
Date:   Thu,  8 Feb 2024 10:30:41 -0700

domain only contains T. remove no trailing dot req for rfc1123

Diffstat:
M Cargo.toml | 5 +++--
M README.md | 3 +--
M src/char_set.rs | 6 ++++++
M src/dom.rs | 575 +++++++++++++++++++++++++++----------------------------------------------------
M src/lib.rs | 1 +
M src/serde.rs | 36 +++++++++++++++---------------------
6 files changed, 224 insertions(+), 402 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -1,6 +1,6 @@ [package] authors = ["Zack Newman <zack@philomathiclife.com>"] -categories = ["parsing"] +categories = ["no-std", "parsing"] description = "Parser for DNS names based on a provided ASCII character set." documentation = "https://docs.rs/ascii_domain/latest/ascii_domain/" edition = "2021" @@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0" name = "ascii_domain" readme = "README.md" repository = "https://git.philomathiclife.com/repos/ascii_domain/" -version = "0.3.1" +version = "0.4.0" [lib] name = "ascii_domain" @@ -26,6 +26,7 @@ rustc_version = "0.4.0" [features] serde = ["dep:serde"] +std = [] [package.metadata.docs.rs] all-features = true diff --git a/README.md b/README.md @@ -20,5 +20,4 @@ set. This package is actively maintained. The crate is only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but -it should work on any [Tier 1 with Host Tools](https://doc.rust-lang.org/beta/rustc/platform-support.html) -target. +it should work on any platform. diff --git a/src/char_set.rs b/src/char_set.rs @@ -2,6 +2,9 @@ use core::{ fmt::{self, Display, Formatter}, str, }; +#[cfg(feature = "std")] +extern crate std; +#[cfg(feature = "std")] use std::error::Error; /// Error returned from [`AllowedAscii::try_from_unique_ascii`]. #[allow(clippy::exhaustive_enums)] @@ -41,6 +44,7 @@ impl Display for AsciiErr { } } } +#[cfg(feature = "std")] impl Error for AsciiErr {} /// Container of the ASCII `u8`s that are allowed to appear in a [`crate::dom::Label`]. 
Note that while /// [`crate::dom::Domain`] treats ASCII uppercase letters as lowercase, it still depends on such `u8`s being @@ -313,12 +317,14 @@ pub const ASCII_DIGITS: AllowedAscii<[u8; 10]> = AllowedAscii { }; #[cfg(test)] mod tests { + extern crate alloc; use crate::char_set::{ AllowedAscii, AsciiErr, ASCII_DIGITS, ASCII_DIGITS_LETTERS, ASCII_DIGITS_LOWERCASE, ASCII_DIGITS_UPPERCASE, ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS, ASCII_HYPHEN_DIGITS_LOWERCASE, ASCII_HYPHEN_DIGITS_UPPERCASE, ASCII_LETTERS, ASCII_LOWERCASE, ASCII_UPPERCASE, PRINTABLE_ASCII, RFC5322_ATEXT, }; + use alloc::{borrow::ToOwned, vec::Vec}; #[test] fn try_from() { // Empty is allowed. diff --git a/src/dom.rs b/src/dom.rs @@ -1,4 +1,8 @@ +extern crate alloc; +#[cfg(feature = "std")] +extern crate std; use crate::char_set::AllowedAscii; +use alloc::{string::String, vec::Vec}; use core::{ borrow::Borrow, cmp::Ordering, @@ -10,6 +14,7 @@ use core::{ ops::Deref, str, }; +#[cfg(feature = "std")] use std::error::Error; /// Returned by [`Domain::cmp_by_domain_ordering`]. It is more informative than [`Ordering`] in that it /// distinguishes between a `Domain` that is greater than another `Domain` due to a [`Label`] being greater @@ -53,7 +58,7 @@ impl From<DomainOrdering> for Ordering { } } } -/// A domain that consists of at least one [`Label`]. With each `Label` only containing the ASCII `u8`s in +/// A domain that consists of at least one [`Label`] with each `Label` only containing the ASCII `u8`s in /// the [`AllowedAscii`] passed to [`Self::try_from_bytes`]. The total length of a `Domain` is at most /// 253 bytes[^note] in length including the `b'.'` separator. The trailing `b'.'`, if one exists, is always /// ignored. @@ -71,17 +76,14 @@ impl From<DomainOrdering> for Ordering { /// ASCII bytes are used, the max length is 253 when the last byte is not `b'.'`; otherwise the max length is /// 254. 
This is due to the fact that there is no way to explicitly represent the root label which in wire format /// contributes one byte due to each label being preceded by the octet that represents its length. -#[derive(Clone, Debug)] +/// +/// Note this only contains `T`, so this is allocation-free and the same size as `T`. +#[derive(Clone, Copy, Debug)] pub struct Domain<T> { /// The domain value. `value.as_ref().len()` is guaranteed to have length between 1 and 253 when the last `u8` /// is not `b'.'`; otherwise the length is between 2 and 254. /// Guaranteed to only contain `b'.'` and the ASCII `u8`s in `allowed_ascii`. value: T, - /// The lengths of each label. Guaranteed to have length between 1 and 127 with each value being - /// between 1 and 63. - /// The sum of each value plus the length is guaranteed to be 1 greater than `value.as_ref().len()` when - /// the last `u8` in `value` is not `b'.'`; otherwise it will be the same. - label_lens: Vec<NonZeroU8>, } impl<'a> Domain<&'a [u8]> { /// Same as [`Self::as_bytes`] except the lifetime is tied to the slice and not `self`. @@ -129,10 +131,7 @@ impl From<Domain<Vec<u8>>> for Domain<String> { // SAFETY: // We only allow ASCII, so this is fine. let val = unsafe { String::from_utf8_unchecked(value.value) }; - Self { - value: val, - label_lens: value.label_lens, - } + Self { value: val } } } impl From<Domain<String>> for Domain<Vec<u8>> { @@ -140,7 +139,6 @@ impl From<Domain<String>> for Domain<Vec<u8>> { fn from(value: Domain<String>) -> Self { Self { value: value.value.into_bytes(), - label_lens: value.label_lens, } } } @@ -151,10 +149,7 @@ impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> { // SAFETY: // We only allow ASCII, so this is fine. 
let val = unsafe { str::from_utf8_unchecked(value.value) }; - Self { - value: val, - label_lens: value.label_lens, - } + Self { value: val } } } impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> { @@ -162,7 +157,6 @@ impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> { fn from(value: Domain<&'a str>) -> Self { Self { value: value.value.as_bytes(), - label_lens: value.label_lens, } } } @@ -175,23 +169,6 @@ impl<T> Domain<T> { // SAFETY: 0 < 1 < 256. #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) }; - /// Returns the count of [`Label`]s. Due to length requirements of `Label` and `Domain`, - /// this is less than `128`. - /// - /// # Example - /// - /// ``` - /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; - /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().label_count().get() == 2); - /// ``` - #[inline] - #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)] - pub fn label_count(&self) -> NonZeroU8 { - // SAFETY: - // The only way to construct a `Domain` is via `try_from_bytes` which ensures the total number - // of labels is inclusively between 1 and 127. `Domain` is immutable ensuring such invariants are kept. - unsafe { NonZeroU8::new_unchecked(self.label_lens.len() as u8) } - } /// The inner `T`. This should be treated with caution since it will contain a trailing `b'.'` if there /// is one as well as ASCII uppercase letters if there are any. /// @@ -328,9 +305,8 @@ impl<T: AsRef<[u8]>> Domain<T> { None => return Err(DomainErr::Empty), Some(byt) => { if *byt == b'.' { - // We always ignore trailing `b'.'`, so this is equivalent to being empty. if val.len() == 1 { - return Err(DomainErr::Empty); + return Err(DomainErr::RootDomain); } // We know `val.len` is at least 1; otherwise `last` would have returned `None`. // Therefore this won't underflow and indexing won't `panic`. 
@@ -344,38 +320,25 @@ impl<T: AsRef<[u8]>> Domain<T> { if value.len() > Self::MAX_LEN.get() as usize { Err(DomainErr::LenExceeds253(value.len())) } else { - let mut label_lens = Vec::with_capacity(3); - let mut label_len = 0; value .into_iter() - .try_fold((), |(), byt| { + .try_fold(0, |label_len, byt| { let b = *byt; if b == b'.' { - NonZeroU8::new(label_len).map_or(Err(DomainErr::EmptyLabel), |length| { - label_lens.push(length); - label_len = 0; - Ok(()) - }) + NonZeroU8::new(label_len).map_or(Err(DomainErr::EmptyLabel), |_| Ok(0)) } else if !allowed_ascii.contains(b) { Err(DomainErr::InvalidByte(b)) } else if label_len == 63 { Err(DomainErr::LabelLenExceeds63) } else { // This is less than 63 due to the above check, so this won't overflow; - label_len += 1; - Ok(()) + Ok(label_len + 1) } }) - .and_then(|()| { + .and_then(|label_len| { NonZeroU8::new(label_len) .ok_or(DomainErr::EmptyLabel) - .map(|length| { - label_lens.push(length); - Self { - value: v, - label_lens, - } - }) + .map(|_| Self { value: v }) }) } } @@ -454,20 +417,27 @@ impl<T: AsRef<[u8]>> Domain<T> { if self == right { DomainOrdering::Equal } else { + let mut right_iter = right.into_iter(); self.into_iter() - .zip(right) - .try_fold((), |(), (label, label2)| match label.cmp(&label2) { - Ordering::Less => Err(DomainOrdering::Less), - Ordering::Equal => Ok(()), - Ordering::Greater => Err(DomainOrdering::Greater), + .try_fold(false, |_, label| { + right_iter + .next() + .map_or(Ok(true), |label2| match label.cmp(&label2) { + Ordering::Less => Err(DomainOrdering::Less), + Ordering::Equal => Ok(false), + Ordering::Greater => Err(DomainOrdering::Greater), + }) }) - .map_or_else(convert::identity, |()| { - match self.label_count().cmp(&right.label_count()) { - Ordering::Less => DomainOrdering::Shorter, - Ordering::Equal => { - unreachable!("there is a bug in Domain::cmp_by_domain_ordering") - } - Ordering::Greater => DomainOrdering::Longer, + .map_or_else(convert::identity, |flag| { + // We iterate 
`self` before `right`, so `flag` is `true` iff `right` + // has fewer `Label`s than `self`. + if flag { + DomainOrdering::Longer + } else { + // `self` has as many or fewer `Label`s than `right`; however if it had as many + // `Label`s as `right`, then all `Label`s are the same which is impossible since + // we already checked if `self == right`. + DomainOrdering::Shorter } }) } @@ -501,6 +471,21 @@ impl<T: AsRef<[u8]>> Domain<T> { pub fn cmp_doms<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> Ordering { self.cmp_by_domain_ordering(right).into() } + /// Returns the first `Label`. + /// + /// # Example + /// + /// ``` + /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; + /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().first_label().as_str() == "example"); + /// ``` + #[allow(clippy::unreachable)] + #[inline] + pub fn first_label(&self) -> Label<'_> { + self.into_iter() + .next_back() + .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")) + } /// Returns the last `Label` (i.e., the TLD). /// /// # Example @@ -596,6 +581,9 @@ impl<T: AsRef<[u8]>> Deref for Domain<T> { pub enum DomainErr { /// The domain was empty. Empty, + /// The domain was the root domain that is to say it was the domain that only contained the root + /// zone (i.e., `b'.'`). + RootDomain, /// The length of the domain was greater than 253 not counting a terminating `b'.'` if there was one. LenExceeds253(usize), /// The domain contained at least one empty label. @@ -610,6 +598,7 @@ impl Display for DomainErr { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { Self::Empty => f.write_str("domain is empty"), + Self::RootDomain => f.write_str("domain is the root domain"), Self::LenExceeds253(len) => write!( f, "domain has length {len} which is greater than the max length of 253" @@ -624,6 +613,7 @@ impl Display for DomainErr { } } } +#[cfg(feature = "std")] impl Error for DomainErr {} /// A label of a [`Domain`]. 
The total length of a `Label` is inclusively between 1 and 63. #[derive(Clone, Copy, Debug)] @@ -756,6 +746,18 @@ impl PartialEq<Label<'_>> for Label<'_> { self.value.eq_ignore_ascii_case(other.value) } } +impl PartialEq<&Label<'_>> for Label<'_> { + #[inline] + fn eq(&self, other: &&Label<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Label<'_>> for &Label<'_> { + #[inline] + fn eq(&self, other: &Label<'_>) -> bool { + **self == *other + } +} impl Eq for Label<'_> {} impl PartialOrd<Label<'_>> for Label<'_> { #[inline] @@ -803,25 +805,24 @@ impl<'a> Deref for Label<'a> { } } /// [`Iterator`] that iterates [`Label`]s from a borrowed [`Domain`] starting from the TLD down. +/// +/// This iterates `Label`s on demand; so if repeated iteration is desired, it may be better to collect the `Label`s +/// in a collection (e.g., [`Vec`]) than call [`Domain::iter`] again. +#[derive(Clone, Copy, Debug)] pub struct LabelIter<'a, T> { /// Domain that contains `Label`s to iterate. domain: &'a Domain<T>, - /// Starts at domain.label_count().get() - 1 which is valid since domain.label_count().get() > 0. - /// idx is 255 when the iterator is exhausted. - /// Since idx is decremented each time and it starts at a value less than 254, this is a valid value to use - /// as a flag. + /// This is one past the last index of the last character in the "first" `Label` remaining. + /// `idx == idx_back` iff there are no more `Label`s to iterate. + /// Since `idx` is associated with the "first" `Label` remaining and `idx_back` is associated with the + /// "last", `idx >= idx_back`. + /// + /// This means `idx - idx_back` is the total length of `domain` remaining _including_ separators. idx: u8, - /// This is used to mark the start of a label before the length of the label has been subtracted. - /// After a label is read, 1 must be subtracted to account for '.'. - start: u8, - /// Starts at 0 which is valid since domain.label_count().get() > 0. 
- /// idx_back is 255 when the iterator is exhausted. - /// Since idx_back is incremented each time and the max label count is 127, this is a valid value to - /// use as a flag. + /// This is the index of the first character in the "last" `Label` remaining. + /// + /// Note since `LabelIter` iterates from the TLD down, "last" means "first" left-to-right. idx_back: u8, - /// This is used to mark the start of a label before the length of the label has been added. - /// After a label is read, 1 must be added to account for '.'. - start_back: u8, } impl<'a, T: AsRef<[u8]>> LabelIter<'a, T> { /// Helper function to construct an instance. @@ -829,11 +830,8 @@ impl<'a, T: AsRef<[u8]>> LabelIter<'a, T> { #[inline] fn new(domain: &'a Domain<T>) -> Self { Self { - // This won't underflow since `label_count` is at least 1. - idx: domain.label_count().get() - 1, - start: domain.len().get(), + idx: domain.len().get(), idx_back: 0, - start_back: 0, domain, } } @@ -844,41 +842,44 @@ impl<'a, T: AsRef<[u8]>> Iterator for LabelIter<'a, T> { unsafe_code, clippy::arithmetic_side_effects, clippy::as_conversions, - clippy::indexing_slicing + clippy::indexing_slicing, + clippy::into_iter_on_ref )] #[inline] fn next(&mut self) -> Option<Self::Item> { - self.domain.label_lens.get(self.idx as usize).map(|len| { - // This won't underflow since `start` is initialized to the length of the domain. - // The total sum of the label lengths plus the separators is equal to start. - self.start -= len.get(); - // Overflow clearly won't happen since we subtracted `len` from `start` above. - // Indexing won't `panic` either since `start` is greater than 0 since it's - // only 0 if `idx` is 0 which only happens _after_ all labels have been iterated. - let input = - &self.domain.as_bytes()[self.start as usize..(self.start + len.get()) as usize]; - // SAFETY: - // This is safe since we only allow ASCII, so the above indexing is fine. 
- let value = unsafe { str::from_utf8_unchecked(input) }; - let label = Label { value }; - if self.idx == 0 || self.idx <= self.idx_back { - // 255 is never valid, so it's used as a flag to indicate iteration is done. - // In particular when we call `label_lens.get` with 255, it will return `None`. - self.idx = 255; - self.idx_back = 255; - } else { - // We check above that `idx` is not 0, so this won't underflow. - self.idx -= 1; - // `start` is only 0 when `idx` is, so this won't underflow. - self.start -= 1; - } - label - }) - } - #[inline] - fn size_hint(&self) -> (usize, Option<usize>) { - let len = self.len(); - (len, Some(len)) + let bytes = self.domain.as_bytes(); + // 0 <= `self.idx_back` <= `self.idx` <= `bytes.len()`, so this won't `panic`. + bytes[self.idx_back as usize..self.idx as usize] + .into_iter() + .rev() + .try_fold(1, |count, byt| { + if *byt == b'.' { + let len = self.idx as usize; + // `count` <= `self.idx`, so this won't underflow. + self.idx -= count; + // `self.idx + 1` won't overflow since `self.idx < 255`. + // `self.idx + 1` <= `len` since `count` > 0 so this won't `panic`. + let ascii = &bytes[(self.idx + 1) as usize..len]; + // SAFETY: + // We only allow ASCII, so this is safe. + let value = unsafe { str::from_utf8_unchecked(ascii) }; + Err(Label { value }) + } else { + Ok(count + 1) + } + }) + .map_or_else(Some, |_| { + // `self.idx == self.idx_back` iff there are no more `Label`s to iterate. + (self.idx > self.idx_back).then(|| { + let len = self.idx as usize; + self.idx = self.idx_back; + let ascii = &bytes[self.idx_back as usize..len]; + // SAFETY: + // We only allow ASCII, so this is safe. 
+ let value = unsafe { str::from_utf8_unchecked(ascii) }; + Label { value } + }) + }) } #[inline] fn last(mut self) -> Option<Self::Item> @@ -886,69 +887,55 @@ impl<'a, T: AsRef<[u8]>> Iterator for LabelIter<'a, T> { Self: Sized, { let opt = self.next_back(); - self.idx = 255; - self.idx_back = 255; + self.idx_back = self.idx; opt } } impl<T: AsRef<[u8]>> FusedIterator for LabelIter<'_, T> {} -impl<T: AsRef<[u8]>> ExactSizeIterator for LabelIter<'_, T> { - #[allow(clippy::arithmetic_side_effects, clippy::as_conversions)] - #[inline] - fn len(&self) -> usize { - if self.idx == 255 { - 0 - } else { - // `idx` is always >= `idx_back` so subtracting is free from underflow. - // Adding 1 is free from overflow since the max value of `idx` is 126 unless - // it's 255 which means there is no more iteration; however we checked for that - // above. - (self.idx - self.idx_back + 1) as usize - } - } -} impl<T: AsRef<[u8]>> DoubleEndedIterator for LabelIter<'_, T> { #[allow( unsafe_code, clippy::arithmetic_side_effects, clippy::as_conversions, - clippy::indexing_slicing + clippy::indexing_slicing, + clippy::into_iter_on_ref )] #[inline] fn next_back(&mut self) -> Option<Self::Item> { - self.domain - .label_lens - .get(self.idx_back as usize) - .map(|len| { - // Indexing won't `panic` since `start_back` is never too large. - let input = &self.domain.as_bytes() - [self.start_back as usize..(self.start_back + len.get()) as usize]; - // SAFETY: - // This is safe since we only allow ASCII, so the above indexing is fine. - let value = unsafe { str::from_utf8_unchecked(input) }; - let label = Label { value }; - // `idx_back` starts at 0 and is only incremented after we iterate a `Label` backwards. - // Since there are at most 127 `Label`s, this won't overflow. - if self.idx_back + 1 == self.domain.label_count().get() || self.idx_back >= self.idx - { - // `255` is used a "flag" to indicate iteration is over since 255 is never valid. 
- // In particular this means that `label_lens.get` will return `None` when passed - // `idx` or `idx_back`. - self.idx = 255; - self.idx_back = 255; + let bytes = self.domain.as_bytes(); + // 0 <= `self.idx_back` <= `self.idx` <= `bytes.len()`, so this won't `panic`. + bytes[self.idx_back as usize..self.idx as usize] + .into_iter() + .try_fold(1, |count, byt| { + if *byt == b'.' { + let start = self.idx_back as usize; + // The max value this can be is `self.idx`, so overflow is no concern. + self.idx_back += count; + // `self.idx_back - 1` won't underflow since `self.idx_back` > 0. + // `self.idx_back - 1` < `bytes.len()` since we know there `Label`s can't be empty + // and we are at `.` boundary. + // `start` <= `self.idx_back` since `count` >= 1. + // For those reasons, this won't `panic`. + let ascii = &bytes[start..self.idx_back as usize - 1]; + // SAFETY: + // We only allow ASCII, so this is safe. + let value = unsafe { str::from_utf8_unchecked(ascii) }; + Err(Label { value }) } else { - // Due to the call to `get`, this will only happen when `idx_back` < - // `label_lens.len()` which in turn is always below 128 making - // overflow not possible. - self.idx_back += 1; - // `len` is always below 64 so adding 1 won't cause overflow. - // `start_back` is initialized to 0 and is only increased here. - // This means this will only ever be as large as the domain plus 1 - // when there is no trailing `b'.'` or as large as the domain when there is - // a trailing `b'.'` both of which are less than 255. - self.start_back += len.get() + 1; + Ok(count + 1) } - label + }) + .map_or_else(Some, |_| { + // `self.idx == `self.idx_back` iff there are no more `Label`s to iterate. + (self.idx > self.idx_back).then(|| { + let start = self.idx_back as usize; + self.idx_back = self.idx; + let ascii = &bytes[start..self.idx as usize]; + // SAFETY: + // We only allow ASCII, so this is safe. 
+ let value = unsafe { str::from_utf8_unchecked(ascii) }; + Label { value } + }) }) } } @@ -964,9 +951,7 @@ impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Domain<T> { #[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rfc1123Err { - /// The [`Domain`] has a trailing `b'.'`. - ContainsTrailingDot, - /// The [`Domain`] contains ASCII not in [`crate::char_set::ASCII_HYPHEN_DIGITS_LETTERS`]. + /// A [`Label`] of [`Domain`] contains ASCII not in [`crate::char_set::ASCII_HYPHEN_DIGITS_LETTERS`]. InvalidAscii, /// A [`Label`] of [`Domain`] starts with an ASCII hyphen. LabelStartsWithAHyphen, @@ -977,10 +962,9 @@ impl Display for Rfc1123Err { #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { - Self::ContainsTrailingDot => f.write_str("the domain contained a trailing dot"), - Self::InvalidAscii => { - f.write_str("the domain contained ASCII besides hyphen, digits, and letters") - } + Self::InvalidAscii => f.write_str( + "a label in the domain contained ASCII besides hyphen, digits, and letters", + ), Self::LabelStartsWithAHyphen => { f.write_str("a label in the domain starts with a hyphen") } @@ -988,13 +972,13 @@ impl Display for Rfc1123Err { } } } +#[cfg(feature = "std")] impl Error for Rfc1123Err {} /// **TL;DR** Wrapper type around a [`Domain`] that enforces conformance to /// [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123#page-13). /// /// * Each [`Label`] must only contain ASCII digits, letters, or hyphen. /// * Each `Label` must not begin or end with a hyphen. -/// * A trailing `b'.'` must not exist. /// --- /// Unsurprisingly, RFC 1123 is not super precise as it uses "host name" to mean label and also domain: /// "Host software MUST handle host names \[labels\] of up to 63 characters and SHOULD handle host @@ -1033,7 +1017,10 @@ impl Error for Rfc1123Err {} /// until it is made obsolete. 
In the future there may be other RFCs that further restrict valid TLDs, and treating /// RFC 1123 as if it were written presciently does not make sense. For that reason the more relaxed interpretation /// is rejected. Consequently we use the most relaxed interpretation. -#[derive(Clone, Debug)] +/// +/// If one wants to enforce the literal interpretation, one can use [`Self::is_literal_interpretation`]. Similarly, +/// if one wants to enforce the strict interpretation, one can use [`Self::is_strict_interpretation`]. +#[derive(Clone, Copy, Debug)] pub struct Rfc1123Domain<T> { /// The domain. dom: Domain<T>, @@ -1096,46 +1083,29 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> { /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS}; /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("1.2.3.4", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_ipv4()); /// ``` - #[allow( - clippy::arithmetic_side_effects, - clippy::as_conversions, - clippy::cast_lossless, - clippy::into_iter_on_ref - )] + #[allow(clippy::arithmetic_side_effects, clippy::into_iter_on_ref)] #[inline] pub fn is_ipv4(&self) -> bool { - // Faster to check metadata first to hopefully avoid re-parsing the domain as an IPv4 address. - self.as_bytes().len() < 16 - && self.label_count().get() == 4 + // The min length of an IPv4 address is 7 and the max length is 15. + (7..=15).contains(&self.as_bytes().len()) // We don't use `std::net::Ipv4Addr::from_str` since that does not consider octets with leading // 0s as valid. This means something like `0.0.0.01` is not considered an IPv4 address, but we // want to consider that as an IP. && self .dom .into_iter() - .try_fold((), |(), label| { - if label.len() < 4 { - label - .as_bytes() - .into_iter() - .try_fold(0u16, |val, byt| { - if byt.is_ascii_digit() { - // We already verified the length is at most 3, and we only perform - // this arithmetic on integers between 0 and 9. 
This means the max value - // of these operations is 999 which is smaller than `u16::MAX`. We verified - // `byt` is an ASCII digit so we know `byt - b'0'` will be inclusively between - // 0 and 9. So no overflow, underflow, or truncation will occur. - Ok(val * 10 + (byt - b'0') as u16) - } else { - Err(()) - } - }) - .and_then(|int| u8::try_from(int).map_or(Err(()), |_| Ok(()))) + .try_fold(0u8, |count, label| { + // If we have more than 4 `Label`s, it's not an IPv4 address. Similarly if a `Label` has + // length greater than 3, it's not a valid IPv4 address octet. + if count < 4 && label.len() < 4 && label.as_str().parse::<u8>().is_ok() { + // Overflow is not possible since we know `count < 4`. + Ok(count + 1) } else { Err(()) } }) - .is_ok() + // We must have exactly 4 `Label`s. + .map_or(false, |count| count == 4) } } impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for Rfc1123Domain<T2> { @@ -1241,29 +1211,25 @@ impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> { #[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] #[inline] fn try_from(value: Domain<T>) -> Result<Self, Self::Error> { - if value.contains_trailing_dot() { - Err(Rfc1123Err::ContainsTrailingDot) - } else { - value - .into_iter() - .try_fold((), |(), label| { - if label.is_hyphen_or_alphanumeric() { - let bytes = label.value.as_bytes(); - // `Label`s are never empty, so the below indexing is fine. - // Underflow won't occur for the same reason. - if bytes[0] == b'-' { - Err(Rfc1123Err::LabelStartsWithAHyphen) - } else if bytes[bytes.len() - 1] == b'-' { - Err(Rfc1123Err::LabelEndsWithAHyphen) - } else { - Ok(()) - } + value + .into_iter() + .try_fold((), |(), label| { + if label.is_hyphen_or_alphanumeric() { + let bytes = label.value.as_bytes(); + // `Label`s are never empty, so the below indexing is fine. + // Underflow won't occur for the same reason. 
+ if bytes[0] == b'-' { + Err(Rfc1123Err::LabelStartsWithAHyphen) + } else if bytes[bytes.len() - 1] == b'-' { + Err(Rfc1123Err::LabelEndsWithAHyphen) } else { - Err(Rfc1123Err::InvalidAscii) + Ok(()) } - }) - .map(|()| Self { dom: value }) - } + } else { + Err(Rfc1123Err::InvalidAscii) + } + }) + .map(|()| Self { dom: value }) } } impl<T: AsRef<[u8]>> Display for Rfc1123Domain<T> { @@ -1274,8 +1240,10 @@ impl<T: AsRef<[u8]>> Display for Rfc1123Domain<T> { } #[cfg(test)] mod tests { + extern crate alloc; use super::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err}; use crate::char_set::{AllowedAscii, ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS}; + use alloc::borrow::ToOwned; use core::cmp::Ordering; #[test] fn test_dom_parse() { @@ -1283,8 +1251,9 @@ mod tests { // Test empty is error. assert!(Domain::try_from_bytes("", &allowed_ascii) .map_or_else(|e| e == DomainErr::Empty, |_| false)); + // Test root domain. assert!(Domain::try_from_bytes(".", &allowed_ascii) - .map_or_else(|e| e == DomainErr::Empty, |_| false)); + .map_or_else(|e| e == DomainErr::RootDomain, |_| false)); // Test empty label is error. assert!(Domain::try_from_bytes("a..com", &allowed_ascii) .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); @@ -1304,8 +1273,8 @@ mod tests { assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or(false, |d| d.len().get() == 253 )); // Test max labels. 
assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(255), |_| false)); - assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or(false, |d| d.label_count().get() == 127 && d.len().get() == 253)); - assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.", &allowed_ascii).map_or(false, |d| d.label_count().get() == 127 && d.len().get() == 253)); + assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253)); + assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.", &allowed_ascii).map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253)); // Test removal of trailing '.'. 
assert!( Domain::try_from_bytes("com.", &allowed_ascii).map_or(false, |d| d.as_str() == "com") @@ -1365,144 +1334,24 @@ mod tests { assert!(counter == 78); } #[test] - fn test_dom_into_iter() { - let allowed_ascii = ASCII_FIREFOX; - assert!( - Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.into_iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next() else { - return false; - }; - if l.value != "com" { - return false; - } - if iter.len() != 2 { - return false; - } - let Some(l) = iter.next() else { return false }; - if l.value != "example" { - return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next() else { - return false; - }; - if iter.len() != 0 { - return false; - } - if l.value != "www" { - return false; - } - iter.next().is_none() - }) - ); - assert!( - Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.into_iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "www" { - return false; - } - if iter.len() != 2 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "example" { - return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "com" { - return false; - } - if iter.len() != 0 { - return false; - } - iter.next_back().is_none() - }) - ); - assert!( - Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.into_iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "www" { - return false; - } - if iter.len() != 2 { - return false; - } - let Some(l) = iter.next() else { return false }; - if l.value != "com" { - return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next_back() 
else { - return false; - }; - if l.value != "example" { - return false; - } - if iter.len() != 0 { - return false; - } - iter.next().is_none() && iter.next_back().is_none() - }) - ); - } - #[test] fn test_dom_iter() { let allowed_ascii = ASCII_FIREFOX; assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { let mut iter = d.iter(); - if iter.len() != 3 { - return false; - } let Some(l) = iter.next() else { return false; }; if l.value != "com" { return false; } - if iter.len() != 2 { - return false; - } let Some(l) = iter.next() else { return false }; if l.value != "example" { return false; } - if iter.len() != 1 { - return false; - } let Some(l) = iter.next() else { return false; }; - if iter.len() != 0 { - return false; - } if l.value != "www" { return false; } @@ -1512,70 +1361,46 @@ mod tests { assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { let mut iter = d.iter(); - if iter.len() != 3 { - return false; - } let Some(l) = iter.next_back() else { return false; }; if l.value != "www" { return false; } - if iter.len() != 2 { - return false; - } let Some(l) = iter.next_back() else { return false; }; if l.value != "example" { return false; } - if iter.len() != 1 { - return false; - } let Some(l) = iter.next_back() else { return false; }; if l.value != "com" { return false; } - if iter.len() != 0 { - return false; - } iter.next_back().is_none() }) ); assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { let mut iter = d.iter(); - if iter.len() != 3 { - return false; - } let Some(l) = iter.next_back() else { return false; }; if l.value != "www" { return false; } - if iter.len() != 2 { - return false; - } let Some(l) = iter.next() else { return false }; if l.value != "com" { return false; } - if iter.len() != 1 { - return false; - } let Some(l) = iter.next_back() else { return false; }; if l.value != "example" { return false; } - if iter.len() != 0 { - return false; - 
} iter.next().is_none() && iter.next_back().is_none() }) ); @@ -1590,13 +1415,6 @@ mod tests { ) ); assert!( - Domain::try_from_bytes("example.com.", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( - false, - |dom| Rfc1123Domain::try_from(dom) - .map_or_else(|e| e == Rfc1123Err::ContainsTrailingDot, |_| false) - ) - ); - assert!( AllowedAscii::try_from_unique_ascii(b"exampl!co".to_owned()).map_or(false, |ascii| { Domain::try_from_bytes("exampl!e.com", &ascii).map_or(false, |dom| { Rfc1123Domain::try_from(dom) @@ -1656,6 +1474,9 @@ mod tests { .map_or(false, |dom| Rfc1123Domain::try_from(dom) .map_or(false, |rfc| rfc.is_ipv4())) ); + assert!(Domain::try_from_bytes("1", &ASCII_HYPHEN_DIGITS_LETTERS) + .map_or(false, |dom| Rfc1123Domain::try_from(dom) + .map_or(false, |rfc| !rfc.is_ipv4()))); assert!( Domain::try_from_bytes("256.0.0.0", &ASCII_HYPHEN_DIGITS_LETTERS) .map_or(false, |dom| Rfc1123Domain::try_from(dom) diff --git a/src/lib.rs b/src/lib.rs @@ -13,6 +13,7 @@ //! all octets are allowed; but conforming to [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123) or //! [RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) requires stricter formats and a reduced character //! set. 
+#![no_std] #![cfg_attr(all(doc, CHANNEL_NIGHTLY), feature(doc_auto_cfg))] #![deny( unsafe_code, diff --git a/src/serde.rs b/src/serde.rs @@ -1,18 +1,18 @@ -#[cfg(feature = "serde")] +#![cfg(feature = "serde")] +extern crate alloc; use crate::{ char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS, PRINTABLE_ASCII}, dom::{Domain, Rfc1123Domain}, }; -#[cfg(feature = "serde")] -use core::fmt; -#[cfg(feature = "serde")] -use core::marker::PhantomData; -#[cfg(feature = "serde")] +use alloc::{ + borrow::ToOwned, + string::{String, ToString}, +}; +use core::{fmt, marker::PhantomData}; use serde::{ de::{self, Deserialize, Deserializer, Error, Unexpected, Visitor}, ser::{Serialize, Serializer}, }; -#[cfg(feature = "serde")] impl<T: AsRef<[u8]>> Serialize for Domain<T> { /// Serializes `Domain` as a string. #[inline] @@ -23,7 +23,6 @@ impl<T: AsRef<[u8]>> Serialize for Domain<T> { serializer.serialize_str(self.as_str()) } } -#[cfg(feature = "serde")] impl<T: AsRef<[u8]>> Serialize for Rfc1123Domain<T> { /// Serializes `Rfc1123Domain` as a string. #[inline] @@ -56,7 +55,6 @@ impl<T: AsRef<[u8]>> Serialize for Rfc1123Domain<T> { /// } /// ``` #[allow(clippy::partial_pub_fields)] -#[cfg(feature = "serde")] #[derive(Clone, Copy)] pub struct DomainVisitor<'a, T, T2> { /// Phantom. 
@@ -81,7 +79,6 @@ impl<'a, T, T2> DomainVisitor<'a, T, T2> { } } } -#[cfg(feature = "serde")] impl<'de: 'b, 'a, 'b, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, &'b str> { type Value = Domain<&'b str>; #[inline] @@ -93,11 +90,10 @@ impl<'de: 'b, 'a, 'b, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, &'b where E: de::Error, { - Domain::try_from_bytes(v, self.allowed_ascii) + Self::Value::try_from_bytes(v, self.allowed_ascii) .map_err(|err| E::invalid_value(Unexpected::Str(err.to_string().as_str()), &"a Domain")) } } -#[cfg(feature = "serde")] impl<'de, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, String> { type Value = Domain<String>; #[inline] @@ -109,7 +105,7 @@ impl<'de, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, String> { where E: de::Error, { - Domain::try_from_bytes(v, self.allowed_ascii) + Self::Value::try_from_bytes(v, self.allowed_ascii) .map_err(|err| E::invalid_value(Unexpected::Str(err.to_string().as_str()), &"a Domain")) } #[inline] @@ -121,7 +117,6 @@ impl<'de, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, String> { } } /// Deserializes `String`s into a `Domain` based on [`PRINTABLE_ASCII`]. -#[cfg(feature = "serde")] impl<'de> Deserialize<'de> for Domain<String> { #[inline] fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> @@ -132,7 +127,6 @@ impl<'de> Deserialize<'de> for Domain<String> { } } /// Deserializes `str`s into a `Domain` based on [`PRINTABLE_ASCII`]. 
-#[cfg(feature = "serde")] impl<'de: 'a, 'a> Deserialize<'de> for Domain<&'a str> { #[inline] fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> @@ -142,7 +136,6 @@ impl<'de: 'a, 'a> Deserialize<'de> for Domain<&'a str> { deserializer.deserialize_string(DomainVisitor::<'_, _, &str>::new(&PRINTABLE_ASCII)) } } -#[cfg(feature = "serde")] impl<'de> Deserialize<'de> for Rfc1123Domain<String> { #[inline] fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> @@ -163,7 +156,6 @@ impl<'de> Deserialize<'de> for Rfc1123Domain<String> { }) } } -#[cfg(feature = "serde")] impl<'de: 'a, 'a> Deserialize<'de> for Rfc1123Domain<&'a str> { #[inline] fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> @@ -184,19 +176,21 @@ impl<'de: 'a, 'a> Deserialize<'de> for Rfc1123Domain<&'a str> { }) } } -#[cfg(all(test, feature = "serde"))] +#[cfg(test)] mod tests { + extern crate alloc; use crate::{ char_set::ASCII_HYPHEN_DIGITS_LETTERS, dom::{Domain, Rfc1123Domain}, }; + use alloc::string::String; use serde_json; #[test] fn test_serde() { assert!(serde_json::from_str::<Domain<&str>>(r#""example.com""#) - .map_or(false, |dom| dom.label_count().get() == 2)); + .map_or(false, |dom| dom.into_iter().count() == 2)); assert!(serde_json::from_str::<Domain<String>>(r#""c\"om""#) - .map_or(false, |dom| dom.label_count().get() == 1)); + .map_or(false, |dom| dom.into_iter().count() == 1)); // Can't borrow since input needs to be de-escaped. assert!(serde_json::from_str::<Domain<&str>>(r#""c\"om""#) .map_or_else(|err| err.is_data() && err.column() == 7, |_| false)); @@ -210,7 +204,7 @@ mod tests { .map_or(false, |output| output == r#""example.com""#)); assert!( serde_json::from_str::<Rfc1123Domain<&str>>(r#""example.com""#) - .map_or(false, |dom| dom.label_count().get() == 2) + .map_or(false, |dom| dom.into_iter().count() == 2) ); assert!( serde_json::from_str::<Rfc1123Domain<String>>(r#""c\u006fm""#)