ascii_domain

Domains whose labels are only ASCII.
git clone https://git.philomathiclife.com/repos/ascii_domain
Log | Files | Refs | README

commit 353e30dbfbd1bc5c3ee32cf4da0a44e8e946dd42
parent 2858a84ec2ea7391d8ec73ca86aa47ecedb514ca
Author: Zack Newman <zack@philomathiclife.com>
Date:   Sat,  7 Sep 2024 10:37:28 -0600

update deps. lint reasons

Diffstat:
MCargo.toml | 34+++++++++++++---------------------
MREADME.md | 42++++++++++++++++++++++++++----------------
Dbuild.rs | 12------------
Msrc/char_set.rs | 24+++++++++++-------------
Msrc/dom.rs | 207+++++++++++++++++++++++++++++++++++++++++++------------------------------------
Msrc/lib.rs | 10++++++----
Msrc/serde.rs | 16+++++-----------
7 files changed, 173 insertions(+), 172 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -9,33 +9,25 @@ license = "MIT OR Apache-2.0" name = "ascii_domain" readme = "README.md" repository = "https://git.philomathiclife.com/repos/ascii_domain/" -version = "0.6.1" +rust-version = "1.81.0" +version = "0.6.2" -[lib] -name = "ascii_domain" -path = "src/lib.rs" +[badges] +maintenance = { status = "actively-developed" } + +[package.metadata.docs.rs] +all-features = true +rustdoc-args = ["--cfg", "docsrs"] [dependencies] -serde = { version = "1.0.197", default-features = false, features = ["alloc"], optional = true } +serde = { version = "1.0.210", default-features = false, features = ["alloc"], optional = true } [dev-dependencies] -serde_json = { version = "1.0.115", default-features = false, features = ["alloc"] } +serde_json = { version = "1.0.128", default-features = false, features = ["alloc"] } + -[build-dependencies] -rustc_version = "0.4.0" +### FEATURES ################################################################# [features] +# Provides serde support. serde = ["dep:serde"] -std = [] - -[package.metadata.docs.rs] -all-features = true -rustdoc-args = ["--cfg", "docsrs"] - -[badges] -maintenance = { status = "actively-developed" } - -[profile.release] -lto = true -panic = 'abort' -strip = true diff --git a/README.md b/README.md @@ -1,23 +1,33 @@ -# ascii_domain - -`ascii_domain` is a [library](https://docs.rs/ascii_domain/latest/ascii_domain) for efficiently parsing domains -based on a supplied ASCII character set one wants to enforce each -[`Label`](https://docs.rs/domain_parse/latest/domain_parse/dom/struct.Label.html) to conform to. The primary type -in the library is [`Domain`](https://docs.rs/domain_parse/latest/domain_parse/dom/struct.Domain.html) which can be -thought of as domains in _representation_ format. Technically since any ASCII `u8` except `b'.'` is allowed in a -`Label`, it is more general than an actual representation format that doesn’t include some form of escape -characters. 
For a full-fledged DNS library look elsewhere (e.g., [`domain`](https://docs.rs/domain/latest/domain/)). - +# ascii_domain + +`ascii_domain` is a library for efficiently parsing domains based on a supplied ASCII character set one wants to +enforce each `Label` to conform to. The primary type in the library is `Domain` which can be thought of as a domain +in _representation_ format. Technically since any ASCII `u8` except `b'.'` is allowed in a `Label`, it is more +general than an actual representation format that doesn't include some form of escape characters. For a +full-fledged DNS library look elsewhere (e.g., [`domain`](https://docs.rs/domain/latest/domain/)). + The purpose of this library is to allow efficient customization of domain name parsing while still retaining the hierarchical structure of a domain. Depending on one’s use case, allowed formats and characters can differ. If one wants to conform to the [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181), all octets are allowed; but conforming to [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123) or [RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) requires stricter formats and a reduced character -set. - -### Status - -This package is actively maintained. - +set. + +## License + +Licensed under either of + +* Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0). +* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT). + +at your option. + +## Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, +as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. + +### Status + The crate is only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but it should work on any platform. 
diff --git a/build.rs b/build.rs @@ -1,12 +0,0 @@ -use rustc_version::{version_meta, Channel}; - -fn main() { - // Set cfg flags depending on release channel - let channel = match version_meta().unwrap().channel { - Channel::Stable => "CHANNEL_STABLE", - Channel::Beta => "CHANNEL_BETA", - Channel::Nightly => "CHANNEL_NIGHTLY", - Channel::Dev => "CHANNEL_DEV", - }; - println!("cargo:rustc-cfg={}", channel) -} diff --git a/src/char_set.rs b/src/char_set.rs @@ -1,13 +1,9 @@ use core::{ + error::Error, fmt::{self, Display, Formatter}, str, }; -#[cfg(feature = "std")] -extern crate std; -#[cfg(feature = "std")] -use std::error::Error; /// Error returned from [`AllowedAscii::try_from_unique_ascii`]. -#[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum AsciiErr { /// Since `AllowedAscii` only allows unique ASCII characters and doesn't allow `b'.'`, the maximum count is @@ -22,7 +18,6 @@ pub enum AsciiErr { Duplicate(u8), } impl Display for AsciiErr { - #[allow(unsafe_code)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { @@ -35,16 +30,15 @@ impl Display for AsciiErr { Self::Contains46 => f.write_str("allowed ASCII contains '.'"), Self::Duplicate(byt) => { let input = [byt]; - // SAFETY: - // This variant is only used _after_ verifying the passed `u8`s - // are valid ASCII. - let val = unsafe { str::from_utf8_unchecked(input.as_slice()) }; - write!(f, "allowed ASCII has the duplicate value '{val}'") + if let Ok(val) = str::from_utf8(input.as_slice()) { + write!(f, "allowed ASCII has the duplicate value '{val}'") + } else { + write!(f, "allowed ASCII has the invalid value '{byt}'") + } } } } } -#[cfg(feature = "std")] impl Error for AsciiErr {} /// Container of the ASCII `u8`s that are allowed to appear in a [`crate::dom::Label`]. 
Note that while /// [`crate::dom::Domain`] treats ASCII uppercase letters as lowercase, it still depends on such `u8`s being @@ -105,7 +99,11 @@ impl<T: AsRef<[u8]>> AllowedAscii<T> { /// use ascii_domain::char_set; /// assert!(char_set::ASCII_LETTERS.len() == 52); /// ``` - #[allow(clippy::as_conversions, clippy::cast_possible_truncation)] + #[expect( + clippy::as_conversions, + clippy::cast_possible_truncation, + reason = "comment justifies its correctness" + )] #[inline] #[must_use] pub fn len(&self) -> u8 { diff --git a/src/dom.rs b/src/dom.rs @@ -1,12 +1,11 @@ extern crate alloc; -#[cfg(feature = "std")] -extern crate std; use crate::char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS}; use alloc::{string::String, vec::Vec}; use core::{ borrow::Borrow, cmp::Ordering, convert::{self, AsRef}, + error::Error, fmt::{self, Display, Formatter}, hash::{Hash, Hasher}, iter::FusedIterator, @@ -14,8 +13,6 @@ use core::{ ops::Deref, str, }; -#[cfg(feature = "std")] -use std::error::Error; /// The `AllowedAscii` used by `Rfc1123Domain`. static RFC_CHARS: &AllowedAscii<[u8; 63]> = &ASCII_HYPHEN_DIGITS_LETTERS; /// Returned by [`Domain::cmp_by_domain_ordering`]. It is more informative than [`Ordering`] in that it @@ -24,7 +21,6 @@ static RFC_CHARS: &AllowedAscii<[u8; 63]> = &ASCII_HYPHEN_DIGITS_LETTERS; /// /// Another way to view this is that [`Self::Shorter`] is "closer" to being [`Self::Equal`] than [`Self::Less`] /// since the `Domain`s are still part of the same branch in the DNS hierarchy. Ditto for [`Self::Longer`]. -#[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum DomainOrdering { /// The `Domain` is less than another since a `Label` was less. @@ -74,10 +70,10 @@ impl From<DomainOrdering> for Ordering { /// case. /// /// [^note]: It is a common misconception that the max length of a domain is 255, but that is only true for -/// domains in _wire_ format. 
In representation format, which `Domain` can be thought of when only visible -/// ASCII bytes are used, the max length is 253 when the last byte is not `b'.'`; otherwise the max length is -/// 254. This is due to the fact that there is no way to explicitly represent the root label which in wire format -/// contributes one byte due to each label being preceded by the octet that represents its length. +/// domains in _wire_ format. In representation format, which `Domain` can be thought of when only visible +/// ASCII bytes are used, the max length is 253 when the last byte is not `b'.'`; otherwise the max length is +/// 254. This is due to the fact that there is no way to explicitly represent the root label which in wire format +/// contributes one byte due to each label being preceded by the octet that represents its length. /// /// Note this only contains `T`, so this is allocation-free and the same size as `T`. #[derive(Clone, Copy, Debug)] @@ -122,7 +118,11 @@ impl<T: AsRef<[u8]>> Domain<T> { /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().contains_trailing_dot()); /// ``` - #[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] + #[expect( + clippy::arithmetic_side_effects, + clippy::indexing_slicing, + reason = "comments explain their correctness" + )] #[inline] pub fn contains_trailing_dot(&self) -> bool { let bytes = self.value.as_ref(); @@ -161,21 +161,24 @@ impl<T: AsRef<[u8]>> Domain<T> { /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().len().get() == 11); /// ``` - #[inline] - #[allow( + #[expect( unsafe_code, + reason = "we enforce nonzero lengths, so NonZeroU8::new_unchecked is fine" + )] + #[expect( clippy::arithmetic_side_effects, clippy::as_conversions, - clippy::cast_lossless, - clippy::cast_possible_truncation + clippy::cast_possible_truncation, + 
reason = "comments justify their correctness" )] + #[inline] pub fn len(&self) -> NonZeroU8 { // No fear of underflow since the length of `value` is at least 1 _not including_ the // trailing `b'.'` if there was one. // `true as usize` is guaranteed to be 1 and `false as usize` is guaranteed to be 0. // No fear of truncation either since the length is guaranteed to be less than 255. // `Domain` is immutable ensuring such invariants are kept. - let len = (self.value.as_ref().len() - self.contains_trailing_dot() as usize) as u8; + let len = (self.value.as_ref().len() - usize::from(self.contains_trailing_dot())) as u8; // SAFETY: // The only way to construct a `Domain` is via `try_from_bytes` which ensures `len` // is at least 1. @@ -199,10 +202,9 @@ impl<T: AsRef<[u8]>> Domain<T> { /// # Errors /// /// Returns [`DomainErr`] iff `v.as_ref()` is an invalid `Domain`. - #[allow( + #[expect( clippy::arithmetic_side_effects, - clippy::indexing_slicing, - clippy::into_iter_on_ref + reason = "comment justifies its correctness" )] #[inline] pub fn try_from_bytes<T2: AsRef<[u8]>>( @@ -210,29 +212,29 @@ impl<T: AsRef<[u8]>> Domain<T> { allowed_ascii: &AllowedAscii<T2>, ) -> Result<Self, DomainErr> { let val = v.as_ref(); - let value = match val.last() { - None => return Err(DomainErr::Empty), - Some(byt) => { - if *byt == b'.' { - if val.len() == 1 { - return Err(DomainErr::RootDomain); - } - // We know `val.len` is at least 2. - let len = val.len() - 1; - if val[len - 1] == b'.' { - return Err(DomainErr::EmptyLabel); - } - &val[..len] + let value = val + .split_last() + .ok_or(DomainErr::Empty) + .and_then(|(lst, rem)| { + if *lst == b'.' { + rem.split_last() + .ok_or(DomainErr::RootDomain) + .and_then(|(lst_2, _)| { + if *lst_2 == b'.' 
{ + Err(DomainErr::EmptyLabel) + } else { + Ok(rem) + } + }) } else { - val + Ok(val) } - } - }; + })?; if value.len() > 253 { Err(DomainErr::LenExceeds253(value.len())) } else { value - .into_iter() + .iter() .try_fold(0, |label_len, byt| { let b = *byt; if b == b'.' { @@ -285,7 +287,7 @@ impl<T: AsRef<[u8]>> Domain<T> { if self == right { true } else { - self.into_iter() + self.iter() .zip(right) .try_fold( (), @@ -319,15 +321,14 @@ impl<T: AsRef<[u8]>> Domain<T> { /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom3), DomainOrdering::Less)); /// assert!(matches!(dom3.cmp_by_domain_ordering(&dom1), DomainOrdering::Greater)); /// ``` - #[allow(clippy::unreachable)] #[inline] pub fn cmp_by_domain_ordering<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> DomainOrdering { // Faster to compare the entire value when we can instead of each `Label`. if self == right { DomainOrdering::Equal } else { - let mut right_iter = right.into_iter(); - self.into_iter() + let mut right_iter = right.iter(); + self.iter() .try_fold(false, |_, label| { right_iter .next() @@ -388,10 +389,10 @@ impl<T: AsRef<[u8]>> Domain<T> { /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().first_label().as_str() == "example"); /// ``` - #[allow(clippy::unreachable)] + #[expect(clippy::unreachable, reason = "bug in code, so we want to crash")] #[inline] pub fn first_label(&self) -> Label<'_> { - self.into_iter() + self.iter() .next_back() .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")) } @@ -403,10 +404,10 @@ impl<T: AsRef<[u8]>> Domain<T> { /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().tld().as_str() == "com"); /// ``` - #[allow(clippy::unreachable)] + #[expect(clippy::unreachable, reason = "bug in code, so we want to crash")] #[inline] pub fn tld(&self) -> Label<'_> { - self.into_iter() + 
self.iter() .next() .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")) } @@ -485,7 +486,10 @@ impl<T: AsRef<[u8]>> Deref for Domain<T> { } } impl From<Domain<Vec<u8>>> for Domain<String> { - #[allow(unsafe_code)] + #[expect( + unsafe_code, + reason = "we enforce ASCII, so String::from_utf8_unchecked is fine" + )] #[inline] fn from(value: Domain<Vec<u8>>) -> Self { // SAFETY: @@ -519,7 +523,10 @@ impl From<Domain<String>> for Domain<Vec<u8>> { } } impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> { - #[allow(unsafe_code)] + #[expect( + unsafe_code, + reason = "we enforce ASCII, so str::from_utf8_unchecked is fine" + )] #[inline] fn from(value: Domain<&'a [u8]>) -> Self { // SAFETY: @@ -565,12 +572,15 @@ impl<'a: 'b, 'b> From<Domain<&'a str>> for &'b str { /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; /// assert!(<&str>::from(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap()) == "Example.com"); /// ``` - #[allow(unsafe_code, clippy::as_conversions, clippy::indexing_slicing)] + #[expect( + unsafe_code, + reason = "we enforce ASCII, so str::from_utf8_unchecked is fine" + )] + #[expect(clippy::indexing_slicing, reason = "comment justifies its correctness")] #[inline] fn from(value: Domain<&'a str>) -> Self { - // `value.len().get() as usize` is fine since it's a positive `u8`. // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. - let utf8 = &value.value.as_bytes()[..value.len().get() as usize]; + let utf8 = &value.value.as_bytes()[..usize::from(value.len().get())]; // SAFETY: // Only ASCII is allowed, so this is fine. 
unsafe { str::from_utf8_unchecked(utf8) } @@ -605,16 +615,14 @@ impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for &'b [u8] { /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; /// assert!(<&[u8]>::from(Domain::try_from_bytes(b"Example.com.".as_slice(), &ASCII_LETTERS).unwrap()) == b"Example.com"); /// ``` - #[allow(clippy::as_conversions, clippy::indexing_slicing)] + #[expect(clippy::indexing_slicing, reason = "comment justifies its correctness")] #[inline] fn from(value: Domain<&'a [u8]>) -> Self { - // `value.len().get() as usize` is fine since it's a positive `u8`. // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. - &value.value[..value.len().get() as usize] + &value.value[..usize::from(value.len().get())] } } /// Error returned from [`Domain::try_from_bytes`]. -#[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum DomainErr { /// The domain was empty. @@ -651,7 +659,6 @@ impl Display for DomainErr { } } } -#[cfg(feature = "std")] impl Error for DomainErr {} /// A label of a [`Domain`]. The total length of a `Label` is inclusively between 1 and 63. 
#[derive(Clone, Copy, Debug)] @@ -681,13 +688,12 @@ impl<'a> Label<'a> { /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphabetic())); /// ``` - #[allow(clippy::into_iter_on_ref)] #[inline] #[must_use] pub fn is_alphabetic(self) -> bool { self.value .as_bytes() - .into_iter() + .iter() .try_fold((), |(), byt| { if byt.is_ascii_alphabetic() { Ok(()) @@ -705,13 +711,12 @@ impl<'a> Label<'a> { /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.123", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_digits())); /// ``` - #[allow(clippy::into_iter_on_ref)] #[inline] #[must_use] pub fn is_digits(self) -> bool { self.value .as_bytes() - .into_iter() + .iter() .try_fold((), |(), byt| { if byt.is_ascii_digit() { Ok(()) @@ -729,13 +734,12 @@ impl<'a> Label<'a> { /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.1com", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphanumeric())); /// ``` - #[allow(clippy::into_iter_on_ref)] #[inline] #[must_use] pub fn is_alphanumeric(self) -> bool { self.value .as_bytes() - .into_iter() + .iter() .try_fold((), |(), byt| { if byt.is_ascii_alphanumeric() { Ok(()) @@ -753,13 +757,12 @@ impl<'a> Label<'a> { /// use ascii_domain::{dom::Domain, char_set::ASCII_HYPHEN_DIGITS_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.1-com", &ASCII_HYPHEN_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_hyphen_or_alphanumeric())); /// ``` - #[allow(clippy::into_iter_on_ref)] #[inline] #[must_use] pub fn is_hyphen_or_alphanumeric(self) -> bool { self.value .as_bytes() - .into_iter() + .iter() .try_fold((), |(), byt| { if *byt == b'-' || byt.is_ascii_alphanumeric() { Ok(()) @@ -777,7 
+780,15 @@ impl<'a> Label<'a> { /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.len().get() == 3)); /// ``` - #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)] + #[expect( + unsafe_code, + reason = "we enforce label lengths, so NonZeroU8::new_unchecked is fine" + )] + #[expect( + clippy::as_conversions, + clippy::cast_possible_truncation, + reason = "comments justify their correctness" + )] #[inline] #[must_use] pub const fn len(self) -> NonZeroU8 { @@ -864,17 +875,20 @@ pub struct LabelIter<'a> { } impl<'a> Iterator for LabelIter<'a> { type Item = Label<'a>; - #[allow( + #[expect( unsafe_code, + reason = "we only allow ASCII, so str::from_utf8_unchecked is fine" + )] + #[expect( clippy::arithmetic_side_effects, clippy::indexing_slicing, - clippy::into_iter_on_ref + reason = "comments justify their correctness" )] #[inline] fn next(&mut self) -> Option<Self::Item> { (!self.domain.is_empty()).then(|| { self.domain - .into_iter() + .iter() .rev() .try_fold(1, |count, byt| { if *byt == b'.' { @@ -908,7 +922,6 @@ impl<'a> Iterator for LabelIter<'a> { { self.next_back() } - #[allow(clippy::integer_division)] #[inline] fn size_hint(&self) -> (usize, Option<usize>) { if self.domain.is_empty() { @@ -920,7 +933,7 @@ impl<'a> Iterator for LabelIter<'a> { // The min size of a `Label` is 1; and all but the last have a `b'.'` that follow it. // This means the max number of `Label`s is the ceiling of the length divided by 2. 
( - (self.domain.len() / 64).max(1), + (self.domain.len() >> 6).max(1), Some(self.domain.len().div_ceil(2)), ) } @@ -928,17 +941,20 @@ impl<'a> Iterator for LabelIter<'a> { } impl FusedIterator for LabelIter<'_> {} impl DoubleEndedIterator for LabelIter<'_> { - #[allow( + #[expect( unsafe_code, + reason = "we only allow ASCII, so str::from_utf8_unchecked is fine" + )] + #[expect( clippy::arithmetic_side_effects, clippy::indexing_slicing, - clippy::into_iter_on_ref + reason = "comments justify their correctness" )] #[inline] fn next_back(&mut self) -> Option<Self::Item> { (!self.domain.is_empty()).then(|| { self.domain - .into_iter() + .iter() .try_fold(0, |count, byt| { if *byt == b'.' { // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s @@ -996,7 +1012,6 @@ impl<'a> IntoIterator for Domain<&'a [u8]> { } } /// Error returned from [`Rfc1123Domain::try_from`] and [`Rfc1123Domain::try_from_bytes`]. -#[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rfc1123Err { /// The input was not a valid [`Domain`]. @@ -1022,7 +1037,6 @@ impl Display for Rfc1123Err { } } } -#[cfg(feature = "std")] impl Error for Rfc1123Err {} /// **TL;DR** Wrapper type around a [`Domain`] that enforces conformance to /// [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123#page-13). @@ -1030,7 +1044,7 @@ impl Error for Rfc1123Err {} /// * Each [`Label`] must only contain ASCII digits, letters, or hyphen. /// * Each `Label` must not begin or end with a hyphen. /// * The last `Label` (i.e., TLD) must either contain only ASCII letters or have length of at least five and -/// begin with `xn--`. +/// begin with `xn--`. 
/// --- /// Unsurprisingly, RFC 1123 is not super precise as it uses "host name" to mean label and also domain: /// "Host software MUST handle host names \[labels\] of up to 63 characters and SHOULD handle host @@ -1053,7 +1067,7 @@ impl Error for Rfc1123Err {} /// * More relaxed: enforce the "spirit" that the TLD must have the same format of a valid TLD. /// * Much more relaxed: enforce the "spirit" that the domain cannot have the form of an IPv4 address. /// * Most relaxed: treat TLDs no differently than other labels (i.e., don't make assumptions about what will be -/// a valid TLD in the future). +/// a valid TLD in the future). /// /// RFC 1123 is not obsolete, and it is clear from more recent RFCs like /// [RFC 5891](https://www.rfc-editor.org/rfc/rfc5891) that it is designed to be a foundation (i.e., domains that @@ -1134,13 +1148,12 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> { /// # Errors /// /// Returns [`Rfc1123Err`] iff `v.as_ref()` is an invalid `Rfc1123Domain`. - #[allow( + #[expect( clippy::arithmetic_side_effects, - clippy::as_conversions, clippy::indexing_slicing, - clippy::into_iter_on_ref, - clippy::redundant_else + reason = "comments justify their correctness" )] + #[expect(clippy::redundant_else, reason = "prefer else with else-if")] #[inline] pub fn try_from_bytes(v: T) -> Result<Self, Rfc1123Err> { // The easiest implementation would be redirecting to `Domain::try_from_bytes`; and upon success, @@ -1177,7 +1190,7 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> { } else { let mut count = 0; value - .into_iter() + .iter() .try_fold(0, |label_len, byt| { let b = *byt; if b == b'.' { @@ -1209,10 +1222,12 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> { }) .and_then(|tld_len| { // `tld_len <= value.len()`. 
- let tld = &value[value.len() - tld_len as usize..]; - if (tld_len > 4 && tld[..4] == *b"xn--".as_slice()) + let tld = &value[value.len() - usize::from(tld_len)..]; + if (tld + .split_at_checked(4) + .map_or(false, |(fst, rem)| !rem.is_empty() && fst == b"xn--")) || tld - .into_iter() + .iter() .try_fold((), |(), byt| { if byt.is_ascii_alphabetic() { Ok(()) @@ -1410,20 +1425,22 @@ impl<'a: 'b, 'b> From<Rfc1123Domain<&'a str>> for Rfc1123Domain<&'b [u8]> { } impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> { type Error = Rfc1123Err; - #[allow( + #[expect( clippy::arithmetic_side_effects, clippy::indexing_slicing, - clippy::into_iter_on_ref, - clippy::unreachable + clippy::unreachable, + reason = "comments explain their correctness" )] #[inline] fn try_from(value: Domain<T>) -> Result<Self, Self::Error> { - let mut labels = value.into_iter(); + let mut labels = value.iter(); let tld = labels .next() .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")); if tld.is_alphabetic() - || (tld.len().get() > 4 && tld.as_bytes()[..4] == *b"xn--".as_slice()) + || tld + .split_at_checked(4) + .map_or(false, |(fst, rem)| !rem.is_empty() && fst == "xn--") { labels .try_fold((), |(), label| { @@ -1435,7 +1452,7 @@ impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> { } else if bytes[bytes.len() - 1] == b'-' { Err(Rfc1123Err::LabelEndsWithAHyphen) } else { - bytes.into_iter().try_fold((), |(), byt| match *byt { + bytes.iter().try_fold((), |(), byt| match *byt { b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => Ok(()), val => Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(val))), }) @@ -1522,8 +1539,8 @@ mod tests { assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or(false, |d| 
d.len().get() == 253 )); // Test max labels. assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(255), |_| false)); - assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253)); - assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.", &allowed_ascii).map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253)); + assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); + assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.", &allowed_ascii).map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); // Test removal of trailing '.'. 
assert!( Domain::try_from_bytes("com.", &allowed_ascii).map_or(false, |d| d.as_str() == "com") @@ -1589,7 +1606,7 @@ mod tests { let allowed_ascii = ASCII_FIREFOX; assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.into_iter(); + let mut iter = d.iter(); let Some(l) = iter.next() else { return false; }; @@ -1611,7 +1628,7 @@ mod tests { ); assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.into_iter(); + let mut iter = d.iter(); let Some(l) = iter.next_back() else { return false; }; @@ -1635,7 +1652,7 @@ mod tests { ); assert!( Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { - let mut iter = d.into_iter(); + let mut iter = d.iter(); let Some(l) = iter.next_back() else { return false; }; @@ -1772,8 +1789,8 @@ mod tests { assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or(false, |d| d.len().get() == 253 )); // Test max labels. 
assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(255)), |_| false)); - assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253)); - assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.").map_or(false, |d| d.into_iter().count() == 127 && d.len().get() == 253)); + assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); + assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.").map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); // Test removal of trailing '.'. 
assert!(Rfc1123Domain::try_from_bytes("com.").map_or(false, |d| d.as_str() == "com")); // Test single label. diff --git a/src/lib.rs b/src/lib.rs @@ -13,8 +13,8 @@ //! all octets are allowed; but conforming to [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123) or //! [RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) requires stricter formats and a reduced character //! set. +#![cfg_attr(docsrs, feature(doc_cfg))] #![no_std] -#![cfg_attr(all(doc, CHANNEL_NIGHTLY), feature(doc_auto_cfg))] #![deny( future_incompatible, let_underscore, @@ -38,14 +38,15 @@ clippy::style, clippy::suspicious )] -#![allow( +#![expect( clippy::blanket_clippy_restriction_lints, + clippy::exhaustive_enums, clippy::implicit_return, clippy::min_ident_chars, clippy::missing_trait_methods, - clippy::single_call_fn, + clippy::question_mark_used, clippy::single_char_lifetime_names, - clippy::unseparated_literal_suffix + reason = "noisy, opinionated, and likely doesn't prevent bugs or improve APIs" )] /// Contains [`char_set::AllowedAscii`] which is how one dictates the character set [`dom::Domain::try_from_bytes`] /// uses. @@ -56,5 +57,6 @@ pub mod char_set; pub mod dom; /// Contains a Serde [`Visitor`](https://docs.rs/serde/latest/serde/de/trait.Visitor.html) that can be used to help /// deserialize [`dom::Domain`] wrappers. +#[cfg_attr(docsrs, doc(cfg(feature = "serde")))] #[cfg(feature = "serde")] pub mod serde; diff --git a/src/serde.rs b/src/serde.rs @@ -1,4 +1,3 @@ -#![cfg(feature = "serde")] extern crate alloc; use crate::{ char_set::{AllowedAscii, PRINTABLE_ASCII}, @@ -56,7 +55,10 @@ impl<T: AsRef<[u8]>> Serialize for Rfc1123Domain<T> { /// } /// } /// ``` -#[allow(clippy::partial_pub_fields)] +#[expect( + clippy::partial_pub_fields, + reason = "we don't expost PhantomData for obvious reasons, so this is fine" +)] #[derive(Clone, Copy)] pub struct DomainVisitor<'a, T, T2> { /// Phantom. 
@@ -65,8 +67,6 @@ pub struct DomainVisitor<'a, T, T2> { pub allowed_ascii: &'a AllowedAscii<T>, } /// Converts `DomainErr` to a Serde `de::Error`. -#[allow(clippy::as_conversions, clippy::cast_lossless)] -#[inline] fn dom_err_to_serde<E: de::Error>(value: DomainErr) -> E { match value { DomainErr::Empty => E::invalid_length( @@ -89,7 +89,7 @@ fn dom_err_to_serde<E: de::Error>(value: DomainErr) -> E { &"a valid domain containing labels of length inclusively between 1 and 63", ), DomainErr::InvalidByte(byt) => E::invalid_value( - Unexpected::Unsigned(byt as u64), + Unexpected::Unsigned(u64::from(byt)), &"a valid domain containing only the supplied ASCII subset", ), } @@ -167,7 +167,6 @@ impl<'de: 'a, 'a> Deserialize<'de> for Domain<&'a str> { } } /// Converts `Rfc1123Err` to a Serde `de::Error`. -#[inline] fn rfc_err_to_serde<E: de::Error>(value: Rfc1123Err) -> E { match value { Rfc1123Err::DomainErr(err) => dom_err_to_serde(err), @@ -181,11 +180,9 @@ fn rfc_err_to_serde<E: de::Error>(value: Rfc1123Err) -> E { struct Rfc1123Visitor<T>(PhantomData<fn() -> T>); impl<'de: 'a, 'a> Visitor<'de> for Rfc1123Visitor<&'a str> { type Value = Rfc1123Domain<&'a str>; - #[inline] fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { formatter.write_str("Rfc1123Domain") } - #[inline] fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E> where E: de::Error, @@ -195,18 +192,15 @@ impl<'de: 'a, 'a> Visitor<'de> for Rfc1123Visitor<&'a str> { } impl<'de> Visitor<'de> for Rfc1123Visitor<String> { type Value = Rfc1123Domain<String>; - #[inline] fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { formatter.write_str("Rfc1123Domain") } - #[inline] fn visit_string<E>(self, v: String) -> Result<Self::Value, E> where E: de::Error, { Self::Value::try_from_bytes(v).map_err(|err| rfc_err_to_serde(err)) } - #[inline] fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> where E: de::Error,