ascii_domain

Domains whose labels are only ASCII.
git clone https://git.philomathiclife.com/repos/ascii_domain
Log | Files | Refs | README

commit 563f72d5b1f234181c1378c91db4f5ea542e10ce
parent 353e30dbfbd1bc5c3ee32cf4da0a44e8e946dd42
Author: Zack Newman <zack@philomathiclife.com>
Date:   Thu, 20 Feb 2025 16:48:43 -0700

rust 2024

Diffstat:
M Cargo.toml | 13 +++++--------
M README.md | 27 ++++++++++++++++++++++++---
M src/char_set.rs | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------
M src/dom.rs | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
M src/lib.rs | 11 +++++++++--
M src/serde.rs | 68 +++++++++++++++++++++++++++++++++++++++++++-------------------------
6 files changed, 233 insertions(+), 103 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -3,27 +3,24 @@ authors = ["Zack Newman <zack@philomathiclife.com>"] categories = ["no-std", "parsing"] description = "Parser for DNS names based on a provided ASCII character set." documentation = "https://docs.rs/ascii_domain/latest/ascii_domain/" -edition = "2021" +edition = "2024" keywords = ["ascii", "dns", "domain", "validation"] license = "MIT OR Apache-2.0" name = "ascii_domain" readme = "README.md" repository = "https://git.philomathiclife.com/repos/ascii_domain/" -rust-version = "1.81.0" -version = "0.6.2" - -[badges] -maintenance = { status = "actively-developed" } +rust-version = "1.85.0" +version = "0.6.3" [package.metadata.docs.rs] all-features = true rustdoc-args = ["--cfg", "docsrs"] [dependencies] -serde = { version = "1.0.210", default-features = false, features = ["alloc"], optional = true } +serde = { version = "1.0.218", default-features = false, features = ["alloc"], optional = true } [dev-dependencies] -serde_json = { version = "1.0.128", default-features = false, features = ["alloc"] } +serde_json = { version = "1.0.139", default-features = false, features = ["alloc"] } ### FEATURES ################################################################# diff --git a/README.md b/README.md @@ -1,4 +1,8 @@ -# ascii_domain +# `ascii_domain` + +[<img alt="git" src="https://git.philomathiclife.com/badges/ascii_domain.svg" height="20">](https://git.philomathiclife.com/ascii_domain/log.html) +[<img alt="crates.io" src="https://img.shields.io/crates/v/ascii_domain.svg?style=for-the-badge&color=fc8d62&logo=rust" height="20">](https://crates.io/crates/ascii_domain) +[<img alt="docs.rs" src="https://img.shields.io/badge/docs.rs-ascii_domain-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs" height="20">](https://docs.rs/ascii_domain/latest/ascii_domain/) `ascii_domain` is a library for efficiently parsing domains based on a supplied ASCII character set one wants to enforce each `Label` to conform to. 
The primary type in the library is `Domain` which can be thought of as a domain @@ -13,12 +17,25 @@ all octets are allowed; but conforming to [RFC 1123](https://www.rfc-editor.org/ [RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) requires stricter formats and a reduced character set. +## Minimum Supported Rust Version (MSRV) + +This will frequently be updated to be the same as stable. Specifically, any time stable is updated and that +update has "useful" features or compilation no longer succeeds (e.g., due to new compiler lints), then MSRV +will be updated. + +MSRV changes will correspond to a SemVer patch version bump pre-`1.0.0`; otherwise a minor version bump. + +## SemVer Policy + +* All on-by-default features of this library are covered by SemVer +* MSRV is considered exempt from SemVer as noted above + ## License Licensed under either of -* Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0). -* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT). +* Apache License, Version 2.0 ([LICENSE-APACHE](https://www.apache.org/licenses/LICENSE-2.0)) +* MIT license ([LICENSE-MIT](https://opensource.org/licenses/MIT)) at your option. @@ -27,6 +44,10 @@ at your option. Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. +Before any PR is sent, `cargo clippy` and `cargo t` should be run for both `--no-default-features` and +`--all-features`. Additionally `RUSTDOCFLAGS="--cfg docsrs" cargo +nightly doc --all-features` should be run to +ensure documentation can be built. 
+ ### Status The crate is only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but diff --git a/src/char_set.rs b/src/char_set.rs @@ -40,7 +40,9 @@ impl Display for AsciiErr { } } impl Error for AsciiErr {} -/// Container of the ASCII `u8`s that are allowed to appear in a [`crate::dom::Label`]. Note that while +/// Container of the ASCII `u8`s that are allowed to appear in a [`crate::dom::Label`]. +/// +/// Note that while /// [`crate::dom::Domain`] treats ASCII uppercase letters as lowercase, it still depends on such `u8`s being /// included. For example if `b'A'` is not included, then `b'A'` is not allowed even if `b'a'` is included. /// @@ -181,8 +183,9 @@ impl<T: AsMut<[u8]>> AllowedAscii<T> { } } } -/// Printable ASCII that should not need to be "escaped". That is to say -/// printable ASCII excluding space (i.e., 32), dot (i.e. 46), and backslash (i.e., 92). +/// Printable ASCII that should not need to be "escaped". +/// +/// That is to say printable ASCII excluding space (i.e., 32), dot (i.e. 46), and backslash (i.e., 92). /// This returns all `u8`s inclusively between 33 and 126 except 46 and 92. pub const PRINTABLE_ASCII: AllowedAscii<[u8; 92]> = AllowedAscii { allowed: [ @@ -313,14 +316,29 @@ pub const ASCII_LOWERCASE: AllowedAscii<[u8; 26]> = AllowedAscii { pub const ASCII_DIGITS: AllowedAscii<[u8; 10]> = AllowedAscii { allowed: [b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9'], }; +/// ASCII that is not a [forbidden domain code point](https://url.spec.whatwg.org/#forbidden-domain-code-point). 
+/// +/// This contains the following `u8`s: +/// +/// 33, 34, 36, 38–45, 48–57, 59, 61, 65–90, 95–123, and 125–126 +pub const WHATWG_VALID_DOMAIN_CODE_POINTS: AllowedAscii<[u8; 80]> = AllowedAscii { + allowed: [ + b'!', b'"', b'$', b'&', b'\'', b'(', b')', b'*', b'+', b',', b'-', b'0', b'1', b'2', b'3', + b'4', b'5', b'6', b'7', b'8', b'9', b';', b'=', b'A', b'B', b'C', b'D', b'E', b'F', b'G', + b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', + b'W', b'X', b'Y', b'Z', b'_', b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', + b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', + b'y', b'z', b'{', b'}', b'~', + ], +}; #[cfg(test)] mod tests { extern crate alloc; use crate::char_set::{ - AllowedAscii, AsciiErr, ASCII_DIGITS, ASCII_DIGITS_LETTERS, ASCII_DIGITS_LOWERCASE, - ASCII_DIGITS_UPPERCASE, ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS, - ASCII_HYPHEN_DIGITS_LOWERCASE, ASCII_HYPHEN_DIGITS_UPPERCASE, ASCII_LETTERS, - ASCII_LOWERCASE, ASCII_UPPERCASE, PRINTABLE_ASCII, RFC5322_ATEXT, + ASCII_DIGITS, ASCII_DIGITS_LETTERS, ASCII_DIGITS_LOWERCASE, ASCII_DIGITS_UPPERCASE, + ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS, ASCII_HYPHEN_DIGITS_LOWERCASE, + ASCII_HYPHEN_DIGITS_UPPERCASE, ASCII_LETTERS, ASCII_LOWERCASE, ASCII_UPPERCASE, + AllowedAscii, AsciiErr, PRINTABLE_ASCII, RFC5322_ATEXT, WHATWG_VALID_DOMAIN_CODE_POINTS, }; use alloc::{borrow::ToOwned, vec::Vec}; #[test] @@ -328,21 +346,29 @@ mod tests { // Empty is allowed. assert!(AllowedAscii::try_from_unique_ascii([]).is_ok()); // Duplicates are not allowed. - assert!(AllowedAscii::try_from_unique_ascii(b"aba".to_owned()) - .map_or_else(|e| e == AsciiErr::Duplicate(b'a'), |_| false)); + assert!( + AllowedAscii::try_from_unique_ascii(b"aba".to_owned()) + .map_or_else(|e| e == AsciiErr::Duplicate(b'a'), |_| false) + ); // `b'.'` is not allowed. 
- assert!(AllowedAscii::try_from_unique_ascii(b"a.c".to_owned()) - .map_or_else(|e| e == AsciiErr::Contains46, |_| false)); + assert!( + AllowedAscii::try_from_unique_ascii(b"a.c".to_owned()) + .map_or_else(|e| e == AsciiErr::Contains46, |_| false) + ); // At most 127 bytes are allowed. - assert!(AllowedAscii::try_from_unique_ascii([0; 128]) - .map_or_else(|e| e == AsciiErr::CountTooLarge(128), |_| false)); + assert!( + AllowedAscii::try_from_unique_ascii([0; 128]) + .map_or_else(|e| e == AsciiErr::CountTooLarge(128), |_| false) + ); let mut all_ascii = (0..b'.').collect::<Vec<u8>>(); let next = b'.' + 1; all_ascii.extend(next..=127); assert!(AllowedAscii::try_from_unique_ascii(all_ascii).is_ok()); // Only ASCII is allowed. - assert!(AllowedAscii::try_from_unique_ascii([255]) - .map_or_else(|e| e == AsciiErr::InvalidByte(255), |_| false)); + assert!( + AllowedAscii::try_from_unique_ascii([255]) + .map_or_else(|e| e == AsciiErr::InvalidByte(255), |_| false) + ); assert!( AllowedAscii::try_from_unique_ascii(b"abcdef".to_owned()).map_or(false, |bytes| bytes .contains(b'a') @@ -504,5 +530,26 @@ mod tests { assert!(rfc.contains(b'|')); assert!(rfc.contains(b'}')); assert!(rfc.contains(b'~')); + let whatwg = WHATWG_VALID_DOMAIN_CODE_POINTS; + for i in 0..=0x1f { + assert!(!whatwg.contains(i)); + } + assert!(!whatwg.contains(b'\x20')); + assert!(!whatwg.contains(b'#')); + assert!(!whatwg.contains(b'/')); + assert!(!whatwg.contains(b':')); + assert!(!whatwg.contains(b'<')); + assert!(!whatwg.contains(b'>')); + assert!(!whatwg.contains(b'?')); + assert!(!whatwg.contains(b'@')); + assert!(!whatwg.contains(b'[')); + assert!(!whatwg.contains(b'\\')); + assert!(!whatwg.contains(b']')); + assert!(!whatwg.contains(b'^')); + assert!(!whatwg.contains(b'|')); + assert!(!whatwg.contains(b'%')); + assert!(!whatwg.contains(b'\x7f')); + assert!(!whatwg.contains(b'.')); + assert!(whatwg.len() == 128 - 32 - 16); } } diff --git a/src/dom.rs b/src/dom.rs @@ -1,5 +1,5 @@ extern crate alloc; 
-use crate::char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS}; +use crate::char_set::{ASCII_HYPHEN_DIGITS_LETTERS, AllowedAscii}; use alloc::{string::String, vec::Vec}; use core::{ borrow::Borrow, @@ -15,7 +15,9 @@ use core::{ }; /// The `AllowedAscii` used by `Rfc1123Domain`. static RFC_CHARS: &AllowedAscii<[u8; 63]> = &ASCII_HYPHEN_DIGITS_LETTERS; -/// Returned by [`Domain::cmp_by_domain_ordering`]. It is more informative than [`Ordering`] in that it +/// Returned by [`Domain::cmp_by_domain_ordering`]. +/// +/// It is more informative than [`Ordering`] in that it /// distinguishes between a `Domain` that is greater than another `Domain` due to a [`Label`] being greater /// from a `Domain` that has the same `Label`s as another but simply more. /// @@ -57,7 +59,9 @@ impl From<DomainOrdering> for Ordering { } } /// A domain that consists of at least one [`Label`] with each `Label` only containing the ASCII `u8`s in -/// the [`AllowedAscii`] passed to [`Self::try_from_bytes`]. The total length of a `Domain` is at most +/// the [`AllowedAscii`] passed to [`Self::try_from_bytes`]. +/// +/// The total length of a `Domain` is at most /// 253 bytes[^note] in length including the `b'.'` separator. The trailing `b'.'`, if one exists, is always /// ignored. /// @@ -293,7 +297,7 @@ impl<T: AsRef<[u8]>> Domain<T> { (), |(), (label, label2)| if label == label2 { Ok(()) } else { Err(()) }, ) - .map_or(false, |()| true) + .is_ok_and(|()| true) } } /// Same as [`Self::cmp_doms`] except returns [`DomainOrdering::Longer`] iff `self > right` due solely @@ -580,7 +584,7 @@ impl<'a: 'b, 'b> From<Domain<&'a str>> for &'b str { #[inline] fn from(value: Domain<&'a str>) -> Self { // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. - let utf8 = &value.value.as_bytes()[..usize::from(value.len().get())]; + let utf8 = &value.value.as_bytes()[..value.len().get().into()]; // SAFETY: // Only ASCII is allowed, so this is fine. 
unsafe { str::from_utf8_unchecked(utf8) } @@ -619,7 +623,7 @@ impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for &'b [u8] { #[inline] fn from(value: Domain<&'a [u8]>) -> Self { // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. - &value.value[..usize::from(value.len().get())] + &value.value[..value.len().get().into()] } } /// Error returned from [`Domain::try_from_bytes`]. @@ -1225,7 +1229,7 @@ impl<T: AsRef<[u8]>> Rfc1123Domain<T> { let tld = &value[value.len() - usize::from(tld_len)..]; if (tld .split_at_checked(4) - .map_or(false, |(fst, rem)| !rem.is_empty() && fst == b"xn--")) + .is_some_and(|(fst, rem)| !rem.is_empty() && fst == b"xn--")) || tld .iter() .try_fold((), |(), byt| { @@ -1440,7 +1444,7 @@ impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> { if tld.is_alphabetic() || tld .split_at_checked(4) - .map_or(false, |(fst, rem)| !rem.is_empty() && fst == "xn--") + .is_some_and(|(fst, rem)| !rem.is_empty() && fst == "xn--") { labels .try_fold((), |(), label| { @@ -1504,36 +1508,50 @@ impl<'a> IntoIterator for Rfc1123Domain<&'a [u8]> { mod tests { extern crate alloc; use super::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err}; - use crate::char_set::{AllowedAscii, ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS}; + use crate::char_set::{ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS, AllowedAscii}; use alloc::borrow::ToOwned; use core::cmp::Ordering; #[test] fn test_dom_parse() { let allowed_ascii = ASCII_FIREFOX; // Test empty is error. - assert!(Domain::try_from_bytes("", &allowed_ascii) - .map_or_else(|e| e == DomainErr::Empty, |_| false)); + assert!( + Domain::try_from_bytes("", &allowed_ascii) + .map_or_else(|e| e == DomainErr::Empty, |_| false) + ); // Test root domain. - assert!(Domain::try_from_bytes(".", &allowed_ascii) - .map_or_else(|e| e == DomainErr::RootDomain, |_| false)); + assert!( + Domain::try_from_bytes(".", &allowed_ascii) + .map_or_else(|e| e == DomainErr::RootDomain, |_| false) + ); // Test empty label is error. 
- assert!(Domain::try_from_bytes("a..com", &allowed_ascii) - .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); - assert!(Domain::try_from_bytes("a..", &allowed_ascii) - .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); - assert!(Domain::try_from_bytes("..", &allowed_ascii) - .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); + assert!( + Domain::try_from_bytes("a..com", &allowed_ascii) + .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) + ); + assert!( + Domain::try_from_bytes("a..", &allowed_ascii) + .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) + ); + assert!( + Domain::try_from_bytes("..", &allowed_ascii) + .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) + ); // Test label too long. let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; // 4 + 64 + 4 assert!(val.len() == 72); - assert!(Domain::try_from_bytes(val, &allowed_ascii) - .map_or_else(|e| e == DomainErr::LabelLenExceeds63, |_| false)); - assert!(Domain::try_from_bytes( - "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", - &allowed_ascii - ) - .map_or(false, |d| d.len().get() == 71)); + assert!( + Domain::try_from_bytes(val, &allowed_ascii) + .map_or_else(|e| e == DomainErr::LabelLenExceeds63, |_| false) + ); + assert!( + Domain::try_from_bytes( + "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", + &allowed_ascii + ) + .map_or(false, |d| d.len().get() == 71) + ); // Test domain too long. 
assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(254), |_| false)); assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or(false, |d| d.len().get() == 253 )); @@ -1595,8 +1613,10 @@ mod tests { && d.value == input2) ) } - _ => assert!(Domain::try_from_bytes(input, &allowed_ascii) - .map_or_else(|e| e == DomainErr::InvalidByte(i), |_| false)), + _ => assert!( + Domain::try_from_bytes(input, &allowed_ascii) + .map_or_else(|e| e == DomainErr::InvalidByte(i), |_| false) + ), } } assert!(counter == 78); @@ -1752,8 +1772,10 @@ mod tests { #[test] fn test_rfc1123_parse() { // Test empty is error. - assert!(Rfc1123Domain::try_from_bytes("") - .map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::Empty), |_| false)); + assert!( + Rfc1123Domain::try_from_bytes("") + .map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::Empty), |_| false) + ); // Test root domain. 
assert!(Rfc1123Domain::try_from_bytes(".").map_or_else( |e| e == Rfc1123Err::DomainErr(DomainErr::RootDomain), @@ -1780,10 +1802,12 @@ mod tests { |e| e == Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63), |_| false )); - assert!(Rfc1123Domain::try_from_bytes( - "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", - ) - .map_or(false, |d| d.len().get() == 71)); + assert!( + Rfc1123Domain::try_from_bytes( + "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", + ) + .map_or(false, |d| d.len().get() == 71) + ); // Test domain too long. assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(254)), |_| false)); assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or(false, |d| d.len().get() == 253 )); @@ -1796,21 +1820,35 @@ mod tests { // Test single label. assert!(Rfc1123Domain::try_from_bytes("c").map_or(false, |d| d.as_str() == "c")); // Test ends with hyphen. 
- assert!(Rfc1123Domain::try_from_bytes("-") - .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); - assert!(Rfc1123Domain::try_from_bytes("-.") - .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); - assert!(Rfc1123Domain::try_from_bytes("a.com.-") - .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); - assert!(Rfc1123Domain::try_from_bytes("a.com-") - .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); - assert!(Rfc1123Domain::try_from_bytes("a-.com") - .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); + assert!( + Rfc1123Domain::try_from_bytes("-") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) + ); + assert!( + Rfc1123Domain::try_from_bytes("-.") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) + ); + assert!( + Rfc1123Domain::try_from_bytes("a.com.-") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) + ); + assert!( + Rfc1123Domain::try_from_bytes("a.com-") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) + ); + assert!( + Rfc1123Domain::try_from_bytes("a-.com") + .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) + ); // Test starts with hyphen. - assert!(Rfc1123Domain::try_from_bytes("a.-com") - .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false)); - assert!(Rfc1123Domain::try_from_bytes("-a.com") - .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false)); + assert!( + Rfc1123Domain::try_from_bytes("a.-com") + .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false) + ); + assert!( + Rfc1123Domain::try_from_bytes("-a.com") + .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false) + ); // Test case-insensitivity. 
assert!( Rfc1123Domain::try_from_bytes("wwW.ExAMple.COm").map_or(false, |d| { @@ -1824,7 +1862,9 @@ mod tests { .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) }) ); - assert!(Rfc1123Domain::try_from_bytes("1.1.1.1") - .map_or_else(|err| err == Rfc1123Err::InvalidTld, |_| false)); + assert!( + Rfc1123Domain::try_from_bytes("1.1.1.1") + .map_or_else(|err| err == Rfc1123Err::InvalidTld, |_| false) + ); } } diff --git a/src/lib.rs b/src/lib.rs @@ -1,4 +1,8 @@ -//! # `ascii_domain` +//! [![git]](https://git.philomathiclife.com/ascii_domain/log.html)&ensp;[![crates-io]](https://crates.io/crates/ascii_domain)&ensp;[![docs-rs]](crate) +//! +//! [git]: https://git.philomathiclife.com/git_badge.svg +//! [crates-io]: https://img.shields.io/badge/crates.io-fc8d62?style=for-the-badge&labelColor=555555&logo=rust +//! [docs-rs]: https://img.shields.io/badge/docs.rs-66c2a5?style=for-the-badge&labelColor=555555&logo=docs.rs //! //! `ascii_domain` is a library for efficiently parsing domains based on a supplied ASCII character set one //! wants to enforce each [`dom::Label`] to conform to. The primary type in the library is [`dom::Domain`] @@ -40,6 +44,7 @@ )] #![expect( clippy::blanket_clippy_restriction_lints, + clippy::arbitrary_source_item_ordering, clippy::exhaustive_enums, clippy::implicit_return, clippy::min_ident_chars, @@ -52,7 +57,9 @@ /// uses. pub mod char_set; /// Contains [`dom::Domain`] which is a domain whose [`dom::Label`]s consist of a subset of the supplied -/// [`char_set::AllowedAscii`]. Also contains [`dom::Rfc1123Domain`] which is a `Domain` that conforms to +/// [`char_set::AllowedAscii`]. +/// +/// Also contains [`dom::Rfc1123Domain`] which is a `Domain` that conforms to /// [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123#page-13). 
pub mod dom; /// Contains a Serde [`Visitor`](https://docs.rs/serde/latest/serde/de/trait.Visitor.html) that can be used to help diff --git a/src/serde.rs b/src/serde.rs @@ -3,7 +3,7 @@ use crate::{ char_set::{AllowedAscii, PRINTABLE_ASCII}, dom::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err}, }; -use alloc::{borrow::ToOwned, string::String}; +use alloc::{borrow::ToOwned as _, string::String}; use core::{ fmt::{self, Formatter}, marker::PhantomData, @@ -111,8 +111,8 @@ impl<'a, T, T2> DomainVisitor<'a, T, T2> { } } } -impl<'de: 'b, 'a, 'b, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, &'b str> { - type Value = Domain<&'b str>; +impl<'de: 'a, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'_, T, &'a str> { + type Value = Domain<&'a str>; #[inline] fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { formatter.write_str("Domain") @@ -125,7 +125,7 @@ impl<'de: 'b, 'a, 'b, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, &'b Self::Value::try_from_bytes(v, self.allowed_ascii).map_err(|err| dom_err_to_serde::<E>(err)) } } -impl<'de, 'a, T: AsRef<[u8]>> Visitor<'de> for DomainVisitor<'a, T, String> { +impl<T: AsRef<[u8]>> Visitor<'_> for DomainVisitor<'_, T, String> { type Value = Domain<String>; #[inline] fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { @@ -170,10 +170,16 @@ impl<'de: 'a, 'a> Deserialize<'de> for Domain<&'a str> { fn rfc_err_to_serde<E: de::Error>(value: Rfc1123Err) -> E { match value { Rfc1123Err::DomainErr(err) => dom_err_to_serde(err), - Rfc1123Err::LabelStartsWithAHyphen | Rfc1123Err::LabelEndsWithAHyphen => { - E::invalid_value(Unexpected::Str("-"), &"a valid domain conforming to RFC 1123 which requires all labels to not begin or end with a '-'") - } - Rfc1123Err::InvalidTld => E::invalid_value(Unexpected::Str("tld that is not all letters nor begins with 'xn--' and has length of at least five"), &"a valid domain conforming to RFC 1123 which requires the last label (i.e., TLD) to either be all 
letters or have length of at least five and begins with 'xn--'") + Rfc1123Err::LabelStartsWithAHyphen | Rfc1123Err::LabelEndsWithAHyphen => E::invalid_value( + Unexpected::Str("-"), + &"a valid domain conforming to RFC 1123 which requires all labels to not begin or end with a '-'", + ), + Rfc1123Err::InvalidTld => E::invalid_value( + Unexpected::Str( + "tld that is not all letters nor begins with 'xn--' and has length of at least five", + ), + &"a valid domain conforming to RFC 1123 which requires the last label (i.e., TLD) to either be all letters or have length of at least five and begins with 'xn--'", + ), } } /// Serde [`Visitor`] that deserializes a string into an [`Rfc1123Domain`]. @@ -190,7 +196,7 @@ impl<'de: 'a, 'a> Visitor<'de> for Rfc1123Visitor<&'a str> { Self::Value::try_from_bytes(v).map_err(|err| rfc_err_to_serde(err)) } } -impl<'de> Visitor<'de> for Rfc1123Visitor<String> { +impl Visitor<'_> for Rfc1123Visitor<String> { type Value = Rfc1123Domain<String>; fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { formatter.write_str("Rfc1123Domain") @@ -237,21 +243,31 @@ mod tests { use serde_json; #[test] fn test_serde() { - assert!(serde_json::from_str::<Domain<&str>>(r#""example.com""#) - .map_or(false, |dom| dom.into_iter().count() == 2)); - assert!(serde_json::from_str::<Domain<String>>(r#""c\"om""#) - .map_or(false, |dom| dom.into_iter().count() == 1)); + assert!( + serde_json::from_str::<Domain<&str>>(r#""example.com""#) + .map_or(false, |dom| dom.into_iter().count() == 2) + ); + assert!( + serde_json::from_str::<Domain<String>>(r#""c\"om""#) + .map_or(false, |dom| dom.into_iter().count() == 1) + ); // Can't borrow since input needs to be de-escaped. 
- assert!(serde_json::from_str::<Domain<&str>>(r#""c\"om""#) - .map_or_else(|err| err.is_data() && err.column() == 7, |_| false)); - assert!(serde_json::to_string( - &Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap() - ) - .map_or(false, |output| output == r#""example.com""#)); - assert!(serde_json::to_string( - &Domain::try_from_bytes(b"example.com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap() - ) - .map_or(false, |output| output == r#""example.com""#)); + assert!( + serde_json::from_str::<Domain<&str>>(r#""c\"om""#) + .map_or_else(|err| err.is_data() && err.column() == 7, |_| false) + ); + assert!( + serde_json::to_string( + &Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap() + ) + .map_or(false, |output| output == r#""example.com""#) + ); + assert!( + serde_json::to_string( + &Domain::try_from_bytes(b"example.com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap() + ) + .map_or(false, |output| output == r#""example.com""#) + ); assert!( serde_json::from_str::<Rfc1123Domain<&str>>(r#""example.com""#) .map_or(false, |dom| dom.into_iter().count() == 2) @@ -261,7 +277,9 @@ mod tests { .map_or(false, |dom| dom.tld().as_str() == "com") ); // Can't borrow since input needs to be de-escaped. - assert!(serde_json::from_str::<Rfc1123Domain<&str>>(r#""c\u006fm""#) - .map_or_else(|err| err.is_data() && err.column() == 10, |_| false)); + assert!( + serde_json::from_str::<Rfc1123Domain<&str>>(r#""c\u006fm""#) + .map_or_else(|err| err.is_data() && err.column() == 10, |_| false) + ); } }