commit ab96e3ccfcf8a47def57d892f73603e96639a6c2
Author: Zack Newman <zack@philomathiclife.com>
Date: Sat, 3 Feb 2024 17:06:03 -0700
init
Diffstat:
A | .gitignore | | | 2 | ++ |
A | Cargo.toml | | | 24 | ++++++++++++++++++++++++ |
A | LICENSE-APACHE | | | 177 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | LICENSE-MIT | | | 20 | ++++++++++++++++++++ |
A | README.md | | | 24 | ++++++++++++++++++++++++ |
A | src/char_set.rs | | | 582 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/dom.rs | | | 1880 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | src/lib.rs | | | 46 | ++++++++++++++++++++++++++++++++++++++++++++++ |
8 files changed, 2755 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,2 @@
+Cargo.lock
+target/**
diff --git a/Cargo.toml b/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+authors = ["Zack Newman <zack@philomathiclife.com>"]
+categories = ["parsing"]
+description = "Parser for DNS names based on a provided ASCII character set."
+documentation = "https://docs.rs/ascii_domain/latest/ascii_domain/"
+edition = "2021"
+keywords = ["ascii", "dns", "domain", "validation"]
+license = "MIT OR Apache-2.0"
+name = "ascii_domain"
+readme = "README.md"
+repository = "https://git.philomathiclife.com/repos/ascii_domain/"
+version = "0.1.0"
+
+[lib]
+name = "ascii_domain"
+path = "src/lib.rs"
+
+[badges]
+maintenance = { status = "actively-developed" }
+
+[profile.release]
+lto = true
+panic = 'abort'
+strip = true
diff --git a/LICENSE-APACHE b/LICENSE-APACHE
@@ -0,0 +1,177 @@
+
+ Apache License
+ Version 2.0, January 2004
+ http://www.apache.org/licenses/
+
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+ 1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+ 2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+ 3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+ 4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or
+ Derivative Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+ 6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for reasonable and customary use in describing the
+ origin of the Work and reproducing the content of the NOTICE file.
+
+ 7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+ 8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+ 9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+ END OF TERMS AND CONDITIONS
diff --git a/LICENSE-MIT b/LICENSE-MIT
@@ -0,0 +1,20 @@
+Copyright © 2023 Zack Newman
+
+Permission is hereby granted, free of charge, to any person obtaining a
+copy of this software and associated documentation files (the
+“Software”), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be included
+in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS
+OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,24 @@
+# ascii_domain
+
+`ascii_domain` is a [library](https://docs.rs/ascii_domain/latest/ascii_domain) for efficiently parsing domains
+based on a supplied ASCII character set one wants to enforce each
+[`Label`](https://docs.rs/ascii_domain/latest/ascii_domain/dom/struct.Label.html) to conform to. The primary type
+in the library is [`Domain`](https://docs.rs/ascii_domain/latest/ascii_domain/dom/struct.Domain.html) which can be
+thought of as domains in _representation_ format. Technically since any ASCII `u8` except `b'.'` is allowed in a
+`Label`, it is more general than an actual representation format that doesn’t include some form of escape
+characters. For a full-fledged DNS library look elsewhere (e.g., [`domain`](https://docs.rs/domain/latest/domain/)).
+
+The purpose of this library is to allow efficient customization of domain name parsing while still retaining
+the hierarchical structure of a domain. Depending on one’s use case, allowed formats and characters can
+differ. If one wants to conform to the [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181),
+all octets are allowed; but conforming to [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123) or
+[RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) requires stricter formats and a reduced character
+set.
+
+### Status
+
+This package is actively maintained.
+
+The crate is only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but
+it should work on any [Tier 1 with Host Tools](https://doc.rust-lang.org/beta/rustc/platform-support.html)
+target.
diff --git a/src/char_set.rs b/src/char_set.rs
@@ -0,0 +1,582 @@
+use core::{
+ array::IntoIter,
+ borrow::Borrow,
+ fmt::{self, Display, Formatter},
+ ops::Deref,
+ slice::Iter,
+ str,
+};
+use std::error::Error;
+/// Error returned from [`AllowedAscii::try_from`].
+///
+/// `try_from` sorts its input before validating it, so when several problems exist the variant returned
+/// is the one encountered first in that validation order, not in the order the caller wrote the bytes.
+#[allow(clippy::exhaustive_enums)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum AsciiErr {
+ /// Since `AllowedAscii` only allows unique ASCII characters, the maximum `COUNT` is 128.
+ /// This variant is returned when the `COUNT` exceeds that.
+ CountTooLarge,
+ /// The contained `u8` is not valid ASCII (i.e., it is strictly greater than 127).
+ /// When more than one such byte is supplied, the largest one is reported.
+ InvalidByte(u8),
+ /// `b'.'` was in the allowed ASCII. It is the only ASCII value not allowed since it is always used
+ /// as a [`crate::dom::Label`] separator.
+ Contains46,
+ /// The contained ASCII appeared more than once.
+ Duplicate(u8),
+}
+impl Display for AsciiErr {
+    /// Writes a human-readable description of the error.
+    ///
+    /// The duplicate byte is rendered as a character. No `unsafe` is used: the variants are public,
+    /// so a caller may construct `AsciiErr::Duplicate` with a byte that is not ASCII, and building a
+    /// `str` from it unchecked would be undefined behavior.
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        match *self {
+            Self::CountTooLarge => f.write_str("allowed ASCII is larger than 128"),
+            Self::InvalidByte(byt) => {
+                write!(f, "allowed ASCII was passed the invalid byte value {byt}")
+            }
+            Self::Contains46 => f.write_str("allowed ASCII contains '.'"),
+            Self::Duplicate(byt) => {
+                // `char::from(u8)` is total, and for ASCII input it yields exactly the same text the
+                // byte itself would as a one-byte `str`.
+                write!(
+                    f,
+                    "allowed ASCII has the duplicate value '{}'",
+                    char::from(byt)
+                )
+            }
+        }
+    }
+}
+impl Error for AsciiErr {}
+// This can be large so we don't implement `Copy`.
+/// Container of the `COUNT` ASCII `u8`s that are allowed to appear in a [`crate::dom::Label`].
+/// Note that while [`crate::dom::Domain`] treats ASCII uppercase letters as lowercase,
+/// it still depends on such `u8`s being included. For example if `b'A'` is not
+/// included, then `b'A'` is not allowed even if `b'a'` is included.
+///
+/// It is _highly_ unlikely that non-printable ASCII or `b'\\'` should be used since such ASCII would almost
+/// certainly require being escaped.
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct AllowedAscii<const COUNT: usize> {
+ /// The allowed ASCII `u8`s.
+ /// Kept in ascending order with no duplicates and without `b'.'`; `contains` relies on the
+ /// ordering because it performs a binary search.
+ allowed: [u8; COUNT],
+}
+impl<const COUNT: usize> AllowedAscii<COUNT> {
+    /// Returns `true` iff `val` is an allowed ASCII value in a [`crate::dom::Label`].
+    ///
+    /// The lookup is a binary search over the sorted bytes.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::char_set;
+    /// assert!(char_set::ASCII_LETTERS.contains(b'a'));
+    /// assert!(!char_set::ASCII_LETTERS.contains(b'0'));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn contains(&self, val: u8) -> bool {
+        self.allowed.binary_search(&val).is_ok()
+    }
+    /// Returns an [`Iterator`] of the allowed ASCII `u8`s without consuming `self`.
+    ///
+    /// Items are yielded by reference in ascending order.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::char_set;
+    /// assert!(char_set::ASCII_LETTERS.iter().next() == Some(&b'A'));
+    /// ```
+    #[inline]
+    pub fn iter(&self) -> Iter<'_, u8> {
+        self.allowed.iter()
+    }
+}
+impl<const COUNT: usize> AsRef<[u8]> for AllowedAscii<COUNT> {
+    /// Borrows the allowed ASCII as a byte slice.
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        // The array reference coerces to a slice at the return site.
+        &self.allowed
+    }
+}
+impl<const COUNT: usize> AsRef<str> for AllowedAscii<COUNT> {
+    /// Borrows the allowed ASCII as a `str`.
+    #[allow(unsafe_code)]
+    #[inline]
+    fn as_ref(&self) -> &str {
+        let ascii: &[u8] = &self.allowed;
+        // SAFETY:
+        // `allowed` holds nothing but ASCII, and every ASCII byte sequence is valid UTF-8.
+        // The field is private and never mutated after construction, so the invariant
+        // cannot be broken from outside.
+        unsafe { str::from_utf8_unchecked(ascii) }
+    }
+}
+impl<const COUNT: usize> Borrow<[u8]> for AllowedAscii<COUNT> {
+    /// Borrows the allowed ASCII as a byte slice.
+    #[inline]
+    fn borrow(&self) -> &[u8] {
+        &self.allowed
+    }
+}
+impl<const COUNT: usize> Borrow<str> for AllowedAscii<COUNT> {
+    /// Borrows the allowed ASCII as a `str` by delegating to the `AsRef<str>` implementation.
+    // NOTE(review): `Borrow` expects `Hash`/`Eq`/`Ord` of the borrowed form to agree with the owner.
+    // The derived `Hash` hashes the byte array, which may not match how `str` hashes — confirm
+    // before relying on `&str` lookups in a hash-based collection keyed by `AllowedAscii`.
+    #[inline]
+    fn borrow(&self) -> &str {
+        AsRef::<str>::as_ref(self)
+    }
+}
+impl<const COUNT: usize> Deref for AllowedAscii<COUNT> {
+    type Target = [u8];
+    /// Dereferences to the allowed ASCII as a byte slice, which exposes slice methods such as `len`.
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        &self.allowed
+    }
+}
+impl<const COUNT: usize> From<AllowedAscii<COUNT>> for [u8; COUNT] {
+    /// Unwraps the container, yielding the underlying array of allowed ASCII.
+    #[inline]
+    fn from(value: AllowedAscii<COUNT>) -> Self {
+        let AllowedAscii { allowed } = value;
+        allowed
+    }
+}
+impl<const COUNT: usize> TryFrom<[u8; COUNT]> for AllowedAscii<COUNT> {
+    type Error = AsciiErr;
+    /// `allowed` must contain unique ASCII `u8`s. Note it is likely
+    /// `allowed` should be a subset of [`PRINTABLE_ASCII`] since any other ASCII
+    /// would likely require some form of escape character logic. Additionally,
+    /// it is likely `COUNT` should be greater than 0; otherwise the returned `AllowedAscii`
+    /// will always cause [`crate::dom::Domain::try_from_bytes`] to error since `Domain` requires
+    /// at least one non-root [`crate::dom::Label`].
+    ///
+    /// The input is sorted before it is validated and stored.
+    #[inline]
+    fn try_from(mut allowed: [u8; COUNT]) -> Result<Self, Self::Error> {
+        if COUNT > 128 {
+            return Err(AsciiErr::CountTooLarge);
+        }
+        // `contains` performs a binary search, so the bytes have to be stored sorted.
+        allowed.sort_unstable();
+        // Once sorted, the largest byte is last; if it is ASCII then so is everything before it.
+        if let Some(&largest) = allowed.last() {
+            if largest > 127 {
+                return Err(AsciiErr::InvalidByte(largest));
+            }
+        }
+        // `b'.'` can never be a legitimate element, which makes it a safe "no previous byte" marker.
+        // For that to hold, the `b'.'` test has to come before the duplicate test.
+        let mut prev = b'.';
+        for &cur in &allowed {
+            if cur == b'.' {
+                return Err(AsciiErr::Contains46);
+            }
+            if cur == prev {
+                return Err(AsciiErr::Duplicate(cur));
+            }
+            prev = cur;
+        }
+        Ok(Self { allowed })
+    }
+}
+impl<const COUNT: usize> IntoIterator for AllowedAscii<COUNT> {
+    type Item = u8;
+    type IntoIter = IntoIter<u8, COUNT>;
+    /// Consumes the container and iterates over the allowed ASCII by value.
+    #[inline]
+    fn into_iter(self) -> Self::IntoIter {
+        let Self { allowed } = self;
+        allowed.into_iter()
+    }
+}
+impl<'a, const COUNT: usize> IntoIterator for &'a AllowedAscii<COUNT> {
+    type Item = &'a u8;
+    type IntoIter = Iter<'a, u8>;
+    /// Iterates over the allowed ASCII by reference.
+    #[inline]
+    fn into_iter(self) -> Self::IntoIter {
+        let bytes: &'a [u8] = &self.allowed;
+        bytes.iter()
+    }
+}
+// The `PartialEq` impls below allow an `AllowedAscii` to be compared with a reference to itself, with byte
+// slices, and with byte arrays of the same `COUNT`, in either operand order. Each impl dereferences down to
+// a comparison that already exists (the derived `AllowedAscii == AllowedAscii` or `[u8] == [u8]`), so the
+// number of `*`s matters: one level too few would make an impl call itself.
+// Since `try_from` stores the bytes sorted, equality with a slice or array is against that sorted order.
+
+// `&AllowedAscii == AllowedAscii`; defers to the derived implementation.
+impl<const COUNT: usize> PartialEq<AllowedAscii<COUNT>> for &AllowedAscii<COUNT> {
+ #[inline]
+ fn eq(&self, other: &AllowedAscii<COUNT>) -> bool {
+ **self == *other
+ }
+}
+// `AllowedAscii == &AllowedAscii`; defers to the derived implementation.
+impl<const COUNT: usize> PartialEq<&Self> for AllowedAscii<COUNT> {
+ #[inline]
+ fn eq(&self, other: &&Self) -> bool {
+ *self == **other
+ }
+}
+// `AllowedAscii == [u8]`.
+impl<const COUNT: usize> PartialEq<[u8]> for AllowedAscii<COUNT> {
+ #[inline]
+ fn eq(&self, other: &[u8]) -> bool {
+ *self.allowed.as_slice() == *other
+ }
+}
+// `[u8] == AllowedAscii`.
+impl<const COUNT: usize> PartialEq<AllowedAscii<COUNT>> for [u8] {
+ #[inline]
+ fn eq(&self, other: &AllowedAscii<COUNT>) -> bool {
+ *self == *other.allowed.as_slice()
+ }
+}
+// `&AllowedAscii == [u8]`.
+impl<const COUNT: usize> PartialEq<[u8]> for &AllowedAscii<COUNT> {
+ #[inline]
+ fn eq(&self, other: &[u8]) -> bool {
+ *self.allowed.as_slice() == *other
+ }
+}
+// `[u8] == &AllowedAscii`.
+impl<const COUNT: usize> PartialEq<&AllowedAscii<COUNT>> for [u8] {
+ #[inline]
+ fn eq(&self, other: &&AllowedAscii<COUNT>) -> bool {
+ *self == *other.allowed.as_slice()
+ }
+}
+// `AllowedAscii == &[u8]`.
+impl<const COUNT: usize> PartialEq<&[u8]> for AllowedAscii<COUNT> {
+ #[inline]
+ fn eq(&self, other: &&[u8]) -> bool {
+ *self.allowed.as_slice() == **other
+ }
+}
+// `&[u8] == AllowedAscii`.
+impl<const COUNT: usize> PartialEq<AllowedAscii<COUNT>> for &[u8] {
+ #[inline]
+ fn eq(&self, other: &AllowedAscii<COUNT>) -> bool {
+ **self == *other.allowed.as_slice()
+ }
+}
+// `AllowedAscii == [u8; COUNT]`; both sides are compared as slices.
+impl<const COUNT: usize> PartialEq<[u8; COUNT]> for AllowedAscii<COUNT> {
+ #[inline]
+ fn eq(&self, other: &[u8; COUNT]) -> bool {
+ *self.allowed.as_slice() == *other.as_slice()
+ }
+}
+// `[u8; COUNT] == AllowedAscii`.
+impl<const COUNT: usize> PartialEq<AllowedAscii<COUNT>> for [u8; COUNT] {
+ #[inline]
+ fn eq(&self, other: &AllowedAscii<COUNT>) -> bool {
+ *self.as_slice() == *other.allowed.as_slice()
+ }
+}
+// `&AllowedAscii == [u8; COUNT]`.
+impl<const COUNT: usize> PartialEq<[u8; COUNT]> for &AllowedAscii<COUNT> {
+ #[inline]
+ fn eq(&self, other: &[u8; COUNT]) -> bool {
+ *self.allowed.as_slice() == *other.as_slice()
+ }
+}
+// `[u8; COUNT] == &AllowedAscii`.
+impl<const COUNT: usize> PartialEq<&AllowedAscii<COUNT>> for [u8; COUNT] {
+ #[inline]
+ fn eq(&self, other: &&AllowedAscii<COUNT>) -> bool {
+ *self.as_slice() == *other.allowed.as_slice()
+ }
+}
+// `AllowedAscii == &[u8; COUNT]`.
+impl<const COUNT: usize> PartialEq<&[u8; COUNT]> for AllowedAscii<COUNT> {
+ #[inline]
+ fn eq(&self, other: &&[u8; COUNT]) -> bool {
+ *self.allowed.as_slice() == *other.as_slice()
+ }
+}
+// `&[u8; COUNT] == AllowedAscii`.
+impl<const COUNT: usize> PartialEq<AllowedAscii<COUNT>> for &[u8; COUNT] {
+ #[inline]
+ fn eq(&self, other: &AllowedAscii<COUNT>) -> bool {
+ *self.as_slice() == *other.allowed.as_slice()
+ }
+}
+/// Printable ASCII that should not need to be "escaped". That is to say
+/// printable ASCII excluding space (i.e., 32), dot (i.e. 46), and backslash (i.e., 92).
+/// This contains all `u8`s inclusively between 33 and 126 except 46 and 92.
+pub const PRINTABLE_ASCII: AllowedAscii<92> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'!', b'"', b'#', b'$', b'%', b'&', b'\'', b'(', b')', b'*', b'+', b',', b'-', b'/', b'0',
+ b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b':', b';', b'<', b'=', b'>', b'?',
+ b'@', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N',
+ b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'[', b']', b'^',
+ b'_', b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm',
+ b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z', b'{', b'|',
+ b'}', b'~',
+ ],
+};
+/// ASCII allowed in [RFC 5322 `atext`](https://www.rfc-editor.org/rfc/rfc5322#section-3.2.3).
+/// This contains the following `u8`s:
+///
+/// 33, 35–39, 42–43, 45, 47–57, 61, 63, 65–90, and 94–126.
+pub const RFC5322_ATEXT: AllowedAscii<81> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'!', b'#', b'$', b'%', b'&', b'\'', b'*', b'+', b'-', b'/', b'0', b'1', b'2', b'3', b'4',
+ b'5', b'6', b'7', b'8', b'9', b'=', b'?', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H',
+ b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W',
+ b'X', b'Y', b'Z', b'^', b'_', b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i',
+ b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x',
+ b'y', b'z', b'{', b'|', b'}', b'~',
+ ],
+};
+/// ASCII allowed in a domain by [Firefox](https://www.mozilla.org/en-US/firefox/)
+/// as of 2023-09-03T20:50+00:00.
+/// This contains the following `u8`s:
+///
+/// 33, 36, 38–41, 43–45, 48–57, 59, 61, 65–90, 95–123, and 125–126.
+pub const ASCII_FIREFOX: AllowedAscii<78> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'!', b'$', b'&', b'\'', b'(', b')', b'+', b',', b'-', b'0', b'1', b'2', b'3', b'4', b'5',
+ b'6', b'7', b'8', b'9', b';', b'=', b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I',
+ b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X',
+ b'Y', b'Z', b'_', b'`', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k',
+ b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
+ b'{', b'}', b'~',
+ ],
+};
+/// ASCII hyphen, digits, and letters.
+/// This contains 45 and all `u8`s inclusively between 48 and 57, 65 and 90, and 97 and 122.
+pub const ASCII_HYPHEN_DIGITS_LETTERS: AllowedAscii<63> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'-', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D',
+ b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S',
+ b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h',
+ b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w',
+ b'x', b'y', b'z',
+ ],
+};
+/// ASCII digits and letters.
+/// This contains all `u8`s inclusively between 48 and 57, 65 and 90, and 97 and 122.
+pub const ASCII_DIGITS_LETTERS: AllowedAscii<62> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D', b'E',
+ b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
+ b'U', b'V', b'W', b'X', b'Y', b'Z', b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i',
+ b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x',
+ b'y', b'z',
+ ],
+};
+/// ASCII letters.
+/// This contains all `u8`s inclusively between 65 and 90 and 97 and 122.
+pub const ASCII_LETTERS: AllowedAscii<52> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
+ b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z', b'a', b'b', b'c', b'd',
+ b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's',
+ b't', b'u', b'v', b'w', b'x', b'y', b'z',
+ ],
+};
+/// ASCII hyphen, digits, and uppercase letters.
+/// This contains 45 and all `u8`s inclusively between 48 and 57 and 65 and 90.
+pub const ASCII_HYPHEN_DIGITS_UPPERCASE: AllowedAscii<37> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'-', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D',
+ b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S',
+ b'T', b'U', b'V', b'W', b'X', b'Y', b'Z',
+ ],
+};
+/// ASCII hyphen, digits, and lowercase letters.
+/// This contains 45 and all `u8`s inclusively between 48 and 57 and 97 and 122.
+pub const ASCII_HYPHEN_DIGITS_LOWERCASE: AllowedAscii<37> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'-', b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'b', b'c', b'd',
+ b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's',
+ b't', b'u', b'v', b'w', b'x', b'y', b'z',
+ ],
+};
+/// ASCII digits and uppercase letters.
+/// This contains all `u8`s inclusively between 48 and 57 and 65 and 90.
+pub const ASCII_DIGITS_UPPERCASE: AllowedAscii<36> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'A', b'B', b'C', b'D', b'E',
+ b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O', b'P', b'Q', b'R', b'S', b'T',
+ b'U', b'V', b'W', b'X', b'Y', b'Z',
+ ],
+};
+/// ASCII digits and lowercase letters.
+/// This contains all `u8`s inclusively between 48 and 57 and 97 and 122.
+pub const ASCII_DIGITS_LOWERCASE: AllowedAscii<36> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'a', b'b', b'c', b'd', b'e',
+ b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o', b'p', b'q', b'r', b's', b't',
+ b'u', b'v', b'w', b'x', b'y', b'z',
+ ],
+};
+/// ASCII uppercase letters.
+/// This contains all `u8`s inclusively between 65 and 90.
+pub const ASCII_UPPERCASE: AllowedAscii<26> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'A', b'B', b'C', b'D', b'E', b'F', b'G', b'H', b'I', b'J', b'K', b'L', b'M', b'N', b'O',
+ b'P', b'Q', b'R', b'S', b'T', b'U', b'V', b'W', b'X', b'Y', b'Z',
+ ],
+};
+/// ASCII lowercase letters.
+/// This contains all `u8`s inclusively between 97 and 122.
+pub const ASCII_LOWERCASE: AllowedAscii<26> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [
+ b'a', b'b', b'c', b'd', b'e', b'f', b'g', b'h', b'i', b'j', b'k', b'l', b'm', b'n', b'o',
+ b'p', b'q', b'r', b's', b't', b'u', b'v', b'w', b'x', b'y', b'z',
+ ],
+};
+/// ASCII digits.
+/// This contains all `u8`s inclusively between 48 and 57.
+pub const ASCII_DIGITS: AllowedAscii<10> = AllowedAscii {
+ // Built directly rather than through `try_from`, so the bytes must be written in ascending order
+ // for the binary search in `contains` to work.
+ allowed: [b'0', b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9'],
+};
+#[cfg(test)]
+mod tests {
+ use crate::char_set::{
+ AllowedAscii, AsciiErr, ASCII_DIGITS, ASCII_DIGITS_LETTERS, ASCII_DIGITS_LOWERCASE,
+ ASCII_DIGITS_UPPERCASE, ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS,
+ ASCII_HYPHEN_DIGITS_LOWERCASE, ASCII_HYPHEN_DIGITS_UPPERCASE, ASCII_LETTERS,
+ ASCII_LOWERCASE, ASCII_UPPERCASE, PRINTABLE_ASCII, RFC5322_ATEXT,
+ };
+ #[test]
+ fn try_from() {
+     // An empty set is accepted.
+     let empty = AllowedAscii::try_from([]);
+     assert!(empty.is_ok());
+     // A repeated byte is rejected, and the repeated byte is reported.
+     assert!(matches!(
+         AllowedAscii::try_from(*b"aba"),
+         Err(AsciiErr::Duplicate(b'a'))
+     ));
+     // `b'.'` is rejected since it separates labels.
+     assert!(matches!(
+         AllowedAscii::try_from(*b"a.c"),
+         Err(AsciiErr::Contains46)
+     ));
+     // Every byte that was passed in is reported as contained.
+     let parsed = AllowedAscii::try_from(*b"abcdef");
+     assert!(parsed.map_or(false, |set| b"abcdef".iter().all(|&byt| set.contains(byt))));
+ }
+ #[test]
+ fn test_consts() {
+     /// Asserts that `set` contains every byte yielded by `bytes`.
+     fn assert_all<const N: usize>(set: &AllowedAscii<N>, bytes: impl IntoIterator<Item = u8>) {
+         for byt in bytes {
+             assert!(set.contains(byt));
+         }
+     }
+     // The three ranges every constant is built from.
+     let upper = || b'A'..=b'Z';
+     let lower = || b'a'..=b'z';
+     let digits = || b'0'..=b'9';
+
+     assert!(ASCII_LETTERS.len() == 52);
+     assert_all(&ASCII_LETTERS, upper().chain(lower()));
+
+     assert!(ASCII_DIGITS.len() == 10);
+     assert_all(&ASCII_DIGITS, digits());
+
+     assert!(ASCII_LOWERCASE.len() == 26);
+     assert_all(&ASCII_LOWERCASE, lower());
+
+     assert!(ASCII_UPPERCASE.len() == 26);
+     assert_all(&ASCII_UPPERCASE, upper());
+
+     assert!(ASCII_DIGITS_LETTERS.len() == 62);
+     assert_all(&ASCII_DIGITS_LETTERS, lower().chain(digits()).chain(upper()));
+
+     assert!(ASCII_DIGITS_LOWERCASE.len() == 36);
+     assert_all(&ASCII_DIGITS_LOWERCASE, lower().chain(digits()));
+
+     assert!(ASCII_DIGITS_UPPERCASE.len() == 36);
+     assert_all(&ASCII_DIGITS_UPPERCASE, upper().chain(digits()));
+
+     assert!(ASCII_FIREFOX.len() == 78);
+     assert_all(
+         &ASCII_FIREFOX,
+         upper()
+             .chain(lower())
+             .chain(digits())
+             .chain(b"!$&'()+,-;=_`{}~".iter().copied()),
+     );
+
+     assert!(ASCII_HYPHEN_DIGITS_LETTERS.len() == 63);
+     assert!(ASCII_HYPHEN_DIGITS_LETTERS.contains(b'-'));
+     assert_all(
+         &ASCII_HYPHEN_DIGITS_LETTERS,
+         upper().chain(lower()).chain(digits()),
+     );
+
+     assert!(ASCII_HYPHEN_DIGITS_LOWERCASE.len() == 37);
+     assert!(ASCII_HYPHEN_DIGITS_LOWERCASE.contains(b'-'));
+     assert_all(&ASCII_HYPHEN_DIGITS_LOWERCASE, lower().chain(digits()));
+
+     assert!(ASCII_HYPHEN_DIGITS_UPPERCASE.len() == 37);
+     assert!(ASCII_HYPHEN_DIGITS_UPPERCASE.contains(b'-'));
+     assert_all(&ASCII_HYPHEN_DIGITS_UPPERCASE, upper().chain(digits()));
+
+     // Everything from `!` through `~` except `.` and `\`.
+     assert!(PRINTABLE_ASCII.len() == 92);
+     assert_all(
+         &PRINTABLE_ASCII,
+         (b'!'..=b'-').chain(b'/'..=b'[').chain(b']'..=b'~'),
+     );
+
+     assert!(RFC5322_ATEXT.len() == 81);
+     assert_all(
+         &RFC5322_ATEXT,
+         upper()
+             .chain(lower())
+             .chain(digits())
+             .chain(b"!#$%&'*+-/=?^_`{|}~".iter().copied()),
+     );
+ }
+}
diff --git a/src/dom.rs b/src/dom.rs
@@ -0,0 +1,1880 @@
+use crate::char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS};
+use core::{
+ borrow::Borrow,
+ cmp::Ordering,
+ convert::{self, AsRef},
+ fmt::{self, Display, Formatter},
+ hash::{Hash, Hasher},
+ iter::FusedIterator,
+ num::NonZeroU8,
+ ops::Deref,
+ str::{self, FromStr},
+};
+use std::{error::Error, net::Ipv4Addr};
+/// Returned by [`Domain::cmp_by_domain_ordering`]. It is more informative than [`Ordering`] in that it
+/// distinguishes between a `Domain` that is greater than another `Domain` due to a [`Label`] being greater
+/// from a `Domain` that has the same `Label`s as another but simply more of them.
+///
+/// Another way to view this is that [`Self::Shorter`] is "closer" to being [`Self::Equal`] than [`Self::Less`]
+/// since the `Domain`s are still part of the same branch in the DNS hierarchy. Ditto for [`Self::Longer`].
+#[allow(clippy::exhaustive_enums)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum DomainOrdering {
+ /// The `Domain` is less than another since a `Label` was less.
+ Less,
+ /// The `Domain` is less than another but only because it had fewer `Label`s.
+ Shorter,
+ /// The `Domain` is equal to another.
+ Equal,
+ /// The `Domain` is greater than another but only because it had more `Label`s.
+ Longer,
+ /// The `Domain` is greater than another since a `Label` was greater.
+ Greater,
+}
+impl Display for DomainOrdering {
+    /// Writes a short English description of the ordering together with the reason it holds.
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // Pick the text first so there is a single write at the end.
+        let description = match *self {
+            Self::Less => "less since a label was less",
+            Self::Shorter => "less since there were fewer labels",
+            Self::Equal => "equal",
+            Self::Longer => "greater since there were more labels",
+            Self::Greater => "greater since a label was greater",
+        };
+        f.write_str(description)
+    }
+}
+impl From<DomainOrdering> for Ordering {
+    /// Collapses the five-way [`DomainOrdering`] into a three-way [`Ordering`] by discarding the reason
+    /// a `Domain` was less or greater.
+    #[inline]
+    fn from(value: DomainOrdering) -> Self {
+        match value {
+            DomainOrdering::Equal => Self::Equal,
+            // Fewer labels still means less.
+            DomainOrdering::Shorter | DomainOrdering::Less => Self::Less,
+            // More labels still means greater.
+            DomainOrdering::Greater | DomainOrdering::Longer => Self::Greater,
+        }
+    }
+}
+/// A flag used to indicate information about the characters in a [`Domain`]. This flag is used
+/// to perform more efficient comparisons that can potentially avoid temporary memory allocations
+/// to treat uppercase letters as if they were lowercase.
+///
+/// The reason `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, and `` b'`' `` need to be tracked in addition to letters
+/// is that they lie between the uppercase and lowercase letters in ASCII, so uppercase letters must be considered greater than them since lowercase letters are. As the documentation
+/// of `Domain` states, "uppercase letters are treated as lowercase".
+#[allow(clippy::exhaustive_enums)]
+#[repr(u8)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+enum CharFlag {
+ /// No ASCII letters, `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, or `` b'`' ``.
+ None = 0,
+ /// ASCII lowercase letters but no uppercase letters, `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, or `` b'`' ``.
+ Lower = 1,
+ /// ASCII uppercase letters but no lowercase letters, `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, or `` b'`' ``.
+ Upper = 2,
+ /// ASCII lowercase and uppercase letters but no `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, or `` b'`' ``.
+ LowerUpper = 3,
+ /// `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, or `` b'`' ``; but no ASCII letters.
+ Between = 4,
+ /// ASCII lowercase letters and `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, or `` b'`' ``; but no uppercase letters.
+ LowerBetween = 5,
+ /// ASCII uppercase letters and `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, or `` b'`' ``; but no lowercase letters.
+ UpperBetween = 6,
+ /// ASCII lowercase letters, uppercase letters, and `b'['`, `b'\\'`, `b']'`, `b'^'`, `b'_'`, or `` b'`' ``.
+ All = 7,
+}
+impl CharFlag {
+    /// Returns `true` iff comparing a value flagged `self` with a value flagged `other` for equivalence
+    /// must ignore ASCII case.
+    ///
+    /// That is only the case when one side has lowercase letters while the other has uppercase letters;
+    /// otherwise a plain byte comparison gives the same answer.
+    #[inline]
+    #[must_use]
+    const fn eq_ignore_case(self, other: Self) -> bool {
+        let other_has_lower = matches!(
+            other,
+            Self::Lower | Self::LowerUpper | Self::LowerBetween | Self::All
+        );
+        let other_has_upper = matches!(
+            other,
+            Self::Upper | Self::LowerUpper | Self::UpperBetween | Self::All
+        );
+        match self {
+            // No letters on this side, so case cannot matter.
+            Self::None | Self::Between => false,
+            // Only lowercase here; case matters iff the other side has uppercase.
+            Self::Lower | Self::LowerBetween => other_has_upper,
+            // Only uppercase here; case matters iff the other side has lowercase.
+            Self::Upper | Self::UpperBetween => other_has_lower,
+            // Both cases here; case matters iff the other side has any letter.
+            Self::LowerUpper | Self::All => other_has_lower || other_has_upper,
+        }
+    }
+}
+impl Display for CharFlag {
+    /// Writes an English description of which kinds of ASCII bytes are present.
+    ///
+    /// The bytes between the uppercase and lowercase letters are `'['`, `'\\'`, `']'`, `'^'`, `'_'`, and
+    /// `` '`' ``; `'['` is listed since `Domain::try_from_bytes` sets the same flag bit for it.
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        f.write_str(match *self {
+            Self::None => "No ASCII letters, '[', '\\', ']', '^', '_', or '`'",
+            Self::Lower => {
+                "ASCII lowercase letters; but no uppercase letters, '[', '\\', ']', '^', '_', or '`'"
+            }
+            Self::Upper => {
+                "ASCII uppercase letters; but no lowercase letters, '[', '\\', ']', '^', '_', or '`'"
+            }
+            Self::Between => "ASCII '[', '\\', ']', '^', '_', or '`'; but no letters",
+            Self::LowerUpper => {
+                "ASCII lowercase and uppercase letters; but no '[', '\\', ']', '^', '_', or '`'"
+            }
+            Self::LowerBetween => {
+                "ASCII lowercase letters and '[', '\\', ']', '^', '_', or '`'; but no uppercase letters"
+            }
+            Self::UpperBetween => {
+                "ASCII uppercase letters and '[', '\\', ']', '^', '_', or '`'; but no lowercase letters"
+            }
+            Self::All => {
+                "ASCII lowercase letters, uppercase letters, and '[', '\\', ']', '^', '_', or '`'"
+            }
+        })
+    }
+}
+/// A domain that consists of at least one [`Label`] with each `Label` only containing the ASCII `u8`s in
+/// [`Self::allowed_ascii`]. The total length of a `Domain` is at most 253 bytes[^note] including the
+/// `b'.'` separator. The trailing `b'.'`, if one exists, is always ignored.
+///
+/// This is more restrictive than what a domain is allowed to be per the
+/// [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181) since all octets/`u8`s are allowed in a
+/// label. Additionally there is no way to represent the root domain.
+///
+/// Last, ASCII uppercase letters are treated as lowercase; however for better comparison performance
+/// that doesn't lead to intermediate memory allocations, two `Domain`s should consist entirely of the same
+/// case.
+///
+/// [^note]: It is a common misconception that the max length of a domain is 255, but that is only true for
+/// domains in _wire_ format. In representation format, which `Domain` can be thought of as when only visible
+/// ASCII bytes are used, the max length is 253 when the last byte is not `b'.'`; otherwise the max length is
+/// 254. This is due to the fact that there is no way to explicitly represent the root label which in wire format
+/// contributes one byte due to each label being preceded by the octet that represents its length.
+#[derive(Clone, Debug)]
+pub struct Domain<'a, const ALLOWED_COUNT: usize, T> {
+ /// The domain value. `value.as_ref().len()` is guaranteed to be between 1 and 254.
+ /// Guaranteed to only contain `b'.'` and the ASCII `u8`s in `allowed_ascii`.
+ value: T,
+ /// The allowed ASCII `u8`s a `Label` can have.
+ allowed_ascii: &'a AllowedAscii<ALLOWED_COUNT>,
+ /// The lengths of each label. Guaranteed to have length between 1 and 127 with each value being
+ /// between 1 and 63.
+ label_lens: Vec<NonZeroU8>,
+ /// Flag that contains information about the kind of ASCII `u8`s in `value`.
+ flag: CharFlag,
+ /// Indicates if the domain has a trailing `b'.'`.
+ contains_trailing_dot: bool,
+}
+impl<'a, const ALLOWED_COUNT: usize, T> Domain<'a, ALLOWED_COUNT, T> {
+ /// The maximum length of a `Domain` which is 253.
+ // SAFETY: 0 < 253 < 256.
+ #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
+ pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(253) };
+ /// The minimum length of a `Domain` which is 1.
+ // SAFETY: 0 < 1 < 256.
+ #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
+ pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) };
+ /// Returns the count of [`Label`]s. Due to length requirements of `Label` and `Domain`,
+ /// this is less than `128`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().label_count().get() == 2);
+ /// ```
+ #[inline]
+ #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)]
+ pub fn label_count(&self) -> NonZeroU8 {
+ // SAFETY:
+ // The only way to construct a `Domain` is via `try_from_bytes` which ensures the total number
+ // of labels is inclusively between 1 and 127. `Domain` is immutable ensuring such invariants are kept.
+ unsafe { NonZeroU8::new_unchecked(self.label_lens.len() as u8) }
+ }
+ /// Returns a reference to the inner `T`. This should be treated with caution since it will contain a trailing `b'.'` if there
+ /// is one as well as ASCII uppercase letters if there are any.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(*Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().as_inner() == "example.com.");
+ /// ```
+ #[inline]
+ pub const fn as_inner(&self) -> &T {
+ &self.value
+ }
+ /// Same as [`Self::as_inner`] except `self` is consumed and the inner `T` is returned by value.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_inner() == "example.com.");
+ /// ```
+ #[inline]
+ pub fn into_inner(self) -> T {
+ self.value
+ }
+ /// Returns the ASCII `u8`s that are allowed in a [`Label`].
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().allowed_ascii() == ASCII_LOWERCASE);
+ /// ```
+ #[inline]
+ pub const fn allowed_ascii(&self) -> &'a AllowedAscii<ALLOWED_COUNT> {
+ self.allowed_ascii
+ }
+ /// Returns `true` iff the domain contained a trailing `b'.'`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().contains_trailing_dot());
+ /// ```
+ #[inline]
+ pub const fn contains_trailing_dot(&self) -> bool {
+ self.contains_trailing_dot
+ }
+}
+impl<'a, const ALLOWED_COUNT: usize, T: AsRef<[u8]>> Domain<'a, ALLOWED_COUNT, T> {
+ /// Returns the domain as a `str`, omitting the trailing `b'.'` if there was one.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
+ /// assert!(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap().as_str() == "Example.com");
+ /// ```
+ #[allow(unsafe_code)]
+ #[inline]
+ pub fn as_str(&self) -> &str {
+     // SAFETY:
+     // We only allow ASCII, and ASCII is always valid UTF-8, so this is safe.
+     unsafe { str::from_utf8_unchecked(self.as_bytes()) }
+ }
+ /// Returns the domain as bytes, omitting the trailing `b'.'` if there was one.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
+ /// assert!(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap().as_bytes() == b"Example.com");
+ /// ```
+ #[allow(clippy::indexing_slicing)]
+ #[inline]
+ pub fn as_bytes(&self) -> &[u8] {
+     // `Self::len` never counts the trailing `b'.'`, so slicing up to it drops that byte.
+     // This is correct so long as `Self::len` is correct.
+     let end = usize::from(self.len().get());
+     &self.value.as_ref()[..end]
+ }
+ /// Returns the length of the `Domain` _without_ counting the trailing `b'.'` if there was one.
+ ///
+ /// Same as `self.as_str().len()` and `self.as_bytes().len()`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().len().get() == 11);
+ /// ```
+ #[inline]
+ #[allow(
+     unsafe_code,
+     clippy::arithmetic_side_effects,
+     clippy::as_conversions,
+     clippy::cast_lossless,
+     clippy::cast_possible_truncation
+ )]
+ pub fn len(&self) -> NonZeroU8 {
+     let raw_len = self.value.as_ref().len();
+     // `usize::from(true)` is 1 and `usize::from(false)` is 0.
+     let dot_len = usize::from(self.contains_trailing_dot);
+     // No fear of underflow since `value` has at least one byte _not including_ the trailing `b'.'`
+     // if there was one. No fear of truncation either since the length is guaranteed to be less
+     // than 255. `Domain` is immutable ensuring such invariants are kept.
+     let len = (raw_len - dot_len) as u8;
+     // SAFETY:
+     // The only way to construct a `Domain` is via `try_from_bytes` which ensures `len` is
+     // at least 1.
+     unsafe { NonZeroU8::new_unchecked(len) }
+ }
+ /// Parses `v` into a `Domain` whose [`Label`]s may only contain the ASCII `u8`s in `allowed_ascii`.
+ /// A single trailing `b'.'` is accepted and ignored.
+ ///
+ /// Note that while ASCII uppercase is treated as ASCII lowercase, `allowed_ascii` MUST still contain
+ /// each ASCII `u8` (e.g., if `!allowed_ascii.contains(b'A')`, then `b'A'` is not allowed even if
+ /// `allowed_ascii.contains(b'a')`).
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use ascii_domain::{dom::{Domain, DomainErr}, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).is_ok());
+ /// assert!(Domain::try_from_bytes("exam2ple.com", &ASCII_LOWERCASE).map_or_else(|err| err == DomainErr::InvalidByte(b'2'), |_| false));
+ /// ```
+ ///
+ /// # Errors
+ ///
+ /// Returns [`DomainErr`] iff `v.as_ref()` is an invalid `Domain`.
+ #[allow(
+     clippy::arithmetic_side_effects,
+     clippy::as_conversions,
+     clippy::indexing_slicing,
+     clippy::into_iter_on_ref,
+     clippy::unreachable
+ )]
+ #[inline]
+ pub fn try_from_bytes<'b: 'a>(
+     v: T,
+     allowed_ascii: &'b AllowedAscii<ALLOWED_COUNT>,
+ ) -> Result<Self, DomainErr> {
+     let raw = v.as_ref();
+     let (value, contains_trailing_dot) = match raw.split_last() {
+         // Either nothing at all or nothing but the trailing `b'.'` which we always ignore.
+         None | Some((&b'.', &[])) => return Err(DomainErr::Empty),
+         Some((&b'.', rest)) => (rest, true),
+         Some(_) => (raw, false),
+     };
+     // `MAX_LEN` is 253 so this is fine.
+     if value.len() > Self::MAX_LEN.get() as usize {
+         return Err(DomainErr::LenExceeds253(value.len()));
+     }
+     let mut label_lens = Vec::with_capacity(3);
+     // Length of the label currently being scanned.
+     let mut label_len = 0u8;
+     // This flag is the `u8` representation of `CharFlag`.
+     let mut flag = 0u8;
+     for &b in value {
+         if b == b'.' {
+             // A separator closes the current label, which must not be empty.
+             match NonZeroU8::new(label_len) {
+                 Some(length) => {
+                     label_lens.push(length);
+                     label_len = 0;
+                 }
+                 None => return Err(DomainErr::EmptyLabel),
+             }
+             continue;
+         }
+         if !allowed_ascii.contains(b) {
+             return Err(DomainErr::InvalidByte(b));
+         }
+         match b {
+             b'A'..=b'Z' => flag |= 2,
+             b'['..=b'`' => flag |= 4,
+             b'a'..=b'z' => flag |= 1,
+             _ => (),
+         }
+         if label_len == 63 {
+             return Err(DomainErr::LabelLenExceeds63);
+         }
+         // This is less than 63 due to the above check, so this won't overflow.
+         label_len += 1;
+     }
+     // The last label is not followed by a separator, so it is closed here.
+     match NonZeroU8::new(label_len) {
+         Some(length) => label_lens.push(length),
+         None => return Err(DomainErr::EmptyLabel),
+     }
+     let flag = match flag {
+         0 => CharFlag::None,
+         1 => CharFlag::Lower,
+         2 => CharFlag::Upper,
+         3 => CharFlag::LowerUpper,
+         4 => CharFlag::Between,
+         5 => CharFlag::LowerBetween,
+         6 => CharFlag::UpperBetween,
+         7 => CharFlag::All,
+         _ => unreachable!("there is a bug in Domain::try_from_bytes"),
+     };
+     Ok(Self {
+         value: v,
+         allowed_ascii,
+         label_lens,
+         flag,
+         contains_trailing_dot,
+     })
+ }
+ /// Returns an [`Iterator`] of [`Label`]s, beginning with the TLD, without consuming the `Domain`.
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().iter().next().unwrap().as_str() == "com");
+ /// ```
+ #[inline]
+ pub fn iter(&self) -> LabelIter<'_, '_, ALLOWED_COUNT, T> {
+ LabelIter::new(self)
+ }
+ /// Returns `true` iff `self` and `right` are part of the same branch in the DNS hierarchy; that is, iff
+ /// every pair of [`Label`]s, walked together from the TLDs until either `Domain` runs out, is equal.
+ ///
+ /// For example `www.example.com` and `example.com` are in the `same_branch`, but `example.com` and
+ /// `foo.com` are not.
+ ///
+ /// Note that trailing `b'.'`s are ignored and ASCII uppercase and lowercase are treated the same.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}};
+ /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap();
+ /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap();
+ /// assert!(dom1.same_branch(&dom2));
+ /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap();
+ /// assert!(!dom1.same_branch(&dom3));
+ /// ```
+ #[inline]
+ pub fn same_branch<const ALLOWED_COUNT2: usize, T2: AsRef<[u8]>>(
+     &self,
+     right: &Domain<ALLOWED_COUNT2, T2>,
+ ) -> bool {
+     // Decided once up front: the flags tell us whether a plain comparison is enough.
+     let ignore_case = self.flag.eq_ignore_case(right.flag);
+     self.into_iter().zip(right).all(|(label, label2)| {
+         if ignore_case {
+             label.value.eq_ignore_ascii_case(label2.value)
+         } else {
+             label.value == label2.value
+         }
+     })
+ }
+ /// Same as [`Self::cmp_doms`] except returns [`DomainOrdering::Longer`] iff `self > right` due solely
+ /// to having more [`Label`]s and [`DomainOrdering::Shorter`] iff `self < right` due solely to having
+ /// fewer `Label`s.
+ ///
+ /// For example `example.com` < `www.example.com` and `bar.com` < `www.example.com`; but with this function,
+ /// `example.com` is [`DomainOrdering::Shorter`] than `www.example.com` and `www.example.com` is
+ /// [`DomainOrdering::Longer`] than `example.com`; while `bar.com` is [`DomainOrdering::Less`] than
+ /// `www.example.com` and `www.example.com` is [`DomainOrdering::Greater`] than `bar.com`.
+ ///
+ /// In other words `DomainOrdering::Shorter` implies `Ordering::Less` and `DomainOrdering::Longer` implies
+ /// `Ordering::Greater` with additional information pertaining to the quantity of `Label`s.
+ ///
+ /// `Label`s are compared pairwise starting from the TLDs with ASCII uppercase treated as lowercase.
+ /// Trailing `b'.'`s are ignored. No memory is allocated.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use ascii_domain::{dom::{Domain, DomainOrdering}, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}};
+ /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap();
+ /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom1), DomainOrdering::Equal));
+ /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap();
+ /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom2), DomainOrdering::Shorter));
+ /// assert!(matches!(dom2.cmp_by_domain_ordering(&dom1), DomainOrdering::Longer));
+ /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap();
+ /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom3), DomainOrdering::Less));
+ /// assert!(matches!(dom3.cmp_by_domain_ordering(&dom1), DomainOrdering::Greater));
+ /// ```
+ #[inline]
+ pub fn cmp_by_domain_ordering<const ALLOWED_COUNT2: usize, T2: AsRef<[u8]>>(
+     &self,
+     right: &Domain<ALLOWED_COUNT2, T2>,
+ ) -> DomainOrdering {
+     /// Lexicographically compares two labels byte by byte with ASCII uppercase folded to lowercase.
+     #[inline]
+     fn cmp_label(left: &[u8], right: &[u8]) -> Ordering {
+         left.iter()
+             .map(u8::to_ascii_lowercase)
+             .cmp(right.iter().map(u8::to_ascii_lowercase))
+     }
+     // `as_bytes` omits the trailing `b'.'`, so splitting from the right yields each label starting
+     // with the TLD. Folding case lazily per byte avoids copying either value or its label lengths.
+     let mut left_labels = self.as_bytes().rsplit(|byt| *byt == b'.');
+     let mut right_labels = right.as_bytes().rsplit(|byt| *byt == b'.');
+     loop {
+         match (left_labels.next(), right_labels.next()) {
+             // Both ran out together without a differing label.
+             (None, None) => return DomainOrdering::Equal,
+             // Every shared label was equal; only the label counts differ.
+             (None, Some(_)) => return DomainOrdering::Shorter,
+             (Some(_), None) => return DomainOrdering::Longer,
+             (Some(label), Some(label2)) => match cmp_label(label, label2) {
+                 Ordering::Less => return DomainOrdering::Less,
+                 Ordering::Equal => {}
+                 Ordering::Greater => return DomainOrdering::Greater,
+             },
+         }
+     }
+ }
+ /// Compares two `Domain`s using the total order defined by the following hierarchy:
+ /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs.
+ /// 2. If 1. evaluates as not equivalent, then return the result.
+ /// 3. Return the comparison of `Label` counts.
+ ///
+ /// For example, `com` < `example.com` < `net` < `example.net`.
+ ///
+ /// This is the same as the [canonical DNS name order](https://datatracker.ietf.org/doc/html/rfc4034#section-6.1).
+ /// ASCII uppercase is treated as ASCII lowercase and trailing `b'.'`s are ignored.
+ /// The [`AllowedAscii`]s in the `Domain`s are ignored.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use core::cmp::Ordering;
+ /// use ascii_domain::{dom::Domain, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}};
+ /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap();
+ /// assert!(matches!(dom1.cmp_doms(&dom1), Ordering::Equal));
+ /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap();
+ /// assert!(matches!(dom1.cmp_doms(&dom2), Ordering::Less));
+ /// assert!(matches!(dom2.cmp_doms(&dom1), Ordering::Greater));
+ /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap();
+ /// assert!(matches!(dom1.cmp_doms(&dom3), Ordering::Less));
+ /// assert!(matches!(dom3.cmp_doms(&dom1), Ordering::Greater));
+ /// ```
+ #[inline]
+ pub fn cmp_doms<const ALLOWED_COUNT2: usize, T2: AsRef<[u8]>>(
+     &self,
+     right: &Domain<ALLOWED_COUNT2, T2>,
+ ) -> Ordering {
+     // `Shorter` collapses to `Less` and `Longer` to `Greater`.
+     let detailed = self.cmp_by_domain_ordering(right);
+     Ordering::from(detailed)
+ }
+ /// Returns the TLD, i.e. the last `Label` of the `Domain`.
+ ///
+ /// # Example
+ ///
+ /// ```
+ /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+ /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().tld().as_str() == "com");
+ /// ```
+ #[allow(clippy::unreachable)]
+ #[inline]
+ pub fn tld(&self) -> Label<'_> {
+     // Iteration starts at the TLD, and a `Domain` always has at least one `Label`.
+     match self.into_iter().next() {
+         Some(label) => label,
+         None => unreachable!("there is a bug in Domain::try_from_bytes"),
+     }
+ }
+}
+impl<
+        'a,
+        'b,
+        const ALLOWED_COUNT: usize,
+        const ALLOWED_COUNT2: usize,
+        T: AsRef<[u8]>,
+        T2: AsRef<[u8]>,
+    > PartialEq<Domain<'a, ALLOWED_COUNT, T>> for Domain<'b, ALLOWED_COUNT2, T2>
+{
+    /// Compares the bytes of the two `Domain`s without regard to either one's [`AllowedAscii`].
+    /// Note uppercase ASCII is treated as lowercase ASCII and trailing `b'.'`s are ignored.
+    #[inline]
+    fn eq(&self, other: &Domain<ALLOWED_COUNT, T>) -> bool {
+        let (left, right) = (self.as_bytes(), other.as_bytes());
+        // The flags tell us when a plain byte comparison gives the same answer.
+        if self.flag.eq_ignore_case(other.flag) {
+            left.eq_ignore_ascii_case(right)
+        } else {
+            left == right
+        }
+    }
+}
+impl<
+        'a,
+        'b,
+        const ALLOWED_COUNT: usize,
+        const ALLOWED_COUNT2: usize,
+        T: AsRef<[u8]>,
+        T2: AsRef<[u8]>,
+    > PartialEq<&Domain<'a, ALLOWED_COUNT, T>> for Domain<'b, ALLOWED_COUNT2, T2>
+{
+    /// Forwards to the `Domain`-to-`Domain` comparison after dereferencing `other`.
+    #[inline]
+    fn eq(&self, other: &&Domain<'a, ALLOWED_COUNT, T>) -> bool {
+        PartialEq::eq(self, *other)
+    }
+}
+impl<
+        'a,
+        'b,
+        const ALLOWED_COUNT: usize,
+        const ALLOWED_COUNT2: usize,
+        T: AsRef<[u8]>,
+        T2: AsRef<[u8]>,
+    > PartialEq<Domain<'a, ALLOWED_COUNT, T>> for &Domain<'b, ALLOWED_COUNT2, T2>
+{
+    /// Forwards to the `Domain`-to-`Domain` comparison after dereferencing `self`.
+    #[inline]
+    fn eq(&self, other: &Domain<'a, ALLOWED_COUNT, T>) -> bool {
+        PartialEq::eq(*self, other)
+    }
+}
+// Marker impl: `eq` compares the bytes of two `Domain`s ignoring ASCII case and any trailing `b'.'`.
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> Eq for Domain<'_, ALLOWED_COUNT, T> {}
+impl<
+        'a,
+        'b,
+        const ALLOWED_COUNT: usize,
+        const ALLOWED_COUNT2: usize,
+        T: AsRef<[u8]>,
+        T2: AsRef<[u8]>,
+    > PartialOrd<Domain<'a, ALLOWED_COUNT, T>> for Domain<'b, ALLOWED_COUNT2, T2>
+{
+    /// Always returns `Some` since any two `Domain`s are comparable. Consult [`Self::cmp_doms`].
+    #[inline]
+    fn partial_cmp(&self, other: &Domain<ALLOWED_COUNT, T>) -> Option<Ordering> {
+        let ordering = self.cmp_doms(other);
+        Some(ordering)
+    }
+}
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> Ord for Domain<'_, ALLOWED_COUNT, T> {
+    /// Total order over `Domain`s. Consult [`Self::cmp_doms`].
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        Self::cmp_doms(self, other)
+    }
+}
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> Hash for Domain<'_, ALLOWED_COUNT, T> {
+    /// Hashes the bytes of the domain, without the trailing `b'.'`, as if every ASCII uppercase letter
+    /// were lowercase so that `Domain`s that compare equal hash the same.
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        let bytes = self.as_bytes();
+        let has_uppercase = matches!(
+            self.flag,
+            CharFlag::Upper | CharFlag::LowerUpper | CharFlag::UpperBetween | CharFlag::All
+        );
+        if has_uppercase {
+            // Only values with uppercase letters pay for a temporary lowercase copy.
+            bytes.to_ascii_lowercase().hash(state);
+        } else {
+            bytes.hash(state);
+        }
+    }
+}
+impl<'a, 'b: 'a, const ALLOWED_COUNT: usize, T: AsRef<[u8]>>
+    TryFrom<(T, &'b AllowedAscii<ALLOWED_COUNT>)> for Domain<'a, ALLOWED_COUNT, T>
+{
+    type Error = DomainErr;
+    /// Same as [`Domain::try_from_bytes`] with the value and the allowed ASCII passed as a tuple.
+    #[inline]
+    fn try_from(value: (T, &'b AllowedAscii<ALLOWED_COUNT>)) -> Result<Self, Self::Error> {
+        let (bytes, allowed_ascii) = value;
+        Self::try_from_bytes(bytes, allowed_ascii)
+    }
+}
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> Display for Domain<'_, ALLOWED_COUNT, T> {
+    /// Writes the domain exactly as returned by [`Domain::as_str`], i.e. without a trailing `b'.'`.
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        let text = self.as_str();
+        f.write_str(text)
+    }
+}
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> AsRef<str> for Domain<'_, ALLOWED_COUNT, T> {
+    /// Same as [`Domain::as_str`].
+    #[inline]
+    fn as_ref(&self) -> &str {
+        Self::as_str(self)
+    }
+}
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> AsRef<[u8]> for Domain<'_, ALLOWED_COUNT, T> {
+    /// Same as [`Domain::as_bytes`].
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        Self::as_bytes(self)
+    }
+}
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> Deref for Domain<'_, ALLOWED_COUNT, T> {
+    type Target = str;
+    /// Dereferences to the same `str` as [`Domain::as_str`].
+    #[inline]
+    fn deref(&self) -> &Self::Target {
+        Self::as_str(self)
+    }
+}
+/// Error returned from [`Domain::try_from_bytes`].
+#[allow(clippy::exhaustive_enums)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum DomainErr {
+ /// The domain was empty or consisted solely of a single `b'.'`.
+ Empty,
+ /// The length of the domain was greater than 253 not counting a terminating `b'.'` if there was one. The contained value is that length.
+ LenExceeds253(usize),
+ /// The domain contained at least one empty label.
+ EmptyLabel,
+ /// The domain contained at least one label whose length exceeded 63.
+ LabelLenExceeds63,
+ /// The domain contained an invalid byte value. The contained value is the first such byte.
+ InvalidByte(u8),
+}
+impl Display for DomainErr {
+    /// Writes an English description of why the domain was rejected.
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // Variants that carry data are formatted immediately; the rest share one `write_str`.
+        let message = match *self {
+            Self::LenExceeds253(len) => {
+                return write!(
+                    f,
+                    "domain has length {len} which is greater than the max length of 253"
+                )
+            }
+            Self::InvalidByte(byt) => {
+                return write!(f, "domain has a label with the invalid byte value {byt}")
+            }
+            Self::Empty => "domain is empty",
+            Self::EmptyLabel => "domain has an empty label",
+            Self::LabelLenExceeds63 => "domain has a label that exceeds the max length of 63",
+        };
+        f.write_str(message)
+    }
+}
+// No `Error` method is overridden; the `Display` impl provides the message.
+impl Error for DomainErr {}
+/// A label of a [`Domain`]. The length of a `Label` is inclusively between 1 and 63.
+#[derive(Clone, Copy, Debug)]
+pub struct Label<'a> {
+ /// The label value, without any `b'.'` separator.
+ value: &'a str,
+}
+impl<'a> Label<'a> {
+    /// The maximum length of a `Label` which is 63.
+    // SAFETY: 0 < 63 < 256.
+    #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
+    pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(63) };
+    /// The minimum length of a `Label` which is 1.
+    // SAFETY: 0 < 1 < 256.
+    #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
+    pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) };
+    /// Returns the label as a `str`.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+    /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.as_str() == "com"));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn as_str(self) -> &'a str {
+        self.value
+    }
+    /// Returns `true` iff every byte of the label is an ASCII letter.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
+    /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphabetic()));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_alphabetic(self) -> bool {
+        self.value.bytes().all(|byt| byt.is_ascii_alphabetic())
+    }
+    /// Returns `true` iff every byte of the label is an ASCII digit.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE};
+    /// assert!(Domain::try_from_bytes("example.123", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_digits()));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_digits(self) -> bool {
+        self.value.bytes().all(|byt| byt.is_ascii_digit())
+    }
+    /// Returns `true` iff every byte of the label is an ASCII digit or letter.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE};
+    /// assert!(Domain::try_from_bytes("example.1com", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphanumeric()));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_alphanumeric(self) -> bool {
+        self.value.bytes().all(|byt| byt.is_ascii_alphanumeric())
+    }
+    /// Returns `true` iff every byte of the label is an ASCII hyphen, digit, or letter.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::{dom::Domain, char_set::ASCII_HYPHEN_DIGITS_LOWERCASE};
+    /// assert!(Domain::try_from_bytes("example.1-com", &ASCII_HYPHEN_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_hyphen_or_alphanumeric()));
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_hyphen_or_alphanumeric(self) -> bool {
+        self.value
+            .bytes()
+            .all(|byt| byt == b'-' || byt.is_ascii_alphanumeric())
+    }
+}
+impl PartialEq<Label<'_>> for Label<'_> {
+    /// Two `Label`s are equal iff their values match when ASCII case is ignored.
+    #[inline]
+    fn eq(&self, other: &Label<'_>) -> bool {
+        self.as_str().eq_ignore_ascii_case(other.as_str())
+    }
+}
+// Marker only: equality is the ASCII-case-insensitive comparison implemented above.
+impl Eq for Label<'_> {}
+impl PartialOrd<Label<'_>> for Label<'_> {
+    /// Always returns `Some` since `Label`s are totally ordered by [`Ord::cmp`].
+    #[inline]
+    fn partial_cmp(&self, other: &Label<'_>) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+impl Ord for Label<'_> {
+    /// Lexicographically compares the bytes of both labels after mapping each byte to ASCII lowercase.
+    #[inline]
+    fn cmp(&self, other: &Self) -> Ordering {
+        // Lowercasing lazily, one byte at a time, produces the same ordering as comparing two
+        // lowercased `String`s without allocating on every comparison.
+        self.value
+            .as_bytes()
+            .iter()
+            .map(u8::to_ascii_lowercase)
+            .cmp(other.value.as_bytes().iter().map(u8::to_ascii_lowercase))
+    }
+}
+impl Hash for Label<'_> {
+    /// Hashes the label with ASCII case ignored so that `Label`s that compare equal hash equally.
+    #[inline]
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        // Feed the lowercased bytes directly instead of allocating a lowercased `String` first.
+        for byt in self.value.as_bytes() {
+            state.write_u8(byt.to_ascii_lowercase());
+        }
+        // Terminator that keeps the hash prefix-free when a `Label` is hashed as part of a larger
+        // key. `0xff` is not ASCII, so it never occurs inside a label.
+        state.write_u8(0xff);
+    }
+}
+impl Display for Label<'_> {
+    /// Writes the label exactly as it is stored (i.e., the original case is kept).
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        f.write_str(self.as_str())
+    }
+}
+impl<'a> AsRef<[u8]> for Label<'a> {
+    /// The bytes of the label.
+    #[inline]
+    fn as_ref(&self) -> &'a [u8] {
+        self.as_str().as_bytes()
+    }
+}
+impl<'a> AsRef<str> for Label<'a> {
+    /// Same value as [`Label::as_str`].
+    #[inline]
+    fn as_ref(&self) -> &'a str {
+        self.as_str()
+    }
+}
+impl<'a> Deref for Label<'a> {
+    type Target = str;
+    /// Same value as [`Label::as_str`].
+    #[inline]
+    fn deref(&self) -> &'a Self::Target {
+        self.as_str()
+    }
+}
+/// [`Iterator`] that iterates [`Label`]s from a borrowed [`Domain`] starting from the TLD down.
+///
+/// Iterating from the back (i.e., via [`DoubleEndedIterator::next_back`]) starts from the left-most `Label`
+/// instead.
+pub struct LabelIter<'a, 'b: 'a, const ALLOWED_COUNT: usize, T> {
+ /// Domain that contains `Label`s to iterate.
+ domain: &'a Domain<'b, ALLOWED_COUNT, T>,
+ /// Index of the length of the next `Label` to yield from the front.
+ /// Starts at domain.label_count().get() - 1 which is valid since domain.label_count().get() > 0.
+ /// idx is 255 when the iterator is exhausted.
+ /// Since idx is decremented each time and it starts at a value less than 254, this is a valid value to use
+ /// as a flag.
+ idx: u8,
+ /// This is used to mark the start of a label before the length of the label has been subtracted.
+ /// After a label is read, 1 must be subtracted to account for '.'.
+ start: u8,
+ /// Index of the length of the next `Label` to yield from the back.
+ /// Starts at 0 which is valid since domain.label_count().get() > 0.
+ /// idx_back is 255 when the iterator is exhausted.
+ /// Since idx_back is incremented each time and the max label count is 127, this is a valid value to
+ /// use as a flag.
+ idx_back: u8,
+ /// This is used to mark the start of a label before the length of the label has been added.
+ /// After a label is read, 1 must be added to account for '.'.
+ start_back: u8,
+}
+impl<'a, 'b: 'a, const ALLOWED_COUNT: usize, T: AsRef<[u8]>> LabelIter<'a, 'b, ALLOWED_COUNT, T> {
+    /// Builds an iterator over `domain` whose front cursor sits on the last label length and whose
+    /// back cursor sits on the first.
+    #[allow(clippy::arithmetic_side_effects)]
+    #[inline]
+    fn new(domain: &'a Domain<'b, ALLOWED_COUNT, T>) -> Self {
+        // `label_count` is at least 1, so subtracting 1 cannot underflow.
+        let last_idx = domain.label_count().get() - 1;
+        let total_len = domain.len().get();
+        Self {
+            domain,
+            idx: last_idx,
+            start: total_len,
+            idx_back: 0,
+            start_back: 0,
+        }
+    }
+}
+impl<'a, const ALLOWED_COUNT: usize, T: AsRef<[u8]>> Iterator
+ for LabelIter<'a, '_, ALLOWED_COUNT, T>
+{
+ type Item = Label<'a>;
+ /// Yields the next [`Label`] from the front (i.e., starting from the TLD and moving left).
+ #[allow(
+ unsafe_code,
+ clippy::arithmetic_side_effects,
+ clippy::as_conversions,
+ clippy::indexing_slicing
+ )]
+ #[inline]
+ fn next(&mut self) -> Option<Self::Item> {
+ // `get` returns `None` once `idx` has been set to the exhaustion flag 255.
+ self.domain.label_lens.get(self.idx as usize).map(|len| {
+ // This won't underflow since `start` is initialized to the length of the domain.
+ // The total sum of the label lengths plus the separators is equal to start.
+ self.start -= len.get();
+ // Overflow clearly won't happen since we subtracted `len` from `start` above.
+ // Indexing won't `panic` either since `start` is greater than 0 since it's
+ // only 0 if `idx` is 0 which only happens _after_ all labels have been iterated.
+ let input =
+ &self.domain.as_bytes()[self.start as usize..(self.start + len.get()) as usize];
+ // SAFETY:
+ // This is safe since we only allow ASCII, so the above indexing is fine.
+ let value = unsafe { str::from_utf8_unchecked(input) };
+ let label = Label { value };
+ // Iteration is over when the first label was just read or when the front cursor has met
+ // the back cursor.
+ if self.idx == 0 || self.idx <= self.idx_back {
+ // 255 is never valid, so it's used as a flag to indicate iteration is done.
+ // In particular when we call `label_lens.get` with 255, it will return `None`.
+ self.idx = 255;
+ self.idx_back = 255;
+ } else {
+ // We check above that `idx` is not 0, so this won't underflow.
+ self.idx -= 1;
+ // `start` is only 0 when `idx` is, so this won't underflow.
+ self.start -= 1;
+ }
+ label
+ })
+ }
+ /// The lower and upper bounds are both the exact number of remaining [`Label`]s.
+ #[inline]
+ fn size_hint(&self) -> (usize, Option<usize>) {
+ let len = self.len();
+ (len, Some(len))
+ }
+ /// Returns the final [`Label`] directly by reading it from the back instead of stepping through
+ /// every remaining `Label`.
+ #[inline]
+ fn last(mut self) -> Option<Self::Item>
+ where
+ Self: Sized,
+ {
+ let opt = self.next_back();
+ // Mark the iterator as exhausted.
+ self.idx = 255;
+ self.idx_back = 255;
+ opt
+ }
+}
+// Sound since `next` and `next_back` set both cursors to 255 once iteration is over, and looking up
+// index 255 in the label lengths always yields `None`.
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> FusedIterator
+    for LabelIter<'_, '_, ALLOWED_COUNT, T>
+{
+}
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> ExactSizeIterator
+    for LabelIter<'_, '_, ALLOWED_COUNT, T>
+{
+    /// The number of [`Label`]s that have not been yielded yet.
+    #[allow(clippy::arithmetic_side_effects)]
+    #[inline]
+    fn len(&self) -> usize {
+        match self.idx {
+            // 255 is the flag used to mark the iterator as exhausted.
+            255 => 0,
+            // `idx` is always >= `idx_back`, so the subtraction cannot underflow. Adding 1 cannot
+            // overflow since the max value of `idx` is 126 once the 255 flag is ruled out.
+            idx => usize::from(idx - self.idx_back + 1),
+        }
+    }
+}
+impl<const ALLOWED_COUNT: usize, T: AsRef<[u8]>> DoubleEndedIterator
+ for LabelIter<'_, '_, ALLOWED_COUNT, T>
+{
+ /// Yields the next [`Label`] from the back (i.e., starting from the left-most `Label` and moving
+ /// towards the TLD).
+ #[allow(
+ unsafe_code,
+ clippy::arithmetic_side_effects,
+ clippy::as_conversions,
+ clippy::indexing_slicing
+ )]
+ #[inline]
+ fn next_back(&mut self) -> Option<Self::Item> {
+ // `get` returns `None` once `idx_back` has been set to the exhaustion flag 255.
+ self.domain
+ .label_lens
+ .get(self.idx_back as usize)
+ .map(|len| {
+ // Indexing won't `panic` since `start_back` is never too large.
+ let input = &self.domain.as_bytes()
+ [self.start_back as usize..(self.start_back + len.get()) as usize];
+ // SAFETY:
+ // This is safe since we only allow ASCII, so the above indexing is fine.
+ let value = unsafe { str::from_utf8_unchecked(input) };
+ let label = Label { value };
+ // `idx_back` starts at 0 and is only incremented after we iterate a `Label` backwards.
+ // Since there are at most 127 `Label`s, this won't overflow.
+ // Iteration is over when the last label was just read or when the back cursor has met
+ // the front cursor.
+ if self.idx_back + 1 == self.domain.label_count().get() || self.idx_back >= self.idx
+ {
+ // `255` is used as a "flag" to indicate iteration is over since 255 is never valid.
+ // In particular this means that `label_lens.get` will return `None` when passed
+ // `idx` or `idx_back`.
+ self.idx = 255;
+ self.idx_back = 255;
+ } else {
+ // Due to the call to `get`, this will only happen when `idx_back` <
+ // `label_lens.len()` which in turn is always below 128 making
+ // overflow not possible.
+ self.idx_back += 1;
+ // `len` is always below 64 so adding 1 won't cause overflow.
+ // `start_back` is initialized to 0 and is only increased here.
+ // This means this will only ever be as large as the total length
+ // of the domain plus 1 which is less than 255.
+ self.start_back += len.get() + 1;
+ }
+ label
+ })
+ }
+}
+// Allows `for label in &domain`; the `Label`s are yielded starting from the TLD.
+impl<'a, 'b: 'a, const ALLOWED_COUNT: usize, T: AsRef<[u8]>> IntoIterator
+ for &'a Domain<'b, ALLOWED_COUNT, T>
+{
+ type Item = Label<'a>;
+ type IntoIter = LabelIter<'a, 'b, ALLOWED_COUNT, T>;
+ /// Returns a [`LabelIter`] that borrows the `Domain`.
+ #[inline]
+ fn into_iter(self) -> Self::IntoIter {
+ LabelIter::new(self)
+ }
+}
+/// Error returned from [`Rfc1123Domain::try_from`].
+///
+/// The allowed ASCII is verified first followed by the trailing dot. Only then is each [`Label`] verified
+/// where the start of a `Label` is checked before its end.
+#[allow(clippy::exhaustive_enums)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum Rfc1123Err {
+ /// [`Domain::allowed_ascii`] is not [`ASCII_HYPHEN_DIGITS_LETTERS`].
+ InvalidAllowedAscii,
+ /// The [`Domain`] has a trailing `b'.'`.
+ ContainsTrailingDot,
+ /// A [`Label`] of [`Domain`] starts with an ASCII hyphen.
+ LabelStartsWithAHyphen,
+ /// A [`Label`] of [`Domain`] ends with an ASCII hyphen.
+ LabelEndsWithAHyphen,
+}
+impl Display for Rfc1123Err {
+    /// Writes a fixed, human-readable description of the error.
+    #[inline]
+    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+        // No variant carries data, so each one maps to a static message.
+        let msg = match *self {
+            Self::InvalidAllowedAscii => "the allowed ASCII is not letters, digits, and hyphen",
+            Self::ContainsTrailingDot => "the domain contained a trailing dot",
+            Self::LabelStartsWithAHyphen => "a label in the domain starts with a hyphen",
+            Self::LabelEndsWithAHyphen => "a label in the domain ends with a hyphen",
+        };
+        f.write_str(msg)
+    }
+}
+// The body is empty, so `Rfc1123Err` uses the default implementations of the `Error` methods.
+impl Error for Rfc1123Err {}
+/// **TL;DR**
+///
+/// Wrapper type around a [`Domain`] that enforces conformance to
+/// [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123#page-13).
+///
+/// * Each [`Label`] must only contain ASCII digits, letters, or hyphen.
+/// * Each `Label` must not begin or end with a hyphen.
+/// * A trailing `b'.'` must not exist.
+///
+/// ---
+///
+/// Unsurprisingly, RFC 1123 is not super precise as it uses "host name" to mean label and also domain:
+/// "Host software MUST handle host names \[labels\] of up to 63 characters and SHOULD handle host
+/// names \[domains\] of up to 255 characters". It also states that only "one aspect of host name syntax is hereby
+/// changed" from [RFC 952](https://www.rfc-editor.org/rfc/rfc952): "the restriction on the first character
+/// is relaxed to allow either a letter or a digit". Despite that, it goes on to mention other restrictions
+/// not mentioned in RFC 952: "the highest-level component label will be alphabetic". It is therefore
+/// important to understand how this type interprets that RFC and why it does so.
+///
+/// The primary issue with RFC 1123 is the unjustified comment about the TLD being alphabetic. It is given
+/// as if it is common knowledge. As explained by (the rejected)
+/// [Errata 1353](https://www.rfc-editor.org/errata/eid1353), there seemed to be the assumption that the TLDs
+/// at the time would be the only ones that would ever exist or at least that the format of them would always be
+/// true. This leads to several possible interpretations:
+///
+/// * Strictest: enforce the TLD is one of the TLDs that existed at the time of the RFC.
+/// * Strict: enforce the TLD has the same format as the TLDs at the time (i.e., two or three letters long).
+/// * Literal: enforce the TLD is alphabetic regardless of the lack of justification.
+/// * Relaxed: enforce the "spirit" that the TLD must exist.
+/// * More relaxed: enforce the "spirit" that the TLD must have the same format of a valid TLD.
+/// * Most relaxed: treat TLDs no differently than other labels (i.e., don't make assumptions about what will be
+/// a valid TLD in the future).
+///
+/// RFC 1123 is not obsolete, and it is clear from more recent RFCs like
+/// [RFC 5891](https://www.rfc-editor.org/rfc/rfc5891) that it is designed to be a foundation (i.e., domains that
+/// are valid per newer RFCs are valid per RFC 1123). Clearly due to RFCs like RFC 5891, requiring the TLD
+/// to be alphabetic or exactly two or three characters long would violate that. For those reasons the strictest,
+/// strict, and literal interpretations are rejected.
+///
+/// Assuming TLDs are static is absurd, and relying on some dynamic list of TLDs is undesirable. For that reason
+/// the relaxed interpretation is rejected.
+///
+/// To enforce that the TLD is of the correct format would require RFC 5891 semantics to ensure the TLD is
+/// either a valid A-label or NR-LDH label. This is not a cheap operation, and it treats TLDs inconsistently
+/// (specifically more strictly validated) than other labels. RFC 1123 should be treated as the foundation
+/// until it is made obsolete. In the future there may be other RFCs that further restrict valid TLDs, and treating
+/// RFC 1123 as if it were written presciently does not make sense. For that reason the more relaxed interpretation
+/// is rejected. Consequently we use the most relaxed interpretation.
+///
+/// The strict and literal interpretations can still be tested for after construction via
+/// [`Rfc1123Domain::is_strict_interpretation`] and [`Rfc1123Domain::is_literal_interpretation`] respectively.
+#[derive(Clone, Debug)]
+pub struct Rfc1123Domain<'a, T> {
+ /// The domain. Its allowed ASCII is verified to be [`ASCII_HYPHEN_DIGITS_LETTERS`] on construction.
+ dom: Domain<'a, 63, T>,
+}
+impl<T> Rfc1123Domain<'_, T> {
+    /// Returns the contained [`Domain`].
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS};
+    /// let dom = Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap();
+    /// let dom2 = dom.clone();
+    /// assert!(Rfc1123Domain::try_from(dom).unwrap().domain() == dom2);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub const fn domain(&self) -> &Domain<'_, 63, T> {
+        &self.dom
+    }
+}
+impl<T: AsRef<[u8]>> Rfc1123Domain<'_, T> {
+    /// Returns `true` iff the domain adheres to the literal interpretation of RFC 1123. For more information
+    /// read the description of [`Rfc1123Domain`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS};
+    /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("example.commmm", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_literal_interpretation());
+    /// assert!(!Rfc1123Domain::try_from(Domain::try_from_bytes("example.c1m", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_literal_interpretation());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_literal_interpretation(&self) -> bool {
+        self.dom.tld().is_alphabetic()
+    }
+    /// Returns `true` iff the domain adheres to the strict interpretation of RFC 1123. For more information
+    /// read the description of [`Rfc1123Domain`].
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS};
+    /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("example.Com", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_strict_interpretation());
+    /// assert!(!Rfc1123Domain::try_from(Domain::try_from_bytes("example.comm", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_strict_interpretation());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_strict_interpretation(&self) -> bool {
+        let tld = self.dom.tld();
+        // The TLD must be exactly two or three letters long.
+        matches!(tld.len(), 2 | 3) && tld.is_alphabetic()
+    }
+    /// Returns `true` iff the domain has the same format as an [`Ipv4Addr`].
+    ///
+    /// Note that due to the most relaxed interpretation of RFC 1123 mentioned in [`Rfc1123Domain`], it is possible
+    /// for the domain to be an IPv4 address unlike the strictest, strict, literal, and possibly relaxed
+    /// interpretations.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// use ascii_domain::{dom::{Domain, Rfc1123Domain}, char_set::ASCII_HYPHEN_DIGITS_LETTERS};
+    /// assert!(Rfc1123Domain::try_from(Domain::try_from_bytes("1.2.3.4", &ASCII_HYPHEN_DIGITS_LETTERS).unwrap()).unwrap().is_ipv4());
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn is_ipv4(&self) -> bool {
+        Ipv4Addr::from_str(&self.dom).is_ok()
+    }
+}
+// Every equality impl below delegates to the equality of the wrapped `Domain`s.
+// `Rfc1123Domain` == `Rfc1123Domain`.
+impl<'a, 'b, T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<'a, T>>
+ for Rfc1123Domain<'b, T2>
+{
+ #[inline]
+ fn eq(&self, other: &Rfc1123Domain<'a, T>) -> bool {
+ self.dom == other.dom
+ }
+}
+// `Rfc1123Domain` == `&Rfc1123Domain`.
+impl<'a, 'b, T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Rfc1123Domain<'a, T>>
+ for Rfc1123Domain<'b, T2>
+{
+ #[inline]
+ fn eq(&self, other: &&Rfc1123Domain<'a, T>) -> bool {
+ self.dom == other.dom
+ }
+}
+// `&Rfc1123Domain` == `Rfc1123Domain`.
+impl<'a, 'b, T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<'a, T>>
+ for &Rfc1123Domain<'b, T2>
+{
+ #[inline]
+ fn eq(&self, other: &Rfc1123Domain<'a, T>) -> bool {
+ self.dom == other.dom
+ }
+}
+// `Domain` == `Rfc1123Domain`.
+impl<'a, 'b, const ALLOWED_COUNT: usize, T: AsRef<[u8]>, T2: AsRef<[u8]>>
+ PartialEq<Rfc1123Domain<'a, T>> for Domain<'b, ALLOWED_COUNT, T2>
+{
+ #[inline]
+ fn eq(&self, other: &Rfc1123Domain<'a, T>) -> bool {
+ *self == other.dom
+ }
+}
+// `&Domain` == `Rfc1123Domain`.
+impl<'a, 'b, const ALLOWED_COUNT: usize, T: AsRef<[u8]>, T2: AsRef<[u8]>>
+ PartialEq<Rfc1123Domain<'a, T>> for &Domain<'b, ALLOWED_COUNT, T2>
+{
+ #[inline]
+ fn eq(&self, other: &Rfc1123Domain<'a, T>) -> bool {
+ **self == other.dom
+ }
+}
+// `Domain` == `&Rfc1123Domain`.
+impl<'a, 'b, const ALLOWED_COUNT: usize, T: AsRef<[u8]>, T2: AsRef<[u8]>>
+ PartialEq<&Rfc1123Domain<'a, T>> for Domain<'b, ALLOWED_COUNT, T2>
+{
+ #[inline]
+ fn eq(&self, other: &&Rfc1123Domain<'a, T>) -> bool {
+ *self == other.dom
+ }
+}
+// `Rfc1123Domain` == `Domain`.
+impl<'a, 'b, const ALLOWED_COUNT: usize, T: AsRef<[u8]>, T2: AsRef<[u8]>>
+ PartialEq<Domain<'a, ALLOWED_COUNT, T>> for Rfc1123Domain<'b, T2>
+{
+ #[inline]
+ fn eq(&self, other: &Domain<'a, ALLOWED_COUNT, T>) -> bool {
+ self.dom == *other
+ }
+}
+impl<T: AsRef<[u8]>> Eq for Rfc1123Domain<'_, T> {}
+// Every ordering impl below delegates to the ordering of the wrapped `Domain`s.
+// `Rfc1123Domain` compared with `Rfc1123Domain`.
+impl<'a, 'b, T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Rfc1123Domain<'a, T>>
+ for Rfc1123Domain<'b, T2>
+{
+ #[inline]
+ fn partial_cmp(&self, other: &Rfc1123Domain<'a, T>) -> Option<Ordering> {
+ self.dom.partial_cmp(&other.dom)
+ }
+}
+// `Domain` compared with `Rfc1123Domain`.
+impl<'a, 'b, const ALLOWED_COUNT: usize, T: AsRef<[u8]>, T2: AsRef<[u8]>>
+ PartialOrd<Rfc1123Domain<'a, T>> for Domain<'b, ALLOWED_COUNT, T2>
+{
+ #[inline]
+ fn partial_cmp(&self, other: &Rfc1123Domain<'a, T>) -> Option<Ordering> {
+ self.partial_cmp(&other.dom)
+ }
+}
+// `Rfc1123Domain` compared with `Domain`.
+impl<'a, 'b, const ALLOWED_COUNT: usize, T: AsRef<[u8]>, T2: AsRef<[u8]>>
+ PartialOrd<Domain<'a, ALLOWED_COUNT, T>> for Rfc1123Domain<'b, T2>
+{
+ #[inline]
+ fn partial_cmp(&self, other: &Domain<'a, ALLOWED_COUNT, T>) -> Option<Ordering> {
+ self.dom.partial_cmp(other)
+ }
+}
+impl<T: AsRef<[u8]>> Ord for Rfc1123Domain<'_, T> {
+ #[inline]
+ fn cmp(&self, other: &Self) -> Ordering {
+ self.dom.cmp(&other.dom)
+ }
+}
+// Hashes exactly what the wrapped `Domain` hashes.
+impl<T: AsRef<[u8]>> Hash for Rfc1123Domain<'_, T> {
+ #[inline]
+ fn hash<H: Hasher>(&self, state: &mut H) {
+ self.dom.hash(state);
+ }
+}
+// Cheap borrow of the wrapped `Domain`.
+impl<'a, 'b: 'a, T> AsRef<Domain<'a, 63, T>> for Rfc1123Domain<'b, T> {
+ #[inline]
+ fn as_ref(&self) -> &Domain<'a, 63, T> {
+ &self.dom
+ }
+}
+// `Eq`, `Ord`, and `Hash` of `Rfc1123Domain` all delegate to the wrapped `Domain`, so the borrowed form
+// compares and hashes the same as the owned form.
+impl<'a, 'b: 'a, T> Borrow<Domain<'a, 63, T>> for Rfc1123Domain<'b, T> {
+ #[inline]
+ fn borrow(&self) -> &Domain<'a, 63, T> {
+ &self.dom
+ }
+}
+// Makes the methods of the wrapped `Domain` callable directly on an `Rfc1123Domain`.
+impl<'a, T> Deref for Rfc1123Domain<'a, T> {
+ type Target = Domain<'a, 63, T>;
+ #[inline]
+ fn deref(&self) -> &Self::Target {
+ &self.dom
+ }
+}
+// Unwraps the `Domain`; this never fails since every `Rfc1123Domain` contains a `Domain`.
+impl<'a, 'b: 'a, T> From<Rfc1123Domain<'b, T>> for Domain<'a, 63, T> {
+ #[inline]
+ fn from(value: Rfc1123Domain<'b, T>) -> Self {
+ value.dom
+ }
+}
+impl<'a, 'b: 'a, T: AsRef<[u8]>> TryFrom<Domain<'b, 63, T>> for Rfc1123Domain<'a, T> {
+    type Error = Rfc1123Err;
+    /// Wraps `value` iff it conforms to RFC 1123 as described in [`Rfc1123Domain`].
+    ///
+    /// # Errors
+    ///
+    /// The first violation found is returned. The checks are performed in the following order:
+    ///
+    /// * [`Rfc1123Err::InvalidAllowedAscii`] iff the allowed ASCII is not [`ASCII_HYPHEN_DIGITS_LETTERS`].
+    /// * [`Rfc1123Err::ContainsTrailingDot`] iff the domain contains a trailing dot.
+    /// * For each [`Label`] starting from the TLD, [`Rfc1123Err::LabelStartsWithAHyphen`] iff it starts
+    ///   with a hyphen; otherwise [`Rfc1123Err::LabelEndsWithAHyphen`] iff it ends with a hyphen.
+    #[inline]
+    fn try_from(value: Domain<'b, 63, T>) -> Result<Self, Self::Error> {
+        if *value.allowed_ascii != ASCII_HYPHEN_DIGITS_LETTERS {
+            return Err(Rfc1123Err::InvalidAllowedAscii);
+        }
+        if value.contains_trailing_dot {
+            return Err(Rfc1123Err::ContainsTrailingDot);
+        }
+        // `starts_with` and `ends_with` need neither indexing nor length arithmetic, so there is no
+        // `panic` path here.
+        for label in &value {
+            let name = label.as_str();
+            if name.starts_with('-') {
+                return Err(Rfc1123Err::LabelStartsWithAHyphen);
+            }
+            if name.ends_with('-') {
+                return Err(Rfc1123Err::LabelEndsWithAHyphen);
+            }
+        }
+        Ok(Self { dom: value })
+    }
+}
+// Formats exactly like the wrapped `Domain`.
+impl<T: AsRef<[u8]>> Display for Rfc1123Domain<'_, T> {
+ #[inline]
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ self.dom.fmt(f)
+ }
+}
+#[cfg(test)]
+mod tests {
+ use super::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err};
+ use crate::char_set::{self, ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS};
+ use core::cmp::Ordering;
+ #[test]
+ fn test_dom_parse() {
+     let allowed_ascii = ASCII_FIREFOX;
+     // Test empty is error.
+     assert!(Domain::try_from_bytes("", &allowed_ascii)
+         .map_or_else(|e| e == DomainErr::Empty, |_| false));
+     assert!(Domain::try_from_bytes(".", &allowed_ascii)
+         .map_or_else(|e| e == DomainErr::Empty, |_| false));
+     // Test empty label is error.
+     assert!(Domain::try_from_bytes("a..com", &allowed_ascii)
+         .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false));
+     // The long inputs below are constructed so that their lengths are evident from the code.
+     let label_61 = "a".repeat(61);
+     let label_62 = "a".repeat(62);
+     let label_63 = "a".repeat(63);
+     let label_64 = "a".repeat(64);
+     // Test label too long.
+     let label_too_long = format!("www.{label_64}.com");
+     // 4 + 64 + 4
+     assert!(label_too_long.len() == 72);
+     assert!(
+         Domain::try_from_bytes(label_too_long.as_str(), &allowed_ascii)
+             .map_or_else(|e| e == DomainErr::LabelLenExceeds63, |_| false)
+     );
+     // 4 + 63 + 4
+     let label_max = format!("www.{label_63}.com");
+     assert!(Domain::try_from_bytes(label_max.as_str(), &allowed_ascii)
+         .map_or(false, |d| d.len().get() == 71));
+     // Test domain too long.
+     // 3 * (63 + 1) + 62
+     let dom_too_long = format!("{label_63}.{label_63}.{label_63}.{label_62}");
+     assert!(Domain::try_from_bytes(dom_too_long.as_str(), &allowed_ascii)
+         .map_or_else(|e| e == DomainErr::LenExceeds253(254), |_| false));
+     // 3 * (63 + 1) + 61
+     let dom_max = format!("{label_63}.{label_63}.{label_63}.{label_61}");
+     assert!(Domain::try_from_bytes(dom_max.as_str(), &allowed_ascii)
+         .map_or(false, |d| d.len().get() == 253));
+     // Test max labels.
+     // 128 single-character labels: 128 + 127 separators.
+     let labels_128 = format!("{}a", "a.".repeat(127));
+     assert!(Domain::try_from_bytes(labels_128.as_str(), &allowed_ascii)
+         .map_or_else(|e| e == DomainErr::LenExceeds253(255), |_| false));
+     // 127 single-character labels: 127 + 126 separators.
+     let labels_127 = format!("{}a", "a.".repeat(126));
+     assert!(Domain::try_from_bytes(labels_127.as_str(), &allowed_ascii)
+         .map_or(false, |d| d.label_count().get() == 127 && d.len().get() == 253));
+     // 127 single-character labels followed by a trailing '.'.
+     let labels_127_dot = "a.".repeat(127);
+     assert!(Domain::try_from_bytes(labels_127_dot.as_str(), &allowed_ascii)
+         .map_or(false, |d| d.label_count().get() == 127 && d.len().get() == 253));
+     // Test removal of trailing '.'.
+     assert!(
+         Domain::try_from_bytes("com.", &allowed_ascii).map_or(false, |d| d.as_str() == "com")
+     );
+     // Test case-insensitivity.
+     assert!(
+         Domain::try_from_bytes("wwW.ExAMple.COm", &allowed_ascii).map_or(false, |d| {
+             Domain::try_from_bytes("www.example.com", &allowed_ascii)
+                 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal)
+         })
+     );
+     assert!(
+         Domain::try_from_bytes("ww_W.com", &allowed_ascii).map_or(false, |d| {
+             Domain::try_from_bytes("Ww_w.com", &allowed_ascii)
+                 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal)
+         })
+     );
+     // Test valid bytes
+     let mut input;
+     let mut counter = 0;
+     for i in 0..=127 {
+         input = [i];
+         match i {
+             b'!'
+             | b'$'
+             | b'&'..=b')'
+             | b'+'..=b'-'
+             | b'0'..=b'9'
+             | b';'
+             | b'='
+             | b'A'..=b'Z'
+             | b'_'..=b'{'
+             | b'}'..=b'~' => {
+                 counter += 1;
+                 assert!(
+                     Domain::try_from_bytes(input, &allowed_ascii).map_or(false, |d| d
+                         .value
+                         .len()
+                         == 1
+                         && d.value == input)
+                 )
+             }
+             // A lone '.' is an empty domain, so it is tested as a trailing dot instead.
+             b'.' => {
+                 let input2 = b"a.";
+                 assert!(
+                     Domain::try_from_bytes(input2, &allowed_ascii).map_or(false, |d| d
+                         .as_str()
+                         .len()
+                         == 1
+                         && d.value == input2)
+                 )
+             }
+             _ => assert!(Domain::try_from_bytes(input, &allowed_ascii)
+                 .map_or_else(|e| e == DomainErr::InvalidByte(i), |_| false)),
+         }
+     }
+     assert!(counter == 78);
+ }
+ #[test]
+ fn test_dom_into_iter() {
+     let allowed_ascii = ASCII_FIREFOX;
+     // Forward iteration starts at the TLD; `len` shrinks by one per yielded label.
+     assert!(
+         Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
+             let mut iter = d.into_iter();
+             iter.len() == 3
+                 && iter.next().map_or(false, |l| l.value == "com")
+                 && iter.len() == 2
+                 && iter.next().map_or(false, |l| l.value == "example")
+                 && iter.len() == 1
+                 && iter.next().map_or(false, |l| l.value == "www")
+                 && iter.len() == 0
+                 && iter.next().is_none()
+         })
+     );
+     // Backward iteration starts at the left-most label.
+     assert!(
+         Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
+             let mut iter = d.into_iter();
+             iter.len() == 3
+                 && iter.next_back().map_or(false, |l| l.value == "www")
+                 && iter.len() == 2
+                 && iter.next_back().map_or(false, |l| l.value == "example")
+                 && iter.len() == 1
+                 && iter.next_back().map_or(false, |l| l.value == "com")
+                 && iter.len() == 0
+                 && iter.next_back().is_none()
+         })
+     );
+     // Mixing both directions must yield each label exactly once.
+     assert!(
+         Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
+             let mut iter = d.into_iter();
+             iter.len() == 3
+                 && iter.next_back().map_or(false, |l| l.value == "www")
+                 && iter.len() == 2
+                 && iter.next().map_or(false, |l| l.value == "com")
+                 && iter.len() == 1
+                 && iter.next_back().map_or(false, |l| l.value == "example")
+                 && iter.len() == 0
+                 && iter.next().is_none()
+                 && iter.next_back().is_none()
+         })
+     );
+ }
+ #[test]
+ fn test_dom_iter() {
+     let allowed_ascii = ASCII_FIREFOX;
+     // Forward iteration starts at the TLD; `len` shrinks by one per yielded label.
+     assert!(
+         Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
+             let mut iter = d.iter();
+             iter.len() == 3
+                 && iter.next().map_or(false, |l| l.value == "com")
+                 && iter.len() == 2
+                 && iter.next().map_or(false, |l| l.value == "example")
+                 && iter.len() == 1
+                 && iter.next().map_or(false, |l| l.value == "www")
+                 && iter.len() == 0
+                 && iter.next().is_none()
+         })
+     );
+     // Backward iteration starts at the left-most label.
+     assert!(
+         Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
+             let mut iter = d.iter();
+             iter.len() == 3
+                 && iter.next_back().map_or(false, |l| l.value == "www")
+                 && iter.len() == 2
+                 && iter.next_back().map_or(false, |l| l.value == "example")
+                 && iter.len() == 1
+                 && iter.next_back().map_or(false, |l| l.value == "com")
+                 && iter.len() == 0
+                 && iter.next_back().is_none()
+         })
+     );
+     // Mixing both directions must yield each label exactly once.
+     assert!(
+         Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| {
+             let mut iter = d.iter();
+             iter.len() == 3
+                 && iter.next_back().map_or(false, |l| l.value == "www")
+                 && iter.len() == 2
+                 && iter.next().map_or(false, |l| l.value == "com")
+                 && iter.len() == 1
+                 && iter.next_back().map_or(false, |l| l.value == "example")
+                 && iter.len() == 0
+                 && iter.next().is_none()
+                 && iter.next_back().is_none()
+         })
+     );
+ }
+ #[test]
+ fn rfc1123() {
+     // A conforming domain is accepted as is.
+     assert!(
+         Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS)
+             .ok()
+             .and_then(|dom| Rfc1123Domain::try_from(dom).ok())
+             .map_or(false, |rfc| rfc.as_str() == "example.com")
+     );
+     // A trailing dot is rejected.
+     assert!(
+         Domain::try_from_bytes("example.com.", &ASCII_HYPHEN_DIGITS_LETTERS).map_or(
+             false,
+             |dom| matches!(
+                 Rfc1123Domain::try_from(dom),
+                 Err(Rfc1123Err::ContainsTrailingDot)
+             )
+         )
+     );
+     // Any 63-character set other than letters, digits, and hyphen is rejected.
+     assert!(char_set::AllowedAscii::<63>::try_from(
+         b"!@#$%^&*()1234567890asdfghjklqwertyuiopzxcvbnmASDFGHJKLZXCVBNMQ".to_owned()
+     )
+     .map_or(false, |ascii| {
+         Domain::try_from_bytes("example.com", &ascii).map_or(false, |dom| {
+             matches!(
+                 Rfc1123Domain::try_from(dom),
+                 Err(Rfc1123Err::InvalidAllowedAscii)
+             )
+         })
+     }));
+     // Labels must not end with a hyphen.
+     assert!(
+         Domain::try_from_bytes("example-.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or(
+             false,
+             |dom| matches!(
+                 Rfc1123Domain::try_from(dom),
+                 Err(Rfc1123Err::LabelEndsWithAHyphen)
+             )
+         )
+     );
+     // Labels must not start with a hyphen.
+     assert!(
+         Domain::try_from_bytes("-example.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or(
+             false,
+             |dom| matches!(
+                 Rfc1123Domain::try_from(dom),
+                 Err(Rfc1123Err::LabelStartsWithAHyphen)
+             )
+         )
+     );
+     // Literal interpretation: the TLD is alphabetic regardless of its length.
+     assert!(
+         Domain::try_from_bytes("example.commm", &ASCII_HYPHEN_DIGITS_LETTERS)
+             .ok()
+             .and_then(|dom| Rfc1123Domain::try_from(dom).ok())
+             .map_or(false, |rfc| rfc.is_literal_interpretation())
+     );
+     assert!(
+         Domain::try_from_bytes("example.c1m", &ASCII_HYPHEN_DIGITS_LETTERS)
+             .ok()
+             .and_then(|dom| Rfc1123Domain::try_from(dom).ok())
+             .map_or(false, |rfc| !rfc.is_literal_interpretation())
+     );
+     // Strict interpretation: the TLD is alphabetic and two or three characters long.
+     assert!(
+         Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS)
+             .ok()
+             .and_then(|dom| Rfc1123Domain::try_from(dom).ok())
+             .map_or(false, |rfc| rfc.is_strict_interpretation())
+     );
+     assert!(
+         Domain::try_from_bytes("example.comm", &ASCII_HYPHEN_DIGITS_LETTERS)
+             .ok()
+             .and_then(|dom| Rfc1123Domain::try_from(dom).ok())
+             .map_or(false, |rfc| !rfc.is_strict_interpretation())
+     );
+     // A domain may have the format of an IPv4 address.
+     assert!(
+         Domain::try_from_bytes("1.2.3.4", &ASCII_HYPHEN_DIGITS_LETTERS)
+             .ok()
+             .and_then(|dom| Rfc1123Domain::try_from(dom).ok())
+             .map_or(false, |rfc| rfc.is_ipv4())
+     );
+ }
+ #[test]
+ fn test_tld() {
+     // The TLD is the right-most label.
+     let parsed = Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS);
+     assert!(parsed.map_or(false, |dom| dom.tld().as_str() == "com"));
+ }
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -0,0 +1,46 @@
+//! # `ascii_domain`
+//!
+//! `ascii_domain` is a library for efficiently parsing domains whose [`dom::Label`]s must conform to a
+//! caller-supplied ASCII character set. The primary type in the library is [`dom::Domain`], which can be
+//! thought of as a domain in _representation_ format. Technically, since any ASCII `u8` except `b'.'` is
+//! allowed in a `Label`, it is more general than an actual representation format that doesn’t include
+//! some form of escape characters. For a full-fledged DNS library, look elsewhere (e.g.,
+//! [`domain`](https://docs.rs/domain/latest/domain/)).
+//!
+//! The purpose of this library is to allow efficient customization of domain name parsing while still retaining
+//! the hierarchical structure of a domain. Depending on one’s use case, allowed formats and characters can
+//! differ. If one wants to conform to the [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181),
+//! all octets are allowed; but conforming to [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123) or
+//! [RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) requires stricter formats and a reduced character
+//! set.
+#![deny( // crate-wide: every lint or lint group listed here is a hard error
+ unsafe_code,
+ unused,
+ warnings,
+ clippy::all,
+ clippy::cargo,
+ clippy::complexity,
+ clippy::correctness,
+ clippy::nursery,
+ clippy::pedantic,
+ clippy::perf,
+ clippy::restriction, // whole restriction group is enabled; specific opt-outs are in `allow` below
+ clippy::style,
+ clippy::suspicious
+)]
+#![allow( // targeted opt-outs from the Clippy lints denied above
+ clippy::blanket_clippy_restriction_lints, // `clippy::restriction` is deliberately enabled wholesale
+ clippy::implicit_return,
+ clippy::min_ident_chars,
+ clippy::missing_trait_methods,
+ clippy::single_call_fn,
+ clippy::single_char_lifetime_names,
+ clippy::unseparated_literal_suffix
+)]
+/// Contains [`char_set::AllowedAscii`] which is how one dictates the character set [`dom::Domain::try_from_bytes`]
+/// uses.
+pub mod char_set;
+/// Contains [`dom::Domain`] which is a domain whose [`dom::Label`]s consist of a subset of the supplied
+/// [`char_set::AllowedAscii`]. Also contains [`dom::Rfc1123Domain`] which is a `Domain` that conforms to
+/// [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123#page-13).
+pub mod dom;