dom.rs (74983B)
1 extern crate alloc; 2 use crate::char_set::{ASCII_HYPHEN_DIGITS_LETTERS, AllowedAscii}; 3 use alloc::{string::String, vec::Vec}; 4 use core::{ 5 borrow::Borrow, 6 cmp::Ordering, 7 convert, 8 error::Error, 9 fmt::{self, Display, Formatter}, 10 hash::{Hash, Hasher}, 11 iter::FusedIterator, 12 num::NonZeroU8, 13 ops::Deref, 14 str, 15 }; 16 /// The `AllowedAscii` used by `Rfc1123Domain`. 17 static RFC_CHARS: &AllowedAscii<[u8; 63]> = &ASCII_HYPHEN_DIGITS_LETTERS; 18 /// Returned by [`Domain::cmp_by_domain_ordering`]. 19 /// 20 /// It is more informative than [`Ordering`] in that it 21 /// distinguishes between a `Domain` that is greater than another `Domain` due to a [`Label`] being greater 22 /// from a `Domain` that has the same `Label`s as another but simply more. 23 /// 24 /// Another way to view this is that [`Self::Shorter`] is "closer" to being [`Self::Equal`] than [`Self::Less`] 25 /// since the `Domain`s are still part of the same branch in the DNS hierarchy. Ditto for [`Self::Longer`]. 26 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 27 pub enum DomainOrdering { 28 /// The `Domain` is less than another since a `Label` was less. 29 Less, 30 /// The `Domain` is less than other but only because it had fewer `Label`s. 31 Shorter, 32 /// The `Domain` is equal to another. 33 Equal, 34 /// The `Domain` is greater than another but only because it had more `Label`s. 35 Longer, 36 /// The `Domain` is greater than another since a `Label` was greater. 37 Greater, 38 } 39 impl Display for DomainOrdering { 40 #[inline] 41 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 42 match *self { 43 Self::Less => f.write_str("less since a label was less"), 44 Self::Shorter => f.write_str("less since there were fewer labels"), 45 Self::Equal => f.write_str("equal"), 46 Self::Longer => f.write_str("greater since there were more labels"), 47 Self::Greater => f.write_str("greater since a label was greater"), 48 } 49 } 50 } 51 impl From<DomainOrdering> for Ordering { 52 #[inline] 53 fn from(value: DomainOrdering) -> Self { 54 match value { 55 DomainOrdering::Less | DomainOrdering::Shorter => Self::Less, 56 DomainOrdering::Equal => Self::Equal, 57 DomainOrdering::Longer | DomainOrdering::Greater => Self::Greater, 58 } 59 } 60 } 61 /// A domain that consists of at least one [`Label`] with each `Label` only containing the ASCII `u8`s in 62 /// the [`AllowedAscii`] passed to [`Self::try_from_bytes`]. 63 /// 64 /// The total length of a `Domain` is at most 65 /// 253 bytes[^note] in length including the `b'.'` separator. The trailing `b'.'`, if one exists, is always 66 /// ignored. 67 /// 68 /// This is more restrictive than what a domain is allowed to be per the 69 /// [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181) since all octets/`u8`s are allowed in a 70 /// label. Additionally there is no way to represent the root domain. 71 /// 72 /// Last, ASCII uppercase letters are treated as lowercase; however for better comparison performance 73 /// that doesn't lead to intermediate memory allocations, two `Domain`s should consist entirely of the same 74 /// case. 75 /// 76 /// [^note]: It is a common misconception that the max length of a domain is 255, but that is only true for 77 /// domains in _wire_ format. In representation format, which `Domain` can be thought of when only visible 78 /// ASCII bytes are used, the max length is 253 when the last byte is not `b'.'`; otherwise the max length is 79 /// 254. This is due to the fact that there is no way to explicitly represent the root label which in wire format 80 /// contributes one byte due to each label being preceded by the octet that represents its length. 81 /// 82 /// Note this only contains `T`, so this is allocation-free and the same size as `T`. 83 #[derive(Clone, Copy, Debug)] 84 pub struct Domain<T> { 85 /// The domain value. `value.as_ref().len()` is guaranteed to have length between 1 and 253 when the last `u8` 86 /// is not `b'.'`; otherwise the length is between 2 and 254. 87 /// Guaranteed to only contain `b'.'` and the ASCII `u8`s in `allowed_ascii`. 88 value: T, 89 } 90 impl<T> Domain<T> { 91 /// A reference to the contained `T`. 92 /// 93 /// # Example 94 /// 95 /// ``` 96 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 97 /// assert!(*Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().as_inner() == "example.com."); 98 /// ``` 99 #[inline] 100 pub const fn as_inner(&self) -> &T { 101 &self.value 102 } 103 /// Same as [`Self::as_inner`] except `self` is consumed. 104 /// 105 /// # Example 106 /// 107 /// ``` 108 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 109 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_inner() == "example.com."); 110 /// ``` 111 #[inline] 112 pub fn into_inner(self) -> T { 113 self.value 114 } 115 } 116 impl<T: AsRef<[u8]>> Domain<T> { 117 /// Returns `true` iff the domain contains a trailing `b'.'`. 118 /// 119 /// # Example 120 /// 121 /// ``` 122 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 123 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().contains_trailing_dot()); 124 /// ``` 125 #[expect( 126 clippy::arithmetic_side_effects, 127 clippy::indexing_slicing, 128 reason = "comments explain their correctness" 129 )] 130 #[inline] 131 pub fn contains_trailing_dot(&self) -> bool { 132 let bytes = self.value.as_ref(); 133 // This won't underflow or `panic` since `Domain`s are not empty. 134 bytes[bytes.len() - 1] == b'.' 135 } 136 /// The domain without a trailing `b'.'` if there was one. 137 /// 138 /// # Example 139 /// 140 /// ``` 141 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 142 /// assert!(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap().as_str() == "Example.com"); 143 /// ``` 144 #[inline] 145 pub fn as_str(&self) -> &str { 146 <&str>::from(Domain::<&str>::from(Domain::<&[u8]>::from(self))) 147 } 148 /// The domain without a trailing `b'.'` if there was one. 149 /// 150 /// # Example 151 /// 152 /// ``` 153 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 154 /// assert!(Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap().as_bytes() == b"Example.com"); 155 /// ``` 156 #[inline] 157 pub fn as_bytes(&self) -> &[u8] { 158 <&[u8]>::from(Domain::<&[u8]>::from(self)) 159 } 160 /// The length of the `Domain`. This does _not_ include the trailing `b'.'` if there was one. 161 /// 162 /// # Example 163 /// 164 /// ``` 165 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 166 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().len().get() == 11); 167 /// ``` 168 #[expect( 169 unsafe_code, 170 reason = "we enforce nonzero lengths, so NonZeroU8::new_unchecked is fine" 171 )] 172 #[expect( 173 clippy::arithmetic_side_effects, 174 clippy::as_conversions, 175 clippy::cast_possible_truncation, 176 reason = "comments justify their correctness" 177 )] 178 #[inline] 179 pub fn len(&self) -> NonZeroU8 { 180 // No fear of underflow since the length of `value` is at least 1 _not including_ the 181 // trailing `b'.'` if there was one. 182 // `true as usize` is guaranteed to be 1 and `false as usize` is guaranteed to be 0. 183 // No fear of truncation either since the length is guaranteed to be less than 255. 184 // `Domain` is immutable ensuring such invariants are kept. 185 let len = (self.value.as_ref().len() - usize::from(self.contains_trailing_dot())) as u8; 186 // SAFETY: 187 // The only way to construct a `Domain` is via `try_from_bytes` which ensures `len` is 188 // is at least 1. 189 unsafe { NonZeroU8::new_unchecked(len) } 190 } 191 /// Function that transforms `v` into a `Domain` by only allowing [`Label`]s to contain the ASCII `u8`s in 192 /// `allowed_ascii`. A trailing `b'.'` is ignored. 193 /// 194 /// Note that while ASCII uppercase is treated as ASCII lowercase, `allowed_ascii` MUST still contain 195 /// each ASCII `u8` (e.g., if `!allowed_ascii.contains(b'A')`, then `b'A'` is not allowed even if 196 /// `allowed_ascii.contains(b'a')`). 197 /// 198 /// # Examples 199 /// 200 /// ``` 201 /// use ascii_domain::{dom::{Domain, DomainErr}, char_set::ASCII_LOWERCASE}; 202 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).is_ok()); 203 /// assert!(Domain::try_from_bytes("exam2ple.com", &ASCII_LOWERCASE).map_or_else(|err| err == DomainErr::InvalidByte(b'2'), |_| false)); 204 /// ``` 205 /// 206 /// # Errors 207 /// 208 /// Returns [`DomainErr`] iff `v.as_ref()` is an invalid `Domain`. 209 #[expect( 210 clippy::arithmetic_side_effects, 211 reason = "comment justifies its correctness" 212 )] 213 #[inline] 214 pub fn try_from_bytes<T2: AsRef<[u8]>>( 215 v: T, 216 allowed_ascii: &AllowedAscii<T2>, 217 ) -> Result<Self, DomainErr> { 218 let val = v.as_ref(); 219 let value = val 220 .split_last() 221 .ok_or(DomainErr::Empty) 222 .and_then(|(lst, rem)| { 223 if *lst == b'.' { 224 rem.split_last() 225 .ok_or(DomainErr::RootDomain) 226 .and_then(|(lst_2, _)| { 227 if *lst_2 == b'.' { 228 Err(DomainErr::EmptyLabel) 229 } else { 230 Ok(rem) 231 } 232 }) 233 } else { 234 Ok(val) 235 } 236 })?; 237 if value.len() > 253 { 238 Err(DomainErr::LenExceeds253(value.len())) 239 } else { 240 value 241 .iter() 242 .try_fold(0, |label_len, byt| { 243 let b = *byt; 244 if b == b'.' { 245 NonZeroU8::new(label_len).map_or(Err(DomainErr::EmptyLabel), |_| Ok(0)) 246 } else if !allowed_ascii.contains(b) { 247 Err(DomainErr::InvalidByte(b)) 248 } else if label_len == 63 { 249 Err(DomainErr::LabelLenExceeds63) 250 } else { 251 // This is less than 63 due to the above check, so this won't overflow; 252 Ok(label_len + 1) 253 } 254 }) 255 .map(|_| Self { value: v }) 256 } 257 } 258 /// Returns an [`Iterator`] of [`Label`]s without consuming the `Domain`. 259 /// # Example 260 /// 261 /// ``` 262 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 263 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().unwrap().as_str() == "com"); 264 /// ``` 265 #[inline] 266 pub fn iter(&self) -> LabelIter<'_> { 267 LabelIter { 268 domain: self.as_bytes(), 269 } 270 } 271 /// Returns `true` iff `self` and `right` are part of the same branch in the DNS hierarchy. 272 /// 273 /// For example `www.example.com` and `example.com` are in the `same_branch`, but `example.com` and 274 /// `foo.com` are not. 275 /// 276 /// Note that trailing `b'.'`s are ignored and ASCII uppercase and lowercase are treated the same. 277 /// 278 /// # Examples 279 /// 280 /// ``` 281 /// use ascii_domain::{dom::Domain, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 282 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 283 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 284 /// assert!(dom1.same_branch(&dom2)); 285 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 286 /// assert!(!dom1.same_branch(&dom3)); 287 /// ``` 288 #[inline] 289 pub fn same_branch<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> bool { 290 // Faster to check the values as bytes and not iterate each `Label`. 291 if self == right { 292 true 293 } else { 294 self.iter() 295 .zip(right) 296 .try_fold( 297 (), 298 |(), (label, label2)| if label == label2 { Ok(()) } else { Err(()) }, 299 ) 300 .is_ok_and(|()| true) 301 } 302 } 303 /// Same as [`Self::cmp_doms`] except returns [`DomainOrdering::Longer`] iff `self > right` due solely 304 /// to having more [`Label`]s and [`DomainOrdering::Shorter`] iff `self < right` due solely to having 305 /// fewer `Label`s. 306 /// 307 /// For example `example.com` < `www.example.com` and `bar.com` < `www.example.com`; but with this function, 308 /// `example.com` is [`DomainOrdering::Shorter`] than `www.example.com` and `www.example.com` is 309 /// [`DomainOrdering::Longer`] than `example.com`; while `bar.com` is [`DomainOrdering::Less`] than 310 /// `www.example.com` and `www.example.com` is [`DomainOrdering::Greater`] than `bar.com`. 311 /// 312 /// In other words `DomainOrdering::Shorter` implies `Ordering::Less` and `DomainOrdering::Longer` implies 313 /// `Ordering::Greater` with additional information pertaining to the quantity of `Label`s. 314 /// 315 /// # Examples 316 /// 317 /// ``` 318 /// use ascii_domain::{dom::{Domain, DomainOrdering}, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 319 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 320 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom1), DomainOrdering::Equal)); 321 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 322 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom2), DomainOrdering::Shorter)); 323 /// assert!(matches!(dom2.cmp_by_domain_ordering(&dom1), DomainOrdering::Longer)); 324 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 325 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom3), DomainOrdering::Less)); 326 /// assert!(matches!(dom3.cmp_by_domain_ordering(&dom1), DomainOrdering::Greater)); 327 /// ``` 328 #[inline] 329 pub fn cmp_by_domain_ordering<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> DomainOrdering { 330 // Faster to compare the entire value when we can instead each `Label`. 331 if self == right { 332 DomainOrdering::Equal 333 } else { 334 let mut right_iter = right.iter(); 335 self.iter() 336 .try_fold(false, |_, label| { 337 right_iter 338 .next() 339 .map_or(Ok(true), |label2| match label.cmp(&label2) { 340 Ordering::Less => Err(DomainOrdering::Less), 341 Ordering::Equal => Ok(false), 342 Ordering::Greater => Err(DomainOrdering::Greater), 343 }) 344 }) 345 .map_or_else(convert::identity, |flag| { 346 // We iterate `self` before `right`, so `flag` is `true` iff `right` 347 // has fewer `Label`s than `self`. 348 if flag { 349 DomainOrdering::Longer 350 } else { 351 // `self` has as many or fewer `Label`s than `right`; however if it had as many 352 // `Label`s as `right`, then all `Label`s are the same which is impossible since 353 // we already checked if `self == right`. 354 DomainOrdering::Shorter 355 } 356 }) 357 } 358 } 359 /// The total order that is defined follows the following hierarchy: 360 /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. 361 /// 2. If 1. evaluates as not equivalent, then return the result. 362 /// 3. Return the comparison of `Label` counts. 363 /// 364 /// For example, `com` < `example.com` < `net` < `example.net`. 365 /// 366 /// This is the same as the [canonical DNS name order](https://datatracker.ietf.org/doc/html/rfc4034#section-6.1). 367 /// ASCII uppercase is treated as ASCII lowercase and trailing `b'.'`s are ignored. 368 /// The [`AllowedAscii`]s in the `Domain`s are ignored. 369 /// 370 /// # Examples 371 /// 372 /// ``` 373 /// use core::cmp::Ordering; 374 /// use ascii_domain::{dom::Domain, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 375 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 376 /// assert!(matches!(dom1.cmp_doms(&dom1), Ordering::Equal)); 377 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 378 /// assert!(matches!(dom1.cmp_doms(&dom2), Ordering::Less)); 379 /// assert!(matches!(dom2.cmp_doms(&dom1), Ordering::Greater)); 380 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 381 /// assert!(matches!(dom1.cmp_doms(&dom3), Ordering::Less)); 382 /// assert!(matches!(dom3.cmp_doms(&dom1), Ordering::Greater)); 383 /// ``` 384 #[inline] 385 pub fn cmp_doms<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> Ordering { 386 self.cmp_by_domain_ordering(right).into() 387 } 388 /// Returns the first `Label`. 389 /// 390 /// # Example 391 /// 392 /// ``` 393 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 394 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().first_label().as_str() == "example"); 395 /// ``` 396 #[expect(clippy::unreachable, reason = "bug in code, so we want to crash")] 397 #[inline] 398 pub fn first_label(&self) -> Label<'_> { 399 self.iter() 400 .next_back() 401 .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")) 402 } 403 /// Returns the last `Label` (i.e., the TLD). 404 /// 405 /// # Example 406 /// 407 /// ``` 408 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 409 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().tld().as_str() == "com"); 410 /// ``` 411 #[expect(clippy::unreachable, reason = "bug in code, so we want to crash")] 412 #[inline] 413 pub fn tld(&self) -> Label<'_> { 414 self.iter() 415 .next() 416 .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")) 417 } 418 } 419 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for Domain<T2> { 420 /// Ignores the provided [`AllowedAscii`] and simply compares the two `Domain`s as [`Label`]s 421 /// of bytes. Note uppercase ASCII is treated as lowercase ASCII and trailing `b'.'`s are ignored. 422 #[inline] 423 fn eq(&self, other: &Domain<T>) -> bool { 424 self.as_bytes().eq_ignore_ascii_case(other.as_bytes()) 425 } 426 } 427 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Domain<T>> for Domain<T2> { 428 #[inline] 429 fn eq(&self, other: &&Domain<T>) -> bool { 430 *self == **other 431 } 432 } 433 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for &Domain<T2> { 434 #[inline] 435 fn eq(&self, other: &Domain<T>) -> bool { 436 **self == *other 437 } 438 } 439 impl<T: AsRef<[u8]>> Eq for Domain<T> {} 440 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Domain<T>> for Domain<T2> { 441 /// Consult [`Self::cmp_doms`]. 442 #[inline] 443 fn partial_cmp(&self, other: &Domain<T>) -> Option<Ordering> { 444 Some(self.cmp_doms(other)) 445 } 446 } 447 impl<T: AsRef<[u8]>> Ord for Domain<T> { 448 /// Consult [`Self::cmp_doms`]. 449 #[inline] 450 fn cmp(&self, other: &Self) -> Ordering { 451 self.cmp_doms(other) 452 } 453 } 454 impl<T: AsRef<[u8]>> Hash for Domain<T> { 455 #[inline] 456 fn hash<H: Hasher>(&self, state: &mut H) { 457 self.as_bytes().to_ascii_lowercase().hash(state); 458 } 459 } 460 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> TryFrom<(T, &AllowedAscii<T2>)> for Domain<T> { 461 type Error = DomainErr; 462 #[inline] 463 fn try_from(value: (T, &AllowedAscii<T2>)) -> Result<Self, Self::Error> { 464 Self::try_from_bytes(value.0, value.1) 465 } 466 } 467 impl<T: AsRef<[u8]>> Display for Domain<T> { 468 #[inline] 469 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 470 f.write_str(self) 471 } 472 } 473 impl<T: AsRef<[u8]>> AsRef<str> for Domain<T> { 474 #[inline] 475 fn as_ref(&self) -> &str { 476 self.as_str() 477 } 478 } 479 impl<T: AsRef<[u8]>> AsRef<[u8]> for Domain<T> { 480 #[inline] 481 fn as_ref(&self) -> &[u8] { 482 self.as_bytes() 483 } 484 } 485 impl<T: AsRef<[u8]>> Deref for Domain<T> { 486 type Target = str; 487 #[inline] 488 fn deref(&self) -> &Self::Target { 489 self.as_str() 490 } 491 } 492 impl From<Domain<Vec<u8>>> for Domain<String> { 493 #[expect( 494 unsafe_code, 495 reason = "we enforce ASCII, so String::from_utf8_unchecked is fine" 496 )] 497 #[inline] 498 fn from(value: Domain<Vec<u8>>) -> Self { 499 // SAFETY: 500 // We only allow ASCII, so this is fine. 501 let val = unsafe { String::from_utf8_unchecked(value.value) }; 502 Self { value: val } 503 } 504 } 505 impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Domain<T>> for Domain<&'b [u8]> { 506 #[inline] 507 fn from(value: &'a Domain<T>) -> Self { 508 Self { 509 value: value.value.as_ref(), 510 } 511 } 512 } 513 impl<'a: 'b, 'b, T: AsRef<str>> From<&'a Domain<T>> for Domain<&'b str> { 514 #[inline] 515 fn from(value: &'a Domain<T>) -> Self { 516 Self { 517 value: value.value.as_ref(), 518 } 519 } 520 } 521 impl From<Domain<String>> for Domain<Vec<u8>> { 522 #[inline] 523 fn from(value: Domain<String>) -> Self { 524 Self { 525 value: value.value.into_bytes(), 526 } 527 } 528 } 529 impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> { 530 #[expect( 531 unsafe_code, 532 reason = "we enforce ASCII, so str::from_utf8_unchecked is fine" 533 )] 534 #[inline] 535 fn from(value: Domain<&'a [u8]>) -> Self { 536 // SAFETY: 537 // We only allow ASCII, so this is fine. 538 let val = unsafe { str::from_utf8_unchecked(value.value) }; 539 Self { value: val } 540 } 541 } 542 impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> { 543 #[inline] 544 fn from(value: Domain<&'a str>) -> Self { 545 Self { 546 value: value.value.as_bytes(), 547 } 548 } 549 } 550 impl From<Domain<Self>> for String { 551 /// Returns the contained `String` _without_ a trailing `'.'` if there was one. 552 /// 553 /// # Example 554 /// 555 /// ``` 556 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 557 /// assert!(String::from(Domain::try_from_bytes(String::from("Example.com."), &ASCII_LETTERS).unwrap()).as_str() == "Example.com"); 558 /// ``` 559 #[inline] 560 fn from(value: Domain<Self>) -> Self { 561 if value.contains_trailing_dot() { 562 let mut val = value.value; 563 _ = val.pop(); 564 val 565 } else { 566 value.value 567 } 568 } 569 } 570 impl<'a: 'b, 'b> From<Domain<&'a str>> for &'b str { 571 /// Returns the contained `str` _without_ a trailing `'.'` if there was one. 572 /// 573 /// # Example 574 /// 575 /// ``` 576 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 577 /// assert!(<&str>::from(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap()) == "Example.com"); 578 /// ``` 579 #[expect( 580 unsafe_code, 581 reason = "we enforce ASCII, so str::from_utf8_unchecked is fine" 582 )] 583 #[expect(clippy::indexing_slicing, reason = "comment justifies its correctness")] 584 #[inline] 585 fn from(value: Domain<&'a str>) -> Self { 586 // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. 587 let utf8 = &value.value.as_bytes()[..value.len().get().into()]; 588 // SAFETY: 589 // Only ASCII is allowed, so this is fine. 590 unsafe { str::from_utf8_unchecked(utf8) } 591 } 592 } 593 impl From<Domain<Self>> for Vec<u8> { 594 /// Returns the contained `Vec` _without_ a trailing `b'.'` if there was one. 595 /// 596 /// # Example 597 /// 598 /// ``` 599 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 600 /// assert!(Vec::from(Domain::try_from_bytes(vec![b'F', b'o', b'o', b'.', b'c', b'o', b'm'], &ASCII_LETTERS).unwrap()).as_slice() == b"Foo.com"); 601 /// ``` 602 #[inline] 603 fn from(value: Domain<Self>) -> Self { 604 if value.contains_trailing_dot() { 605 let mut val = value.value; 606 _ = val.pop(); 607 val 608 } else { 609 value.value 610 } 611 } 612 } 613 impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for &'b [u8] { 614 /// Returns the contained slice _without_ a trailing `b'.'` if there was one. 615 /// 616 /// # Example 617 /// 618 /// ``` 619 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 620 /// assert!(<&[u8]>::from(Domain::try_from_bytes(b"Example.com.".as_slice(), &ASCII_LETTERS).unwrap()) == b"Example.com"); 621 /// ``` 622 #[expect(clippy::indexing_slicing, reason = "comment justifies its correctness")] 623 #[inline] 624 fn from(value: Domain<&'a [u8]>) -> Self { 625 // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. 626 &value.value[..value.len().get().into()] 627 } 628 } 629 /// Error returned from [`Domain::try_from_bytes`]. 630 #[expect(variant_size_differences, reason = "usize is fine in size")] 631 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 632 pub enum DomainErr { 633 /// The domain was empty. 634 Empty, 635 /// The domain was the root domain that is to say it was the domain that only contained the root 636 /// zone (i.e., `b'.'`). 637 RootDomain, 638 /// The length of the domain was greater than 253 not counting a terminating `b'.'` if there was one. 639 LenExceeds253(usize), 640 /// The domain contained at least one empty label. 641 EmptyLabel, 642 /// The domain contained at least one label whose length exceeded 63. 643 LabelLenExceeds63, 644 /// The domain contained an invalid byte value. 645 InvalidByte(u8), 646 } 647 impl Display for DomainErr { 648 #[inline] 649 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 650 match *self { 651 Self::Empty => f.write_str("domain is empty"), 652 Self::RootDomain => f.write_str("domain is the root domain"), 653 Self::LenExceeds253(len) => write!( 654 f, 655 "domain has length {len} which is greater than the max length of 253" 656 ), 657 Self::EmptyLabel => f.write_str("domain has an empty label"), 658 Self::LabelLenExceeds63 => { 659 f.write_str("domain has a label that exceeds the max length of 63") 660 } 661 Self::InvalidByte(byt) => { 662 write!(f, "domain has a label with the invalid byte value {byt}") 663 } 664 } 665 } 666 } 667 impl Error for DomainErr {} 668 /// A label of a [`Domain`]. The total length of a `Label` is inclusively between 1 and 63. 669 #[derive(Clone, Copy, Debug)] 670 pub struct Label<'a> { 671 /// The label value. 672 value: &'a str, 673 } 674 impl<'a> Label<'a> { 675 /// The label. 676 /// 677 /// # Example 678 /// 679 /// ``` 680 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 681 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.as_str() == "com")); 682 /// ``` 683 #[inline] 684 #[must_use] 685 pub const fn as_str(self) -> &'a str { 686 self.value 687 } 688 /// Returns `true` iff the label only contains ASCII letters. 689 /// 690 /// # Example 691 /// 692 /// ``` 693 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 694 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphabetic())); 695 /// ``` 696 #[inline] 697 #[must_use] 698 pub fn is_alphabetic(self) -> bool { 699 self.value 700 .as_bytes() 701 .iter() 702 .try_fold((), |(), byt| { 703 if byt.is_ascii_alphabetic() { 704 Ok(()) 705 } else { 706 Err(()) 707 } 708 }) 709 .is_ok() 710 } 711 /// Returns `true` iff the label only contains ASCII digits. 712 /// 713 /// # Example 714 /// 715 /// ``` 716 /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE}; 717 /// assert!(Domain::try_from_bytes("example.123", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_digits())); 718 /// ``` 719 #[inline] 720 #[must_use] 721 pub fn is_digits(self) -> bool { 722 self.value 723 .as_bytes() 724 .iter() 725 .try_fold((), |(), byt| { 726 if byt.is_ascii_digit() { 727 Ok(()) 728 } else { 729 Err(()) 730 } 731 }) 732 .is_ok() 733 } 734 /// Returns `true` iff the label only contains ASCII digits or letters. 735 /// 736 /// # Example 737 /// 738 /// ``` 739 /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE}; 740 /// assert!(Domain::try_from_bytes("example.1com", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphanumeric())); 741 /// ``` 742 #[inline] 743 #[must_use] 744 pub fn is_alphanumeric(self) -> bool { 745 self.value 746 .as_bytes() 747 .iter() 748 .try_fold((), |(), byt| { 749 if byt.is_ascii_alphanumeric() { 750 Ok(()) 751 } else { 752 Err(()) 753 } 754 }) 755 .is_ok() 756 } 757 /// Returns `true` iff the label only contains ASCII hyphen, digits, or letters. 758 /// 759 /// # Example 760 /// 761 /// ``` 762 /// use ascii_domain::{dom::Domain, char_set::ASCII_HYPHEN_DIGITS_LOWERCASE}; 763 /// assert!(Domain::try_from_bytes("example.1-com", &ASCII_HYPHEN_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_hyphen_or_alphanumeric())); 764 /// ``` 765 #[inline] 766 #[must_use] 767 pub fn is_hyphen_or_alphanumeric(self) -> bool { 768 self.value 769 .as_bytes() 770 .iter() 771 .try_fold((), |(), byt| { 772 if *byt == b'-' || byt.is_ascii_alphanumeric() { 773 Ok(()) 774 } else { 775 Err(()) 776 } 777 }) 778 .is_ok() 779 } 780 /// The length of the `Label`. This is inclusively between 1 and 63. 781 /// 782 /// # Example 783 /// 784 /// ``` 785 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 786 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.len().get() == 3)); 787 /// ``` 788 #[expect( 789 unsafe_code, 790 reason = "we enforce label lengths, so NonZeroU8::new_unchecked is fine" 791 )] 792 #[expect( 793 clippy::as_conversions, 794 clippy::cast_possible_truncation, 795 reason = "comments justify their correctness" 796 )] 797 #[inline] 798 #[must_use] 799 pub const fn len(self) -> NonZeroU8 { 800 // The max length of a `Label` is 63. 801 let len = self.value.len() as u8; 802 // SAFETY: 803 // `Label`s are never empty. 804 unsafe { NonZeroU8::new_unchecked(len) } 805 } 806 } 807 impl PartialEq<Label<'_>> for Label<'_> { 808 #[inline] 809 fn eq(&self, other: &Label<'_>) -> bool { 810 self.value.eq_ignore_ascii_case(other.value) 811 } 812 } 813 impl PartialEq<&Label<'_>> for Label<'_> { 814 #[inline] 815 fn eq(&self, other: &&Label<'_>) -> bool { 816 *self == **other 817 } 818 } 819 impl PartialEq<Label<'_>> for &Label<'_> { 820 #[inline] 821 fn eq(&self, other: &Label<'_>) -> bool { 822 **self == *other 823 } 824 } 825 impl Eq for Label<'_> {} 826 impl PartialOrd<Label<'_>> for Label<'_> { 827 #[inline] 828 fn partial_cmp(&self, other: &Label<'_>) -> Option<Ordering> { 829 Some(self.cmp(other)) 830 } 831 } 832 impl Ord for Label<'_> { 833 #[inline] 834 fn cmp(&self, other: &Self) -> Ordering { 835 self.value 836 .to_ascii_lowercase() 837 .cmp(&other.value.to_ascii_lowercase()) 838 } 839 } 840 impl Hash for Label<'_> { 841 #[inline] 842 fn hash<H: Hasher>(&self, state: &mut H) { 843 self.value.to_ascii_lowercase().hash(state); 844 } 845 } 846 impl Display for Label<'_> { 847 #[inline] 848 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 849 f.write_str(self.value) 850 } 851 } 852 impl<'a> AsRef<[u8]> for Label<'a> { 853 #[inline] 854 fn as_ref(&self) -> &'a [u8] { 855 self.value.as_bytes() 856 } 857 } 858 impl<'a> AsRef<str> for Label<'a> { 859 #[inline] 860 fn as_ref(&self) -> &'a str { 861 self.value 862 } 863 } 864 impl<'a> Deref for Label<'a> { 865 type Target = str; 866 #[inline] 867 fn deref(&self) -> &'a Self::Target { 868 self.value 869 } 870 } 871 /// [`Iterator`] that iterates [`Label`]s from a [`Domain`] or [`Rfc1123Domain`] starting from the TLD down. 872 /// 873 /// This iterates `Label`s on demand; so if repeated iteration is desired, it may be better to collect the `Label`s 874 /// in a collection (e.g., [`Vec`]) than create the iterator again. This is also why [`ExactSizeIterator`] is not 875 /// implemented. 876 #[derive(Clone, Debug)] 877 pub struct LabelIter<'a> { 878 /// Domain as ASCII. 879 domain: &'a [u8], 880 } 881 impl<'a> Iterator for LabelIter<'a> { 882 type Item = Label<'a>; 883 #[expect( 884 unsafe_code, 885 reason = "we only allow ASCII, so str::from_utf8_unchecked is fine" 886 )] 887 #[expect( 888 clippy::arithmetic_side_effects, 889 clippy::indexing_slicing, 890 reason = "comments justify their correctness" 891 )] 892 #[inline] 893 fn next(&mut self) -> Option<Self::Item> { 894 (!self.domain.is_empty()).then(|| { 895 self.domain 896 .iter() 897 .rev() 898 .try_fold(1, |count, byt| { 899 if *byt == b'.' { 900 let len = self.domain.len(); 901 // `count` < `len` since there is at least one more `u8` before `b'.'`. 902 let idx = len - count; 903 // `idx + 1` < `len` since `count` is > 1 since `Label`s are never empty. 904 let ascii = &self.domain[idx + 1..len]; 905 // SAFETY: 906 // We only allow ASCII, so this is safe. 907 let value = unsafe { str::from_utf8_unchecked(ascii) }; 908 self.domain = &self.domain[..idx]; 909 Err(Label { value }) 910 } else { 911 Ok(count + 1) 912 } 913 }) 914 .map_or_else(convert::identity, |_| { 915 // SAFETY: 916 // We only allow ASCII, so this is safe. 917 let value = unsafe { str::from_utf8_unchecked(self.domain) }; 918 self.domain = &[]; 919 Label { value } 920 }) 921 }) 922 } 923 #[inline] 924 fn last(mut self) -> Option<Self::Item> 925 where 926 Self: Sized, 927 { 928 self.next_back() 929 } 930 #[inline] 931 fn size_hint(&self) -> (usize, Option<usize>) { 932 if self.domain.is_empty() { 933 (0, Some(0)) 934 } else { 935 // The max size of a `Label` is 63; and all but the last have a `b'.'` that follow it. 936 // This means the fewest `Label`s possible is the floor of the length divided by 64 with 937 // the added requirement that it's at least one since we know the domain is not empty. 938 // The min size of a `Label` is 1; and all but the last have a `b'.'` that follow it. 939 // This means the max number of `Label`s is the ceiling of the length divided by 2. 940 ( 941 (self.domain.len() >> 6).max(1), 942 Some(self.domain.len().div_ceil(2)), 943 ) 944 } 945 } 946 } 947 impl FusedIterator for LabelIter<'_> {} 948 impl DoubleEndedIterator for LabelIter<'_> { 949 #[expect( 950 unsafe_code, 951 reason = "we only allow ASCII, so str::from_utf8_unchecked is fine" 952 )] 953 #[expect( 954 clippy::arithmetic_side_effects, 955 clippy::indexing_slicing, 956 reason = "comments justify their correctness" 957 )] 958 #[inline] 959 fn next_back(&mut self) -> Option<Self::Item> { 960 (!self.domain.is_empty()).then(|| { 961 self.domain 962 .iter() 963 .try_fold(0, |count, byt| { 964 if *byt == b'.' { 965 // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s 966 // are not empty. 967 let ascii = &self.domain[..count]; 968 // SAFETY: 969 // We only allow ASCII, so this is safe. 970 let value = unsafe { str::from_utf8_unchecked(ascii) }; 971 // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s 972 // are not empty. 973 self.domain = &self.domain[count + 1..]; 974 Err(Label { value }) 975 } else { 976 Ok(count + 1) 977 } 978 }) 979 .map_or_else(convert::identity, |_| { 980 // SAFETY: 981 // We only allow ASCII, so this is safe. 982 let value = unsafe { str::from_utf8_unchecked(self.domain) }; 983 self.domain = &[]; 984 Label { value } 985 }) 986 }) 987 } 988 } 989 impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Domain<T> { 990 type Item = Label<'a>; 991 type IntoIter = LabelIter<'a>; 992 #[inline] 993 fn into_iter(self) -> Self::IntoIter { 994 LabelIter { 995 domain: self.as_bytes(), 996 } 997 } 998 } 999 impl<'a> IntoIterator for Domain<&'a str> { 1000 type Item = Label<'a>; 1001 type IntoIter = LabelIter<'a>; 1002 #[inline] 1003 fn into_iter(self) -> Self::IntoIter { 1004 LabelIter { 1005 domain: <&str>::from(self).as_bytes(), 1006 } 1007 } 1008 } 1009 impl<'a> IntoIterator for Domain<&'a [u8]> { 1010 type Item = Label<'a>; 1011 type IntoIter = LabelIter<'a>; 1012 #[inline] 1013 fn into_iter(self) -> Self::IntoIter { 1014 LabelIter { 1015 domain: <&[u8]>::from(self), 1016 } 1017 } 1018 } 1019 /// Error returned from [`Rfc1123Domain::try_from`] and [`Rfc1123Domain::try_from_bytes`]. 1020 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 1021 pub enum Rfc1123Err { 1022 /// The inputs was not a valid [`Domain`]. 1023 DomainErr(DomainErr), 1024 /// A [`Label`] of [`Domain`] starts with an ASCII hyphen. 1025 LabelStartsWithAHyphen, 1026 /// A [`Label`] of [`Domain`] ends with an ASCII hyphen. 1027 LabelEndsWithAHyphen, 1028 /// The last [`Label`] (i.e., TLD) was invalid which means it was not all ASCII letters nor 1029 /// had length of at least five with the first 4 characters being `xn--`. 1030 InvalidTld, 1031 } 1032 impl Display for Rfc1123Err { 1033 #[inline] 1034 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1035 match *self { 1036 Self::DomainErr(err) => err.fmt(f), 1037 Self::LabelStartsWithAHyphen => { 1038 f.write_str("a label in the domain starts with a hyphen") 1039 } 1040 Self::LabelEndsWithAHyphen => f.write_str("a label in the domain ends with a hyphen"), 1041 Self::InvalidTld => f.write_str("the TLD in the domain was not all letters nor had length of at least five with the first 4 characters being 'xn--'") 1042 } 1043 } 1044 } 1045 impl Error for Rfc1123Err {} 1046 /// **TL;DR** Wrapper type around a [`Domain`] that enforces conformance to 1047 /// [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123#page-13). 1048 /// 1049 /// * Each [`Label`] must only contain ASCII digits, letters, or hyphen. 1050 /// * Each `Label` must not begin or end with a hyphen. 1051 /// * The last `Label` (i.e., TLD) must either contain only ASCII letters or have length of at least five and 1052 /// begin with `xn--`. 1053 /// --- 1054 /// Unsurprisingly, RFC 1123 is not super precise as it uses "host name" to mean label and also domain: 1055 /// "Host software MUST handle host names \[labels\] of up to 63 characters and SHOULD handle host 1056 /// names \[domains\] of up to 255 characters". It also states that only "one aspect of host name \[label\] 1057 /// syntax is hereby changed" from [RFC 952](https://www.rfc-editor.org/rfc/rfc952): "the restriction on the 1058 /// first character is relaxed to allow either a letter or a digit". Despite that, it goes on to mention other 1059 /// restrictions not mentioned in RFC 952: "the highest-level component label will be alphabetic". It is therefore 1060 /// important to understand how this type interprets that RFC and why it does so. 1061 /// 1062 /// The primary issue with RFC 1123 is the unjustified comment about the TLD being alphabetic. It is given 1063 /// as if it is common knowledge. As explained by (the rejected) 1064 /// [Errata 1353](https://www.rfc-editor.org/errata/eid1353), there seemed to be the assumption that the TLDs 1065 /// at the time would be the only ones that would ever exist or at least that the format of them would always be 1066 /// true. This leads to several possible interpretations: 1067 /// 1068 /// * Strictest: enforce the TLD is one of the TLDs that existed at the time of the RFC. 1069 /// * Strict: enforce the TLD has the same format as the TLDs at the time (i.e., two or three letters long). 1070 /// * Literal: enforce the TLD is alphabetic regardless of the lack of justification. 1071 /// * Relaxed: enforce the "spirit" that the TLD must exist. 1072 /// * More relaxed: enforce the "spirit" that the TLD must have the same format of a valid TLD. 1073 /// * Much more relaxed: enforce the "spirit" that the domain cannot have the form of an IPv4 address. 1074 /// * Most relaxed: treat TLDs no differently than other labels (i.e., don't make assumptions about what will be 1075 /// a valid TLD in the future). 1076 /// 1077 /// RFC 1123 is not obsolete, and it is clear from more recent RFCs like 1078 /// [RFC 5891](https://www.rfc-editor.org/rfc/rfc5891) that it is designed to be a foundation (i.e., domains that 1079 /// are valid per newer RFCs are valid per RFC 1123). Clearly due to RFCs like RFC 5891, requiring the TLD 1080 /// to be alphabetic or exactly two or three characters long would violate that. For those reasons the strictest, 1081 /// strict, and literal interpretations are rejected. 1082 /// 1083 /// Assuming TLDs are static is absurd, and relying on some dynamic list of TLDs is undesirable. For that reason 1084 /// the relaxed interpretation is rejected. 1085 /// 1086 /// Enforcing that domains do not have the form of an IPv4 address opens up the question of what is an IPv4 1087 /// address? Should leading 0s be allowed? What about hexadecimal? Should there be length limits for each octet? 1088 /// It also has the undesirable effect where subdomains that are all numeric exist but their parent domain does 1089 /// not which goes against the hierarchical nature of DNS. For those reasons the much more relaxed interpretation 1090 /// is rejected. 1091 /// 1092 /// Treating TLDs no differently than other labels is nice from a consistency perspective, but it suffers from 1093 /// the fact that domains that have the form of an IPv4 address are now allowed. For that reason the most 1094 /// relaxed interpretation is rejected. 1095 /// 1096 /// [ICANN](https://newgtlds.icann.org/sites/default/files/guidebook-full-04jun12-en.pdf) requires TLDs to either 1097 /// be alphabetic or a valid A-label per RFC 5891. Verifying a label is a valid A-label is not a cheap operation 1098 /// though. For that reason the more relaxed interpretation is accepted but with a twist: fake and valid A-labels 1099 /// are allowed in addition to entirely alphabetic labels. More specifically the TLD must either contain only 1100 /// letters or must be at least five characters long with the first 4 characters being `xn--`. 1101 /// 1102 /// If one wants to enforce the literal interpretation, one can use [`Self::is_literal_interpretation`]. Similarly, 1103 /// if one wants to enforce the strict interpretation, one can use [`Self::is_strict_interpretation`]. 1104 #[derive(Clone, Copy, Debug)] 1105 pub struct Rfc1123Domain<T> { 1106 /// The domain. 1107 dom: Domain<T>, 1108 } 1109 impl<T> Rfc1123Domain<T> { 1110 /// Returns a reference to the contained [`Domain`]. 1111 /// 1112 /// # Example 1113 /// 1114 /// ``` 1115 /// use ascii_domain::dom::Rfc1123Domain; 1116 /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().domain().len().get() == 11); 1117 /// ``` 1118 #[inline] 1119 pub const fn domain(&self) -> &Domain<T> { 1120 &self.dom 1121 } 1122 /// Returns the contained [`Domain`] consuming `self`. 1123 /// 1124 /// # Example 1125 /// 1126 /// ``` 1127 /// use ascii_domain::dom::Rfc1123Domain; 1128 /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().into_domain().len().get() == 11); 1129 /// ``` 1130 #[inline] 1131 pub fn into_domain(self) -> Domain<T> { 1132 self.dom 1133 } 1134 } 1135 impl<T: AsRef<[u8]>> Rfc1123Domain<T> { 1136 /// Function that transforms `v` into an `Rfc1123Domain` by only allowing [`Label`]s to contain the ASCII `u8`s 1137 /// in [`ASCII_HYPHEN_DIGITS_LETTERS`] with each `Label` not starting or ending with a `b'-'`. A trailing `b'.'` 1138 /// is ignored. The last `Label` (i.e., TLD) must either only contain ASCII letters or must have length of at 1139 /// least five with the first 4 bytes being `b"xn--"`. 1140 /// 1141 /// Unliked calling [`Domain::try_from_bytes`] then [`Rfc1123Domain::try_from`] which performs two traversals 1142 /// of `v`, this performs a single traversal of `v`. 1143 /// 1144 /// # Examples 1145 /// 1146 /// ``` 1147 /// use ascii_domain::dom::{Rfc1123Domain, Rfc1123Err}; 1148 /// assert!(Rfc1123Domain::try_from_bytes("example.com").is_ok()); 1149 /// assert!(Rfc1123Domain::try_from_bytes("example.xn--abc").is_ok()); 1150 /// assert!(Rfc1123Domain::try_from_bytes("a-.com").map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); 1151 /// ``` 1152 /// 1153 /// # Errors 1154 /// 1155 /// Returns [`Rfc1123Err`] iff `v.as_ref()` is an invalid `Rfc1123Domain`. 1156 #[expect( 1157 clippy::arithmetic_side_effects, 1158 clippy::indexing_slicing, 1159 reason = "comments justify their correctness" 1160 )] 1161 #[expect(clippy::redundant_else, reason = "prefer else with else-if")] 1162 #[inline] 1163 pub fn try_from_bytes(v: T) -> Result<Self, Rfc1123Err> { 1164 // The easiest implementation would be redirecting to `Domain::try_from_bytes`; and upon success, 1165 // verify each `Label` doesn't begin or end with a hyphen. That requires traversing `v` twice though. 1166 // We opt to traverse just once. 1167 let val = v.as_ref(); 1168 let value = match val.last() { 1169 None => return Err(Rfc1123Err::DomainErr(DomainErr::Empty)), 1170 Some(byt) => { 1171 let b = *byt; 1172 if b == b'.' { 1173 if val.len() == 1 { 1174 return Err(Rfc1123Err::DomainErr(DomainErr::RootDomain)); 1175 } 1176 // We know `val.len` is at least 2. 1177 let len = val.len() - 1; 1178 let lst = val[len - 1]; 1179 if lst == b'.' { 1180 return Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel)); 1181 } else if lst == b'-' { 1182 return Err(Rfc1123Err::LabelEndsWithAHyphen); 1183 } else { 1184 &val[..len] 1185 } 1186 } else if b == b'-' { 1187 return Err(Rfc1123Err::LabelEndsWithAHyphen); 1188 } else { 1189 val 1190 } 1191 } 1192 }; 1193 if value.len() > 253 { 1194 Err(Rfc1123Err::DomainErr(DomainErr::LenExceeds253(value.len()))) 1195 } else { 1196 let mut count = 0; 1197 value 1198 .iter() 1199 .try_fold(0, |label_len, byt| { 1200 let b = *byt; 1201 if b == b'.' { 1202 NonZeroU8::new(label_len).map_or( 1203 Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel)), 1204 |_| { 1205 // We verify the last character in the `Label` is not a hyphen. 1206 // `count` > 0 since `label_len` > 0 and `count` < `value.len()` since 1207 // it's the index of the `b'.'`. 1208 if value[count - 1] == b'-' { 1209 Err(Rfc1123Err::LabelEndsWithAHyphen) 1210 } else { 1211 Ok(0) 1212 } 1213 }, 1214 ) 1215 } else if !RFC_CHARS.contains(b) { 1216 Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(b))) 1217 } else if b == b'-' && label_len == 0 { 1218 Err(Rfc1123Err::LabelStartsWithAHyphen) 1219 } else if label_len == 63 { 1220 Err(Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63)) 1221 } else { 1222 // This caps at 253, so no overflow. 1223 count += 1; 1224 // This is less than 64 due to the above check, so this won't overflow; 1225 Ok(label_len + 1) 1226 } 1227 }) 1228 .and_then(|tld_len| { 1229 // `tld_len <= value.len()`. 1230 let tld = &value[value.len() - usize::from(tld_len)..]; 1231 if (tld 1232 .split_at_checked(4) 1233 .is_some_and(|(fst, rem)| !rem.is_empty() && fst == b"xn--")) 1234 || tld 1235 .iter() 1236 .try_fold((), |(), byt| { 1237 if byt.is_ascii_alphabetic() { 1238 Ok(()) 1239 } else { 1240 Err(()) 1241 } 1242 }) 1243 .is_ok() 1244 { 1245 Ok(()) 1246 } else { 1247 Err(Rfc1123Err::InvalidTld) 1248 } 1249 }) 1250 .map(|()| Self { 1251 dom: Domain { value: v }, 1252 }) 1253 } 1254 } 1255 /// Returns `true` iff the domain adheres to the literal interpretation of RFC 1123. For more information 1256 /// read the description of [`Rfc1123Domain`]. 1257 /// 1258 /// # Examples 1259 /// 1260 /// ``` 1261 /// use ascii_domain::dom::Rfc1123Domain; 1262 /// assert!(Rfc1123Domain::try_from_bytes("example.commmm").unwrap().is_literal_interpretation()); 1263 /// assert!(!Rfc1123Domain::try_from_bytes("example.xn--abc").unwrap().is_literal_interpretation()); 1264 /// ``` 1265 #[inline] 1266 pub fn is_literal_interpretation(&self) -> bool { 1267 self.dom.tld().is_alphabetic() 1268 } 1269 /// Returns `true` iff the domain adheres to the strict interpretation of RFC 1123. For more information 1270 /// read the description of [`Rfc1123Domain`]. 1271 /// 1272 /// # Examples 1273 /// 1274 /// ``` 1275 /// use ascii_domain::dom::Rfc1123Domain; 1276 /// assert!(Rfc1123Domain::try_from_bytes("example.Com").unwrap().is_strict_interpretation()); 1277 /// assert!(!Rfc1123Domain::try_from_bytes("example.comm").unwrap().is_strict_interpretation()); 1278 /// ``` 1279 #[inline] 1280 pub fn is_strict_interpretation(&self) -> bool { 1281 let tld = self.dom.tld(); 1282 (2..4).contains(&tld.len().get()) && tld.is_alphabetic() 1283 } 1284 } 1285 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for Rfc1123Domain<T2> { 1286 #[inline] 1287 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1288 self.dom == other.dom 1289 } 1290 } 1291 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Rfc1123Domain<T>> for Rfc1123Domain<T2> { 1292 #[inline] 1293 fn eq(&self, other: &&Rfc1123Domain<T>) -> bool { 1294 self.dom == other.dom 1295 } 1296 } 1297 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for &Rfc1123Domain<T2> { 1298 #[inline] 1299 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1300 self.dom == other.dom 1301 } 1302 } 1303 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for Domain<T2> { 1304 #[inline] 1305 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1306 *self == other.dom 1307 } 1308 } 1309 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for &Domain<T2> { 1310 #[inline] 1311 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1312 **self == other.dom 1313 } 1314 } 1315 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Rfc1123Domain<T>> for Domain<T2> { 1316 #[inline] 1317 fn eq(&self, other: &&Rfc1123Domain<T>) -> bool { 1318 *self == other.dom 1319 } 1320 } 1321 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for Rfc1123Domain<T2> { 1322 #[inline] 1323 fn eq(&self, other: &Domain<T>) -> bool { 1324 self.dom == *other 1325 } 1326 } 1327 impl<T: AsRef<[u8]>> Eq for Rfc1123Domain<T> {} 1328 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Rfc1123Domain<T>> for Rfc1123Domain<T2> { 1329 #[inline] 1330 fn partial_cmp(&self, other: &Rfc1123Domain<T>) -> Option<Ordering> { 1331 self.dom.partial_cmp(&other.dom) 1332 } 1333 } 1334 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Rfc1123Domain<T>> for Domain<T2> { 1335 #[inline] 1336 fn partial_cmp(&self, other: &Rfc1123Domain<T>) -> Option<Ordering> { 1337 self.partial_cmp(&other.dom) 1338 } 1339 } 1340 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Domain<T>> for Rfc1123Domain<T2> { 1341 #[inline] 1342 fn partial_cmp(&self, other: &Domain<T>) -> Option<Ordering> { 1343 self.dom.partial_cmp(other) 1344 } 1345 } 1346 impl<T: AsRef<[u8]>> Ord for Rfc1123Domain<T> { 1347 #[inline] 1348 fn cmp(&self, other: &Self) -> Ordering { 1349 self.dom.cmp(&other.dom) 1350 } 1351 } 1352 impl<T: AsRef<[u8]>> Hash for Rfc1123Domain<T> { 1353 #[inline] 1354 fn hash<H: Hasher>(&self, state: &mut H) { 1355 self.dom.hash(state); 1356 } 1357 } 1358 impl<T> AsRef<Domain<T>> for Rfc1123Domain<T> { 1359 #[inline] 1360 fn as_ref(&self) -> &Domain<T> { 1361 &self.dom 1362 } 1363 } 1364 impl<T> Borrow<Domain<T>> for Rfc1123Domain<T> { 1365 #[inline] 1366 fn borrow(&self) -> &Domain<T> { 1367 &self.dom 1368 } 1369 } 1370 impl<T> Deref for Rfc1123Domain<T> { 1371 type Target = Domain<T>; 1372 #[inline] 1373 fn deref(&self) -> &Self::Target { 1374 &self.dom 1375 } 1376 } 1377 impl<T> From<Rfc1123Domain<T>> for Domain<T> { 1378 #[inline] 1379 fn from(value: Rfc1123Domain<T>) -> Self { 1380 value.dom 1381 } 1382 } 1383 impl From<Rfc1123Domain<Vec<u8>>> for Rfc1123Domain<String> { 1384 #[inline] 1385 fn from(value: Rfc1123Domain<Vec<u8>>) -> Self { 1386 Self { 1387 dom: Domain::<String>::from(value.dom), 1388 } 1389 } 1390 } 1391 impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b [u8]> { 1392 #[inline] 1393 fn from(value: &'a Rfc1123Domain<T>) -> Self { 1394 Self { 1395 dom: Domain::<&'b [u8]>::from(&value.dom), 1396 } 1397 } 1398 } 1399 impl<'a: 'b, 'b, T: AsRef<str>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b str> { 1400 #[inline] 1401 fn from(value: &'a Rfc1123Domain<T>) -> Self { 1402 Self { 1403 dom: Domain::<&'b str>::from(&value.dom), 1404 } 1405 } 1406 } 1407 impl From<Rfc1123Domain<String>> for Rfc1123Domain<Vec<u8>> { 1408 #[inline] 1409 fn from(value: Rfc1123Domain<String>) -> Self { 1410 Self { 1411 dom: Domain::<Vec<u8>>::from(value.dom), 1412 } 1413 } 1414 } 1415 impl<'a: 'b, 'b> From<Rfc1123Domain<&'a [u8]>> for Rfc1123Domain<&'b str> { 1416 #[inline] 1417 fn from(value: Rfc1123Domain<&'a [u8]>) -> Self { 1418 Self { 1419 dom: Domain::<&'b str>::from(value.dom), 1420 } 1421 } 1422 } 1423 impl<'a: 'b, 'b> From<Rfc1123Domain<&'a str>> for Rfc1123Domain<&'b [u8]> { 1424 #[inline] 1425 fn from(value: Rfc1123Domain<&'a str>) -> Self { 1426 Self { 1427 dom: Domain::<&'b [u8]>::from(value.dom), 1428 } 1429 } 1430 } 1431 impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> { 1432 type Error = Rfc1123Err; 1433 #[expect( 1434 clippy::arithmetic_side_effects, 1435 clippy::indexing_slicing, 1436 clippy::unreachable, 1437 reason = "comments explain their correctness" 1438 )] 1439 #[inline] 1440 fn try_from(value: Domain<T>) -> Result<Self, Self::Error> { 1441 let mut labels = value.iter(); 1442 let tld = labels 1443 .next() 1444 .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")); 1445 if tld.is_alphabetic() 1446 || tld 1447 .split_at_checked(4) 1448 .is_some_and(|(fst, rem)| !rem.is_empty() && fst == "xn--") 1449 { 1450 labels 1451 .try_fold((), |(), label| { 1452 let bytes = label.value.as_bytes(); 1453 // `Label`s are never empty, so the below indexing is fine. 1454 // Underflow won't occur for the same reason. 1455 if bytes[0] == b'-' { 1456 Err(Rfc1123Err::LabelStartsWithAHyphen) 1457 } else if bytes[bytes.len() - 1] == b'-' { 1458 Err(Rfc1123Err::LabelEndsWithAHyphen) 1459 } else { 1460 bytes.iter().try_fold((), |(), byt| match *byt { 1461 b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => Ok(()), 1462 val => Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(val))), 1463 }) 1464 } 1465 }) 1466 .map(|()| Self { dom: value }) 1467 } else { 1468 Err(Rfc1123Err::InvalidTld) 1469 } 1470 } 1471 } 1472 impl<T: AsRef<[u8]>> Display for Rfc1123Domain<T> { 1473 #[inline] 1474 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1475 self.dom.fmt(f) 1476 } 1477 } 1478 impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Rfc1123Domain<T> { 1479 type Item = Label<'a>; 1480 type IntoIter = LabelIter<'a>; 1481 #[inline] 1482 fn into_iter(self) -> Self::IntoIter { 1483 LabelIter { 1484 domain: self.dom.as_bytes(), 1485 } 1486 } 1487 } 1488 impl<'a> IntoIterator for Rfc1123Domain<&'a str> { 1489 type Item = Label<'a>; 1490 type IntoIter = LabelIter<'a>; 1491 #[inline] 1492 fn into_iter(self) -> Self::IntoIter { 1493 LabelIter { 1494 domain: <&str>::from(self.dom).as_bytes(), 1495 } 1496 } 1497 } 1498 impl<'a> IntoIterator for Rfc1123Domain<&'a [u8]> { 1499 type Item = Label<'a>; 1500 type IntoIter = LabelIter<'a>; 1501 #[inline] 1502 fn into_iter(self) -> Self::IntoIter { 1503 LabelIter { 1504 domain: <&[u8]>::from(self.dom), 1505 } 1506 } 1507 } 1508 #[cfg(test)] 1509 mod tests { 1510 extern crate alloc; 1511 use super::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err}; 1512 use crate::char_set::{ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS, AllowedAscii}; 1513 use alloc::borrow::ToOwned; 1514 use core::cmp::Ordering; 1515 use serde_json as _; 1516 #[test] 1517 fn test_dom_parse() { 1518 let allowed_ascii = ASCII_FIREFOX; 1519 // Test empty is error. 1520 assert!( 1521 Domain::try_from_bytes("", &allowed_ascii) 1522 .map_or_else(|e| e == DomainErr::Empty, |_| false) 1523 ); 1524 // Test root domain. 1525 assert!( 1526 Domain::try_from_bytes(".", &allowed_ascii) 1527 .map_or_else(|e| e == DomainErr::RootDomain, |_| false) 1528 ); 1529 // Test empty label is error. 1530 assert!( 1531 Domain::try_from_bytes("a..com", &allowed_ascii) 1532 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) 1533 ); 1534 assert!( 1535 Domain::try_from_bytes("a..", &allowed_ascii) 1536 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) 1537 ); 1538 assert!( 1539 Domain::try_from_bytes("..", &allowed_ascii) 1540 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) 1541 ); 1542 // Test label too long. 1543 let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; 1544 // 4 + 64 + 4 1545 assert!(val.len() == 72); 1546 assert!( 1547 Domain::try_from_bytes(val, &allowed_ascii) 1548 .map_or_else(|e| e == DomainErr::LabelLenExceeds63, |_| false) 1549 ); 1550 assert!( 1551 Domain::try_from_bytes( 1552 "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", 1553 &allowed_ascii 1554 ) 1555 .map_or(false, |d| d.len().get() == 71) 1556 ); 1557 // Test domain too long. 1558 assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(254), |_| false)); 1559 assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or(false, |d| d.len().get() == 253 )); 1560 // Test max labels. 1561 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(255), |_| false)); 1562 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1563 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.", &allowed_ascii).map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1564 // Test removal of trailing '.'. 1565 assert!( 1566 Domain::try_from_bytes("com.", &allowed_ascii).map_or(false, |d| d.as_str() == "com") 1567 ); 1568 // Test single label. 1569 assert!(Domain::try_from_bytes("c", &allowed_ascii).map_or(false, |d| d.as_str() == "c")); 1570 // Test case-insensitivity. 1571 assert!( 1572 Domain::try_from_bytes("wwW.ExAMple.COm", &allowed_ascii).map_or(false, |d| { 1573 Domain::try_from_bytes("www.example.com", &allowed_ascii) 1574 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) 1575 }) 1576 ); 1577 assert!( 1578 Domain::try_from_bytes("ww_W.com", &allowed_ascii).map_or(false, |d| { 1579 Domain::try_from_bytes("Ww_w.com", &allowed_ascii) 1580 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) 1581 }) 1582 ); 1583 // Test valid bytes 1584 let mut input; 1585 let mut counter = 0; 1586 for i in 0..=127 { 1587 input = [i]; 1588 match i { 1589 b'!' 1590 | b'$' 1591 | b'&'..=b')' 1592 | b'+'..=b'-' 1593 | b'0'..=b'9' 1594 | b';' 1595 | b'=' 1596 | b'A'..=b'Z' 1597 | b'_'..=b'{' 1598 | b'}'..=b'~' => { 1599 counter += 1; 1600 assert!( 1601 Domain::try_from_bytes(input, &allowed_ascii).map_or(false, |d| d 1602 .value 1603 .len() 1604 == 1 1605 && d.value == input) 1606 ) 1607 } 1608 b'.' => { 1609 let input2 = b"a."; 1610 assert!( 1611 Domain::try_from_bytes(input2, &allowed_ascii).map_or(false, |d| d 1612 .len() 1613 .get() 1614 == 1 1615 && d.value == input2) 1616 ) 1617 } 1618 _ => assert!( 1619 Domain::try_from_bytes(input, &allowed_ascii) 1620 .map_or_else(|e| e == DomainErr::InvalidByte(i), |_| false) 1621 ), 1622 } 1623 } 1624 assert!(counter == 78); 1625 } 1626 #[test] 1627 fn test_dom_iter() { 1628 let allowed_ascii = ASCII_FIREFOX; 1629 assert!( 1630 Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { 1631 let mut iter = d.iter(); 1632 let Some(l) = iter.next() else { 1633 return false; 1634 }; 1635 if l.value != "com" { 1636 return false; 1637 } 1638 let Some(l) = iter.next() else { return false }; 1639 if l.value != "example" { 1640 return false; 1641 } 1642 let Some(l) = iter.next() else { 1643 return false; 1644 }; 1645 if l.value != "www" { 1646 return false; 1647 } 1648 iter.next().is_none() 1649 }) 1650 ); 1651 assert!( 1652 Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { 1653 let mut iter = d.iter(); 1654 let Some(l) = iter.next_back() else { 1655 return false; 1656 }; 1657 if l.value != "www" { 1658 return false; 1659 } 1660 let Some(l) = iter.next_back() else { 1661 return false; 1662 }; 1663 if l.value != "example" { 1664 return false; 1665 } 1666 let Some(l) = iter.next_back() else { 1667 return false; 1668 }; 1669 if l.value != "com" { 1670 return false; 1671 } 1672 iter.next_back().is_none() 1673 }) 1674 ); 1675 assert!( 1676 Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { 1677 let mut iter = d.iter(); 1678 let Some(l) = iter.next_back() else { 1679 return false; 1680 }; 1681 if l.value != "www" { 1682 return false; 1683 } 1684 let Some(l) = iter.next() else { return false }; 1685 if l.value != "com" { 1686 return false; 1687 } 1688 let Some(l) = iter.next_back() else { 1689 return false; 1690 }; 1691 if l.value != "example" { 1692 return false; 1693 } 1694 iter.next().is_none() && iter.next_back().is_none() 1695 }) 1696 ); 1697 } 1698 #[test] 1699 fn rfc1123() { 1700 assert!( 1701 Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1702 false, 1703 |dom| Rfc1123Domain::try_from(dom) 1704 .map_or(false, |dom| dom.as_str() == "example.com") 1705 ) 1706 ); 1707 assert!( 1708 AllowedAscii::try_from_unique_ascii(b"exampl!co".to_owned()).map_or(false, |ascii| { 1709 Domain::try_from_bytes("exampl!e.com", &ascii).map_or(false, |dom| { 1710 Rfc1123Domain::try_from(dom).map_or_else( 1711 |e| e == Rfc1123Err::DomainErr(DomainErr::InvalidByte(b'!')), 1712 |_| false, 1713 ) 1714 }) 1715 }) 1716 ); 1717 assert!( 1718 Domain::try_from_bytes("example-.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1719 false, 1720 |dom| Rfc1123Domain::try_from(dom) 1721 .map_or_else(|e| e == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1722 ) 1723 ); 1724 assert!( 1725 Domain::try_from_bytes("-example.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1726 false, 1727 |dom| Rfc1123Domain::try_from(dom) 1728 .map_or_else(|e| e == Rfc1123Err::LabelStartsWithAHyphen, |_| false) 1729 ) 1730 ); 1731 assert!( 1732 Domain::try_from_bytes("example.c1m", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1733 false, 1734 |dom| Rfc1123Domain::try_from(dom) 1735 .map_or_else(|e| e == Rfc1123Err::InvalidTld, |_| false) 1736 ) 1737 ); 1738 assert!( 1739 Domain::try_from_bytes("example.commm", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1740 false, 1741 |dom| Rfc1123Domain::try_from(dom) 1742 .map_or(false, |rfc| rfc.is_literal_interpretation()) 1743 ) 1744 ); 1745 assert!( 1746 Domain::try_from_bytes("example.xn--abc", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1747 false, 1748 |dom| Rfc1123Domain::try_from(dom) 1749 .map_or(false, |rfc| !rfc.is_literal_interpretation()) 1750 ) 1751 ); 1752 assert!( 1753 Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1754 false, 1755 |dom| Rfc1123Domain::try_from(dom) 1756 .map_or(false, |rfc| rfc.is_strict_interpretation()) 1757 ) 1758 ); 1759 assert!( 1760 Domain::try_from_bytes("example.comm", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1761 false, 1762 |dom| Rfc1123Domain::try_from(dom) 1763 .map_or(false, |rfc| !rfc.is_strict_interpretation()) 1764 ) 1765 ); 1766 } 1767 #[test] 1768 fn test_tld() { 1769 assert!( 1770 Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS) 1771 .map_or(false, |dom| dom.tld().as_str() == "com",) 1772 ); 1773 } 1774 #[test] 1775 fn test_rfc1123_parse() { 1776 // Test empty is error. 1777 assert!( 1778 Rfc1123Domain::try_from_bytes("") 1779 .map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::Empty), |_| false) 1780 ); 1781 // Test root domain. 1782 assert!(Rfc1123Domain::try_from_bytes(".").map_or_else( 1783 |e| e == Rfc1123Err::DomainErr(DomainErr::RootDomain), 1784 |_| false 1785 )); 1786 // Test empty label is error. 1787 assert!(Rfc1123Domain::try_from_bytes("a..com").map_or_else( 1788 |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), 1789 |_| false 1790 )); 1791 assert!(Rfc1123Domain::try_from_bytes("a..").map_or_else( 1792 |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), 1793 |_| false 1794 )); 1795 assert!(Rfc1123Domain::try_from_bytes("..").map_or_else( 1796 |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), 1797 |_| false 1798 )); 1799 // Test label too long. 1800 let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; 1801 // 4 + 64 + 4 1802 assert!(val.len() == 72); 1803 assert!(Rfc1123Domain::try_from_bytes(val).map_or_else( 1804 |e| e == Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63), 1805 |_| false 1806 )); 1807 assert!( 1808 Rfc1123Domain::try_from_bytes( 1809 "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", 1810 ) 1811 .map_or(false, |d| d.len().get() == 71) 1812 ); 1813 // Test domain too long. 1814 assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(254)), |_| false)); 1815 assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or(false, |d| d.len().get() == 253 )); 1816 // Test max labels. 1817 assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(255)), |_| false)); 1818 assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1819 assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.").map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1820 // Test removal of trailing '.'. 1821 assert!(Rfc1123Domain::try_from_bytes("com.").map_or(false, |d| d.as_str() == "com")); 1822 // Test single label. 1823 assert!(Rfc1123Domain::try_from_bytes("c").map_or(false, |d| d.as_str() == "c")); 1824 // Test ends with hyphen. 1825 assert!( 1826 Rfc1123Domain::try_from_bytes("-") 1827 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1828 ); 1829 assert!( 1830 Rfc1123Domain::try_from_bytes("-.") 1831 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1832 ); 1833 assert!( 1834 Rfc1123Domain::try_from_bytes("a.com.-") 1835 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1836 ); 1837 assert!( 1838 Rfc1123Domain::try_from_bytes("a.com-") 1839 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1840 ); 1841 assert!( 1842 Rfc1123Domain::try_from_bytes("a-.com") 1843 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1844 ); 1845 // Test starts with hyphen. 1846 assert!( 1847 Rfc1123Domain::try_from_bytes("a.-com") 1848 .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false) 1849 ); 1850 assert!( 1851 Rfc1123Domain::try_from_bytes("-a.com") 1852 .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false) 1853 ); 1854 // Test case-insensitivity. 1855 assert!( 1856 Rfc1123Domain::try_from_bytes("wwW.ExAMple.COm").map_or(false, |d| { 1857 Rfc1123Domain::try_from_bytes("www.example.com") 1858 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) 1859 }) 1860 ); 1861 assert!( 1862 Rfc1123Domain::try_from_bytes("ww-W.com").map_or(false, |d| { 1863 Rfc1123Domain::try_from_bytes("Ww-w.com") 1864 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) 1865 }) 1866 ); 1867 assert!( 1868 Rfc1123Domain::try_from_bytes("1.1.1.1") 1869 .map_or_else(|err| err == Rfc1123Err::InvalidTld, |_| false) 1870 ); 1871 } 1872 }