dom.rs (74895B)
1 extern crate alloc; 2 use crate::char_set::{ASCII_HYPHEN_DIGITS_LETTERS, AllowedAscii}; 3 use alloc::{string::String, vec::Vec}; 4 use core::{ 5 borrow::Borrow, 6 cmp::Ordering, 7 convert::{self, AsRef}, 8 error::Error, 9 fmt::{self, Display, Formatter}, 10 hash::{Hash, Hasher}, 11 iter::FusedIterator, 12 num::NonZeroU8, 13 ops::Deref, 14 str, 15 }; 16 /// The `AllowedAscii` used by `Rfc1123Domain`. 17 static RFC_CHARS: &AllowedAscii<[u8; 63]> = &ASCII_HYPHEN_DIGITS_LETTERS; 18 /// Returned by [`Domain::cmp_by_domain_ordering`]. 19 /// 20 /// It is more informative than [`Ordering`] in that it 21 /// distinguishes between a `Domain` that is greater than another `Domain` due to a [`Label`] being greater 22 /// from a `Domain` that has the same `Label`s as another but simply more. 23 /// 24 /// Another way to view this is that [`Self::Shorter`] is "closer" to being [`Self::Equal`] than [`Self::Less`] 25 /// since the `Domain`s are still part of the same branch in the DNS hierarchy. Ditto for [`Self::Longer`]. 26 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 27 pub enum DomainOrdering { 28 /// The `Domain` is less than another since a `Label` was less. 29 Less, 30 /// The `Domain` is less than other but only because it had fewer `Label`s. 31 Shorter, 32 /// The `Domain` is equal to another. 33 Equal, 34 /// The `Domain` is greater than another but only because it had more `Label`s. 35 Longer, 36 /// The `Domain` is greater than another since a `Label` was greater. 
37 Greater, 38 } 39 impl Display for DomainOrdering { 40 #[inline] 41 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 42 match *self { 43 Self::Less => f.write_str("less since a label was less"), 44 Self::Shorter => f.write_str("less since there were fewer labels"), 45 Self::Equal => f.write_str("equal"), 46 Self::Longer => f.write_str("greater since there were more labels"), 47 Self::Greater => f.write_str("greater since a label was greater"), 48 } 49 } 50 } 51 impl From<DomainOrdering> for Ordering { 52 #[inline] 53 fn from(value: DomainOrdering) -> Self { 54 match value { 55 DomainOrdering::Less | DomainOrdering::Shorter => Self::Less, 56 DomainOrdering::Equal => Self::Equal, 57 DomainOrdering::Longer | DomainOrdering::Greater => Self::Greater, 58 } 59 } 60 } 61 /// A domain that consists of at least one [`Label`] with each `Label` only containing the ASCII `u8`s in 62 /// the [`AllowedAscii`] passed to [`Self::try_from_bytes`]. 63 /// 64 /// The total length of a `Domain` is at most 65 /// 253 bytes[^note] in length including the `b'.'` separator. The trailing `b'.'`, if one exists, is always 66 /// ignored. 67 /// 68 /// This is more restrictive than what a domain is allowed to be per the 69 /// [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181) since all octets/`u8`s are allowed in a 70 /// label. Additionally there is no way to represent the root domain. 71 /// 72 /// Last, ASCII uppercase letters are treated as lowercase; however for better comparison performance 73 /// that doesn't lead to intermediate memory allocations, two `Domain`s should consist entirely of the same 74 /// case. 75 /// 76 /// [^note]: It is a common misconception that the max length of a domain is 255, but that is only true for 77 /// domains in _wire_ format. In representation format, which `Domain` can be thought of when only visible 78 /// ASCII bytes are used, the max length is 253 when the last byte is not `b'.'`; otherwise the max length is 79 /// 254. 
This is due to the fact that there is no way to explicitly represent the root label which in wire format 80 /// contributes one byte due to each label being preceded by the octet that represents its length. 81 /// 82 /// Note this only contains `T`, so this is allocation-free and the same size as `T`. 83 #[derive(Clone, Copy, Debug)] 84 pub struct Domain<T> { 85 /// The domain value. `value.as_ref().len()` is guaranteed to have length between 1 and 253 when the last `u8` 86 /// is not `b'.'`; otherwise the length is between 2 and 254. 87 /// Guaranteed to only contain `b'.'` and the ASCII `u8`s in `allowed_ascii`. 88 value: T, 89 } 90 impl<T> Domain<T> { 91 /// A reference to the contained `T`. 92 /// 93 /// # Example 94 /// 95 /// ``` 96 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 97 /// assert!(*Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().as_inner() == "example.com."); 98 /// ``` 99 #[inline] 100 pub const fn as_inner(&self) -> &T { 101 &self.value 102 } 103 /// Same as [`Self::as_inner`] except `self` is consumed. 104 /// 105 /// # Example 106 /// 107 /// ``` 108 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 109 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_inner() == "example.com."); 110 /// ``` 111 #[inline] 112 pub fn into_inner(self) -> T { 113 self.value 114 } 115 } 116 impl<T: AsRef<[u8]>> Domain<T> { 117 /// Returns `true` iff the domain contains a trailing `b'.'`. 
118 /// 119 /// # Example 120 /// 121 /// ``` 122 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 123 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().contains_trailing_dot()); 124 /// ``` 125 #[expect( 126 clippy::arithmetic_side_effects, 127 clippy::indexing_slicing, 128 reason = "comments explain their correctness" 129 )] 130 #[inline] 131 pub fn contains_trailing_dot(&self) -> bool { 132 let bytes = self.value.as_ref(); 133 // This won't underflow or `panic` since `Domain`s are not empty. 134 bytes[bytes.len() - 1] == b'.' 135 } 136 /// The domain without a trailing `b'.'` if there was one. 137 /// 138 /// # Example 139 /// 140 /// ``` 141 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 142 /// assert!(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap().as_str() == "Example.com"); 143 /// ``` 144 #[inline] 145 pub fn as_str(&self) -> &str { 146 <&str>::from(Domain::<&str>::from(Domain::<&[u8]>::from(self))) 147 } 148 /// The domain without a trailing `b'.'` if there was one. 149 /// 150 /// # Example 151 /// 152 /// ``` 153 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 154 /// assert!(Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap().as_bytes() == b"Example.com"); 155 /// ``` 156 #[inline] 157 pub fn as_bytes(&self) -> &[u8] { 158 <&[u8]>::from(Domain::<&[u8]>::from(self)) 159 } 160 /// The length of the `Domain`. This does _not_ include the trailing `b'.'` if there was one. 
161 /// 162 /// # Example 163 /// 164 /// ``` 165 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 166 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().len().get() == 11); 167 /// ``` 168 #[expect( 169 unsafe_code, 170 reason = "we enforce nonzero lengths, so NonZeroU8::new_unchecked is fine" 171 )] 172 #[expect( 173 clippy::arithmetic_side_effects, 174 clippy::as_conversions, 175 clippy::cast_possible_truncation, 176 reason = "comments justify their correctness" 177 )] 178 #[inline] 179 pub fn len(&self) -> NonZeroU8 { 180 // No fear of underflow since the length of `value` is at least 1 _not including_ the 181 // trailing `b'.'` if there was one. 182 // `true as usize` is guaranteed to be 1 and `false as usize` is guaranteed to be 0. 183 // No fear of truncation either since the length is guaranteed to be less than 255. 184 // `Domain` is immutable ensuring such invariants are kept. 185 let len = (self.value.as_ref().len() - usize::from(self.contains_trailing_dot())) as u8; 186 // SAFETY: 187 // The only way to construct a `Domain` is via `try_from_bytes` which ensures `len` is 188 // is at least 1. 189 unsafe { NonZeroU8::new_unchecked(len) } 190 } 191 /// Function that transforms `v` into a `Domain` by only allowing [`Label`]s to contain the ASCII `u8`s in 192 /// `allowed_ascii`. A trailing `b'.'` is ignored. 193 /// 194 /// Note that while ASCII uppercase is treated as ASCII lowercase, `allowed_ascii` MUST still contain 195 /// each ASCII `u8` (e.g., if `!allowed_ascii.contains(b'A')`, then `b'A'` is not allowed even if 196 /// `allowed_ascii.contains(b'a')`). 
197 /// 198 /// # Examples 199 /// 200 /// ``` 201 /// use ascii_domain::{dom::{Domain, DomainErr}, char_set::ASCII_LOWERCASE}; 202 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).is_ok()); 203 /// assert!(Domain::try_from_bytes("exam2ple.com", &ASCII_LOWERCASE).map_or_else(|err| err == DomainErr::InvalidByte(b'2'), |_| false)); 204 /// ``` 205 /// 206 /// # Errors 207 /// 208 /// Returns [`DomainErr`] iff `v.as_ref()` is an invalid `Domain`. 209 #[expect( 210 clippy::arithmetic_side_effects, 211 reason = "comment justifies its correctness" 212 )] 213 #[inline] 214 pub fn try_from_bytes<T2: AsRef<[u8]>>( 215 v: T, 216 allowed_ascii: &AllowedAscii<T2>, 217 ) -> Result<Self, DomainErr> { 218 let val = v.as_ref(); 219 let value = val 220 .split_last() 221 .ok_or(DomainErr::Empty) 222 .and_then(|(lst, rem)| { 223 if *lst == b'.' { 224 rem.split_last() 225 .ok_or(DomainErr::RootDomain) 226 .and_then(|(lst_2, _)| { 227 if *lst_2 == b'.' { 228 Err(DomainErr::EmptyLabel) 229 } else { 230 Ok(rem) 231 } 232 }) 233 } else { 234 Ok(val) 235 } 236 })?; 237 if value.len() > 253 { 238 Err(DomainErr::LenExceeds253(value.len())) 239 } else { 240 value 241 .iter() 242 .try_fold(0, |label_len, byt| { 243 let b = *byt; 244 if b == b'.' { 245 NonZeroU8::new(label_len).map_or(Err(DomainErr::EmptyLabel), |_| Ok(0)) 246 } else if !allowed_ascii.contains(b) { 247 Err(DomainErr::InvalidByte(b)) 248 } else if label_len == 63 { 249 Err(DomainErr::LabelLenExceeds63) 250 } else { 251 // This is less than 63 due to the above check, so this won't overflow; 252 Ok(label_len + 1) 253 } 254 }) 255 .map(|_| Self { value: v }) 256 } 257 } 258 /// Returns an [`Iterator`] of [`Label`]s without consuming the `Domain`. 
259 /// # Example 260 /// 261 /// ``` 262 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 263 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().unwrap().as_str() == "com"); 264 /// ``` 265 #[inline] 266 pub fn iter(&self) -> LabelIter<'_> { 267 LabelIter { 268 domain: self.as_bytes(), 269 } 270 } 271 /// Returns `true` iff `self` and `right` are part of the same branch in the DNS hierarchy. 272 /// 273 /// For example `www.example.com` and `example.com` are in the `same_branch`, but `example.com` and 274 /// `foo.com` are not. 275 /// 276 /// Note that trailing `b'.'`s are ignored and ASCII uppercase and lowercase are treated the same. 277 /// 278 /// # Examples 279 /// 280 /// ``` 281 /// use ascii_domain::{dom::Domain, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 282 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 283 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 284 /// assert!(dom1.same_branch(&dom2)); 285 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 286 /// assert!(!dom1.same_branch(&dom3)); 287 /// ``` 288 #[inline] 289 pub fn same_branch<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> bool { 290 // Faster to check the values as bytes and not iterate each `Label`. 291 if self == right { 292 true 293 } else { 294 self.iter() 295 .zip(right) 296 .try_fold( 297 (), 298 |(), (label, label2)| if label == label2 { Ok(()) } else { Err(()) }, 299 ) 300 .is_ok_and(|()| true) 301 } 302 } 303 /// Same as [`Self::cmp_doms`] except returns [`DomainOrdering::Longer`] iff `self > right` due solely 304 /// to having more [`Label`]s and [`DomainOrdering::Shorter`] iff `self < right` due solely to having 305 /// fewer `Label`s. 
306 /// 307 /// For example `example.com` < `www.example.com` and `bar.com` < `www.example.com`; but with this function, 308 /// `example.com` is [`DomainOrdering::Shorter`] than `www.example.com` and `www.example.com` is 309 /// [`DomainOrdering::Longer`] than `example.com`; while `bar.com` is [`DomainOrdering::Less`] than 310 /// `www.example.com` and `www.example.com` is [`DomainOrdering::Greater`] than `bar.com`. 311 /// 312 /// In other words `DomainOrdering::Shorter` implies `Ordering::Less` and `DomainOrdering::Longer` implies 313 /// `Ordering::Greater` with additional information pertaining to the quantity of `Label`s. 314 /// 315 /// # Examples 316 /// 317 /// ``` 318 /// use ascii_domain::{dom::{Domain, DomainOrdering}, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 319 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 320 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom1), DomainOrdering::Equal)); 321 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 322 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom2), DomainOrdering::Shorter)); 323 /// assert!(matches!(dom2.cmp_by_domain_ordering(&dom1), DomainOrdering::Longer)); 324 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 325 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom3), DomainOrdering::Less)); 326 /// assert!(matches!(dom3.cmp_by_domain_ordering(&dom1), DomainOrdering::Greater)); 327 /// ``` 328 #[inline] 329 pub fn cmp_by_domain_ordering<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> DomainOrdering { 330 // Faster to compare the entire value when we can instead each `Label`. 
331 if self == right { 332 DomainOrdering::Equal 333 } else { 334 let mut right_iter = right.iter(); 335 self.iter() 336 .try_fold(false, |_, label| { 337 right_iter 338 .next() 339 .map_or(Ok(true), |label2| match label.cmp(&label2) { 340 Ordering::Less => Err(DomainOrdering::Less), 341 Ordering::Equal => Ok(false), 342 Ordering::Greater => Err(DomainOrdering::Greater), 343 }) 344 }) 345 .map_or_else(convert::identity, |flag| { 346 // We iterate `self` before `right`, so `flag` is `true` iff `right` 347 // has fewer `Label`s than `self`. 348 if flag { 349 DomainOrdering::Longer 350 } else { 351 // `self` has as many or fewer `Label`s than `right`; however if it had as many 352 // `Label`s as `right`, then all `Label`s are the same which is impossible since 353 // we already checked if `self == right`. 354 DomainOrdering::Shorter 355 } 356 }) 357 } 358 } 359 /// The total order that is defined follows the following hierarchy: 360 /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. 361 /// 2. If 1. evaluates as not equivalent, then return the result. 362 /// 3. Return the comparison of `Label` counts. 363 /// 364 /// For example, `com` < `example.com` < `net` < `example.net`. 365 /// 366 /// This is the same as the [canonical DNS name order](https://datatracker.ietf.org/doc/html/rfc4034#section-6.1). 367 /// ASCII uppercase is treated as ASCII lowercase and trailing `b'.'`s are ignored. 368 /// The [`AllowedAscii`]s in the `Domain`s are ignored. 
369 /// 370 /// # Examples 371 /// 372 /// ``` 373 /// use core::cmp::Ordering; 374 /// use ascii_domain::{dom::Domain, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 375 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 376 /// assert!(matches!(dom1.cmp_doms(&dom1), Ordering::Equal)); 377 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 378 /// assert!(matches!(dom1.cmp_doms(&dom2), Ordering::Less)); 379 /// assert!(matches!(dom2.cmp_doms(&dom1), Ordering::Greater)); 380 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 381 /// assert!(matches!(dom1.cmp_doms(&dom3), Ordering::Less)); 382 /// assert!(matches!(dom3.cmp_doms(&dom1), Ordering::Greater)); 383 /// ``` 384 #[inline] 385 pub fn cmp_doms<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> Ordering { 386 self.cmp_by_domain_ordering(right).into() 387 } 388 /// Returns the first `Label`. 389 /// 390 /// # Example 391 /// 392 /// ``` 393 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 394 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().first_label().as_str() == "example"); 395 /// ``` 396 #[expect(clippy::unreachable, reason = "bug in code, so we want to crash")] 397 #[inline] 398 pub fn first_label(&self) -> Label<'_> { 399 self.iter() 400 .next_back() 401 .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")) 402 } 403 /// Returns the last `Label` (i.e., the TLD). 
404 /// 405 /// # Example 406 /// 407 /// ``` 408 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 409 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().tld().as_str() == "com"); 410 /// ``` 411 #[expect(clippy::unreachable, reason = "bug in code, so we want to crash")] 412 #[inline] 413 pub fn tld(&self) -> Label<'_> { 414 self.iter() 415 .next() 416 .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")) 417 } 418 } 419 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for Domain<T2> { 420 /// Ignores the provided [`AllowedAscii`] and simply compares the two `Domain`s as [`Label`]s 421 /// of bytes. Note uppercase ASCII is treated as lowercase ASCII and trailing `b'.'`s are ignored. 422 #[inline] 423 fn eq(&self, other: &Domain<T>) -> bool { 424 self.as_bytes().eq_ignore_ascii_case(other.as_bytes()) 425 } 426 } 427 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Domain<T>> for Domain<T2> { 428 #[inline] 429 fn eq(&self, other: &&Domain<T>) -> bool { 430 *self == **other 431 } 432 } 433 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for &Domain<T2> { 434 #[inline] 435 fn eq(&self, other: &Domain<T>) -> bool { 436 **self == *other 437 } 438 } 439 impl<T: AsRef<[u8]>> Eq for Domain<T> {} 440 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Domain<T>> for Domain<T2> { 441 /// Consult [`Self::cmp_doms`]. 442 #[inline] 443 fn partial_cmp(&self, other: &Domain<T>) -> Option<Ordering> { 444 Some(self.cmp_doms(other)) 445 } 446 } 447 impl<T: AsRef<[u8]>> Ord for Domain<T> { 448 /// Consult [`Self::cmp_doms`]. 
449 #[inline] 450 fn cmp(&self, other: &Self) -> Ordering { 451 self.cmp_doms(other) 452 } 453 } 454 impl<T: AsRef<[u8]>> Hash for Domain<T> { 455 #[inline] 456 fn hash<H: Hasher>(&self, state: &mut H) { 457 self.as_bytes().to_ascii_lowercase().hash(state); 458 } 459 } 460 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> TryFrom<(T, &AllowedAscii<T2>)> for Domain<T> { 461 type Error = DomainErr; 462 #[inline] 463 fn try_from(value: (T, &AllowedAscii<T2>)) -> Result<Self, Self::Error> { 464 Self::try_from_bytes(value.0, value.1) 465 } 466 } 467 impl<T: AsRef<[u8]>> Display for Domain<T> { 468 #[inline] 469 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 470 f.write_str(self) 471 } 472 } 473 impl<T: AsRef<[u8]>> AsRef<str> for Domain<T> { 474 #[inline] 475 fn as_ref(&self) -> &str { 476 self.as_str() 477 } 478 } 479 impl<T: AsRef<[u8]>> AsRef<[u8]> for Domain<T> { 480 #[inline] 481 fn as_ref(&self) -> &[u8] { 482 self.as_bytes() 483 } 484 } 485 impl<T: AsRef<[u8]>> Deref for Domain<T> { 486 type Target = str; 487 #[inline] 488 fn deref(&self) -> &Self::Target { 489 self.as_str() 490 } 491 } 492 impl From<Domain<Vec<u8>>> for Domain<String> { 493 #[expect( 494 unsafe_code, 495 reason = "we enforce ASCII, so String::from_utf8_unchecked is fine" 496 )] 497 #[inline] 498 fn from(value: Domain<Vec<u8>>) -> Self { 499 // SAFETY: 500 // We only allow ASCII, so this is fine. 
501 let val = unsafe { String::from_utf8_unchecked(value.value) }; 502 Self { value: val } 503 } 504 } 505 impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Domain<T>> for Domain<&'b [u8]> { 506 #[inline] 507 fn from(value: &'a Domain<T>) -> Self { 508 Self { 509 value: value.value.as_ref(), 510 } 511 } 512 } 513 impl<'a: 'b, 'b, T: AsRef<str>> From<&'a Domain<T>> for Domain<&'b str> { 514 #[inline] 515 fn from(value: &'a Domain<T>) -> Self { 516 Self { 517 value: value.value.as_ref(), 518 } 519 } 520 } 521 impl From<Domain<String>> for Domain<Vec<u8>> { 522 #[inline] 523 fn from(value: Domain<String>) -> Self { 524 Self { 525 value: value.value.into_bytes(), 526 } 527 } 528 } 529 impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> { 530 #[expect( 531 unsafe_code, 532 reason = "we enforce ASCII, so str::from_utf8_unchecked is fine" 533 )] 534 #[inline] 535 fn from(value: Domain<&'a [u8]>) -> Self { 536 // SAFETY: 537 // We only allow ASCII, so this is fine. 538 let val = unsafe { str::from_utf8_unchecked(value.value) }; 539 Self { value: val } 540 } 541 } 542 impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> { 543 #[inline] 544 fn from(value: Domain<&'a str>) -> Self { 545 Self { 546 value: value.value.as_bytes(), 547 } 548 } 549 } 550 impl From<Domain<Self>> for String { 551 /// Returns the contained `String` _without_ a trailing `'.'` if there was one. 552 /// 553 /// # Example 554 /// 555 /// ``` 556 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 557 /// assert!(String::from(Domain::try_from_bytes(String::from("Example.com."), &ASCII_LETTERS).unwrap()).as_str() == "Example.com"); 558 /// ``` 559 #[inline] 560 fn from(value: Domain<Self>) -> Self { 561 if value.contains_trailing_dot() { 562 let mut val = value.value; 563 val.pop(); 564 val 565 } else { 566 value.value 567 } 568 } 569 } 570 impl<'a: 'b, 'b> From<Domain<&'a str>> for &'b str { 571 /// Returns the contained `str` _without_ a trailing `'.'` if there was one. 
572 /// 573 /// # Example 574 /// 575 /// ``` 576 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 577 /// assert!(<&str>::from(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap()) == "Example.com"); 578 /// ``` 579 #[expect( 580 unsafe_code, 581 reason = "we enforce ASCII, so str::from_utf8_unchecked is fine" 582 )] 583 #[expect(clippy::indexing_slicing, reason = "comment justifies its correctness")] 584 #[inline] 585 fn from(value: Domain<&'a str>) -> Self { 586 // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. 587 let utf8 = &value.value.as_bytes()[..value.len().get().into()]; 588 // SAFETY: 589 // Only ASCII is allowed, so this is fine. 590 unsafe { str::from_utf8_unchecked(utf8) } 591 } 592 } 593 impl From<Domain<Self>> for Vec<u8> { 594 /// Returns the contained `Vec` _without_ a trailing `b'.'` if there was one. 595 /// 596 /// # Example 597 /// 598 /// ``` 599 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 600 /// assert!(Vec::from(Domain::try_from_bytes(vec![b'F', b'o', b'o', b'.', b'c', b'o', b'm'], &ASCII_LETTERS).unwrap()).as_slice() == b"Foo.com"); 601 /// ``` 602 #[inline] 603 fn from(value: Domain<Self>) -> Self { 604 if value.contains_trailing_dot() { 605 let mut val = value.value; 606 val.pop(); 607 val 608 } else { 609 value.value 610 } 611 } 612 } 613 impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for &'b [u8] { 614 /// Returns the contained slice _without_ a trailing `b'.'` if there was one. 615 /// 616 /// # Example 617 /// 618 /// ``` 619 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 620 /// assert!(<&[u8]>::from(Domain::try_from_bytes(b"Example.com.".as_slice(), &ASCII_LETTERS).unwrap()) == b"Example.com"); 621 /// ``` 622 #[expect(clippy::indexing_slicing, reason = "comment justifies its correctness")] 623 #[inline] 624 fn from(value: Domain<&'a [u8]>) -> Self { 625 // Indexing won't `panic` since `value.len()` is at most as long as `value.value`. 
626 &value.value[..value.len().get().into()] 627 } 628 } 629 /// Error returned from [`Domain::try_from_bytes`]. 630 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 631 pub enum DomainErr { 632 /// The domain was empty. 633 Empty, 634 /// The domain was the root domain that is to say it was the domain that only contained the root 635 /// zone (i.e., `b'.'`). 636 RootDomain, 637 /// The length of the domain was greater than 253 not counting a terminating `b'.'` if there was one. 638 LenExceeds253(usize), 639 /// The domain contained at least one empty label. 640 EmptyLabel, 641 /// The domain contained at least one label whose length exceeded 63. 642 LabelLenExceeds63, 643 /// The domain contained an invalid byte value. 644 InvalidByte(u8), 645 } 646 impl Display for DomainErr { 647 #[inline] 648 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 649 match *self { 650 Self::Empty => f.write_str("domain is empty"), 651 Self::RootDomain => f.write_str("domain is the root domain"), 652 Self::LenExceeds253(len) => write!( 653 f, 654 "domain has length {len} which is greater than the max length of 253" 655 ), 656 Self::EmptyLabel => f.write_str("domain has an empty label"), 657 Self::LabelLenExceeds63 => { 658 f.write_str("domain has a label that exceeds the max length of 63") 659 } 660 Self::InvalidByte(byt) => { 661 write!(f, "domain has a label with the invalid byte value {byt}") 662 } 663 } 664 } 665 } 666 impl Error for DomainErr {} 667 /// A label of a [`Domain`]. The total length of a `Label` is inclusively between 1 and 63. 668 #[derive(Clone, Copy, Debug)] 669 pub struct Label<'a> { 670 /// The label value. 671 value: &'a str, 672 } 673 impl<'a> Label<'a> { 674 /// The label. 
675 /// 676 /// # Example 677 /// 678 /// ``` 679 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 680 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.as_str() == "com")); 681 /// ``` 682 #[inline] 683 #[must_use] 684 pub const fn as_str(self) -> &'a str { 685 self.value 686 } 687 /// Returns `true` iff the label only contains ASCII letters. 688 /// 689 /// # Example 690 /// 691 /// ``` 692 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 693 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphabetic())); 694 /// ``` 695 #[inline] 696 #[must_use] 697 pub fn is_alphabetic(self) -> bool { 698 self.value 699 .as_bytes() 700 .iter() 701 .try_fold((), |(), byt| { 702 if byt.is_ascii_alphabetic() { 703 Ok(()) 704 } else { 705 Err(()) 706 } 707 }) 708 .is_ok() 709 } 710 /// Returns `true` iff the label only contains ASCII digits. 711 /// 712 /// # Example 713 /// 714 /// ``` 715 /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE}; 716 /// assert!(Domain::try_from_bytes("example.123", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_digits())); 717 /// ``` 718 #[inline] 719 #[must_use] 720 pub fn is_digits(self) -> bool { 721 self.value 722 .as_bytes() 723 .iter() 724 .try_fold((), |(), byt| { 725 if byt.is_ascii_digit() { 726 Ok(()) 727 } else { 728 Err(()) 729 } 730 }) 731 .is_ok() 732 } 733 /// Returns `true` iff the label only contains ASCII digits or letters. 
734 /// 735 /// # Example 736 /// 737 /// ``` 738 /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE}; 739 /// assert!(Domain::try_from_bytes("example.1com", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphanumeric())); 740 /// ``` 741 #[inline] 742 #[must_use] 743 pub fn is_alphanumeric(self) -> bool { 744 self.value 745 .as_bytes() 746 .iter() 747 .try_fold((), |(), byt| { 748 if byt.is_ascii_alphanumeric() { 749 Ok(()) 750 } else { 751 Err(()) 752 } 753 }) 754 .is_ok() 755 } 756 /// Returns `true` iff the label only contains ASCII hyphen, digits, or letters. 757 /// 758 /// # Example 759 /// 760 /// ``` 761 /// use ascii_domain::{dom::Domain, char_set::ASCII_HYPHEN_DIGITS_LOWERCASE}; 762 /// assert!(Domain::try_from_bytes("example.1-com", &ASCII_HYPHEN_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_hyphen_or_alphanumeric())); 763 /// ``` 764 #[inline] 765 #[must_use] 766 pub fn is_hyphen_or_alphanumeric(self) -> bool { 767 self.value 768 .as_bytes() 769 .iter() 770 .try_fold((), |(), byt| { 771 if *byt == b'-' || byt.is_ascii_alphanumeric() { 772 Ok(()) 773 } else { 774 Err(()) 775 } 776 }) 777 .is_ok() 778 } 779 /// The length of the `Label`. This is inclusively between 1 and 63. 780 /// 781 /// # Example 782 /// 783 /// ``` 784 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 785 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.len().get() == 3)); 786 /// ``` 787 #[expect( 788 unsafe_code, 789 reason = "we enforce label lengths, so NonZeroU8::new_unchecked is fine" 790 )] 791 #[expect( 792 clippy::as_conversions, 793 clippy::cast_possible_truncation, 794 reason = "comments justify their correctness" 795 )] 796 #[inline] 797 #[must_use] 798 pub const fn len(self) -> NonZeroU8 { 799 // The max length of a `Label` is 63. 
800 let len = self.value.len() as u8; 801 // SAFETY: 802 // `Label`s are never empty. 803 unsafe { NonZeroU8::new_unchecked(len) } 804 } 805 } 806 impl PartialEq<Label<'_>> for Label<'_> { 807 #[inline] 808 fn eq(&self, other: &Label<'_>) -> bool { 809 self.value.eq_ignore_ascii_case(other.value) 810 } 811 } 812 impl PartialEq<&Label<'_>> for Label<'_> { 813 #[inline] 814 fn eq(&self, other: &&Label<'_>) -> bool { 815 *self == **other 816 } 817 } 818 impl PartialEq<Label<'_>> for &Label<'_> { 819 #[inline] 820 fn eq(&self, other: &Label<'_>) -> bool { 821 **self == *other 822 } 823 } 824 impl Eq for Label<'_> {} 825 impl PartialOrd<Label<'_>> for Label<'_> { 826 #[inline] 827 fn partial_cmp(&self, other: &Label<'_>) -> Option<Ordering> { 828 Some(self.cmp(other)) 829 } 830 } 831 impl Ord for Label<'_> { 832 #[inline] 833 fn cmp(&self, other: &Self) -> Ordering { 834 self.value 835 .to_ascii_lowercase() 836 .cmp(&other.value.to_ascii_lowercase()) 837 } 838 } 839 impl Hash for Label<'_> { 840 #[inline] 841 fn hash<H: Hasher>(&self, state: &mut H) { 842 self.value.to_ascii_lowercase().hash(state); 843 } 844 } 845 impl Display for Label<'_> { 846 #[inline] 847 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 848 f.write_str(self.value) 849 } 850 } 851 impl<'a> AsRef<[u8]> for Label<'a> { 852 #[inline] 853 fn as_ref(&self) -> &'a [u8] { 854 self.value.as_bytes() 855 } 856 } 857 impl<'a> AsRef<str> for Label<'a> { 858 #[inline] 859 fn as_ref(&self) -> &'a str { 860 self.value 861 } 862 } 863 impl<'a> Deref for Label<'a> { 864 type Target = str; 865 #[inline] 866 fn deref(&self) -> &'a Self::Target { 867 self.value 868 } 869 } 870 /// [`Iterator`] that iterates [`Label`]s from a [`Domain`] or [`Rfc1123Domain`] starting from the TLD down. 871 /// 872 /// This iterates `Label`s on demand; so if repeated iteration is desired, it may be better to collect the `Label`s 873 /// in a collection (e.g., [`Vec`]) than create the iterator again. 
This is also why [`ExactSizeIterator`] is not 874 /// implemented. 875 #[derive(Clone, Debug)] 876 pub struct LabelIter<'a> { 877 /// Domain as ASCII. 878 domain: &'a [u8], 879 } 880 impl<'a> Iterator for LabelIter<'a> { 881 type Item = Label<'a>; 882 #[expect( 883 unsafe_code, 884 reason = "we only allow ASCII, so str::from_utf8_unchecked is fine" 885 )] 886 #[expect( 887 clippy::arithmetic_side_effects, 888 clippy::indexing_slicing, 889 reason = "comments justify their correctness" 890 )] 891 #[inline] 892 fn next(&mut self) -> Option<Self::Item> { 893 (!self.domain.is_empty()).then(|| { 894 self.domain 895 .iter() 896 .rev() 897 .try_fold(1, |count, byt| { 898 if *byt == b'.' { 899 let len = self.domain.len(); 900 // `count` < `len` since there is at least one more `u8` before `b'.'`. 901 let idx = len - count; 902 // `idx + 1` < `len` since `count` is > 1 since `Label`s are never empty. 903 let ascii = &self.domain[idx + 1..len]; 904 // SAFETY: 905 // We only allow ASCII, so this is safe. 906 let value = unsafe { str::from_utf8_unchecked(ascii) }; 907 self.domain = &self.domain[..idx]; 908 Err(Label { value }) 909 } else { 910 Ok(count + 1) 911 } 912 }) 913 .map_or_else(convert::identity, |_| { 914 // SAFETY: 915 // We only allow ASCII, so this is safe. 916 let value = unsafe { str::from_utf8_unchecked(self.domain) }; 917 self.domain = &[]; 918 Label { value } 919 }) 920 }) 921 } 922 #[inline] 923 fn last(mut self) -> Option<Self::Item> 924 where 925 Self: Sized, 926 { 927 self.next_back() 928 } 929 #[inline] 930 fn size_hint(&self) -> (usize, Option<usize>) { 931 if self.domain.is_empty() { 932 (0, Some(0)) 933 } else { 934 // The max size of a `Label` is 63; and all but the last have a `b'.'` that follow it. 935 // This means the fewest `Label`s possible is the floor of the length divided by 64 with 936 // the added requirement that it's at least one since we know the domain is not empty. 
937 // The min size of a `Label` is 1; and all but the last have a `b'.'` that follow it. 938 // This means the max number of `Label`s is the ceiling of the length divided by 2. 939 ( 940 (self.domain.len() >> 6).max(1), 941 Some(self.domain.len().div_ceil(2)), 942 ) 943 } 944 } 945 } 946 impl FusedIterator for LabelIter<'_> {} 947 impl DoubleEndedIterator for LabelIter<'_> { 948 #[expect( 949 unsafe_code, 950 reason = "we only allow ASCII, so str::from_utf8_unchecked is fine" 951 )] 952 #[expect( 953 clippy::arithmetic_side_effects, 954 clippy::indexing_slicing, 955 reason = "comments justify their correctness" 956 )] 957 #[inline] 958 fn next_back(&mut self) -> Option<Self::Item> { 959 (!self.domain.is_empty()).then(|| { 960 self.domain 961 .iter() 962 .try_fold(0, |count, byt| { 963 if *byt == b'.' { 964 // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s 965 // are not empty. 966 let ascii = &self.domain[..count]; 967 // SAFETY: 968 // We only allow ASCII, so this is safe. 969 let value = unsafe { str::from_utf8_unchecked(ascii) }; 970 // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s 971 // are not empty. 972 self.domain = &self.domain[count + 1..]; 973 Err(Label { value }) 974 } else { 975 Ok(count + 1) 976 } 977 }) 978 .map_or_else(convert::identity, |_| { 979 // SAFETY: 980 // We only allow ASCII, so this is safe. 
981 let value = unsafe { str::from_utf8_unchecked(self.domain) }; 982 self.domain = &[]; 983 Label { value } 984 }) 985 }) 986 } 987 } 988 impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Domain<T> { 989 type Item = Label<'a>; 990 type IntoIter = LabelIter<'a>; 991 #[inline] 992 fn into_iter(self) -> Self::IntoIter { 993 LabelIter { 994 domain: self.as_bytes(), 995 } 996 } 997 } 998 impl<'a> IntoIterator for Domain<&'a str> { 999 type Item = Label<'a>; 1000 type IntoIter = LabelIter<'a>; 1001 #[inline] 1002 fn into_iter(self) -> Self::IntoIter { 1003 LabelIter { 1004 domain: <&str>::from(self).as_bytes(), 1005 } 1006 } 1007 } 1008 impl<'a> IntoIterator for Domain<&'a [u8]> { 1009 type Item = Label<'a>; 1010 type IntoIter = LabelIter<'a>; 1011 #[inline] 1012 fn into_iter(self) -> Self::IntoIter { 1013 LabelIter { 1014 domain: <&[u8]>::from(self), 1015 } 1016 } 1017 } 1018 /// Error returned from [`Rfc1123Domain::try_from`] and [`Rfc1123Domain::try_from_bytes`]. 1019 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 1020 pub enum Rfc1123Err { 1021 /// The inputs was not a valid [`Domain`]. 1022 DomainErr(DomainErr), 1023 /// A [`Label`] of [`Domain`] starts with an ASCII hyphen. 1024 LabelStartsWithAHyphen, 1025 /// A [`Label`] of [`Domain`] ends with an ASCII hyphen. 1026 LabelEndsWithAHyphen, 1027 /// The last [`Label`] (i.e., TLD) was invalid which means it was not all ASCII letters nor 1028 /// had length of at least five with the first 4 characters being `xn--`. 
1029 InvalidTld, 1030 } 1031 impl Display for Rfc1123Err { 1032 #[inline] 1033 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1034 match *self { 1035 Self::DomainErr(err) => err.fmt(f), 1036 Self::LabelStartsWithAHyphen => { 1037 f.write_str("a label in the domain starts with a hyphen") 1038 } 1039 Self::LabelEndsWithAHyphen => f.write_str("a label in the domain ends with a hyphen"), 1040 Self::InvalidTld => f.write_str("the TLD in the domain was not all letters nor had length of at least five with the first 4 characters being 'xn--'") 1041 } 1042 } 1043 } 1044 impl Error for Rfc1123Err {} 1045 /// **TL;DR** Wrapper type around a [`Domain`] that enforces conformance to 1046 /// [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123#page-13). 1047 /// 1048 /// * Each [`Label`] must only contain ASCII digits, letters, or hyphen. 1049 /// * Each `Label` must not begin or end with a hyphen. 1050 /// * The last `Label` (i.e., TLD) must either contain only ASCII letters or have length of at least five and 1051 /// begin with `xn--`. 1052 /// --- 1053 /// Unsurprisingly, RFC 1123 is not super precise as it uses "host name" to mean label and also domain: 1054 /// "Host software MUST handle host names \[labels\] of up to 63 characters and SHOULD handle host 1055 /// names \[domains\] of up to 255 characters". It also states that only "one aspect of host name \[label\] 1056 /// syntax is hereby changed" from [RFC 952](https://www.rfc-editor.org/rfc/rfc952): "the restriction on the 1057 /// first character is relaxed to allow either a letter or a digit". Despite that, it goes on to mention other 1058 /// restrictions not mentioned in RFC 952: "the highest-level component label will be alphabetic". It is therefore 1059 /// important to understand how this type interprets that RFC and why it does so. 1060 /// 1061 /// The primary issue with RFC 1123 is the unjustified comment about the TLD being alphabetic. It is given 1062 /// as if it is common knowledge. 
As explained by (the rejected) 1063 /// [Errata 1353](https://www.rfc-editor.org/errata/eid1353), there seemed to be the assumption that the TLDs 1064 /// at the time would be the only ones that would ever exist or at least that the format of them would always be 1065 /// true. This leads to several possible interpretations: 1066 /// 1067 /// * Strictest: enforce the TLD is one of the TLDs that existed at the time of the RFC. 1068 /// * Strict: enforce the TLD has the same format as the TLDs at the time (i.e., two or three letters long). 1069 /// * Literal: enforce the TLD is alphabetic regardless of the lack of justification. 1070 /// * Relaxed: enforce the "spirit" that the TLD must exist. 1071 /// * More relaxed: enforce the "spirit" that the TLD must have the same format of a valid TLD. 1072 /// * Much more relaxed: enforce the "spirit" that the domain cannot have the form of an IPv4 address. 1073 /// * Most relaxed: treat TLDs no differently than other labels (i.e., don't make assumptions about what will be 1074 /// a valid TLD in the future). 1075 /// 1076 /// RFC 1123 is not obsolete, and it is clear from more recent RFCs like 1077 /// [RFC 5891](https://www.rfc-editor.org/rfc/rfc5891) that it is designed to be a foundation (i.e., domains that 1078 /// are valid per newer RFCs are valid per RFC 1123). Clearly due to RFCs like RFC 5891, requiring the TLD 1079 /// to be alphabetic or exactly two or three characters long would violate that. For those reasons the strictest, 1080 /// strict, and literal interpretations are rejected. 1081 /// 1082 /// Assuming TLDs are static is absurd, and relying on some dynamic list of TLDs is undesirable. For that reason 1083 /// the relaxed interpretation is rejected. 1084 /// 1085 /// Enforcing that domains do not have the form of an IPv4 address opens up the question of what is an IPv4 1086 /// address? Should leading 0s be allowed? What about hexadecimal? Should there be length limits for each octet? 
1087 /// It also has the undesirable effect where subdomains that are all numeric exist but their parent domain does 1088 /// not which goes against the hierarchical nature of DNS. For those reasons the much more relaxed interpretation 1089 /// is rejected. 1090 /// 1091 /// Treating TLDs no differently than other labels is nice from a consistency perspective, but it suffers from 1092 /// the fact that domains that have the form of an IPv4 address are now allowed. For that reason the most 1093 /// relaxed interpretation is rejected. 1094 /// 1095 /// [ICANN](https://newgtlds.icann.org/sites/default/files/guidebook-full-04jun12-en.pdf) requires TLDs to either 1096 /// be alphabetic or a valid A-label per RFC 5891. Verifying a label is a valid A-label is not a cheap operation 1097 /// though. For that reason the more relaxed interpretation is accepted but with a twist: fake and valid A-labels 1098 /// are allowed in addition to entirely alphabetic labels. More specifically the TLD must either contain only 1099 /// letters or must be at least five characters long with the first 4 characters being `xn--`. 1100 /// 1101 /// If one wants to enforce the literal interpretation, one can use [`Self::is_literal_interpretation`]. Similarly, 1102 /// if one wants to enforce the strict interpretation, one can use [`Self::is_strict_interpretation`]. 1103 #[derive(Clone, Copy, Debug)] 1104 pub struct Rfc1123Domain<T> { 1105 /// The domain. 1106 dom: Domain<T>, 1107 } 1108 impl<T> Rfc1123Domain<T> { 1109 /// Returns a reference to the contained [`Domain`]. 1110 /// 1111 /// # Example 1112 /// 1113 /// ``` 1114 /// use ascii_domain::dom::Rfc1123Domain; 1115 /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().domain().len().get() == 11); 1116 /// ``` 1117 #[inline] 1118 pub const fn domain(&self) -> &Domain<T> { 1119 &self.dom 1120 } 1121 /// Returns the contained [`Domain`] consuming `self`. 
1122 /// 1123 /// # Example 1124 /// 1125 /// ``` 1126 /// use ascii_domain::dom::Rfc1123Domain; 1127 /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().into_domain().len().get() == 11); 1128 /// ``` 1129 #[inline] 1130 pub fn into_domain(self) -> Domain<T> { 1131 self.dom 1132 } 1133 } 1134 impl<T: AsRef<[u8]>> Rfc1123Domain<T> { 1135 /// Function that transforms `v` into an `Rfc1123Domain` by only allowing [`Label`]s to contain the ASCII `u8`s 1136 /// in [`ASCII_HYPHEN_DIGITS_LETTERS`] with each `Label` not starting or ending with a `b'-'`. A trailing `b'.'` 1137 /// is ignored. The last `Label` (i.e., TLD) must either only contain ASCII letters or must have length of at 1138 /// least five with the first 4 bytes being `b"xn--"`. 1139 /// 1140 /// Unliked calling [`Domain::try_from_bytes`] then [`Rfc1123Domain::try_from`] which performs two traversals 1141 /// of `v`, this performs a single traversal of `v`. 1142 /// 1143 /// # Examples 1144 /// 1145 /// ``` 1146 /// use ascii_domain::dom::{Rfc1123Domain, Rfc1123Err}; 1147 /// assert!(Rfc1123Domain::try_from_bytes("example.com").is_ok()); 1148 /// assert!(Rfc1123Domain::try_from_bytes("example.xn--abc").is_ok()); 1149 /// assert!(Rfc1123Domain::try_from_bytes("a-.com").map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false)); 1150 /// ``` 1151 /// 1152 /// # Errors 1153 /// 1154 /// Returns [`Rfc1123Err`] iff `v.as_ref()` is an invalid `Rfc1123Domain`. 1155 #[expect( 1156 clippy::arithmetic_side_effects, 1157 clippy::indexing_slicing, 1158 reason = "comments justify their correctness" 1159 )] 1160 #[expect(clippy::redundant_else, reason = "prefer else with else-if")] 1161 #[inline] 1162 pub fn try_from_bytes(v: T) -> Result<Self, Rfc1123Err> { 1163 // The easiest implementation would be redirecting to `Domain::try_from_bytes`; and upon success, 1164 // verify each `Label` doesn't begin or end with a hyphen. That requires traversing `v` twice though. 
1165 // We opt to traverse just once. 1166 let val = v.as_ref(); 1167 let value = match val.last() { 1168 None => return Err(Rfc1123Err::DomainErr(DomainErr::Empty)), 1169 Some(byt) => { 1170 let b = *byt; 1171 if b == b'.' { 1172 if val.len() == 1 { 1173 return Err(Rfc1123Err::DomainErr(DomainErr::RootDomain)); 1174 } 1175 // We know `val.len` is at least 2. 1176 let len = val.len() - 1; 1177 let lst = val[len - 1]; 1178 if lst == b'.' { 1179 return Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel)); 1180 } else if lst == b'-' { 1181 return Err(Rfc1123Err::LabelEndsWithAHyphen); 1182 } else { 1183 &val[..len] 1184 } 1185 } else if b == b'-' { 1186 return Err(Rfc1123Err::LabelEndsWithAHyphen); 1187 } else { 1188 val 1189 } 1190 } 1191 }; 1192 if value.len() > 253 { 1193 Err(Rfc1123Err::DomainErr(DomainErr::LenExceeds253(value.len()))) 1194 } else { 1195 let mut count = 0; 1196 value 1197 .iter() 1198 .try_fold(0, |label_len, byt| { 1199 let b = *byt; 1200 if b == b'.' { 1201 NonZeroU8::new(label_len).map_or( 1202 Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel)), 1203 |_| { 1204 // We verify the last character in the `Label` is not a hyphen. 1205 // `count` > 0 since `label_len` > 0 and `count` < `value.len()` since 1206 // it's the index of the `b'.'`. 1207 if value[count - 1] == b'-' { 1208 Err(Rfc1123Err::LabelEndsWithAHyphen) 1209 } else { 1210 Ok(0) 1211 } 1212 }, 1213 ) 1214 } else if !RFC_CHARS.contains(b) { 1215 Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(b))) 1216 } else if b == b'-' && label_len == 0 { 1217 Err(Rfc1123Err::LabelStartsWithAHyphen) 1218 } else if label_len == 63 { 1219 Err(Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63)) 1220 } else { 1221 // This caps at 253, so no overflow. 1222 count += 1; 1223 // This is less than 64 due to the above check, so this won't overflow; 1224 Ok(label_len + 1) 1225 } 1226 }) 1227 .and_then(|tld_len| { 1228 // `tld_len <= value.len()`. 
1229 let tld = &value[value.len() - usize::from(tld_len)..]; 1230 if (tld 1231 .split_at_checked(4) 1232 .is_some_and(|(fst, rem)| !rem.is_empty() && fst == b"xn--")) 1233 || tld 1234 .iter() 1235 .try_fold((), |(), byt| { 1236 if byt.is_ascii_alphabetic() { 1237 Ok(()) 1238 } else { 1239 Err(()) 1240 } 1241 }) 1242 .is_ok() 1243 { 1244 Ok(()) 1245 } else { 1246 Err(Rfc1123Err::InvalidTld) 1247 } 1248 }) 1249 .map(|()| Self { 1250 dom: Domain { value: v }, 1251 }) 1252 } 1253 } 1254 /// Returns `true` iff the domain adheres to the literal interpretation of RFC 1123. For more information 1255 /// read the description of [`Rfc1123Domain`]. 1256 /// 1257 /// # Examples 1258 /// 1259 /// ``` 1260 /// use ascii_domain::dom::Rfc1123Domain; 1261 /// assert!(Rfc1123Domain::try_from_bytes("example.commmm").unwrap().is_literal_interpretation()); 1262 /// assert!(!Rfc1123Domain::try_from_bytes("example.xn--abc").unwrap().is_literal_interpretation()); 1263 /// ``` 1264 #[inline] 1265 pub fn is_literal_interpretation(&self) -> bool { 1266 self.dom.tld().is_alphabetic() 1267 } 1268 /// Returns `true` iff the domain adheres to the strict interpretation of RFC 1123. For more information 1269 /// read the description of [`Rfc1123Domain`]. 
1270 /// 1271 /// # Examples 1272 /// 1273 /// ``` 1274 /// use ascii_domain::dom::Rfc1123Domain; 1275 /// assert!(Rfc1123Domain::try_from_bytes("example.Com").unwrap().is_strict_interpretation()); 1276 /// assert!(!Rfc1123Domain::try_from_bytes("example.comm").unwrap().is_strict_interpretation()); 1277 /// ``` 1278 #[inline] 1279 pub fn is_strict_interpretation(&self) -> bool { 1280 let tld = self.dom.tld(); 1281 (2..4).contains(&tld.len().get()) && tld.is_alphabetic() 1282 } 1283 } 1284 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for Rfc1123Domain<T2> { 1285 #[inline] 1286 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1287 self.dom == other.dom 1288 } 1289 } 1290 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Rfc1123Domain<T>> for Rfc1123Domain<T2> { 1291 #[inline] 1292 fn eq(&self, other: &&Rfc1123Domain<T>) -> bool { 1293 self.dom == other.dom 1294 } 1295 } 1296 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for &Rfc1123Domain<T2> { 1297 #[inline] 1298 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1299 self.dom == other.dom 1300 } 1301 } 1302 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for Domain<T2> { 1303 #[inline] 1304 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1305 *self == other.dom 1306 } 1307 } 1308 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for &Domain<T2> { 1309 #[inline] 1310 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1311 **self == other.dom 1312 } 1313 } 1314 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Rfc1123Domain<T>> for Domain<T2> { 1315 #[inline] 1316 fn eq(&self, other: &&Rfc1123Domain<T>) -> bool { 1317 *self == other.dom 1318 } 1319 } 1320 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for Rfc1123Domain<T2> { 1321 #[inline] 1322 fn eq(&self, other: &Domain<T>) -> bool { 1323 self.dom == *other 1324 } 1325 } 1326 impl<T: AsRef<[u8]>> Eq for Rfc1123Domain<T> {} 1327 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> 
// `Borrow` requires `Eq`, `Ord`, and `Hash` to behave identically for the owned and borrowed forms. That
// holds here because the `PartialEq`, `Ord`, and `Hash` implementations of `Rfc1123Domain` all forward to
// the contained `Domain`.
impl<T> Borrow<Domain<T>> for Rfc1123Domain<T> {
    /// Returns a reference to the contained `Domain`.
    #[inline]
    fn borrow(&self) -> &Domain<T> {
        &self.dom
    }
}
'b, 'b, T: AsRef<str>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b str> { 1399 #[inline] 1400 fn from(value: &'a Rfc1123Domain<T>) -> Self { 1401 Self { 1402 dom: Domain::<&'b str>::from(&value.dom), 1403 } 1404 } 1405 } 1406 impl From<Rfc1123Domain<String>> for Rfc1123Domain<Vec<u8>> { 1407 #[inline] 1408 fn from(value: Rfc1123Domain<String>) -> Self { 1409 Self { 1410 dom: Domain::<Vec<u8>>::from(value.dom), 1411 } 1412 } 1413 } 1414 impl<'a: 'b, 'b> From<Rfc1123Domain<&'a [u8]>> for Rfc1123Domain<&'b str> { 1415 #[inline] 1416 fn from(value: Rfc1123Domain<&'a [u8]>) -> Self { 1417 Self { 1418 dom: Domain::<&'b str>::from(value.dom), 1419 } 1420 } 1421 } 1422 impl<'a: 'b, 'b> From<Rfc1123Domain<&'a str>> for Rfc1123Domain<&'b [u8]> { 1423 #[inline] 1424 fn from(value: Rfc1123Domain<&'a str>) -> Self { 1425 Self { 1426 dom: Domain::<&'b [u8]>::from(value.dom), 1427 } 1428 } 1429 } 1430 impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> { 1431 type Error = Rfc1123Err; 1432 #[expect( 1433 clippy::arithmetic_side_effects, 1434 clippy::indexing_slicing, 1435 clippy::unreachable, 1436 reason = "comments explain their correctness" 1437 )] 1438 #[inline] 1439 fn try_from(value: Domain<T>) -> Result<Self, Self::Error> { 1440 let mut labels = value.iter(); 1441 let tld = labels 1442 .next() 1443 .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")); 1444 if tld.is_alphabetic() 1445 || tld 1446 .split_at_checked(4) 1447 .is_some_and(|(fst, rem)| !rem.is_empty() && fst == "xn--") 1448 { 1449 labels 1450 .try_fold((), |(), label| { 1451 let bytes = label.value.as_bytes(); 1452 // `Label`s are never empty, so the below indexing is fine. 1453 // Underflow won't occur for the same reason. 
1454 if bytes[0] == b'-' { 1455 Err(Rfc1123Err::LabelStartsWithAHyphen) 1456 } else if bytes[bytes.len() - 1] == b'-' { 1457 Err(Rfc1123Err::LabelEndsWithAHyphen) 1458 } else { 1459 bytes.iter().try_fold((), |(), byt| match *byt { 1460 b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => Ok(()), 1461 val => Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(val))), 1462 }) 1463 } 1464 }) 1465 .map(|()| Self { dom: value }) 1466 } else { 1467 Err(Rfc1123Err::InvalidTld) 1468 } 1469 } 1470 } 1471 impl<T: AsRef<[u8]>> Display for Rfc1123Domain<T> { 1472 #[inline] 1473 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1474 self.dom.fmt(f) 1475 } 1476 } 1477 impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Rfc1123Domain<T> { 1478 type Item = Label<'a>; 1479 type IntoIter = LabelIter<'a>; 1480 #[inline] 1481 fn into_iter(self) -> Self::IntoIter { 1482 LabelIter { 1483 domain: self.dom.as_bytes(), 1484 } 1485 } 1486 } 1487 impl<'a> IntoIterator for Rfc1123Domain<&'a str> { 1488 type Item = Label<'a>; 1489 type IntoIter = LabelIter<'a>; 1490 #[inline] 1491 fn into_iter(self) -> Self::IntoIter { 1492 LabelIter { 1493 domain: <&str>::from(self.dom).as_bytes(), 1494 } 1495 } 1496 } 1497 impl<'a> IntoIterator for Rfc1123Domain<&'a [u8]> { 1498 type Item = Label<'a>; 1499 type IntoIter = LabelIter<'a>; 1500 #[inline] 1501 fn into_iter(self) -> Self::IntoIter { 1502 LabelIter { 1503 domain: <&[u8]>::from(self.dom), 1504 } 1505 } 1506 } 1507 #[cfg(test)] 1508 mod tests { 1509 extern crate alloc; 1510 use super::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err}; 1511 use crate::char_set::{ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS, AllowedAscii}; 1512 use alloc::borrow::ToOwned; 1513 use core::cmp::Ordering; 1514 #[test] 1515 fn test_dom_parse() { 1516 let allowed_ascii = ASCII_FIREFOX; 1517 // Test empty is error. 1518 assert!( 1519 Domain::try_from_bytes("", &allowed_ascii) 1520 .map_or_else(|e| e == DomainErr::Empty, |_| false) 1521 ); 1522 // Test root domain. 
1523 assert!( 1524 Domain::try_from_bytes(".", &allowed_ascii) 1525 .map_or_else(|e| e == DomainErr::RootDomain, |_| false) 1526 ); 1527 // Test empty label is error. 1528 assert!( 1529 Domain::try_from_bytes("a..com", &allowed_ascii) 1530 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) 1531 ); 1532 assert!( 1533 Domain::try_from_bytes("a..", &allowed_ascii) 1534 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) 1535 ); 1536 assert!( 1537 Domain::try_from_bytes("..", &allowed_ascii) 1538 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) 1539 ); 1540 // Test label too long. 1541 let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; 1542 // 4 + 64 + 4 1543 assert!(val.len() == 72); 1544 assert!( 1545 Domain::try_from_bytes(val, &allowed_ascii) 1546 .map_or_else(|e| e == DomainErr::LabelLenExceeds63, |_| false) 1547 ); 1548 assert!( 1549 Domain::try_from_bytes( 1550 "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", 1551 &allowed_ascii 1552 ) 1553 .map_or(false, |d| d.len().get() == 71) 1554 ); 1555 // Test domain too long. 1556 assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(254), |_| false)); 1557 assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or(false, |d| d.len().get() == 253 )); 1558 // Test max labels. 
1559 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(255), |_| false)); 1560 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1561 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.", &allowed_ascii).map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1562 // Test removal of trailing '.'. 1563 assert!( 1564 Domain::try_from_bytes("com.", &allowed_ascii).map_or(false, |d| d.as_str() == "com") 1565 ); 1566 // Test single label. 1567 assert!(Domain::try_from_bytes("c", &allowed_ascii).map_or(false, |d| d.as_str() == "c")); 1568 // Test case-insensitivity. 
#[test]
fn test_dom_iter() {
    let allowed_ascii = ASCII_FIREFOX;
    // Forward iteration starts at the TLD and yields exactly three `Label`s.
    assert!(
        Domain::try_from_bytes("www.example.com", &allowed_ascii)
            .is_ok_and(|d| d.iter().map(|l| l.value).eq(["com", "example", "www"]))
    );
    // Backward iteration starts at the lowest `Label` and yields exactly three `Label`s.
    assert!(
        Domain::try_from_bytes("www.example.com", &allowed_ascii)
            .is_ok_and(|d| d.iter().rev().map(|l| l.value).eq(["www", "example", "com"]))
    );
    // Alternating between both ends consumes each `Label` exactly once.
    assert!(
        Domain::try_from_bytes("www.example.com", &allowed_ascii).is_ok_and(|d| {
            let mut iter = d.iter();
            iter.next_back().is_some_and(|l| l.value == "www")
                && iter.next().is_some_and(|l| l.value == "com")
                && iter.next_back().is_some_and(|l| l.value == "example")
                && iter.next().is_none()
                && iter.next_back().is_none()
        })
    );
}
#[test]
fn test_tld() {
    // The TLD of a successfully parsed domain is its right-most `Label`.
    let parsed = Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS);
    assert!(parsed.is_ok_and(|dom| dom.tld().as_str() == "com"));
}
1785 assert!(Rfc1123Domain::try_from_bytes("a..com").map_or_else( 1786 |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), 1787 |_| false 1788 )); 1789 assert!(Rfc1123Domain::try_from_bytes("a..").map_or_else( 1790 |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), 1791 |_| false 1792 )); 1793 assert!(Rfc1123Domain::try_from_bytes("..").map_or_else( 1794 |e| e == Rfc1123Err::DomainErr(DomainErr::EmptyLabel), 1795 |_| false 1796 )); 1797 // Test label too long. 1798 let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; 1799 // 4 + 64 + 4 1800 assert!(val.len() == 72); 1801 assert!(Rfc1123Domain::try_from_bytes(val).map_or_else( 1802 |e| e == Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63), 1803 |_| false 1804 )); 1805 assert!( 1806 Rfc1123Domain::try_from_bytes( 1807 "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", 1808 ) 1809 .map_or(false, |d| d.len().get() == 71) 1810 ); 1811 // Test domain too long. 1812 assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(254)), |_| false)); 1813 assert!(Rfc1123Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or(false, |d| d.len().get() == 253 )); 1814 // Test max labels. 
1815 assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == Rfc1123Err::DomainErr(DomainErr::LenExceeds253(255)), |_| false)); 1816 assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1817 assert!(Rfc1123Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.").map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1818 // Test removal of trailing '.'. 1819 assert!(Rfc1123Domain::try_from_bytes("com.").map_or(false, |d| d.as_str() == "com")); 1820 // Test single label. 1821 assert!(Rfc1123Domain::try_from_bytes("c").map_or(false, |d| d.as_str() == "c")); 1822 // Test ends with hyphen. 
1823 assert!( 1824 Rfc1123Domain::try_from_bytes("-") 1825 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1826 ); 1827 assert!( 1828 Rfc1123Domain::try_from_bytes("-.") 1829 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1830 ); 1831 assert!( 1832 Rfc1123Domain::try_from_bytes("a.com.-") 1833 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1834 ); 1835 assert!( 1836 Rfc1123Domain::try_from_bytes("a.com-") 1837 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1838 ); 1839 assert!( 1840 Rfc1123Domain::try_from_bytes("a-.com") 1841 .map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1842 ); 1843 // Test starts with hyphen. 1844 assert!( 1845 Rfc1123Domain::try_from_bytes("a.-com") 1846 .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false) 1847 ); 1848 assert!( 1849 Rfc1123Domain::try_from_bytes("-a.com") 1850 .map_or_else(|err| err == Rfc1123Err::LabelStartsWithAHyphen, |_| false) 1851 ); 1852 // Test case-insensitivity. 1853 assert!( 1854 Rfc1123Domain::try_from_bytes("wwW.ExAMple.COm").map_or(false, |d| { 1855 Rfc1123Domain::try_from_bytes("www.example.com") 1856 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) 1857 }) 1858 ); 1859 assert!( 1860 Rfc1123Domain::try_from_bytes("ww-W.com").map_or(false, |d| { 1861 Rfc1123Domain::try_from_bytes("Ww-w.com") 1862 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) 1863 }) 1864 ); 1865 assert!( 1866 Rfc1123Domain::try_from_bytes("1.1.1.1") 1867 .map_or_else(|err| err == Rfc1123Err::InvalidTld, |_| false) 1868 ); 1869 } 1870 }