dom.rs (74395B)
1 extern crate alloc; 2 use crate::char_set::{AllowedAscii, ASCII_HYPHEN_DIGITS_LETTERS}; 3 use alloc::{string::String, vec::Vec}; 4 use core::{ 5 borrow::Borrow, 6 cmp::Ordering, 7 convert::{self, AsRef}, 8 error::Error, 9 fmt::{self, Display, Formatter}, 10 hash::{Hash, Hasher}, 11 iter::FusedIterator, 12 num::NonZeroU8, 13 ops::Deref, 14 str, 15 }; 16 /// The `AllowedAscii` used by `Rfc1123Domain`. 17 static RFC_CHARS: &AllowedAscii<[u8; 63]> = &ASCII_HYPHEN_DIGITS_LETTERS; 18 /// Returned by [`Domain::cmp_by_domain_ordering`]. It is more informative than [`Ordering`] in that it 19 /// distinguishes between a `Domain` that is greater than another `Domain` due to a [`Label`] being greater 20 /// from a `Domain` that has the same `Label`s as another but simply more. 21 /// 22 /// Another way to view this is that [`Self::Shorter`] is "closer" to being [`Self::Equal`] than [`Self::Less`] 23 /// since the `Domain`s are still part of the same branch in the DNS hierarchy. Ditto for [`Self::Longer`]. 24 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 25 pub enum DomainOrdering { 26 /// The `Domain` is less than another since a `Label` was less. 27 Less, 28 /// The `Domain` is less than other but only because it had fewer `Label`s. 29 Shorter, 30 /// The `Domain` is equal to another. 31 Equal, 32 /// The `Domain` is greater than another but only because it had more `Label`s. 33 Longer, 34 /// The `Domain` is greater than another since a `Label` was greater. 
35 Greater, 36 } 37 impl Display for DomainOrdering { 38 #[inline] 39 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 40 match *self { 41 Self::Less => f.write_str("less since a label was less"), 42 Self::Shorter => f.write_str("less since there were fewer labels"), 43 Self::Equal => f.write_str("equal"), 44 Self::Longer => f.write_str("greater since there were more labels"), 45 Self::Greater => f.write_str("greater since a label was greater"), 46 } 47 } 48 } 49 impl From<DomainOrdering> for Ordering { 50 #[inline] 51 fn from(value: DomainOrdering) -> Self { 52 match value { 53 DomainOrdering::Less | DomainOrdering::Shorter => Self::Less, 54 DomainOrdering::Equal => Self::Equal, 55 DomainOrdering::Longer | DomainOrdering::Greater => Self::Greater, 56 } 57 } 58 } 59 /// A domain that consists of at least one [`Label`] with each `Label` only containing the ASCII `u8`s in 60 /// the [`AllowedAscii`] passed to [`Self::try_from_bytes`]. The total length of a `Domain` is at most 61 /// 253 bytes[^note] in length including the `b'.'` separator. The trailing `b'.'`, if one exists, is always 62 /// ignored. 63 /// 64 /// This is more restrictive than what a domain is allowed to be per the 65 /// [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181) since all octets/`u8`s are allowed in a 66 /// label. Additionally there is no way to represent the root domain. 67 /// 68 /// Last, ASCII uppercase letters are treated as lowercase; however for better comparison performance 69 /// that doesn't lead to intermediate memory allocations, two `Domain`s should consist entirely of the same 70 /// case. 71 /// 72 /// [^note]: It is a common misconception that the max length of a domain is 255, but that is only true for 73 /// domains in _wire_ format. In representation format, which `Domain` can be thought of when only visible 74 /// ASCII bytes are used, the max length is 253 when the last byte is not `b'.'`; otherwise the max length is 75 /// 254. 
This is due to the fact that there is no way to explicitly represent the root label which in wire format 76 /// contributes one byte due to each label being preceded by the octet that represents its length. 77 /// 78 /// Note this only contains `T`, so this is allocation-free and the same size as `T`. 79 #[derive(Clone, Copy, Debug)] 80 pub struct Domain<T> { 81 /// The domain value. `value.as_ref().len()` is guaranteed to have length between 1 and 253 when the last `u8` 82 /// is not `b'.'`; otherwise the length is between 2 and 254. 83 /// Guaranteed to only contain `b'.'` and the ASCII `u8`s in `allowed_ascii`. 84 value: T, 85 } 86 impl<T> Domain<T> { 87 /// A reference to the contained `T`. 88 /// 89 /// # Example 90 /// 91 /// ``` 92 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 93 /// assert!(*Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().as_inner() == "example.com."); 94 /// ``` 95 #[inline] 96 pub const fn as_inner(&self) -> &T { 97 &self.value 98 } 99 /// Same as [`Self::as_inner`] except `self` is consumed. 100 /// 101 /// # Example 102 /// 103 /// ``` 104 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 105 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_inner() == "example.com."); 106 /// ``` 107 #[inline] 108 pub fn into_inner(self) -> T { 109 self.value 110 } 111 } 112 impl<T: AsRef<[u8]>> Domain<T> { 113 /// Returns `true` iff the domain contains a trailing `b'.'`. 
114 /// 115 /// # Example 116 /// 117 /// ``` 118 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 119 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().contains_trailing_dot()); 120 /// ``` 121 #[expect( 122 clippy::arithmetic_side_effects, 123 clippy::indexing_slicing, 124 reason = "comments explain their correctness" 125 )] 126 #[inline] 127 pub fn contains_trailing_dot(&self) -> bool { 128 let bytes = self.value.as_ref(); 129 // This won't underflow or `panic` since `Domain`s are not empty. 130 bytes[bytes.len() - 1] == b'.' 131 } 132 /// The domain without a trailing `b'.'` if there was one. 133 /// 134 /// # Example 135 /// 136 /// ``` 137 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 138 /// assert!(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap().as_str() == "Example.com"); 139 /// ``` 140 #[inline] 141 pub fn as_str(&self) -> &str { 142 <&str>::from(Domain::<&str>::from(Domain::<&[u8]>::from(self))) 143 } 144 /// The domain without a trailing `b'.'` if there was one. 145 /// 146 /// # Example 147 /// 148 /// ``` 149 /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS}; 150 /// assert!(Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap().as_bytes() == b"Example.com"); 151 /// ``` 152 #[inline] 153 pub fn as_bytes(&self) -> &[u8] { 154 <&[u8]>::from(Domain::<&[u8]>::from(self)) 155 } 156 /// The length of the `Domain`. This does _not_ include the trailing `b'.'` if there was one. 
157 /// 158 /// # Example 159 /// 160 /// ``` 161 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 162 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().len().get() == 11); 163 /// ``` 164 #[expect( 165 unsafe_code, 166 reason = "we enforce nonzero lengths, so NonZeroU8::new_unchecked is fine" 167 )] 168 #[expect( 169 clippy::arithmetic_side_effects, 170 clippy::as_conversions, 171 clippy::cast_possible_truncation, 172 reason = "comments justify their correctness" 173 )] 174 #[inline] 175 pub fn len(&self) -> NonZeroU8 { 176 // No fear of underflow since the length of `value` is at least 1 _not including_ the 177 // trailing `b'.'` if there was one. 178 // `true as usize` is guaranteed to be 1 and `false as usize` is guaranteed to be 0. 179 // No fear of truncation either since the length is guaranteed to be less than 255. 180 // `Domain` is immutable ensuring such invariants are kept. 181 let len = (self.value.as_ref().len() - usize::from(self.contains_trailing_dot())) as u8; 182 // SAFETY: 183 // The only way to construct a `Domain` is via `try_from_bytes` which ensures `len` is 184 // is at least 1. 185 unsafe { NonZeroU8::new_unchecked(len) } 186 } 187 /// Function that transforms `v` into a `Domain` by only allowing [`Label`]s to contain the ASCII `u8`s in 188 /// `allowed_ascii`. A trailing `b'.'` is ignored. 189 /// 190 /// Note that while ASCII uppercase is treated as ASCII lowercase, `allowed_ascii` MUST still contain 191 /// each ASCII `u8` (e.g., if `!allowed_ascii.contains(b'A')`, then `b'A'` is not allowed even if 192 /// `allowed_ascii.contains(b'a')`). 
193 /// 194 /// # Examples 195 /// 196 /// ``` 197 /// use ascii_domain::{dom::{Domain, DomainErr}, char_set::ASCII_LOWERCASE}; 198 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).is_ok()); 199 /// assert!(Domain::try_from_bytes("exam2ple.com", &ASCII_LOWERCASE).map_or_else(|err| err == DomainErr::InvalidByte(b'2'), |_| false)); 200 /// ``` 201 /// 202 /// # Errors 203 /// 204 /// Returns [`DomainErr`] iff `v.as_ref()` is an invalid `Domain`. 205 #[expect( 206 clippy::arithmetic_side_effects, 207 reason = "comment justifies its correctness" 208 )] 209 #[inline] 210 pub fn try_from_bytes<T2: AsRef<[u8]>>( 211 v: T, 212 allowed_ascii: &AllowedAscii<T2>, 213 ) -> Result<Self, DomainErr> { 214 let val = v.as_ref(); 215 let value = val 216 .split_last() 217 .ok_or(DomainErr::Empty) 218 .and_then(|(lst, rem)| { 219 if *lst == b'.' { 220 rem.split_last() 221 .ok_or(DomainErr::RootDomain) 222 .and_then(|(lst_2, _)| { 223 if *lst_2 == b'.' { 224 Err(DomainErr::EmptyLabel) 225 } else { 226 Ok(rem) 227 } 228 }) 229 } else { 230 Ok(val) 231 } 232 })?; 233 if value.len() > 253 { 234 Err(DomainErr::LenExceeds253(value.len())) 235 } else { 236 value 237 .iter() 238 .try_fold(0, |label_len, byt| { 239 let b = *byt; 240 if b == b'.' { 241 NonZeroU8::new(label_len).map_or(Err(DomainErr::EmptyLabel), |_| Ok(0)) 242 } else if !allowed_ascii.contains(b) { 243 Err(DomainErr::InvalidByte(b)) 244 } else if label_len == 63 { 245 Err(DomainErr::LabelLenExceeds63) 246 } else { 247 // This is less than 63 due to the above check, so this won't overflow; 248 Ok(label_len + 1) 249 } 250 }) 251 .map(|_| Self { value: v }) 252 } 253 } 254 /// Returns an [`Iterator`] of [`Label`]s without consuming the `Domain`. 
255 /// # Example 256 /// 257 /// ``` 258 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 259 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().unwrap().as_str() == "com"); 260 /// ``` 261 #[inline] 262 pub fn iter(&self) -> LabelIter<'_> { 263 LabelIter { 264 domain: self.as_bytes(), 265 } 266 } 267 /// Returns `true` iff `self` and `right` are part of the same branch in the DNS hierarchy. 268 /// 269 /// For example `www.example.com` and `example.com` are in the `same_branch`, but `example.com` and 270 /// `foo.com` are not. 271 /// 272 /// Note that trailing `b'.'`s are ignored and ASCII uppercase and lowercase are treated the same. 273 /// 274 /// # Examples 275 /// 276 /// ``` 277 /// use ascii_domain::{dom::Domain, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 278 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 279 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 280 /// assert!(dom1.same_branch(&dom2)); 281 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 282 /// assert!(!dom1.same_branch(&dom3)); 283 /// ``` 284 #[inline] 285 pub fn same_branch<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> bool { 286 // Faster to check the values as bytes and not iterate each `Label`. 287 if self == right { 288 true 289 } else { 290 self.iter() 291 .zip(right) 292 .try_fold( 293 (), 294 |(), (label, label2)| if label == label2 { Ok(()) } else { Err(()) }, 295 ) 296 .map_or(false, |()| true) 297 } 298 } 299 /// Same as [`Self::cmp_doms`] except returns [`DomainOrdering::Longer`] iff `self > right` due solely 300 /// to having more [`Label`]s and [`DomainOrdering::Shorter`] iff `self < right` due solely to having 301 /// fewer `Label`s. 
302 /// 303 /// For example `example.com` < `www.example.com` and `bar.com` < `www.example.com`; but with this function, 304 /// `example.com` is [`DomainOrdering::Shorter`] than `www.example.com` and `www.example.com` is 305 /// [`DomainOrdering::Longer`] than `example.com`; while `bar.com` is [`DomainOrdering::Less`] than 306 /// `www.example.com` and `www.example.com` is [`DomainOrdering::Greater`] than `bar.com`. 307 /// 308 /// In other words `DomainOrdering::Shorter` implies `Ordering::Less` and `DomainOrdering::Longer` implies 309 /// `Ordering::Greater` with additional information pertaining to the quantity of `Label`s. 310 /// 311 /// # Examples 312 /// 313 /// ``` 314 /// use ascii_domain::{dom::{Domain, DomainOrdering}, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 315 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 316 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom1), DomainOrdering::Equal)); 317 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 318 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom2), DomainOrdering::Shorter)); 319 /// assert!(matches!(dom2.cmp_by_domain_ordering(&dom1), DomainOrdering::Longer)); 320 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 321 /// assert!(matches!(dom1.cmp_by_domain_ordering(&dom3), DomainOrdering::Less)); 322 /// assert!(matches!(dom3.cmp_by_domain_ordering(&dom1), DomainOrdering::Greater)); 323 /// ``` 324 #[inline] 325 pub fn cmp_by_domain_ordering<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> DomainOrdering { 326 // Faster to compare the entire value when we can instead each `Label`. 
327 if self == right { 328 DomainOrdering::Equal 329 } else { 330 let mut right_iter = right.iter(); 331 self.iter() 332 .try_fold(false, |_, label| { 333 right_iter 334 .next() 335 .map_or(Ok(true), |label2| match label.cmp(&label2) { 336 Ordering::Less => Err(DomainOrdering::Less), 337 Ordering::Equal => Ok(false), 338 Ordering::Greater => Err(DomainOrdering::Greater), 339 }) 340 }) 341 .map_or_else(convert::identity, |flag| { 342 // We iterate `self` before `right`, so `flag` is `true` iff `right` 343 // has fewer `Label`s than `self`. 344 if flag { 345 DomainOrdering::Longer 346 } else { 347 // `self` has as many or fewer `Label`s than `right`; however if it had as many 348 // `Label`s as `right`, then all `Label`s are the same which is impossible since 349 // we already checked if `self == right`. 350 DomainOrdering::Shorter 351 } 352 }) 353 } 354 } 355 /// The total order that is defined follows the following hierarchy: 356 /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. 357 /// 2. If 1. evaluates as not equivalent, then return the result. 358 /// 3. Return the comparison of `Label` counts. 359 /// 360 /// For example, `com` < `example.com` < `net` < `example.net`. 361 /// 362 /// This is the same as the [canonical DNS name order](https://datatracker.ietf.org/doc/html/rfc4034#section-6.1). 363 /// ASCII uppercase is treated as ASCII lowercase and trailing `b'.'`s are ignored. 364 /// The [`AllowedAscii`]s in the `Domain`s are ignored. 
365 /// 366 /// # Examples 367 /// 368 /// ``` 369 /// use core::cmp::Ordering; 370 /// use ascii_domain::{dom::Domain, char_set::{ASCII_LETTERS, ASCII_LOWERCASE}}; 371 /// let dom1 = Domain::try_from_bytes("Example.com", &ASCII_LETTERS).unwrap(); 372 /// assert!(matches!(dom1.cmp_doms(&dom1), Ordering::Equal)); 373 /// let dom2 = Domain::try_from_bytes("www.example.com", &ASCII_LOWERCASE).unwrap(); 374 /// assert!(matches!(dom1.cmp_doms(&dom2), Ordering::Less)); 375 /// assert!(matches!(dom2.cmp_doms(&dom1), Ordering::Greater)); 376 /// let dom3 = Domain::try_from_bytes("foo.com", &ASCII_LOWERCASE).unwrap(); 377 /// assert!(matches!(dom1.cmp_doms(&dom3), Ordering::Less)); 378 /// assert!(matches!(dom3.cmp_doms(&dom1), Ordering::Greater)); 379 /// ``` 380 #[inline] 381 pub fn cmp_doms<T2: AsRef<[u8]>>(&self, right: &Domain<T2>) -> Ordering { 382 self.cmp_by_domain_ordering(right).into() 383 } 384 /// Returns the first `Label`. 385 /// 386 /// # Example 387 /// 388 /// ``` 389 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 390 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().first_label().as_str() == "example"); 391 /// ``` 392 #[expect(clippy::unreachable, reason = "bug in code, so we want to crash")] 393 #[inline] 394 pub fn first_label(&self) -> Label<'_> { 395 self.iter() 396 .next_back() 397 .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")) 398 } 399 /// Returns the last `Label` (i.e., the TLD). 
///
    /// # Example
    ///
    /// ```
    /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE};
    /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().tld().as_str() == "com");
    /// ```
    #[expect(clippy::unreachable, reason = "bug in code, so we want to crash")]
    #[inline]
    pub fn tld(&self) -> Label<'_> {
        // Iteration starts at the TLD, so it is the very first item.
        let Some(label) = self.iter().next() else {
            unreachable!("there is a bug in Domain::try_from_bytes")
        };
        label
    }
}
impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for Domain<T2> {
    /// Ignores the provided [`AllowedAscii`] and simply compares the two `Domain`s as [`Label`]s
    /// of bytes. Note uppercase ASCII is treated as lowercase ASCII and trailing `b'.'`s are ignored.
    #[inline]
    fn eq(&self, other: &Domain<T>) -> bool {
        let (left, right) = (self.as_bytes(), other.as_bytes());
        left.eq_ignore_ascii_case(right)
    }
}
impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Domain<T>> for Domain<T2> {
    /// Forwards to the `Domain`-to-`Domain` comparison after dereferencing `other`.
    #[inline]
    fn eq(&self, other: &&Domain<T>) -> bool {
        *self == **other
    }
}
impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for &Domain<T2> {
    /// Forwards to the `Domain`-to-`Domain` comparison after dereferencing `self`.
    #[inline]
    fn eq(&self, other: &Domain<T>) -> bool {
        **self == *other
    }
}
impl<T: AsRef<[u8]>> Eq for Domain<T> {}
impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Domain<T>> for Domain<T2> {
    /// Consult [`Self::cmp_doms`].
    #[inline]
    fn partial_cmp(&self, other: &Domain<T>) -> Option<Ordering> {
        let ordering = self.cmp_doms(other);
        Some(ordering)
    }
}
impl<T: AsRef<[u8]>> Ord for Domain<T> {
    /// Consult [`Self::cmp_doms`].
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        self.cmp_doms(other)
    }
}
impl<T: AsRef<[u8]>> Hash for Domain<T> {
    /// Hashes the ASCII-lowercased bytes without the trailing `b'.'` so that `Domain`s that compare
    /// equal hash the same.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let lowercase = self.as_bytes().to_ascii_lowercase();
        lowercase.hash(state);
    }
}
impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> TryFrom<(T, &AllowedAscii<T2>)> for Domain<T> {
    type Error = DomainErr;
    /// Same as [`Domain::try_from_bytes`] with the arguments supplied as a tuple.
    #[inline]
    fn try_from(value: (T, &AllowedAscii<T2>)) -> Result<Self, Self::Error> {
        let (bytes, allowed_ascii) = value;
        Self::try_from_bytes(bytes, allowed_ascii)
    }
}
impl<T: AsRef<[u8]>> Display for Domain<T> {
    /// Writes the domain without a trailing `'.'`.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(self.as_str())
    }
}
impl<T: AsRef<[u8]>> AsRef<str> for Domain<T> {
    /// Same as [`Domain::as_str`].
    #[inline]
    fn as_ref(&self) -> &str {
        self.as_str()
    }
}
impl<T: AsRef<[u8]>> AsRef<[u8]> for Domain<T> {
    /// Same as [`Domain::as_bytes`].
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.as_bytes()
    }
}
impl<T: AsRef<[u8]>> Deref for Domain<T> {
    type Target = str;
    /// Same as [`Domain::as_str`].
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.as_str()
    }
}
impl From<Domain<Vec<u8>>> for Domain<String> {
    #[expect(
        unsafe_code,
        reason = "we enforce ASCII, so String::from_utf8_unchecked is fine"
    )]
    #[inline]
    fn from(value: Domain<Vec<u8>>) -> Self {
        let Domain { value: bytes } = value;
        // SAFETY:
        // We only allow ASCII, so this is fine.
        let val = unsafe { String::from_utf8_unchecked(bytes) };
        Self { value: val }
    }
}
impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Domain<T>> for Domain<&'b [u8]> {
    /// Borrows the contained value as bytes; a trailing `b'.'` is kept.
    #[inline]
    fn from(value: &'a Domain<T>) -> Self {
        let bytes = value.value.as_ref();
        Self { value: bytes }
    }
}
impl<'a: 'b, 'b, T: AsRef<str>> From<&'a Domain<T>> for Domain<&'b str> {
    /// Borrows the contained value as a `str`; a trailing `'.'` is kept.
    #[inline]
    fn from(value: &'a Domain<T>) -> Self {
        let text = value.value.as_ref();
        Self { value: text }
    }
}
impl From<Domain<String>> for Domain<Vec<u8>> {
    /// Reuses the `String`'s buffer as the `Vec`.
    #[inline]
    fn from(value: Domain<String>) -> Self {
        let Domain { value: text } = value;
        Self {
            value: text.into_bytes(),
        }
    }
}
impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for Domain<&'b str> {
    #[expect(
        unsafe_code,
        reason = "we enforce ASCII, so str::from_utf8_unchecked is fine"
    )]
    #[inline]
    fn from(value: Domain<&'a [u8]>) -> Self {
        let Domain { value: bytes } = value;
        // SAFETY:
        // We only allow ASCII, so this is fine.
        let val = unsafe { str::from_utf8_unchecked(bytes) };
        Self { value: val }
    }
}
impl<'a: 'b, 'b> From<Domain<&'a str>> for Domain<&'b [u8]> {
    /// Views the contained `str` as bytes; a trailing `'.'` is kept.
    #[inline]
    fn from(value: Domain<&'a str>) -> Self {
        let Domain { value: text } = value;
        Self {
            value: text.as_bytes(),
        }
    }
}
impl From<Domain<Self>> for String {
    /// Returns the contained `String` _without_ a trailing `'.'` if there was one.
    ///
    /// # Example
    ///
    /// ```
    /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
    /// assert!(String::from(Domain::try_from_bytes(String::from("Example.com."), &ASCII_LETTERS).unwrap()).as_str() == "Example.com");
    /// ```
    #[inline]
    fn from(value: Domain<Self>) -> Self {
        // Decide before moving the `String` out of `value`.
        let strip_dot = value.contains_trailing_dot();
        let mut val = value.value;
        if strip_dot {
            val.pop();
        }
        val
    }
}
impl<'a: 'b, 'b> From<Domain<&'a str>> for &'b str {
    /// Returns the contained `str` _without_ a trailing `'.'` if there was one.
///
    /// # Example
    ///
    /// ```
    /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
    /// assert!(<&str>::from(Domain::try_from_bytes("Example.com.", &ASCII_LETTERS).unwrap()) == "Example.com");
    /// ```
    #[expect(
        unsafe_code,
        reason = "we enforce ASCII, so str::from_utf8_unchecked is fine"
    )]
    #[expect(clippy::indexing_slicing, reason = "comment justifies its correctness")]
    #[inline]
    fn from(value: Domain<&'a str>) -> Self {
        let end = usize::from(value.len().get());
        // Indexing won't `panic` since `value.len()` is at most as long as `value.value`.
        let ascii = &value.value.as_bytes()[..end];
        // SAFETY:
        // Only ASCII is allowed, so this is fine.
        unsafe { str::from_utf8_unchecked(ascii) }
    }
}
impl From<Domain<Self>> for Vec<u8> {
    /// Returns the contained `Vec` _without_ a trailing `b'.'` if there was one.
    ///
    /// # Example
    ///
    /// ```
    /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
    /// assert!(Vec::from(Domain::try_from_bytes(vec![b'F', b'o', b'o', b'.', b'c', b'o', b'm'], &ASCII_LETTERS).unwrap()).as_slice() == b"Foo.com");
    /// ```
    #[inline]
    fn from(value: Domain<Self>) -> Self {
        // Decide before moving the `Vec` out of `value`.
        let strip_dot = value.contains_trailing_dot();
        let mut val = value.value;
        if strip_dot {
            val.pop();
        }
        val
    }
}
impl<'a: 'b, 'b> From<Domain<&'a [u8]>> for &'b [u8] {
    /// Returns the contained slice _without_ a trailing `b'.'` if there was one.
    ///
    /// # Example
    ///
    /// ```
    /// use ascii_domain::{dom::Domain, char_set::ASCII_LETTERS};
    /// assert!(<&[u8]>::from(Domain::try_from_bytes(b"Example.com.".as_slice(), &ASCII_LETTERS).unwrap()) == b"Example.com");
    /// ```
    #[expect(clippy::indexing_slicing, reason = "comment justifies its correctness")]
    #[inline]
    fn from(value: Domain<&'a [u8]>) -> Self {
        let end = usize::from(value.len().get());
        // Indexing won't `panic` since `value.len()` is at most as long as `value.value`.
        &value.value[..end]
    }
}
/// Error returned from [`Domain::try_from_bytes`].
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum DomainErr {
    /// The domain was empty.
    Empty,
    /// The domain was the root domain that is to say it was the domain that only contained the root
    /// zone (i.e., `b'.'`).
    RootDomain,
    /// The length of the domain was greater than 253 not counting a terminating `b'.'` if there was one.
    LenExceeds253(usize),
    /// The domain contained at least one empty label.
    EmptyLabel,
    /// The domain contained at least one label whose length exceeded 63.
    LabelLenExceeds63,
    /// The domain contained an invalid byte value.
    InvalidByte(u8),
}
impl Display for DomainErr {
    /// Writes a short human-readable description of the error.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // The two variants that carry data need formatting; the rest map to fixed messages.
        let fixed_message = match *self {
            Self::LenExceeds253(len) => {
                return write!(
                    f,
                    "domain has length {len} which is greater than the max length of 253"
                );
            }
            Self::InvalidByte(byt) => {
                return write!(f, "domain has a label with the invalid byte value {byt}");
            }
            Self::Empty => "domain is empty",
            Self::RootDomain => "domain is the root domain",
            Self::EmptyLabel => "domain has an empty label",
            Self::LabelLenExceeds63 => "domain has a label that exceeds the max length of 63",
        };
        f.write_str(fixed_message)
    }
}
impl Error for DomainErr {}
/// A label of a [`Domain`]. The total length of a `Label` is inclusively between 1 and 63.
#[derive(Clone, Copy, Debug)]
pub struct Label<'a> {
    /// The label value.
    value: &'a str,
}
impl<'a> Label<'a> {
    /// The label.
671 /// 672 /// # Example 673 /// 674 /// ``` 675 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 676 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.as_str() == "com")); 677 /// ``` 678 #[inline] 679 #[must_use] 680 pub const fn as_str(self) -> &'a str { 681 self.value 682 } 683 /// Returns `true` iff the label only contains ASCII letters. 684 /// 685 /// # Example 686 /// 687 /// ``` 688 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 689 /// assert!(Domain::try_from_bytes("example.com", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphabetic())); 690 /// ``` 691 #[inline] 692 #[must_use] 693 pub fn is_alphabetic(self) -> bool { 694 self.value 695 .as_bytes() 696 .iter() 697 .try_fold((), |(), byt| { 698 if byt.is_ascii_alphabetic() { 699 Ok(()) 700 } else { 701 Err(()) 702 } 703 }) 704 .is_ok() 705 } 706 /// Returns `true` iff the label only contains ASCII digits. 707 /// 708 /// # Example 709 /// 710 /// ``` 711 /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE}; 712 /// assert!(Domain::try_from_bytes("example.123", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_digits())); 713 /// ``` 714 #[inline] 715 #[must_use] 716 pub fn is_digits(self) -> bool { 717 self.value 718 .as_bytes() 719 .iter() 720 .try_fold((), |(), byt| { 721 if byt.is_ascii_digit() { 722 Ok(()) 723 } else { 724 Err(()) 725 } 726 }) 727 .is_ok() 728 } 729 /// Returns `true` iff the label only contains ASCII digits or letters. 
730 /// 731 /// # Example 732 /// 733 /// ``` 734 /// use ascii_domain::{dom::Domain, char_set::ASCII_DIGITS_LOWERCASE}; 735 /// assert!(Domain::try_from_bytes("example.1com", &ASCII_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_alphanumeric())); 736 /// ``` 737 #[inline] 738 #[must_use] 739 pub fn is_alphanumeric(self) -> bool { 740 self.value 741 .as_bytes() 742 .iter() 743 .try_fold((), |(), byt| { 744 if byt.is_ascii_alphanumeric() { 745 Ok(()) 746 } else { 747 Err(()) 748 } 749 }) 750 .is_ok() 751 } 752 /// Returns `true` iff the label only contains ASCII hyphen, digits, or letters. 753 /// 754 /// # Example 755 /// 756 /// ``` 757 /// use ascii_domain::{dom::Domain, char_set::ASCII_HYPHEN_DIGITS_LOWERCASE}; 758 /// assert!(Domain::try_from_bytes("example.1-com", &ASCII_HYPHEN_DIGITS_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.is_hyphen_or_alphanumeric())); 759 /// ``` 760 #[inline] 761 #[must_use] 762 pub fn is_hyphen_or_alphanumeric(self) -> bool { 763 self.value 764 .as_bytes() 765 .iter() 766 .try_fold((), |(), byt| { 767 if *byt == b'-' || byt.is_ascii_alphanumeric() { 768 Ok(()) 769 } else { 770 Err(()) 771 } 772 }) 773 .is_ok() 774 } 775 /// The length of the `Label`. This is inclusively between 1 and 63. 776 /// 777 /// # Example 778 /// 779 /// ``` 780 /// use ascii_domain::{dom::Domain, char_set::ASCII_LOWERCASE}; 781 /// assert!(Domain::try_from_bytes("example.com.", &ASCII_LOWERCASE).unwrap().into_iter().next().map_or(false, |label| label.len().get() == 3)); 782 /// ``` 783 #[expect( 784 unsafe_code, 785 reason = "we enforce label lengths, so NonZeroU8::new_unchecked is fine" 786 )] 787 #[expect( 788 clippy::as_conversions, 789 clippy::cast_possible_truncation, 790 reason = "comments justify their correctness" 791 )] 792 #[inline] 793 #[must_use] 794 pub const fn len(self) -> NonZeroU8 { 795 // The max length of a `Label` is 63. 
796 let len = self.value.len() as u8; 797 // SAFETY: 798 // `Label`s are never empty. 799 unsafe { NonZeroU8::new_unchecked(len) } 800 } 801 } 802 impl PartialEq<Label<'_>> for Label<'_> { 803 #[inline] 804 fn eq(&self, other: &Label<'_>) -> bool { 805 self.value.eq_ignore_ascii_case(other.value) 806 } 807 } 808 impl PartialEq<&Label<'_>> for Label<'_> { 809 #[inline] 810 fn eq(&self, other: &&Label<'_>) -> bool { 811 *self == **other 812 } 813 } 814 impl PartialEq<Label<'_>> for &Label<'_> { 815 #[inline] 816 fn eq(&self, other: &Label<'_>) -> bool { 817 **self == *other 818 } 819 } 820 impl Eq for Label<'_> {} 821 impl PartialOrd<Label<'_>> for Label<'_> { 822 #[inline] 823 fn partial_cmp(&self, other: &Label<'_>) -> Option<Ordering> { 824 Some(self.cmp(other)) 825 } 826 } 827 impl Ord for Label<'_> { 828 #[inline] 829 fn cmp(&self, other: &Self) -> Ordering { 830 self.value 831 .to_ascii_lowercase() 832 .cmp(&other.value.to_ascii_lowercase()) 833 } 834 } 835 impl Hash for Label<'_> { 836 #[inline] 837 fn hash<H: Hasher>(&self, state: &mut H) { 838 self.value.to_ascii_lowercase().hash(state); 839 } 840 } 841 impl Display for Label<'_> { 842 #[inline] 843 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 844 f.write_str(self.value) 845 } 846 } 847 impl<'a> AsRef<[u8]> for Label<'a> { 848 #[inline] 849 fn as_ref(&self) -> &'a [u8] { 850 self.value.as_bytes() 851 } 852 } 853 impl<'a> AsRef<str> for Label<'a> { 854 #[inline] 855 fn as_ref(&self) -> &'a str { 856 self.value 857 } 858 } 859 impl<'a> Deref for Label<'a> { 860 type Target = str; 861 #[inline] 862 fn deref(&self) -> &'a Self::Target { 863 self.value 864 } 865 } 866 /// [`Iterator`] that iterates [`Label`]s from a [`Domain`] or [`Rfc1123Domain`] starting from the TLD down. 867 /// 868 /// This iterates `Label`s on demand; so if repeated iteration is desired, it may be better to collect the `Label`s 869 /// in a collection (e.g., [`Vec`]) than create the iterator again. 
This is also why [`ExactSizeIterator`] is not 870 /// implemented. 871 #[derive(Clone, Debug)] 872 pub struct LabelIter<'a> { 873 /// Domain as ASCII. 874 domain: &'a [u8], 875 } 876 impl<'a> Iterator for LabelIter<'a> { 877 type Item = Label<'a>; 878 #[expect( 879 unsafe_code, 880 reason = "we only allow ASCII, so str::from_utf8_unchecked is fine" 881 )] 882 #[expect( 883 clippy::arithmetic_side_effects, 884 clippy::indexing_slicing, 885 reason = "comments justify their correctness" 886 )] 887 #[inline] 888 fn next(&mut self) -> Option<Self::Item> { 889 (!self.domain.is_empty()).then(|| { 890 self.domain 891 .iter() 892 .rev() 893 .try_fold(1, |count, byt| { 894 if *byt == b'.' { 895 let len = self.domain.len(); 896 // `count` < `len` since there is at least one more `u8` before `b'.'`. 897 let idx = len - count; 898 // `idx + 1` < `len` since `count` is > 1 since `Label`s are never empty. 899 let ascii = &self.domain[idx + 1..len]; 900 // SAFETY: 901 // We only allow ASCII, so this is safe. 902 let value = unsafe { str::from_utf8_unchecked(ascii) }; 903 self.domain = &self.domain[..idx]; 904 Err(Label { value }) 905 } else { 906 Ok(count + 1) 907 } 908 }) 909 .map_or_else(convert::identity, |_| { 910 // SAFETY: 911 // We only allow ASCII, so this is safe. 912 let value = unsafe { str::from_utf8_unchecked(self.domain) }; 913 self.domain = &[]; 914 Label { value } 915 }) 916 }) 917 } 918 #[inline] 919 fn last(mut self) -> Option<Self::Item> 920 where 921 Self: Sized, 922 { 923 self.next_back() 924 } 925 #[inline] 926 fn size_hint(&self) -> (usize, Option<usize>) { 927 if self.domain.is_empty() { 928 (0, Some(0)) 929 } else { 930 // The max size of a `Label` is 63; and all but the last have a `b'.'` that follow it. 931 // This means the fewest `Label`s possible is the floor of the length divided by 64 with 932 // the added requirement that it's at least one since we know the domain is not empty. 
933 // The min size of a `Label` is 1; and all but the last have a `b'.'` that follow it. 934 // This means the max number of `Label`s is the ceiling of the length divided by 2. 935 ( 936 (self.domain.len() >> 6).max(1), 937 Some(self.domain.len().div_ceil(2)), 938 ) 939 } 940 } 941 } 942 impl FusedIterator for LabelIter<'_> {} 943 impl DoubleEndedIterator for LabelIter<'_> { 944 #[expect( 945 unsafe_code, 946 reason = "we only allow ASCII, so str::from_utf8_unchecked is fine" 947 )] 948 #[expect( 949 clippy::arithmetic_side_effects, 950 clippy::indexing_slicing, 951 reason = "comments justify their correctness" 952 )] 953 #[inline] 954 fn next_back(&mut self) -> Option<Self::Item> { 955 (!self.domain.is_empty()).then(|| { 956 self.domain 957 .iter() 958 .try_fold(0, |count, byt| { 959 if *byt == b'.' { 960 // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s 961 // are not empty. 962 let ascii = &self.domain[..count]; 963 // SAFETY: 964 // We only allow ASCII, so this is safe. 965 let value = unsafe { str::from_utf8_unchecked(ascii) }; 966 // `count + 1` < `self.domain.len()` since there is at least one more `Label` and `Label`s 967 // are not empty. 968 self.domain = &self.domain[count + 1..]; 969 Err(Label { value }) 970 } else { 971 Ok(count + 1) 972 } 973 }) 974 .map_or_else(convert::identity, |_| { 975 // SAFETY: 976 // We only allow ASCII, so this is safe. 
977 let value = unsafe { str::from_utf8_unchecked(self.domain) }; 978 self.domain = &[]; 979 Label { value } 980 }) 981 }) 982 } 983 } 984 impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Domain<T> { 985 type Item = Label<'a>; 986 type IntoIter = LabelIter<'a>; 987 #[inline] 988 fn into_iter(self) -> Self::IntoIter { 989 LabelIter { 990 domain: self.as_bytes(), 991 } 992 } 993 } 994 impl<'a> IntoIterator for Domain<&'a str> { 995 type Item = Label<'a>; 996 type IntoIter = LabelIter<'a>; 997 #[inline] 998 fn into_iter(self) -> Self::IntoIter { 999 LabelIter { 1000 domain: <&str>::from(self).as_bytes(), 1001 } 1002 } 1003 } 1004 impl<'a> IntoIterator for Domain<&'a [u8]> { 1005 type Item = Label<'a>; 1006 type IntoIter = LabelIter<'a>; 1007 #[inline] 1008 fn into_iter(self) -> Self::IntoIter { 1009 LabelIter { 1010 domain: <&[u8]>::from(self), 1011 } 1012 } 1013 } 1014 /// Error returned from [`Rfc1123Domain::try_from`] and [`Rfc1123Domain::try_from_bytes`]. 1015 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 1016 pub enum Rfc1123Err { 1017 /// The inputs was not a valid [`Domain`]. 1018 DomainErr(DomainErr), 1019 /// A [`Label`] of [`Domain`] starts with an ASCII hyphen. 1020 LabelStartsWithAHyphen, 1021 /// A [`Label`] of [`Domain`] ends with an ASCII hyphen. 1022 LabelEndsWithAHyphen, 1023 /// The last [`Label`] (i.e., TLD) was invalid which means it was not all ASCII letters nor 1024 /// had length of at least five with the first 4 characters being `xn--`. 
InvalidTld,
}
impl Display for Rfc1123Err {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let msg = match *self {
            // The wrapped error already knows how to describe itself.
            Self::DomainErr(err) => return err.fmt(f),
            Self::LabelStartsWithAHyphen => "a label in the domain starts with a hyphen",
            Self::LabelEndsWithAHyphen => "a label in the domain ends with a hyphen",
            Self::InvalidTld => "the TLD in the domain was not all letters nor had length of at least five with the first 4 characters being 'xn--'",
        };
        f.write_str(msg)
    }
}
impl Error for Rfc1123Err {}
/// **TL;DR** Wrapper type around a [`Domain`] that enforces conformance to
/// [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123#page-13).
///
/// * Each [`Label`] must only contain ASCII digits, letters, or hyphen.
/// * Each `Label` must not begin or end with a hyphen.
/// * The last `Label` (i.e., TLD) must either contain only ASCII letters or have length of at least five and
///   begin with `xn--`.
/// ---
/// Unsurprisingly, RFC 1123 is not super precise as it uses "host name" to mean label and also domain:
/// "Host software MUST handle host names \[labels\] of up to 63 characters and SHOULD handle host
/// names \[domains\] of up to 255 characters". It also states that only "one aspect of host name \[label\]
/// syntax is hereby changed" from [RFC 952](https://www.rfc-editor.org/rfc/rfc952): "the restriction on the
/// first character is relaxed to allow either a letter or a digit". Despite that, it goes on to mention other
/// restrictions not mentioned in RFC 952: "the highest-level component label will be alphabetic". It is therefore
/// important to understand how this type interprets that RFC and why it does so.
///
/// The primary issue with RFC 1123 is the unjustified comment about the TLD being alphabetic. It is given
/// as if it is common knowledge. As explained by (the rejected)
/// [Errata 1353](https://www.rfc-editor.org/errata/eid1353), there seemed to be the assumption that the TLDs
/// at the time would be the only ones that would ever exist or at least that the format of them would always be
/// true. This leads to several possible interpretations:
///
/// * Strictest: enforce the TLD is one of the TLDs that existed at the time of the RFC.
/// * Strict: enforce the TLD has the same format as the TLDs at the time (i.e., two or three letters long).
/// * Literal: enforce the TLD is alphabetic regardless of the lack of justification.
/// * Relaxed: enforce the "spirit" that the TLD must exist.
/// * More relaxed: enforce the "spirit" that the TLD must have the same format of a valid TLD.
/// * Much more relaxed: enforce the "spirit" that the domain cannot have the form of an IPv4 address.
/// * Most relaxed: treat TLDs no differently than other labels (i.e., don't make assumptions about what will be
///   a valid TLD in the future).
///
/// RFC 1123 is not obsolete, and it is clear from more recent RFCs like
/// [RFC 5891](https://www.rfc-editor.org/rfc/rfc5891) that it is designed to be a foundation (i.e., domains that
/// are valid per newer RFCs are valid per RFC 1123). Clearly due to RFCs like RFC 5891, requiring the TLD
/// to be alphabetic or exactly two or three characters long would violate that. For those reasons the strictest,
/// strict, and literal interpretations are rejected.
///
/// Assuming TLDs are static is absurd, and relying on some dynamic list of TLDs is undesirable. For that reason
/// the relaxed interpretation is rejected.
///
/// Enforcing that domains do not have the form of an IPv4 address opens up the question of what is an IPv4
/// address? Should leading 0s be allowed? What about hexadecimal? Should there be length limits for each octet?
/// It also has the undesirable effect where subdomains that are all numeric exist but their parent domain does
/// not which goes against the hierarchical nature of DNS. For those reasons the much more relaxed interpretation
/// is rejected.
///
/// Treating TLDs no differently than other labels is nice from a consistency perspective, but it suffers from
/// the fact that domains that have the form of an IPv4 address are now allowed. For that reason the most
/// relaxed interpretation is rejected.
///
/// [ICANN](https://newgtlds.icann.org/sites/default/files/guidebook-full-04jun12-en.pdf) requires TLDs to either
/// be alphabetic or a valid A-label per RFC 5891. Verifying a label is a valid A-label is not a cheap operation
/// though. For that reason the more relaxed interpretation is accepted but with a twist: fake and valid A-labels
/// are allowed in addition to entirely alphabetic labels. More specifically the TLD must either contain only
/// letters or must be at least five characters long with the first 4 characters being `xn--`.
///
/// If one wants to enforce the literal interpretation, one can use [`Self::is_literal_interpretation`]. Similarly,
/// if one wants to enforce the strict interpretation, one can use [`Self::is_strict_interpretation`].
#[derive(Clone, Copy, Debug)]
pub struct Rfc1123Domain<T> {
    /// The wrapped domain.
    dom: Domain<T>,
}
impl<T> Rfc1123Domain<T> {
    /// Borrows the [`Domain`] that is wrapped by `self`.
    ///
    /// # Example
    ///
    /// ```
    /// use ascii_domain::dom::Rfc1123Domain;
    /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().domain().len().get() == 11);
    /// ```
    #[inline]
    pub const fn domain(&self) -> &Domain<T> {
        &self.dom
    }
    /// Returns the contained [`Domain`] consuming `self`.
///
    /// # Example
    ///
    /// ```
    /// use ascii_domain::dom::Rfc1123Domain;
    /// assert!(Rfc1123Domain::try_from_bytes("example.com").unwrap().into_domain().len().get() == 11);
    /// ```
    #[inline]
    pub fn into_domain(self) -> Domain<T> {
        self.dom
    }
}
impl<T: AsRef<[u8]>> Rfc1123Domain<T> {
    /// Function that transforms `v` into an `Rfc1123Domain` by only allowing [`Label`]s to contain the ASCII `u8`s
    /// in [`ASCII_HYPHEN_DIGITS_LETTERS`] with each `Label` not starting or ending with a `b'-'`. A trailing `b'.'`
    /// is ignored. The last `Label` (i.e., TLD) must either only contain ASCII letters or must have length of at
    /// least five with the first 4 bytes being `b"xn--"`.
    ///
    /// Unlike calling [`Domain::try_from_bytes`] then [`Rfc1123Domain::try_from`] which performs two traversals
    /// of `v`, this performs a single traversal of `v`.
    ///
    /// # Examples
    ///
    /// ```
    /// use ascii_domain::dom::{Rfc1123Domain, Rfc1123Err};
    /// assert!(Rfc1123Domain::try_from_bytes("example.com").is_ok());
    /// assert!(Rfc1123Domain::try_from_bytes("example.xn--abc").is_ok());
    /// assert!(Rfc1123Domain::try_from_bytes("a-.com").map_or_else(|err| err == Rfc1123Err::LabelEndsWithAHyphen, |_| false));
    /// ```
    ///
    /// # Errors
    ///
    /// Returns [`Rfc1123Err`] iff `v.as_ref()` is an invalid `Rfc1123Domain`.
    #[expect(
        clippy::arithmetic_side_effects,
        clippy::indexing_slicing,
        reason = "comments justify their correctness"
    )]
    #[expect(clippy::redundant_else, reason = "prefer else with else-if")]
    #[inline]
    pub fn try_from_bytes(v: T) -> Result<Self, Rfc1123Err> {
        // The easiest implementation would be redirecting to `Domain::try_from_bytes`; and upon success,
        // verify each `Label` doesn't begin or end with a hyphen. That requires traversing `v` twice though.
        // We opt to traverse just once.
        let val = v.as_ref();
        // `value` is `val` without the trailing `b'.'` if one exists. The last byte of the last `Label` is
        // verified here since the traversal below only inspects the end of a `Label` when it reaches a `b'.'`.
        let value = match val.last() {
            None => return Err(Rfc1123Err::DomainErr(DomainErr::Empty)),
            Some(byt) => {
                let b = *byt;
                if b == b'.' {
                    if val.len() == 1 {
                        return Err(Rfc1123Err::DomainErr(DomainErr::RootDomain));
                    }
                    // We know `val.len` is at least 2.
                    let len = val.len() - 1;
                    let lst = val[len - 1];
                    if lst == b'.' {
                        return Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel));
                    } else if lst == b'-' {
                        return Err(Rfc1123Err::LabelEndsWithAHyphen);
                    } else {
                        &val[..len]
                    }
                } else if b == b'-' {
                    return Err(Rfc1123Err::LabelEndsWithAHyphen);
                } else {
                    val
                }
            }
        };
        if value.len() > 253 {
            Err(Rfc1123Err::DomainErr(DomainErr::LenExceeds253(value.len())))
        } else {
            value
                .iter()
                // The position of each byte is needed to inspect the byte that precedes a `b'.'`. A counter
                // that is only advanced for non-`b'.'` bytes falls behind by one for every `b'.'` that was
                // already seen which causes the wrong byte to be inspected for all but the first `Label`.
                .enumerate()
                .try_fold(0, |label_len, (idx, byt)| {
                    let b = *byt;
                    if b == b'.' {
                        NonZeroU8::new(label_len).map_or(
                            Err(Rfc1123Err::DomainErr(DomainErr::EmptyLabel)),
                            |_| {
                                // We verify the last character in the `Label` is not a hyphen.
                                // `idx` > 0 since `label_len` > 0 and `idx` < `value.len()` since
                                // it's the index of the `b'.'`.
                                if value[idx - 1] == b'-' {
                                    Err(Rfc1123Err::LabelEndsWithAHyphen)
                                } else {
                                    Ok(0)
                                }
                            },
                        )
                    } else if !RFC_CHARS.contains(b) {
                        Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(b)))
                    } else if b == b'-' && label_len == 0 {
                        Err(Rfc1123Err::LabelStartsWithAHyphen)
                    } else if label_len == 63 {
                        Err(Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63))
                    } else {
                        // This is less than 64 due to the above check, so this won't overflow;
                        Ok(label_len + 1)
                    }
                })
                .and_then(|tld_len| {
                    // `tld_len <= value.len()`.
                    let tld = &value[value.len() - usize::from(tld_len)..];
                    if (tld
                        .split_at_checked(4)
                        .map_or(false, |(fst, rem)| !rem.is_empty() && fst == b"xn--"))
                        || tld.iter().all(|byt| byt.is_ascii_alphabetic())
                    {
                        Ok(())
                    } else {
                        Err(Rfc1123Err::InvalidTld)
                    }
                })
                .map(|()| Self {
                    dom: Domain { value: v },
                })
        }
    }
    /// Returns `true` iff the domain adheres to the literal interpretation of RFC 1123. For more information
    /// read the description of [`Rfc1123Domain`].
    ///
    /// # Examples
    ///
    /// ```
    /// use ascii_domain::dom::Rfc1123Domain;
    /// assert!(Rfc1123Domain::try_from_bytes("example.commmm").unwrap().is_literal_interpretation());
    /// assert!(!Rfc1123Domain::try_from_bytes("example.xn--abc").unwrap().is_literal_interpretation());
    /// ```
    #[inline]
    pub fn is_literal_interpretation(&self) -> bool {
        self.dom.tld().is_alphabetic()
    }
    /// Returns `true` iff the domain adheres to the strict interpretation of RFC 1123. For more information
    /// read the description of [`Rfc1123Domain`].
1266 /// 1267 /// # Examples 1268 /// 1269 /// ``` 1270 /// use ascii_domain::dom::Rfc1123Domain; 1271 /// assert!(Rfc1123Domain::try_from_bytes("example.Com").unwrap().is_strict_interpretation()); 1272 /// assert!(!Rfc1123Domain::try_from_bytes("example.comm").unwrap().is_strict_interpretation()); 1273 /// ``` 1274 #[inline] 1275 pub fn is_strict_interpretation(&self) -> bool { 1276 let tld = self.dom.tld(); 1277 (2..4).contains(&tld.len().get()) && tld.is_alphabetic() 1278 } 1279 } 1280 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for Rfc1123Domain<T2> { 1281 #[inline] 1282 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1283 self.dom == other.dom 1284 } 1285 } 1286 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Rfc1123Domain<T>> for Rfc1123Domain<T2> { 1287 #[inline] 1288 fn eq(&self, other: &&Rfc1123Domain<T>) -> bool { 1289 self.dom == other.dom 1290 } 1291 } 1292 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for &Rfc1123Domain<T2> { 1293 #[inline] 1294 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1295 self.dom == other.dom 1296 } 1297 } 1298 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for Domain<T2> { 1299 #[inline] 1300 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1301 *self == other.dom 1302 } 1303 } 1304 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Rfc1123Domain<T>> for &Domain<T2> { 1305 #[inline] 1306 fn eq(&self, other: &Rfc1123Domain<T>) -> bool { 1307 **self == other.dom 1308 } 1309 } 1310 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<&Rfc1123Domain<T>> for Domain<T2> { 1311 #[inline] 1312 fn eq(&self, other: &&Rfc1123Domain<T>) -> bool { 1313 *self == other.dom 1314 } 1315 } 1316 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialEq<Domain<T>> for Rfc1123Domain<T2> { 1317 #[inline] 1318 fn eq(&self, other: &Domain<T>) -> bool { 1319 self.dom == *other 1320 } 1321 } 1322 impl<T: AsRef<[u8]>> Eq for Rfc1123Domain<T> {} 1323 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> 
PartialOrd<Rfc1123Domain<T>> for Rfc1123Domain<T2> { 1324 #[inline] 1325 fn partial_cmp(&self, other: &Rfc1123Domain<T>) -> Option<Ordering> { 1326 self.dom.partial_cmp(&other.dom) 1327 } 1328 } 1329 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Rfc1123Domain<T>> for Domain<T2> { 1330 #[inline] 1331 fn partial_cmp(&self, other: &Rfc1123Domain<T>) -> Option<Ordering> { 1332 self.partial_cmp(&other.dom) 1333 } 1334 } 1335 impl<T: AsRef<[u8]>, T2: AsRef<[u8]>> PartialOrd<Domain<T>> for Rfc1123Domain<T2> { 1336 #[inline] 1337 fn partial_cmp(&self, other: &Domain<T>) -> Option<Ordering> { 1338 self.dom.partial_cmp(other) 1339 } 1340 } 1341 impl<T: AsRef<[u8]>> Ord for Rfc1123Domain<T> { 1342 #[inline] 1343 fn cmp(&self, other: &Self) -> Ordering { 1344 self.dom.cmp(&other.dom) 1345 } 1346 } 1347 impl<T: AsRef<[u8]>> Hash for Rfc1123Domain<T> { 1348 #[inline] 1349 fn hash<H: Hasher>(&self, state: &mut H) { 1350 self.dom.hash(state); 1351 } 1352 } 1353 impl<T> AsRef<Domain<T>> for Rfc1123Domain<T> { 1354 #[inline] 1355 fn as_ref(&self) -> &Domain<T> { 1356 &self.dom 1357 } 1358 } 1359 impl<T> Borrow<Domain<T>> for Rfc1123Domain<T> { 1360 #[inline] 1361 fn borrow(&self) -> &Domain<T> { 1362 &self.dom 1363 } 1364 } 1365 impl<T> Deref for Rfc1123Domain<T> { 1366 type Target = Domain<T>; 1367 #[inline] 1368 fn deref(&self) -> &Self::Target { 1369 &self.dom 1370 } 1371 } 1372 impl<T> From<Rfc1123Domain<T>> for Domain<T> { 1373 #[inline] 1374 fn from(value: Rfc1123Domain<T>) -> Self { 1375 value.dom 1376 } 1377 } 1378 impl From<Rfc1123Domain<Vec<u8>>> for Rfc1123Domain<String> { 1379 #[inline] 1380 fn from(value: Rfc1123Domain<Vec<u8>>) -> Self { 1381 Self { 1382 dom: Domain::<String>::from(value.dom), 1383 } 1384 } 1385 } 1386 impl<'a: 'b, 'b, T: AsRef<[u8]>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b [u8]> { 1387 #[inline] 1388 fn from(value: &'a Rfc1123Domain<T>) -> Self { 1389 Self { 1390 dom: Domain::<&'b [u8]>::from(&value.dom), 1391 } 1392 } 1393 } 1394 impl<'a: 
'b, 'b, T: AsRef<str>> From<&'a Rfc1123Domain<T>> for Rfc1123Domain<&'b str> { 1395 #[inline] 1396 fn from(value: &'a Rfc1123Domain<T>) -> Self { 1397 Self { 1398 dom: Domain::<&'b str>::from(&value.dom), 1399 } 1400 } 1401 } 1402 impl From<Rfc1123Domain<String>> for Rfc1123Domain<Vec<u8>> { 1403 #[inline] 1404 fn from(value: Rfc1123Domain<String>) -> Self { 1405 Self { 1406 dom: Domain::<Vec<u8>>::from(value.dom), 1407 } 1408 } 1409 } 1410 impl<'a: 'b, 'b> From<Rfc1123Domain<&'a [u8]>> for Rfc1123Domain<&'b str> { 1411 #[inline] 1412 fn from(value: Rfc1123Domain<&'a [u8]>) -> Self { 1413 Self { 1414 dom: Domain::<&'b str>::from(value.dom), 1415 } 1416 } 1417 } 1418 impl<'a: 'b, 'b> From<Rfc1123Domain<&'a str>> for Rfc1123Domain<&'b [u8]> { 1419 #[inline] 1420 fn from(value: Rfc1123Domain<&'a str>) -> Self { 1421 Self { 1422 dom: Domain::<&'b [u8]>::from(value.dom), 1423 } 1424 } 1425 } 1426 impl<T: AsRef<[u8]>> TryFrom<Domain<T>> for Rfc1123Domain<T> { 1427 type Error = Rfc1123Err; 1428 #[expect( 1429 clippy::arithmetic_side_effects, 1430 clippy::indexing_slicing, 1431 clippy::unreachable, 1432 reason = "comments explain their correctness" 1433 )] 1434 #[inline] 1435 fn try_from(value: Domain<T>) -> Result<Self, Self::Error> { 1436 let mut labels = value.iter(); 1437 let tld = labels 1438 .next() 1439 .unwrap_or_else(|| unreachable!("there is a bug in Domain::try_from_bytes")); 1440 if tld.is_alphabetic() 1441 || tld 1442 .split_at_checked(4) 1443 .map_or(false, |(fst, rem)| !rem.is_empty() && fst == "xn--") 1444 { 1445 labels 1446 .try_fold((), |(), label| { 1447 let bytes = label.value.as_bytes(); 1448 // `Label`s are never empty, so the below indexing is fine. 1449 // Underflow won't occur for the same reason. 
1450 if bytes[0] == b'-' { 1451 Err(Rfc1123Err::LabelStartsWithAHyphen) 1452 } else if bytes[bytes.len() - 1] == b'-' { 1453 Err(Rfc1123Err::LabelEndsWithAHyphen) 1454 } else { 1455 bytes.iter().try_fold((), |(), byt| match *byt { 1456 b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z' => Ok(()), 1457 val => Err(Rfc1123Err::DomainErr(DomainErr::InvalidByte(val))), 1458 }) 1459 } 1460 }) 1461 .map(|()| Self { dom: value }) 1462 } else { 1463 Err(Rfc1123Err::InvalidTld) 1464 } 1465 } 1466 } 1467 impl<T: AsRef<[u8]>> Display for Rfc1123Domain<T> { 1468 #[inline] 1469 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1470 self.dom.fmt(f) 1471 } 1472 } 1473 impl<'a, T: AsRef<[u8]>> IntoIterator for &'a Rfc1123Domain<T> { 1474 type Item = Label<'a>; 1475 type IntoIter = LabelIter<'a>; 1476 #[inline] 1477 fn into_iter(self) -> Self::IntoIter { 1478 LabelIter { 1479 domain: self.dom.as_bytes(), 1480 } 1481 } 1482 } 1483 impl<'a> IntoIterator for Rfc1123Domain<&'a str> { 1484 type Item = Label<'a>; 1485 type IntoIter = LabelIter<'a>; 1486 #[inline] 1487 fn into_iter(self) -> Self::IntoIter { 1488 LabelIter { 1489 domain: <&str>::from(self.dom).as_bytes(), 1490 } 1491 } 1492 } 1493 impl<'a> IntoIterator for Rfc1123Domain<&'a [u8]> { 1494 type Item = Label<'a>; 1495 type IntoIter = LabelIter<'a>; 1496 #[inline] 1497 fn into_iter(self) -> Self::IntoIter { 1498 LabelIter { 1499 domain: <&[u8]>::from(self.dom), 1500 } 1501 } 1502 } 1503 #[cfg(test)] 1504 mod tests { 1505 extern crate alloc; 1506 use super::{Domain, DomainErr, Rfc1123Domain, Rfc1123Err}; 1507 use crate::char_set::{AllowedAscii, ASCII_FIREFOX, ASCII_HYPHEN_DIGITS_LETTERS}; 1508 use alloc::borrow::ToOwned; 1509 use core::cmp::Ordering; 1510 #[test] 1511 fn test_dom_parse() { 1512 let allowed_ascii = ASCII_FIREFOX; 1513 // Test empty is error. 1514 assert!(Domain::try_from_bytes("", &allowed_ascii) 1515 .map_or_else(|e| e == DomainErr::Empty, |_| false)); 1516 // Test root domain. 
1517 assert!(Domain::try_from_bytes(".", &allowed_ascii) 1518 .map_or_else(|e| e == DomainErr::RootDomain, |_| false)); 1519 // Test empty label is error. 1520 assert!(Domain::try_from_bytes("a..com", &allowed_ascii) 1521 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); 1522 assert!(Domain::try_from_bytes("a..", &allowed_ascii) 1523 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); 1524 assert!(Domain::try_from_bytes("..", &allowed_ascii) 1525 .map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); 1526 // Test label too long. 1527 let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; 1528 // 4 + 64 + 4 1529 assert!(val.len() == 72); 1530 assert!(Domain::try_from_bytes(val, &allowed_ascii) 1531 .map_or_else(|e| e == DomainErr::LabelLenExceeds63, |_| false)); 1532 assert!(Domain::try_from_bytes( 1533 "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com", 1534 &allowed_ascii 1535 ) 1536 .map_or(false, |d| d.len().get() == 71)); 1537 // Test domain too long. 1538 assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(254), |_| false)); 1539 assert!(Domain::try_from_bytes("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", &allowed_ascii).map_or(false, |d| d.len().get() == 253 )); 1540 // Test max labels. 
1541 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or_else(|e| e == DomainErr::LenExceeds253(255), |_| false)); 1542 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a", &allowed_ascii).map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1543 assert!(Domain::try_from_bytes("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.", &allowed_ascii).map_or(false, |d| d.iter().count() == 127 && d.len().get() == 253)); 1544 // Test removal of trailing '.'. 1545 assert!( 1546 Domain::try_from_bytes("com.", &allowed_ascii).map_or(false, |d| d.as_str() == "com") 1547 ); 1548 // Test single label. 1549 assert!(Domain::try_from_bytes("c", &allowed_ascii).map_or(false, |d| d.as_str() == "c")); 1550 // Test case-insensitivity. 
1551 assert!( 1552 Domain::try_from_bytes("wwW.ExAMple.COm", &allowed_ascii).map_or(false, |d| { 1553 Domain::try_from_bytes("www.example.com", &allowed_ascii) 1554 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) 1555 }) 1556 ); 1557 assert!( 1558 Domain::try_from_bytes("ww_W.com", &allowed_ascii).map_or(false, |d| { 1559 Domain::try_from_bytes("Ww_w.com", &allowed_ascii) 1560 .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal) 1561 }) 1562 ); 1563 // Test valid bytes 1564 let mut input; 1565 let mut counter = 0; 1566 for i in 0..=127 { 1567 input = [i]; 1568 match i { 1569 b'!' 1570 | b'$' 1571 | b'&'..=b')' 1572 | b'+'..=b'-' 1573 | b'0'..=b'9' 1574 | b';' 1575 | b'=' 1576 | b'A'..=b'Z' 1577 | b'_'..=b'{' 1578 | b'}'..=b'~' => { 1579 counter += 1; 1580 assert!( 1581 Domain::try_from_bytes(input, &allowed_ascii).map_or(false, |d| d 1582 .value 1583 .len() 1584 == 1 1585 && d.value == input) 1586 ) 1587 } 1588 b'.' => { 1589 let input2 = b"a."; 1590 assert!( 1591 Domain::try_from_bytes(input2, &allowed_ascii).map_or(false, |d| d 1592 .len() 1593 .get() 1594 == 1 1595 && d.value == input2) 1596 ) 1597 } 1598 _ => assert!(Domain::try_from_bytes(input, &allowed_ascii) 1599 .map_or_else(|e| e == DomainErr::InvalidByte(i), |_| false)), 1600 } 1601 } 1602 assert!(counter == 78); 1603 } 1604 #[test] 1605 fn test_dom_iter() { 1606 let allowed_ascii = ASCII_FIREFOX; 1607 assert!( 1608 Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { 1609 let mut iter = d.iter(); 1610 let Some(l) = iter.next() else { 1611 return false; 1612 }; 1613 if l.value != "com" { 1614 return false; 1615 } 1616 let Some(l) = iter.next() else { return false }; 1617 if l.value != "example" { 1618 return false; 1619 } 1620 let Some(l) = iter.next() else { 1621 return false; 1622 }; 1623 if l.value != "www" { 1624 return false; 1625 } 1626 iter.next().is_none() 1627 }) 1628 ); 1629 assert!( 1630 Domain::try_from_bytes("www.example.com", 
&allowed_ascii).map_or(false, |d| { 1631 let mut iter = d.iter(); 1632 let Some(l) = iter.next_back() else { 1633 return false; 1634 }; 1635 if l.value != "www" { 1636 return false; 1637 } 1638 let Some(l) = iter.next_back() else { 1639 return false; 1640 }; 1641 if l.value != "example" { 1642 return false; 1643 } 1644 let Some(l) = iter.next_back() else { 1645 return false; 1646 }; 1647 if l.value != "com" { 1648 return false; 1649 } 1650 iter.next_back().is_none() 1651 }) 1652 ); 1653 assert!( 1654 Domain::try_from_bytes("www.example.com", &allowed_ascii).map_or(false, |d| { 1655 let mut iter = d.iter(); 1656 let Some(l) = iter.next_back() else { 1657 return false; 1658 }; 1659 if l.value != "www" { 1660 return false; 1661 } 1662 let Some(l) = iter.next() else { return false }; 1663 if l.value != "com" { 1664 return false; 1665 } 1666 let Some(l) = iter.next_back() else { 1667 return false; 1668 }; 1669 if l.value != "example" { 1670 return false; 1671 } 1672 iter.next().is_none() && iter.next_back().is_none() 1673 }) 1674 ); 1675 } 1676 #[test] 1677 fn rfc1123() { 1678 assert!( 1679 Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1680 false, 1681 |dom| Rfc1123Domain::try_from(dom) 1682 .map_or(false, |dom| dom.as_str() == "example.com") 1683 ) 1684 ); 1685 assert!( 1686 AllowedAscii::try_from_unique_ascii(b"exampl!co".to_owned()).map_or(false, |ascii| { 1687 Domain::try_from_bytes("exampl!e.com", &ascii).map_or(false, |dom| { 1688 Rfc1123Domain::try_from(dom).map_or_else( 1689 |e| e == Rfc1123Err::DomainErr(DomainErr::InvalidByte(b'!')), 1690 |_| false, 1691 ) 1692 }) 1693 }) 1694 ); 1695 assert!( 1696 Domain::try_from_bytes("example-.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1697 false, 1698 |dom| Rfc1123Domain::try_from(dom) 1699 .map_or_else(|e| e == Rfc1123Err::LabelEndsWithAHyphen, |_| false) 1700 ) 1701 ); 1702 assert!( 1703 Domain::try_from_bytes("-example.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or( 1704 false, 1705 |dom| 
Rfc1123Domain::try_from(dom)
                    .map_or_else(|e| e == Rfc1123Err::LabelStartsWithAHyphen, |_| false)
            )
        );
        assert!(
            Domain::try_from_bytes("example.c1m", &ASCII_HYPHEN_DIGITS_LETTERS).map_or(
                false,
                |dom| Rfc1123Domain::try_from(dom)
                    .map_or_else(|e| e == Rfc1123Err::InvalidTld, |_| false)
            )
        );
        assert!(
            Domain::try_from_bytes("example.commm", &ASCII_HYPHEN_DIGITS_LETTERS).map_or(
                false,
                |dom| Rfc1123Domain::try_from(dom)
                    .map_or(false, |rfc| rfc.is_literal_interpretation())
            )
        );
        assert!(
            Domain::try_from_bytes("example.xn--abc", &ASCII_HYPHEN_DIGITS_LETTERS).map_or(
                false,
                |dom| Rfc1123Domain::try_from(dom)
                    .map_or(false, |rfc| !rfc.is_literal_interpretation())
            )
        );
        assert!(
            Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS).map_or(
                false,
                |dom| Rfc1123Domain::try_from(dom)
                    .map_or(false, |rfc| rfc.is_strict_interpretation())
            )
        );
        assert!(
            Domain::try_from_bytes("example.comm", &ASCII_HYPHEN_DIGITS_LETTERS).map_or(
                false,
                |dom| Rfc1123Domain::try_from(dom)
                    .map_or(false, |rfc| !rfc.is_strict_interpretation())
            )
        );
    }
    /// Checks that the TLD reported for `example.com` is `com`.
    #[test]
    fn test_tld() {
        let parsed = Domain::try_from_bytes("example.com", &ASCII_HYPHEN_DIGITS_LETTERS);
        assert!(parsed.is_ok_and(|dom| dom.tld().as_str() == "com"));
    }
    /// Exercises `Rfc1123Domain::try_from_bytes` against inputs that must be rejected with a
    /// specific error as well as inputs that must parse successfully.
    #[test]
    fn test_rfc1123_parse() {
        // `true` iff parsing `input` fails with exactly the error `expected`.
        let rejects = |input: &str, expected: Rfc1123Err| {
            Rfc1123Domain::try_from_bytes(input).is_err_and(|err| err == expected)
        };
        // The empty string is rejected.
        assert!(rejects("", Rfc1123Err::DomainErr(DomainErr::Empty)));
        // The root domain is rejected.
        assert!(rejects(".", Rfc1123Err::DomainErr(DomainErr::RootDomain)));
        // Empty labels are rejected regardless of where they occur.
        assert!(rejects(
            "a..com",
            Rfc1123Err::DomainErr(DomainErr::EmptyLabel)
        ));
        assert!(rejects("a..", Rfc1123Err::DomainErr(DomainErr::EmptyLabel)));
        assert!(rejects("..", Rfc1123Err::DomainErr(DomainErr::EmptyLabel)));
        // A 64-byte label is rejected.
        let label_of_64 = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com";
        // 4 + 64 + 4
        assert_eq!(label_of_64.len(), 72);
        assert!(rejects(
            label_of_64,
            Rfc1123Err::DomainErr(DomainErr::LabelLenExceeds63)
        ));
        // The same domain with one fewer byte in the long label parses, with a length of 71.
        let label_of_63 = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com";
        assert!(Rfc1123Domain::try_from_bytes(label_of_63).is_ok_and(|d| d.len().get() == 71));
        // A 254-byte domain is rejected; the reported length is 254.
        let domain_of_254 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        assert!(rejects(
            domain_of_254,
            Rfc1123Err::DomainErr(DomainErr::LenExceeds253(254))
        ));
        // A 253-byte domain parses.
        let domain_of_253 = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        assert!(Rfc1123Domain::try_from_bytes(domain_of_253).is_ok_and(|d| d.len().get() == 253));
        // Many single-byte labels: one label too many is rejected with a reported length of 255.
        let too_many_labels = "a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a";
        assert!(rejects(
            too_many_labels,
            Rfc1123Err::DomainErr(DomainErr::LenExceeds253(255))
        ));
        // 127 single-byte labels parse, both without and with a trailing '.'.
        let max_labels = "a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a";
        assert!(Rfc1123Domain::try_from_bytes(max_labels)
            .is_ok_and(|d| d.iter().count() == 127 && d.len().get() == 253));
        let max_labels_trailing_dot = "a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.";
        assert!(Rfc1123Domain::try_from_bytes(max_labels_trailing_dot)
            .is_ok_and(|d| d.iter().count() == 127 && d.len().get() == 253));
        // A trailing '.' is dropped.
        assert!(Rfc1123Domain::try_from_bytes("com.").is_ok_and(|d| d.as_str() == "com"));
        // A lone single-byte label parses.
        assert!(Rfc1123Domain::try_from_bytes("c").is_ok_and(|d| d.as_str() == "c"));
        // Labels ending with a hyphen are rejected.
        assert!(rejects("-", Rfc1123Err::LabelEndsWithAHyphen));
        assert!(rejects("-.", Rfc1123Err::LabelEndsWithAHyphen));
        assert!(rejects("a.com.-", Rfc1123Err::LabelEndsWithAHyphen));
        assert!(rejects("a.com-", Rfc1123Err::LabelEndsWithAHyphen));
        assert!(rejects("a-.com", Rfc1123Err::LabelEndsWithAHyphen));
        // Labels starting with a hyphen are rejected.
        assert!(rejects("a.-com", Rfc1123Err::LabelStartsWithAHyphen));
        assert!(rejects("-a.com", Rfc1123Err::LabelStartsWithAHyphen));
        // Domains differing only in ASCII case compare as equal.
        assert!(
            Rfc1123Domain::try_from_bytes("wwW.ExAMple.COm").is_ok_and(|mixed| {
                Rfc1123Domain::try_from_bytes("www.example.com")
                    .is_ok_and(|lower| mixed == lower && mixed.cmp(&lower) == Ordering::Equal)
            })
        );
        assert!(
            Rfc1123Domain::try_from_bytes("ww-W.com").is_ok_and(|first| {
                Rfc1123Domain::try_from_bytes("Ww-w.com")
                    .is_ok_and(|second| first == second && first.cmp(&second) == Ordering::Equal)
            })
        );
        // `1.1.1.1` is rejected with `InvalidTld`.
        assert!(rejects("1.1.1.1", Rfc1123Err::InvalidTld));
    }
}