dom.rs (89732B)
1 use crate::dom_count_auto_gen::proper_subdomain_count; 2 use ascii_domain::{ 3 char_set::{ASCII_FIREFOX, AllowedAscii}, 4 dom::{Domain, DomainErr, DomainOrdering}, 5 }; 6 use core::{ 7 borrow::Borrow, 8 cmp::Ordering, 9 convert, 10 fmt::{self, Display, Formatter}, 11 hash::{Hash, Hasher}, 12 num::NonZeroU8, 13 ops::Deref, 14 str, 15 }; 16 use num_bigint::BigUint; 17 use std::{ 18 error, 19 io::{Error, Write}, 20 }; 21 use superset_map::SetOrd; 22 use zfc::{BoundedCardinality, Cardinality, Set}; 23 /// One. 24 const ONE: NonZeroU8 = NonZeroU8::new(1).unwrap(); 25 /// Error returned when an invalid string is passed to [`Adblock::parse_value`], [`DomainOnly::parse_value`], 26 /// [`Hosts::parse_value`], [`Wildcard::parse_value`], or [`RpzDomain::parse_value`]. 27 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] 28 pub enum FirefoxDomainErr { 29 /// The domain is invalid based on [`Domain`] using [`ASCII_FIREFOX`]. 30 InvalidDomain(DomainErr), 31 /// The domain had a TLD that was not all letters nor length of at least five beginning with `b"xn--"`. 32 InvalidTld, 33 /// The string passed to [`Adblock::parse_value`] contained `$`. 34 InvalidAdblockDomain, 35 /// The string passed to [`Hosts::parse_value`] did not conform 36 /// to the required [`Hosts`] format. 37 InvalidHostsIP, 38 /// The length of the non-wildcard portion of the string passed to 39 /// [`Wildcard::parse_value`] was at least 252 which means there are 40 /// no proper subdomains. 
41 InvalidWildcardDomain, 42 } 43 impl Display for FirefoxDomainErr { 44 #[inline] 45 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 46 match *self { 47 Self::InvalidDomain(err) => err.fmt(f), 48 Self::InvalidTld => f.write_str("domain had a TLD that was not all letters nor at least five characters long starting with 'xn--'"), 49 Self::InvalidAdblockDomain => f.write_str("Adblock-style domain contained a '$'"), 50 Self::InvalidHostsIP => f.write_str("hosts-style domain does not begin with the IP '::', '::1', '0.0.0.0', or '127.0.0.1' followed by at least one space or tab"), 51 Self::InvalidWildcardDomain => f.write_str("non-wildcard portion of a wildcard domain had length of at least 252 which means there are 0 proper subdomains"), 52 } 53 } 54 } 55 impl error::Error for FirefoxDomainErr {} 56 /// The ASCII we allow domains to have. 57 const CHARS: &AllowedAscii<[u8; 78]> = &ASCII_FIREFOX; 58 /// Parses a `[u8]` into a `Domain` using `CHARS` with the added restriction that the `Domain` has a TLD 59 /// that is either all letters or has length of at least five and begins with `b"xn--"`. 60 #[expect(single_use_lifetimes, reason = "false positive")] 61 #[expect(clippy::indexing_slicing, reason = "we verify manually")] 62 fn domain_icann_tld<'a: 'b, 'b>(val: &'a [u8]) -> Result<Domain<&'b str>, FirefoxDomainErr> { 63 Domain::try_from_bytes(val, CHARS) 64 .map_err(FirefoxDomainErr::InvalidDomain) 65 .and_then(|dom| { 66 let tld = dom.tld(); 67 // `tld.as_bytes()[..4]` won't panic since we check before that that the length is at least 5. 68 if tld.is_alphabetic() || (tld.len().get() > 4 && tld.as_bytes()[..4] == *b"xn--") { 69 Ok(dom.into()) 70 } else { 71 Err(FirefoxDomainErr::InvalidTld) 72 } 73 }) 74 } 75 /// Action taken by a DNS server when a domain matches. 76 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 77 pub enum RpzAction { 78 /// Send `NXDOMAIN` reply. 79 Nxdomain, 80 /// Send `NODATA` reply. 
81 Nodata, 82 /// Do nothing; continue as normal. 83 Passthru, 84 /// Drop the query. 85 Drop, 86 /// Answer over TCP. 87 TcpOnly, 88 } 89 impl Display for RpzAction { 90 #[inline] 91 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 92 match *self { 93 Self::Nxdomain => f.write_str("NXDOMAIN"), 94 Self::Nodata => f.write_str("NODATA"), 95 Self::Passthru => f.write_str("PASSTHRU"), 96 Self::Drop => f.write_str("DROP"), 97 Self::TcpOnly => f.write_str("TCP-Only"), 98 } 99 } 100 } 101 impl PartialEq<&Self> for RpzAction { 102 #[inline] 103 fn eq(&self, other: &&Self) -> bool { 104 *self == **other 105 } 106 } 107 impl PartialEq<RpzAction> for &RpzAction { 108 #[inline] 109 fn eq(&self, other: &RpzAction) -> bool { 110 **self == *other 111 } 112 } 113 /// Writes the following line with `writer` based on `action`: 114 /// * `RpzAction::Nxdomain`: `<dom> CNAME .`. 115 /// * `RpzAction::Nodata`: `<dom> CNAME *.`. 116 /// * `RpzAction::Passthru`: `<dom> CNAME rpz-passthru.`. 117 /// * `RpzAction::Drop`: `<dom> CNAME rpz-drop.`. 118 /// * `RpzAction::TcpOnly`: `<dom> CNAME rpz-tcp-only.`. 119 /// 120 /// `*.` is prepended to `<dom>` iff `wildcard`. 121 /// 122 /// # Errors 123 /// 124 /// Returns [`Error`] iff [`writeln`] does. 125 #[inline] 126 pub fn write_rpz_line<W: Write, T>( 127 mut writer: W, 128 dom: &Domain<T>, 129 action: RpzAction, 130 wildcard: bool, 131 ) -> Result<(), Error> 132 where 133 Domain<T>: Display, 134 { 135 writeln!( 136 writer, 137 "{}{} CNAME {}.", 138 if wildcard { "*." } else { "" }, 139 dom, 140 match action { 141 RpzAction::Nxdomain => "", 142 RpzAction::Nodata => "*", 143 RpzAction::Passthru => "rpz-passthru", 144 RpzAction::Drop => "rpz-drop", 145 RpzAction::TcpOnly => "rpz-tcp-only", 146 } 147 ) 148 } 149 /// Type that can be returned by [`Domain`]-like parsers (e.g., [`Adblock`]). 150 #[derive(Clone, Copy, Debug)] 151 pub enum Value<'a, T: ParsedDomain<'a>> { 152 /// The parsed value is a domain. 
153 Domain(T), 154 /// The parsed value is a comment. 155 Comment(&'a str), 156 /// The parsed value is blank or just [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 157 Blank, 158 } 159 impl<'a, T: ParsedDomain<'a>> Value<'a, T> { 160 /// Returns `true` iff `self` is a [`Self::Domain`]. 161 #[inline] 162 pub const fn is_domain(&self) -> bool { 163 match *self { 164 Self::Domain(_) => true, 165 Self::Comment(_) | Self::Blank => false, 166 } 167 } 168 /// Returns `true` iff `self` is a [`Self::Comment`]. 169 #[inline] 170 pub const fn is_comment(&self) -> bool { 171 match *self { 172 Self::Comment(_) => true, 173 Self::Domain(_) | Self::Blank => false, 174 } 175 } 176 /// Returns `true` iff `self` is a [`Self::Blank`]. 177 #[inline] 178 pub const fn is_blank(&self) -> bool { 179 matches!(*self, Value::Blank) 180 } 181 /// Returns the contained [`Self::Domain`] value. 182 /// 183 /// # Panics 184 /// 185 /// Panics iff `self` is [`Self::Comment`] or [`Self::Blank`]. 186 #[expect(clippy::panic, reason = "bug if called incorrectly")] 187 #[inline] 188 pub fn unwrap_domain(self) -> T { 189 match self { 190 Self::Domain(dom) => dom, 191 Self::Comment(_) | Self::Blank => { 192 panic!("called `ParsedDomain::unwrap_domain()` on a `Comment` or `Blank` value") 193 } 194 } 195 } 196 /// Returns the contained [`prim@str`] in [`Self::Comment`]. 197 /// 198 /// # Panics 199 /// 200 /// Panics iff `self` is [`Self::Domain`] or [`Self::Blank`]. 201 #[expect(clippy::panic, reason = "bug if called incorrectly")] 202 #[inline] 203 pub fn unwrap_comment(self) -> &'a str { 204 match self { 205 Self::Comment(com) => com, 206 Self::Domain(_) | Self::Blank => { 207 panic!("called `ParsedDomain::unwrap_comment()` on a `Domain` or `Blank` value") 208 } 209 } 210 } 211 /// Returns [`unit`] when `self` is [`Self::Blank`]. 212 /// 213 /// # Panics 214 /// 215 /// Panics iff `self` is [`Self::Domain`] or [`Self::Comment`]. 
216 #[expect(clippy::panic, reason = "bug if called incorrectly")] 217 #[inline] 218 pub fn unwrap_blank(self) { 219 match self { 220 Self::Blank => {} 221 Self::Domain(_) | Self::Comment(_) => { 222 panic!("called `ParsedDomain::unwrap_blank()` on a `Domain` or `Comment` value") 223 } 224 } 225 } 226 } 227 /// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. 228 /// 229 /// When parsed into a [`Value::Domain`], the domain can be written to a 230 /// [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. 231 pub trait ParsedDomain<'a>: Sized { 232 /// The error returned from [`Self::parse_value`]. 233 type Error; 234 /// Parses a `str` into a `Value`. 235 /// # Errors 236 /// 237 /// Errors iff `val` is unable to be parsed into a `Value`. 238 #[expect(single_use_lifetimes, reason = "false positive")] 239 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error>; 240 /// Reference to the contained `Domain`. 241 fn domain(&self) -> &Domain<&'a str>; 242 /// Writes `self` as RPZ lines via `writer`. 243 /// 244 /// # Errors 245 /// 246 /// Errors iff `writer` errors. 247 fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error>; 248 } 249 /// Domain constructed from an 250 /// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax). 251 /// 252 /// Specifically the domain must conform to the following extended regex: 253 /// 254 /// `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$` 255 /// 256 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`] with the added requirement that it 257 /// does not contain `$`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of 258 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 259 /// 260 /// Comments are any lines that start with `!` or `#` (ignoring whitespace). 
Any in-line comments after a valid 261 /// domain are ignored and will be parsed into a [`Value::Domain`]. 262 /// 263 /// Note that this means some valid Adblock-style rules are not considered valid since such rules often contain 264 /// path information or modifiers (e.g., “third-party”), but this only considers domain-only rules. 265 #[derive(Clone, Debug)] 266 pub struct Adblock<'a> { 267 /// The `Domain`. 268 domain: Domain<&'a str>, 269 /// `true` iff `domain` represents all subdomains. Note that this includes `domain` itself. 270 subdomains: bool, 271 } 272 impl Adblock<'_> { 273 /// Returns `true` iff the contained [`Domain`] represents all subdomains. Note this includes the 274 /// `Domain` itself. 275 #[inline] 276 #[must_use] 277 pub const fn is_subdomains(&self) -> bool { 278 self.subdomains 279 } 280 /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used 281 /// for both. 282 #[must_use] 283 fn cmp_dom(&self, other: &Domain<&str>) -> Ordering { 284 match self.domain.cmp_by_domain_ordering(other) { 285 DomainOrdering::Less => Ordering::Less, 286 DomainOrdering::Shorter => { 287 if self.subdomains { 288 Ordering::Greater 289 } else { 290 Ordering::Less 291 } 292 } 293 DomainOrdering::Equal => { 294 if self.subdomains { 295 Ordering::Greater 296 } else { 297 Ordering::Equal 298 } 299 } 300 DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater, 301 } 302 } 303 /// The total order that is defined follows the following hierarchy: 304 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 305 /// 2. If 1. evaluates as not equivalent, then return the result. 306 /// 3. If `self` represents a single `Domain` (i.e., `!self.is_subdomains()`), 307 /// then return the comparison of label counts. 308 /// 4. `self` is greater. 309 /// 310 /// For example, `com` `<` `example.com` `<` `||example.com` `<` `||com` `<` `net` `<` `example.net` `<` `||example.net` `<` `||net`. 
311 #[inline] 312 #[must_use] 313 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 314 self.cmp_dom(&other.domain) 315 } 316 /// Same as [`Adblock::cmp_domain_only`]. 317 #[inline] 318 #[must_use] 319 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 320 self.cmp_dom(&other.domain) 321 } 322 /// The total order that is defined follows the following hierarchy: 323 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 324 /// 2. If 1. evaluates as not equivalent, then return the result. 325 /// 3. If both domains represent a single `Domain`, then return the comparison 326 /// of label counts. 327 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 328 /// 5. If the label counts are the same, `self` is greater. 329 /// 6. Return the inverse of the comparison of label counts. 330 /// 331 /// For example the following is a sequence of domains in 332 /// ascending order: 333 /// 334 /// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net` 335 #[inline] 336 #[must_use] 337 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 338 match self.domain.cmp_by_domain_ordering(&other.domain) { 339 DomainOrdering::Less => Ordering::Less, 340 DomainOrdering::Shorter => { 341 if self.subdomains { 342 Ordering::Greater 343 } else { 344 Ordering::Less 345 } 346 } 347 DomainOrdering::Equal => { 348 if self.subdomains { 349 Ordering::Greater 350 } else if other.proper_subdomains { 351 Ordering::Less 352 } else { 353 Ordering::Equal 354 } 355 } 356 DomainOrdering::Longer => { 357 if self.subdomains { 358 if other.proper_subdomains { 359 Ordering::Less 360 } else { 361 Ordering::Greater 362 } 363 } else if other.proper_subdomains { 364 Ordering::Less 365 } else { 366 
Ordering::Greater 367 } 368 } 369 DomainOrdering::Greater => Ordering::Greater, 370 } 371 } 372 /// Same as [`Adblock::cardinality`] except that a `BigUint` is returned. Note the count _includes_ 373 /// the `Domain` itself when `self.is_subdomains()`. 374 /// 375 /// `!self.is_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`. 376 #[expect(clippy::arithmetic_side_effects, reason = "arbitrary-sized arithmetic")] 377 #[inline] 378 #[must_use] 379 pub fn domain_count(&self) -> BigUint { 380 if self.subdomains { 381 proper_subdomain_count(&self.domain) + BigUint::new(vec![1]) 382 } else { 383 BigUint::new(vec![1]) 384 } 385 } 386 } 387 impl Display for Adblock<'_> { 388 #[inline] 389 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 390 write!( 391 f, 392 "{}{}", 393 if self.subdomains { "||" } else { "" }, 394 self.domain 395 ) 396 } 397 } 398 impl PartialEq<Adblock<'_>> for Adblock<'_> { 399 #[inline] 400 fn eq(&self, other: &Adblock<'_>) -> bool { 401 self.domain == other.domain && self.subdomains == other.subdomains 402 } 403 } 404 impl PartialEq<&Adblock<'_>> for Adblock<'_> { 405 #[inline] 406 fn eq(&self, other: &&Adblock<'_>) -> bool { 407 *self == **other 408 } 409 } 410 impl PartialEq<Adblock<'_>> for &Adblock<'_> { 411 #[inline] 412 fn eq(&self, other: &Adblock<'_>) -> bool { 413 **self == *other 414 } 415 } 416 impl PartialEq<DomainOnly<'_>> for Adblock<'_> { 417 #[inline] 418 fn eq(&self, other: &DomainOnly<'_>) -> bool { 419 !self.subdomains && self.domain == other.domain 420 } 421 } 422 impl PartialEq<&DomainOnly<'_>> for Adblock<'_> { 423 #[inline] 424 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 425 *self == **other 426 } 427 } 428 impl PartialEq<DomainOnly<'_>> for &Adblock<'_> { 429 #[inline] 430 fn eq(&self, other: &DomainOnly<'_>) -> bool { 431 **self == *other 432 } 433 } 434 impl PartialEq<&Adblock<'_>> for DomainOnly<'_> { 435 #[inline] 436 fn eq(&self, other: &&Adblock<'_>) -> bool { 437 *self == **other 438 } 439 } 440 
impl PartialEq<Adblock<'_>> for &DomainOnly<'_> { 441 #[inline] 442 fn eq(&self, other: &Adblock<'_>) -> bool { 443 **self == *other 444 } 445 } 446 impl PartialEq<Hosts<'_>> for Adblock<'_> { 447 #[inline] 448 fn eq(&self, other: &Hosts<'_>) -> bool { 449 !self.subdomains && self.domain == other.domain 450 } 451 } 452 impl PartialEq<&Hosts<'_>> for Adblock<'_> { 453 #[inline] 454 fn eq(&self, other: &&Hosts<'_>) -> bool { 455 *self == **other 456 } 457 } 458 impl PartialEq<Hosts<'_>> for &Adblock<'_> { 459 #[inline] 460 fn eq(&self, other: &Hosts<'_>) -> bool { 461 **self == *other 462 } 463 } 464 impl PartialEq<&Adblock<'_>> for Hosts<'_> { 465 #[inline] 466 fn eq(&self, other: &&Adblock<'_>) -> bool { 467 *self == **other 468 } 469 } 470 impl PartialEq<Adblock<'_>> for &Hosts<'_> { 471 #[inline] 472 fn eq(&self, other: &Adblock<'_>) -> bool { 473 **self == *other 474 } 475 } 476 impl PartialEq<Wildcard<'_>> for Adblock<'_> { 477 #[expect(clippy::suspicious_operation_groupings, reason = "false positive")] 478 #[inline] 479 fn eq(&self, other: &Wildcard<'_>) -> bool { 480 !(self.subdomains || other.proper_subdomains) && self.domain == other.domain 481 } 482 } 483 impl PartialEq<&Wildcard<'_>> for Adblock<'_> { 484 #[inline] 485 fn eq(&self, other: &&Wildcard<'_>) -> bool { 486 *self == **other 487 } 488 } 489 impl PartialEq<Wildcard<'_>> for &Adblock<'_> { 490 #[inline] 491 fn eq(&self, other: &Wildcard<'_>) -> bool { 492 **self == *other 493 } 494 } 495 impl PartialEq<&Adblock<'_>> for Wildcard<'_> { 496 #[inline] 497 fn eq(&self, other: &&Adblock<'_>) -> bool { 498 *self == **other 499 } 500 } 501 impl PartialEq<Adblock<'_>> for &Wildcard<'_> { 502 #[inline] 503 fn eq(&self, other: &Adblock<'_>) -> bool { 504 **self == *other 505 } 506 } 507 impl Eq for Adblock<'_> {} 508 impl Hash for Adblock<'_> { 509 #[inline] 510 fn hash<H: Hasher>(&self, state: &mut H) { 511 self.domain.hash(state); 512 } 513 } 514 impl PartialOrd<Adblock<'_>> for Adblock<'_> { 515 
#[inline] 516 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 517 Some(self.cmp(other)) 518 } 519 } 520 impl Ord for Adblock<'_> { 521 /// The total order that is defined follows the following hierarchy: 522 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 523 /// 2. If 1. evaluates as not equivalent, then return the result. 524 /// 3. If both domains represent a single `Domain`, then return the comparison 525 /// of label counts. 526 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 527 /// 5. Return the inverse of the comparison of label counts. 528 /// 529 /// For example, `com` `<` `example.com` `<` `||example.com` `<` `||com` `<` `net` `<` `example.net` `<` `||example.net` `<` `||net`. 530 #[inline] 531 fn cmp(&self, other: &Self) -> Ordering { 532 match self.domain.cmp_by_domain_ordering(&other.domain) { 533 DomainOrdering::Less => Ordering::Less, 534 DomainOrdering::Shorter => { 535 if self.subdomains { 536 Ordering::Greater 537 } else { 538 Ordering::Less 539 } 540 } 541 DomainOrdering::Equal => { 542 if self.subdomains { 543 if other.subdomains { 544 Ordering::Equal 545 } else { 546 Ordering::Greater 547 } 548 } else if other.subdomains { 549 Ordering::Less 550 } else { 551 Ordering::Equal 552 } 553 } 554 DomainOrdering::Longer => { 555 if self.subdomains { 556 if other.subdomains { 557 Ordering::Less 558 } else { 559 Ordering::Greater 560 } 561 } else if other.subdomains { 562 Ordering::Less 563 } else { 564 Ordering::Greater 565 } 566 } 567 DomainOrdering::Greater => Ordering::Greater, 568 } 569 } 570 } 571 impl PartialOrd<DomainOnly<'_>> for Adblock<'_> { 572 #[inline] 573 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 574 Some(self.cmp_domain_only(other)) 575 } 576 } 577 impl PartialOrd<Hosts<'_>> for Adblock<'_> { 578 #[inline] 579 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 580 Some(self.cmp_hosts(other)) 581 } 582 } 
583 impl PartialOrd<Wildcard<'_>> for Adblock<'_> { 584 #[inline] 585 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 586 Some(self.cmp_wildcard(other)) 587 } 588 } 589 impl<'a> Set for Adblock<'a> { 590 type Elem = Domain<&'a str>; 591 #[inline] 592 fn bounded_cardinality(&self) -> BoundedCardinality { 593 BoundedCardinality::from_biguint_exact(self.domain_count()) 594 } 595 #[inline] 596 fn cardinality(&self) -> Option<Cardinality> { 597 Some(Cardinality::Finite(self.domain_count())) 598 } 599 #[inline] 600 fn contains<Q>(&self, elem: &Q) -> bool 601 where 602 Q: Borrow<Self::Elem> + Eq + ?Sized, 603 { 604 if self.subdomains { 605 matches!( 606 self.domain.cmp_by_domain_ordering(elem.borrow()), 607 DomainOrdering::Shorter 608 ) 609 } else { 610 self.domain == *elem.borrow() 611 } 612 } 613 #[inline] 614 fn is_proper_subset(&self, val: &Self) -> bool { 615 // A single domain can never be a proper superset. Subdomains` cannot be a proper superset if it has 616 // more labels or the same number of labels as another subdomains. In all other cases, we need to 617 // recursively check from the TLD that the labels are the same. 
618 val.subdomains 619 && match val.domain.cmp_by_domain_ordering(&self.domain) { 620 DomainOrdering::Less | DomainOrdering::Longer | DomainOrdering::Greater => false, 621 DomainOrdering::Shorter => true, 622 DomainOrdering::Equal => !self.subdomains, 623 } 624 } 625 #[inline] 626 fn is_subset(&self, val: &Self) -> bool { 627 self == val || self.is_proper_subset(val) 628 } 629 } 630 impl SetOrd for Adblock<'_> {} 631 impl<'a> Deref for Adblock<'a> { 632 type Target = Domain<&'a str>; 633 #[inline] 634 fn deref(&self) -> &Self::Target { 635 &self.domain 636 } 637 } 638 impl<'a> ParsedDomain<'a> for Adblock<'a> { 639 type Error = FirefoxDomainErr; 640 #[expect(single_use_lifetimes, reason = "false positive")] 641 #[expect( 642 unsafe_code, 643 clippy::indexing_slicing, 644 reason = "we carefully verify what we are doing" 645 )] 646 #[inline] 647 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 648 // First remove leading whitepace. Then check for comments via '#' and '!'. Return Blank iff empty. 649 // Return Comment iff '#' or '!' is the first character. Remove trailing whitespace. Next remove the 650 // last byte if it is '^' as well as whitespace before. Next track and remove '||' at the beginning 651 // and any subsequent whitespace. 652 let mut value = val.as_bytes().trim_ascii_start(); 653 value.first().map_or_else( 654 || Ok(Value::Blank), 655 |byt| { 656 if *byt == b'#' || *byt == b'!' { 657 // SAFETY: 658 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 659 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 
660 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 661 Ok(Value::Comment(comment)) 662 } else { 663 value = value.trim_ascii_end(); 664 let len = value.len().wrapping_sub(1); 665 value = value.get(len).map_or(value, |byt2| { 666 if *byt2 == b'^' { 667 value[..len].trim_ascii_end() 668 } else { 669 value 670 } 671 }); 672 let (subdomains, val2) = value.get(..2).map_or_else( 673 || (false, value), 674 |fst| { 675 if fst == b"||" { 676 (true, value[2..].trim_ascii_start()) 677 } else { 678 (false, value) 679 } 680 }, 681 ); 682 // `Domain`s allow `$`, but we don't want to allow that symbol for Adblock-style rules. 683 val2.iter() 684 .try_fold((), |(), byt2| { 685 if *byt2 == b'$' { 686 Err(FirefoxDomainErr::InvalidAdblockDomain) 687 } else { 688 Ok(()) 689 } 690 }) 691 .and_then(|()| { 692 domain_icann_tld(val2).map(|domain| { 693 // A domain of length 252 or 253 can't have subdomains due to there not being enough 694 // characters. 695 Value::Domain(Self { 696 subdomains: if domain.len().get() > 251 { 697 false 698 } else { 699 subdomains 700 }, 701 domain, 702 }) 703 }) 704 }) 705 } 706 }, 707 ) 708 } 709 #[inline] 710 fn domain(&self) -> &Domain<&'a str> { 711 &self.domain 712 } 713 #[inline] 714 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 715 write_rpz_line(&mut writer, self.domain(), action, false).and_then(|()| { 716 if self.subdomains { 717 write_rpz_line(writer, self.domain(), action, true) 718 } else { 719 Ok(()) 720 } 721 }) 722 } 723 } 724 /// Domain constructed from a 725 /// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax). 
726 /// 727 /// Specifically the domain must conform to the following extended regex: 728 /// 729 /// `^<ws>*<domain><ws>*(#.*)?$` 730 /// 731 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 732 /// or at least length five and begins with `xn--`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 733 /// 734 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 735 /// are ignored and will be parsed into a [`Value::Domain`]. 736 #[derive(Clone, Debug)] 737 pub struct DomainOnly<'a> { 738 /// The `Domain`. 739 domain: Domain<&'a str>, 740 } 741 impl DomainOnly<'_> { 742 /// Read [`Adblock::cmp_domain_only`]. 743 #[inline] 744 #[must_use] 745 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 746 other.cmp_domain_only(self).reverse() 747 } 748 /// Read [`Domain::cmp`]. 749 #[inline] 750 #[must_use] 751 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 752 self.domain.cmp(&other.domain) 753 } 754 /// Read [`Wildcard::cmp_domain_only`]. 755 #[inline] 756 #[must_use] 757 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 758 other.cmp_domain_only(self).reverse() 759 } 760 /// Same as [`DomainOnly::cardinality`] except that a `NonZeroU8` is returned. 761 /// 762 /// The value is always 1. 
763 #[inline] 764 #[must_use] 765 pub const fn domain_count(&self) -> NonZeroU8 { 766 ONE 767 } 768 } 769 impl PartialEq<DomainOnly<'_>> for DomainOnly<'_> { 770 #[inline] 771 fn eq(&self, other: &DomainOnly<'_>) -> bool { 772 self.domain == other.domain 773 } 774 } 775 impl PartialEq<DomainOnly<'_>> for &DomainOnly<'_> { 776 #[inline] 777 fn eq(&self, other: &DomainOnly<'_>) -> bool { 778 **self == *other 779 } 780 } 781 impl PartialEq<&DomainOnly<'_>> for DomainOnly<'_> { 782 #[inline] 783 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 784 *self == **other 785 } 786 } 787 impl PartialEq<Adblock<'_>> for DomainOnly<'_> { 788 #[inline] 789 fn eq(&self, other: &Adblock<'_>) -> bool { 790 other == self 791 } 792 } 793 impl PartialEq<Hosts<'_>> for DomainOnly<'_> { 794 #[inline] 795 fn eq(&self, other: &Hosts<'_>) -> bool { 796 self.domain == other.domain 797 } 798 } 799 impl PartialEq<&Hosts<'_>> for DomainOnly<'_> { 800 #[inline] 801 fn eq(&self, other: &&Hosts<'_>) -> bool { 802 *self == **other 803 } 804 } 805 impl PartialEq<Hosts<'_>> for &DomainOnly<'_> { 806 #[inline] 807 fn eq(&self, other: &Hosts<'_>) -> bool { 808 **self == *other 809 } 810 } 811 impl PartialEq<&DomainOnly<'_>> for Hosts<'_> { 812 #[inline] 813 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 814 *self == **other 815 } 816 } 817 impl PartialEq<DomainOnly<'_>> for &Hosts<'_> { 818 #[inline] 819 fn eq(&self, other: &DomainOnly<'_>) -> bool { 820 **self == *other 821 } 822 } 823 impl PartialEq<Wildcard<'_>> for DomainOnly<'_> { 824 #[inline] 825 fn eq(&self, other: &Wildcard<'_>) -> bool { 826 !other.proper_subdomains && self.domain == other.domain 827 } 828 } 829 impl PartialEq<&Wildcard<'_>> for DomainOnly<'_> { 830 #[inline] 831 fn eq(&self, other: &&Wildcard<'_>) -> bool { 832 *self == **other 833 } 834 } 835 impl PartialEq<Wildcard<'_>> for &DomainOnly<'_> { 836 #[inline] 837 fn eq(&self, other: &Wildcard<'_>) -> bool { 838 **self == *other 839 } 840 } 841 impl PartialEq<&DomainOnly<'_>> 
for Wildcard<'_> { 842 #[inline] 843 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 844 *self == **other 845 } 846 } 847 impl PartialEq<DomainOnly<'_>> for &Wildcard<'_> { 848 #[inline] 849 fn eq(&self, other: &DomainOnly<'_>) -> bool { 850 **self == *other 851 } 852 } 853 impl Eq for DomainOnly<'_> {} 854 impl Hash for DomainOnly<'_> { 855 #[inline] 856 fn hash<H: Hasher>(&self, state: &mut H) { 857 self.domain.hash(state); 858 } 859 } 860 impl PartialOrd<DomainOnly<'_>> for DomainOnly<'_> { 861 #[inline] 862 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 863 Some(self.cmp(other)) 864 } 865 } 866 impl Ord for DomainOnly<'_> { 867 /// Read [`Domain::cmp`]. 868 #[inline] 869 fn cmp(&self, other: &Self) -> Ordering { 870 self.domain.cmp(&other.domain) 871 } 872 } 873 impl PartialOrd<Adblock<'_>> for DomainOnly<'_> { 874 #[inline] 875 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 876 Some(self.cmp_adblock(other)) 877 } 878 } 879 impl PartialOrd<Hosts<'_>> for DomainOnly<'_> { 880 #[inline] 881 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 882 Some(self.cmp_hosts(other)) 883 } 884 } 885 impl PartialOrd<Wildcard<'_>> for DomainOnly<'_> { 886 #[inline] 887 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 888 Some(self.cmp_wildcard(other)) 889 } 890 } 891 impl Display for DomainOnly<'_> { 892 #[inline] 893 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 894 self.domain.fmt(f) 895 } 896 } 897 impl<'a> Set for DomainOnly<'a> { 898 type Elem = Domain<&'a str>; 899 #[inline] 900 fn bounded_cardinality(&self) -> BoundedCardinality { 901 BoundedCardinality::from_biguint_exact(self.domain_count().get().into()) 902 } 903 #[inline] 904 fn cardinality(&self) -> Option<Cardinality> { 905 Some(Cardinality::Finite(self.domain_count().get().into())) 906 } 907 #[inline] 908 fn contains<Q>(&self, elem: &Q) -> bool 909 where 910 Q: Borrow<Self::Elem> + Eq + ?Sized, 911 { 912 self.domain == *elem.borrow() 913 
} 914 #[inline] 915 fn is_proper_subset(&self, _: &Self) -> bool { 916 false 917 } 918 #[inline] 919 fn is_subset(&self, val: &Self) -> bool { 920 self == val 921 } 922 } 923 impl SetOrd for DomainOnly<'_> {} 924 impl<'a> Deref for DomainOnly<'a> { 925 type Target = Domain<&'a str>; 926 #[inline] 927 fn deref(&self) -> &Self::Target { 928 &self.domain 929 } 930 } 931 impl<'a> ParsedDomain<'a> for DomainOnly<'a> { 932 type Error = FirefoxDomainErr; 933 #[expect(single_use_lifetimes, reason = "false positive")] 934 #[expect( 935 unsafe_code, 936 clippy::arithmetic_side_effects, 937 clippy::indexing_slicing, 938 reason = "we verify all use is correct" 939 )] 940 #[inline] 941 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 942 let value = val.as_bytes().trim_ascii_start(); 943 value.first().map_or_else( 944 || Ok(Value::Blank), 945 |byt| { 946 if *byt == b'#' { 947 // SAFETY: 948 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 949 // since the first byte is '#' or '$' the remaining bytes are still valid UTF-8. 950 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 951 Ok(Value::Comment(comment)) 952 } else { 953 domain_icann_tld( 954 value[..value 955 .iter() 956 .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) 957 .map_or_else(convert::identity, convert::identity)] 958 .trim_ascii_end(), 959 ) 960 .map(|domain| Value::Domain(Self { domain })) 961 } 962 }, 963 ) 964 } 965 #[inline] 966 fn domain(&self) -> &Domain<&'a str> { 967 &self.domain 968 } 969 #[inline] 970 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 971 write_rpz_line(&mut writer, self.domain(), action, false) 972 } 973 } 974 /// Domain constructed from a 975 /// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax). 
976 /// 977 /// Specifically the domain must conform to the following extended regex: 978 /// 979 /// `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` 980 /// 981 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 982 /// or at least length five and begins with `xn--`, `<ws>` is any sequence of 983 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace), and `<ip>` is one of the following: 984 /// 985 /// `::`, `::1`, `0.0.0.0`, or `127.0.0.1`. 986 /// 987 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 988 /// are ignored and will be parsed into a [`Value::Domain`]. 989 #[derive(Clone, Debug)] 990 pub struct Hosts<'a> { 991 /// The `Domain`. 992 domain: Domain<&'a str>, 993 } 994 impl Hosts<'_> { 995 /// Read [`Adblock::cmp_hosts`]. 996 #[inline] 997 #[must_use] 998 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 999 other.cmp_hosts(self).reverse() 1000 } 1001 /// Read [`DomainOnly::cmp_hosts`]. 1002 #[inline] 1003 #[must_use] 1004 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 1005 other.cmp_hosts(self).reverse() 1006 } 1007 /// Read [`Wildcard::cmp_hosts`]. 1008 #[inline] 1009 #[must_use] 1010 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 1011 other.cmp_hosts(self).reverse() 1012 } 1013 /// Same as [`Hosts::cardinality`] except that a `NonZeroU8` is returned. 1014 /// 1015 /// The value is always 1. 
1016 #[inline] 1017 #[must_use] 1018 pub const fn domain_count(&self) -> NonZeroU8 { 1019 ONE 1020 } 1021 } 1022 impl PartialEq<Hosts<'_>> for Hosts<'_> { 1023 #[inline] 1024 fn eq(&self, other: &Hosts<'_>) -> bool { 1025 self.domain == other.domain 1026 } 1027 } 1028 impl PartialEq<Hosts<'_>> for &Hosts<'_> { 1029 #[inline] 1030 fn eq(&self, other: &Hosts<'_>) -> bool { 1031 **self == *other 1032 } 1033 } 1034 impl PartialEq<&Hosts<'_>> for Hosts<'_> { 1035 #[inline] 1036 fn eq(&self, other: &&Hosts<'_>) -> bool { 1037 *self == **other 1038 } 1039 } 1040 impl PartialEq<Adblock<'_>> for Hosts<'_> { 1041 #[inline] 1042 fn eq(&self, other: &Adblock<'_>) -> bool { 1043 other == self 1044 } 1045 } 1046 impl PartialEq<DomainOnly<'_>> for Hosts<'_> { 1047 #[inline] 1048 fn eq(&self, other: &DomainOnly<'_>) -> bool { 1049 other == self 1050 } 1051 } 1052 impl PartialEq<Wildcard<'_>> for Hosts<'_> { 1053 #[inline] 1054 fn eq(&self, other: &Wildcard<'_>) -> bool { 1055 !other.proper_subdomains && self.domain == other.domain 1056 } 1057 } 1058 impl PartialEq<&Wildcard<'_>> for Hosts<'_> { 1059 #[inline] 1060 fn eq(&self, other: &&Wildcard<'_>) -> bool { 1061 *self == **other 1062 } 1063 } 1064 impl PartialEq<Wildcard<'_>> for &Hosts<'_> { 1065 #[inline] 1066 fn eq(&self, other: &Wildcard<'_>) -> bool { 1067 **self == *other 1068 } 1069 } 1070 impl PartialEq<&Hosts<'_>> for Wildcard<'_> { 1071 #[inline] 1072 fn eq(&self, other: &&Hosts<'_>) -> bool { 1073 *self == **other 1074 } 1075 } 1076 impl PartialEq<Hosts<'_>> for &Wildcard<'_> { 1077 #[inline] 1078 fn eq(&self, other: &Hosts<'_>) -> bool { 1079 **self == *other 1080 } 1081 } 1082 impl Eq for Hosts<'_> {} 1083 impl Hash for Hosts<'_> { 1084 #[inline] 1085 fn hash<H: Hasher>(&self, state: &mut H) { 1086 self.domain.hash(state); 1087 } 1088 } 1089 impl PartialOrd<Hosts<'_>> for Hosts<'_> { 1090 #[inline] 1091 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 1092 Some(self.cmp(other)) 1093 } 1094 } 1095 impl 
Ord for Hosts<'_> { 1096 /// Read [`Domain::cmp`]. 1097 #[inline] 1098 fn cmp(&self, other: &Self) -> Ordering { 1099 self.domain.cmp(&other.domain) 1100 } 1101 } 1102 impl PartialOrd<Adblock<'_>> for Hosts<'_> { 1103 #[inline] 1104 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 1105 Some(self.cmp_adblock(other)) 1106 } 1107 } 1108 impl PartialOrd<DomainOnly<'_>> for Hosts<'_> { 1109 #[inline] 1110 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 1111 Some(self.cmp_domain_only(other)) 1112 } 1113 } 1114 impl PartialOrd<Wildcard<'_>> for Hosts<'_> { 1115 #[inline] 1116 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 1117 Some(self.cmp_wildcard(other)) 1118 } 1119 } 1120 impl Display for Hosts<'_> { 1121 #[inline] 1122 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1123 self.domain.fmt(f) 1124 } 1125 } 1126 impl<'a> Set for Hosts<'a> { 1127 type Elem = Domain<&'a str>; 1128 #[inline] 1129 fn bounded_cardinality(&self) -> BoundedCardinality { 1130 BoundedCardinality::from_biguint_exact(self.domain_count().get().into()) 1131 } 1132 #[inline] 1133 fn cardinality(&self) -> Option<Cardinality> { 1134 Some(Cardinality::Finite(self.domain_count().get().into())) 1135 } 1136 #[inline] 1137 fn contains<Q>(&self, elem: &Q) -> bool 1138 where 1139 Q: Borrow<Self::Elem> + Eq + ?Sized, 1140 { 1141 self.domain == *elem.borrow() 1142 } 1143 #[inline] 1144 fn is_proper_subset(&self, _: &Self) -> bool { 1145 false 1146 } 1147 #[inline] 1148 fn is_subset(&self, val: &Self) -> bool { 1149 self == val 1150 } 1151 } 1152 impl SetOrd for Hosts<'_> {} 1153 impl<'a> Deref for Hosts<'a> { 1154 type Target = Domain<&'a str>; 1155 #[inline] 1156 fn deref(&self) -> &Self::Target { 1157 &self.domain 1158 } 1159 } 1160 impl<'a> ParsedDomain<'a> for Hosts<'a> { 1161 type Error = FirefoxDomainErr; 1162 #[expect(single_use_lifetimes, reason = "false positive")] 1163 #[expect( 1164 unsafe_code, 1165 clippy::arithmetic_side_effects, 1166 
clippy::indexing_slicing, 1167 reason = "carefully verified use is correct" 1168 )] 1169 #[inline] 1170 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 1171 let mut value = val.as_bytes().trim_ascii_start(); 1172 value.first().map_or_else( 1173 || Ok(Value::Blank), 1174 |byt| { 1175 if *byt == b'#' { 1176 // SAFETY: 1177 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 1178 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 1179 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 1180 Ok(Value::Comment(comment)) 1181 } else { 1182 value = value 1183 .get(..3) 1184 .ok_or(FirefoxDomainErr::InvalidHostsIP) 1185 .and_then(|fst| { 1186 if fst == b"::1" { 1187 Ok(&value[3..]) 1188 } else if &value[..2] == b"::" { 1189 Ok(&value[2..]) 1190 } else { 1191 value 1192 .get(..7) 1193 .ok_or(FirefoxDomainErr::InvalidHostsIP) 1194 .and_then(|fst2| { 1195 if fst2 == b"0.0.0.0" { 1196 Ok(&value[7..]) 1197 } else { 1198 value 1199 .get(..9) 1200 .ok_or(FirefoxDomainErr::InvalidHostsIP) 1201 .and_then(|fst3| { 1202 if fst3 == b"127.0.0.1" { 1203 Ok(&value[9..]) 1204 } else { 1205 Err(FirefoxDomainErr::InvalidHostsIP) 1206 } 1207 }) 1208 } 1209 }) 1210 } 1211 })?; 1212 let len = value.len(); 1213 value = value.trim_ascii_start(); 1214 if len == value.len() { 1215 // There has to be at least one space or tab between the IP and domain. 
1216 Err(FirefoxDomainErr::InvalidHostsIP) 1217 } else { 1218 domain_icann_tld( 1219 value[..value 1220 .iter() 1221 .try_fold( 1222 0, 1223 |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }, 1224 ) 1225 .map_or_else(convert::identity, convert::identity)] 1226 .trim_ascii_end(), 1227 ) 1228 .map(|domain| Value::Domain(Self { domain })) 1229 } 1230 } 1231 }, 1232 ) 1233 } 1234 #[inline] 1235 fn domain(&self) -> &Domain<&'a str> { 1236 &self.domain 1237 } 1238 #[inline] 1239 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 1240 write_rpz_line(&mut writer, self.domain(), action, false) 1241 } 1242 } 1243 /// Domain constructed from a 1244 /// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext). 1245 /// 1246 /// Specifically the domain must conform to the following extended regex: 1247 /// 1248 /// `^<ws>*(\*\.)?<domain><ws>*(#.*)?$` 1249 /// 1250 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 1251 /// or at least length five and begins with `xn--`, and `<ws>` is any sequence of 1252 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 1253 /// 1254 /// If `domain` begins with `*.`, then `domain` must have length less than 252. 1255 /// 1256 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 1257 /// are ignored and will be parsed into a [`Value::Domain`]. 1258 #[derive(Clone, Debug)] 1259 pub struct Wildcard<'a> { 1260 /// The `Domain`. 1261 domain: Domain<&'a str>, 1262 /// `true` iff `domain` represents all proper subdomains. Note that this does _not_ include `domain` itself. 1263 proper_subdomains: bool, 1264 } 1265 impl Wildcard<'_> { 1266 /// Returns `true` iff the contained [`Domain`] represents all proper subdomains. Note this does _not_ 1267 /// include the `Domain` itself. 
1268 #[inline] 1269 #[must_use] 1270 pub const fn is_proper_subdomains(&self) -> bool { 1271 self.proper_subdomains 1272 } 1273 /// Read [`Adblock::cmp_wildcard`]. 1274 #[inline] 1275 #[must_use] 1276 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 1277 other.cmp_wildcard(self).reverse() 1278 } 1279 /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used 1280 /// for both. 1281 #[must_use] 1282 fn cmp_dom(&self, other: &Domain<&str>) -> Ordering { 1283 match self.domain.cmp_by_domain_ordering(other) { 1284 DomainOrdering::Less => Ordering::Less, 1285 DomainOrdering::Shorter => { 1286 if self.proper_subdomains { 1287 Ordering::Greater 1288 } else { 1289 Ordering::Less 1290 } 1291 } 1292 DomainOrdering::Equal => { 1293 if self.proper_subdomains { 1294 Ordering::Greater 1295 } else { 1296 Ordering::Equal 1297 } 1298 } 1299 DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater, 1300 } 1301 } 1302 /// The total order that is defined follows the following hierarchy: 1303 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 1304 /// 2. If 1. evaluates as not equivalent, then return the result. 1305 /// 3. If `self` represents a single `Domain` (i.e., `!self.is_proper_subdomains()`), 1306 /// then return the comparison of label counts. 1307 /// 4. Return `self` is greater. 1308 /// 1309 /// For example, `com` `<` `example.com` `<` `*.example.com` `<` `*.com` `<` `net` `<` `example.net` `<` `*.example.net` `<` `*.net`. 1310 #[inline] 1311 #[must_use] 1312 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 1313 self.cmp_dom(&other.domain) 1314 } 1315 /// Read [`Wildcard::cmp_domain_only`]. 1316 #[inline] 1317 #[must_use] 1318 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 1319 self.cmp_dom(&other.domain) 1320 } 1321 /// Same as [`Wildcard::cardinality`] except that a `BigUint` is returned. 
Note the count does _not_ include 1322 /// the `Domain` itself when `self.is_proper_subdomains()`. 1323 /// 1324 /// `!self.is_proper_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`. 1325 #[inline] 1326 #[must_use] 1327 pub fn domain_count(&self) -> BigUint { 1328 if self.proper_subdomains { 1329 proper_subdomain_count(&self.domain) 1330 } else { 1331 BigUint::new(vec![1]) 1332 } 1333 } 1334 } 1335 impl PartialEq<Wildcard<'_>> for Wildcard<'_> { 1336 #[inline] 1337 fn eq(&self, other: &Wildcard<'_>) -> bool { 1338 self.domain == other.domain && self.proper_subdomains == other.proper_subdomains 1339 } 1340 } 1341 impl PartialEq<Wildcard<'_>> for &Wildcard<'_> { 1342 #[inline] 1343 fn eq(&self, other: &Wildcard<'_>) -> bool { 1344 **self == *other 1345 } 1346 } 1347 impl PartialEq<&Wildcard<'_>> for Wildcard<'_> { 1348 #[inline] 1349 fn eq(&self, other: &&Wildcard<'_>) -> bool { 1350 *self == **other 1351 } 1352 } 1353 impl PartialEq<Adblock<'_>> for Wildcard<'_> { 1354 #[inline] 1355 fn eq(&self, other: &Adblock<'_>) -> bool { 1356 other == self 1357 } 1358 } 1359 impl PartialEq<DomainOnly<'_>> for Wildcard<'_> { 1360 #[inline] 1361 fn eq(&self, other: &DomainOnly<'_>) -> bool { 1362 other == self 1363 } 1364 } 1365 impl PartialEq<Hosts<'_>> for Wildcard<'_> { 1366 #[inline] 1367 fn eq(&self, other: &Hosts<'_>) -> bool { 1368 other == self 1369 } 1370 } 1371 impl Eq for Wildcard<'_> {} 1372 impl Hash for Wildcard<'_> { 1373 #[inline] 1374 fn hash<H: Hasher>(&self, state: &mut H) { 1375 self.domain.hash(state); 1376 } 1377 } 1378 impl PartialOrd<Wildcard<'_>> for Wildcard<'_> { 1379 #[inline] 1380 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 1381 Some(self.cmp(other)) 1382 } 1383 } 1384 impl Ord for Wildcard<'_> { 1385 /// The total order that is defined follows the following hierarchy: 1386 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 1387 /// 2. If 1. 
evaluates as not equivalent, then return the result. 1388 /// 3. If both domains represent a single `Domain`, then return the comparison 1389 /// of label counts. 1390 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 1391 /// 5. Return the inverse of the comparison of label counts. 1392 /// 1393 /// For example, `com` `<` `example.com` `<` `*.example.com` `<` `*.com` `<` `net` `<` `example.net` `<` `*.example.net` `<` `*.net`. 1394 #[inline] 1395 fn cmp(&self, other: &Self) -> Ordering { 1396 match self.domain.cmp_by_domain_ordering(&other.domain) { 1397 DomainOrdering::Less => Ordering::Less, 1398 DomainOrdering::Shorter => { 1399 if self.proper_subdomains { 1400 Ordering::Greater 1401 } else { 1402 Ordering::Less 1403 } 1404 } 1405 DomainOrdering::Equal => { 1406 if self.proper_subdomains { 1407 if other.proper_subdomains { 1408 Ordering::Equal 1409 } else { 1410 Ordering::Greater 1411 } 1412 } else if other.proper_subdomains { 1413 Ordering::Less 1414 } else { 1415 Ordering::Equal 1416 } 1417 } 1418 DomainOrdering::Longer => { 1419 if self.proper_subdomains { 1420 if other.proper_subdomains { 1421 Ordering::Less 1422 } else { 1423 Ordering::Greater 1424 } 1425 } else if other.proper_subdomains { 1426 Ordering::Less 1427 } else { 1428 Ordering::Greater 1429 } 1430 } 1431 DomainOrdering::Greater => Ordering::Greater, 1432 } 1433 } 1434 } 1435 impl PartialOrd<Adblock<'_>> for Wildcard<'_> { 1436 #[inline] 1437 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 1438 Some(self.cmp_adblock(other)) 1439 } 1440 } 1441 impl PartialOrd<DomainOnly<'_>> for Wildcard<'_> { 1442 #[inline] 1443 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 1444 Some(self.cmp_domain_only(other)) 1445 } 1446 } 1447 impl PartialOrd<Hosts<'_>> for Wildcard<'_> { 1448 #[inline] 1449 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 1450 Some(self.cmp_hosts(other)) 1451 } 1452 } 1453 impl Display for 
Wildcard<'_> { 1454 #[inline] 1455 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1456 write!( 1457 f, 1458 "{}{}", 1459 if self.proper_subdomains { "*." } else { "" }, 1460 self.domain 1461 ) 1462 } 1463 } 1464 impl<'a> Set for Wildcard<'a> { 1465 type Elem = Domain<&'a str>; 1466 #[inline] 1467 fn bounded_cardinality(&self) -> BoundedCardinality { 1468 BoundedCardinality::from_biguint_exact(self.domain_count()) 1469 } 1470 #[inline] 1471 fn cardinality(&self) -> Option<Cardinality> { 1472 Some(Cardinality::Finite(self.domain_count())) 1473 } 1474 #[inline] 1475 fn contains<Q>(&self, elem: &Q) -> bool 1476 where 1477 Q: Borrow<Self::Elem> + Eq + ?Sized, 1478 { 1479 if self.proper_subdomains { 1480 self.domain.cmp_by_domain_ordering(elem.borrow()) == DomainOrdering::Shorter 1481 } else { 1482 self.domain == *elem.borrow() 1483 } 1484 } 1485 #[inline] 1486 fn is_proper_subset(&self, val: &Self) -> bool { 1487 // A single domain can never be a proper superset. Proper subdomains cannot be a proper superset if it 1488 // has more labels or the same number of labels as another domain. In all other cases, we need to 1489 // recursively check from the TLD that the labels are the same. 1490 val.proper_subdomains 1491 && val.domain.cmp_by_domain_ordering(&self.domain) == DomainOrdering::Shorter 1492 } 1493 #[inline] 1494 fn is_subset(&self, val: &Self) -> bool { 1495 self == val || self.is_proper_subset(val) 1496 } 1497 } 1498 impl SetOrd for Wildcard<'_> {} 1499 impl<'a> Deref for Wildcard<'a> { 1500 type Target = Domain<&'a str>; 1501 #[inline] 1502 fn deref(&self) -> &Self::Target { 1503 &self.domain 1504 } 1505 } 1506 impl<'a> ParsedDomain<'a> for Wildcard<'a> { 1507 type Error = FirefoxDomainErr; 1508 #[expect(single_use_lifetimes, reason = "false positive")] 1509 #[expect( 1510 unsafe_code, 1511 clippy::arithmetic_side_effects, 1512 clippy::indexing_slicing, 1513 reason = "need them all. care has been taken." 
1514 )] 1515 #[inline] 1516 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 1517 let value = val.as_bytes().trim_ascii_start(); 1518 value.first().map_or_else( 1519 || Ok(Value::Blank), 1520 |byt| { 1521 if *byt == b'#' { 1522 // SAFETY: 1523 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 1524 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 1525 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 1526 Ok(Value::Comment(comment)) 1527 } else { 1528 let (proper_subdomains, val2) = value.get(..2).map_or_else( 1529 || (false, value), 1530 |fst| { 1531 if fst == b"*." { 1532 (true, &value[2..]) 1533 } else { 1534 (false, value) 1535 } 1536 }, 1537 ); 1538 domain_icann_tld( 1539 val2[..val2 1540 .iter() 1541 .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) 1542 .map_or_else(convert::identity, convert::identity)] 1543 .trim_ascii_end(), 1544 ) 1545 .and_then(|domain| { 1546 if proper_subdomains { 1547 if domain.len().get() > 251 { 1548 Err(FirefoxDomainErr::InvalidWildcardDomain) 1549 } else { 1550 Ok(Value::Domain(Self { 1551 domain, 1552 proper_subdomains: true, 1553 })) 1554 } 1555 } else { 1556 Ok(Value::Domain(Self { 1557 domain, 1558 proper_subdomains, 1559 })) 1560 } 1561 }) 1562 } 1563 }, 1564 ) 1565 } 1566 #[inline] 1567 fn domain(&self) -> &Domain<&'a str> { 1568 &self.domain 1569 } 1570 #[inline] 1571 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 1572 write_rpz_line(&mut writer, self.domain(), action, self.proper_subdomains) 1573 } 1574 } 1575 /// A [`Domain`] in a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. 1576 #[derive(Clone, Debug)] 1577 pub enum RpzDomain<'a> { 1578 /// An `Adblock` domain. 1579 Adblock(Adblock<'a>), 1580 /// A `DomainOnly` domain. 1581 DomainOnly(DomainOnly<'a>), 1582 /// A `Hosts` domain. 
1583 Hosts(Hosts<'a>), 1584 /// A `Wildcard` domain. 1585 Wildcard(Wildcard<'a>), 1586 } 1587 impl RpzDomain<'_> { 1588 /// Returns `true` iff `self` represents a single [`Domain`]. 1589 #[inline] 1590 #[must_use] 1591 pub const fn is_domain(&self) -> bool { 1592 match *self { 1593 Self::Adblock(ref dom) => !dom.subdomains, 1594 Self::DomainOnly(_) | Self::Hosts(_) => true, 1595 Self::Wildcard(ref dom) => !dom.proper_subdomains, 1596 } 1597 } 1598 /// Returns `true` iff `self` represents proper subdomains of the contained [`Domain`] (i.e., 1599 /// is a [`Wildcard`] such that [`Wildcard::is_proper_subdomains`]). 1600 #[inline] 1601 #[must_use] 1602 pub const fn is_proper_subdomains(&self) -> bool { 1603 match *self { 1604 Self::Adblock(_) | Self::DomainOnly(_) | Self::Hosts(_) => false, 1605 Self::Wildcard(ref dom) => dom.proper_subdomains, 1606 } 1607 } 1608 /// Returns `true` iff `self` represents subdomains of the contained [`Domain`] (i.e., is an 1609 /// [`Adblock`] such that [`Adblock::is_subdomains`]). 1610 #[inline] 1611 #[must_use] 1612 pub const fn is_subdomains(&self) -> bool { 1613 match *self { 1614 Self::Adblock(ref dom) => dom.subdomains, 1615 Self::DomainOnly(_) | Self::Hosts(_) | Self::Wildcard(_) => false, 1616 } 1617 } 1618 /// Returns the count of [`Domain`]s represented by `self`. This function is the same as 1619 /// [`RpzDomain::cardinality`] except that it returns a `BigUint`. 
1620 #[inline] 1621 #[must_use] 1622 pub fn domain_count(&self) -> BigUint { 1623 match *self { 1624 Self::Adblock(ref dom) => dom.domain_count(), 1625 Self::DomainOnly(ref dom) => dom.domain_count().get().into(), 1626 Self::Hosts(ref dom) => dom.domain_count().get().into(), 1627 Self::Wildcard(ref dom) => dom.domain_count(), 1628 } 1629 } 1630 } 1631 impl PartialEq<RpzDomain<'_>> for RpzDomain<'_> { 1632 #[inline] 1633 fn eq(&self, other: &RpzDomain<'_>) -> bool { 1634 match *self { 1635 Self::Adblock(ref dom) => match *other { 1636 RpzDomain::Adblock(ref dom2) => dom == dom2, 1637 RpzDomain::DomainOnly(ref dom2) => dom == dom2, 1638 RpzDomain::Hosts(ref dom2) => dom == dom2, 1639 RpzDomain::Wildcard(ref dom2) => dom == dom2, 1640 }, 1641 Self::DomainOnly(ref dom) => match *other { 1642 RpzDomain::Adblock(ref dom2) => dom == dom2, 1643 RpzDomain::DomainOnly(ref dom2) => dom == dom2, 1644 RpzDomain::Hosts(ref dom2) => dom == dom2, 1645 RpzDomain::Wildcard(ref dom2) => dom == dom2, 1646 }, 1647 Self::Hosts(ref dom) => match *other { 1648 RpzDomain::Adblock(ref dom2) => dom == dom2, 1649 RpzDomain::DomainOnly(ref dom2) => dom == dom2, 1650 RpzDomain::Hosts(ref dom2) => dom == dom2, 1651 RpzDomain::Wildcard(ref dom2) => dom == dom2, 1652 }, 1653 Self::Wildcard(ref dom) => match *other { 1654 RpzDomain::Adblock(ref dom2) => dom == dom2, 1655 RpzDomain::DomainOnly(ref dom2) => dom == dom2, 1656 RpzDomain::Hosts(ref dom2) => dom == dom2, 1657 RpzDomain::Wildcard(ref dom2) => dom == dom2, 1658 }, 1659 } 1660 } 1661 } 1662 impl PartialEq<RpzDomain<'_>> for &RpzDomain<'_> { 1663 #[inline] 1664 fn eq(&self, other: &RpzDomain<'_>) -> bool { 1665 **self == *other 1666 } 1667 } 1668 impl PartialEq<&RpzDomain<'_>> for RpzDomain<'_> { 1669 #[inline] 1670 fn eq(&self, other: &&RpzDomain<'_>) -> bool { 1671 *self == **other 1672 } 1673 } 1674 impl Eq for RpzDomain<'_> {} 1675 impl Hash for RpzDomain<'_> { 1676 #[inline] 1677 fn hash<H: Hasher>(&self, state: &mut H) { 1678 
self.domain().hash(state); 1679 } 1680 } 1681 impl PartialOrd<RpzDomain<'_>> for RpzDomain<'_> { 1682 #[inline] 1683 fn partial_cmp(&self, other: &RpzDomain<'_>) -> Option<Ordering> { 1684 Some(self.cmp(other)) 1685 } 1686 } 1687 impl Ord for RpzDomain<'_> { 1688 /// The total order that is defined follows the following hierarchy: 1689 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 1690 /// 2. If 1. evaluates as not equivalent, then return the result. 1691 /// 3. If both domains represent a single `Domain`, then return the comparison 1692 /// of label counts. 1693 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 1694 /// 5. If the label counts are the same and exactly one domain represents proper subdomains, the other domain is greater. 1695 /// 6. Return the inverse of the comparison of label counts. 1696 /// 1697 /// For example the following is a sequence of domains in 1698 /// ascending order: 1699 /// 1700 /// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net`. 
1701 #[inline] 1702 fn cmp(&self, other: &Self) -> Ordering { 1703 match *self { 1704 Self::Adblock(ref dom) => match *other { 1705 Self::Adblock(ref dom2) => dom.cmp(dom2), 1706 Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2), 1707 Self::Hosts(ref dom2) => dom.cmp_hosts(dom2), 1708 Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2), 1709 }, 1710 Self::DomainOnly(ref dom) => match *other { 1711 Self::Adblock(ref dom2) => dom.cmp_adblock(dom2), 1712 Self::DomainOnly(ref dom2) => dom.cmp(dom2), 1713 Self::Hosts(ref dom2) => dom.cmp_hosts(dom2), 1714 Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2), 1715 }, 1716 Self::Hosts(ref dom) => match *other { 1717 Self::Adblock(ref dom2) => dom.cmp_adblock(dom2), 1718 Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2), 1719 Self::Hosts(ref dom2) => dom.cmp(dom2), 1720 Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2), 1721 }, 1722 Self::Wildcard(ref dom) => match *other { 1723 Self::Adblock(ref dom2) => dom.cmp_adblock(dom2), 1724 Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2), 1725 Self::Hosts(ref dom2) => dom.cmp_hosts(dom2), 1726 Self::Wildcard(ref dom2) => dom.cmp(dom2), 1727 }, 1728 } 1729 } 1730 } 1731 impl Display for RpzDomain<'_> { 1732 #[inline] 1733 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1734 match *self { 1735 Self::Adblock(ref dom) => dom.fmt(f), 1736 Self::DomainOnly(ref dom) => dom.fmt(f), 1737 Self::Hosts(ref dom) => dom.fmt(f), 1738 Self::Wildcard(ref dom) => dom.fmt(f), 1739 } 1740 } 1741 } 1742 impl<'a> Set for RpzDomain<'a> { 1743 type Elem = Domain<&'a str>; 1744 #[inline] 1745 fn bounded_cardinality(&self) -> BoundedCardinality { 1746 BoundedCardinality::from_biguint_exact(self.domain_count()) 1747 } 1748 #[inline] 1749 fn cardinality(&self) -> Option<Cardinality> { 1750 Some(Cardinality::Finite(self.domain_count())) 1751 } 1752 #[inline] 1753 fn contains<Q>(&self, elem: &Q) -> bool 1754 where 1755 Q: Borrow<Self::Elem> + Eq + ?Sized, 1756 { 1757 match *self { 1758 
Self::Adblock(ref dom) => dom.contains(elem), 1759 Self::DomainOnly(ref dom) => dom.contains(elem), 1760 Self::Hosts(ref dom) => dom.contains(elem), 1761 Self::Wildcard(ref dom) => dom.contains(elem), 1762 } 1763 } 1764 #[inline] 1765 fn is_proper_subset(&self, val: &Self) -> bool { 1766 match *val { 1767 Self::Adblock(ref dom) => { 1768 dom.subdomains 1769 && match *self { 1770 Self::Adblock(ref dom2) => { 1771 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1772 == DomainOrdering::Shorter 1773 } 1774 Self::DomainOnly(ref dom2) => matches!( 1775 dom.domain.cmp_by_domain_ordering(&dom2.domain), 1776 DomainOrdering::Shorter | DomainOrdering::Equal 1777 ), 1778 Self::Hosts(ref dom2) => matches!( 1779 dom.domain.cmp_by_domain_ordering(&dom2.domain), 1780 DomainOrdering::Shorter | DomainOrdering::Equal 1781 ), 1782 Self::Wildcard(ref dom2) => matches!( 1783 dom.domain.cmp_by_domain_ordering(&dom2.domain), 1784 DomainOrdering::Shorter | DomainOrdering::Equal 1785 ), 1786 } 1787 } 1788 Self::DomainOnly(_) | Self::Hosts(_) => false, 1789 Self::Wildcard(ref dom) => { 1790 dom.proper_subdomains 1791 && match *self { 1792 Self::Adblock(ref dom2) => { 1793 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1794 == DomainOrdering::Shorter 1795 } 1796 Self::DomainOnly(ref dom2) => { 1797 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1798 == DomainOrdering::Shorter 1799 } 1800 Self::Hosts(ref dom2) => { 1801 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1802 == DomainOrdering::Shorter 1803 } 1804 Self::Wildcard(ref dom2) => { 1805 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1806 == DomainOrdering::Shorter 1807 } 1808 } 1809 } 1810 } 1811 } 1812 #[inline] 1813 fn is_subset(&self, val: &Self) -> bool { 1814 self == val || self.is_proper_subset(val) 1815 } 1816 } 1817 impl SetOrd for RpzDomain<'_> {} 1818 impl<'a> Deref for RpzDomain<'a> { 1819 type Target = Domain<&'a str>; 1820 #[inline] 1821 fn deref(&self) -> &Self::Target { 1822 match *self { 1823 Self::Adblock(ref dom) 
=> &dom.domain, 1824 Self::DomainOnly(ref dom) => &dom.domain, 1825 Self::Hosts(ref dom) => &dom.domain, 1826 Self::Wildcard(ref dom) => &dom.domain, 1827 } 1828 } 1829 } 1830 impl<'a: 'b, 'b> From<Adblock<'a>> for RpzDomain<'b> { 1831 #[inline] 1832 fn from(value: Adblock<'a>) -> Self { 1833 Self::Adblock(value) 1834 } 1835 } 1836 impl<'a: 'b, 'b> From<DomainOnly<'a>> for RpzDomain<'b> { 1837 #[inline] 1838 fn from(value: DomainOnly<'a>) -> Self { 1839 Self::DomainOnly(value) 1840 } 1841 } 1842 impl<'a: 'b, 'b> From<Hosts<'a>> for RpzDomain<'b> { 1843 #[inline] 1844 fn from(value: Hosts<'a>) -> Self { 1845 Self::Hosts(value) 1846 } 1847 } 1848 impl<'a: 'b, 'b> From<Wildcard<'a>> for RpzDomain<'b> { 1849 #[inline] 1850 fn from(value: Wildcard<'a>) -> Self { 1851 Self::Wildcard(value) 1852 } 1853 } 1854 impl<'a> ParsedDomain<'a> for RpzDomain<'a> { 1855 type Error = FirefoxDomainErr; 1856 #[expect(single_use_lifetimes, reason = "false positive")] 1857 #[inline] 1858 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 1859 DomainOnly::parse_value(val).map_or_else( 1860 |_| { 1861 Hosts::parse_value(val).map_or_else( 1862 |_| { 1863 Wildcard::parse_value(val).map_or_else( 1864 |_| { 1865 Adblock::parse_value(val).map(|value| match value { 1866 Value::Domain(dom) => Value::Domain(Self::Adblock(dom)), 1867 Value::Comment(com) => Value::Comment(com), 1868 Value::Blank => Value::Blank, 1869 }) 1870 }, 1871 |value| { 1872 Ok(match value { 1873 Value::Domain(dom) => Value::Domain(Self::Wildcard(dom)), 1874 Value::Comment(com) => Value::Comment(com), 1875 Value::Blank => Value::Blank, 1876 }) 1877 }, 1878 ) 1879 }, 1880 |value| { 1881 Ok(match value { 1882 Value::Domain(dom) => Value::Domain(Self::Hosts(dom)), 1883 Value::Comment(com) => Value::Comment(com), 1884 Value::Blank => Value::Blank, 1885 }) 1886 }, 1887 ) 1888 }, 1889 |value| { 1890 Ok(match value { 1891 Value::Domain(dom) => Value::Domain(Self::DomainOnly(dom)), 1892 Value::Comment(com) 
=> Value::Comment(com), 1893 Value::Blank => Value::Blank, 1894 }) 1895 }, 1896 ) 1897 } 1898 #[inline] 1899 fn domain(&self) -> &Domain<&'a str> { 1900 match *self { 1901 Self::Adblock(ref dom) => &dom.domain, 1902 Self::DomainOnly(ref dom) => &dom.domain, 1903 Self::Hosts(ref dom) => &dom.domain, 1904 Self::Wildcard(ref dom) => &dom.domain, 1905 } 1906 } 1907 #[inline] 1908 fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error> { 1909 match *self { 1910 Self::Adblock(ref dom) => dom.write_to_rpz(action, writer), 1911 Self::DomainOnly(ref dom) => dom.write_to_rpz(action, writer), 1912 Self::Hosts(ref dom) => dom.write_to_rpz(action, writer), 1913 Self::Wildcard(ref dom) => dom.write_to_rpz(action, writer), 1914 } 1915 } 1916 } 1917 #[cfg(test)] 1918 mod tests { 1919 use super::{ 1920 Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain as _, RpzDomain, Value, Wildcard, 1921 }; 1922 use ascii_domain::dom::DomainErr; 1923 use num_bigint::BigUint; 1924 use superset_map::SupersetSet; 1925 #[test] 1926 fn adblock_parse() { 1927 // Test subdomains. 1928 assert!( 1929 Adblock::parse_value("||www.example.com").is_ok_and(|val| match val { 1930 Value::Domain(ref dom) => 1931 dom.subdomains && dom.domain.as_bytes() == b"www.example.com", 1932 Value::Comment(_) | Value::Blank => false, 1933 }) 1934 ); 1935 // Test whitespace and '^' removal. 
/// Exercises `DomainOnly::parse_value` on domains, comments, blank lines, and invalid input.
#[test]
fn domain_only_parse_value() {
    // Test whitespace and comment: leading whitespace and the in-line comment are ignored.
    assert!(
        DomainOnly::parse_value(" \t\t \t\t \twww.example.com#asdflkj asdf alskdfj ")
            .is_ok_and(|val| match val {
                Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com",
                Value::Comment(_) | Value::Blank => false,
            })
    );
    // Unlike the Adblock test above, a trailing '^' is not removed here, so the whitespace before it
    // stays inside the domain portion and is reported as an invalid byte.
    assert!(
        DomainOnly::parse_value(" \t\t \t\t \twww.example.com \t\t ^ \t\t ")
            .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')))
    );
    // Test case-insensitivity.
    assert!(
        DomainOnly::parse_value("www.ExAMPle.CoM").is_ok_and(|val| match val {
            Value::Domain(ref dom) =>
                DomainOnly::parse_value("www.example.com").is_ok_and(|val2| match val2 {
                    Value::Domain(ref dom2) => dom.cmp(dom2).is_eq(),
                    Value::Comment(_) | Value::Blank => false,
                }),
            Value::Comment(_) | Value::Blank => false,
        })
    );
    // Test comment: everything after the '#' is kept, including the leading space.
    assert!(
        DomainOnly::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
            Value::Comment(com) => com == " hi",
            Value::Domain(_) | Value::Blank => false,
        })
    );
    // Test blank.
    assert!(
        DomainOnly::parse_value(" \t\t \t\t \t ")
            .is_ok_and(|val| matches!(val, Value::Blank))
    );
    // Test punycode TLD: a TLD of length at least five that begins with `xn--` is accepted even
    // though it is not all letters.
    assert!(
        DomainOnly::parse_value("example.xn--abc")
            .is_ok_and(|val| matches!(val, Value::Domain(_)))
    );
    // Test invalid TLD: `c1m` is neither all letters nor an `xn--` TLD.
    assert!(
        DomainOnly::parse_value("www.c1m")
            .is_err_and(|err| err == FirefoxDomainErr::InvalidTld)
    );
}
2041 assert!( 2042 Hosts::parse_value(" \t\t 127.0.0.1\t\t \twww.example.com#asdflkj asdf alskdfj ") 2043 .is_ok_and(|val| match val { 2044 Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com", 2045 Value::Comment(_) | Value::Blank => false, 2046 }) 2047 ); 2048 assert!( 2049 Hosts::parse_value(" \t\t 0.0.0.0\t\t \twww.example.com \t\t ^ \t\t ") 2050 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))) 2051 ); 2052 assert!( 2053 Hosts::parse_value("::1\twww .example.com") 2054 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))) 2055 ); 2056 // Test invalid IP 2057 assert!( 2058 Hosts::parse_value("::2 www.example.com") 2059 .is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP) 2060 ); 2061 assert!( 2062 Hosts::parse_value(":2 www.example.com") 2063 .is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP) 2064 ); 2065 assert!( 2066 Hosts::parse_value("www.example.com") 2067 .is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP) 2068 ); 2069 assert!( 2070 Hosts::parse_value("10.4.2.256 www.example.com") 2071 .is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP) 2072 ); 2073 // Test case-insensitivity. 2074 assert!( 2075 Hosts::parse_value(":: www.ExAMPle.Com").is_ok_and(|val| match val { 2076 Value::Domain(ref dom) => Hosts::parse_value("127.0.0.1 www.example.com") 2077 .is_ok_and(|val2| match val2 { 2078 Value::Domain(ref dom2) => dom.cmp(dom2).is_eq(), 2079 Value::Comment(_) | Value::Blank => false, 2080 }), 2081 Value::Comment(_) | Value::Blank => false, 2082 }) 2083 ); 2084 // Test comment. 2085 assert!( 2086 Hosts::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val { 2087 Value::Comment(com) => com == " hi", 2088 Value::Domain(_) | Value::Blank => false, 2089 }) 2090 ); 2091 // Test blank. 2092 assert!( 2093 Hosts::parse_value(" \t\t \t\t \t ").is_ok_and(|val| matches!(val, Value::Blank)) 2094 ); 2095 } 2096 #[test] 2097 fn wildcard_parse_value() { 2098 // Test bad asterisk. 
2099 assert!( 2100 Wildcard::parse_value("*") 2101 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))) 2102 ); 2103 assert!( 2104 Wildcard::parse_value("www*.example.com") 2105 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))) 2106 ); 2107 assert!( 2108 Wildcard::parse_value("www.*.com") 2109 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))) 2110 ); 2111 assert!( 2112 Wildcard::parse_value("*..com") 2113 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::EmptyLabel)) 2114 ); 2115 assert!( 2116 Wildcard::parse_value("www.com*") 2117 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))) 2118 ); 2119 assert!( 2120 Wildcard::parse_value("ww*w.com") 2121 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))) 2122 ); 2123 // Test case-insensitivity. 2124 assert!( 2125 Wildcard::parse_value("*.wWw.ExamPLE.com").is_ok_and(|val| match val { 2126 Value::Domain(ref dom) => 2127 Wildcard::parse_value("*.www.example.com").is_ok_and(|val2| match val2 { 2128 Value::Domain(ref dom2) => 2129 dom.cmp(dom2).is_eq() 2130 && dom == dom2 2131 && dom.proper_subdomains 2132 && dom2.proper_subdomains, 2133 Value::Comment(_) | Value::Blank => false, 2134 }), 2135 Value::Comment(_) | Value::Blank => false, 2136 }) 2137 ); 2138 // Test proper subdomains. 2139 assert!( 2140 Wildcard::parse_value("*.www.example.com").is_ok_and(|val| match val { 2141 Value::Domain(ref dom) => 2142 dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains, 2143 Value::Comment(_) | Value::Blank => false, 2144 }) 2145 ); 2146 // Test comment. 
2147 assert!( 2148 Wildcard::parse_value(" \t\t \t\t \t*.www.example.com#asdflkj asdf alskdfj ") 2149 .is_ok_and(|val| match val { 2150 Value::Domain(ref dom) => 2151 dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains, 2152 Value::Comment(_) | Value::Blank => false, 2153 }) 2154 ); 2155 assert!( 2156 Wildcard::parse_value(" \t\t \t\t \twww.example.com #asdflkj asdf alskdfj ") 2157 .is_ok_and(|val| match val { 2158 Value::Domain(ref dom) => 2159 dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains, 2160 Value::Comment(_) | Value::Blank => false, 2161 }) 2162 ); 2163 // Test whitespace removal. 2164 assert!( 2165 Wildcard::parse_value(" \t\t *.www.example.com \t\t \t ").is_ok_and(|val| { 2166 match val { 2167 Value::Domain(ref dom) => { 2168 dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains 2169 } 2170 Value::Comment(_) | Value::Blank => false, 2171 } 2172 }) 2173 ); 2174 assert!( 2175 Wildcard::parse_value("\t\t \twww.example.com \t\t \t\t ").is_ok_and(|val| { 2176 match val { 2177 Value::Domain(ref dom) => { 2178 dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains 2179 } 2180 Value::Comment(_) | Value::Blank => false, 2181 } 2182 }) 2183 ); 2184 assert!( 2185 Wildcard::parse_value("www .example.com") 2186 .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))) 2187 ); 2188 // Test 127 labels after wildcard error. 2189 assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").is_err_and(|e| e == FirefoxDomainErr::InvalidWildcardDomain)); 2190 // Test 126 labels after wildcard is ok. 
2191 assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").is_ok_and(|val| match val { 2192 Value::Domain(ref dom) => dom.domain.iter().count() == 126 && dom.proper_subdomains, 2193 Value::Comment(_) | Value::Blank => false, 2194 })); 2195 // Test comment. 2196 assert!( 2197 Wildcard::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val { 2198 Value::Comment(com) => com == " hi", 2199 Value::Domain(_) | Value::Blank => false, 2200 }) 2201 ); 2202 // Test blank. 2203 assert!( 2204 Wildcard::parse_value(" \t\t \t\t \t ") 2205 .is_ok_and(|val| matches!(val, Value::Blank)) 2206 ); 2207 } 2208 #[test] 2209 fn rpz_parse_value() { 2210 assert!( 2211 RpzDomain::parse_value("*.www.example.com").is_ok_and(|val| { 2212 let dom = val.unwrap_domain(); 2213 dom.is_proper_subdomains() && dom.domain().as_bytes() == b"www.example.com" 2214 }) 2215 ); 2216 assert!( 2217 RpzDomain::parse_value("||www.example.com").is_ok_and(|val| { 2218 let dom = val.unwrap_domain(); 2219 dom.is_subdomains() && dom.domain().as_bytes() == b"www.example.com" 2220 }) 2221 ); 2222 assert!( 2223 RpzDomain::parse_value("0.0.0.0 www.example.com").is_ok_and(|val| { 2224 let dom = val.unwrap_domain(); 2225 !(dom.is_subdomains() || dom.is_proper_subdomains()) 2226 && dom.domain().as_bytes() == b"www.example.com" 2227 }) 2228 ); 2229 assert!(RpzDomain::parse_value("www.example.com").is_ok_and(|val| { 2230 let dom = val.unwrap_domain(); 2231 !(dom.is_subdomains() || dom.is_proper_subdomains()) 2232 && dom.domain().as_bytes() == b"www.example.com" 2233 })); 2234 // Test case-insensitivity. 
2235 assert!( 2236 RpzDomain::parse_value("*.Www.ExaMPle.COm").is_ok_and(|val| { 2237 let dom = val.unwrap_domain(); 2238 RpzDomain::parse_value("*.www.example.com").is_ok_and(|val2| { 2239 let dom2 = val2.unwrap_domain(); 2240 dom.is_proper_subdomains() 2241 && dom2.is_proper_subdomains() 2242 && dom == dom2 2243 && dom.cmp(&dom2).is_eq() 2244 }) 2245 }) 2246 ); 2247 // Test comment. 2248 assert!( 2249 RpzDomain::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val { 2250 Value::Comment(com) => com == " hi", 2251 Value::Domain(_) | Value::Blank => false, 2252 }) 2253 ); 2254 assert!( 2255 RpzDomain::parse_value(" \t\t \t\t \t ! hi").is_ok_and(|val| match val { 2256 Value::Comment(com) => com == " hi", 2257 Value::Domain(_) | Value::Blank => false, 2258 }) 2259 ); 2260 // Test blank. 2261 assert!( 2262 RpzDomain::parse_value(" \t\t \t\t \t ") 2263 .is_ok_and(|val| matches!(val, Value::Blank)) 2264 ); 2265 } 2266 #[expect(clippy::expect_used, clippy::unwrap_in_result, reason = "OK in tests")] 2267 #[expect(clippy::indexing_slicing, reason = "comments justify correctness")] 2268 #[expect(clippy::nonminimal_bool, reason = "want to test all ord ops")] 2269 #[test] 2270 fn rpz_ord_and_eq() -> Result<(), &'static str> { 2271 "www.bar.com,*.www.bar.com,||www.bar.com,*.bar.com,||bar.com,Example.com,WwW.exaMple.com,*.www.example.com,||www.example.com,*.example.com,||example.com,FOo.coM,Www.foo.com,*.foo.com,*.coM,example.net,*.net".split(',').try_fold(RpzDomain::DomainOnly(DomainOnly::parse_value("bar.com").expect("bug in DomainOnly::parse_value").unwrap_domain()), |prev, slice| { 2272 let cur = if slice.as_bytes()[0] == b'|' { 2273 RpzDomain::Adblock(Adblock::parse_value(slice).expect("Bug in Adblock::parse_value").unwrap_domain()) 2274 } else { 2275 RpzDomain::Wildcard(Wildcard::parse_value(slice).expect("Bug in Wildcard::parse_value").unwrap_domain()) 2276 }; 2277 if prev < cur && cur > prev && prev == prev && cur == cur { 2278 Ok(cur) 2279 } else { 2280 
Err("PartialEq or Ord are not correctly implemented for RpzDomain.") 2281 } 2282 }).map(|_| ()) 2283 } 2284 #[expect(clippy::expect_used, reason = "OK in tests")] 2285 #[expect(clippy::indexing_slicing, reason = "comments justify correctness")] 2286 #[test] 2287 fn superset_set() { 2288 let mut iter = "*.NeT,*.net,www.bar.com,*.net,*.www.bar.com,||www.bar.com,*.bar.com,||bar.com,example.com,www.example.com,*.www.example.com,||www.example.com,*.example.com,||example.com,foo.com,www.foo.com,*.foo.com,*.com,example.net,*.abc.abc,||aawww.abc,abc.abc".split(',').fold(SupersetSet::new(), |mut doms, slice| { 2289 _ = doms.insert(if slice.as_bytes()[0] == b'|' { 2290 RpzDomain::Adblock(Adblock::parse_value(slice).expect("Bug in Adblock::parse_value").unwrap_domain()) 2291 } else { 2292 RpzDomain::Wildcard(Wildcard::parse_value(slice).expect("Bug in Wildcard::parse_value").unwrap_domain()) 2293 }); 2294 doms 2295 }).into_iter(); 2296 assert!( 2297 iter.next() 2298 .is_some_and(|d| { d.domain().as_bytes() == b"aawww.abc" && d.is_subdomains() }) 2299 ); 2300 assert!( 2301 iter.next() 2302 .is_some_and(|d| { d.domain().as_bytes() == b"abc.abc" && d.is_domain() }) 2303 ); 2304 assert!( 2305 iter.next().is_some_and(|d| { 2306 d.domain().as_bytes() == b"abc.abc" && d.is_proper_subdomains() 2307 }) 2308 ); 2309 assert!( 2310 iter.next() 2311 .is_some_and(|d| { d.domain().as_bytes() == b"com" && d.is_proper_subdomains() }) 2312 ); 2313 assert!( 2314 iter.next() 2315 .is_some_and(|d| { d.domain().as_bytes() == b"NeT" && d.is_proper_subdomains() }) 2316 ); 2317 assert!(iter.next().is_none()); 2318 } 2319 #[test] 2320 fn card() { 2321 // Geometric series. 2322 // We can have two labels each with one character, 2323 // one label with one to three characters, or 0 labels. 2324 // This is 1 + 52 + 52^2 + 52^3 + 52^2 = (1-52^4)/(1-52) + 52^2 = (52^4 - 1)/51 + 52^2 = 146069. 
2325 assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").is_ok_and(|val| { let dom = val.unwrap_domain(); dom.domain.len().get() == 249 && dom.domain.iter().count() == 125 && dom.domain_count() == BigUint::new(vec![146_069]) })); 2326 // A subdomain of length 252 or 253 gets converted to a domain. 2327 assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").is_ok_and(|val| { let dom = val.unwrap_domain(); dom.domain.iter().count() == 127 && !dom.subdomains && dom.domain_count() == BigUint::new(vec![1]) })); 2328 // Pre-calculated manually. 2329 // This is the number of domains possible between 2 and 252 characters. 
2330 assert!(Wildcard::parse_value("*.a").is_ok_and(|val| { 2331 val.unwrap_domain().domain_count() 2332 == BigUint::new(vec![ 2333 375_288_404, 2334 2_460_223_985, 2335 1_334_358_771, 2336 2_543_621_408, 2337 2_519_466_280, 2338 1_133_682_239, 2339 3_589_178_618, 2340 348_125_705, 2341 1_709_233_643, 2342 958_334_503, 2343 3_780_539_710, 2344 2_181_893_897, 2345 2_457_156_833, 2346 3_204_765_645, 2347 2_728_103_430, 2348 1_817_547_150, 2349 3_102_358_416, 2350 444_185_044, 2351 3_659_003_776, 2352 10_341_713, 2353 306_326_206, 2354 1_336_386_425, 2355 3_942_332_649, 2356 2_036_577_878, 2357 2_460_939_277, 2358 3_976_861_337, 2359 2_101_094_571, 2360 2_241_770_079, 2361 2_667_853_164, 2362 3_687_350_273, 2363 109_356_153, 2364 3_455_569_358, 2365 2_333_076_459, 2366 2_433_207_896, 2367 1_553_903_141, 2368 2_621_943_843, 2369 4_223_295_645, 2370 1_753_858_368, 2371 130_924_388, 2372 965_594_304, 2373 3_942_586_845, 2374 1_573_844_087, 2375 4_237_886_128, 2376 481_383_133, 2377 56_931_017, 2378 ]) 2379 })); 2380 } 2381 }