dom.rs (89064B)
1 use crate::dom_count_auto_gen::proper_subdomain_count; 2 use ascii_domain::{ 3 char_set::{ASCII_FIREFOX, AllowedAscii}, 4 dom::{Domain, DomainErr, DomainOrdering}, 5 }; 6 use core::{ 7 borrow::Borrow, 8 cmp::Ordering, 9 convert, 10 fmt::{self, Display, Formatter}, 11 hash::{Hash, Hasher}, 12 num::NonZeroU8, 13 ops::Deref, 14 str, 15 }; 16 use num_bigint::BigUint; 17 use std::{ 18 error, 19 io::{Error, Write}, 20 }; 21 use superset_map::SetOrd; 22 use zfc::{BoundedCardinality, Cardinality, Set}; 23 /// One. 24 const ONE: NonZeroU8 = NonZeroU8::new(1).unwrap(); 25 /// Error returned when an invalid string is passed to [`Adblock::parse_value`], [`DomainOnly::parse_value`], 26 /// [`Hosts::parse_value`], [`Wildcard::parse_value`], or [`RpzDomain::parse_value`]. 27 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] 28 pub enum FirefoxDomainErr { 29 /// The domain is invalid based on [`Domain`] using [`ASCII_FIREFOX`]. 30 InvalidDomain(DomainErr), 31 /// The domain had a TLD that was not all letters nor length of at least five beginning with `b"xn--"`. 32 InvalidTld, 33 /// The string passed to [`Adblock::parse_value`] contained `$`. 34 InvalidAdblockDomain, 35 /// The string passed to [`Hosts::parse_value`] did not conform 36 /// to the required [`Hosts`] format. 37 InvalidHostsIP, 38 /// The length of the non-wildcard portion of the string passed to 39 /// [`Wildcard::parse_value`] was at least 252 which means there are 40 /// no proper subdomains. 
41 InvalidWildcardDomain, 42 } 43 impl Display for FirefoxDomainErr { 44 #[inline] 45 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 46 match *self { 47 Self::InvalidDomain(err) => err.fmt(f), 48 Self::InvalidTld => f.write_str("domain had a TLD that was not all letters nor at least five characters long starting with 'xn--'"), 49 Self::InvalidAdblockDomain => f.write_str("Adblock-style domain contained a '$'"), 50 Self::InvalidHostsIP => f.write_str("hosts-style domain does not begin with the IP '::', '::1', '0.0.0.0', or '127.0.0.1' followed by at least one space or tab"), 51 Self::InvalidWildcardDomain => f.write_str("non-wildcard portion of a wildcard domain had length of at least 252 which means there are 0 proper subdomains"), 52 } 53 } 54 } 55 impl error::Error for FirefoxDomainErr {} 56 /// The ASCII we allow domains to have. 57 const CHARS: &AllowedAscii<[u8; 78]> = &ASCII_FIREFOX; 58 /// Parses a `[u8]` into a `Domain` using `CHARS` with the added restriction that the `Domain` has a TLD 59 /// that is either all letters or has length of at least five and begins with `b"xn--"`. 60 #[expect(single_use_lifetimes, reason = "false positive")] 61 #[expect(clippy::indexing_slicing, reason = "we verify manually")] 62 fn domain_icann_tld<'a: 'b, 'b>(val: &'a [u8]) -> Result<Domain<&'b str>, FirefoxDomainErr> { 63 Domain::try_from_bytes(val, CHARS) 64 .map_err(FirefoxDomainErr::InvalidDomain) 65 .and_then(|dom| { 66 let tld = dom.tld(); 67 // `tld.as_bytes()[..4]` won't panic since we check before that that the length is at least 5. 68 if tld.is_alphabetic() || (tld.len().get() > 4 && tld.as_bytes()[..4] == *b"xn--") { 69 Ok(dom.into()) 70 } else { 71 Err(FirefoxDomainErr::InvalidTld) 72 } 73 }) 74 } 75 /// Action taken by a DNS server when a domain matches. 76 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 77 pub enum RpzAction { 78 /// Send `NXDOMAIN` reply. 79 Nxdomain, 80 /// Send `NODATA` reply. 
81 Nodata, 82 /// Do nothing; continue as normal. 83 Passthru, 84 /// Drop the query. 85 Drop, 86 /// Answer over TCP. 87 TcpOnly, 88 } 89 impl Display for RpzAction { 90 #[inline] 91 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 92 match *self { 93 Self::Nxdomain => f.write_str("NXDOMAIN"), 94 Self::Nodata => f.write_str("NODATA"), 95 Self::Passthru => f.write_str("PASSTHRU"), 96 Self::Drop => f.write_str("DROP"), 97 Self::TcpOnly => f.write_str("TCP-Only"), 98 } 99 } 100 } 101 impl PartialEq<&Self> for RpzAction { 102 #[inline] 103 fn eq(&self, other: &&Self) -> bool { 104 *self == **other 105 } 106 } 107 impl PartialEq<RpzAction> for &RpzAction { 108 #[inline] 109 fn eq(&self, other: &RpzAction) -> bool { 110 **self == *other 111 } 112 } 113 /// Writes the following line with `writer` based on `action`: 114 /// * `RpzAction::Nxdomain`: `<dom> CNAME .`. 115 /// * `RpzAction::Nodata`: `<dom> CNAME *.`. 116 /// * `RpzAction::Passthru`: `<dom> CNAME rpz-passthru.`. 117 /// * `RpzAction::Drop`: `<dom> CNAME rpz-drop.`. 118 /// * `RpzAction::TcpOnly`: `<dom> CNAME rpz-tcp-only.`. 119 /// 120 /// `*.` is prepended to `<dom>` iff `wildcard`. 121 /// 122 /// # Errors 123 /// 124 /// Returns [`Error`] iff [`writeln`] does. 125 #[inline] 126 pub fn write_rpz_line<W: Write, T>( 127 mut writer: W, 128 dom: &Domain<T>, 129 action: RpzAction, 130 wildcard: bool, 131 ) -> Result<(), Error> 132 where 133 Domain<T>: Display, 134 { 135 writeln!( 136 writer, 137 "{}{} CNAME {}.", 138 if wildcard { "*." } else { "" }, 139 dom, 140 match action { 141 RpzAction::Nxdomain => "", 142 RpzAction::Nodata => "*", 143 RpzAction::Passthru => "rpz-passthru", 144 RpzAction::Drop => "rpz-drop", 145 RpzAction::TcpOnly => "rpz-tcp-only", 146 } 147 ) 148 } 149 /// Type that can be returned by [`Domain`]-like parsers (e.g., [`Adblock`]). 150 #[derive(Clone, Copy, Debug)] 151 pub enum Value<'a, T: ParsedDomain<'a>> { 152 /// The parsed value is a domain. 
153 Domain(T), 154 /// The parsed value is a comment. 155 Comment(&'a str), 156 /// The parsed value is blank or just [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 157 Blank, 158 } 159 impl<'a, T: ParsedDomain<'a>> Value<'a, T> { 160 /// Returns `true` iff `self` is a [`Self::Domain`]. 161 #[inline] 162 pub const fn is_domain(&self) -> bool { 163 match *self { 164 Self::Domain(_) => true, 165 Self::Comment(_) | Self::Blank => false, 166 } 167 } 168 /// Returns `true` iff `self` is a [`Self::Comment`]. 169 #[inline] 170 pub const fn is_comment(&self) -> bool { 171 match *self { 172 Self::Comment(_) => true, 173 Self::Domain(_) | Self::Blank => false, 174 } 175 } 176 /// Returns `true` iff `self` is a [`Self::Blank`]. 177 #[inline] 178 pub const fn is_blank(&self) -> bool { 179 matches!(*self, Value::Blank) 180 } 181 /// Returns the contained [`Self::Domain`] value. 182 /// 183 /// # Panics 184 /// 185 /// Panics iff `self` is [`Self::Comment`] or [`Self::Blank`]. 186 #[expect(clippy::panic, reason = "bug if called incorrectly")] 187 #[inline] 188 pub fn unwrap_domain(self) -> T { 189 match self { 190 Self::Domain(dom) => dom, 191 Self::Comment(_) | Self::Blank => { 192 panic!("called `ParsedDomain::unwrap_domain()` on a `Comment` or `Blank` value") 193 } 194 } 195 } 196 /// Returns the contained [`prim@str`] in [`Self::Comment`]. 197 /// 198 /// # Panics 199 /// 200 /// Panics iff `self` is [`Self::Domain`] or [`Self::Blank`]. 201 #[expect(clippy::panic, reason = "bug if called incorrectly")] 202 #[inline] 203 pub fn unwrap_comment(self) -> &'a str { 204 match self { 205 Self::Comment(com) => com, 206 Self::Domain(_) | Self::Blank => { 207 panic!("called `ParsedDomain::unwrap_comment()` on a `Domain` or `Blank` value") 208 } 209 } 210 } 211 /// Returns [`unit`] when `self` is [`Self::Blank`]. 212 /// 213 /// # Panics 214 /// 215 /// Panics iff `self` is [`Self::Domain`] or [`Self::Comment`]. 
216 #[expect(clippy::panic, reason = "bug if called incorrectly")] 217 #[inline] 218 pub fn unwrap_blank(self) { 219 match self { 220 Self::Blank => {} 221 Self::Domain(_) | Self::Comment(_) => { 222 panic!("called `ParsedDomain::unwrap_blank()` on a `Domain` or `Comment` value") 223 } 224 } 225 } 226 } 227 /// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. 228 /// 229 /// When parsed into a [`Value::Domain`], the domain can be written to a 230 /// [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. 231 pub trait ParsedDomain<'a>: Sized { 232 /// The error returned from [`Self::parse_value`]. 233 type Error; 234 /// Parses a `str` into a `Value`. 235 /// # Errors 236 /// 237 /// Errors iff `val` is unable to be parsed into a `Value`. 238 #[expect(single_use_lifetimes, reason = "false positive")] 239 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error>; 240 /// Reference to the contained `Domain`. 241 fn domain(&self) -> &Domain<&'a str>; 242 /// Writes `self` as RPZ lines via `writer`. 243 /// 244 /// # Errors 245 /// 246 /// Errors iff `writer` errors. 247 fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error>; 248 } 249 /// Domain constructed from an 250 /// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax). 251 /// 252 /// Specifically the domain must conform to the following extended regex: 253 /// 254 /// `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$` 255 /// 256 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`] with the added requirement that it 257 /// does not contain `$`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of 258 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 259 /// 260 /// Comments are any lines that start with `!` or `#` (ignoring whitespace). 
Any in-line comments after a valid 261 /// domain are ignored and will be parsed into a [`Value::Domain`]. 262 /// 263 /// Note that this means some valid Adblock-style rules are not considered valid since such rules often contain 264 /// path information or modifiers (e.g., “third-party”), but this only considers domain-only rules. 265 #[derive(Clone, Debug)] 266 pub struct Adblock<'a> { 267 /// The `Domain`. 268 domain: Domain<&'a str>, 269 /// `true` iff `domain` represents all subdomains. Note that this includes `domain` itself. 270 subdomains: bool, 271 } 272 impl Adblock<'_> { 273 /// Returns `true` iff the contained [`Domain`] represents all subdomains. Note this includes the 274 /// `Domain` itself. 275 #[inline] 276 #[must_use] 277 pub const fn is_subdomains(&self) -> bool { 278 self.subdomains 279 } 280 /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used 281 /// for both. 282 #[must_use] 283 fn cmp_dom(&self, other: &Domain<&str>) -> Ordering { 284 match self.domain.cmp_by_domain_ordering(other) { 285 DomainOrdering::Less => Ordering::Less, 286 DomainOrdering::Shorter => { 287 if self.subdomains { 288 Ordering::Greater 289 } else { 290 Ordering::Less 291 } 292 } 293 DomainOrdering::Equal => { 294 if self.subdomains { 295 Ordering::Greater 296 } else { 297 Ordering::Equal 298 } 299 } 300 DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater, 301 } 302 } 303 /// The total order that is defined follows the following hierarchy: 304 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 305 /// 2. If 1. evaluates as not equivalent, then return the result. 306 /// 3. If `self` represents a single `Domain` (i.e., `!self.is_subdomains()`), 307 /// then return the comparison of label counts. 308 /// 4. `self` is greater. 309 /// 310 /// For example, `com` `<` `example.com` `<` `||example.com` `<` `||com` `<` `net` `<` `example.net` `<` `||example.net` `<` `||net`. 
311 #[inline] 312 #[must_use] 313 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 314 self.cmp_dom(&other.domain) 315 } 316 /// Same as [`Adblock::cmp_domain_only`]. 317 #[inline] 318 #[must_use] 319 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 320 self.cmp_dom(&other.domain) 321 } 322 /// The total order that is defined follows the following hierarchy: 323 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 324 /// 2. If 1. evaluates as not equivalent, then return the result. 325 /// 3. If both domains represent a single `Domain`, then return the comparison 326 /// of label counts. 327 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 328 /// 5. If the label counts are the same, `self` is greater. 329 /// 6. Return the inverse of the comparison of label counts. 330 /// 331 /// For example the following is a sequence of domains in 332 /// ascending order: 333 /// 334 /// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net` 335 #[inline] 336 #[must_use] 337 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 338 match self.domain.cmp_by_domain_ordering(&other.domain) { 339 DomainOrdering::Less => Ordering::Less, 340 DomainOrdering::Shorter => { 341 if self.subdomains { 342 Ordering::Greater 343 } else { 344 Ordering::Less 345 } 346 } 347 DomainOrdering::Equal => { 348 if self.subdomains { 349 Ordering::Greater 350 } else if other.proper_subdomains { 351 Ordering::Less 352 } else { 353 Ordering::Equal 354 } 355 } 356 DomainOrdering::Longer => { 357 if self.subdomains { 358 if other.proper_subdomains { 359 Ordering::Less 360 } else { 361 Ordering::Greater 362 } 363 } else if other.proper_subdomains { 364 Ordering::Less 365 } else { 366 
Ordering::Greater 367 } 368 } 369 DomainOrdering::Greater => Ordering::Greater, 370 } 371 } 372 /// Same as [`Adblock::cardinality`] except that a `BigUint` is returned. Note the count _includes_ 373 /// the `Domain` itself when `self.is_subdomains()`. 374 /// 375 /// `!self.is_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`. 376 #[expect(clippy::arithmetic_side_effects, reason = "arbitrary-sized arithmetic")] 377 #[inline] 378 #[must_use] 379 pub fn domain_count(&self) -> BigUint { 380 if self.subdomains { 381 proper_subdomain_count(&self.domain) + BigUint::new(vec![1]) 382 } else { 383 BigUint::new(vec![1]) 384 } 385 } 386 } 387 impl Display for Adblock<'_> { 388 #[inline] 389 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 390 write!( 391 f, 392 "{}{}", 393 if self.subdomains { "||" } else { "" }, 394 self.domain 395 ) 396 } 397 } 398 impl PartialEq<Adblock<'_>> for Adblock<'_> { 399 #[inline] 400 fn eq(&self, other: &Adblock<'_>) -> bool { 401 self.domain == other.domain && self.subdomains == other.subdomains 402 } 403 } 404 impl PartialEq<&Adblock<'_>> for Adblock<'_> { 405 #[inline] 406 fn eq(&self, other: &&Adblock<'_>) -> bool { 407 *self == **other 408 } 409 } 410 impl PartialEq<Adblock<'_>> for &Adblock<'_> { 411 #[inline] 412 fn eq(&self, other: &Adblock<'_>) -> bool { 413 **self == *other 414 } 415 } 416 impl PartialEq<DomainOnly<'_>> for Adblock<'_> { 417 #[inline] 418 fn eq(&self, other: &DomainOnly<'_>) -> bool { 419 !self.subdomains && self.domain == other.domain 420 } 421 } 422 impl PartialEq<&DomainOnly<'_>> for Adblock<'_> { 423 #[inline] 424 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 425 *self == **other 426 } 427 } 428 impl PartialEq<DomainOnly<'_>> for &Adblock<'_> { 429 #[inline] 430 fn eq(&self, other: &DomainOnly<'_>) -> bool { 431 **self == *other 432 } 433 } 434 impl PartialEq<&Adblock<'_>> for DomainOnly<'_> { 435 #[inline] 436 fn eq(&self, other: &&Adblock<'_>) -> bool { 437 *self == **other 438 } 439 } 440 
impl PartialEq<Adblock<'_>> for &DomainOnly<'_> { 441 #[inline] 442 fn eq(&self, other: &Adblock<'_>) -> bool { 443 **self == *other 444 } 445 } 446 impl PartialEq<Hosts<'_>> for Adblock<'_> { 447 #[inline] 448 fn eq(&self, other: &Hosts<'_>) -> bool { 449 !self.subdomains && self.domain == other.domain 450 } 451 } 452 impl PartialEq<&Hosts<'_>> for Adblock<'_> { 453 #[inline] 454 fn eq(&self, other: &&Hosts<'_>) -> bool { 455 *self == **other 456 } 457 } 458 impl PartialEq<Hosts<'_>> for &Adblock<'_> { 459 #[inline] 460 fn eq(&self, other: &Hosts<'_>) -> bool { 461 **self == *other 462 } 463 } 464 impl PartialEq<&Adblock<'_>> for Hosts<'_> { 465 #[inline] 466 fn eq(&self, other: &&Adblock<'_>) -> bool { 467 *self == **other 468 } 469 } 470 impl PartialEq<Adblock<'_>> for &Hosts<'_> { 471 #[inline] 472 fn eq(&self, other: &Adblock<'_>) -> bool { 473 **self == *other 474 } 475 } 476 impl PartialEq<Wildcard<'_>> for Adblock<'_> { 477 #[expect(clippy::suspicious_operation_groupings, reason = "false positive")] 478 #[inline] 479 fn eq(&self, other: &Wildcard<'_>) -> bool { 480 !(self.subdomains || other.proper_subdomains) && self.domain == other.domain 481 } 482 } 483 impl PartialEq<&Wildcard<'_>> for Adblock<'_> { 484 #[inline] 485 fn eq(&self, other: &&Wildcard<'_>) -> bool { 486 *self == **other 487 } 488 } 489 impl PartialEq<Wildcard<'_>> for &Adblock<'_> { 490 #[inline] 491 fn eq(&self, other: &Wildcard<'_>) -> bool { 492 **self == *other 493 } 494 } 495 impl PartialEq<&Adblock<'_>> for Wildcard<'_> { 496 #[inline] 497 fn eq(&self, other: &&Adblock<'_>) -> bool { 498 *self == **other 499 } 500 } 501 impl PartialEq<Adblock<'_>> for &Wildcard<'_> { 502 #[inline] 503 fn eq(&self, other: &Adblock<'_>) -> bool { 504 **self == *other 505 } 506 } 507 impl Eq for Adblock<'_> {} 508 impl Hash for Adblock<'_> { 509 #[inline] 510 fn hash<H: Hasher>(&self, state: &mut H) { 511 self.domain.hash(state); 512 } 513 } 514 impl PartialOrd<Adblock<'_>> for Adblock<'_> { 515 
#[inline] 516 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 517 Some(self.cmp(other)) 518 } 519 } 520 impl Ord for Adblock<'_> { 521 /// The total order that is defined follows the following hierarchy: 522 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 523 /// 2. If 1. evaluates as not equivalent, then return the result. 524 /// 3. If both domains represent a single `Domain`, then return the comparison 525 /// of label counts. 526 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 527 /// 5. Return the inverse of the comparison of label counts. 528 /// 529 /// For example, `com` `<` `example.com` `<` `||example.com` `<` `||com` `<` `net` `<` `example.net` `<` `||example.net` `<` `||net`. 530 #[inline] 531 fn cmp(&self, other: &Self) -> Ordering { 532 match self.domain.cmp_by_domain_ordering(&other.domain) { 533 DomainOrdering::Less => Ordering::Less, 534 DomainOrdering::Shorter => { 535 if self.subdomains { 536 Ordering::Greater 537 } else { 538 Ordering::Less 539 } 540 } 541 DomainOrdering::Equal => { 542 if self.subdomains { 543 if other.subdomains { 544 Ordering::Equal 545 } else { 546 Ordering::Greater 547 } 548 } else if other.subdomains { 549 Ordering::Less 550 } else { 551 Ordering::Equal 552 } 553 } 554 DomainOrdering::Longer => { 555 if self.subdomains { 556 if other.subdomains { 557 Ordering::Less 558 } else { 559 Ordering::Greater 560 } 561 } else if other.subdomains { 562 Ordering::Less 563 } else { 564 Ordering::Greater 565 } 566 } 567 DomainOrdering::Greater => Ordering::Greater, 568 } 569 } 570 } 571 impl PartialOrd<DomainOnly<'_>> for Adblock<'_> { 572 #[inline] 573 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 574 Some(self.cmp_domain_only(other)) 575 } 576 } 577 impl PartialOrd<Hosts<'_>> for Adblock<'_> { 578 #[inline] 579 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 580 Some(self.cmp_hosts(other)) 581 } 582 } 
583 impl PartialOrd<Wildcard<'_>> for Adblock<'_> { 584 #[inline] 585 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 586 Some(self.cmp_wildcard(other)) 587 } 588 } 589 impl<'a> Set for Adblock<'a> { 590 type Elem = Domain<&'a str>; 591 #[inline] 592 fn bounded_cardinality(&self) -> BoundedCardinality { 593 BoundedCardinality::from_biguint_exact(self.domain_count()) 594 } 595 #[inline] 596 fn cardinality(&self) -> Option<Cardinality> { 597 Some(Cardinality::Finite(self.domain_count())) 598 } 599 #[inline] 600 fn contains<Q>(&self, elem: &Q) -> bool 601 where 602 Q: Borrow<Self::Elem> + Eq + ?Sized, 603 { 604 if self.subdomains { 605 matches!( 606 self.domain.cmp_by_domain_ordering(elem.borrow()), 607 DomainOrdering::Shorter 608 ) 609 } else { 610 self.domain == *elem.borrow() 611 } 612 } 613 #[inline] 614 fn is_proper_subset(&self, val: &Self) -> bool { 615 // A single domain can never be a proper superset. Subdomains` cannot be a proper superset if it has 616 // more labels or the same number of labels as another subdomains. In all other cases, we need to 617 // recursively check from the TLD that the labels are the same. 
618 val.subdomains 619 && match val.domain.cmp_by_domain_ordering(&self.domain) { 620 DomainOrdering::Less | DomainOrdering::Longer | DomainOrdering::Greater => false, 621 DomainOrdering::Shorter => true, 622 DomainOrdering::Equal => !self.subdomains, 623 } 624 } 625 #[inline] 626 fn is_subset(&self, val: &Self) -> bool { 627 self == val || self.is_proper_subset(val) 628 } 629 } 630 impl SetOrd for Adblock<'_> {} 631 impl<'a> Deref for Adblock<'a> { 632 type Target = Domain<&'a str>; 633 #[inline] 634 fn deref(&self) -> &Self::Target { 635 &self.domain 636 } 637 } 638 impl<'a> ParsedDomain<'a> for Adblock<'a> { 639 type Error = FirefoxDomainErr; 640 #[expect(single_use_lifetimes, reason = "false positive")] 641 #[expect( 642 unsafe_code, 643 clippy::indexing_slicing, 644 reason = "we carefully verify what we are doing" 645 )] 646 #[inline] 647 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 648 // First remove leading whitepace. Then check for comments via '#' and '!'. Return Blank iff empty. 649 // Return Comment iff '#' or '!' is the first character. Remove trailing whitespace. Next remove the 650 // last byte if it is '^' as well as whitespace before. Next track and remove '||' at the beginning 651 // and any subsequent whitespace. 652 let mut value = val.as_bytes().trim_ascii_start(); 653 value.first().map_or_else( 654 || Ok(Value::Blank), 655 |byt| { 656 if *byt == b'#' || *byt == b'!' { 657 // SAFETY: 658 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 659 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 
660 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 661 Ok(Value::Comment(comment)) 662 } else { 663 value = value.trim_ascii_end(); 664 let len = value.len().wrapping_sub(1); 665 value = value.get(len).map_or(value, |byt2| { 666 if *byt2 == b'^' { 667 value[..len].trim_ascii_end() 668 } else { 669 value 670 } 671 }); 672 let (subdomains, val2) = value.get(..2).map_or_else( 673 || (false, value), 674 |fst| { 675 if fst == b"||" { 676 (true, value[2..].trim_ascii_start()) 677 } else { 678 (false, value) 679 } 680 }, 681 ); 682 // `Domain`s allow `$`, but we don't want to allow that symbol for Adblock-style rules. 683 val2.iter() 684 .try_fold((), |(), byt2| { 685 if *byt2 == b'$' { 686 Err(FirefoxDomainErr::InvalidAdblockDomain) 687 } else { 688 Ok(()) 689 } 690 }) 691 .and_then(|()| { 692 domain_icann_tld(val2).map(|domain| { 693 // A domain of length 252 or 253 can't have subdomains due to there not being enough 694 // characters. 695 Value::Domain(Self { 696 subdomains: if domain.len().get() > 251 { 697 false 698 } else { 699 subdomains 700 }, 701 domain, 702 }) 703 }) 704 }) 705 } 706 }, 707 ) 708 } 709 #[inline] 710 fn domain(&self) -> &Domain<&'a str> { 711 &self.domain 712 } 713 #[inline] 714 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 715 write_rpz_line(&mut writer, self.domain(), action, false).and_then(|()| { 716 if self.subdomains { 717 write_rpz_line(writer, self.domain(), action, true) 718 } else { 719 Ok(()) 720 } 721 }) 722 } 723 } 724 /// Domain constructed from a 725 /// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax). 
726 /// 727 /// Specifically the domain must conform to the following extended regex: 728 /// 729 /// `^<ws>*<domain><ws>*(#.*)?$` 730 /// 731 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 732 /// or at least length five and begins with `xn--`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 733 /// 734 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 735 /// are ignored and will be parsed into a [`Value::Domain`]. 736 #[derive(Clone, Debug)] 737 pub struct DomainOnly<'a> { 738 /// The `Domain`. 739 domain: Domain<&'a str>, 740 } 741 impl DomainOnly<'_> { 742 /// Read [`Adblock::cmp_domain_only`]. 743 #[inline] 744 #[must_use] 745 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 746 other.cmp_domain_only(self).reverse() 747 } 748 /// Read [`Domain::cmp`]. 749 #[inline] 750 #[must_use] 751 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 752 self.domain.cmp(&other.domain) 753 } 754 /// Read [`Wildcard::cmp_domain_only`]. 755 #[inline] 756 #[must_use] 757 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 758 other.cmp_domain_only(self).reverse() 759 } 760 /// Same as [`DomainOnly::cardinality`] except that a `NonZeroU8` is returned. 761 /// 762 /// The value is always 1. 
763 #[inline] 764 #[must_use] 765 pub const fn domain_count(&self) -> NonZeroU8 { 766 ONE 767 } 768 } 769 impl PartialEq<DomainOnly<'_>> for DomainOnly<'_> { 770 #[inline] 771 fn eq(&self, other: &DomainOnly<'_>) -> bool { 772 self.domain == other.domain 773 } 774 } 775 impl PartialEq<DomainOnly<'_>> for &DomainOnly<'_> { 776 #[inline] 777 fn eq(&self, other: &DomainOnly<'_>) -> bool { 778 **self == *other 779 } 780 } 781 impl PartialEq<&DomainOnly<'_>> for DomainOnly<'_> { 782 #[inline] 783 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 784 *self == **other 785 } 786 } 787 impl PartialEq<Adblock<'_>> for DomainOnly<'_> { 788 #[inline] 789 fn eq(&self, other: &Adblock<'_>) -> bool { 790 other == self 791 } 792 } 793 impl PartialEq<Hosts<'_>> for DomainOnly<'_> { 794 #[inline] 795 fn eq(&self, other: &Hosts<'_>) -> bool { 796 self.domain == other.domain 797 } 798 } 799 impl PartialEq<&Hosts<'_>> for DomainOnly<'_> { 800 #[inline] 801 fn eq(&self, other: &&Hosts<'_>) -> bool { 802 *self == **other 803 } 804 } 805 impl PartialEq<Hosts<'_>> for &DomainOnly<'_> { 806 #[inline] 807 fn eq(&self, other: &Hosts<'_>) -> bool { 808 **self == *other 809 } 810 } 811 impl PartialEq<&DomainOnly<'_>> for Hosts<'_> { 812 #[inline] 813 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 814 *self == **other 815 } 816 } 817 impl PartialEq<DomainOnly<'_>> for &Hosts<'_> { 818 #[inline] 819 fn eq(&self, other: &DomainOnly<'_>) -> bool { 820 **self == *other 821 } 822 } 823 impl PartialEq<Wildcard<'_>> for DomainOnly<'_> { 824 #[inline] 825 fn eq(&self, other: &Wildcard<'_>) -> bool { 826 !other.proper_subdomains && self.domain == other.domain 827 } 828 } 829 impl PartialEq<&Wildcard<'_>> for DomainOnly<'_> { 830 #[inline] 831 fn eq(&self, other: &&Wildcard<'_>) -> bool { 832 *self == **other 833 } 834 } 835 impl PartialEq<Wildcard<'_>> for &DomainOnly<'_> { 836 #[inline] 837 fn eq(&self, other: &Wildcard<'_>) -> bool { 838 **self == *other 839 } 840 } 841 impl PartialEq<&DomainOnly<'_>> 
for Wildcard<'_> { 842 #[inline] 843 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 844 *self == **other 845 } 846 } 847 impl PartialEq<DomainOnly<'_>> for &Wildcard<'_> { 848 #[inline] 849 fn eq(&self, other: &DomainOnly<'_>) -> bool { 850 **self == *other 851 } 852 } 853 impl Eq for DomainOnly<'_> {} 854 impl Hash for DomainOnly<'_> { 855 #[inline] 856 fn hash<H: Hasher>(&self, state: &mut H) { 857 self.domain.hash(state); 858 } 859 } 860 impl PartialOrd<DomainOnly<'_>> for DomainOnly<'_> { 861 #[inline] 862 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 863 Some(self.cmp(other)) 864 } 865 } 866 impl Ord for DomainOnly<'_> { 867 /// Read [`Domain::cmp`]. 868 #[inline] 869 fn cmp(&self, other: &Self) -> Ordering { 870 self.domain.cmp(&other.domain) 871 } 872 } 873 impl PartialOrd<Adblock<'_>> for DomainOnly<'_> { 874 #[inline] 875 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 876 Some(self.cmp_adblock(other)) 877 } 878 } 879 impl PartialOrd<Hosts<'_>> for DomainOnly<'_> { 880 #[inline] 881 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 882 Some(self.cmp_hosts(other)) 883 } 884 } 885 impl PartialOrd<Wildcard<'_>> for DomainOnly<'_> { 886 #[inline] 887 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 888 Some(self.cmp_wildcard(other)) 889 } 890 } 891 impl Display for DomainOnly<'_> { 892 #[inline] 893 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 894 self.domain.fmt(f) 895 } 896 } 897 impl<'a> Set for DomainOnly<'a> { 898 type Elem = Domain<&'a str>; 899 #[inline] 900 fn bounded_cardinality(&self) -> BoundedCardinality { 901 BoundedCardinality::from_biguint_exact(self.domain_count().get().into()) 902 } 903 #[inline] 904 fn cardinality(&self) -> Option<Cardinality> { 905 Some(Cardinality::Finite(self.domain_count().get().into())) 906 } 907 #[inline] 908 fn contains<Q>(&self, elem: &Q) -> bool 909 where 910 Q: Borrow<Self::Elem> + Eq + ?Sized, 911 { 912 self.domain == *elem.borrow() 913 
} 914 #[inline] 915 fn is_proper_subset(&self, _: &Self) -> bool { 916 false 917 } 918 #[inline] 919 fn is_subset(&self, val: &Self) -> bool { 920 self == val 921 } 922 } 923 impl SetOrd for DomainOnly<'_> {} 924 impl<'a> Deref for DomainOnly<'a> { 925 type Target = Domain<&'a str>; 926 #[inline] 927 fn deref(&self) -> &Self::Target { 928 &self.domain 929 } 930 } 931 impl<'a> ParsedDomain<'a> for DomainOnly<'a> { 932 type Error = FirefoxDomainErr; 933 #[expect(single_use_lifetimes, reason = "false positive")] 934 #[expect( 935 unsafe_code, 936 clippy::arithmetic_side_effects, 937 clippy::indexing_slicing, 938 reason = "we verify all use is correct" 939 )] 940 #[inline] 941 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 942 let value = val.as_bytes().trim_ascii_start(); 943 value.first().map_or_else( 944 || Ok(Value::Blank), 945 |byt| { 946 if *byt == b'#' { 947 // SAFETY: 948 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 949 // since the first byte is '#' or '$' the remaining bytes are still valid UTF-8. 950 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 951 Ok(Value::Comment(comment)) 952 } else { 953 domain_icann_tld( 954 value[..value 955 .iter() 956 .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) 957 .map_or_else(convert::identity, convert::identity)] 958 .trim_ascii_end(), 959 ) 960 .map(|domain| Value::Domain(Self { domain })) 961 } 962 }, 963 ) 964 } 965 #[inline] 966 fn domain(&self) -> &Domain<&'a str> { 967 &self.domain 968 } 969 #[inline] 970 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 971 write_rpz_line(&mut writer, self.domain(), action, false) 972 } 973 } 974 /// Domain constructed from a 975 /// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax). 
976 /// 977 /// Specifically the domain must conform to the following extended regex: 978 /// 979 /// `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` 980 /// 981 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 982 /// or at least length five and begins with `xn--`, `<ws>` is any sequence of 983 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace), and `<ip>` is one of the following: 984 /// 985 /// `::`, `::1`, `0.0.0.0`, or `127.0.0.1`. 986 /// 987 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 988 /// are ignored and will be parsed into a [`Value::Domain`]. 989 #[derive(Clone, Debug)] 990 pub struct Hosts<'a> { 991 /// The `Domain`. 992 domain: Domain<&'a str>, 993 } 994 impl Hosts<'_> { 995 /// Read [`Adblock::cmp_hosts`]. 996 #[inline] 997 #[must_use] 998 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 999 other.cmp_hosts(self).reverse() 1000 } 1001 /// Read [`DomainOnly::cmp_hosts`]. 1002 #[inline] 1003 #[must_use] 1004 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 1005 other.cmp_hosts(self).reverse() 1006 } 1007 /// Read [`Wildcard::cmp_hosts`]. 1008 #[inline] 1009 #[must_use] 1010 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 1011 other.cmp_hosts(self).reverse() 1012 } 1013 /// Same as [`Hosts::cardinality`] except that a `NonZeroU8` is returned. 1014 /// 1015 /// The value is always 1. 
1016 #[inline] 1017 #[must_use] 1018 pub const fn domain_count(&self) -> NonZeroU8 { 1019 ONE 1020 } 1021 } 1022 impl PartialEq<Hosts<'_>> for Hosts<'_> { 1023 #[inline] 1024 fn eq(&self, other: &Hosts<'_>) -> bool { 1025 self.domain == other.domain 1026 } 1027 } 1028 impl PartialEq<Hosts<'_>> for &Hosts<'_> { 1029 #[inline] 1030 fn eq(&self, other: &Hosts<'_>) -> bool { 1031 **self == *other 1032 } 1033 } 1034 impl PartialEq<&Hosts<'_>> for Hosts<'_> { 1035 #[inline] 1036 fn eq(&self, other: &&Hosts<'_>) -> bool { 1037 *self == **other 1038 } 1039 } 1040 impl PartialEq<Adblock<'_>> for Hosts<'_> { 1041 #[inline] 1042 fn eq(&self, other: &Adblock<'_>) -> bool { 1043 other == self 1044 } 1045 } 1046 impl PartialEq<DomainOnly<'_>> for Hosts<'_> { 1047 #[inline] 1048 fn eq(&self, other: &DomainOnly<'_>) -> bool { 1049 other == self 1050 } 1051 } 1052 impl PartialEq<Wildcard<'_>> for Hosts<'_> { 1053 #[inline] 1054 fn eq(&self, other: &Wildcard<'_>) -> bool { 1055 !other.proper_subdomains && self.domain == other.domain 1056 } 1057 } 1058 impl PartialEq<&Wildcard<'_>> for Hosts<'_> { 1059 #[inline] 1060 fn eq(&self, other: &&Wildcard<'_>) -> bool { 1061 *self == **other 1062 } 1063 } 1064 impl PartialEq<Wildcard<'_>> for &Hosts<'_> { 1065 #[inline] 1066 fn eq(&self, other: &Wildcard<'_>) -> bool { 1067 **self == *other 1068 } 1069 } 1070 impl PartialEq<&Hosts<'_>> for Wildcard<'_> { 1071 #[inline] 1072 fn eq(&self, other: &&Hosts<'_>) -> bool { 1073 *self == **other 1074 } 1075 } 1076 impl PartialEq<Hosts<'_>> for &Wildcard<'_> { 1077 #[inline] 1078 fn eq(&self, other: &Hosts<'_>) -> bool { 1079 **self == *other 1080 } 1081 } 1082 impl Eq for Hosts<'_> {} 1083 impl Hash for Hosts<'_> { 1084 #[inline] 1085 fn hash<H: Hasher>(&self, state: &mut H) { 1086 self.domain.hash(state); 1087 } 1088 } 1089 impl PartialOrd<Hosts<'_>> for Hosts<'_> { 1090 #[inline] 1091 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 1092 Some(self.cmp(other)) 1093 } 1094 } 1095 impl 
Ord for Hosts<'_> { 1096 /// Read [`Domain::cmp`]. 1097 #[inline] 1098 fn cmp(&self, other: &Self) -> Ordering { 1099 self.domain.cmp(&other.domain) 1100 } 1101 } 1102 impl PartialOrd<Adblock<'_>> for Hosts<'_> { 1103 #[inline] 1104 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 1105 Some(self.cmp_adblock(other)) 1106 } 1107 } 1108 impl PartialOrd<DomainOnly<'_>> for Hosts<'_> { 1109 #[inline] 1110 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 1111 Some(self.cmp_domain_only(other)) 1112 } 1113 } 1114 impl PartialOrd<Wildcard<'_>> for Hosts<'_> { 1115 #[inline] 1116 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 1117 Some(self.cmp_wildcard(other)) 1118 } 1119 } 1120 impl Display for Hosts<'_> { 1121 #[inline] 1122 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1123 self.domain.fmt(f) 1124 } 1125 } 1126 impl<'a> Set for Hosts<'a> { 1127 type Elem = Domain<&'a str>; 1128 #[inline] 1129 fn bounded_cardinality(&self) -> BoundedCardinality { 1130 BoundedCardinality::from_biguint_exact(self.domain_count().get().into()) 1131 } 1132 #[inline] 1133 fn cardinality(&self) -> Option<Cardinality> { 1134 Some(Cardinality::Finite(self.domain_count().get().into())) 1135 } 1136 #[inline] 1137 fn contains<Q>(&self, elem: &Q) -> bool 1138 where 1139 Q: Borrow<Self::Elem> + Eq + ?Sized, 1140 { 1141 self.domain == *elem.borrow() 1142 } 1143 #[inline] 1144 fn is_proper_subset(&self, _: &Self) -> bool { 1145 false 1146 } 1147 #[inline] 1148 fn is_subset(&self, val: &Self) -> bool { 1149 self == val 1150 } 1151 } 1152 impl SetOrd for Hosts<'_> {} 1153 impl<'a> Deref for Hosts<'a> { 1154 type Target = Domain<&'a str>; 1155 #[inline] 1156 fn deref(&self) -> &Self::Target { 1157 &self.domain 1158 } 1159 } 1160 impl<'a> ParsedDomain<'a> for Hosts<'a> { 1161 type Error = FirefoxDomainErr; 1162 #[expect(single_use_lifetimes, reason = "false positive")] 1163 #[expect( 1164 unsafe_code, 1165 clippy::arithmetic_side_effects, 1166 
clippy::indexing_slicing, 1167 reason = "carefully verified use is correct" 1168 )] 1169 #[inline] 1170 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 1171 let mut value = val.as_bytes().trim_ascii_start(); 1172 value.first().map_or_else( 1173 || Ok(Value::Blank), 1174 |byt| { 1175 if *byt == b'#' { 1176 // SAFETY: 1177 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 1178 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 1179 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 1180 Ok(Value::Comment(comment)) 1181 } else { 1182 value = value 1183 .get(..3) 1184 .ok_or(FirefoxDomainErr::InvalidHostsIP) 1185 .and_then(|fst| { 1186 if fst == b"::1" { 1187 Ok(&value[3..]) 1188 } else if &value[..2] == b"::" { 1189 Ok(&value[2..]) 1190 } else { 1191 value 1192 .get(..7) 1193 .ok_or(FirefoxDomainErr::InvalidHostsIP) 1194 .and_then(|fst2| { 1195 if fst2 == b"0.0.0.0" { 1196 Ok(&value[7..]) 1197 } else { 1198 value 1199 .get(..9) 1200 .ok_or(FirefoxDomainErr::InvalidHostsIP) 1201 .and_then(|fst3| { 1202 if fst3 == b"127.0.0.1" { 1203 Ok(&value[9..]) 1204 } else { 1205 Err(FirefoxDomainErr::InvalidHostsIP) 1206 } 1207 }) 1208 } 1209 }) 1210 } 1211 })?; 1212 let len = value.len(); 1213 value = value.trim_ascii_start(); 1214 if len == value.len() { 1215 // There has to be at least one space or tab between the IP and domain. 
1216 Err(FirefoxDomainErr::InvalidHostsIP) 1217 } else { 1218 domain_icann_tld( 1219 value[..value 1220 .iter() 1221 .try_fold( 1222 0, 1223 |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }, 1224 ) 1225 .map_or_else(convert::identity, convert::identity)] 1226 .trim_ascii_end(), 1227 ) 1228 .map(|domain| Value::Domain(Self { domain })) 1229 } 1230 } 1231 }, 1232 ) 1233 } 1234 #[inline] 1235 fn domain(&self) -> &Domain<&'a str> { 1236 &self.domain 1237 } 1238 #[inline] 1239 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 1240 write_rpz_line(&mut writer, self.domain(), action, false) 1241 } 1242 } 1243 /// Domain constructed from a 1244 /// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext). 1245 /// 1246 /// Specifically the domain must conform to the following extended regex: 1247 /// 1248 /// `^<ws>*(\*\.)?<domain><ws>*(#.*)?$` 1249 /// 1250 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 1251 /// or at least length five and begins with `xn--`, and `<ws>` is any sequence of 1252 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 1253 /// 1254 /// If `domain` begins with `*.`, then `domain` must have length less than 252. 1255 /// 1256 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 1257 /// are ignored and will be parsed into a [`Value::Domain`]. 1258 #[derive(Clone, Debug)] 1259 pub struct Wildcard<'a> { 1260 /// The `Domain`. 1261 domain: Domain<&'a str>, 1262 /// `true` iff `domain` represents all proper subdomains. Note that this does _not_ include `domain` itself. 1263 proper_subdomains: bool, 1264 } 1265 impl Wildcard<'_> { 1266 /// Returns `true` iff the contained [`Domain`] represents all proper subdomains. Note this does _not_ 1267 /// include the `Domain` itself. 
1268 #[inline] 1269 #[must_use] 1270 pub const fn is_proper_subdomains(&self) -> bool { 1271 self.proper_subdomains 1272 } 1273 /// Read [`Adblock::cmp_wildcard`]. 1274 #[inline] 1275 #[must_use] 1276 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 1277 other.cmp_wildcard(self).reverse() 1278 } 1279 /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used 1280 /// for both. 1281 #[must_use] 1282 fn cmp_dom(&self, other: &Domain<&str>) -> Ordering { 1283 match self.domain.cmp_by_domain_ordering(other) { 1284 DomainOrdering::Less => Ordering::Less, 1285 DomainOrdering::Shorter => { 1286 if self.proper_subdomains { 1287 Ordering::Greater 1288 } else { 1289 Ordering::Less 1290 } 1291 } 1292 DomainOrdering::Equal => { 1293 if self.proper_subdomains { 1294 Ordering::Greater 1295 } else { 1296 Ordering::Equal 1297 } 1298 } 1299 DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater, 1300 } 1301 } 1302 /// The total order that is defined follows the following hierarchy: 1303 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 1304 /// 2. If 1. evaluates as not equivalent, then return the result. 1305 /// 3. If `self` represents a single `Domain` (i.e., `!self.is_proper_subdomains()`), 1306 /// then return the comparison of label counts. 1307 /// 4. Return `self` is greater. 1308 /// 1309 /// For example, `com` `<` `example.com` `<` `*.example.com` `<` `*.com` `<` `net` `<` `example.net` `<` `*.example.net` `<` `*.net`. 1310 #[inline] 1311 #[must_use] 1312 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 1313 self.cmp_dom(&other.domain) 1314 } 1315 /// Read [`Wildcard::cmp_domain_only`]. 1316 #[inline] 1317 #[must_use] 1318 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 1319 self.cmp_dom(&other.domain) 1320 } 1321 /// Same as [`Wildcard::cardinality`] except that a `BigUint` is returned. 
Note the count does _not_ include 1322 /// the `Domain` itself when `self.is_proper_subdomains()`. 1323 /// 1324 /// `!self.is_proper_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`. 1325 #[inline] 1326 #[must_use] 1327 pub fn domain_count(&self) -> BigUint { 1328 if self.proper_subdomains { 1329 proper_subdomain_count(&self.domain) 1330 } else { 1331 BigUint::new(vec![1]) 1332 } 1333 } 1334 } 1335 impl PartialEq<Wildcard<'_>> for Wildcard<'_> { 1336 #[inline] 1337 fn eq(&self, other: &Wildcard<'_>) -> bool { 1338 self.domain == other.domain && self.proper_subdomains == other.proper_subdomains 1339 } 1340 } 1341 impl PartialEq<Wildcard<'_>> for &Wildcard<'_> { 1342 #[inline] 1343 fn eq(&self, other: &Wildcard<'_>) -> bool { 1344 **self == *other 1345 } 1346 } 1347 impl PartialEq<&Wildcard<'_>> for Wildcard<'_> { 1348 #[inline] 1349 fn eq(&self, other: &&Wildcard<'_>) -> bool { 1350 *self == **other 1351 } 1352 } 1353 impl PartialEq<Adblock<'_>> for Wildcard<'_> { 1354 #[inline] 1355 fn eq(&self, other: &Adblock<'_>) -> bool { 1356 other == self 1357 } 1358 } 1359 impl PartialEq<DomainOnly<'_>> for Wildcard<'_> { 1360 #[inline] 1361 fn eq(&self, other: &DomainOnly<'_>) -> bool { 1362 other == self 1363 } 1364 } 1365 impl PartialEq<Hosts<'_>> for Wildcard<'_> { 1366 #[inline] 1367 fn eq(&self, other: &Hosts<'_>) -> bool { 1368 other == self 1369 } 1370 } 1371 impl Eq for Wildcard<'_> {} 1372 impl Hash for Wildcard<'_> { 1373 #[inline] 1374 fn hash<H: Hasher>(&self, state: &mut H) { 1375 self.domain.hash(state); 1376 } 1377 } 1378 impl PartialOrd<Wildcard<'_>> for Wildcard<'_> { 1379 #[inline] 1380 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 1381 Some(self.cmp(other)) 1382 } 1383 } 1384 impl Ord for Wildcard<'_> { 1385 /// The total order that is defined follows the following hierarchy: 1386 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 1387 /// 2. If 1. 
evaluates as not equivalent, then return the result. 1388 /// 3. If both domains represent a single `Domain`, then return the comparison 1389 /// of label counts. 1390 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 1391 /// 5. Return the inverse of the comparison of label counts. 1392 /// 1393 /// For example, `com` `<` `example.com` `<` `*.example.com` `<` `*.com` `<` `net` `<` `example.net` `<` `*.example.net` `<` `*.net`. 1394 #[inline] 1395 fn cmp(&self, other: &Self) -> Ordering { 1396 match self.domain.cmp_by_domain_ordering(&other.domain) { 1397 DomainOrdering::Less => Ordering::Less, 1398 DomainOrdering::Shorter => { 1399 if self.proper_subdomains { 1400 Ordering::Greater 1401 } else { 1402 Ordering::Less 1403 } 1404 } 1405 DomainOrdering::Equal => { 1406 if self.proper_subdomains { 1407 if other.proper_subdomains { 1408 Ordering::Equal 1409 } else { 1410 Ordering::Greater 1411 } 1412 } else if other.proper_subdomains { 1413 Ordering::Less 1414 } else { 1415 Ordering::Equal 1416 } 1417 } 1418 DomainOrdering::Longer => { 1419 if self.proper_subdomains { 1420 if other.proper_subdomains { 1421 Ordering::Less 1422 } else { 1423 Ordering::Greater 1424 } 1425 } else if other.proper_subdomains { 1426 Ordering::Less 1427 } else { 1428 Ordering::Greater 1429 } 1430 } 1431 DomainOrdering::Greater => Ordering::Greater, 1432 } 1433 } 1434 } 1435 impl PartialOrd<Adblock<'_>> for Wildcard<'_> { 1436 #[inline] 1437 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 1438 Some(self.cmp_adblock(other)) 1439 } 1440 } 1441 impl PartialOrd<DomainOnly<'_>> for Wildcard<'_> { 1442 #[inline] 1443 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 1444 Some(self.cmp_domain_only(other)) 1445 } 1446 } 1447 impl PartialOrd<Hosts<'_>> for Wildcard<'_> { 1448 #[inline] 1449 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 1450 Some(self.cmp_hosts(other)) 1451 } 1452 } 1453 impl Display for 
Wildcard<'_> { 1454 #[inline] 1455 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1456 write!( 1457 f, 1458 "{}{}", 1459 if self.proper_subdomains { "*." } else { "" }, 1460 self.domain 1461 ) 1462 } 1463 } 1464 impl<'a> Set for Wildcard<'a> { 1465 type Elem = Domain<&'a str>; 1466 #[inline] 1467 fn bounded_cardinality(&self) -> BoundedCardinality { 1468 BoundedCardinality::from_biguint_exact(self.domain_count()) 1469 } 1470 #[inline] 1471 fn cardinality(&self) -> Option<Cardinality> { 1472 Some(Cardinality::Finite(self.domain_count())) 1473 } 1474 #[inline] 1475 fn contains<Q>(&self, elem: &Q) -> bool 1476 where 1477 Q: Borrow<Self::Elem> + Eq + ?Sized, 1478 { 1479 if self.proper_subdomains { 1480 self.domain.cmp_by_domain_ordering(elem.borrow()) == DomainOrdering::Shorter 1481 } else { 1482 self.domain == *elem.borrow() 1483 } 1484 } 1485 #[inline] 1486 fn is_proper_subset(&self, val: &Self) -> bool { 1487 // A single domain can never be a proper superset. Proper subdomains cannot be a proper superset if it 1488 // has more labels or the same number of labels as another domain. In all other cases, we need to 1489 // recursively check from the TLD that the labels are the same. 1490 val.proper_subdomains 1491 && val.domain.cmp_by_domain_ordering(&self.domain) == DomainOrdering::Shorter 1492 } 1493 #[inline] 1494 fn is_subset(&self, val: &Self) -> bool { 1495 self == val || self.is_proper_subset(val) 1496 } 1497 } 1498 impl SetOrd for Wildcard<'_> {} 1499 impl<'a> Deref for Wildcard<'a> { 1500 type Target = Domain<&'a str>; 1501 #[inline] 1502 fn deref(&self) -> &Self::Target { 1503 &self.domain 1504 } 1505 } 1506 impl<'a> ParsedDomain<'a> for Wildcard<'a> { 1507 type Error = FirefoxDomainErr; 1508 #[expect(single_use_lifetimes, reason = "false positive")] 1509 #[expect( 1510 unsafe_code, 1511 clippy::arithmetic_side_effects, 1512 clippy::indexing_slicing, 1513 reason = "need them all. care has been taken." 
1514 )] 1515 #[inline] 1516 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 1517 let value = val.as_bytes().trim_ascii_start(); 1518 value.first().map_or_else( 1519 || Ok(Value::Blank), 1520 |byt| { 1521 if *byt == b'#' { 1522 // SAFETY: 1523 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 1524 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 1525 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 1526 Ok(Value::Comment(comment)) 1527 } else { 1528 let (proper_subdomains, val2) = value.get(..2).map_or_else( 1529 || (false, value), 1530 |fst| { 1531 if fst == b"*." { 1532 (true, &value[2..]) 1533 } else { 1534 (false, value) 1535 } 1536 }, 1537 ); 1538 domain_icann_tld( 1539 val2[..val2 1540 .iter() 1541 .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) 1542 .map_or_else(convert::identity, convert::identity)] 1543 .trim_ascii_end(), 1544 ) 1545 .and_then(|domain| { 1546 if proper_subdomains { 1547 if domain.len().get() > 251 { 1548 Err(FirefoxDomainErr::InvalidWildcardDomain) 1549 } else { 1550 Ok(Value::Domain(Self { 1551 domain, 1552 proper_subdomains: true, 1553 })) 1554 } 1555 } else { 1556 Ok(Value::Domain(Self { 1557 domain, 1558 proper_subdomains, 1559 })) 1560 } 1561 }) 1562 } 1563 }, 1564 ) 1565 } 1566 #[inline] 1567 fn domain(&self) -> &Domain<&'a str> { 1568 &self.domain 1569 } 1570 #[inline] 1571 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 1572 write_rpz_line(&mut writer, self.domain(), action, self.proper_subdomains) 1573 } 1574 } 1575 /// A [`Domain`] in a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. 1576 #[derive(Clone, Debug)] 1577 pub enum RpzDomain<'a> { 1578 /// An `Adblock` domain. 1579 Adblock(Adblock<'a>), 1580 /// A `DomainOnly` domain. 1581 DomainOnly(DomainOnly<'a>), 1582 /// A `Hosts` domain. 
1583 Hosts(Hosts<'a>), 1584 /// A `Wildcard` domain. 1585 Wildcard(Wildcard<'a>), 1586 } 1587 impl RpzDomain<'_> { 1588 /// Returns `true` iff `self` represents a single [`Domain`]. 1589 #[inline] 1590 #[must_use] 1591 pub const fn is_domain(&self) -> bool { 1592 match *self { 1593 Self::Adblock(ref dom) => !dom.subdomains, 1594 Self::DomainOnly(_) | Self::Hosts(_) => true, 1595 Self::Wildcard(ref dom) => !dom.proper_subdomains, 1596 } 1597 } 1598 /// Returns `true` iff `self` represents proper subdomains of the contained [`Domain`] (i.e., 1599 /// is a [`Wildcard`] such that [`Wildcard::is_proper_subdomains`]). 1600 #[inline] 1601 #[must_use] 1602 pub const fn is_proper_subdomains(&self) -> bool { 1603 match *self { 1604 Self::Adblock(_) | Self::DomainOnly(_) | Self::Hosts(_) => false, 1605 Self::Wildcard(ref dom) => dom.proper_subdomains, 1606 } 1607 } 1608 /// Returns `true` iff `self` represents subdomains of the contained [`Domain`] (i.e., is an 1609 /// [`Adblock`] such that [`Adblock::is_subdomains`]). 1610 #[inline] 1611 #[must_use] 1612 pub const fn is_subdomains(&self) -> bool { 1613 match *self { 1614 Self::Adblock(ref dom) => dom.subdomains, 1615 Self::DomainOnly(_) | Self::Hosts(_) | Self::Wildcard(_) => false, 1616 } 1617 } 1618 /// Returns the count of [`Domain`]s represented by `self`. This function is the same as 1619 /// [`RpzDomain::cardinality`] except that it returns a `BigUint`. 
1620 #[inline] 1621 #[must_use] 1622 pub fn domain_count(&self) -> BigUint { 1623 match *self { 1624 Self::Adblock(ref dom) => dom.domain_count(), 1625 Self::DomainOnly(ref dom) => dom.domain_count().get().into(), 1626 Self::Hosts(ref dom) => dom.domain_count().get().into(), 1627 Self::Wildcard(ref dom) => dom.domain_count(), 1628 } 1629 } 1630 } 1631 impl PartialEq<RpzDomain<'_>> for RpzDomain<'_> { 1632 #[inline] 1633 fn eq(&self, other: &RpzDomain<'_>) -> bool { 1634 match *self { 1635 Self::Adblock(ref dom) => match *other { 1636 RpzDomain::Adblock(ref dom2) => dom == dom2, 1637 RpzDomain::DomainOnly(ref dom2) => dom == dom2, 1638 RpzDomain::Hosts(ref dom2) => dom == dom2, 1639 RpzDomain::Wildcard(ref dom2) => dom == dom2, 1640 }, 1641 Self::DomainOnly(ref dom) => match *other { 1642 RpzDomain::Adblock(ref dom2) => dom == dom2, 1643 RpzDomain::DomainOnly(ref dom2) => dom == dom2, 1644 RpzDomain::Hosts(ref dom2) => dom == dom2, 1645 RpzDomain::Wildcard(ref dom2) => dom == dom2, 1646 }, 1647 Self::Hosts(ref dom) => match *other { 1648 RpzDomain::Adblock(ref dom2) => dom == dom2, 1649 RpzDomain::DomainOnly(ref dom2) => dom == dom2, 1650 RpzDomain::Hosts(ref dom2) => dom == dom2, 1651 RpzDomain::Wildcard(ref dom2) => dom == dom2, 1652 }, 1653 Self::Wildcard(ref dom) => match *other { 1654 RpzDomain::Adblock(ref dom2) => dom == dom2, 1655 RpzDomain::DomainOnly(ref dom2) => dom == dom2, 1656 RpzDomain::Hosts(ref dom2) => dom == dom2, 1657 RpzDomain::Wildcard(ref dom2) => dom == dom2, 1658 }, 1659 } 1660 } 1661 } 1662 impl PartialEq<RpzDomain<'_>> for &RpzDomain<'_> { 1663 #[inline] 1664 fn eq(&self, other: &RpzDomain<'_>) -> bool { 1665 **self == *other 1666 } 1667 } 1668 impl PartialEq<&RpzDomain<'_>> for RpzDomain<'_> { 1669 #[inline] 1670 fn eq(&self, other: &&RpzDomain<'_>) -> bool { 1671 *self == **other 1672 } 1673 } 1674 impl Eq for RpzDomain<'_> {} 1675 impl Hash for RpzDomain<'_> { 1676 #[inline] 1677 fn hash<H: Hasher>(&self, state: &mut H) { 1678 
impl PartialOrd<RpzDomain<'_>> for RpzDomain<'_> {
    /// Always returns `Some`: the result is exactly that of the [`Ord`] implementation.
    #[inline]
    fn partial_cmp(&self, other: &RpzDomain<'_>) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
1718 Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2), 1719 Self::Hosts(ref dom2) => dom.cmp(dom2), 1720 Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2), 1721 }, 1722 Self::Wildcard(ref dom) => match *other { 1723 Self::Adblock(ref dom2) => dom.cmp_adblock(dom2), 1724 Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2), 1725 Self::Hosts(ref dom2) => dom.cmp_hosts(dom2), 1726 Self::Wildcard(ref dom2) => dom.cmp(dom2), 1727 }, 1728 } 1729 } 1730 } 1731 impl Display for RpzDomain<'_> { 1732 #[inline] 1733 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1734 match *self { 1735 Self::Adblock(ref dom) => dom.fmt(f), 1736 Self::DomainOnly(ref dom) => dom.fmt(f), 1737 Self::Hosts(ref dom) => dom.fmt(f), 1738 Self::Wildcard(ref dom) => dom.fmt(f), 1739 } 1740 } 1741 } 1742 impl<'a> Set for RpzDomain<'a> { 1743 type Elem = Domain<&'a str>; 1744 #[inline] 1745 fn bounded_cardinality(&self) -> BoundedCardinality { 1746 BoundedCardinality::from_biguint_exact(self.domain_count()) 1747 } 1748 #[inline] 1749 fn cardinality(&self) -> Option<Cardinality> { 1750 Some(Cardinality::Finite(self.domain_count())) 1751 } 1752 #[inline] 1753 fn contains<Q>(&self, elem: &Q) -> bool 1754 where 1755 Q: Borrow<Self::Elem> + Eq + ?Sized, 1756 { 1757 match *self { 1758 Self::Adblock(ref dom) => dom.contains(elem), 1759 Self::DomainOnly(ref dom) => dom.contains(elem), 1760 Self::Hosts(ref dom) => dom.contains(elem), 1761 Self::Wildcard(ref dom) => dom.contains(elem), 1762 } 1763 } 1764 #[inline] 1765 fn is_proper_subset(&self, val: &Self) -> bool { 1766 match *val { 1767 Self::Adblock(ref dom) => { 1768 dom.subdomains 1769 && match *self { 1770 Self::Adblock(ref dom2) => { 1771 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1772 == DomainOrdering::Shorter 1773 } 1774 Self::DomainOnly(ref dom2) => matches!( 1775 dom.domain.cmp_by_domain_ordering(&dom2.domain), 1776 DomainOrdering::Shorter | DomainOrdering::Equal 1777 ), 1778 Self::Hosts(ref dom2) => matches!( 1779 
dom.domain.cmp_by_domain_ordering(&dom2.domain), 1780 DomainOrdering::Shorter | DomainOrdering::Equal 1781 ), 1782 Self::Wildcard(ref dom2) => matches!( 1783 dom.domain.cmp_by_domain_ordering(&dom2.domain), 1784 DomainOrdering::Shorter | DomainOrdering::Equal 1785 ), 1786 } 1787 } 1788 Self::DomainOnly(_) | Self::Hosts(_) => false, 1789 Self::Wildcard(ref dom) => { 1790 dom.proper_subdomains 1791 && match *self { 1792 Self::Adblock(ref dom2) => { 1793 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1794 == DomainOrdering::Shorter 1795 } 1796 Self::DomainOnly(ref dom2) => { 1797 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1798 == DomainOrdering::Shorter 1799 } 1800 Self::Hosts(ref dom2) => { 1801 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1802 == DomainOrdering::Shorter 1803 } 1804 Self::Wildcard(ref dom2) => { 1805 dom.domain.cmp_by_domain_ordering(&dom2.domain) 1806 == DomainOrdering::Shorter 1807 } 1808 } 1809 } 1810 } 1811 } 1812 #[inline] 1813 fn is_subset(&self, val: &Self) -> bool { 1814 self == val || self.is_proper_subset(val) 1815 } 1816 } 1817 impl SetOrd for RpzDomain<'_> {} 1818 impl<'a> Deref for RpzDomain<'a> { 1819 type Target = Domain<&'a str>; 1820 #[inline] 1821 fn deref(&self) -> &Self::Target { 1822 match *self { 1823 Self::Adblock(ref dom) => &dom.domain, 1824 Self::DomainOnly(ref dom) => &dom.domain, 1825 Self::Hosts(ref dom) => &dom.domain, 1826 Self::Wildcard(ref dom) => &dom.domain, 1827 } 1828 } 1829 } 1830 impl<'a: 'b, 'b> From<Adblock<'a>> for RpzDomain<'b> { 1831 #[inline] 1832 fn from(value: Adblock<'a>) -> Self { 1833 Self::Adblock(value) 1834 } 1835 } 1836 impl<'a: 'b, 'b> From<DomainOnly<'a>> for RpzDomain<'b> { 1837 #[inline] 1838 fn from(value: DomainOnly<'a>) -> Self { 1839 Self::DomainOnly(value) 1840 } 1841 } 1842 impl<'a: 'b, 'b> From<Hosts<'a>> for RpzDomain<'b> { 1843 #[inline] 1844 fn from(value: Hosts<'a>) -> Self { 1845 Self::Hosts(value) 1846 } 1847 } 1848 impl<'a: 'b, 'b> From<Wildcard<'a>> for RpzDomain<'b> { 
#[inline]
fn from(value: Wildcard<'a>) -> Self {
    Self::Wildcard(value)
}
}
impl<'a> ParsedDomain<'a> for RpzDomain<'a> {
    type Error = FirefoxDomainErr;
    /// Parses `val` by trying each supported format in turn: [`DomainOnly`], then [`Hosts`], then
    /// [`Wildcard`], and finally [`Adblock`].
    ///
    /// The first format that parses successfully wins, and its [`Value`] is re-wrapped in the matching
    /// `RpzDomain` variant (comments and blanks are passed through unchanged).
    ///
    /// # Errors
    ///
    /// When all four formats fail, the error from [`Adblock::parse_value`] is returned; the errors from
    /// the earlier attempts are discarded.
    #[expect(single_use_lifetimes, reason = "false positive")]
    #[inline]
    fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
        DomainOnly::parse_value(val)
            .map(|value| match value {
                Value::Domain(dom) => Value::Domain(Self::DomainOnly(dom)),
                Value::Comment(com) => Value::Comment(com),
                Value::Blank => Value::Blank,
            })
            .or_else(|_| {
                Hosts::parse_value(val).map(|value| match value {
                    Value::Domain(dom) => Value::Domain(Self::Hosts(dom)),
                    Value::Comment(com) => Value::Comment(com),
                    Value::Blank => Value::Blank,
                })
            })
            .or_else(|_| {
                Wildcard::parse_value(val).map(|value| match value {
                    Value::Domain(dom) => Value::Domain(Self::Wildcard(dom)),
                    Value::Comment(com) => Value::Comment(com),
                    Value::Blank => Value::Blank,
                })
            })
            .or_else(|_| {
                // Last attempt: its error is the one callers see when nothing matched.
                Adblock::parse_value(val).map(|value| match value {
                    Value::Domain(dom) => Value::Domain(Self::Adblock(dom)),
                    Value::Comment(com) => Value::Comment(com),
                    Value::Blank => Value::Blank,
                })
            })
    }
    /// Returns a reference to the `domain` field of whichever variant is wrapped.
    #[inline]
    fn domain(&self) -> &Domain<&'a str> {
        match *self {
            Self::Adblock(ref dom) => &dom.domain,
            Self::DomainOnly(ref dom) => &dom.domain,
            Self::Hosts(ref dom) => &dom.domain,
            Self::Wildcard(ref dom) => &dom.domain,
        }
    }
    /// Forwards to the `write_to_rpz` implementation of whichever variant is wrapped.
    ///
    /// # Errors
    ///
    /// Returns whatever error the wrapped variant's `write_to_rpz` returns.
    #[inline]
    fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error> {
        match *self {
            Self::Adblock(ref dom) => dom.write_to_rpz(action, writer),
            Self::DomainOnly(ref dom) => dom.write_to_rpz(action, writer),
            Self::Hosts(ref dom) => dom.write_to_rpz(action, writer),
            Self::Wildcard(ref dom) => dom.write_to_rpz(action, writer),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::{
        Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard,
    };
    use ascii_domain::dom::DomainErr;
    use num_bigint::BigUint;
    use superset_map::SupersetSet;
    /// Returns `count` single-character labels (`a`) joined by `.`.
    ///
    /// For `count >= 1` the result has length `2 * count - 1` (e.g., 125 labels is 249 bytes and
    /// 127 labels is 253 bytes).
    fn single_char_labels(count: usize) -> String {
        vec!["a"; count].join(".")
    }
    /// Parses `slice` with [`Adblock`] when it starts with `|`; otherwise parses it with [`Wildcard`].
    ///
    /// # Panics
    ///
    /// Panics when parsing fails or when the parsed value is not a domain.
    fn parse_adblock_or_wildcard(slice: &str) -> RpzDomain<'_> {
        if slice.starts_with('|') {
            RpzDomain::Adblock(
                Adblock::parse_value(slice)
                    .expect("Bug in Adblock::parse_value")
                    .unwrap_domain(),
            )
        } else {
            RpzDomain::Wildcard(
                Wildcard::parse_value(slice)
                    .expect("Bug in Wildcard::parse_value")
                    .unwrap_domain(),
            )
        }
    }
    #[test]
    fn test_adblock_parse() {
        // Test subdomains.
        assert!(
            Adblock::parse_value("||www.example.com").is_ok_and(|val| match val {
                Value::Domain(ref dom) =>
                    dom.subdomains && dom.domain.as_bytes() == b"www.example.com",
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // Test whitespace and '^' removal.
        assert!(
            Adblock::parse_value(" \t\t ||\t\t \twww.example.com \t\t ^ \t\t ").is_ok_and(
                |val| match val {
                    Value::Domain(ref dom) =>
                        dom.subdomains && dom.domain.as_bytes() == b"www.example.com",
                    Value::Comment(_) | Value::Blank => false,
                }
            )
        );
        assert!(
            Adblock::parse_value("\t\t \twww.example.com \t\t \t\t ").is_ok_and(|val| {
                match val {
                    Value::Domain(ref dom) => {
                        !dom.subdomains && dom.domain.as_bytes() == b"www.example.com"
                    }
                    Value::Comment(_) | Value::Blank => false,
                }
            })
        );
        assert!(
            Adblock::parse_value("www .example.com").is_err_and(
                |err| err == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))
            )
        );
        // Test case-insensitivity.
        assert!(
            Adblock::parse_value("||www.ExAMPle.COm").is_ok_and(|val| {
                match val {
                    Value::Domain(ref dom) => {
                        Adblock::parse_value("||www.example.com").is_ok_and(|val2| match val2 {
                            Value::Domain(ref dom2) => {
                                dom == dom2
                                    && dom.subdomains
                                    && dom2.subdomains
                                    && dom.cmp(dom2).is_eq()
                            }
                            Value::Comment(_) | Value::Blank => false,
                        })
                    }
                    Value::Comment(_) | Value::Blank => false,
                }
            })
        );
        // Test comment.
        assert!(
            Adblock::parse_value(" \t\t #hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == "hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        assert!(
            Adblock::parse_value(" \t\t !! foo").is_ok_and(|val| match val {
                Value::Comment(com) => com == "! foo",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // Test blank.
        assert!(Adblock::parse_value(" \t\t ").is_ok_and(|val| matches!(val, Value::Blank)));
    }
    #[test]
    fn test_domain_only_parse_value() {
        // Test whitespace and comment.
        assert!(
            DomainOnly::parse_value(" \t\t \t\t \twww.example.com#asdflkj asdf alskdfj ")
                .is_ok_and(|val| match val {
                    Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com",
                    Value::Comment(_) | Value::Blank => false,
                })
        );
        assert!(
            DomainOnly::parse_value(" \t\t \t\t \twww.example.com \t\t ^ \t\t ").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))
            )
        );
        // Test case-insensitivity.
        assert!(
            DomainOnly::parse_value("www.ExAMPle.CoM").is_ok_and(|val| match val {
                Value::Domain(ref dom) =>
                    DomainOnly::parse_value("www.example.com").is_ok_and(|val2| match val2 {
                        Value::Domain(ref dom2) => dom.cmp(dom2).is_eq(),
                        Value::Comment(_) | Value::Blank => false,
                    }),
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // Test comment.
        assert!(
            DomainOnly::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // Test blank.
        assert!(
            DomainOnly::parse_value(" \t\t \t\t \t ")
                .is_ok_and(|val| matches!(val, Value::Blank))
        );
        // Test that a TLD beginning with `xn--` is accepted even though it is not all letters.
        assert!(
            DomainOnly::parse_value("example.xn--abc")
                .is_ok_and(|val| matches!(val, Value::Domain(_)))
        );
        // Test invalid TLD.
        assert!(
            DomainOnly::parse_value("www.c1m")
                .is_err_and(|err| err == FirefoxDomainErr::InvalidTld)
        );
    }
    #[test]
    fn test_hosts_parse_value() {
        // Test whitespace and comment.
        assert!(
            Hosts::parse_value(" \t\t 127.0.0.1\t\t \twww.example.com#asdflkj asdf alskdfj ")
                .is_ok_and(|val| match val {
                    Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com",
                    Value::Comment(_) | Value::Blank => false,
                })
        );
        assert!(
            Hosts::parse_value(" \t\t 0.0.0.0\t\t \twww.example.com \t\t ^ \t\t ").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))
            )
        );
        assert!(
            Hosts::parse_value("::1\twww .example.com").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))
            )
        );
        // Test invalid IP.
        assert!(
            Hosts::parse_value("::2 www.example.com")
                .is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP)
        );
        assert!(
            Hosts::parse_value(":2 www.example.com")
                .is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP)
        );
        assert!(
            Hosts::parse_value("www.example.com")
                .is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP)
        );
        assert!(
            Hosts::parse_value("10.4.2.256 www.example.com")
                .is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP)
        );
        // Test case-insensitivity.
        assert!(
            Hosts::parse_value(":: www.ExAMPle.Com").is_ok_and(|val| match val {
                Value::Domain(ref dom) =>
                    Hosts::parse_value("127.0.0.1 www.example.com").is_ok_and(|val2| match val2 {
                        Value::Domain(ref dom2) => dom.cmp(dom2).is_eq(),
                        Value::Comment(_) | Value::Blank => false,
                    }),
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // Test comment.
        assert!(
            Hosts::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // Test blank.
        assert!(
            Hosts::parse_value(" \t\t \t\t \t ").is_ok_and(|val| matches!(val, Value::Blank))
        );
    }
    #[test]
    fn test_wildcard_parse_value() {
        // Test bad asterisk.
        assert!(
            Wildcard::parse_value("*").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))
            )
        );
        assert!(
            Wildcard::parse_value("www*.example.com").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))
            )
        );
        assert!(
            Wildcard::parse_value("www.*.com").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))
            )
        );
        assert!(
            Wildcard::parse_value("*..com")
                .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::EmptyLabel))
        );
        assert!(
            Wildcard::parse_value("www.com*").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))
            )
        );
        assert!(
            Wildcard::parse_value("ww*w.com").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))
            )
        );
        // Test case-insensitivity.
        assert!(
            Wildcard::parse_value("*.wWw.ExamPLE.com").is_ok_and(|val| match val {
                Value::Domain(ref dom) =>
                    Wildcard::parse_value("*.www.example.com").is_ok_and(|val2| match val2 {
                        Value::Domain(ref dom2) =>
                            dom.cmp(dom2).is_eq()
                                && dom == dom2
                                && dom.proper_subdomains
                                && dom2.proper_subdomains,
                        Value::Comment(_) | Value::Blank => false,
                    }),
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // Test proper subdomains.
        assert!(
            Wildcard::parse_value("*.www.example.com").is_ok_and(|val| match val {
                Value::Domain(ref dom) =>
                    dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains,
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // Test comment.
        assert!(
            Wildcard::parse_value(" \t\t \t\t \t*.www.example.com#asdflkj asdf alskdfj ")
                .is_ok_and(|val| match val {
                    Value::Domain(ref dom) =>
                        dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains,
                    Value::Comment(_) | Value::Blank => false,
                })
        );
        assert!(
            Wildcard::parse_value(" \t\t \t\t \twww.example.com #asdflkj asdf alskdfj ")
                .is_ok_and(|val| match val {
                    Value::Domain(ref dom) =>
                        dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains,
                    Value::Comment(_) | Value::Blank => false,
                })
        );
        // Test whitespace removal.
        assert!(
            Wildcard::parse_value(" \t\t *.www.example.com \t\t \t ").is_ok_and(|val| {
                match val {
                    Value::Domain(ref dom) => {
                        dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains
                    }
                    Value::Comment(_) | Value::Blank => false,
                }
            })
        );
        assert!(
            Wildcard::parse_value("\t\t \twww.example.com \t\t \t\t ").is_ok_and(|val| {
                match val {
                    Value::Domain(ref dom) => {
                        dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains
                    }
                    Value::Comment(_) | Value::Blank => false,
                }
            })
        );
        assert!(
            Wildcard::parse_value("www .example.com").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))
            )
        );
        // Test 127 labels after wildcard error.
        let labels_127 = format!("*.{}", single_char_labels(127));
        assert!(
            Wildcard::parse_value(labels_127.as_str())
                .is_err_and(|e| e == FirefoxDomainErr::InvalidWildcardDomain)
        );
        // Test 126 labels after wildcard is ok.
        let labels_126 = format!("*.{}", single_char_labels(126));
        assert!(
            Wildcard::parse_value(labels_126.as_str()).is_ok_and(|val| match val {
                Value::Domain(ref dom) =>
                    dom.domain.iter().count() == 126 && dom.proper_subdomains,
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // Test comment.
        assert!(
            Wildcard::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // Test blank.
        assert!(
            Wildcard::parse_value(" \t\t \t\t \t ").is_ok_and(|val| matches!(val, Value::Blank))
        );
    }
    #[test]
    fn test_rpz_parse_value() {
        assert!(
            RpzDomain::parse_value("*.www.example.com").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                dom.is_proper_subdomains() && dom.domain().as_bytes() == b"www.example.com"
            })
        );
        assert!(
            RpzDomain::parse_value("||www.example.com").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                dom.is_subdomains() && dom.domain().as_bytes() == b"www.example.com"
            })
        );
        assert!(
            RpzDomain::parse_value("0.0.0.0 www.example.com").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                !(dom.is_subdomains() || dom.is_proper_subdomains())
                    && dom.domain().as_bytes() == b"www.example.com"
            })
        );
        assert!(
            RpzDomain::parse_value("www.example.com").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                !(dom.is_subdomains() || dom.is_proper_subdomains())
                    && dom.domain().as_bytes() == b"www.example.com"
            })
        );
        // Test case-insensitivity.
        assert!(
            RpzDomain::parse_value("*.Www.ExaMPle.COm").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                RpzDomain::parse_value("*.www.example.com").is_ok_and(|val2| {
                    let dom2 = val2.unwrap_domain();
                    dom.is_proper_subdomains()
                        && dom2.is_proper_subdomains()
                        && dom == dom2
                        && dom.cmp(&dom2).is_eq()
                })
            })
        );
        // Test comment.
        assert!(
            RpzDomain::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        assert!(
            RpzDomain::parse_value(" \t\t \t\t \t ! hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // Test blank.
        assert!(
            RpzDomain::parse_value(" \t\t \t\t \t ")
                .is_ok_and(|val| matches!(val, Value::Blank))
        );
    }
    #[test]
    fn test_rpz_ord_and_eq() -> Result<(), &'static str> {
        let first = RpzDomain::DomainOnly(
            DomainOnly::parse_value("bar.com")
                .expect("bug in DomainOnly::parse_value")
                .unwrap_domain(),
        );
        // Each entry must compare strictly greater than the one before it.
        "www.bar.com,*.www.bar.com,||www.bar.com,*.bar.com,||bar.com,Example.com,WwW.exaMple.com,*.www.example.com,||www.example.com,*.example.com,||example.com,FOo.coM,Www.foo.com,*.foo.com,*.coM,example.net,*.net"
            .split(',')
            .try_fold(first, |prev, slice| {
                let cur = parse_adblock_or_wildcard(slice);
                // `prev == prev` and `cur == cur` are deliberate reflexivity checks of `PartialEq`.
                if prev < cur && cur > prev && prev == prev && cur == cur {
                    Ok(cur)
                } else {
                    Err("PartialEq or Ord are not correctly implemented for RpzDomain.")
                }
            })
            .map(|_| ())
    }
    #[test]
    fn test_superset_set() {
        let mut iter = "*.NeT,*.net,www.bar.com,*.net,*.www.bar.com,||www.bar.com,*.bar.com,||bar.com,example.com,www.example.com,*.www.example.com,||www.example.com,*.example.com,||example.com,foo.com,www.foo.com,*.foo.com,*.com,example.net,*.abc.abc,||aawww.abc,abc.abc"
            .split(',')
            .fold(SupersetSet::new(), |mut doms, slice| {
                _ = doms.insert(parse_adblock_or_wildcard(slice));
                doms
            })
            .into_iter();
        assert!(iter.next().is_some_and(|d| {
            d.domain().as_bytes() == b"aawww.abc" && d.is_subdomains()
        }));
        assert!(iter.next().is_some_and(|d| {
            d.domain().as_bytes() == b"abc.abc" && d.is_domain()
        }));
        assert!(iter.next().is_some_and(|d| {
            d.domain().as_bytes() == b"abc.abc" && d.is_proper_subdomains()
        }));
        assert!(iter.next().is_some_and(|d| {
            d.domain().as_bytes() == b"com" && d.is_proper_subdomains()
        }));
        assert!(iter.next().is_some_and(|d| {
            d.domain().as_bytes() == b"NeT" && d.is_proper_subdomains()
        }));
        assert!(iter.next().is_none());
    }
    #[test]
    fn test_card() {
        // Geometric series.
        // We can have two labels each with one character,
        // one label with one to three characters, or 0 labels.
        // This is 1 + 52 + 52^2 + 52^3 + 52^2 = (1-52^4)/(1-52) + 52^2 = (52^4 - 1)/51 + 52^2 = 146069.
        let labels_125 = format!("||{}", single_char_labels(125));
        assert!(Adblock::parse_value(labels_125.as_str()).is_ok_and(|val| {
            let dom = val.unwrap_domain();
            dom.domain.len().get() == 249
                && dom.domain.iter().count() == 125
                && dom.domain_count() == BigUint::new(vec![146069])
        }));
        // A subdomain of length 252 or 253 gets converted to a domain.
        let labels_127 = format!("||{}", single_char_labels(127));
        assert!(Adblock::parse_value(labels_127.as_str()).is_ok_and(|val| {
            let dom = val.unwrap_domain();
            dom.domain.iter().count() == 127
                && !dom.subdomains
                && dom.domain_count() == BigUint::new(vec![1])
        }));
        // Pre-calculated manually.
        // This is the number of domains possible between 2 and 252 characters.
        assert!(Wildcard::parse_value("*.a").is_ok_and(|val| {
            val.unwrap_domain().domain_count()
                == BigUint::new(vec![
                    375288404, 2460223985, 1334358771, 2543621408, 2519466280, 1133682239,
                    3589178618, 348125705, 1709233643, 958334503, 3780539710, 2181893897,
                    2457156833, 3204765645, 2728103430, 1817547150, 3102358416, 444185044,
                    3659003776, 10341713, 306326206, 1336386425, 3942332649, 2036577878,
                    2460939277, 3976861337, 2101094571, 2241770079, 2667853164, 3687350273,
                    109356153, 3455569358, 2333076459, 2433207896, 1553903141, 2621943843,
                    4223295645, 1753858368, 130924388, 965594304, 3942586845, 1573844087,
                    4237886128, 481383133, 56931017,
                ])
        }));
    }
}