dom.rs (88637B)
1 use crate::dom_count_auto_gen::proper_subdomain_count; 2 use ascii_domain::{ 3 char_set::{AllowedAscii, ASCII_FIREFOX}, 4 dom::{Domain, DomainErr, DomainOrdering}, 5 }; 6 use core::{ 7 borrow::Borrow, 8 cmp::Ordering, 9 convert, 10 fmt::{self, Display, Formatter}, 11 hash::{Hash, Hasher}, 12 num::NonZeroU8, 13 ops::Deref, 14 str, 15 }; 16 use num_bigint::BigUint; 17 use std::{ 18 error, 19 io::{Error, Write}, 20 }; 21 use superset_map::SetOrd; 22 use zfc::{BoundedCardinality, Cardinality, Set}; 23 /// Error returned when an invalid string is passed to [`Adblock::parse_value`], [`DomainOnly::parse_value`], 24 /// [`Hosts::parse_value`], [`Wildcard::parse_value`], or [`RpzDomain::parse_value`]. 25 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] 26 pub enum FirefoxDomainErr { 27 /// The domain is invalid based on [`Domain`] using [`ASCII_FIREFOX`]. 28 InvalidDomain(DomainErr), 29 /// The domain had a TLD that was not all letters nor length of at least five beginning with `b"xn--"`. 30 InvalidTld, 31 /// The string passed to [`Adblock::parse_value`] contained `$`. 32 InvalidAdblockDomain, 33 /// The string passed to [`Hosts::parse_value`] did not conform 34 /// to the required [`Hosts`] format. 35 InvalidHostsIP, 36 /// The length of the non-wildcard portion of the string passed to 37 /// [`Wildcard::parse_value`] was at least 252 which means there are 38 /// no proper subdomains. 
39 InvalidWildcardDomain, 40 } 41 impl Display for FirefoxDomainErr { 42 #[inline] 43 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 44 match *self { 45 Self::InvalidDomain(err) => err.fmt(f), 46 Self::InvalidTld => f.write_str("domain had a TLD that was not all letters nor at least five characters long starting with 'xn--'"), 47 Self::InvalidAdblockDomain => f.write_str("Adblock-style domain contained a '$'"), 48 Self::InvalidHostsIP => f.write_str("hosts-style domain does not begin with the IP '::', '::1', '0.0.0.0', or '127.0.0.1' followed by at least one space or tab"), 49 Self::InvalidWildcardDomain => f.write_str("non-wildcard portion of a wildcard domain had length of at least 252 which means there are 0 proper subdomains"), 50 } 51 } 52 } 53 impl error::Error for FirefoxDomainErr {} 54 /// The ASCII we allow domains to have. 55 const CHARS: &AllowedAscii<[u8; 78]> = &ASCII_FIREFOX; 56 /// Parses a `[u8]` into a `Domain` using `CHARS` with the added restriction that the `Domain` has a TLD 57 /// that is either all letters or has length of at least five and begins with `b"xn--"`. 58 #[expect(clippy::indexing_slicing, reason = "we verify manually")] 59 fn domain_icann_tld<'a: 'b, 'b>(val: &'a [u8]) -> Result<Domain<&'b str>, FirefoxDomainErr> { 60 Domain::try_from_bytes(val, CHARS) 61 .map_err(FirefoxDomainErr::InvalidDomain) 62 .and_then(|dom| { 63 let tld = dom.tld(); 64 // `tld.as_bytes()[..4]` won't panic since we check before that that the length is at least 5. 65 if tld.is_alphabetic() || (tld.len().get() > 4 && tld.as_bytes()[..4] == *b"xn--") { 66 Ok(dom.into()) 67 } else { 68 Err(FirefoxDomainErr::InvalidTld) 69 } 70 }) 71 } 72 /// Action taken by a DNS server when a domain matches. 73 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 74 pub enum RpzAction { 75 /// Send `NXDOMAIN` reply. 76 Nxdomain, 77 /// Send `NODATA` reply. 78 Nodata, 79 /// Do nothing; continue as normal. 80 Passthru, 81 /// Drop the query. 
82 Drop, 83 /// Answer over TCP. 84 TcpOnly, 85 } 86 impl Display for RpzAction { 87 #[inline] 88 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 89 match *self { 90 Self::Nxdomain => f.write_str("NXDOMAIN"), 91 Self::Nodata => f.write_str("NODATA"), 92 Self::Passthru => f.write_str("PASSTHRU"), 93 Self::Drop => f.write_str("DROP"), 94 Self::TcpOnly => f.write_str("TCP-Only"), 95 } 96 } 97 } 98 impl PartialEq<&Self> for RpzAction { 99 #[inline] 100 fn eq(&self, other: &&Self) -> bool { 101 *self == **other 102 } 103 } 104 impl PartialEq<RpzAction> for &RpzAction { 105 #[inline] 106 fn eq(&self, other: &RpzAction) -> bool { 107 **self == *other 108 } 109 } 110 /// Writes the following line with `writer` based on `action`: 111 /// * `RpzAction::Nxdomain`: `<dom> CNAME .`. 112 /// * `RpzAction::Nodata`: `<dom> CNAME *.`. 113 /// * `RpzAction::Passthru`: `<dom> CNAME rpz-passthru.`. 114 /// * `RpzAction::Drop`: `<dom> CNAME rpz-drop.`. 115 /// * `RpzAction::TcpOnly`: `<dom> CNAME rpz-tcp-only.`. 116 /// 117 /// `*.` is prepended to `<dom>` iff `wildcard`. 118 /// 119 /// # Errors 120 /// 121 /// Returns [`Error`] iff [`writeln`] does. 122 #[inline] 123 pub fn write_rpz_line<W: Write, T>( 124 mut writer: W, 125 dom: &Domain<T>, 126 action: RpzAction, 127 wildcard: bool, 128 ) -> Result<(), Error> 129 where 130 Domain<T>: Display, 131 { 132 writeln!( 133 writer, 134 "{}{} CNAME {}.", 135 if wildcard { "*." } else { "" }, 136 dom, 137 match action { 138 RpzAction::Nxdomain => "", 139 RpzAction::Nodata => "*", 140 RpzAction::Passthru => "rpz-passthru", 141 RpzAction::Drop => "rpz-drop", 142 RpzAction::TcpOnly => "rpz-tcp-only", 143 } 144 ) 145 } 146 /// Type that can be returned by [`Domain`]-like parsers (e.g., [`Adblock`]). 147 #[derive(Clone, Copy, Debug)] 148 pub enum Value<'a, T: ParsedDomain<'a>> { 149 /// The parsed value is a domain. 150 Domain(T), 151 /// The parsed value is a comment. 
152 Comment(&'a str), 153 /// The parsed value is blank or just [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 154 Blank, 155 } 156 impl<'a, T: ParsedDomain<'a>> Value<'a, T> { 157 /// Returns `true` iff `self` is a [`Self::Domain`]. 158 #[inline] 159 pub const fn is_domain(&self) -> bool { 160 match *self { 161 Self::Domain(_) => true, 162 Self::Comment(_) | Self::Blank => false, 163 } 164 } 165 /// Returns `true` iff `self` is a [`Self::Comment`]. 166 #[inline] 167 pub const fn is_comment(&self) -> bool { 168 match *self { 169 Self::Comment(_) => true, 170 Self::Domain(_) | Self::Blank => false, 171 } 172 } 173 /// Returns `true` iff `self` is a [`Self::Blank`]. 174 #[inline] 175 pub const fn is_blank(&self) -> bool { 176 matches!(*self, Value::Blank) 177 } 178 /// Returns the contained [`Self::Domain`] value. 179 /// 180 /// # Panics 181 /// 182 /// Panics iff `self` is [`Self::Comment`] or [`Self::Blank`]. 183 #[expect(clippy::panic, reason = "bug if called incorrectly")] 184 #[inline] 185 pub fn unwrap_domain(self) -> T { 186 match self { 187 Self::Domain(dom) => dom, 188 Self::Comment(_) | Self::Blank => { 189 panic!("called `ParsedDomain::unwrap_domain()` on a `Comment` or `Blank` value") 190 } 191 } 192 } 193 /// Returns the contained [`prim@str`] in [`Self::Comment`]. 194 /// 195 /// # Panics 196 /// 197 /// Panics iff `self` is [`Self::Domain`] or [`Self::Blank`]. 198 #[expect(clippy::panic, reason = "bug if called incorrectly")] 199 #[inline] 200 pub fn unwrap_comment(self) -> &'a str { 201 match self { 202 Self::Comment(com) => com, 203 Self::Domain(_) | Self::Blank => { 204 panic!("called `ParsedDomain::unwrap_comment()` on a `Domain` or `Blank` value") 205 } 206 } 207 } 208 /// Returns [`unit`] when `self` is [`Self::Blank`]. 209 /// 210 /// # Panics 211 /// 212 /// Panics iff `self` is [`Self::Domain`] or [`Self::Comment`]. 
213 #[expect(clippy::panic, reason = "bug if called incorrectly")] 214 #[inline] 215 pub fn unwrap_blank(self) { 216 match self { 217 Self::Blank => {} 218 Self::Domain(_) | Self::Comment(_) => { 219 panic!("called `ParsedDomain::unwrap_blank()` on a `Domain` or `Comment` value") 220 } 221 } 222 } 223 } 224 /// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. 225 /// 226 /// When parsed into a [`Value::Domain`], the domain can be written to a 227 /// [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. 228 pub trait ParsedDomain<'a>: Sized { 229 /// The error returned from [`Self::parse_value`]. 230 type Error; 231 /// Parses a `str` into a `Value`. 232 /// # Errors 233 /// 234 /// Errors iff `val` is unable to be parsed into a `Value`. 235 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error>; 236 /// Reference to the contained `Domain`. 237 fn domain(&self) -> &Domain<&'a str>; 238 /// Writes `self` as RPZ lines via `writer`. 239 /// 240 /// # Errors 241 /// 242 /// Errors iff `writer` errors. 243 fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error>; 244 } 245 /// Domain constructed from an 246 /// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax). 247 /// 248 /// Specifically the domain must conform to the following extended regex: 249 /// 250 /// `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$` 251 /// 252 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`] with the added requirement that it 253 /// does not contain `$`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of 254 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 255 /// 256 /// Comments are any lines that start with `!` or `#` (ignoring whitespace). 
Any in-line comments after a valid 257 /// domain are ignored and will be parsed into a [`Value::Domain`]. 258 /// 259 /// Note that this means some valid Adblock-style rules are not considered valid since such rules often contain 260 /// path information or modifiers (e.g., “third-party”), but this only considers domain-only rules. 261 #[derive(Clone, Debug)] 262 pub struct Adblock<'a> { 263 /// The `Domain`. 264 domain: Domain<&'a str>, 265 /// `true` iff `domain` represents all subdomains. Note that this includes `domain` itself. 266 subdomains: bool, 267 } 268 impl<'a> Adblock<'a> { 269 /// Returns `true` iff the contained [`Domain`] represents all subdomains. Note this includes the 270 /// `Domain` itself. 271 #[inline] 272 #[must_use] 273 pub const fn is_subdomains(&self) -> bool { 274 self.subdomains 275 } 276 /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used 277 /// for both. 278 #[must_use] 279 fn cmp_dom(&self, other: &Domain<&str>) -> Ordering { 280 match self.domain.cmp_by_domain_ordering(other) { 281 DomainOrdering::Less => Ordering::Less, 282 DomainOrdering::Shorter => { 283 if self.subdomains { 284 Ordering::Greater 285 } else { 286 Ordering::Less 287 } 288 } 289 DomainOrdering::Equal => { 290 if self.subdomains { 291 Ordering::Greater 292 } else { 293 Ordering::Equal 294 } 295 } 296 DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater, 297 } 298 } 299 /// The total order that is defined follows the following hierarchy: 300 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 301 /// 2. If 1. evaluates as not equivalent, then return the result. 302 /// 3. If `self` represents a single `Domain` (i.e., `!self.is_subdomains()`), 303 /// then return the comparison of label counts. 304 /// 4. `self` is greater. 
305 /// 306 /// For example, `com` `<` `example.com` `<` `||example.com` `<` `||com` `<` `net` `<` `example.net` `<` `||example.net` `<` `||net`. 307 #[inline] 308 #[must_use] 309 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 310 self.cmp_dom(&other.domain) 311 } 312 /// Same as [`Adblock::cmp_domain_only`]. 313 #[inline] 314 #[must_use] 315 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 316 self.cmp_dom(&other.domain) 317 } 318 /// The total order that is defined follows the following hierarchy: 319 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 320 /// 2. If 1. evaluates as not equivalent, then return the result. 321 /// 3. If both domains represent a single `Domain`, then return the comparison 322 /// of label counts. 323 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 324 /// 5. If the label counts are the same, `self` is greater. 325 /// 6. Return the inverse of the comparison of label counts. 
326 /// 327 /// For example the following is a sequence of domains in 328 /// ascending order: 329 /// 330 /// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net` 331 #[inline] 332 #[must_use] 333 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 334 match self.domain.cmp_by_domain_ordering(&other.domain) { 335 DomainOrdering::Less => Ordering::Less, 336 DomainOrdering::Shorter => { 337 if self.subdomains { 338 Ordering::Greater 339 } else { 340 Ordering::Less 341 } 342 } 343 DomainOrdering::Equal => { 344 if self.subdomains { 345 Ordering::Greater 346 } else if other.proper_subdomains { 347 Ordering::Less 348 } else { 349 Ordering::Equal 350 } 351 } 352 DomainOrdering::Longer => { 353 if self.subdomains { 354 if other.proper_subdomains { 355 Ordering::Less 356 } else { 357 Ordering::Greater 358 } 359 } else if other.proper_subdomains { 360 Ordering::Less 361 } else { 362 Ordering::Greater 363 } 364 } 365 DomainOrdering::Greater => Ordering::Greater, 366 } 367 } 368 /// Same as [`Adblock::cardinality`] except that a `BigUint` is returned. Note the count _includes_ 369 /// the `Domain` itself when `self.is_subdomains()`. 370 /// 371 /// `!self.is_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`. 
372 #[expect(clippy::arithmetic_side_effects, reason = "arbitrary-sized arithmetic")] 373 #[inline] 374 #[must_use] 375 pub fn domain_count(&self) -> BigUint { 376 if self.subdomains { 377 proper_subdomain_count(&self.domain) + BigUint::new(vec![1]) 378 } else { 379 BigUint::new(vec![1]) 380 } 381 } 382 } 383 impl Display for Adblock<'_> { 384 #[inline] 385 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 386 write!( 387 f, 388 "{}{}", 389 if self.subdomains { "||" } else { "" }, 390 self.domain 391 ) 392 } 393 } 394 impl PartialEq<Adblock<'_>> for Adblock<'_> { 395 #[inline] 396 fn eq(&self, other: &Adblock<'_>) -> bool { 397 self.domain == other.domain && self.subdomains == other.subdomains 398 } 399 } 400 impl PartialEq<&Adblock<'_>> for Adblock<'_> { 401 #[inline] 402 fn eq(&self, other: &&Adblock<'_>) -> bool { 403 *self == **other 404 } 405 } 406 impl PartialEq<Adblock<'_>> for &Adblock<'_> { 407 #[inline] 408 fn eq(&self, other: &Adblock<'_>) -> bool { 409 **self == *other 410 } 411 } 412 impl PartialEq<DomainOnly<'_>> for Adblock<'_> { 413 #[inline] 414 fn eq(&self, other: &DomainOnly<'_>) -> bool { 415 !self.subdomains && self.domain == other.domain 416 } 417 } 418 impl PartialEq<&DomainOnly<'_>> for Adblock<'_> { 419 #[inline] 420 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 421 *self == **other 422 } 423 } 424 impl PartialEq<DomainOnly<'_>> for &Adblock<'_> { 425 #[inline] 426 fn eq(&self, other: &DomainOnly<'_>) -> bool { 427 **self == *other 428 } 429 } 430 impl PartialEq<&Adblock<'_>> for DomainOnly<'_> { 431 #[inline] 432 fn eq(&self, other: &&Adblock<'_>) -> bool { 433 *self == **other 434 } 435 } 436 impl PartialEq<Adblock<'_>> for &DomainOnly<'_> { 437 #[inline] 438 fn eq(&self, other: &Adblock<'_>) -> bool { 439 **self == *other 440 } 441 } 442 impl PartialEq<Hosts<'_>> for Adblock<'_> { 443 #[inline] 444 fn eq(&self, other: &Hosts<'_>) -> bool { 445 !self.subdomains && self.domain == other.domain 446 } 447 } 448 impl 
PartialEq<&Hosts<'_>> for Adblock<'_> { 449 #[inline] 450 fn eq(&self, other: &&Hosts<'_>) -> bool { 451 *self == **other 452 } 453 } 454 impl PartialEq<Hosts<'_>> for &Adblock<'_> { 455 #[inline] 456 fn eq(&self, other: &Hosts<'_>) -> bool { 457 **self == *other 458 } 459 } 460 impl PartialEq<&Adblock<'_>> for Hosts<'_> { 461 #[inline] 462 fn eq(&self, other: &&Adblock<'_>) -> bool { 463 *self == **other 464 } 465 } 466 impl PartialEq<Adblock<'_>> for &Hosts<'_> { 467 #[inline] 468 fn eq(&self, other: &Adblock<'_>) -> bool { 469 **self == *other 470 } 471 } 472 impl PartialEq<Wildcard<'_>> for Adblock<'_> { 473 #[expect(clippy::suspicious_operation_groupings, reason = "false positive")] 474 #[inline] 475 fn eq(&self, other: &Wildcard<'_>) -> bool { 476 !(self.subdomains || other.proper_subdomains) && self.domain == other.domain 477 } 478 } 479 impl PartialEq<&Wildcard<'_>> for Adblock<'_> { 480 #[inline] 481 fn eq(&self, other: &&Wildcard<'_>) -> bool { 482 *self == **other 483 } 484 } 485 impl PartialEq<Wildcard<'_>> for &Adblock<'_> { 486 #[inline] 487 fn eq(&self, other: &Wildcard<'_>) -> bool { 488 **self == *other 489 } 490 } 491 impl PartialEq<&Adblock<'_>> for Wildcard<'_> { 492 #[inline] 493 fn eq(&self, other: &&Adblock<'_>) -> bool { 494 *self == **other 495 } 496 } 497 impl PartialEq<Adblock<'_>> for &Wildcard<'_> { 498 #[inline] 499 fn eq(&self, other: &Adblock<'_>) -> bool { 500 **self == *other 501 } 502 } 503 impl Eq for Adblock<'_> {} 504 impl Hash for Adblock<'_> { 505 #[inline] 506 fn hash<H: Hasher>(&self, state: &mut H) { 507 self.domain.hash(state); 508 } 509 } 510 impl PartialOrd<Adblock<'_>> for Adblock<'_> { 511 #[inline] 512 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 513 Some(self.cmp(other)) 514 } 515 } 516 impl Ord for Adblock<'_> { 517 /// The total order that is defined follows the following hierarchy: 518 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 519 /// 2. If 1. 
evaluates as not equivalent, then return the result. 520 /// 3. If both domains represent a single `Domain`, then return the comparison 521 /// of label counts. 522 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 523 /// 5. Return the inverse of the comparison of label counts. 524 /// 525 /// For example, `com` `<` `example.com` `<` `||example.com` `<` `||com` `<` `net` `<` `example.net` `<` `||example.net` `<` `||net`. 526 #[inline] 527 #[must_use] 528 fn cmp(&self, other: &Self) -> Ordering { 529 match self.domain.cmp_by_domain_ordering(&other.domain) { 530 DomainOrdering::Less => Ordering::Less, 531 DomainOrdering::Shorter => { 532 if self.subdomains { 533 Ordering::Greater 534 } else { 535 Ordering::Less 536 } 537 } 538 DomainOrdering::Equal => { 539 if self.subdomains { 540 if other.subdomains { 541 Ordering::Equal 542 } else { 543 Ordering::Greater 544 } 545 } else if other.subdomains { 546 Ordering::Less 547 } else { 548 Ordering::Equal 549 } 550 } 551 DomainOrdering::Longer => { 552 if self.subdomains { 553 if other.subdomains { 554 Ordering::Less 555 } else { 556 Ordering::Greater 557 } 558 } else if other.subdomains { 559 Ordering::Less 560 } else { 561 Ordering::Greater 562 } 563 } 564 DomainOrdering::Greater => Ordering::Greater, 565 } 566 } 567 } 568 impl PartialOrd<DomainOnly<'_>> for Adblock<'_> { 569 #[inline] 570 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 571 Some(self.cmp_domain_only(other)) 572 } 573 } 574 impl PartialOrd<Hosts<'_>> for Adblock<'_> { 575 #[inline] 576 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 577 Some(self.cmp_hosts(other)) 578 } 579 } 580 impl PartialOrd<Wildcard<'_>> for Adblock<'_> { 581 #[inline] 582 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 583 Some(self.cmp_wildcard(other)) 584 } 585 } 586 impl<'a> Set for Adblock<'a> { 587 type Elem = Domain<&'a str>; 588 #[inline] 589 fn bounded_cardinality(&self) -> 
BoundedCardinality { 590 BoundedCardinality::from_biguint_exact(self.domain_count()) 591 } 592 #[inline] 593 fn cardinality(&self) -> Option<Cardinality> { 594 Some(Cardinality::Finite(self.domain_count())) 595 } 596 #[inline] 597 fn contains<Q>(&self, elem: &Q) -> bool 598 where 599 Q: Borrow<Self::Elem> + Eq + ?Sized, 600 { 601 if self.subdomains { 602 matches!( 603 self.domain.cmp_by_domain_ordering(elem.borrow()), 604 DomainOrdering::Shorter 605 ) 606 } else { 607 self.domain == *elem.borrow() 608 } 609 } 610 #[inline] 611 fn is_proper_subset(&self, val: &Self) -> bool { 612 // A single domain can never be a proper superset. Subdomains` cannot be a proper superset if it has 613 // more labels or the same number of labels as another subdomains. In all other cases, we need to 614 // recursively check from the TLD that the labels are the same. 615 val.subdomains 616 && match val.domain.cmp_by_domain_ordering(&self.domain) { 617 DomainOrdering::Less | DomainOrdering::Longer | DomainOrdering::Greater => false, 618 DomainOrdering::Shorter => true, 619 DomainOrdering::Equal => !self.subdomains, 620 } 621 } 622 #[inline] 623 fn is_subset(&self, val: &Self) -> bool { 624 self == val || self.is_proper_subset(val) 625 } 626 } 627 impl SetOrd for Adblock<'_> {} 628 impl<'a> Deref for Adblock<'a> { 629 type Target = Domain<&'a str>; 630 #[inline] 631 fn deref(&self) -> &Self::Target { 632 &self.domain 633 } 634 } 635 impl<'a> ParsedDomain<'a> for Adblock<'a> { 636 type Error = FirefoxDomainErr; 637 #[expect( 638 unsafe_code, 639 clippy::indexing_slicing, 640 reason = "we carefully verify what we are doing" 641 )] 642 #[inline] 643 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 644 // First remove leading whitepace. Then check for comments via '#' and '!'. Return Blank iff empty. 645 // Return Comment iff '#' or '!' is the first character. Remove trailing whitespace. Next remove the 646 // last byte if it is '^' as well as whitespace before. 
Next track and remove '||' at the beginning 647 // and any subsequent whitespace. 648 let mut value = val.as_bytes().trim_ascii_start(); 649 value.first().map_or_else( 650 || Ok(Value::Blank), 651 |byt| { 652 if *byt == b'#' || *byt == b'!' { 653 // SAFETY: 654 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 655 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 656 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 657 Ok(Value::Comment(comment)) 658 } else { 659 value = value.trim_ascii_end(); 660 let len = value.len().wrapping_sub(1); 661 value = value.get(len).map_or(value, |byt2| { 662 if *byt2 == b'^' { 663 value[..len].trim_ascii_end() 664 } else { 665 value 666 } 667 }); 668 let (subdomains, val2) = value.get(..2).map_or_else( 669 || (false, value), 670 |fst| { 671 if fst == b"||" { 672 (true, value[2..].trim_ascii_start()) 673 } else { 674 (false, value) 675 } 676 }, 677 ); 678 // `Domain`s allow `$`, but we don't want to allow that symbol for Adblock-style rules. 679 val2.iter() 680 .try_fold((), |(), byt2| { 681 if *byt2 == b'$' { 682 Err(FirefoxDomainErr::InvalidAdblockDomain) 683 } else { 684 Ok(()) 685 } 686 }) 687 .and_then(|()| { 688 domain_icann_tld(val2).map(|domain| { 689 // A domain of length 252 or 253 can't have subdomains due to there not being enough 690 // characters. 
691 Value::Domain(Self { 692 subdomains: if domain.len().get() > 251 { 693 false 694 } else { 695 subdomains 696 }, 697 domain, 698 }) 699 }) 700 }) 701 } 702 }, 703 ) 704 } 705 #[inline] 706 fn domain(&self) -> &Domain<&'a str> { 707 &self.domain 708 } 709 #[inline] 710 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 711 write_rpz_line(&mut writer, self.domain(), action, false).and_then(|()| { 712 if self.subdomains { 713 write_rpz_line(writer, self.domain(), action, true) 714 } else { 715 Ok(()) 716 } 717 }) 718 } 719 } 720 /// Domain constructed from a 721 /// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax). 722 /// 723 /// Specifically the domain must conform to the following extended regex: 724 /// 725 /// `^<ws>*<domain><ws>*(#.*)?$` 726 /// 727 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 728 /// or at least length five and begins with `xn--`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 729 /// 730 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 731 /// are ignored and will be parsed into a [`Value::Domain`]. 732 #[derive(Clone, Debug)] 733 pub struct DomainOnly<'a> { 734 /// The `Domain`. 735 domain: Domain<&'a str>, 736 } 737 impl<'a> DomainOnly<'a> { 738 /// Read [`Adblock::cmp_domain_only`]. 739 #[inline] 740 #[must_use] 741 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 742 other.cmp_domain_only(self).reverse() 743 } 744 /// Read [`Domain::cmp`]. 745 #[inline] 746 #[must_use] 747 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 748 self.domain.cmp(&other.domain) 749 } 750 /// Read [`Wildcard::cmp_domain_only`]. 
751 #[inline] 752 #[must_use] 753 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 754 other.cmp_domain_only(self).reverse() 755 } 756 /// Same as [`DomainOnly::cardinality`] except that a `NonZeroU8` is returned. 757 /// 758 /// The value is always 1. 759 #[expect(unsafe_code, reason = "trivial use of NonZeroU8::new_unchecked")] 760 #[inline] 761 #[must_use] 762 pub const fn domain_count(&self) -> NonZeroU8 { 763 // SAFETY: 764 // 0 < 1 < 256. 765 unsafe { NonZeroU8::new_unchecked(1) } 766 } 767 } 768 impl PartialEq<DomainOnly<'_>> for DomainOnly<'_> { 769 #[inline] 770 fn eq(&self, other: &DomainOnly<'_>) -> bool { 771 self.domain == other.domain 772 } 773 } 774 impl PartialEq<DomainOnly<'_>> for &DomainOnly<'_> { 775 #[inline] 776 fn eq(&self, other: &DomainOnly<'_>) -> bool { 777 **self == *other 778 } 779 } 780 impl PartialEq<&DomainOnly<'_>> for DomainOnly<'_> { 781 #[inline] 782 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 783 *self == **other 784 } 785 } 786 impl PartialEq<Adblock<'_>> for DomainOnly<'_> { 787 #[inline] 788 fn eq(&self, other: &Adblock<'_>) -> bool { 789 other == self 790 } 791 } 792 impl PartialEq<Hosts<'_>> for DomainOnly<'_> { 793 #[inline] 794 fn eq(&self, other: &Hosts<'_>) -> bool { 795 self.domain == other.domain 796 } 797 } 798 impl PartialEq<&Hosts<'_>> for DomainOnly<'_> { 799 #[inline] 800 fn eq(&self, other: &&Hosts<'_>) -> bool { 801 *self == **other 802 } 803 } 804 impl PartialEq<Hosts<'_>> for &DomainOnly<'_> { 805 #[inline] 806 fn eq(&self, other: &Hosts<'_>) -> bool { 807 **self == *other 808 } 809 } 810 impl PartialEq<&DomainOnly<'_>> for Hosts<'_> { 811 #[inline] 812 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 813 *self == **other 814 } 815 } 816 impl PartialEq<DomainOnly<'_>> for &Hosts<'_> { 817 #[inline] 818 fn eq(&self, other: &DomainOnly<'_>) -> bool { 819 **self == *other 820 } 821 } 822 impl PartialEq<Wildcard<'_>> for DomainOnly<'_> { 823 #[inline] 824 fn eq(&self, other: &Wildcard<'_>) -> 
bool { 825 !other.proper_subdomains && self.domain == other.domain 826 } 827 } 828 impl PartialEq<&Wildcard<'_>> for DomainOnly<'_> { 829 #[inline] 830 fn eq(&self, other: &&Wildcard<'_>) -> bool { 831 *self == **other 832 } 833 } 834 impl PartialEq<Wildcard<'_>> for &DomainOnly<'_> { 835 #[inline] 836 fn eq(&self, other: &Wildcard<'_>) -> bool { 837 **self == *other 838 } 839 } 840 impl PartialEq<&DomainOnly<'_>> for Wildcard<'_> { 841 #[inline] 842 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 843 *self == **other 844 } 845 } 846 impl PartialEq<DomainOnly<'_>> for &Wildcard<'_> { 847 #[inline] 848 fn eq(&self, other: &DomainOnly<'_>) -> bool { 849 **self == *other 850 } 851 } 852 impl Eq for DomainOnly<'_> {} 853 impl Hash for DomainOnly<'_> { 854 #[inline] 855 fn hash<H: Hasher>(&self, state: &mut H) { 856 self.domain.hash(state); 857 } 858 } 859 impl PartialOrd<DomainOnly<'_>> for DomainOnly<'_> { 860 #[inline] 861 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 862 Some(self.cmp(other)) 863 } 864 } 865 impl Ord for DomainOnly<'_> { 866 /// Read [`Domain::cmp`]. 
867 #[inline] 868 fn cmp(&self, other: &Self) -> Ordering { 869 self.domain.cmp(&other.domain) 870 } 871 } 872 impl PartialOrd<Adblock<'_>> for DomainOnly<'_> { 873 #[inline] 874 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 875 Some(self.cmp_adblock(other)) 876 } 877 } 878 impl PartialOrd<Hosts<'_>> for DomainOnly<'_> { 879 #[inline] 880 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 881 Some(self.cmp_hosts(other)) 882 } 883 } 884 impl PartialOrd<Wildcard<'_>> for DomainOnly<'_> { 885 #[inline] 886 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 887 Some(self.cmp_wildcard(other)) 888 } 889 } 890 impl Display for DomainOnly<'_> { 891 #[inline] 892 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 893 self.domain.fmt(f) 894 } 895 } 896 impl<'a> Set for DomainOnly<'a> { 897 type Elem = Domain<&'a str>; 898 #[inline] 899 fn bounded_cardinality(&self) -> BoundedCardinality { 900 BoundedCardinality::from_biguint_exact(self.domain_count().get().into()) 901 } 902 #[inline] 903 fn cardinality(&self) -> Option<Cardinality> { 904 Some(Cardinality::Finite(self.domain_count().get().into())) 905 } 906 #[inline] 907 fn contains<Q>(&self, elem: &Q) -> bool 908 where 909 Q: Borrow<Self::Elem> + Eq + ?Sized, 910 { 911 self.domain == *elem.borrow() 912 } 913 #[inline] 914 fn is_proper_subset(&self, _: &Self) -> bool { 915 false 916 } 917 #[inline] 918 fn is_subset(&self, val: &Self) -> bool { 919 self == val 920 } 921 } 922 impl SetOrd for DomainOnly<'_> {} 923 impl<'a> Deref for DomainOnly<'a> { 924 type Target = Domain<&'a str>; 925 #[inline] 926 fn deref(&self) -> &Self::Target { 927 &self.domain 928 } 929 } 930 impl<'a> ParsedDomain<'a> for DomainOnly<'a> { 931 type Error = FirefoxDomainErr; 932 #[expect( 933 unsafe_code, 934 clippy::arithmetic_side_effects, 935 clippy::indexing_slicing, 936 reason = "we verify all use is correct" 937 )] 938 #[inline] 939 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, 
Self::Error> { 940 let value = val.as_bytes().trim_ascii_start(); 941 value.first().map_or_else( 942 || Ok(Value::Blank), 943 |byt| { 944 if *byt == b'#' { 945 // SAFETY: 946 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 947 // since the first byte is '#' or '$' the remaining bytes are still valid UTF-8. 948 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 949 Ok(Value::Comment(comment)) 950 } else { 951 domain_icann_tld( 952 value[..value 953 .iter() 954 .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) 955 .map_or_else(convert::identity, convert::identity)] 956 .trim_ascii_end(), 957 ) 958 .map(|domain| Value::Domain(Self { domain })) 959 } 960 }, 961 ) 962 } 963 #[inline] 964 fn domain(&self) -> &Domain<&'a str> { 965 &self.domain 966 } 967 #[inline] 968 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 969 write_rpz_line(&mut writer, self.domain(), action, false) 970 } 971 } 972 /// Domain constructed from a 973 /// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax). 974 /// 975 /// Specifically the domain must conform to the following extended regex: 976 /// 977 /// `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` 978 /// 979 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 980 /// or at least length five and begins with `xn--`, `<ws>` is any sequence of 981 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace), and `<ip>` is one of the following: 982 /// 983 /// `::`, `::1`, `0.0.0.0`, or `127.0.0.1`. 984 /// 985 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 986 /// are ignored and will be parsed into a [`Value::Domain`]. 987 #[derive(Clone, Debug)] 988 pub struct Hosts<'a> { 989 /// The `Domain`. 
990 domain: Domain<&'a str>, 991 } 992 impl<'a> Hosts<'a> { 993 /// Read [`Adblock::cmp_hosts`]. 994 #[inline] 995 #[must_use] 996 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 997 other.cmp_hosts(self).reverse() 998 } 999 /// Read [`DomainOnly::cmp_hosts`]. 1000 #[inline] 1001 #[must_use] 1002 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 1003 other.cmp_hosts(self).reverse() 1004 } 1005 /// Read [`Wildcard::cmp_hosts`]. 1006 #[inline] 1007 #[must_use] 1008 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 1009 other.cmp_hosts(self).reverse() 1010 } 1011 /// Same as [`Hosts::cardinality`] except that a `NonZeroU8` is returned. 1012 /// 1013 /// The value is always 1. 1014 #[expect(unsafe_code, reason = "trivial use of NonZeroU8::new_unchecked")] 1015 #[inline] 1016 #[must_use] 1017 pub const fn domain_count(&self) -> NonZeroU8 { 1018 // SAFETY: 1019 // 0 < 1 < 256. 1020 unsafe { NonZeroU8::new_unchecked(1) } 1021 } 1022 } 1023 impl PartialEq<Hosts<'_>> for Hosts<'_> { 1024 #[inline] 1025 fn eq(&self, other: &Hosts<'_>) -> bool { 1026 self.domain == other.domain 1027 } 1028 } 1029 impl PartialEq<Hosts<'_>> for &Hosts<'_> { 1030 #[inline] 1031 fn eq(&self, other: &Hosts<'_>) -> bool { 1032 **self == *other 1033 } 1034 } 1035 impl PartialEq<&Hosts<'_>> for Hosts<'_> { 1036 #[inline] 1037 fn eq(&self, other: &&Hosts<'_>) -> bool { 1038 *self == **other 1039 } 1040 } 1041 impl PartialEq<Adblock<'_>> for Hosts<'_> { 1042 #[inline] 1043 fn eq(&self, other: &Adblock<'_>) -> bool { 1044 other == self 1045 } 1046 } 1047 impl PartialEq<DomainOnly<'_>> for Hosts<'_> { 1048 #[inline] 1049 fn eq(&self, other: &DomainOnly<'_>) -> bool { 1050 other == self 1051 } 1052 } 1053 impl PartialEq<Wildcard<'_>> for Hosts<'_> { 1054 #[inline] 1055 fn eq(&self, other: &Wildcard<'_>) -> bool { 1056 !other.proper_subdomains && self.domain == other.domain 1057 } 1058 } 1059 impl PartialEq<&Wildcard<'_>> for Hosts<'_> { 1060 #[inline] 1061 fn 
eq(&self, other: &&Wildcard<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Wildcard<'_>> for &Hosts<'_> {
    #[inline]
    fn eq(&self, other: &Wildcard<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<&Hosts<'_>> for Wildcard<'_> {
    #[inline]
    fn eq(&self, other: &&Hosts<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Hosts<'_>> for &Wildcard<'_> {
    #[inline]
    fn eq(&self, other: &Hosts<'_>) -> bool {
        **self == *other
    }
}
impl Eq for Hosts<'_> {}
impl Hash for Hosts<'_> {
    /// Hashes only the contained domain, the same field `Hosts == Hosts` compares.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.domain.hash(state);
    }
}
impl PartialOrd<Hosts<'_>> for Hosts<'_> {
    /// Always `Some`; defers to [`Ord::cmp`].
    #[inline]
    fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Hosts<'_> {
    /// Read [`Domain::cmp`].
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        self.domain.cmp(&other.domain)
    }
}
impl PartialOrd<Adblock<'_>> for Hosts<'_> {
    /// Always `Some`; defers to [`Hosts::cmp_adblock`].
    #[inline]
    fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> {
        Some(self.cmp_adblock(other))
    }
}
impl PartialOrd<DomainOnly<'_>> for Hosts<'_> {
    /// Always `Some`; defers to [`Hosts::cmp_domain_only`].
    #[inline]
    fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> {
        Some(self.cmp_domain_only(other))
    }
}
impl PartialOrd<Wildcard<'_>> for Hosts<'_> {
    /// Always `Some`; defers to [`Hosts::cmp_wildcard`].
    #[inline]
    fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> {
        Some(self.cmp_wildcard(other))
    }
}
impl Display for Hosts<'_> {
    /// Writes the contained domain using its own `Display` implementation.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        self.domain.fmt(f)
    }
}
impl<'a> Set for Hosts<'a> {
    type Elem = Domain<&'a str>;
    // The cardinality is derived from `domain_count`.
    #[inline]
    fn bounded_cardinality(&self) -> BoundedCardinality {
        BoundedCardinality::from_biguint_exact(self.domain_count().get().into())
    }
    #[inline]
fn cardinality(&self) -> Option<Cardinality> {
        Some(Cardinality::Finite(self.domain_count().get().into()))
    }
    /// Returns `true` iff `elem` is equal to the contained domain.
    #[inline]
    fn contains<Q>(&self, elem: &Q) -> bool
    where
        Q: Borrow<Self::Elem> + Eq + ?Sized,
    {
        self.domain == *elem.borrow()
    }
    /// Always `false`.
    #[inline]
    fn is_proper_subset(&self, _: &Self) -> bool {
        false
    }
    /// Since there are no proper subsets among `Hosts`, this is equality.
    #[inline]
    fn is_subset(&self, val: &Self) -> bool {
        self == val
    }
}
impl SetOrd for Hosts<'_> {}
impl<'a> Deref for Hosts<'a> {
    type Target = Domain<&'a str>;
    /// Returns the contained domain.
    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.domain
    }
}
impl<'a> ParsedDomain<'a> for Hosts<'a> {
    type Error = FirefoxDomainErr;
    #[expect(
        unsafe_code,
        clippy::arithmetic_side_effects,
        clippy::indexing_slicing,
        reason = "carefully verified use is correct"
    )]
    #[inline]
    fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
        // Leading ASCII whitespace is never significant.
        let mut value = val.as_bytes().trim_ascii_start();
        value.first().map_or_else(
            // Nothing is left after trimming: a blank line.
            || Ok(Value::Blank),
            |byt| {
                if *byt == b'#' {
                    // SAFETY:
                    // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8.
                    // Since the first byte is the single-byte character '#', the remaining bytes are still
                    // valid UTF-8.
let comment = unsafe { str::from_utf8_unchecked(&value[1..]) };
                    Ok(Value::Comment(comment))
                } else {
                    // Remove the leading IP. `::1` has to be tried before `::` since the latter is a
                    // prefix of the former. Anything else is rejected.
                    value = value
                        .strip_prefix(b"::1")
                        .or_else(|| value.strip_prefix(b"::"))
                        .or_else(|| value.strip_prefix(b"0.0.0.0"))
                        .or_else(|| value.strip_prefix(b"127.0.0.1"))
                        .ok_or(FirefoxDomainErr::InvalidHostsIP)?;
                    let len = value.len();
                    value = value.trim_ascii_start();
                    if len == value.len() {
                        // Nothing was trimmed, but there has to be at least one ASCII whitespace
                        // character between the IP and domain.
                        Err(FirefoxDomainErr::InvalidHostsIP)
                    } else {
                        // The slice end is the index of the first '#' (start of an in-line comment) or
                        // `value.len()` when there is no '#'.
                        domain_icann_tld(
                            value[..value
                                .iter()
                                .try_fold(
                                    0,
                                    |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) },
                                )
                                .map_or_else(convert::identity, convert::identity)]
                            .trim_ascii_end(),
                        )
                        .map(|domain| Value::Domain(Self { domain }))
                    }
                }
            },
        )
    }
    /// Returns the contained domain.
    #[inline]
    fn domain(&self) -> &Domain<&'a str> {
        &self.domain
    }
    /// Writes the domain via `write_rpz_line`, passing `false` as the last argument.
    #[inline]
    fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> {
        write_rpz_line(&mut writer, self.domain(), action, false)
    }
}
/// Domain constructed from a
/// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext).
///
/// Specifically the domain must conform to the following extended regex:
///
/// `^<ws>*(\*\.)?<domain><ws>*(#.*)?$`
///
/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters
/// or at least length five and begins with `xn--`, and `<ws>` is an
/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace) character.
///
/// If the rule begins with `*.`, then the `<domain>` that follows must have length less than 252.
///
/// Comments are any lines that start with `#` (ignoring whitespace). An in-line comment after a valid domain
/// is ignored; such a line is still parsed into a [`Value::Domain`].
#[derive(Clone, Debug)]
pub struct Wildcard<'a> {
    /// The `Domain`.
    domain: Domain<&'a str>,
    /// `true` iff `domain` represents all proper subdomains. Note that this does _not_ include `domain` itself.
    proper_subdomains: bool,
}
impl<'a> Wildcard<'a> {
    /// Returns `true` iff the contained [`Domain`] represents all proper subdomains. Note this does _not_
    /// include the `Domain` itself.
    #[inline]
    #[must_use]
    pub const fn is_proper_subdomains(&self) -> bool {
        self.proper_subdomains
    }
    /// Read [`Adblock::cmp_wildcard`].
    #[inline]
    #[must_use]
    pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering {
        other.cmp_wildcard(self).reverse()
    }
    /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used
    /// for both.
#[must_use]
    fn cmp_dom(&self, other: &Domain<&str>) -> Ordering {
        // `other` always denotes exactly one domain.
        match self.domain.cmp_by_domain_ordering(other) {
            DomainOrdering::Less => Ordering::Less,
            DomainOrdering::Shorter => {
                // A wildcard sorts after the single domain; two single domains sort by label count.
                if self.proper_subdomains {
                    Ordering::Greater
                } else {
                    Ordering::Less
                }
            }
            DomainOrdering::Equal => {
                // Same domain: the wildcard form sorts after the single domain.
                if self.proper_subdomains {
                    Ordering::Greater
                } else {
                    Ordering::Equal
                }
            }
            DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater,
        }
    }
    /// The total order that is defined follows the following hierarchy:
    /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
    /// 2. If 1. evaluates as not equivalent, then return the result.
    /// 3. If `self` represents a single `Domain` (i.e., `!self.is_proper_subdomains()`),
    ///    then return the comparison of label counts.
    /// 4. Return `self` is greater.
    ///
    /// For example, `com` `<` `example.com` `<` `*.example.com` `<` `*.com` `<` `net` `<` `example.net` `<` `*.example.net` `<` `*.net`.
    #[inline]
    #[must_use]
    pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering {
        self.cmp_dom(&other.domain)
    }
    /// Read [`Wildcard::cmp_domain_only`].
    #[inline]
    #[must_use]
    pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering {
        self.cmp_dom(&other.domain)
    }
    /// Same as [`Wildcard::cardinality`] except that a `BigUint` is returned. Note the count does _not_ include
    /// the `Domain` itself when `self.is_proper_subdomains()`.
    ///
    /// `!self.is_proper_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`.
#[inline]
    #[must_use]
    pub fn domain_count(&self) -> BigUint {
        if self.proper_subdomains {
            proper_subdomain_count(&self.domain)
        } else {
            // A single domain.
            BigUint::new(vec![1])
        }
    }
}
impl PartialEq<Wildcard<'_>> for Wildcard<'_> {
    /// Equal iff both the domains and the proper-subdomains flags are equal.
    #[inline]
    fn eq(&self, other: &Wildcard<'_>) -> bool {
        self.domain == other.domain && self.proper_subdomains == other.proper_subdomains
    }
}
impl PartialEq<Wildcard<'_>> for &Wildcard<'_> {
    #[inline]
    fn eq(&self, other: &Wildcard<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<&Wildcard<'_>> for Wildcard<'_> {
    #[inline]
    fn eq(&self, other: &&Wildcard<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Adblock<'_>> for Wildcard<'_> {
    /// Defers to the `Adblock == Wildcard` comparison.
    #[inline]
    fn eq(&self, other: &Adblock<'_>) -> bool {
        other == self
    }
}
impl PartialEq<DomainOnly<'_>> for Wildcard<'_> {
    /// Defers to the `DomainOnly == Wildcard` comparison.
    #[inline]
    fn eq(&self, other: &DomainOnly<'_>) -> bool {
        other == self
    }
}
impl PartialEq<Hosts<'_>> for Wildcard<'_> {
    /// Defers to the `Hosts == Wildcard` comparison.
    #[inline]
    fn eq(&self, other: &Hosts<'_>) -> bool {
        other == self
    }
}
impl Eq for Wildcard<'_> {}
impl Hash for Wildcard<'_> {
    /// Hashes only the contained domain; the proper-subdomains flag is not part of the hash.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.domain.hash(state);
    }
}
impl PartialOrd<Wildcard<'_>> for Wildcard<'_> {
    /// Always `Some`; defers to [`Ord::cmp`].
    #[inline]
    fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Wildcard<'_> {
    /// The total order that is defined follows the following hierarchy:
    /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
    /// 2. If 1. evaluates as not equivalent, then return the result.
    /// 3. If both domains represent a single `Domain`, then return the comparison
    ///    of label counts.
    /// 4.
If one domain represents a single `Domain`, then return that that domain is less. 1391 /// 5. Return the inverse of the comparison of label counts. 1392 /// 1393 /// For example, `com` `<` `example.com` `<` `*.example.com` `<` `*.com` `<` `net` `<` `example.net` `<` `*.example.net` `<` `*.net`. 1394 #[inline] 1395 #[must_use] 1396 fn cmp(&self, other: &Self) -> Ordering { 1397 match self.domain.cmp_by_domain_ordering(&other.domain) { 1398 DomainOrdering::Less => Ordering::Less, 1399 DomainOrdering::Shorter => { 1400 if self.proper_subdomains { 1401 Ordering::Greater 1402 } else { 1403 Ordering::Less 1404 } 1405 } 1406 DomainOrdering::Equal => { 1407 if self.proper_subdomains { 1408 if other.proper_subdomains { 1409 Ordering::Equal 1410 } else { 1411 Ordering::Greater 1412 } 1413 } else if other.proper_subdomains { 1414 Ordering::Less 1415 } else { 1416 Ordering::Equal 1417 } 1418 } 1419 DomainOrdering::Longer => { 1420 if self.proper_subdomains { 1421 if other.proper_subdomains { 1422 Ordering::Less 1423 } else { 1424 Ordering::Greater 1425 } 1426 } else if other.proper_subdomains { 1427 Ordering::Less 1428 } else { 1429 Ordering::Greater 1430 } 1431 } 1432 DomainOrdering::Greater => Ordering::Greater, 1433 } 1434 } 1435 } 1436 impl PartialOrd<Adblock<'_>> for Wildcard<'_> { 1437 #[inline] 1438 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 1439 Some(self.cmp_adblock(other)) 1440 } 1441 } 1442 impl PartialOrd<DomainOnly<'_>> for Wildcard<'_> { 1443 #[inline] 1444 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 1445 Some(self.cmp_domain_only(other)) 1446 } 1447 } 1448 impl PartialOrd<Hosts<'_>> for Wildcard<'_> { 1449 #[inline] 1450 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 1451 Some(self.cmp_hosts(other)) 1452 } 1453 } 1454 impl Display for Wildcard<'_> { 1455 #[inline] 1456 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1457 write!( 1458 f, 1459 "{}{}", 1460 if self.proper_subdomains { "*." 
} else { "" },
            self.domain
        )
    }
}
impl<'a> Set for Wildcard<'a> {
    type Elem = Domain<&'a str>;
    // The cardinality is derived from `domain_count`.
    #[inline]
    fn bounded_cardinality(&self) -> BoundedCardinality {
        BoundedCardinality::from_biguint_exact(self.domain_count())
    }
    #[inline]
    fn cardinality(&self) -> Option<Cardinality> {
        Some(Cardinality::Finite(self.domain_count()))
    }
    #[inline]
    fn contains<Q>(&self, elem: &Q) -> bool
    where
        Q: Borrow<Self::Elem> + Eq + ?Sized,
    {
        if self.proper_subdomains {
            // Proper subdomains only: `elem` equal to the contained domain is _not_ a member.
            self.domain.cmp_by_domain_ordering(elem.borrow()) == DomainOrdering::Shorter
        } else {
            self.domain == *elem.borrow()
        }
    }
    #[inline]
    fn is_proper_subset(&self, val: &Self) -> bool {
        // A single domain can never be a proper superset. Proper subdomains cannot be a proper superset if it
        // has more labels or the same number of labels as another domain. In all other cases, we need to
        // recursively check from the TLD that the labels are the same.
        //
        // Hence `val` must represent proper subdomains and its domain must compare as `Shorter` against
        // `self`'s domain.
        val.proper_subdomains
            && val.domain.cmp_by_domain_ordering(&self.domain) == DomainOrdering::Shorter
    }
    #[inline]
    fn is_subset(&self, val: &Self) -> bool {
        self == val || self.is_proper_subset(val)
    }
}
impl SetOrd for Wildcard<'_> {}
impl<'a> Deref for Wildcard<'a> {
    type Target = Domain<&'a str>;
    /// Returns the contained domain.
    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.domain
    }
}
impl<'a> ParsedDomain<'a> for Wildcard<'a> {
    type Error = FirefoxDomainErr;
    #[expect(
        unsafe_code,
        clippy::arithmetic_side_effects,
        clippy::indexing_slicing,
        reason = "need them all. care has been taken."
)]
    #[inline]
    fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
        let trimmed = val.as_bytes().trim_ascii_start();
        match trimmed.first() {
            // Nothing but ASCII whitespace.
            None => Ok(Value::Blank),
            Some(&b'#') => {
                // SAFETY:
                // `trimmed` is `val` (valid UTF-8) without its leading ASCII whitespace, and its first byte
                // is the single-byte character '#'; thus the bytes after it are still valid UTF-8.
                let comment = unsafe { str::from_utf8_unchecked(&trimmed[1..]) };
                Ok(Value::Comment(comment))
            }
            Some(_) => {
                // A leading "*." marks the rule as covering all proper subdomains.
                let (proper_subdomains, rest) = trimmed
                    .strip_prefix(b"*.")
                    .map_or((false, trimmed), |tail| (true, tail));
                // Index of the first '#' (start of an in-line comment) or `rest.len()` when there is none.
                let comment_start = rest
                    .iter()
                    .try_fold(0, |idx, byt| if *byt == b'#' { Err(idx) } else { Ok(idx + 1) })
                    .map_or_else(convert::identity, convert::identity);
                let domain = domain_icann_tld(rest[..comment_start].trim_ascii_end())?;
                if proper_subdomains && domain.len().get() > 251 {
                    Err(FirefoxDomainErr::InvalidWildcardDomain)
                } else {
                    Ok(Value::Domain(Self {
                        domain,
                        proper_subdomains,
                    }))
                }
            }
        }
    }
    /// Returns the contained domain.
    #[inline]
    fn domain(&self) -> &Domain<&'a str> {
        &self.domain
    }
    /// Writes the domain via `write_rpz_line`, passing along whether `self` represents proper subdomains.
    #[inline]
    fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> {
        write_rpz_line(&mut writer, self.domain(), action, self.proper_subdomains)
    }
}
/// A [`Domain`] in a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file.
#[derive(Clone, Debug)]
pub enum RpzDomain<'a> {
    /// An `Adblock` domain.
    Adblock(Adblock<'a>),
    /// A `DomainOnly` domain.
    DomainOnly(DomainOnly<'a>),
    /// A `Hosts` domain.
Hosts(Hosts<'a>),
    /// A `Wildcard` domain.
    Wildcard(Wildcard<'a>),
}
impl<'a> RpzDomain<'a> {
    /// Returns `true` iff `self` represents a single [`Domain`].
    ///
    /// `DomainOnly` and `Hosts` always do; `Adblock` and `Wildcard` do only when their
    /// subdomains/proper-subdomains flag is `false`.
    #[inline]
    #[must_use]
    pub const fn is_domain(&self) -> bool {
        match *self {
            Self::Adblock(ref dom) => !dom.subdomains,
            Self::DomainOnly(_) | Self::Hosts(_) => true,
            Self::Wildcard(ref dom) => !dom.proper_subdomains,
        }
    }
    /// Returns `true` iff `self` represents proper subdomains of the contained [`Domain`] (i.e.,
    /// is a [`Wildcard`] such that [`Wildcard::is_proper_subdomains`]).
    #[inline]
    #[must_use]
    pub const fn is_proper_subdomains(&self) -> bool {
        match *self {
            Self::Adblock(_) | Self::DomainOnly(_) | Self::Hosts(_) => false,
            Self::Wildcard(ref dom) => dom.proper_subdomains,
        }
    }
    /// Returns `true` iff `self` represents subdomains of the contained [`Domain`] (i.e., is an
    /// [`Adblock`] such that [`Adblock::is_subdomains`]).
    #[inline]
    #[must_use]
    pub const fn is_subdomains(&self) -> bool {
        match *self {
            Self::Adblock(ref dom) => dom.subdomains,
            Self::DomainOnly(_) | Self::Hosts(_) | Self::Wildcard(_) => false,
        }
    }
    /// Returns the count of [`Domain`]s represented by `self`. This function is the same as
    /// [`RpzDomain::cardinality`] except that it returns a `BigUint`.
#[inline]
    #[must_use]
    pub fn domain_count(&self) -> BigUint {
        match *self {
            Self::Adblock(ref dom) => dom.domain_count(),
            // `DomainOnly::domain_count` and `Hosts::domain_count` return a `NonZeroU8` which is widened.
            Self::DomainOnly(ref dom) => dom.domain_count().get().into(),
            Self::Hosts(ref dom) => dom.domain_count().get().into(),
            Self::Wildcard(ref dom) => dom.domain_count(),
        }
    }
}
impl PartialEq<RpzDomain<'_>> for RpzDomain<'_> {
    /// Compares the contained values using the `PartialEq` implementation that exists for that pair of
    /// types; variants of different kinds can therefore compare equal.
    #[inline]
    fn eq(&self, other: &RpzDomain<'_>) -> bool {
        match *self {
            Self::Adblock(ref dom) => match *other {
                RpzDomain::Adblock(ref dom2) => dom == dom2,
                RpzDomain::DomainOnly(ref dom2) => dom == dom2,
                RpzDomain::Hosts(ref dom2) => dom == dom2,
                RpzDomain::Wildcard(ref dom2) => dom == dom2,
            },
            Self::DomainOnly(ref dom) => match *other {
                RpzDomain::Adblock(ref dom2) => dom == dom2,
                RpzDomain::DomainOnly(ref dom2) => dom == dom2,
                RpzDomain::Hosts(ref dom2) => dom == dom2,
                RpzDomain::Wildcard(ref dom2) => dom == dom2,
            },
            Self::Hosts(ref dom) => match *other {
                RpzDomain::Adblock(ref dom2) => dom == dom2,
                RpzDomain::DomainOnly(ref dom2) => dom == dom2,
                RpzDomain::Hosts(ref dom2) => dom == dom2,
                RpzDomain::Wildcard(ref dom2) => dom == dom2,
            },
            Self::Wildcard(ref dom) => match *other {
                RpzDomain::Adblock(ref dom2) => dom == dom2,
                RpzDomain::DomainOnly(ref dom2) => dom == dom2,
                RpzDomain::Hosts(ref dom2) => dom == dom2,
                RpzDomain::Wildcard(ref dom2) => dom == dom2,
            },
        }
    }
}
impl PartialEq<RpzDomain<'_>> for &RpzDomain<'_> {
    #[inline]
    fn eq(&self, other: &RpzDomain<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<&RpzDomain<'_>> for RpzDomain<'_> {
    #[inline]
    fn eq(&self, other: &&RpzDomain<'_>) -> bool {
        *self == **other
    }
}
impl Eq for RpzDomain<'_> {}
impl Hash for RpzDomain<'_> {
    // Only the contained domain is hashed, independent of the variant.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
self.domain().hash(state);
    }
}
impl PartialOrd<RpzDomain<'_>> for RpzDomain<'_> {
    /// Always `Some`; defers to [`Ord::cmp`].
    #[inline]
    fn partial_cmp(&self, other: &RpzDomain<'_>) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for RpzDomain<'_> {
    /// The total order that is defined follows the following hierarchy:
    /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
    /// 2. If 1. evaluates as not equivalent, then return the result.
    /// 3. If both domains represent a single `Domain`, then return the comparison
    ///    of label counts.
    /// 4. If one domain represents a single `Domain`, then return that that domain is less.
    /// 5. If the label counts are the same and exactly one domain represents proper subdomains, the other domain is greater.
    /// 6. Return the inverse of the comparison of label counts.
    ///
    /// For example the following is a sequence of domains in
    /// ascending order:
    ///
    /// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net`
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        // Every pair of variants is routed to the comparison function defined for that pair of types.
        match *self {
            Self::Adblock(ref dom) => match *other {
                Self::Adblock(ref dom2) => dom.cmp(dom2),
                Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2),
                Self::Hosts(ref dom2) => dom.cmp_hosts(dom2),
                Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2),
            },
            Self::DomainOnly(ref dom) => match *other {
                Self::Adblock(ref dom2) => dom.cmp_adblock(dom2),
                Self::DomainOnly(ref dom2) => dom.cmp(dom2),
                Self::Hosts(ref dom2) => dom.cmp_hosts(dom2),
                Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2),
            },
            Self::Hosts(ref dom) => match *other {
                Self::Adblock(ref dom2) => dom.cmp_adblock(dom2),
Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2),
                Self::Hosts(ref dom2) => dom.cmp(dom2),
                Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2),
            },
            Self::Wildcard(ref dom) => match *other {
                Self::Adblock(ref dom2) => dom.cmp_adblock(dom2),
                Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2),
                Self::Hosts(ref dom2) => dom.cmp_hosts(dom2),
                Self::Wildcard(ref dom2) => dom.cmp(dom2),
            },
        }
    }
}
impl Display for RpzDomain<'_> {
    /// Formats the contained value using its own `Display` implementation.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Adblock(ref dom) => dom.fmt(f),
            Self::DomainOnly(ref dom) => dom.fmt(f),
            Self::Hosts(ref dom) => dom.fmt(f),
            Self::Wildcard(ref dom) => dom.fmt(f),
        }
    }
}
impl<'a> Set for RpzDomain<'a> {
    type Elem = Domain<&'a str>;
    // The cardinality is derived from `domain_count`.
    #[inline]
    fn bounded_cardinality(&self) -> BoundedCardinality {
        BoundedCardinality::from_biguint_exact(self.domain_count())
    }
    #[inline]
    fn cardinality(&self) -> Option<Cardinality> {
        Some(Cardinality::Finite(self.domain_count()))
    }
    /// Defers to the `contains` of the contained value.
    #[inline]
    fn contains<Q>(&self, elem: &Q) -> bool
    where
        Q: Borrow<Self::Elem> + Eq + ?Sized,
    {
        match *self {
            Self::Adblock(ref dom) => dom.contains(elem),
            Self::DomainOnly(ref dom) => dom.contains(elem),
            Self::Hosts(ref dom) => dom.contains(elem),
            Self::Wildcard(ref dom) => dom.contains(elem),
        }
    }
    /// Returns `true` iff `self` is a proper subset of `val`.
    ///
    /// Only a `val` that represents more than a single domain (an `Adblock` with subdomains or a
    /// `Wildcard` with proper subdomains) can have a proper subset.
    #[inline]
    fn is_proper_subset(&self, val: &Self) -> bool {
        match *val {
            Self::Adblock(ref dom) => {
                dom.subdomains
                    && match *self {
                        Self::Adblock(ref dom2) => {
                            match dom.domain.cmp_by_domain_ordering(&dom2.domain) {
                                DomainOrdering::Shorter => true,
                                // Same domain: `self` is a proper subset only when it is the single
                                // domain, exactly like the `DomainOnly` and `Hosts` arms below. When
                                // `self` also represents subdomains the two sets are equal.
                                DomainOrdering::Equal => !dom2.subdomains,
                                DomainOrdering::Less
                                | DomainOrdering::Longer
                                | DomainOrdering::Greater => false,
                            }
                        }
                        Self::DomainOnly(ref dom2) => matches!(
                            dom.domain.cmp_by_domain_ordering(&dom2.domain),
                            DomainOrdering::Shorter | DomainOrdering::Equal
                        ),
                        Self::Hosts(ref dom2) => matches!(
dom.domain.cmp_by_domain_ordering(&dom2.domain),
                            DomainOrdering::Shorter | DomainOrdering::Equal
                        ),
                        Self::Wildcard(ref dom2) => matches!(
                            dom.domain.cmp_by_domain_ordering(&dom2.domain),
                            DomainOrdering::Shorter | DomainOrdering::Equal
                        ),
                    }
            }
            // A single domain has no proper subset among `RpzDomain`s.
            Self::DomainOnly(_) | Self::Hosts(_) => false,
            Self::Wildcard(ref dom) => {
                // Proper subdomains exclude `dom.domain` itself, so an equal domain never qualifies here;
                // only `Shorter` does.
                dom.proper_subdomains
                    && match *self {
                        Self::Adblock(ref dom2) => {
                            dom.domain.cmp_by_domain_ordering(&dom2.domain)
                                == DomainOrdering::Shorter
                        }
                        Self::DomainOnly(ref dom2) => {
                            dom.domain.cmp_by_domain_ordering(&dom2.domain)
                                == DomainOrdering::Shorter
                        }
                        Self::Hosts(ref dom2) => {
                            dom.domain.cmp_by_domain_ordering(&dom2.domain)
                                == DomainOrdering::Shorter
                        }
                        Self::Wildcard(ref dom2) => {
                            dom.domain.cmp_by_domain_ordering(&dom2.domain)
                                == DomainOrdering::Shorter
                        }
                    }
            }
        }
    }
    #[inline]
    fn is_subset(&self, val: &Self) -> bool {
        self == val || self.is_proper_subset(val)
    }
}
impl SetOrd for RpzDomain<'_> {}
impl<'a> Deref for RpzDomain<'a> {
    type Target = Domain<&'a str>;
    /// Returns the domain of the contained value.
    #[inline]
    fn deref(&self) -> &Self::Target {
        match *self {
            Self::Adblock(ref dom) => &dom.domain,
            Self::DomainOnly(ref dom) => &dom.domain,
            Self::Hosts(ref dom) => &dom.domain,
            Self::Wildcard(ref dom) => &dom.domain,
        }
    }
}
impl<'a: 'b, 'b> From<Adblock<'a>> for RpzDomain<'b> {
    /// Wraps `value` in [`RpzDomain::Adblock`].
    #[inline]
    fn from(value: Adblock<'a>) -> Self {
        Self::Adblock(value)
    }
}
impl<'a: 'b, 'b> From<DomainOnly<'a>> for RpzDomain<'b> {
    /// Wraps `value` in [`RpzDomain::DomainOnly`].
    #[inline]
    fn from(value: DomainOnly<'a>) -> Self {
        Self::DomainOnly(value)
    }
}
impl<'a: 'b, 'b> From<Hosts<'a>> for RpzDomain<'b> {
    /// Wraps `value` in [`RpzDomain::Hosts`].
    #[inline]
    fn from(value: Hosts<'a>) -> Self {
        Self::Hosts(value)
    }
}
impl<'a: 'b, 'b> From<Wildcard<'a>> for RpzDomain<'b> {
#[inline]
    fn from(value: Wildcard<'a>) -> Self {
        Self::Wildcard(value)
    }
}
impl<'a> ParsedDomain<'a> for RpzDomain<'a> {
    type Error = FirefoxDomainErr;
    /// Tries the parsers in the order `DomainOnly`, `Hosts`, `Wildcard`, `Adblock` and returns the first
    /// success wrapped in the matching variant. When all of them fail, the error of the `Adblock` parser is
    /// returned.
    #[inline]
    fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
        DomainOnly::parse_value(val)
            .map(|parsed| match parsed {
                Value::Domain(dom) => Value::Domain(Self::DomainOnly(dom)),
                Value::Comment(com) => Value::Comment(com),
                Value::Blank => Value::Blank,
            })
            .or_else(|_| {
                Hosts::parse_value(val).map(|parsed| match parsed {
                    Value::Domain(dom) => Value::Domain(Self::Hosts(dom)),
                    Value::Comment(com) => Value::Comment(com),
                    Value::Blank => Value::Blank,
                })
            })
            .or_else(|_| {
                Wildcard::parse_value(val).map(|parsed| match parsed {
                    Value::Domain(dom) => Value::Domain(Self::Wildcard(dom)),
                    Value::Comment(com) => Value::Comment(com),
                    Value::Blank => Value::Blank,
                })
            })
            .or_else(|_| {
                Adblock::parse_value(val).map(|parsed| match parsed {
                    Value::Domain(dom) => Value::Domain(Self::Adblock(dom)),
                    Value::Comment(com) => Value::Comment(com),
                    Value::Blank => Value::Blank,
                })
            })
    }
    /// Returns the domain of the contained value.
    #[inline]
    fn domain(&self) -> &Domain<&'a str> {
        match *self {
            Self::Adblock(ref inner) => &inner.domain,
            Self::DomainOnly(ref inner) => &inner.domain,
            Self::Hosts(ref inner) => &inner.domain,
            Self::Wildcard(ref inner) => &inner.domain,
        }
    }
    /// Forwards to the `write_to_rpz` of the contained value.
    #[inline]
    fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error> {
        match *self {
            Self::Adblock(ref inner) => inner.write_to_rpz(action, writer),
            Self::DomainOnly(ref inner) => inner.write_to_rpz(action, writer),
            Self::Hosts(ref inner) => inner.write_to_rpz(action, writer),
            Self::Wildcard(ref inner) => inner.write_to_rpz(action, writer),
        }
    }
}
#[cfg(test)] 1917 mod tests { 1918 use super::{ 1919 Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard, 1920 }; 1921 use ascii_domain::dom::DomainErr; 1922 use num_bigint::BigUint; 1923 use superset_map::SupersetSet; 1924 #[test] 1925 fn test_adblock_parse() { 1926 // Test subdomains. 1927 assert!( 1928 Adblock::parse_value("||www.example.com").map_or(false, |val| match val { 1929 Value::Domain(ref dom) => 1930 dom.subdomains && dom.domain.as_bytes() == b"www.example.com", 1931 Value::Comment(_) | Value::Blank => false, 1932 }) 1933 ); 1934 // Test whitespace and '^' removal. 1935 assert!( 1936 Adblock::parse_value(" \t\t ||\t\t \twww.example.com \t\t ^ \t\t ").map_or( 1937 false, 1938 |val| match val { 1939 Value::Domain(ref dom) => 1940 dom.subdomains && dom.domain.as_bytes() == b"www.example.com", 1941 Value::Comment(_) | Value::Blank => false, 1942 } 1943 ) 1944 ); 1945 assert!( 1946 Adblock::parse_value("\t\t \twww.example.com \t\t \t\t ").map_or(false, |val| { 1947 match val { 1948 Value::Domain(ref dom) => { 1949 !dom.subdomains && dom.domain.as_bytes() == b"www.example.com" 1950 } 1951 Value::Comment(_) | Value::Blank => false, 1952 } 1953 }) 1954 ); 1955 assert!(Adblock::parse_value("www .example.com").map_or_else( 1956 |err| err == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')), 1957 |_| false 1958 )); 1959 assert!( 1960 Adblock::parse_value("||www.ExAMPle.COm").map_or(false, |val| { 1961 match val { 1962 Value::Domain(ref dom) => { 1963 Adblock::parse_value("||www.example.com").map_or(false, |val| match val { 1964 Value::Domain(ref dom2) => { 1965 dom == dom2 1966 && dom.subdomains 1967 && dom2.subdomains 1968 && dom.cmp(dom2).is_eq() 1969 } 1970 Value::Comment(_) | Value::Blank => false, 1971 }) 1972 } 1973 Value::Comment(_) | Value::Blank => false, 1974 } 1975 }) 1976 ); 1977 // Test comment 1978 assert!( 1979 Adblock::parse_value(" \t\t #hi").map_or(false, |val| match val { 1980 Value::Comment(com) 
=> com == "hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        assert!(
            Adblock::parse_value(" \t\t !! foo").map_or(false, |val| match val {
                Value::Comment(com) => com == "! foo",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // Test blank
        assert!(Adblock::parse_value(" \t\t ").map_or(false, |val| matches!(val, Value::Blank)));
    }
    /// Builds `prefix` followed by `labels` single-character labels (`a`) joined by `.`.
    ///
    /// `labels` one-character labels occupy `2 * labels - 1` bytes, so the tests below can
    /// land exactly on the length limits they exercise without anyone having to count the
    /// labels of a 250-byte literal by eye.
    fn single_char_labels(prefix: &str, labels: usize) -> String {
        format!("{prefix}{}", vec!["a"; labels].join("."))
    }
    /// Exercises `DomainOnly::parse_value`: whitespace and `#` comment handling,
    /// case-insensitive comparison, comment and blank lines, and the TLD rules.
    #[test]
    fn test_domain_only_parse_value() {
        // Leading whitespace is skipped and a trailing `#` comment is dropped.
        assert!(
            DomainOnly::parse_value(" \t\t \t\t \twww.example.com#asdflkj asdf alskdfj ")
                .is_ok_and(|val| match val {
                    Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com",
                    Value::Comment(_) | Value::Blank => false,
                })
        );
        // Whitespace in the middle of the value is reported as an invalid byte.
        assert!(
            DomainOnly::parse_value(" \t\t \t\t \twww.example.com \t\t ^ \t\t ").is_err_and(
                |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))
            )
        );
        // Domains that differ only in ASCII case compare as equal.
        assert!(
            DomainOnly::parse_value("www.ExAMPle.CoM").is_ok_and(|val| match val {
                Value::Domain(ref dom) => {
                    DomainOnly::parse_value("www.example.com").is_ok_and(|val2| match val2 {
                        Value::Domain(ref dom2) => dom.cmp(dom2).is_eq(),
                        Value::Comment(_) | Value::Blank => false,
                    })
                }
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // A line whose first non-whitespace byte is `#` is a comment.
        assert!(
            DomainOnly::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // A whitespace-only line is blank.
        assert!(DomainOnly::parse_value(" \t\t \t\t \t ")
            .is_ok_and(|val| matches!(val, Value::Blank)));
        // A TLD of at least five bytes starting with `xn--` is accepted.
        assert!(DomainOnly::parse_value("example.xn--abc")
            .is_ok_and(|val| matches!(val, Value::Domain(_))));
        // A TLD that is neither all letters nor `xn--`-prefixed is rejected.
        assert!(DomainOnly::parse_value("www.c1m")
            .is_err_and(|err| err == FirefoxDomainErr::InvalidTld));
    }
    /// Exercises `Hosts::parse_value`: the IP prefix, whitespace and `#` comment handling,
    /// case-insensitive comparison, and comment and blank lines.
    #[test]
    fn test_hosts_parse_value() {
        // Whitespace around the IP is skipped and a trailing `#` comment is dropped.
        assert!(
            Hosts::parse_value(" \t\t 127.0.0.1\t\t \twww.example.com#asdflkj asdf alskdfj ")
                .is_ok_and(|val| match val {
                    Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com",
                    Value::Comment(_) | Value::Blank => false,
                })
        );
        // Whitespace inside the domain portion is reported as an invalid byte.
        for input in [
            " \t\t 0.0.0.0\t\t \twww.example.com \t\t ^ \t\t ",
            "::1\twww .example.com",
        ] {
            assert!(
                Hosts::parse_value(input).is_err_and(|e| {
                    e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))
                }),
                "{input}"
            );
        }
        // A missing or unsupported IP prefix is rejected.
        for input in [
            "::2 www.example.com",
            ":2 www.example.com",
            "www.example.com",
            "10.4.2.256 www.example.com",
        ] {
            assert!(
                Hosts::parse_value(input).is_err_and(|e| e == FirefoxDomainErr::InvalidHostsIP),
                "{input}"
            );
        }
        // Domains that differ only in ASCII case (and in the IP used) compare as equal.
        assert!(
            Hosts::parse_value(":: www.ExAMPle.Com").is_ok_and(|val| match val {
                Value::Domain(ref dom) => {
                    Hosts::parse_value("127.0.0.1 www.example.com").is_ok_and(|val2| match val2 {
                        Value::Domain(ref dom2) => dom.cmp(dom2).is_eq(),
                        Value::Comment(_) | Value::Blank => false,
                    })
                }
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // A line whose first non-whitespace byte is `#` is a comment.
        assert!(
            Hosts::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // A whitespace-only line is blank.
        assert!(Hosts::parse_value(" \t\t \t\t \t ").is_ok_and(|val| matches!(val, Value::Blank)));
    }
    /// Exercises `Wildcard::parse_value`: placement of `*`, case-insensitive comparison,
    /// the `proper_subdomains` flag, whitespace and `#` comment handling, and the length
    /// limit on the non-wildcard portion.
    #[test]
    fn test_wildcard_parse_value() {
        // `*` is only valid as the entire first label; anywhere else it is an invalid byte.
        for input in ["*", "www*.example.com", "www.*.com", "www.com*", "ww*w.com"] {
            assert!(
                Wildcard::parse_value(input).is_err_and(|e| {
                    e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*'))
                }),
                "{input}"
            );
        }
        // A well-placed `*.` followed by an empty label fails on the empty label.
        assert!(Wildcard::parse_value("*..com")
            .is_err_and(|e| e == FirefoxDomainErr::InvalidDomain(DomainErr::EmptyLabel)));
        // Wildcards that differ only in ASCII case are equal under both `Ord` and `PartialEq`.
        assert!(
            Wildcard::parse_value("*.wWw.ExamPLE.com").is_ok_and(|val| match val {
                Value::Domain(ref dom) => {
                    Wildcard::parse_value("*.www.example.com").is_ok_and(|val2| match val2 {
                        Value::Domain(ref dom2) => {
                            dom.cmp(dom2).is_eq()
                                && dom == dom2
                                && dom.proper_subdomains
                                && dom2.proper_subdomains
                        }
                        Value::Comment(_) | Value::Blank => false,
                    })
                }
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // The `*.` prefix is stripped from the stored domain and sets `proper_subdomains`.
        assert!(
            Wildcard::parse_value("*.www.example.com").is_ok_and(|val| match val {
                Value::Domain(ref dom) => {
                    dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains
                }
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // A trailing `#` comment is dropped, with or without the wildcard prefix.
        assert!(
            Wildcard::parse_value(" \t\t \t\t \t*.www.example.com#asdflkj asdf alskdfj ")
                .is_ok_and(|val| match val {
                    Value::Domain(ref dom) => {
                        dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains
                    }
                    Value::Comment(_) | Value::Blank => false,
                })
        );
        assert!(
            Wildcard::parse_value(" \t\t \t\t \twww.example.com #asdflkj asdf alskdfj ")
                .is_ok_and(|val| match val {
                    Value::Domain(ref dom) => {
                        dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains
                    }
                    Value::Comment(_) | Value::Blank => false,
                })
        );
        // Leading and trailing whitespace is removed.
        assert!(
            Wildcard::parse_value(" \t\t *.www.example.com \t\t \t ").is_ok_and(|val| match val {
                Value::Domain(ref dom) => {
                    dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains
                }
                Value::Comment(_) | Value::Blank => false,
            })
        );
        assert!(
            Wildcard::parse_value("\t\t \twww.example.com \t\t \t\t ").is_ok_and(|val| match val {
                Value::Domain(ref dom) => {
                    dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains
                }
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // Whitespace inside the domain is still an invalid byte.
        assert!(Wildcard::parse_value("www .example.com").is_err_and(
            |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' '))
        ));
        // 127 one-character labels after the wildcard (253 bytes) leave no room for a proper
        // subdomain, so the wildcard is rejected.
        let too_long = single_char_labels("*.", 127);
        assert!(Wildcard::parse_value(too_long.as_str())
            .is_err_and(|e| e == FirefoxDomainErr::InvalidWildcardDomain));
        // 126 one-character labels after the wildcard (251 bytes) are accepted.
        let longest_ok = single_char_labels("*.", 126);
        assert!(
            Wildcard::parse_value(longest_ok.as_str()).is_ok_and(|val| match val {
                Value::Domain(ref dom) => {
                    dom.domain.iter().count() == 126 && dom.proper_subdomains
                }
                Value::Comment(_) | Value::Blank => false,
            })
        );
        // A line whose first non-whitespace byte is `#` is a comment.
        assert!(
            Wildcard::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // A whitespace-only line is blank.
        assert!(Wildcard::parse_value(" \t\t \t\t \t ")
            .is_ok_and(|val| matches!(val, Value::Blank)));
    }
    /// Exercises `RpzDomain::parse_value` with each accepted input style (wildcard,
    /// Adblock, hosts and bare domain), plus case-insensitivity, comments and blank lines.
    #[test]
    fn test_rpz_parse_value() {
        // Wildcard style: proper subdomains only.
        assert!(RpzDomain::parse_value("*.www.example.com").is_ok_and(|val| {
            let dom = val.unwrap_domain();
            dom.is_proper_subdomains() && dom.domain().as_bytes() == b"www.example.com"
        }));
        // Adblock style: the domain and its subdomains.
        assert!(RpzDomain::parse_value("||www.example.com").is_ok_and(|val| {
            let dom = val.unwrap_domain();
            dom.is_subdomains() && dom.domain().as_bytes() == b"www.example.com"
        }));
        // Hosts style: just the domain.
        assert!(RpzDomain::parse_value("0.0.0.0 www.example.com").is_ok_and(|val| {
            let dom = val.unwrap_domain();
            !dom.is_subdomains()
                && !dom.is_proper_subdomains()
                && dom.domain().as_bytes() == b"www.example.com"
        }));
        // Bare domain: just the domain.
        assert!(RpzDomain::parse_value("www.example.com").is_ok_and(|val| {
            let dom = val.unwrap_domain();
            !dom.is_subdomains()
                && !dom.is_proper_subdomains()
                && dom.domain().as_bytes() == b"www.example.com"
        }));
        // Domains that differ only in ASCII case are equal under both `PartialEq` and `Ord`.
        assert!(RpzDomain::parse_value("*.Www.ExaMPle.COm").is_ok_and(|val| {
            let dom = val.unwrap_domain();
            RpzDomain::parse_value("*.www.example.com").is_ok_and(|val2| {
                let dom2 = val2.unwrap_domain();
                dom.is_proper_subdomains()
                    && dom2.is_proper_subdomains()
                    && dom == dom2
                    && dom.cmp(&dom2).is_eq()
            })
        }));
        // Both `#` and `!` start a comment.
        assert!(
            RpzDomain::parse_value(" \t\t \t\t \t # hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        assert!(
            RpzDomain::parse_value(" \t\t \t\t \t ! hi").is_ok_and(|val| match val {
                Value::Comment(com) => com == " hi",
                Value::Domain(_) | Value::Blank => false,
            })
        );
        // A whitespace-only line is blank.
        assert!(RpzDomain::parse_value(" \t\t \t\t \t ")
            .is_ok_and(|val| matches!(val, Value::Blank)));
    }
    /// Walks a list of domains written in strictly ascending order (starting from
    /// `bar.com`) and checks that `RpzDomain`'s `Ord` and `PartialEq` agree with that order.
    ///
    /// # Errors
    ///
    /// Returns an error message as soon as a neighbouring pair is not strictly ordered or
    /// an element is not equal to itself.
    #[test]
    fn test_rpz_ord_and_eq() -> Result<(), &'static str> {
        // Entries starting with `|` are parsed as Adblock rules, everything else as wildcards.
        let ascending = "www.bar.com,*.www.bar.com,||www.bar.com,*.bar.com,||bar.com,Example.com,WwW.exaMple.com,*.www.example.com,||www.example.com,*.example.com,||example.com,FOo.coM,Www.foo.com,*.foo.com,*.coM,example.net,*.net";
        let smallest = RpzDomain::DomainOnly(
            DomainOnly::parse_value("bar.com")
                .expect("bug in DomainOnly::parse_value")
                .unwrap_domain(),
        );
        ascending
            .split(',')
            .try_fold(smallest, |prev, raw| {
                let cur = if raw.starts_with('|') {
                    RpzDomain::Adblock(
                        Adblock::parse_value(raw)
                            .expect("Bug in Adblock::parse_value")
                            .unwrap_domain(),
                    )
                } else {
                    RpzDomain::Wildcard(
                        Wildcard::parse_value(raw)
                            .expect("Bug in Wildcard::parse_value")
                            .unwrap_domain(),
                    )
                };
                // The self-comparisons are deliberate: they check that `PartialEq` is reflexive.
                if prev < cur && cur > prev && prev == prev && cur == cur {
                    Ok(cur)
                } else {
                    Err("PartialEq or Ord are not correctly implemented for RpzDomain.")
                }
            })
            .map(|_| ())
    }
    /// Inserts overlapping domains into a `SupersetSet` and checks that exactly five
    /// entries remain, in the expected iteration order.
    #[test]
    fn test_superset_set() {
        // Entries starting with `|` are parsed as Adblock rules, everything else as wildcards.
        let inputs = "*.NeT,*.net,www.bar.com,*.net,*.www.bar.com,||www.bar.com,*.bar.com,||bar.com,example.com,www.example.com,*.www.example.com,||www.example.com,*.example.com,||example.com,foo.com,www.foo.com,*.foo.com,*.com,example.net,*.abc.abc,||aawww.abc,abc.abc";
        let mut doms = SupersetSet::new();
        for raw in inputs.split(',') {
            let dom = if raw.starts_with('|') {
                RpzDomain::Adblock(
                    Adblock::parse_value(raw)
                        .expect("Bug in Adblock::parse_value")
                        .unwrap_domain(),
                )
            } else {
                RpzDomain::Wildcard(
                    Wildcard::parse_value(raw)
                        .expect("Bug in Wildcard::parse_value")
                        .unwrap_domain(),
                )
            };
            doms.insert(dom);
        }
        let mut iter = doms.into_iter();
        assert!(iter
            .next()
            .is_some_and(|d| d.domain().as_bytes() == b"aawww.abc" && d.is_subdomains()));
        assert!(iter
            .next()
            .is_some_and(|d| d.domain().as_bytes() == b"abc.abc" && d.is_domain()));
        assert!(iter
            .next()
            .is_some_and(|d| d.domain().as_bytes() == b"abc.abc" && d.is_proper_subdomains()));
        assert!(iter
            .next()
            .is_some_and(|d| d.domain().as_bytes() == b"com" && d.is_proper_subdomains()));
        // The first spelling inserted (`*.NeT`) is the one that is kept.
        assert!(iter
            .next()
            .is_some_and(|d| d.domain().as_bytes() == b"NeT" && d.is_proper_subdomains()));
        assert!(iter.next().is_none());
    }
    /// Checks `domain_count` against values worked out by hand.
    #[test]
    fn test_card() {
        // Geometric series.
        // The domain is 249 bytes long (125 one-character labels), so what can be put in
        // front of it is two labels each with one character, one label with one to three
        // characters, or 0 labels.
        // This is 1 + 52 + 52^2 + 52^3 + 52^2 = (1-52^4)/(1-52) + 52^2 = (52^4 - 1)/51 + 52^2 = 146069.
        let near_limit = single_char_labels("||", 125);
        assert!(Adblock::parse_value(near_limit.as_str()).is_ok_and(|val| {
            let dom = val.unwrap_domain();
            dom.domain.len().get() == 249
                && dom.domain.iter().count() == 125
                && dom.domain_count() == BigUint::new(vec![146069])
        }));
        // A domain of length 252 or 253 has no room for subdomains, so it gets converted to
        // a plain domain. 127 one-character labels are 253 bytes.
        let at_limit = single_char_labels("||", 127);
        assert!(Adblock::parse_value(at_limit.as_str()).is_ok_and(|val| {
            let dom = val.unwrap_domain();
            dom.domain.iter().count() == 127
                && !dom.subdomains
                && dom.domain_count() == BigUint::new(vec![1])
        }));
        // Pre-calculated manually.
        // This is the number of domains possible between 2 and 252 characters.
        assert!(Wildcard::parse_value("*.a").is_ok_and(|val| {
            val.unwrap_domain().domain_count()
                == BigUint::new(vec![
                    375288404, 2460223985, 1334358771, 2543621408, 2519466280, 1133682239,
                    3589178618, 348125705, 1709233643, 958334503, 3780539710, 2181893897,
                    2457156833, 3204765645, 2728103430, 1817547150, 3102358416, 444185044,
                    3659003776, 10341713, 306326206, 1336386425, 3942332649, 2036577878,
                    2460939277, 3976861337, 2101094571, 2241770079, 2667853164, 3687350273,
                    109356153, 3455569358, 2333076459, 2433207896, 1553903141, 2621943843,
                    4223295645, 1753858368, 130924388, 965594304, 3942586845, 1573844087,
                    4237886128, 481383133, 56931017,
                ])
        }));
    }
}