dom.rs (88877B)
use crate::dom_count_auto_gen::proper_subdomain_count;
use ascii_domain::{
    char_set::{ASCII_FIREFOX, AllowedAscii},
    dom::{Domain, DomainErr, DomainOrdering},
};
use core::{
    borrow::Borrow,
    cmp::Ordering,
    convert,
    fmt::{self, Display, Formatter},
    hash::{Hash, Hasher},
    num::NonZeroU8,
    ops::Deref,
    str,
};
use num_bigint::BigUint;
use std::{
    error,
    io::{Error, Write},
};
use superset_map::SetOrd;
use zfc::{BoundedCardinality, Cardinality, Set};
/// Error returned when an invalid string is passed to [`Adblock::parse_value`], [`DomainOnly::parse_value`],
/// [`Hosts::parse_value`], [`Wildcard::parse_value`], or [`RpzDomain::parse_value`].
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
pub enum FirefoxDomainErr {
    /// The domain is invalid based on [`Domain`] using [`ASCII_FIREFOX`].
    InvalidDomain(DomainErr),
    /// The domain had a TLD that was not all letters nor length of at least five beginning with `b"xn--"`.
    InvalidTld,
    /// The string passed to [`Adblock::parse_value`] contained `$`.
    InvalidAdblockDomain,
    /// The string passed to [`Hosts::parse_value`] did not conform
    /// to the required [`Hosts`] format.
    InvalidHostsIP,
    /// The length of the non-wildcard portion of the string passed to
    /// [`Wildcard::parse_value`] was at least 252 which means there are
    /// no proper subdomains.
    InvalidWildcardDomain,
}
impl Display for FirefoxDomainErr {
    /// Writes a human-readable description of the error; `InvalidDomain` defers to the wrapped
    /// [`DomainErr`].
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match *self {
            Self::InvalidDomain(err) => err.fmt(f),
            Self::InvalidTld => f.write_str("domain had a TLD that was not all letters nor at least five characters long starting with 'xn--'"),
            Self::InvalidAdblockDomain => f.write_str("Adblock-style domain contained a '$'"),
            Self::InvalidHostsIP => f.write_str("hosts-style domain does not begin with the IP '::', '::1', '0.0.0.0', or '127.0.0.1' followed by at least one space or tab"),
            Self::InvalidWildcardDomain => f.write_str("non-wildcard portion of a wildcard domain had length of at least 252 which means there are 0 proper subdomains"),
        }
    }
}
impl error::Error for FirefoxDomainErr {}
/// The ASCII we allow domains to have.
const CHARS: &AllowedAscii<[u8; 78]> = &ASCII_FIREFOX;
/// Parses a `[u8]` into a `Domain` using `CHARS` with the added restriction that the `Domain` has a TLD
/// that is either all letters or has length of at least five and begins with `b"xn--"`.
///
/// # Errors
///
/// Returns [`FirefoxDomainErr::InvalidDomain`] when `Domain::try_from_bytes` fails and
/// [`FirefoxDomainErr::InvalidTld`] when the TLD restriction above is not met.
#[expect(clippy::indexing_slicing, reason = "we verify manually")]
fn domain_icann_tld<'a: 'b, 'b>(val: &'a [u8]) -> Result<Domain<&'b str>, FirefoxDomainErr> {
    Domain::try_from_bytes(val, CHARS)
        .map_err(FirefoxDomainErr::InvalidDomain)
        .and_then(|dom| {
            let tld = dom.tld();
            // `tld.as_bytes()[..4]` won't panic since the length is checked to be at least 5 first
            // (`&&` short-circuits).
            if tld.is_alphabetic() || (tld.len().get() > 4 && tld.as_bytes()[..4] == *b"xn--") {
                Ok(dom.into())
            } else {
                Err(FirefoxDomainErr::InvalidTld)
            }
        })
}
/// Action taken by a DNS server when a domain matches.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum RpzAction {
    /// Send `NXDOMAIN` reply.
    Nxdomain,
    /// Send `NODATA` reply.
    Nodata,
    /// Do nothing; continue as normal.
    Passthru,
    /// Drop the query.
    Drop,
    /// Answer over TCP.
    TcpOnly,
}
impl Display for RpzAction {
    /// Writes the name of the action (e.g., `NXDOMAIN`).
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Nxdomain => f.write_str("NXDOMAIN"),
            Self::Nodata => f.write_str("NODATA"),
            Self::Passthru => f.write_str("PASSTHRU"),
            Self::Drop => f.write_str("DROP"),
            Self::TcpOnly => f.write_str("TCP-Only"),
        }
    }
}
impl PartialEq<&Self> for RpzAction {
    #[inline]
    fn eq(&self, other: &&Self) -> bool {
        *self == **other
    }
}
impl PartialEq<RpzAction> for &RpzAction {
    #[inline]
    fn eq(&self, other: &RpzAction) -> bool {
        **self == *other
    }
}
/// Writes the following line with `writer` based on `action`:
/// * `RpzAction::Nxdomain`: `<dom> CNAME .`.
/// * `RpzAction::Nodata`: `<dom> CNAME *.`.
/// * `RpzAction::Passthru`: `<dom> CNAME rpz-passthru.`.
/// * `RpzAction::Drop`: `<dom> CNAME rpz-drop.`.
/// * `RpzAction::TcpOnly`: `<dom> CNAME rpz-tcp-only.`.
///
/// `*.` is prepended to `<dom>` iff `wildcard`.
///
/// # Errors
///
/// Returns [`Error`] iff [`writeln`] does.
#[inline]
pub fn write_rpz_line<W: Write, T>(
    mut writer: W,
    dom: &Domain<T>,
    action: RpzAction,
    wildcard: bool,
) -> Result<(), Error>
where
    Domain<T>: Display,
{
    // The trailing `.` in the format string terminates the CNAME target, so the per-action text
    // below deliberately omits it.
    writeln!(
        writer,
        "{}{} CNAME {}.",
        if wildcard { "*." } else { "" },
        dom,
        match action {
            RpzAction::Nxdomain => "",
            RpzAction::Nodata => "*",
            RpzAction::Passthru => "rpz-passthru",
            RpzAction::Drop => "rpz-drop",
            RpzAction::TcpOnly => "rpz-tcp-only",
        }
    )
}
/// Type that can be returned by [`Domain`]-like parsers (e.g., [`Adblock`]).
#[derive(Clone, Copy, Debug)]
pub enum Value<'a, T: ParsedDomain<'a>> {
    /// The parsed value is a domain.
    Domain(T),
    /// The parsed value is a comment.
    Comment(&'a str),
    /// The parsed value is blank or just [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
    Blank,
}
impl<'a, T: ParsedDomain<'a>> Value<'a, T> {
    /// Returns `true` iff `self` is a [`Self::Domain`].
    #[inline]
    pub const fn is_domain(&self) -> bool {
        match *self {
            Self::Domain(_) => true,
            Self::Comment(_) | Self::Blank => false,
        }
    }
    /// Returns `true` iff `self` is a [`Self::Comment`].
    #[inline]
    pub const fn is_comment(&self) -> bool {
        match *self {
            Self::Comment(_) => true,
            Self::Domain(_) | Self::Blank => false,
        }
    }
    /// Returns `true` iff `self` is a [`Self::Blank`].
    #[inline]
    pub const fn is_blank(&self) -> bool {
        matches!(*self, Value::Blank)
    }
    /// Returns the contained [`Self::Domain`] value.
    ///
    /// # Panics
    ///
    /// Panics iff `self` is [`Self::Comment`] or [`Self::Blank`].
    #[expect(clippy::panic, reason = "bug if called incorrectly")]
    #[inline]
    pub fn unwrap_domain(self) -> T {
        match self {
            Self::Domain(dom) => dom,
            Self::Comment(_) | Self::Blank => {
                panic!("called `ParsedDomain::unwrap_domain()` on a `Comment` or `Blank` value")
            }
        }
    }
    /// Returns the contained [`prim@str`] in [`Self::Comment`].
    ///
    /// # Panics
    ///
    /// Panics iff `self` is [`Self::Domain`] or [`Self::Blank`].
    #[expect(clippy::panic, reason = "bug if called incorrectly")]
    #[inline]
    pub fn unwrap_comment(self) -> &'a str {
        match self {
            Self::Comment(com) => com,
            Self::Domain(_) | Self::Blank => {
                panic!("called `ParsedDomain::unwrap_comment()` on a `Domain` or `Blank` value")
            }
        }
    }
    /// Returns [`unit`] when `self` is [`Self::Blank`].
    ///
    /// # Panics
    ///
    /// Panics iff `self` is [`Self::Domain`] or [`Self::Comment`].
    #[expect(clippy::panic, reason = "bug if called incorrectly")]
    #[inline]
    pub fn unwrap_blank(self) {
        match self {
            Self::Blank => {}
            Self::Domain(_) | Self::Comment(_) => {
                panic!("called `ParsedDomain::unwrap_blank()` on a `Domain` or `Comment` value")
            }
        }
    }
}
/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s.
///
/// When parsed into a [`Value::Domain`], the domain can be written to a
/// [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file.
pub trait ParsedDomain<'a>: Sized {
    /// The error returned from [`Self::parse_value`].
    type Error;
    /// Parses a `str` into a `Value`.
    ///
    /// # Errors
    ///
    /// Errors iff `val` is unable to be parsed into a `Value`.
    fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error>;
    /// Reference to the contained `Domain`.
    fn domain(&self) -> &Domain<&'a str>;
    /// Writes `self` as RPZ lines via `writer`.
    ///
    /// # Errors
    ///
    /// Errors iff `writer` errors.
    fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error>;
}
/// Domain constructed from an
/// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax).
///
/// Specifically the domain must conform to the following extended regex:
///
/// `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$`
///
/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`] with the added requirement that it
/// does not contain `$`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of
/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
///
/// Comments are any lines that start with `!` or `#` (ignoring whitespace). Any in-line comments after a valid
/// domain are ignored and will be parsed into a [`Value::Domain`].
///
/// Note that this means some valid Adblock-style rules are not considered valid since such rules often contain
/// path information or modifiers (e.g., “third-party”), but this only considers domain-only rules.
// NOTE(review): the regex above has no in-line comment group and `Adblock::parse_value` does not
// appear to strip a trailing `#…`; confirm whether the in-line comment sentence applies here.
#[derive(Clone, Debug)]
pub struct Adblock<'a> {
    /// The `Domain`.
    domain: Domain<&'a str>,
    /// `true` iff `domain` represents all subdomains. Note that this includes `domain` itself.
    subdomains: bool,
}
impl Adblock<'_> {
    /// Returns `true` iff the contained [`Domain`] represents all subdomains. Note this includes the
    /// `Domain` itself.
    #[inline]
    #[must_use]
    pub const fn is_subdomains(&self) -> bool {
        self.subdomains
    }
    /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used
    /// for both.
    ///
    /// `other` is always treated as a single domain.
    #[must_use]
    fn cmp_dom(&self, other: &Domain<&str>) -> Ordering {
        match self.domain.cmp_by_domain_ordering(other) {
            DomainOrdering::Less => Ordering::Less,
            DomainOrdering::Shorter => {
                if self.subdomains {
                    Ordering::Greater
                } else {
                    Ordering::Less
                }
            }
            DomainOrdering::Equal => {
                // Same labels: a subdomains rule sorts after the single domain.
                if self.subdomains {
                    Ordering::Greater
                } else {
                    Ordering::Equal
                }
            }
            DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater,
        }
    }
    /// The total order that is defined follows the following hierarchy:
    /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
    /// 2. If 1. evaluates as not equivalent, then return the result.
    /// 3. If `self` represents a single `Domain` (i.e., `!self.is_subdomains()`),
    ///    then return the comparison of label counts.
    /// 4. `self` is greater.
    ///
    /// For example, `com` `<` `example.com` `<` `||example.com` `<` `||com` `<` `net` `<` `example.net` `<` `||example.net` `<` `||net`.
    #[inline]
    #[must_use]
    pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering {
        self.cmp_dom(&other.domain)
    }
    /// Same as [`Adblock::cmp_domain_only`].
    #[inline]
    #[must_use]
    pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering {
        self.cmp_dom(&other.domain)
    }
    /// The total order that is defined follows the following hierarchy:
    /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
    /// 2. If 1. evaluates as not equivalent, then return the result.
    /// 3. If both domains represent a single `Domain`, then return the comparison
    ///    of label counts.
    /// 4. If one domain represents a single `Domain`, then return that domain is less.
    /// 5. If the label counts are the same, `self` is greater.
    /// 6. Return the inverse of the comparison of label counts.
    ///
    /// For example the following is a sequence of domains in
    /// ascending order:
    ///
    /// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net`
    #[inline]
    #[must_use]
    pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering {
        match self.domain.cmp_by_domain_ordering(&other.domain) {
            DomainOrdering::Less => Ordering::Less,
            DomainOrdering::Shorter => {
                if self.subdomains {
                    Ordering::Greater
                } else {
                    Ordering::Less
                }
            }
            DomainOrdering::Equal => {
                if self.subdomains {
                    Ordering::Greater
                } else if other.proper_subdomains {
                    Ordering::Less
                } else {
                    Ordering::Equal
                }
            }
            DomainOrdering::Longer => {
                if self.subdomains {
                    if other.proper_subdomains {
                        Ordering::Less
                    } else {
                        Ordering::Greater
                    }
                } else if other.proper_subdomains {
                    Ordering::Less
                } else {
                    Ordering::Greater
                }
            }
            DomainOrdering::Greater => Ordering::Greater,
        }
    }
    /// Same as [`Adblock::cardinality`] except that a `BigUint` is returned. Note the count _includes_
    /// the `Domain` itself when `self.is_subdomains()`.
    ///
    /// `!self.is_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`.
    #[expect(clippy::arithmetic_side_effects, reason = "arbitrary-sized arithmetic")]
    #[inline]
    #[must_use]
    pub fn domain_count(&self) -> BigUint {
        if self.subdomains {
            // Proper subdomains plus the domain itself.
            proper_subdomain_count(&self.domain) + BigUint::new(vec![1])
        } else {
            BigUint::new(vec![1])
        }
    }
}
impl Display for Adblock<'_> {
    /// Writes the domain, prefixed with `||` iff it represents all subdomains.
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "{}{}",
            if self.subdomains { "||" } else { "" },
            self.domain
        )
    }
}
impl PartialEq<Adblock<'_>> for Adblock<'_> {
    #[inline]
    fn eq(&self, other: &Adblock<'_>) -> bool {
        self.domain == other.domain && self.subdomains == other.subdomains
    }
}
impl PartialEq<&Adblock<'_>> for Adblock<'_> {
    #[inline]
    fn eq(&self, other: &&Adblock<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Adblock<'_>> for &Adblock<'_> {
    #[inline]
    fn eq(&self, other: &Adblock<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<DomainOnly<'_>> for Adblock<'_> {
    /// A subdomains rule is never equal to a single domain.
    #[inline]
    fn eq(&self, other: &DomainOnly<'_>) -> bool {
        !self.subdomains && self.domain == other.domain
    }
}
impl PartialEq<&DomainOnly<'_>> for Adblock<'_> {
    #[inline]
    fn eq(&self, other: &&DomainOnly<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<DomainOnly<'_>> for &Adblock<'_> {
    #[inline]
    fn eq(&self, other: &DomainOnly<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<&Adblock<'_>> for DomainOnly<'_> {
    #[inline]
    fn eq(&self, other: &&Adblock<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Adblock<'_>> for &DomainOnly<'_> {
    #[inline]
    fn eq(&self, other: &Adblock<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<Hosts<'_>> for Adblock<'_> {
    /// A subdomains rule is never equal to a single domain.
    #[inline]
    fn eq(&self, other: &Hosts<'_>) -> bool {
        !self.subdomains && self.domain == other.domain
    }
}
impl PartialEq<&Hosts<'_>> for Adblock<'_> {
    #[inline]
    fn eq(&self, other: &&Hosts<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Hosts<'_>> for &Adblock<'_> {
    #[inline]
    fn eq(&self, other: &Hosts<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<&Adblock<'_>> for Hosts<'_> {
    #[inline]
    fn eq(&self, other: &&Adblock<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Adblock<'_>> for &Hosts<'_> {
    #[inline]
    fn eq(&self, other: &Adblock<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<Wildcard<'_>> for Adblock<'_> {
    /// Equal only when both sides represent a single domain and those domains are equal.
    #[expect(clippy::suspicious_operation_groupings, reason = "false positive")]
    #[inline]
    fn eq(&self, other: &Wildcard<'_>) -> bool {
        !(self.subdomains || other.proper_subdomains) && self.domain == other.domain
    }
}
impl PartialEq<&Wildcard<'_>> for Adblock<'_> {
    #[inline]
    fn eq(&self, other: &&Wildcard<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Wildcard<'_>> for &Adblock<'_> {
    #[inline]
    fn eq(&self, other: &Wildcard<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<&Adblock<'_>> for Wildcard<'_> {
    #[inline]
    fn eq(&self, other: &&Adblock<'_>) -> bool {
        *self == **other
    }
}
impl PartialEq<Adblock<'_>> for &Wildcard<'_> {
    #[inline]
    fn eq(&self, other: &Adblock<'_>) -> bool {
        **self == *other
    }
}
impl Eq for Adblock<'_> {}
impl Hash for Adblock<'_> {
    /// Hashes only the contained `Domain`; the `subdomains` flag is not fed to `state`.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.domain.hash(state);
    }
}
impl PartialOrd<Adblock<'_>> for Adblock<'_> {
    /// Always `Some`; defers to [`Ord::cmp`].
    #[inline]
    fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for Adblock<'_> {
    /// The total order that is defined follows the following hierarchy:
    /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
    /// 2. If 1. evaluates as not equivalent, then return the result.
    /// 3. If both domains represent a single `Domain`, then return the comparison
    ///    of label counts.
    /// 4. If one domain represents a single `Domain`, then return that domain is less.
    /// 5. Return the inverse of the comparison of label counts.
    ///
    /// For example, `com` `<` `example.com` `<` `||example.com` `<` `||com` `<` `net` `<` `example.net` `<` `||example.net` `<` `||net`.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        match self.domain.cmp_by_domain_ordering(&other.domain) {
            DomainOrdering::Less => Ordering::Less,
            DomainOrdering::Shorter => {
                if self.subdomains {
                    Ordering::Greater
                } else {
                    Ordering::Less
                }
            }
            DomainOrdering::Equal => {
                if self.subdomains {
                    if other.subdomains {
                        Ordering::Equal
                    } else {
                        Ordering::Greater
                    }
                } else if other.subdomains {
                    Ordering::Less
                } else {
                    Ordering::Equal
                }
            }
            DomainOrdering::Longer => {
                if self.subdomains {
                    if other.subdomains {
                        Ordering::Less
                    } else {
                        Ordering::Greater
                    }
                } else if other.subdomains {
                    Ordering::Less
                } else {
                    Ordering::Greater
                }
            }
            DomainOrdering::Greater => Ordering::Greater,
        }
    }
}
impl PartialOrd<DomainOnly<'_>> for Adblock<'_> {
    /// Always `Some`; defers to [`Adblock::cmp_domain_only`].
    #[inline]
    fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> {
        Some(self.cmp_domain_only(other))
    }
}
impl PartialOrd<Hosts<'_>> for Adblock<'_> {
    /// Always `Some`; defers to [`Adblock::cmp_hosts`].
    #[inline]
    fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> {
        Some(self.cmp_hosts(other))
    }
}
579 impl PartialOrd<Wildcard<'_>> for Adblock<'_> { 580 #[inline] 581 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 582 Some(self.cmp_wildcard(other)) 583 } 584 } 585 impl<'a> Set for Adblock<'a> { 586 type Elem = Domain<&'a str>; 587 #[inline] 588 fn bounded_cardinality(&self) -> BoundedCardinality { 589 BoundedCardinality::from_biguint_exact(self.domain_count()) 590 } 591 #[inline] 592 fn cardinality(&self) -> Option<Cardinality> { 593 Some(Cardinality::Finite(self.domain_count())) 594 } 595 #[inline] 596 fn contains<Q>(&self, elem: &Q) -> bool 597 where 598 Q: Borrow<Self::Elem> + Eq + ?Sized, 599 { 600 if self.subdomains { 601 matches!( 602 self.domain.cmp_by_domain_ordering(elem.borrow()), 603 DomainOrdering::Shorter 604 ) 605 } else { 606 self.domain == *elem.borrow() 607 } 608 } 609 #[inline] 610 fn is_proper_subset(&self, val: &Self) -> bool { 611 // A single domain can never be a proper superset. Subdomains` cannot be a proper superset if it has 612 // more labels or the same number of labels as another subdomains. In all other cases, we need to 613 // recursively check from the TLD that the labels are the same. 
614 val.subdomains 615 && match val.domain.cmp_by_domain_ordering(&self.domain) { 616 DomainOrdering::Less | DomainOrdering::Longer | DomainOrdering::Greater => false, 617 DomainOrdering::Shorter => true, 618 DomainOrdering::Equal => !self.subdomains, 619 } 620 } 621 #[inline] 622 fn is_subset(&self, val: &Self) -> bool { 623 self == val || self.is_proper_subset(val) 624 } 625 } 626 impl SetOrd for Adblock<'_> {} 627 impl<'a> Deref for Adblock<'a> { 628 type Target = Domain<&'a str>; 629 #[inline] 630 fn deref(&self) -> &Self::Target { 631 &self.domain 632 } 633 } 634 impl<'a> ParsedDomain<'a> for Adblock<'a> { 635 type Error = FirefoxDomainErr; 636 #[expect( 637 unsafe_code, 638 clippy::indexing_slicing, 639 reason = "we carefully verify what we are doing" 640 )] 641 #[inline] 642 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 643 // First remove leading whitepace. Then check for comments via '#' and '!'. Return Blank iff empty. 644 // Return Comment iff '#' or '!' is the first character. Remove trailing whitespace. Next remove the 645 // last byte if it is '^' as well as whitespace before. Next track and remove '||' at the beginning 646 // and any subsequent whitespace. 647 let mut value = val.as_bytes().trim_ascii_start(); 648 value.first().map_or_else( 649 || Ok(Value::Blank), 650 |byt| { 651 if *byt == b'#' || *byt == b'!' { 652 // SAFETY: 653 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 654 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 
655 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 656 Ok(Value::Comment(comment)) 657 } else { 658 value = value.trim_ascii_end(); 659 let len = value.len().wrapping_sub(1); 660 value = value.get(len).map_or(value, |byt2| { 661 if *byt2 == b'^' { 662 value[..len].trim_ascii_end() 663 } else { 664 value 665 } 666 }); 667 let (subdomains, val2) = value.get(..2).map_or_else( 668 || (false, value), 669 |fst| { 670 if fst == b"||" { 671 (true, value[2..].trim_ascii_start()) 672 } else { 673 (false, value) 674 } 675 }, 676 ); 677 // `Domain`s allow `$`, but we don't want to allow that symbol for Adblock-style rules. 678 val2.iter() 679 .try_fold((), |(), byt2| { 680 if *byt2 == b'$' { 681 Err(FirefoxDomainErr::InvalidAdblockDomain) 682 } else { 683 Ok(()) 684 } 685 }) 686 .and_then(|()| { 687 domain_icann_tld(val2).map(|domain| { 688 // A domain of length 252 or 253 can't have subdomains due to there not being enough 689 // characters. 690 Value::Domain(Self { 691 subdomains: if domain.len().get() > 251 { 692 false 693 } else { 694 subdomains 695 }, 696 domain, 697 }) 698 }) 699 }) 700 } 701 }, 702 ) 703 } 704 #[inline] 705 fn domain(&self) -> &Domain<&'a str> { 706 &self.domain 707 } 708 #[inline] 709 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 710 write_rpz_line(&mut writer, self.domain(), action, false).and_then(|()| { 711 if self.subdomains { 712 write_rpz_line(writer, self.domain(), action, true) 713 } else { 714 Ok(()) 715 } 716 }) 717 } 718 } 719 /// Domain constructed from a 720 /// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax). 
721 /// 722 /// Specifically the domain must conform to the following extended regex: 723 /// 724 /// `^<ws>*<domain><ws>*(#.*)?$` 725 /// 726 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 727 /// or at least length five and begins with `xn--`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 728 /// 729 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 730 /// are ignored and will be parsed into a [`Value::Domain`]. 731 #[derive(Clone, Debug)] 732 pub struct DomainOnly<'a> { 733 /// The `Domain`. 734 domain: Domain<&'a str>, 735 } 736 impl DomainOnly<'_> { 737 /// Read [`Adblock::cmp_domain_only`]. 738 #[inline] 739 #[must_use] 740 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 741 other.cmp_domain_only(self).reverse() 742 } 743 /// Read [`Domain::cmp`]. 744 #[inline] 745 #[must_use] 746 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 747 self.domain.cmp(&other.domain) 748 } 749 /// Read [`Wildcard::cmp_domain_only`]. 750 #[inline] 751 #[must_use] 752 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 753 other.cmp_domain_only(self).reverse() 754 } 755 /// Same as [`DomainOnly::cardinality`] except that a `NonZeroU8` is returned. 756 /// 757 /// The value is always 1. 758 #[expect(unsafe_code, reason = "trivial use of NonZeroU8::new_unchecked")] 759 #[inline] 760 #[must_use] 761 pub const fn domain_count(&self) -> NonZeroU8 { 762 // SAFETY: 763 // 0 < 1 < 256. 
764 unsafe { NonZeroU8::new_unchecked(1) } 765 } 766 } 767 impl PartialEq<DomainOnly<'_>> for DomainOnly<'_> { 768 #[inline] 769 fn eq(&self, other: &DomainOnly<'_>) -> bool { 770 self.domain == other.domain 771 } 772 } 773 impl PartialEq<DomainOnly<'_>> for &DomainOnly<'_> { 774 #[inline] 775 fn eq(&self, other: &DomainOnly<'_>) -> bool { 776 **self == *other 777 } 778 } 779 impl PartialEq<&DomainOnly<'_>> for DomainOnly<'_> { 780 #[inline] 781 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 782 *self == **other 783 } 784 } 785 impl PartialEq<Adblock<'_>> for DomainOnly<'_> { 786 #[inline] 787 fn eq(&self, other: &Adblock<'_>) -> bool { 788 other == self 789 } 790 } 791 impl PartialEq<Hosts<'_>> for DomainOnly<'_> { 792 #[inline] 793 fn eq(&self, other: &Hosts<'_>) -> bool { 794 self.domain == other.domain 795 } 796 } 797 impl PartialEq<&Hosts<'_>> for DomainOnly<'_> { 798 #[inline] 799 fn eq(&self, other: &&Hosts<'_>) -> bool { 800 *self == **other 801 } 802 } 803 impl PartialEq<Hosts<'_>> for &DomainOnly<'_> { 804 #[inline] 805 fn eq(&self, other: &Hosts<'_>) -> bool { 806 **self == *other 807 } 808 } 809 impl PartialEq<&DomainOnly<'_>> for Hosts<'_> { 810 #[inline] 811 fn eq(&self, other: &&DomainOnly<'_>) -> bool { 812 *self == **other 813 } 814 } 815 impl PartialEq<DomainOnly<'_>> for &Hosts<'_> { 816 #[inline] 817 fn eq(&self, other: &DomainOnly<'_>) -> bool { 818 **self == *other 819 } 820 } 821 impl PartialEq<Wildcard<'_>> for DomainOnly<'_> { 822 #[inline] 823 fn eq(&self, other: &Wildcard<'_>) -> bool { 824 !other.proper_subdomains && self.domain == other.domain 825 } 826 } 827 impl PartialEq<&Wildcard<'_>> for DomainOnly<'_> { 828 #[inline] 829 fn eq(&self, other: &&Wildcard<'_>) -> bool { 830 *self == **other 831 } 832 } 833 impl PartialEq<Wildcard<'_>> for &DomainOnly<'_> { 834 #[inline] 835 fn eq(&self, other: &Wildcard<'_>) -> bool { 836 **self == *other 837 } 838 } 839 impl PartialEq<&DomainOnly<'_>> for Wildcard<'_> { 840 #[inline] 841 fn 
eq(&self, other: &&DomainOnly<'_>) -> bool { 842 *self == **other 843 } 844 } 845 impl PartialEq<DomainOnly<'_>> for &Wildcard<'_> { 846 #[inline] 847 fn eq(&self, other: &DomainOnly<'_>) -> bool { 848 **self == *other 849 } 850 } 851 impl Eq for DomainOnly<'_> {} 852 impl Hash for DomainOnly<'_> { 853 #[inline] 854 fn hash<H: Hasher>(&self, state: &mut H) { 855 self.domain.hash(state); 856 } 857 } 858 impl PartialOrd<DomainOnly<'_>> for DomainOnly<'_> { 859 #[inline] 860 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 861 Some(self.cmp(other)) 862 } 863 } 864 impl Ord for DomainOnly<'_> { 865 /// Read [`Domain::cmp`]. 866 #[inline] 867 fn cmp(&self, other: &Self) -> Ordering { 868 self.domain.cmp(&other.domain) 869 } 870 } 871 impl PartialOrd<Adblock<'_>> for DomainOnly<'_> { 872 #[inline] 873 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 874 Some(self.cmp_adblock(other)) 875 } 876 } 877 impl PartialOrd<Hosts<'_>> for DomainOnly<'_> { 878 #[inline] 879 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 880 Some(self.cmp_hosts(other)) 881 } 882 } 883 impl PartialOrd<Wildcard<'_>> for DomainOnly<'_> { 884 #[inline] 885 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 886 Some(self.cmp_wildcard(other)) 887 } 888 } 889 impl Display for DomainOnly<'_> { 890 #[inline] 891 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 892 self.domain.fmt(f) 893 } 894 } 895 impl<'a> Set for DomainOnly<'a> { 896 type Elem = Domain<&'a str>; 897 #[inline] 898 fn bounded_cardinality(&self) -> BoundedCardinality { 899 BoundedCardinality::from_biguint_exact(self.domain_count().get().into()) 900 } 901 #[inline] 902 fn cardinality(&self) -> Option<Cardinality> { 903 Some(Cardinality::Finite(self.domain_count().get().into())) 904 } 905 #[inline] 906 fn contains<Q>(&self, elem: &Q) -> bool 907 where 908 Q: Borrow<Self::Elem> + Eq + ?Sized, 909 { 910 self.domain == *elem.borrow() 911 } 912 #[inline] 913 fn 
is_proper_subset(&self, _: &Self) -> bool { 914 false 915 } 916 #[inline] 917 fn is_subset(&self, val: &Self) -> bool { 918 self == val 919 } 920 } 921 impl SetOrd for DomainOnly<'_> {} 922 impl<'a> Deref for DomainOnly<'a> { 923 type Target = Domain<&'a str>; 924 #[inline] 925 fn deref(&self) -> &Self::Target { 926 &self.domain 927 } 928 } 929 impl<'a> ParsedDomain<'a> for DomainOnly<'a> { 930 type Error = FirefoxDomainErr; 931 #[expect( 932 unsafe_code, 933 clippy::arithmetic_side_effects, 934 clippy::indexing_slicing, 935 reason = "we verify all use is correct" 936 )] 937 #[inline] 938 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 939 let value = val.as_bytes().trim_ascii_start(); 940 value.first().map_or_else( 941 || Ok(Value::Blank), 942 |byt| { 943 if *byt == b'#' { 944 // SAFETY: 945 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 946 // since the first byte is '#' or '$' the remaining bytes are still valid UTF-8. 947 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 948 Ok(Value::Comment(comment)) 949 } else { 950 domain_icann_tld( 951 value[..value 952 .iter() 953 .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) 954 .map_or_else(convert::identity, convert::identity)] 955 .trim_ascii_end(), 956 ) 957 .map(|domain| Value::Domain(Self { domain })) 958 } 959 }, 960 ) 961 } 962 #[inline] 963 fn domain(&self) -> &Domain<&'a str> { 964 &self.domain 965 } 966 #[inline] 967 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 968 write_rpz_line(&mut writer, self.domain(), action, false) 969 } 970 } 971 /// Domain constructed from a 972 /// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax). 
973 /// 974 /// Specifically the domain must conform to the following extended regex: 975 /// 976 /// `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` 977 /// 978 /// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters 979 /// or at least length five and begins with `xn--`, `<ws>` is any sequence of 980 /// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace), and `<ip>` is one of the following: 981 /// 982 /// `::`, `::1`, `0.0.0.0`, or `127.0.0.1`. 983 /// 984 /// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain 985 /// are ignored and will be parsed into a [`Value::Domain`]. 986 #[derive(Clone, Debug)] 987 pub struct Hosts<'a> { 988 /// The `Domain`. 989 domain: Domain<&'a str>, 990 } 991 impl Hosts<'_> { 992 /// Read [`Adblock::cmp_hosts`]. 993 #[inline] 994 #[must_use] 995 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 996 other.cmp_hosts(self).reverse() 997 } 998 /// Read [`DomainOnly::cmp_hosts`]. 999 #[inline] 1000 #[must_use] 1001 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 1002 other.cmp_hosts(self).reverse() 1003 } 1004 /// Read [`Wildcard::cmp_hosts`]. 1005 #[inline] 1006 #[must_use] 1007 pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { 1008 other.cmp_hosts(self).reverse() 1009 } 1010 /// Same as [`Hosts::cardinality`] except that a `NonZeroU8` is returned. 1011 /// 1012 /// The value is always 1. 1013 #[expect(unsafe_code, reason = "trivial use of NonZeroU8::new_unchecked")] 1014 #[inline] 1015 #[must_use] 1016 pub const fn domain_count(&self) -> NonZeroU8 { 1017 // SAFETY: 1018 // 0 < 1 < 256. 
1019 unsafe { NonZeroU8::new_unchecked(1) } 1020 } 1021 } 1022 impl PartialEq<Hosts<'_>> for Hosts<'_> { 1023 #[inline] 1024 fn eq(&self, other: &Hosts<'_>) -> bool { 1025 self.domain == other.domain 1026 } 1027 } 1028 impl PartialEq<Hosts<'_>> for &Hosts<'_> { 1029 #[inline] 1030 fn eq(&self, other: &Hosts<'_>) -> bool { 1031 **self == *other 1032 } 1033 } 1034 impl PartialEq<&Hosts<'_>> for Hosts<'_> { 1035 #[inline] 1036 fn eq(&self, other: &&Hosts<'_>) -> bool { 1037 *self == **other 1038 } 1039 } 1040 impl PartialEq<Adblock<'_>> for Hosts<'_> { 1041 #[inline] 1042 fn eq(&self, other: &Adblock<'_>) -> bool { 1043 other == self 1044 } 1045 } 1046 impl PartialEq<DomainOnly<'_>> for Hosts<'_> { 1047 #[inline] 1048 fn eq(&self, other: &DomainOnly<'_>) -> bool { 1049 other == self 1050 } 1051 } 1052 impl PartialEq<Wildcard<'_>> for Hosts<'_> { 1053 #[inline] 1054 fn eq(&self, other: &Wildcard<'_>) -> bool { 1055 !other.proper_subdomains && self.domain == other.domain 1056 } 1057 } 1058 impl PartialEq<&Wildcard<'_>> for Hosts<'_> { 1059 #[inline] 1060 fn eq(&self, other: &&Wildcard<'_>) -> bool { 1061 *self == **other 1062 } 1063 } 1064 impl PartialEq<Wildcard<'_>> for &Hosts<'_> { 1065 #[inline] 1066 fn eq(&self, other: &Wildcard<'_>) -> bool { 1067 **self == *other 1068 } 1069 } 1070 impl PartialEq<&Hosts<'_>> for Wildcard<'_> { 1071 #[inline] 1072 fn eq(&self, other: &&Hosts<'_>) -> bool { 1073 *self == **other 1074 } 1075 } 1076 impl PartialEq<Hosts<'_>> for &Wildcard<'_> { 1077 #[inline] 1078 fn eq(&self, other: &Hosts<'_>) -> bool { 1079 **self == *other 1080 } 1081 } 1082 impl Eq for Hosts<'_> {} 1083 impl Hash for Hosts<'_> { 1084 #[inline] 1085 fn hash<H: Hasher>(&self, state: &mut H) { 1086 self.domain.hash(state); 1087 } 1088 } 1089 impl PartialOrd<Hosts<'_>> for Hosts<'_> { 1090 #[inline] 1091 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 1092 Some(self.cmp(other)) 1093 } 1094 } 1095 impl Ord for Hosts<'_> { 1096 /// Read [`Domain::cmp`]. 
#[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        self.domain.cmp(&other.domain)
    }
}
impl PartialOrd<Adblock<'_>> for Hosts<'_> {
    #[inline]
    fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> {
        Some(self.cmp_adblock(other))
    }
}
impl PartialOrd<DomainOnly<'_>> for Hosts<'_> {
    #[inline]
    fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> {
        Some(self.cmp_domain_only(other))
    }
}
impl PartialOrd<Wildcard<'_>> for Hosts<'_> {
    #[inline]
    fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> {
        Some(self.cmp_wildcard(other))
    }
}
/// Writes only the contained `Domain`; the IP of the original rule is not stored.
impl Display for Hosts<'_> {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        self.domain.fmt(f)
    }
}
/// A `Hosts` is the singleton set containing its `Domain`.
impl<'a> Set for Hosts<'a> {
    type Elem = Domain<&'a str>;
    #[inline]
    fn bounded_cardinality(&self) -> BoundedCardinality {
        BoundedCardinality::from_biguint_exact(self.domain_count().get().into())
    }
    #[inline]
    fn cardinality(&self) -> Option<Cardinality> {
        Some(Cardinality::Finite(self.domain_count().get().into()))
    }
    #[inline]
    fn contains<Q>(&self, elem: &Q) -> bool
    where
        Q: Borrow<Self::Elem> + Eq + ?Sized,
    {
        self.domain == *elem.borrow()
    }
    /// Always `false`.
    #[inline]
    fn is_proper_subset(&self, _: &Self) -> bool {
        false
    }
    /// `true` iff `self == val`.
    #[inline]
    fn is_subset(&self, val: &Self) -> bool {
        self == val
    }
}
impl SetOrd for Hosts<'_> {}
/// Exposes the wrapped [`Domain`] by reference.
impl<'a> Deref for Hosts<'a> {
    type Target = Domain<&'a str>;
    #[inline]
    fn deref(&self) -> &Self::Target {
        &self.domain
    }
}
impl<'a> ParsedDomain<'a> for Hosts<'a> {
    type Error = FirefoxDomainErr;
    /// Parses one line into a blank, a comment, or a domain.
    ///
    /// * Only ASCII whitespace (or nothing): [`Value::Blank`].
    /// * First non-whitespace byte is `#`: [`Value::Comment`] holding everything after the `#`.
    /// * Otherwise the line must start with `::1`, `::`, `0.0.0.0`, or `127.0.0.1` followed by at least one
    ///   ASCII whitespace byte; the text after that up to the first `#` (or the end of the line), with
    ///   trailing ASCII whitespace removed, is handed to `domain_icann_tld`.
    ///
    /// # Errors
    ///
    /// Returns [`FirefoxDomainErr::InvalidHostsIP`] when the IP prefix or the whitespace after it is
    /// missing; otherwise returns whatever `domain_icann_tld` returns when the domain portion is rejected.
    #[expect(
        unsafe_code,
        clippy::arithmetic_side_effects,
        clippy::indexing_slicing,
        reason = "carefully verified use is correct"
    )]
    #[inline]
    fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
        let value = val.as_bytes().trim_ascii_start();
        value.first().map_or_else(
            || Ok(Value::Blank),
            |byt| {
                if *byt == b'#' {
                    // SAFETY:
                    // `value` is `val` with leading ASCII whitespace removed, so it is still valid UTF-8.
                    // Its first byte is `b'#'`, a single-byte code point, so `value[1..]` starts on a
                    // code-point boundary and is therefore valid UTF-8 as well.
                    let comment = unsafe { str::from_utf8_unchecked(&value[1..]) };
                    Ok(Value::Comment(comment))
                } else {
                    // `::1` must be tried before `::` since the latter is a prefix of the former.
                    let after_ip = value
                        .strip_prefix(b"::1")
                        .or_else(|| value.strip_prefix(b"::"))
                        .or_else(|| value.strip_prefix(b"0.0.0.0"))
                        .or_else(|| value.strip_prefix(b"127.0.0.1"))
                        .ok_or(FirefoxDomainErr::InvalidHostsIP)?;
                    let host = after_ip.trim_ascii_start();
                    if after_ip.len() == host.len() {
                        // Nothing was trimmed, so there is no ASCII whitespace (e.g., space or tab)
                        // between the IP and the domain.
                        Err(FirefoxDomainErr::InvalidHostsIP)
                    } else {
                        domain_icann_tld(
                            // The index is the position of the first `#` (start of an in-line comment)
                            // or `host.len()` when there is none; it never exceeds `host.len()`, so
                            // the slice cannot panic.
                            host[..host
                                .iter()
                                .try_fold(
                                    0,
                                    |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) },
                                )
                                .map_or_else(convert::identity, convert::identity)]
                                .trim_ascii_end(),
                        )
                        .map(|domain| Value::Domain(Self { domain }))
                    }
                }
            },
        )
    }
    /// Returns the wrapped [`Domain`].
    #[inline]
    fn domain(&self) -> &Domain<&'a str> {
        &self.domain
    }
    /// Forwards to `write_rpz_line` with the final flag set to `false`.
    #[inline]
    fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> {
        write_rpz_line(&mut writer, self.domain(), action, false)
    }
}
/// Domain constructed from a
/// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext).
///
/// Specifically the domain must conform to the following extended regex:
///
/// `^<ws>*(\*\.)?<domain><ws>*(#.*)?$`
///
/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], the TLD is either all letters
/// or at least length five and begins with `xn--`, and `<ws>` is any sequence of
/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
///
/// If `domain` begins with `*.`, then `domain` must have length less than 252.
///
/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain
/// are ignored and will be parsed into a [`Value::Domain`].
#[derive(Clone, Debug)]
pub struct Wildcard<'a> {
    /// The `Domain`.
    domain: Domain<&'a str>,
    /// `true` iff `domain` represents all proper subdomains. Note that this does _not_ include `domain` itself.
    proper_subdomains: bool,
}
impl Wildcard<'_> {
    /// Returns `true` iff the contained [`Domain`] represents all proper subdomains. Note this does _not_
    /// include the `Domain` itself.
1267 #[inline] 1268 #[must_use] 1269 pub const fn is_proper_subdomains(&self) -> bool { 1270 self.proper_subdomains 1271 } 1272 /// Read [`Adblock::cmp_wildcard`]. 1273 #[inline] 1274 #[must_use] 1275 pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { 1276 other.cmp_wildcard(self).reverse() 1277 } 1278 /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used 1279 /// for both. 1280 #[must_use] 1281 fn cmp_dom(&self, other: &Domain<&str>) -> Ordering { 1282 match self.domain.cmp_by_domain_ordering(other) { 1283 DomainOrdering::Less => Ordering::Less, 1284 DomainOrdering::Shorter => { 1285 if self.proper_subdomains { 1286 Ordering::Greater 1287 } else { 1288 Ordering::Less 1289 } 1290 } 1291 DomainOrdering::Equal => { 1292 if self.proper_subdomains { 1293 Ordering::Greater 1294 } else { 1295 Ordering::Equal 1296 } 1297 } 1298 DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater, 1299 } 1300 } 1301 /// The total order that is defined follows the following hierarchy: 1302 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 1303 /// 2. If 1. evaluates as not equivalent, then return the result. 1304 /// 3. If `self` represents a single `Domain` (i.e., `!self.is_proper_subdomains()`), 1305 /// then return the comparison of label counts. 1306 /// 4. Return `self` is greater. 1307 /// 1308 /// For example, `com` `<` `example.com` `<` `*.example.com` `<` `*.com` `<` `net` `<` `example.net` `<` `*.example.net` `<` `*.net`. 1309 #[inline] 1310 #[must_use] 1311 pub fn cmp_domain_only(&self, other: &DomainOnly<'_>) -> Ordering { 1312 self.cmp_dom(&other.domain) 1313 } 1314 /// Read [`Wildcard::cmp_domain_only`]. 1315 #[inline] 1316 #[must_use] 1317 pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { 1318 self.cmp_dom(&other.domain) 1319 } 1320 /// Same as [`Wildcard::cardinality`] except that a `BigUint` is returned. 
Note the count does _not_ include 1321 /// the `Domain` itself when `self.is_proper_subdomains()`. 1322 /// 1323 /// `!self.is_proper_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`. 1324 #[inline] 1325 #[must_use] 1326 pub fn domain_count(&self) -> BigUint { 1327 if self.proper_subdomains { 1328 proper_subdomain_count(&self.domain) 1329 } else { 1330 BigUint::new(vec![1]) 1331 } 1332 } 1333 } 1334 impl PartialEq<Wildcard<'_>> for Wildcard<'_> { 1335 #[inline] 1336 fn eq(&self, other: &Wildcard<'_>) -> bool { 1337 self.domain == other.domain && self.proper_subdomains == other.proper_subdomains 1338 } 1339 } 1340 impl PartialEq<Wildcard<'_>> for &Wildcard<'_> { 1341 #[inline] 1342 fn eq(&self, other: &Wildcard<'_>) -> bool { 1343 **self == *other 1344 } 1345 } 1346 impl PartialEq<&Wildcard<'_>> for Wildcard<'_> { 1347 #[inline] 1348 fn eq(&self, other: &&Wildcard<'_>) -> bool { 1349 *self == **other 1350 } 1351 } 1352 impl PartialEq<Adblock<'_>> for Wildcard<'_> { 1353 #[inline] 1354 fn eq(&self, other: &Adblock<'_>) -> bool { 1355 other == self 1356 } 1357 } 1358 impl PartialEq<DomainOnly<'_>> for Wildcard<'_> { 1359 #[inline] 1360 fn eq(&self, other: &DomainOnly<'_>) -> bool { 1361 other == self 1362 } 1363 } 1364 impl PartialEq<Hosts<'_>> for Wildcard<'_> { 1365 #[inline] 1366 fn eq(&self, other: &Hosts<'_>) -> bool { 1367 other == self 1368 } 1369 } 1370 impl Eq for Wildcard<'_> {} 1371 impl Hash for Wildcard<'_> { 1372 #[inline] 1373 fn hash<H: Hasher>(&self, state: &mut H) { 1374 self.domain.hash(state); 1375 } 1376 } 1377 impl PartialOrd<Wildcard<'_>> for Wildcard<'_> { 1378 #[inline] 1379 fn partial_cmp(&self, other: &Wildcard<'_>) -> Option<Ordering> { 1380 Some(self.cmp(other)) 1381 } 1382 } 1383 impl Ord for Wildcard<'_> { 1384 /// The total order that is defined follows the following hierarchy: 1385 /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. 1386 /// 2. If 1. 
evaluates as not equivalent, then return the result. 1387 /// 3. If both domains represent a single `Domain`, then return the comparison 1388 /// of label counts. 1389 /// 4. If one domain represents a single `Domain`, then return that that domain is less. 1390 /// 5. Return the inverse of the comparison of label counts. 1391 /// 1392 /// For example, `com` `<` `example.com` `<` `*.example.com` `<` `*.com` `<` `net` `<` `example.net` `<` `*.example.net` `<` `*.net`. 1393 #[inline] 1394 fn cmp(&self, other: &Self) -> Ordering { 1395 match self.domain.cmp_by_domain_ordering(&other.domain) { 1396 DomainOrdering::Less => Ordering::Less, 1397 DomainOrdering::Shorter => { 1398 if self.proper_subdomains { 1399 Ordering::Greater 1400 } else { 1401 Ordering::Less 1402 } 1403 } 1404 DomainOrdering::Equal => { 1405 if self.proper_subdomains { 1406 if other.proper_subdomains { 1407 Ordering::Equal 1408 } else { 1409 Ordering::Greater 1410 } 1411 } else if other.proper_subdomains { 1412 Ordering::Less 1413 } else { 1414 Ordering::Equal 1415 } 1416 } 1417 DomainOrdering::Longer => { 1418 if self.proper_subdomains { 1419 if other.proper_subdomains { 1420 Ordering::Less 1421 } else { 1422 Ordering::Greater 1423 } 1424 } else if other.proper_subdomains { 1425 Ordering::Less 1426 } else { 1427 Ordering::Greater 1428 } 1429 } 1430 DomainOrdering::Greater => Ordering::Greater, 1431 } 1432 } 1433 } 1434 impl PartialOrd<Adblock<'_>> for Wildcard<'_> { 1435 #[inline] 1436 fn partial_cmp(&self, other: &Adblock<'_>) -> Option<Ordering> { 1437 Some(self.cmp_adblock(other)) 1438 } 1439 } 1440 impl PartialOrd<DomainOnly<'_>> for Wildcard<'_> { 1441 #[inline] 1442 fn partial_cmp(&self, other: &DomainOnly<'_>) -> Option<Ordering> { 1443 Some(self.cmp_domain_only(other)) 1444 } 1445 } 1446 impl PartialOrd<Hosts<'_>> for Wildcard<'_> { 1447 #[inline] 1448 fn partial_cmp(&self, other: &Hosts<'_>) -> Option<Ordering> { 1449 Some(self.cmp_hosts(other)) 1450 } 1451 } 1452 impl Display for 
Wildcard<'_> { 1453 #[inline] 1454 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 1455 write!( 1456 f, 1457 "{}{}", 1458 if self.proper_subdomains { "*." } else { "" }, 1459 self.domain 1460 ) 1461 } 1462 } 1463 impl<'a> Set for Wildcard<'a> { 1464 type Elem = Domain<&'a str>; 1465 #[inline] 1466 fn bounded_cardinality(&self) -> BoundedCardinality { 1467 BoundedCardinality::from_biguint_exact(self.domain_count()) 1468 } 1469 #[inline] 1470 fn cardinality(&self) -> Option<Cardinality> { 1471 Some(Cardinality::Finite(self.domain_count())) 1472 } 1473 #[inline] 1474 fn contains<Q>(&self, elem: &Q) -> bool 1475 where 1476 Q: Borrow<Self::Elem> + Eq + ?Sized, 1477 { 1478 if self.proper_subdomains { 1479 self.domain.cmp_by_domain_ordering(elem.borrow()) == DomainOrdering::Shorter 1480 } else { 1481 self.domain == *elem.borrow() 1482 } 1483 } 1484 #[inline] 1485 fn is_proper_subset(&self, val: &Self) -> bool { 1486 // A single domain can never be a proper superset. Proper subdomains cannot be a proper superset if it 1487 // has more labels or the same number of labels as another domain. In all other cases, we need to 1488 // recursively check from the TLD that the labels are the same. 1489 val.proper_subdomains 1490 && val.domain.cmp_by_domain_ordering(&self.domain) == DomainOrdering::Shorter 1491 } 1492 #[inline] 1493 fn is_subset(&self, val: &Self) -> bool { 1494 self == val || self.is_proper_subset(val) 1495 } 1496 } 1497 impl SetOrd for Wildcard<'_> {} 1498 impl<'a> Deref for Wildcard<'a> { 1499 type Target = Domain<&'a str>; 1500 #[inline] 1501 fn deref(&self) -> &Self::Target { 1502 &self.domain 1503 } 1504 } 1505 impl<'a> ParsedDomain<'a> for Wildcard<'a> { 1506 type Error = FirefoxDomainErr; 1507 #[expect( 1508 unsafe_code, 1509 clippy::arithmetic_side_effects, 1510 clippy::indexing_slicing, 1511 reason = "need them all. care has been taken." 
1512 )] 1513 #[inline] 1514 fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { 1515 let value = val.as_bytes().trim_ascii_start(); 1516 value.first().map_or_else( 1517 || Ok(Value::Blank), 1518 |byt| { 1519 if *byt == b'#' { 1520 // SAFETY: 1521 // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 1522 // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 1523 let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; 1524 Ok(Value::Comment(comment)) 1525 } else { 1526 let (proper_subdomains, val2) = value.get(..2).map_or_else( 1527 || (false, value), 1528 |fst| { 1529 if fst == b"*." { 1530 (true, &value[2..]) 1531 } else { 1532 (false, value) 1533 } 1534 }, 1535 ); 1536 domain_icann_tld( 1537 val2[..val2 1538 .iter() 1539 .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) 1540 .map_or_else(convert::identity, convert::identity)] 1541 .trim_ascii_end(), 1542 ) 1543 .and_then(|domain| { 1544 if proper_subdomains { 1545 if domain.len().get() > 251 { 1546 Err(FirefoxDomainErr::InvalidWildcardDomain) 1547 } else { 1548 Ok(Value::Domain(Self { 1549 domain, 1550 proper_subdomains: true, 1551 })) 1552 } 1553 } else { 1554 Ok(Value::Domain(Self { 1555 domain, 1556 proper_subdomains, 1557 })) 1558 } 1559 }) 1560 } 1561 }, 1562 ) 1563 } 1564 #[inline] 1565 fn domain(&self) -> &Domain<&'a str> { 1566 &self.domain 1567 } 1568 #[inline] 1569 fn write_to_rpz<W: Write>(&self, action: RpzAction, mut writer: W) -> Result<(), Error> { 1570 write_rpz_line(&mut writer, self.domain(), action, self.proper_subdomains) 1571 } 1572 } 1573 /// A [`Domain`] in a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. 1574 #[derive(Clone, Debug)] 1575 pub enum RpzDomain<'a> { 1576 /// An `Adblock` domain. 1577 Adblock(Adblock<'a>), 1578 /// A `DomainOnly` domain. 1579 DomainOnly(DomainOnly<'a>), 1580 /// A `Hosts` domain. 
Hosts(Hosts<'a>),
    /// A `Wildcard` domain.
    Wildcard(Wildcard<'a>),
}
impl RpzDomain<'_> {
    /// Returns `true` iff `self` represents a single [`Domain`].
    #[inline]
    #[must_use]
    pub const fn is_domain(&self) -> bool {
        match *self {
            Self::Adblock(ref dom) => !dom.subdomains,
            Self::DomainOnly(_) | Self::Hosts(_) => true,
            Self::Wildcard(ref dom) => !dom.proper_subdomains,
        }
    }
    /// Returns `true` iff `self` represents proper subdomains of the contained [`Domain`] (i.e.,
    /// is a [`Wildcard`] such that [`Wildcard::is_proper_subdomains`]).
    #[inline]
    #[must_use]
    pub const fn is_proper_subdomains(&self) -> bool {
        match *self {
            Self::Adblock(_) | Self::DomainOnly(_) | Self::Hosts(_) => false,
            Self::Wildcard(ref dom) => dom.proper_subdomains,
        }
    }
    /// Returns `true` iff `self` represents subdomains of the contained [`Domain`] (i.e., is an
    /// [`Adblock`] such that [`Adblock::is_subdomains`]).
    #[inline]
    #[must_use]
    pub const fn is_subdomains(&self) -> bool {
        match *self {
            Self::Adblock(ref dom) => dom.subdomains,
            Self::DomainOnly(_) | Self::Hosts(_) | Self::Wildcard(_) => false,
        }
    }
    /// Returns the count of [`Domain`]s represented by `self`. This function is the same as
    /// [`RpzDomain::cardinality`] except that it returns a `BigUint`.
    #[inline]
    #[must_use]
    pub fn domain_count(&self) -> BigUint {
        match *self {
            Self::Adblock(ref dom) => dom.domain_count(),
            Self::DomainOnly(ref dom) => dom.domain_count().get().into(),
            Self::Hosts(ref dom) => dom.domain_count().get().into(),
            Self::Wildcard(ref dom) => dom.domain_count(),
        }
    }
}
/// Equality is delegated to the (cross-type) `PartialEq` of the two wrapped values.
impl PartialEq<RpzDomain<'_>> for RpzDomain<'_> {
    #[inline]
    fn eq(&self, other: &RpzDomain<'_>) -> bool {
        match *self {
            Self::Adblock(ref dom) => match *other {
                RpzDomain::Adblock(ref dom2) => dom == dom2,
                RpzDomain::DomainOnly(ref dom2) => dom == dom2,
                RpzDomain::Hosts(ref dom2) => dom == dom2,
                RpzDomain::Wildcard(ref dom2) => dom == dom2,
            },
            Self::DomainOnly(ref dom) => match *other {
                RpzDomain::Adblock(ref dom2) => dom == dom2,
                RpzDomain::DomainOnly(ref dom2) => dom == dom2,
                RpzDomain::Hosts(ref dom2) => dom == dom2,
                RpzDomain::Wildcard(ref dom2) => dom == dom2,
            },
            Self::Hosts(ref dom) => match *other {
                RpzDomain::Adblock(ref dom2) => dom == dom2,
                RpzDomain::DomainOnly(ref dom2) => dom == dom2,
                RpzDomain::Hosts(ref dom2) => dom == dom2,
                RpzDomain::Wildcard(ref dom2) => dom == dom2,
            },
            Self::Wildcard(ref dom) => match *other {
                RpzDomain::Adblock(ref dom2) => dom == dom2,
                RpzDomain::DomainOnly(ref dom2) => dom == dom2,
                RpzDomain::Hosts(ref dom2) => dom == dom2,
                RpzDomain::Wildcard(ref dom2) => dom == dom2,
            },
        }
    }
}
impl PartialEq<RpzDomain<'_>> for &RpzDomain<'_> {
    #[inline]
    fn eq(&self, other: &RpzDomain<'_>) -> bool {
        **self == *other
    }
}
impl PartialEq<&RpzDomain<'_>> for RpzDomain<'_> {
    #[inline]
    fn eq(&self, other: &&RpzDomain<'_>) -> bool {
        *self == **other
    }
}
impl Eq for RpzDomain<'_> {}
/// Hashes only the contained `Domain`, independent of the variant.
impl Hash for RpzDomain<'_> {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.domain().hash(state);
    }
}
impl PartialOrd<RpzDomain<'_>> for RpzDomain<'_> {
    #[inline]
    fn partial_cmp(&self, other: &RpzDomain<'_>) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}
impl Ord for RpzDomain<'_> {
    /// The total order that is defined follows the following hierarchy:
    /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
    /// 2. If 1. evaluates as not equivalent, then return the result.
    /// 3. If both domains represent a single `Domain`, then return the comparison
    ///    of label counts.
    /// 4. If one domain represents a single `Domain`, then that domain is less.
    /// 5. If the label counts are the same and exactly one domain represents proper subdomains, the other domain is greater.
    /// 6. Return the inverse of the comparison of label counts.
    ///
    /// For example the following is a sequence of domains in
    /// ascending order:
    ///
    /// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net`
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        // Every pairing dispatches to the comparison defined between the two wrapped types.
        match *self {
            Self::Adblock(ref dom) => match *other {
                Self::Adblock(ref dom2) => dom.cmp(dom2),
                Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2),
                Self::Hosts(ref dom2) => dom.cmp_hosts(dom2),
                Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2),
            },
            Self::DomainOnly(ref dom) => match *other {
                Self::Adblock(ref dom2) => dom.cmp_adblock(dom2),
                Self::DomainOnly(ref dom2) => dom.cmp(dom2),
                Self::Hosts(ref dom2) => dom.cmp_hosts(dom2),
                Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2),
            },
            Self::Hosts(ref dom) => match *other {
                Self::Adblock(ref dom2) => dom.cmp_adblock(dom2),
                Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2),
                Self::Hosts(ref dom2) => dom.cmp(dom2),
                Self::Wildcard(ref dom2) => dom.cmp_wildcard(dom2),
            },
            Self::Wildcard(ref dom) => match *other {
                Self::Adblock(ref dom2) => dom.cmp_adblock(dom2),
                Self::DomainOnly(ref dom2) => dom.cmp_domain_only(dom2),
                Self::Hosts(ref dom2) => dom.cmp_hosts(dom2),
                Self::Wildcard(ref dom2) => dom.cmp(dom2),
            },
        }
    }
}
impl Display for RpzDomain<'_> {
    #[inline]
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match *self {
            Self::Adblock(ref dom) => dom.fmt(f),
            Self::DomainOnly(ref dom) => dom.fmt(f),
            Self::Hosts(ref dom) => dom.fmt(f),
            Self::Wildcard(ref dom) => dom.fmt(f),
        }
    }
}
impl<'a> Set for RpzDomain<'a> {
    type Elem = Domain<&'a str>;
    #[inline]
    fn bounded_cardinality(&self) -> BoundedCardinality {
        BoundedCardinality::from_biguint_exact(self.domain_count())
    }
    #[inline]
    fn cardinality(&self) -> Option<Cardinality> {
        Some(Cardinality::Finite(self.domain_count()))
    }
    #[inline]
    fn contains<Q>(&self, elem: &Q) -> bool
    where
        Q: Borrow<Self::Elem> + Eq + ?Sized,
    {
        match *self {
            Self::Adblock(ref dom) => dom.contains(elem),
            Self::DomainOnly(ref dom) => dom.contains(elem),
            Self::Hosts(ref dom) => dom.contains(elem),
            Self::Wildcard(ref dom) => dom.contains(elem),
        }
    }
    /// Returns `true` iff every `Domain` represented by `self` is represented by `val` and `val`
    /// represents at least one more.
    ///
    /// Only an `Adblock` with subdomains or a `Wildcard` with proper subdomains can be a proper superset.
    #[inline]
    fn is_proper_subset(&self, val: &Self) -> bool {
        match *val {
            // `val` represents its `Domain` and every subdomain of it.
            Self::Adblock(ref dom) => {
                dom.subdomains
                    && match *self {
                        Self::Adblock(ref dom2) => {
                            match dom.domain.cmp_by_domain_ordering(&dom2.domain) {
                                DomainOrdering::Shorter => true,
                                // Same `Domain`: `self` is strictly smaller only when it is the single
                                // domain; when it also has subdomains the two sets are identical.
                                DomainOrdering::Equal => !dom2.subdomains,
                                DomainOrdering::Less
                                | DomainOrdering::Longer
                                | DomainOrdering::Greater => false,
                            }
                        }
                        // None of these represent the `Domain` together with all of its subdomains,
                        // so having the same `Domain` as `val` still makes `self` strictly smaller.
                        Self::DomainOnly(_) | Self::Hosts(_) | Self::Wildcard(_) => matches!(
                            dom.domain.cmp_by_domain_ordering(self.domain()),
                            DomainOrdering::Shorter | DomainOrdering::Equal
                        ),
                    }
            }
            Self::DomainOnly(_) | Self::Hosts(_) => false,
            // `val` represents only the proper subdomains of its `Domain`, so the `Domain` of `self`
            // must itself be a proper subdomain regardless of what kind of rule `self` is.
            Self::Wildcard(ref dom) => {
                dom.proper_subdomains
                    && dom.domain.cmp_by_domain_ordering(self.domain())
                        == DomainOrdering::Shorter
            }
        }
    }
    #[inline]
    fn is_subset(&self, val: &Self) -> bool {
        self == val || self.is_proper_subset(val)
    }
}
impl SetOrd for RpzDomain<'_> {}
/// Exposes the `Domain` of whichever variant is held.
impl<'a> Deref for RpzDomain<'a> {
    type Target = Domain<&'a str>;
    #[inline]
    fn deref(&self) -> &Self::Target {
        match *self {
            Self::Adblock(ref dom) => &dom.domain,
            Self::DomainOnly(ref dom) => &dom.domain,
            Self::Hosts(ref dom) => &dom.domain,
            Self::Wildcard(ref dom) => &dom.domain,
        }
    }
}
impl<'a: 'b, 'b> From<Adblock<'a>> for RpzDomain<'b> {
    #[inline]
    fn from(value: Adblock<'a>) -> Self {
        Self::Adblock(value)
    }
}
impl<'a: 'b, 'b> From<DomainOnly<'a>> for RpzDomain<'b> {
    #[inline]
    fn from(value: DomainOnly<'a>) -> Self {
        Self::DomainOnly(value)
    }
}
impl<'a: 'b, 'b> From<Hosts<'a>> for RpzDomain<'b> {
    #[inline]
    fn from(value: Hosts<'a>) -> Self {
        Self::Hosts(value)
    }
}
impl<'a: 'b, 'b> From<Wildcard<'a>> for RpzDomain<'b> {
#[inline]
    fn from(value: Wildcard<'a>) -> Self {
        Self::Wildcard(value)
    }
}
impl<'a> ParsedDomain<'a> for RpzDomain<'a> {
    type Error = FirefoxDomainErr;
    /// Tries each rule format in turn — `DomainOnly`, `Hosts`, `Wildcard`, then `Adblock` — and returns
    /// the first successful parse wrapped in the matching variant.
    ///
    /// # Errors
    ///
    /// When every format rejects `val`, the error from the `Adblock` attempt is returned; the errors of
    /// the earlier attempts are discarded.
    #[inline]
    fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
        DomainOnly::parse_value(val)
            .map(|parsed| match parsed {
                Value::Domain(rule) => Value::Domain(Self::DomainOnly(rule)),
                Value::Comment(text) => Value::Comment(text),
                Value::Blank => Value::Blank,
            })
            .or_else(|_| {
                Hosts::parse_value(val).map(|parsed| match parsed {
                    Value::Domain(rule) => Value::Domain(Self::Hosts(rule)),
                    Value::Comment(text) => Value::Comment(text),
                    Value::Blank => Value::Blank,
                })
            })
            .or_else(|_| {
                Wildcard::parse_value(val).map(|parsed| match parsed {
                    Value::Domain(rule) => Value::Domain(Self::Wildcard(rule)),
                    Value::Comment(text) => Value::Comment(text),
                    Value::Blank => Value::Blank,
                })
            })
            .or_else(|_| {
                Adblock::parse_value(val).map(|parsed| match parsed {
                    Value::Domain(rule) => Value::Domain(Self::Adblock(rule)),
                    Value::Comment(text) => Value::Comment(text),
                    Value::Blank => Value::Blank,
                })
            })
    }
    /// Returns the `Domain` of whichever variant is held.
    #[inline]
    fn domain(&self) -> &Domain<&'a str> {
        match *self {
            Self::Adblock(ref rule) => &rule.domain,
            Self::DomainOnly(ref rule) => &rule.domain,
            Self::Hosts(ref rule) => &rule.domain,
            Self::Wildcard(ref rule) => &rule.domain,
        }
    }
    /// Forwards to the `write_to_rpz` of whichever variant is held.
    #[inline]
    fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error> {
        match *self {
            Self::Adblock(ref rule) => rule.write_to_rpz(action, writer),
            Self::DomainOnly(ref rule) => rule.write_to_rpz(action, writer),
            Self::Hosts(ref rule) => rule.write_to_rpz(action, writer),
            Self::Wildcard(ref rule) => rule.write_to_rpz(action, writer),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::{
        Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard,
    };
    use ascii_domain::dom::DomainErr;
    use num_bigint::BigUint;
    use superset_map::SupersetSet;
    /// Returns a domain made of `count` single-character `a` labels joined by `.`
    /// (e.g., `labels(3)` is `"a.a.a"`); for nonzero `count` the length is `2 * count - 1`.
    ///
    /// Used instead of hand-written ~250-character literals so the label count under test
    /// is visible at the call site.
    fn labels(count: usize) -> String {
        vec!["a"; count].join(".")
    }
    /// Returns the error expected when `byte` is rejected as an invalid domain byte.
    fn invalid_byte(byte: u8) -> FirefoxDomainErr {
        FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(byte))
    }
    /// Verifies `Adblock::parse_value` handles the `||` prefix, surrounding whitespace, a trailing `^`,
    /// case-insensitivity, comments, and blank lines.
    #[test]
    fn test_adblock_parse() {
        // Test subdomains.
        assert!(matches!(
            Adblock::parse_value("||www.example.com"),
            Ok(Value::Domain(ref dom))
                if dom.subdomains && dom.domain.as_bytes() == b"www.example.com"
        ));
        // Test whitespace and '^' removal.
        assert!(matches!(
            Adblock::parse_value(" \t\t ||\t\t \twww.example.com \t\t ^ \t\t "),
            Ok(Value::Domain(ref dom))
                if dom.subdomains && dom.domain.as_bytes() == b"www.example.com"
        ));
        assert!(matches!(
            Adblock::parse_value("\t\t \twww.example.com \t\t \t\t "),
            Ok(Value::Domain(ref dom))
                if !dom.subdomains && dom.domain.as_bytes() == b"www.example.com"
        ));
        // Whitespace inside the domain is not removed.
        assert_eq!(
            Adblock::parse_value("www .example.com").err(),
            Some(invalid_byte(b' '))
        );
        // Test case-insensitivity.
        assert!(matches!(
            (
                Adblock::parse_value("||www.ExAMPle.COm"),
                Adblock::parse_value("||www.example.com"),
            ),
            (Ok(Value::Domain(ref dom)), Ok(Value::Domain(ref dom2)))
                if dom == dom2 && dom.subdomains && dom2.subdomains && dom.cmp(dom2).is_eq()
        ));
        // Test comment.
        assert!(matches!(
            Adblock::parse_value(" \t\t #hi"),
            Ok(Value::Comment(com)) if com == "hi"
        ));
        assert!(matches!(
            Adblock::parse_value(" \t\t !! foo"),
            Ok(Value::Comment(com)) if com == "! foo"
        ));
        // Test blank.
        assert!(matches!(Adblock::parse_value(" \t\t "), Ok(Value::Blank)));
    }
    /// Verifies `DomainOnly::parse_value` handles whitespace, trailing comments, case-insensitivity,
    /// comment-only and blank lines, and the TLD restriction.
    #[test]
    fn test_domain_only_parse_value() {
        // Test whitespace and comment.
        assert!(matches!(
            DomainOnly::parse_value(" \t\t \t\t \twww.example.com#asdflkj asdf alskdfj "),
            Ok(Value::Domain(ref dom)) if dom.domain.as_bytes() == b"www.example.com"
        ));
        // Unlike `Adblock`, a trailing '^' is not stripped so the interior whitespace is an error.
        assert_eq!(
            DomainOnly::parse_value(" \t\t \t\t \twww.example.com \t\t ^ \t\t ").err(),
            Some(invalid_byte(b' '))
        );
        // Test case-insensitivity.
        assert!(matches!(
            (
                DomainOnly::parse_value("www.ExAMPle.CoM"),
                DomainOnly::parse_value("www.example.com"),
            ),
            (Ok(Value::Domain(ref dom)), Ok(Value::Domain(ref dom2)))
                if dom.cmp(dom2).is_eq()
        ));
        // Test comment.
        assert!(matches!(
            DomainOnly::parse_value(" \t\t \t\t \t # hi"),
            Ok(Value::Comment(com)) if com == " hi"
        ));
        // Test blank.
        assert!(matches!(
            DomainOnly::parse_value(" \t\t \t\t \t "),
            Ok(Value::Blank)
        ));
        // Test punycode TLD (begins with `xn--` and has length of at least five).
        assert!(matches!(
            DomainOnly::parse_value("example.xn--abc"),
            Ok(Value::Domain(_))
        ));
        // Test invalid TLD.
        assert_eq!(
            DomainOnly::parse_value("www.c1m").err(),
            Some(FirefoxDomainErr::InvalidTld)
        );
    }
    /// Verifies `Hosts::parse_value` requires one of the accepted IPs before the domain and otherwise
    /// handles whitespace, comments, case-insensitivity, and blank lines.
    #[test]
    fn test_hosts_parse_value() {
        // Test whitespace and comment.
        assert!(matches!(
            Hosts::parse_value(" \t\t 127.0.0.1\t\t \twww.example.com#asdflkj asdf alskdfj "),
            Ok(Value::Domain(ref dom)) if dom.domain.as_bytes() == b"www.example.com"
        ));
        assert_eq!(
            Hosts::parse_value(" \t\t 0.0.0.0\t\t \twww.example.com \t\t ^ \t\t ").err(),
            Some(invalid_byte(b' '))
        );
        assert_eq!(
            Hosts::parse_value("::1\twww .example.com").err(),
            Some(invalid_byte(b' '))
        );
        // Test invalid IP.
        for line in [
            "::2 www.example.com",
            ":2 www.example.com",
            "www.example.com",
            "10.4.2.256 www.example.com",
        ] {
            assert_eq!(
                Hosts::parse_value(line).err(),
                Some(FirefoxDomainErr::InvalidHostsIP)
            );
        }
        // Test case-insensitivity.
        assert!(matches!(
            (
                Hosts::parse_value(":: www.ExAMPle.Com"),
                Hosts::parse_value("127.0.0.1 www.example.com"),
            ),
            (Ok(Value::Domain(ref dom)), Ok(Value::Domain(ref dom2)))
                if dom.cmp(dom2).is_eq()
        ));
        // Test comment.
        assert!(matches!(
            Hosts::parse_value(" \t\t \t\t \t # hi"),
            Ok(Value::Comment(com)) if com == " hi"
        ));
        // Test blank.
        assert!(matches!(
            Hosts::parse_value(" \t\t \t\t \t "),
            Ok(Value::Blank)
        ));
    }
    /// Verifies `Wildcard::parse_value` only accepts `*` as a leading `*.` label and otherwise
    /// handles whitespace, comments, case-insensitivity, length limits, and blank lines.
    #[test]
    fn test_wildcard_parse_value() {
        // Test bad asterisk.
        assert_eq!(Wildcard::parse_value("*").err(), Some(invalid_byte(b'*')));
        assert_eq!(
            Wildcard::parse_value("www*.example.com").err(),
            Some(invalid_byte(b'*'))
        );
        assert_eq!(
            Wildcard::parse_value("www.*.com").err(),
            Some(invalid_byte(b'*'))
        );
        assert_eq!(
            Wildcard::parse_value("*..com").err(),
            Some(FirefoxDomainErr::InvalidDomain(DomainErr::EmptyLabel))
        );
        assert_eq!(
            Wildcard::parse_value("www.com*").err(),
            Some(invalid_byte(b'*'))
        );
        assert_eq!(
            Wildcard::parse_value("ww*w.com").err(),
            Some(invalid_byte(b'*'))
        );
        // Test case-insensitivity.
        assert!(matches!(
            (
                Wildcard::parse_value("*.wWw.ExamPLE.com"),
                Wildcard::parse_value("*.www.example.com"),
            ),
            (Ok(Value::Domain(ref dom)), Ok(Value::Domain(ref dom2)))
                if dom.cmp(dom2).is_eq()
                    && dom == dom2
                    && dom.proper_subdomains
                    && dom2.proper_subdomains
        ));
        // Test proper subdomains.
        assert!(matches!(
            Wildcard::parse_value("*.www.example.com"),
            Ok(Value::Domain(ref dom))
                if dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains
        ));
        // Test comment.
        assert!(matches!(
            Wildcard::parse_value(" \t\t \t\t \t*.www.example.com#asdflkj asdf alskdfj "),
            Ok(Value::Domain(ref dom))
                if dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains
        ));
        assert!(matches!(
            Wildcard::parse_value(" \t\t \t\t \twww.example.com #asdflkj asdf alskdfj "),
            Ok(Value::Domain(ref dom))
                if dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains
        ));
        // Test whitespace removal.
        assert!(matches!(
            Wildcard::parse_value(" \t\t *.www.example.com \t\t \t "),
            Ok(Value::Domain(ref dom))
                if dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains
        ));
        assert!(matches!(
            Wildcard::parse_value("\t\t \twww.example.com \t\t \t\t "),
            Ok(Value::Domain(ref dom))
                if dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains
        ));
        assert_eq!(
            Wildcard::parse_value("www .example.com").err(),
            Some(invalid_byte(b' '))
        );
        // Test 127 labels after wildcard error.
        // The non-wildcard portion has length 253 which leaves no room for a proper subdomain.
        let too_long = format!("*.{}", labels(127));
        assert_eq!(
            Wildcard::parse_value(too_long.as_str()).err(),
            Some(FirefoxDomainErr::InvalidWildcardDomain)
        );
        // Test 126 labels after wildcard is ok.
        let longest = format!("*.{}", labels(126));
        assert!(matches!(
            Wildcard::parse_value(longest.as_str()),
            Ok(Value::Domain(ref dom))
                if dom.domain.iter().count() == 126 && dom.proper_subdomains
        ));
        // Test comment.
        assert!(matches!(
            Wildcard::parse_value(" \t\t \t\t \t # hi"),
            Ok(Value::Comment(com)) if com == " hi"
        ));
        // Test blank.
        assert!(matches!(
            Wildcard::parse_value(" \t\t \t\t \t "),
            Ok(Value::Blank)
        ));
    }
    /// Verifies `RpzDomain::parse_value` accepts each of the wildcard, Adblock, hosts, and
    /// domain-only formats as well as comments and blank lines.
    #[test]
    fn test_rpz_parse_value() {
        assert!(
            RpzDomain::parse_value("*.www.example.com").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                dom.is_proper_subdomains() && dom.domain().as_bytes() == b"www.example.com"
            })
        );
        assert!(
            RpzDomain::parse_value("||www.example.com").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                dom.is_subdomains() && dom.domain().as_bytes() == b"www.example.com"
            })
        );
        assert!(
            RpzDomain::parse_value("0.0.0.0 www.example.com").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                !(dom.is_subdomains() || dom.is_proper_subdomains())
                    && dom.domain().as_bytes() == b"www.example.com"
            })
        );
        assert!(
            RpzDomain::parse_value("www.example.com").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                !(dom.is_subdomains() || dom.is_proper_subdomains())
                    && dom.domain().as_bytes() == b"www.example.com"
            })
        );
        // Test case-insensitivity.
        assert!(
            RpzDomain::parse_value("*.Www.ExaMPle.COm").is_ok_and(|val| {
                let dom = val.unwrap_domain();
                RpzDomain::parse_value("*.www.example.com").is_ok_and(|val2| {
                    let dom2 = val2.unwrap_domain();
                    dom.is_proper_subdomains()
                        && dom2.is_proper_subdomains()
                        && dom == dom2
                        && dom.cmp(&dom2).is_eq()
                })
            })
        );
        // Test comment.
        assert!(matches!(
            RpzDomain::parse_value(" \t\t \t\t \t # hi"),
            Ok(Value::Comment(com)) if com == " hi"
        ));
        assert!(matches!(
            RpzDomain::parse_value(" \t\t \t\t \t ! hi"),
            Ok(Value::Comment(com)) if com == " hi"
        ));
        // Test blank.
        assert!(matches!(
            RpzDomain::parse_value(" \t\t \t\t \t "),
            Ok(Value::Blank)
        ));
    }
    /// Verifies that the comma-separated domains below are strictly increasing per `RpzDomain`'s
    /// ordering and that each one is equal to itself.
    ///
    /// # Errors
    ///
    /// Returns a message when any adjacent pair violates the expected ordering or equality.
    #[test]
    fn test_rpz_ord_and_eq() -> Result<(), &'static str> {
        let mut prev = RpzDomain::DomainOnly(
            DomainOnly::parse_value("bar.com")
                .expect("bug in DomainOnly::parse_value")
                .unwrap_domain(),
        );
        for slice in "www.bar.com,*.www.bar.com,||www.bar.com,*.bar.com,||bar.com,Example.com,WwW.exaMple.com,*.www.example.com,||www.example.com,*.example.com,||example.com,FOo.coM,Www.foo.com,*.foo.com,*.coM,example.net,*.net".split(',') {
            let cur = if slice.starts_with('|') {
                RpzDomain::Adblock(
                    Adblock::parse_value(slice)
                        .expect("Bug in Adblock::parse_value")
                        .unwrap_domain(),
                )
            } else {
                RpzDomain::Wildcard(
                    Wildcard::parse_value(slice)
                        .expect("Bug in Wildcard::parse_value")
                        .unwrap_domain(),
                )
            };
            // The self-comparisons are intentional: they exercise reflexivity of `PartialEq`.
            if prev < cur && cur > prev && prev == prev && cur == cur {
                prev = cur;
            } else {
                return Err("PartialEq or Ord are not correctly implemented for RpzDomain.");
            }
        }
        Ok(())
    }
    /// Verifies that inserting the comma-separated domains below into a `SupersetSet` leaves
    /// exactly the five entries asserted, in the asserted iteration order.
    #[test]
    fn test_superset_set() {
        let mut doms = SupersetSet::new();
        for slice in "*.NeT,*.net,www.bar.com,*.net,*.www.bar.com,||www.bar.com,*.bar.com,||bar.com,example.com,www.example.com,*.www.example.com,||www.example.com,*.example.com,||example.com,foo.com,www.foo.com,*.foo.com,*.com,example.net,*.abc.abc,||aawww.abc,abc.abc".split(',') {
            doms.insert(if slice.starts_with('|') {
                RpzDomain::Adblock(
                    Adblock::parse_value(slice)
                        .expect("Bug in Adblock::parse_value")
                        .unwrap_domain(),
                )
            } else {
                RpzDomain::Wildcard(
                    Wildcard::parse_value(slice)
                        .expect("Bug in Wildcard::parse_value")
                        .unwrap_domain(),
                )
            });
        }
        let mut iter = doms.into_iter();
        assert!(
            iter.next()
                .is_some_and(|d| d.domain().as_bytes() == b"aawww.abc" && d.is_subdomains())
        );
        assert!(
            iter.next()
                .is_some_and(|d| d.domain().as_bytes() == b"abc.abc" && d.is_domain())
        );
        assert!(
            iter.next()
                .is_some_and(|d| d.domain().as_bytes() == b"abc.abc" && d.is_proper_subdomains())
        );
        assert!(
            iter.next()
                .is_some_and(|d| d.domain().as_bytes() == b"com" && d.is_proper_subdomains())
        );
        // The first-inserted spelling, `*.NeT`, is the one expected to be kept.
        assert!(
            iter.next()
                .is_some_and(|d| d.domain().as_bytes() == b"NeT" && d.is_proper_subdomains())
        );
        assert!(iter.next().is_none());
    }
    /// Verifies `domain_count` against values calculated by hand.
    #[test]
    fn test_card() {
        // Geometric series.
        // We can have two labels each with one character,
        // one label with one to three characters, or 0 labels.
        // This is 1 + 52 + 52^2 + 52^3 + 52^2 = (1-52^4)/(1-52) + 52^2 = (52^4 - 1)/51 + 52^2 = 146069.
        let subdomains = format!("||{}", labels(125));
        assert!(Adblock::parse_value(subdomains.as_str()).is_ok_and(|val| {
            let dom = val.unwrap_domain();
            dom.domain.len().get() == 249
                && dom.domain.iter().count() == 125
                && dom.domain_count() == BigUint::new(vec![146069])
        }));
        // A subdomain of length 252 or 253 gets converted to a domain.
        let max_len = format!("||{}", labels(127));
        assert!(Adblock::parse_value(max_len.as_str()).is_ok_and(|val| {
            let dom = val.unwrap_domain();
            dom.domain.iter().count() == 127
                && !dom.subdomains
                && dom.domain_count() == BigUint::new(vec![1])
        }));
        // Pre-calculated manually.
        // This is the number of domains possible between 2 and 252 characters.
        assert!(Wildcard::parse_value("*.a").is_ok_and(|val| {
            val.unwrap_domain().domain_count()
                == BigUint::new(vec![
                    375288404, 2460223985, 1334358771, 2543621408, 2519466280, 1133682239,
                    3589178618, 348125705, 1709233643, 958334503, 3780539710, 2181893897,
                    2457156833, 3204765645, 2728103430, 1817547150, 3102358416, 444185044,
                    3659003776, 10341713, 306326206, 1336386425, 3942332649, 2036577878,
                    2460939277, 3976861337, 2101094571, 2241770079, 2667853164, 3687350273,
                    109356153, 3455569358, 2333076459, 2433207896, 1553903141, 2621943843,
                    4223295645, 1753858368, 130924388, 965594304, 3942586845, 1573844087,
                    4237886128, 481383133, 56931017,
                ])
        }));
    }
}