rpz

Response policy zone (RPZ) file generator.
git clone https://git.philomathiclife.com/repos/rpz
Log | Files | Refs | README

commit 7bd219808ca7000bd41d8ae3ac83b4902cc9cf1a
parent 9fd5ba3850cf6fe8d46d38ed1e30512bfc2eeb81
Author: Zack Newman <zack@philomathiclife.com>
Date:   Thu,  8 Feb 2024 13:16:30 -0700

update priv_sep and ascii_domain

Diffstat:
MCargo.toml | 12++++++------
MREADME.md | 3+--
Msrc/dom.rs | 355+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
Msrc/dom_count_auto_gen.rs | 46++++++++++++++++++++++------------------------
4 files changed, 295 insertions(+), 121 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0" name = "rpz" readme = "README.md" repository = "https://git.philomathiclife.com/repos/rpz/" -version = "0.3.0" +version = "0.4.0" [lib] name = "rpz" @@ -20,18 +20,18 @@ name = "rpz" path = "src/main.rs" [dependencies] -ascii_domain = { version = "0.3.0", default-features = false } +ascii_domain = { version = "0.4.0", default-features = false } num-bigint = { version = "0.4.4", default-features = false } -reqwest = { version = "0.11.23", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] } +reqwest = { version = "0.11.24", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] } serde = { version = "1.0.196", default-features = false } superset_map = { version = "0.2.2", default-features = false } -tokio = { version = "1.35.1", default-features = false, features = ["rt", "time"] } -toml = { version = "0.8.8", default-features = false, features = ["parse"] } +tokio = { version = "1.36.0", default-features = false, features = ["rt", "time"] } +toml = { version = "0.8.10", default-features = false, features = ["parse"] } url = { version = "2.5.0", default-features = false, features = ["serde"] } zfc = { version = "0.3.1", default-features = false } [target.'cfg(target_os = "openbsd")'.dependencies] -priv_sep = { version = "0.8.1", default-features = false, features = ["openbsd"], optional = true } +priv_sep = { version = "1.0.0", default-features = false, features = ["openbsd"], optional = true } [build-dependencies] rustc_version = "0.4.0" diff --git a/README.md b/README.md @@ -184,8 +184,7 @@ Parsing errors are ignored; all other errors are written to `stderr` before prog This package is actively maintained. 
The crates are only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but -they should work on any [Tier 1 with Host Tools](https://doc.rust-lang.org/beta/rustc/platform-support.html) -target. +they should work on any platform. Nightly `rustc` is required. Once `BTreeMap` [cursors are stabilized](https://github.com/rust-lang/rust/issues/107540), stable `rustc` will work. On OpenBSD-stable, one can use the `rust` port as long as `RUSTC_BOOTSTRAP` is `export`ed with a value of `1` before invoking diff --git a/src/dom.rs b/src/dom.rs @@ -56,48 +56,29 @@ impl error::Error for FirefoxDomainErr {} const CHARS: &AllowedAscii<[u8; 78]> = &ASCII_FIREFOX; /// Parses a `[u8]` into a `Domain` using `CHARS` with the added restriction that the `Domain` does not /// have the format of an IPv4 address. -#[allow( - clippy::arithmetic_side_effects, - clippy::as_conversions, - clippy::cast_lossless, - clippy::into_iter_on_ref -)] +#[allow(clippy::arithmetic_side_effects, clippy::into_iter_on_ref)] #[inline] fn domain_no_ip<'a: 'b, 'b>(val: &'a [u8]) -> Result<Domain<&'b str>, FirefoxDomainErr> { Domain::try_from_bytes(val, CHARS) .map_err(FirefoxDomainErr::InvalidDomain) .and_then(|dom| { - // Faster to consult the metadata first to hopefully avoid parsing as an IPv4 address. - if dom.len().get() < 16 - && dom.label_count().get() == 4 // We don't use `std::net::Ipv4Addr::from_str` since that does not consider octets with leading // 0s as valid. This means something like `0.0.0.01` is not considered an IPv4 address, but we // want to consider that as an IP. - && dom - .into_iter() - .try_fold((), |(), label| { - if label.len() < 4 { - label - .as_bytes() - .into_iter() - .try_fold(0u16, |octet, byt| { - if byt.is_ascii_digit() { - // We already verified the length is at most 3, and we only perform - // this arithmetic on integers between 0 and 9. This means the max value - // of these operations is 999 which is smaller than `u16::MAX`. 
We verified - // `byt` is an ASCII digit so we know `byt - b'0'` will be inclusively between - // 0 and 9. So no overflow, underflow, or truncation will occur. - Ok(octet * 10 + (byt - b'0') as u16) - } else { - Err(()) - } - }) - .and_then(|int| u8::try_from(int).map_or(Err(()), |_| Ok(()))) - } else { - Err(()) - } - }) - .is_ok() + if (7..=15).contains(&dom.len().get()) + && dom + .into_iter() + .try_fold(0u8, |count, label| { + // If there are more than 4 `Label`s, it's not an IPv4 address. Similarly if there are more + // than 3 characters in the `Label`, then it's not a valid IPv4 octet. + if count < 4 && label.len() < 4 && label.as_str().parse::<u8>().is_ok() { + Ok(count + 1) + } else { + Err(()) + } + }) + // There must be exactly 4 `Label`s. + .map_or(false, |count| count == 4) { Err(FirefoxDomainErr::Ipv4) } else { @@ -132,6 +113,18 @@ impl Display for RpzAction { } } } +impl PartialEq<&Self> for RpzAction { + #[inline] + fn eq(&self, other: &&Self) -> bool { + *self == **other + } +} +impl PartialEq<RpzAction> for &RpzAction { + #[inline] + fn eq(&self, other: &RpzAction) -> bool { + **self == *other + } +} /// Writes the following line with `writer` based on `action`: /// * `RpzAction::Nxdomain`: `<dom> CNAME .`. /// * `RpzAction::Nodata`: `<dom> CNAME *.`. 
@@ -421,18 +414,78 @@ impl PartialEq<Adblock<'_>> for Adblock<'_> { self.domain == other.domain && self.subdomains == other.subdomains } } +impl PartialEq<&Adblock<'_>> for Adblock<'_> { + #[inline] + fn eq(&self, other: &&Adblock<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Adblock<'_>> for &Adblock<'_> { + #[inline] + fn eq(&self, other: &Adblock<'_>) -> bool { + **self == *other + } +} impl PartialEq<DomainOnly<'_>> for Adblock<'_> { #[inline] fn eq(&self, other: &DomainOnly<'_>) -> bool { !self.subdomains && self.domain == other.domain } } +impl PartialEq<&DomainOnly<'_>> for Adblock<'_> { + #[inline] + fn eq(&self, other: &&DomainOnly<'_>) -> bool { + *self == **other + } +} +impl PartialEq<DomainOnly<'_>> for &Adblock<'_> { + #[inline] + fn eq(&self, other: &DomainOnly<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&Adblock<'_>> for DomainOnly<'_> { + #[inline] + fn eq(&self, other: &&Adblock<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Adblock<'_>> for &DomainOnly<'_> { + #[inline] + fn eq(&self, other: &Adblock<'_>) -> bool { + **self == *other + } +} impl PartialEq<Hosts<'_>> for Adblock<'_> { #[inline] fn eq(&self, other: &Hosts<'_>) -> bool { !self.subdomains && self.domain == other.domain } } +impl PartialEq<&Hosts<'_>> for Adblock<'_> { + #[inline] + fn eq(&self, other: &&Hosts<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Hosts<'_>> for &Adblock<'_> { + #[inline] + fn eq(&self, other: &Hosts<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&Adblock<'_>> for Hosts<'_> { + #[inline] + fn eq(&self, other: &&Adblock<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Adblock<'_>> for &Hosts<'_> { + #[inline] + fn eq(&self, other: &Adblock<'_>) -> bool { + **self == *other + } +} impl PartialEq<Wildcard<'_>> for Adblock<'_> { #[allow(clippy::suspicious_operation_groupings)] #[inline] @@ -440,6 +493,30 @@ impl PartialEq<Wildcard<'_>> for Adblock<'_> { !(self.subdomains || other.proper_subdomains) && 
self.domain == other.domain } } +impl PartialEq<&Wildcard<'_>> for Adblock<'_> { + #[inline] + fn eq(&self, other: &&Wildcard<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Wildcard<'_>> for &Adblock<'_> { + #[inline] + fn eq(&self, other: &Wildcard<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&Adblock<'_>> for Wildcard<'_> { + #[inline] + fn eq(&self, other: &&Adblock<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Adblock<'_>> for &Wildcard<'_> { + #[inline] + fn eq(&self, other: &Adblock<'_>) -> bool { + **self == *other + } +} impl Eq for Adblock<'_> {} impl Hash for Adblock<'_> { #[inline] @@ -539,8 +616,10 @@ impl<'a> Set for Adblock<'a> { Q: Borrow<Self::Elem> + Eq + ?Sized, { if self.subdomains { - let dom2 = elem.borrow(); - self.domain.label_count() <= dom2.label_count() && self.domain.same_branch(dom2) + matches!( + self.domain.cmp_by_domain_ordering(elem.borrow()), + DomainOrdering::Shorter + ) } else { self.domain == *elem.borrow() } @@ -551,10 +630,10 @@ impl<'a> Set for Adblock<'a> { // more labels or the same number of labels as another subdomains. In all other cases, we need to // recursively check from the TLD that the labels are the same. 
val.subdomains - && match val.domain.label_count().cmp(&self.domain.label_count()) { - Ordering::Less => val.domain.same_branch(&self.domain), - Ordering::Equal => !self.subdomains && val.domain.same_branch(&self.domain), - Ordering::Greater => false, + && match val.domain.cmp_by_domain_ordering(&self.domain) { + DomainOrdering::Less | DomainOrdering::Longer | DomainOrdering::Greater => false, + DomainOrdering::Shorter => true, + DomainOrdering::Equal => !self.subdomains, } } #[inline] @@ -704,6 +783,18 @@ impl PartialEq<DomainOnly<'_>> for DomainOnly<'_> { self.domain == other.domain } } +impl PartialEq<DomainOnly<'_>> for &DomainOnly<'_> { + #[inline] + fn eq(&self, other: &DomainOnly<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&DomainOnly<'_>> for DomainOnly<'_> { + #[inline] + fn eq(&self, other: &&DomainOnly<'_>) -> bool { + *self == **other + } +} impl PartialEq<Adblock<'_>> for DomainOnly<'_> { #[inline] fn eq(&self, other: &Adblock<'_>) -> bool { @@ -716,12 +807,60 @@ impl PartialEq<Hosts<'_>> for DomainOnly<'_> { self.domain == other.domain } } +impl PartialEq<&Hosts<'_>> for DomainOnly<'_> { + #[inline] + fn eq(&self, other: &&Hosts<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Hosts<'_>> for &DomainOnly<'_> { + #[inline] + fn eq(&self, other: &Hosts<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&DomainOnly<'_>> for Hosts<'_> { + #[inline] + fn eq(&self, other: &&DomainOnly<'_>) -> bool { + *self == **other + } +} +impl PartialEq<DomainOnly<'_>> for &Hosts<'_> { + #[inline] + fn eq(&self, other: &DomainOnly<'_>) -> bool { + **self == *other + } +} impl PartialEq<Wildcard<'_>> for DomainOnly<'_> { #[inline] fn eq(&self, other: &Wildcard<'_>) -> bool { !other.proper_subdomains && self.domain == other.domain } } +impl PartialEq<&Wildcard<'_>> for DomainOnly<'_> { + #[inline] + fn eq(&self, other: &&Wildcard<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Wildcard<'_>> for &DomainOnly<'_> { + #[inline] + fn 
eq(&self, other: &Wildcard<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&DomainOnly<'_>> for Wildcard<'_> { + #[inline] + fn eq(&self, other: &&DomainOnly<'_>) -> bool { + *self == **other + } +} +impl PartialEq<DomainOnly<'_>> for &Wildcard<'_> { + #[inline] + fn eq(&self, other: &DomainOnly<'_>) -> bool { + **self == *other + } +} impl Eq for DomainOnly<'_> {} impl Hash for DomainOnly<'_> { #[inline] @@ -898,6 +1037,18 @@ impl PartialEq<Hosts<'_>> for Hosts<'_> { self.domain == other.domain } } +impl PartialEq<Hosts<'_>> for &Hosts<'_> { + #[inline] + fn eq(&self, other: &Hosts<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&Hosts<'_>> for Hosts<'_> { + #[inline] + fn eq(&self, other: &&Hosts<'_>) -> bool { + *self == **other + } +} impl PartialEq<Adblock<'_>> for Hosts<'_> { #[inline] fn eq(&self, other: &Adblock<'_>) -> bool { @@ -916,6 +1067,30 @@ impl PartialEq<Wildcard<'_>> for Hosts<'_> { !other.proper_subdomains && self.domain == other.domain } } +impl PartialEq<&Wildcard<'_>> for Hosts<'_> { + #[inline] + fn eq(&self, other: &&Wildcard<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Wildcard<'_>> for &Hosts<'_> { + #[inline] + fn eq(&self, other: &Wildcard<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&Hosts<'_>> for Wildcard<'_> { + #[inline] + fn eq(&self, other: &&Hosts<'_>) -> bool { + *self == **other + } +} +impl PartialEq<Hosts<'_>> for &Wildcard<'_> { + #[inline] + fn eq(&self, other: &Hosts<'_>) -> bool { + **self == *other + } +} impl Eq for Hosts<'_> {} impl Hash for Hosts<'_> { #[inline] @@ -1173,6 +1348,18 @@ impl PartialEq<Wildcard<'_>> for Wildcard<'_> { self.domain == other.domain && self.proper_subdomains == other.proper_subdomains } } +impl PartialEq<Wildcard<'_>> for &Wildcard<'_> { + #[inline] + fn eq(&self, other: &Wildcard<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&Wildcard<'_>> for Wildcard<'_> { + #[inline] + fn eq(&self, other: &&Wildcard<'_>) -> bool { + *self == **other + 
} +} impl PartialEq<Adblock<'_>> for Wildcard<'_> { #[inline] fn eq(&self, other: &Adblock<'_>) -> bool { @@ -1301,8 +1488,7 @@ impl<'a> Set for Wildcard<'a> { Q: Borrow<Self::Elem> + Eq + ?Sized, { if self.proper_subdomains { - let dom2 = elem.borrow(); - self.domain.label_count() < dom2.label_count() && self.domain.same_branch(dom2) + self.domain.cmp_by_domain_ordering(elem.borrow()) == DomainOrdering::Shorter } else { self.domain == *elem.borrow() } @@ -1313,8 +1499,7 @@ impl<'a> Set for Wildcard<'a> { // has more labels or the same number of labels as another domain. In all other cases, we need to // recursively check from the TLD that the labels are the same. val.proper_subdomains - && val.domain.label_count() < self.domain.label_count() - && val.domain.same_branch(&self.domain) + && val.domain.cmp_by_domain_ordering(&self.domain) == DomainOrdering::Shorter } #[inline] fn is_subset(&self, val: &Self) -> bool { @@ -1490,6 +1675,18 @@ impl PartialEq<RpzDomain<'_>> for RpzDomain<'_> { } } } +impl PartialEq<RpzDomain<'_>> for &RpzDomain<'_> { + #[inline] + fn eq(&self, other: &RpzDomain<'_>) -> bool { + **self == *other + } +} +impl PartialEq<&RpzDomain<'_>> for RpzDomain<'_> { + #[inline] + fn eq(&self, other: &&RpzDomain<'_>) -> bool { + *self == **other + } +} impl Eq for RpzDomain<'_> {} impl Hash for RpzDomain<'_> { #[inline] @@ -1586,46 +1783,26 @@ impl<'a> Set for RpzDomain<'a> { #[allow(clippy::ref_patterns)] #[inline] fn is_proper_subset(&self, val: &Self) -> bool { - /// Helper function that verifies all labels are the same. 
- #[inline] - fn helper(left: &Domain<&str>, right: &Domain<&str>) -> bool { - left.label_count() <= right.label_count() && left.same_branch(right) - } match *val { Self::Adblock(ref dom) => { dom.subdomains && match *self { Self::Adblock(ref dom2) => { - dom.domain.label_count() < dom2.domain.label_count() - && helper(&dom.domain, &dom2.domain) - } - Self::DomainOnly(ref dom2) => { - match dom.domain.label_count().cmp(&dom2.domain.label_count()) { - Ordering::Less => helper(&dom.domain, &dom2.domain), - Ordering::Equal => { - dom.domain == dom2.domain || helper(&dom.domain, &dom2.domain) - } - Ordering::Greater => false, - } - } - Self::Hosts(ref dom2) => { - match dom.domain.label_count().cmp(&dom2.domain.label_count()) { - Ordering::Less => helper(&dom.domain, &dom2.domain), - Ordering::Equal => { - dom.domain == dom2.domain || helper(&dom.domain, &dom2.domain) - } - Ordering::Greater => false, - } - } - Self::Wildcard(ref dom2) => { - match dom.domain.label_count().cmp(&dom2.domain.label_count()) { - Ordering::Less => helper(&dom.domain, &dom2.domain), - Ordering::Equal => { - dom.domain == dom2.domain || helper(&dom.domain, &dom2.domain) - } - Ordering::Greater => false, - } + dom.domain.cmp_by_domain_ordering(&dom2.domain) + == DomainOrdering::Shorter } + Self::DomainOnly(ref dom2) => matches!( + dom.domain.cmp_by_domain_ordering(&dom2.domain), + DomainOrdering::Shorter | DomainOrdering::Equal + ), + Self::Hosts(ref dom2) => matches!( + dom.domain.cmp_by_domain_ordering(&dom2.domain), + DomainOrdering::Shorter | DomainOrdering::Equal + ), + Self::Wildcard(ref dom2) => matches!( + dom.domain.cmp_by_domain_ordering(&dom2.domain), + DomainOrdering::Shorter | DomainOrdering::Equal + ), } } Self::DomainOnly(_) | Self::Hosts(_) => false, @@ -1633,20 +1810,20 @@ impl<'a> Set for RpzDomain<'a> { dom.proper_subdomains && match *self { Self::Adblock(ref dom2) => { - dom.domain.label_count() < dom2.domain.label_count() - && helper(&dom.domain, &dom2.domain) + 
dom.domain.cmp_by_domain_ordering(&dom2.domain) + == DomainOrdering::Shorter } Self::DomainOnly(ref dom2) => { - dom.domain.label_count() < dom2.domain.label_count() - && helper(&dom.domain, &dom2.domain) + dom.domain.cmp_by_domain_ordering(&dom2.domain) + == DomainOrdering::Shorter } Self::Hosts(ref dom2) => { - dom.domain.label_count() < dom2.domain.label_count() - && helper(&dom.domain, &dom2.domain) + dom.domain.cmp_by_domain_ordering(&dom2.domain) + == DomainOrdering::Shorter } Self::Wildcard(ref dom2) => { - dom.domain.label_count() < dom2.domain.label_count() - && helper(&dom.domain, &dom2.domain) + dom.domain.cmp_by_domain_ordering(&dom2.domain) + == DomainOrdering::Shorter } } } @@ -2024,7 +2201,7 @@ mod tests { assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == FirefoxDomainErr::InvalidWildcardDomain, |_| false)); // Test 126 labels after wildcard is ok. assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| match val { - Value::Domain(ref dom) => dom.domain.label_count().get() == 126 && dom.proper_subdomains, + Value::Domain(ref dom) => dom.domain.into_iter().count() == 126 && dom.proper_subdomains, Value::Comment(_) | Value::Blank => false, })); // Test comment. @@ -2144,9 +2321,9 @@ mod tests { // We can have two labels each with one character, // one label with one to three characters, or 0 labels. // This is 1 + 52 + 52^2 + 52^3 + 52^2 = (1-52^4)/(1-52) + 52^2 = (52^4 - 1)/51 + 52^2 = 146069. 
- assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| { let dom = val.unwrap_domain(); dom.domain.len().get() == 249 && dom.domain.label_count().get() == 125 && dom.domain_count() == BigUint::new(vec![146069]) })); + assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| { let dom = val.unwrap_domain(); dom.domain.len().get() == 249 && dom.domain.into_iter().count() == 125 && dom.domain_count() == BigUint::new(vec![146069]) })); // A subdomain of length 252 or 253 gets converted to a domain. - assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| { let dom = val.unwrap_domain(); dom.domain.label_count().get() == 127 && !dom.subdomains && dom.domain_count() == BigUint::new(vec![1]) })); + assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| { let dom = val.unwrap_domain(); dom.domain.into_iter().count() == 127 && !dom.subdomains && dom.domain_count() == BigUint::new(vec![1]) })); // Pre-calculated manually. 
// This is the number of domains possible between 2 and 252 characters. // The other check is to ensure that IPv4 address subdomains are not counted. diff --git a/src/dom_count_auto_gen.rs b/src/dom_count_auto_gen.rs @@ -20,31 +20,29 @@ pub fn proper_subdomain_count(dom: &Domain<&str>) -> BigUint { #[inline] fn ip_count(dom: &Domain<&str>) -> u32 { // `Domain`s that have 4 or more `Label`s can't be an IPv4 address. Also `Domain`s must have at least one - // `Label`, so `0 < 4 - dom.label_count < 4` and `(10 * 3 + 90 * 2 + 156 * 1)^3 = (30 + 180 + 156)^3 + // `Label`, so 0 < 4 - label count < 4 and `(10 * 3 + 90 * 2 + 156 * 1)^3 = (30 + 180 + 156)^3 // = 366^3 <= u32::MAX`. - if dom.label_count().get() < 4 { - dom.into_iter() - .try_fold((), |(), label| { - // Only a sequence of 1 to 3 digits whose value is between 0 and 255 is a valid octet - // in an IPv4 address. For `Domain`s that have such `Label`s, the total number of IPv4 - // addresses is simply 366^(4- label count). - // 366 comes from the fact that there are 3 distinct ways to represents integers < 10, - // 2 distinct ways to represent integers inclusively between 10 and 99, and 1 way - // to represent integers greater than 99 giving (3 * 10) + (2 * 90) + (1 * 156) = 366 - // ways a `Label` can be a valid octet for an IPv4 address. - // - // Note that `Label`s always have a length of at least 1 and any `Label` longer than 3 - // cannot be a valid octet in an IPv4 address. - if label.len() < 4 { - label.as_str().parse::<u8>().map_or(Err(()), |_| Ok(())) - } else { - Err(()) - } - }) - .map_or(0, |()| 366u32.pow(4 - dom.label_count().get() as u32)) - } else { - 0 - } + dom.into_iter() + .try_fold(0, |count, label| { + // If this is the fourth `Label`, then it can't be an IPv4 address since it has too many + // `Label`s. + // Only a sequence of 1 to 3 digits whose value is between 0 and 255 is a valid octet + // in an IPv4 address. 
For `Domain`s that have such `Label`s, the total number of IPv4 + // addresses is simply 366^(4 - label count). + // 366 comes from the fact that there are 3 distinct ways to represent integers < 10, + // 2 distinct ways to represent integers inclusively between 10 and 99, and 1 way + // to represent integers greater than 99 giving (3 * 10) + (2 * 90) + (1 * 156) = 366 + // ways a `Label` can be a valid octet for an IPv4 address. + // + // Note that `Label`s always have a length of at least 1 and any `Label` longer than 3 + // cannot be a valid octet in an IPv4 address. + if count < 4 && label.len() < 4 && label.as_str().parse::<u8>().is_ok() { + Ok(count + 1) + } else { + Err(()) + } + }) + .map_or(0, |count| 366u32.pow(4 - count)) } // The commented out code at the end of the function was used to calculate the cardinalities // for each possible value of domain length allowing IPv4 addresses; however it takes