commit 7bd219808ca7000bd41d8ae3ac83b4902cc9cf1a
parent 9fd5ba3850cf6fe8d46d38ed1e30512bfc2eeb81
Author: Zack Newman <zack@philomathiclife.com>
Date: Thu, 8 Feb 2024 13:16:30 -0700
update priv_sep and ascii_domain
Diffstat:
4 files changed, 295 insertions(+), 121 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0"
name = "rpz"
readme = "README.md"
repository = "https://git.philomathiclife.com/repos/rpz/"
-version = "0.3.0"
+version = "0.4.0"
[lib]
name = "rpz"
@@ -20,18 +20,18 @@ name = "rpz"
path = "src/main.rs"
[dependencies]
-ascii_domain = { version = "0.3.0", default-features = false }
+ascii_domain = { version = "0.4.0", default-features = false }
num-bigint = { version = "0.4.4", default-features = false }
-reqwest = { version = "0.11.23", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] }
+reqwest = { version = "0.11.24", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] }
serde = { version = "1.0.196", default-features = false }
superset_map = { version = "0.2.2", default-features = false }
-tokio = { version = "1.35.1", default-features = false, features = ["rt", "time"] }
-toml = { version = "0.8.8", default-features = false, features = ["parse"] }
+tokio = { version = "1.36.0", default-features = false, features = ["rt", "time"] }
+toml = { version = "0.8.10", default-features = false, features = ["parse"] }
url = { version = "2.5.0", default-features = false, features = ["serde"] }
zfc = { version = "0.3.1", default-features = false }
[target.'cfg(target_os = "openbsd")'.dependencies]
-priv_sep = { version = "0.8.1", default-features = false, features = ["openbsd"], optional = true }
+priv_sep = { version = "1.0.0", default-features = false, features = ["openbsd"], optional = true }
[build-dependencies]
rustc_version = "0.4.0"
diff --git a/README.md b/README.md
@@ -184,8 +184,7 @@ Parsing errors are ignored; all other errors are written to `stderr` before prog
This package is actively maintained.
The crates are only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but
-they should work on any [Tier 1 with Host Tools](https://doc.rust-lang.org/beta/rustc/platform-support.html)
-target.
+they should work on any platform.
Nightly `rustc` is required. Once `BTreeMap` [cursors are stabilized](https://github.com/rust-lang/rust/issues/107540), stable `rustc` will work.
On OpenBSD-stable, one can use the `rust` port as long as `RUSTC_BOOTSTRAP` is `export`ed with a value of `1` before invoking
diff --git a/src/dom.rs b/src/dom.rs
@@ -56,48 +56,29 @@ impl error::Error for FirefoxDomainErr {}
const CHARS: &AllowedAscii<[u8; 78]> = &ASCII_FIREFOX;
/// Parses a `[u8]` into a `Domain` using `CHARS` with the added restriction that the `Domain` does not
/// have the format of an IPv4 address.
-#[allow(
- clippy::arithmetic_side_effects,
- clippy::as_conversions,
- clippy::cast_lossless,
- clippy::into_iter_on_ref
-)]
+#[allow(clippy::arithmetic_side_effects, clippy::into_iter_on_ref)]
#[inline]
fn domain_no_ip<'a: 'b, 'b>(val: &'a [u8]) -> Result<Domain<&'b str>, FirefoxDomainErr> {
Domain::try_from_bytes(val, CHARS)
.map_err(FirefoxDomainErr::InvalidDomain)
.and_then(|dom| {
- // Faster to consult the metadata first to hopefully avoid parsing as an IPv4 address.
- if dom.len().get() < 16
- && dom.label_count().get() == 4
// We don't use `std::net::Ipv4Addr::from_str` since that does not consider octets with leading
// 0s as valid. This means something like `0.0.0.01` is not considered an IPv4 address, but we
// want to consider that as an IP.
- && dom
- .into_iter()
- .try_fold((), |(), label| {
- if label.len() < 4 {
- label
- .as_bytes()
- .into_iter()
- .try_fold(0u16, |octet, byt| {
- if byt.is_ascii_digit() {
- // We already verified the length is at most 3, and we only perform
- // this arithmetic on integers between 0 and 9. This means the max value
- // of these operations is 999 which is smaller than `u16::MAX`. We verified
- // `byt` is an ASCII digit so we know `byt - b'0'` will be inclusively between
- // 0 and 9. So no overflow, underflow, or truncation will occur.
- Ok(octet * 10 + (byt - b'0') as u16)
- } else {
- Err(())
- }
- })
- .and_then(|int| u8::try_from(int).map_or(Err(()), |_| Ok(())))
- } else {
- Err(())
- }
- })
- .is_ok()
+ if (7..=15).contains(&dom.len().get())
+ && dom
+ .into_iter()
+ .try_fold(0u8, |count, label| {
+ // If there are more than 4 `Label`s, it's not an IPv4 address. Similarly if there are more
+ // than 3 characters in the `Label`, then it's not a valid IPv4 octet.
+ if count < 4 && label.len() < 4 && label.as_str().parse::<u8>().is_ok() {
+ Ok(count + 1)
+ } else {
+ Err(())
+ }
+ })
+ // There must be exactly 4 `Label`s.
+ .map_or(false, |count| count == 4)
{
Err(FirefoxDomainErr::Ipv4)
} else {
@@ -132,6 +113,18 @@ impl Display for RpzAction {
}
}
}
+impl PartialEq<&Self> for RpzAction {
+ #[inline]
+ fn eq(&self, other: &&Self) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<RpzAction> for &RpzAction {
+ #[inline]
+ fn eq(&self, other: &RpzAction) -> bool {
+ **self == *other
+ }
+}
/// Writes the following line with `writer` based on `action`:
/// * `RpzAction::Nxdomain`: `<dom> CNAME .`.
/// * `RpzAction::Nodata`: `<dom> CNAME *.`.
@@ -421,18 +414,78 @@ impl PartialEq<Adblock<'_>> for Adblock<'_> {
self.domain == other.domain && self.subdomains == other.subdomains
}
}
+impl PartialEq<&Adblock<'_>> for Adblock<'_> {
+ #[inline]
+ fn eq(&self, other: &&Adblock<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Adblock<'_>> for &Adblock<'_> {
+ #[inline]
+ fn eq(&self, other: &Adblock<'_>) -> bool {
+ **self == *other
+ }
+}
impl PartialEq<DomainOnly<'_>> for Adblock<'_> {
#[inline]
fn eq(&self, other: &DomainOnly<'_>) -> bool {
!self.subdomains && self.domain == other.domain
}
}
+impl PartialEq<&DomainOnly<'_>> for Adblock<'_> {
+ #[inline]
+ fn eq(&self, other: &&DomainOnly<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<DomainOnly<'_>> for &Adblock<'_> {
+ #[inline]
+ fn eq(&self, other: &DomainOnly<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&Adblock<'_>> for DomainOnly<'_> {
+ #[inline]
+ fn eq(&self, other: &&Adblock<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Adblock<'_>> for &DomainOnly<'_> {
+ #[inline]
+ fn eq(&self, other: &Adblock<'_>) -> bool {
+ **self == *other
+ }
+}
impl PartialEq<Hosts<'_>> for Adblock<'_> {
#[inline]
fn eq(&self, other: &Hosts<'_>) -> bool {
!self.subdomains && self.domain == other.domain
}
}
+impl PartialEq<&Hosts<'_>> for Adblock<'_> {
+ #[inline]
+ fn eq(&self, other: &&Hosts<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Hosts<'_>> for &Adblock<'_> {
+ #[inline]
+ fn eq(&self, other: &Hosts<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&Adblock<'_>> for Hosts<'_> {
+ #[inline]
+ fn eq(&self, other: &&Adblock<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Adblock<'_>> for &Hosts<'_> {
+ #[inline]
+ fn eq(&self, other: &Adblock<'_>) -> bool {
+ **self == *other
+ }
+}
impl PartialEq<Wildcard<'_>> for Adblock<'_> {
#[allow(clippy::suspicious_operation_groupings)]
#[inline]
@@ -440,6 +493,30 @@ impl PartialEq<Wildcard<'_>> for Adblock<'_> {
!(self.subdomains || other.proper_subdomains) && self.domain == other.domain
}
}
+impl PartialEq<&Wildcard<'_>> for Adblock<'_> {
+ #[inline]
+ fn eq(&self, other: &&Wildcard<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Wildcard<'_>> for &Adblock<'_> {
+ #[inline]
+ fn eq(&self, other: &Wildcard<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&Adblock<'_>> for Wildcard<'_> {
+ #[inline]
+ fn eq(&self, other: &&Adblock<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Adblock<'_>> for &Wildcard<'_> {
+ #[inline]
+ fn eq(&self, other: &Adblock<'_>) -> bool {
+ **self == *other
+ }
+}
impl Eq for Adblock<'_> {}
impl Hash for Adblock<'_> {
#[inline]
@@ -539,8 +616,10 @@ impl<'a> Set for Adblock<'a> {
Q: Borrow<Self::Elem> + Eq + ?Sized,
{
if self.subdomains {
- let dom2 = elem.borrow();
- self.domain.label_count() <= dom2.label_count() && self.domain.same_branch(dom2)
+ matches!(
+ self.domain.cmp_by_domain_ordering(elem.borrow()),
+ DomainOrdering::Shorter
+ )
} else {
self.domain == *elem.borrow()
}
@@ -551,10 +630,10 @@ impl<'a> Set for Adblock<'a> {
// more labels or the same number of labels as another subdomains. In all other cases, we need to
// recursively check from the TLD that the labels are the same.
val.subdomains
- && match val.domain.label_count().cmp(&self.domain.label_count()) {
- Ordering::Less => val.domain.same_branch(&self.domain),
- Ordering::Equal => !self.subdomains && val.domain.same_branch(&self.domain),
- Ordering::Greater => false,
+ && match val.domain.cmp_by_domain_ordering(&self.domain) {
+ DomainOrdering::Less | DomainOrdering::Longer | DomainOrdering::Greater => false,
+ DomainOrdering::Shorter => true,
+ DomainOrdering::Equal => !self.subdomains,
}
}
#[inline]
@@ -704,6 +783,18 @@ impl PartialEq<DomainOnly<'_>> for DomainOnly<'_> {
self.domain == other.domain
}
}
+impl PartialEq<DomainOnly<'_>> for &DomainOnly<'_> {
+ #[inline]
+ fn eq(&self, other: &DomainOnly<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&DomainOnly<'_>> for DomainOnly<'_> {
+ #[inline]
+ fn eq(&self, other: &&DomainOnly<'_>) -> bool {
+ *self == **other
+ }
+}
impl PartialEq<Adblock<'_>> for DomainOnly<'_> {
#[inline]
fn eq(&self, other: &Adblock<'_>) -> bool {
@@ -716,12 +807,60 @@ impl PartialEq<Hosts<'_>> for DomainOnly<'_> {
self.domain == other.domain
}
}
+impl PartialEq<&Hosts<'_>> for DomainOnly<'_> {
+ #[inline]
+ fn eq(&self, other: &&Hosts<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Hosts<'_>> for &DomainOnly<'_> {
+ #[inline]
+ fn eq(&self, other: &Hosts<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&DomainOnly<'_>> for Hosts<'_> {
+ #[inline]
+ fn eq(&self, other: &&DomainOnly<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<DomainOnly<'_>> for &Hosts<'_> {
+ #[inline]
+ fn eq(&self, other: &DomainOnly<'_>) -> bool {
+ **self == *other
+ }
+}
impl PartialEq<Wildcard<'_>> for DomainOnly<'_> {
#[inline]
fn eq(&self, other: &Wildcard<'_>) -> bool {
!other.proper_subdomains && self.domain == other.domain
}
}
+impl PartialEq<&Wildcard<'_>> for DomainOnly<'_> {
+ #[inline]
+ fn eq(&self, other: &&Wildcard<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Wildcard<'_>> for &DomainOnly<'_> {
+ #[inline]
+ fn eq(&self, other: &Wildcard<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&DomainOnly<'_>> for Wildcard<'_> {
+ #[inline]
+ fn eq(&self, other: &&DomainOnly<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<DomainOnly<'_>> for &Wildcard<'_> {
+ #[inline]
+ fn eq(&self, other: &DomainOnly<'_>) -> bool {
+ **self == *other
+ }
+}
impl Eq for DomainOnly<'_> {}
impl Hash for DomainOnly<'_> {
#[inline]
@@ -898,6 +1037,18 @@ impl PartialEq<Hosts<'_>> for Hosts<'_> {
self.domain == other.domain
}
}
+impl PartialEq<Hosts<'_>> for &Hosts<'_> {
+ #[inline]
+ fn eq(&self, other: &Hosts<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&Hosts<'_>> for Hosts<'_> {
+ #[inline]
+ fn eq(&self, other: &&Hosts<'_>) -> bool {
+ *self == **other
+ }
+}
impl PartialEq<Adblock<'_>> for Hosts<'_> {
#[inline]
fn eq(&self, other: &Adblock<'_>) -> bool {
@@ -916,6 +1067,30 @@ impl PartialEq<Wildcard<'_>> for Hosts<'_> {
!other.proper_subdomains && self.domain == other.domain
}
}
+impl PartialEq<&Wildcard<'_>> for Hosts<'_> {
+ #[inline]
+ fn eq(&self, other: &&Wildcard<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Wildcard<'_>> for &Hosts<'_> {
+ #[inline]
+ fn eq(&self, other: &Wildcard<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&Hosts<'_>> for Wildcard<'_> {
+ #[inline]
+ fn eq(&self, other: &&Hosts<'_>) -> bool {
+ *self == **other
+ }
+}
+impl PartialEq<Hosts<'_>> for &Wildcard<'_> {
+ #[inline]
+ fn eq(&self, other: &Hosts<'_>) -> bool {
+ **self == *other
+ }
+}
impl Eq for Hosts<'_> {}
impl Hash for Hosts<'_> {
#[inline]
@@ -1173,6 +1348,18 @@ impl PartialEq<Wildcard<'_>> for Wildcard<'_> {
self.domain == other.domain && self.proper_subdomains == other.proper_subdomains
}
}
+impl PartialEq<Wildcard<'_>> for &Wildcard<'_> {
+ #[inline]
+ fn eq(&self, other: &Wildcard<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&Wildcard<'_>> for Wildcard<'_> {
+ #[inline]
+ fn eq(&self, other: &&Wildcard<'_>) -> bool {
+ *self == **other
+ }
+}
impl PartialEq<Adblock<'_>> for Wildcard<'_> {
#[inline]
fn eq(&self, other: &Adblock<'_>) -> bool {
@@ -1301,8 +1488,7 @@ impl<'a> Set for Wildcard<'a> {
Q: Borrow<Self::Elem> + Eq + ?Sized,
{
if self.proper_subdomains {
- let dom2 = elem.borrow();
- self.domain.label_count() < dom2.label_count() && self.domain.same_branch(dom2)
+ self.domain.cmp_by_domain_ordering(elem.borrow()) == DomainOrdering::Shorter
} else {
self.domain == *elem.borrow()
}
@@ -1313,8 +1499,7 @@ impl<'a> Set for Wildcard<'a> {
// has more labels or the same number of labels as another domain. In all other cases, we need to
// recursively check from the TLD that the labels are the same.
val.proper_subdomains
- && val.domain.label_count() < self.domain.label_count()
- && val.domain.same_branch(&self.domain)
+ && val.domain.cmp_by_domain_ordering(&self.domain) == DomainOrdering::Shorter
}
#[inline]
fn is_subset(&self, val: &Self) -> bool {
@@ -1490,6 +1675,18 @@ impl PartialEq<RpzDomain<'_>> for RpzDomain<'_> {
}
}
}
+impl PartialEq<RpzDomain<'_>> for &RpzDomain<'_> {
+ #[inline]
+ fn eq(&self, other: &RpzDomain<'_>) -> bool {
+ **self == *other
+ }
+}
+impl PartialEq<&RpzDomain<'_>> for RpzDomain<'_> {
+ #[inline]
+ fn eq(&self, other: &&RpzDomain<'_>) -> bool {
+ *self == **other
+ }
+}
impl Eq for RpzDomain<'_> {}
impl Hash for RpzDomain<'_> {
#[inline]
@@ -1586,46 +1783,26 @@ impl<'a> Set for RpzDomain<'a> {
#[allow(clippy::ref_patterns)]
#[inline]
fn is_proper_subset(&self, val: &Self) -> bool {
- /// Helper function that verifies all labels are the same.
- #[inline]
- fn helper(left: &Domain<&str>, right: &Domain<&str>) -> bool {
- left.label_count() <= right.label_count() && left.same_branch(right)
- }
match *val {
Self::Adblock(ref dom) => {
dom.subdomains
&& match *self {
Self::Adblock(ref dom2) => {
- dom.domain.label_count() < dom2.domain.label_count()
- && helper(&dom.domain, &dom2.domain)
- }
- Self::DomainOnly(ref dom2) => {
- match dom.domain.label_count().cmp(&dom2.domain.label_count()) {
- Ordering::Less => helper(&dom.domain, &dom2.domain),
- Ordering::Equal => {
- dom.domain == dom2.domain || helper(&dom.domain, &dom2.domain)
- }
- Ordering::Greater => false,
- }
- }
- Self::Hosts(ref dom2) => {
- match dom.domain.label_count().cmp(&dom2.domain.label_count()) {
- Ordering::Less => helper(&dom.domain, &dom2.domain),
- Ordering::Equal => {
- dom.domain == dom2.domain || helper(&dom.domain, &dom2.domain)
- }
- Ordering::Greater => false,
- }
- }
- Self::Wildcard(ref dom2) => {
- match dom.domain.label_count().cmp(&dom2.domain.label_count()) {
- Ordering::Less => helper(&dom.domain, &dom2.domain),
- Ordering::Equal => {
- dom.domain == dom2.domain || helper(&dom.domain, &dom2.domain)
- }
- Ordering::Greater => false,
- }
+ dom.domain.cmp_by_domain_ordering(&dom2.domain)
+ == DomainOrdering::Shorter
}
+ Self::DomainOnly(ref dom2) => matches!(
+ dom.domain.cmp_by_domain_ordering(&dom2.domain),
+ DomainOrdering::Shorter | DomainOrdering::Equal
+ ),
+ Self::Hosts(ref dom2) => matches!(
+ dom.domain.cmp_by_domain_ordering(&dom2.domain),
+ DomainOrdering::Shorter | DomainOrdering::Equal
+ ),
+ Self::Wildcard(ref dom2) => matches!(
+ dom.domain.cmp_by_domain_ordering(&dom2.domain),
+ DomainOrdering::Shorter | DomainOrdering::Equal
+ ),
}
}
Self::DomainOnly(_) | Self::Hosts(_) => false,
@@ -1633,20 +1810,20 @@ impl<'a> Set for RpzDomain<'a> {
dom.proper_subdomains
&& match *self {
Self::Adblock(ref dom2) => {
- dom.domain.label_count() < dom2.domain.label_count()
- && helper(&dom.domain, &dom2.domain)
+ dom.domain.cmp_by_domain_ordering(&dom2.domain)
+ == DomainOrdering::Shorter
}
Self::DomainOnly(ref dom2) => {
- dom.domain.label_count() < dom2.domain.label_count()
- && helper(&dom.domain, &dom2.domain)
+ dom.domain.cmp_by_domain_ordering(&dom2.domain)
+ == DomainOrdering::Shorter
}
Self::Hosts(ref dom2) => {
- dom.domain.label_count() < dom2.domain.label_count()
- && helper(&dom.domain, &dom2.domain)
+ dom.domain.cmp_by_domain_ordering(&dom2.domain)
+ == DomainOrdering::Shorter
}
Self::Wildcard(ref dom2) => {
- dom.domain.label_count() < dom2.domain.label_count()
- && helper(&dom.domain, &dom2.domain)
+ dom.domain.cmp_by_domain_ordering(&dom2.domain)
+ == DomainOrdering::Shorter
}
}
}
@@ -2024,7 +2201,7 @@ mod tests {
assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == FirefoxDomainErr::InvalidWildcardDomain, |_| false));
// Test 126 labels after wildcard is ok.
assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| match val {
- Value::Domain(ref dom) => dom.domain.label_count().get() == 126 && dom.proper_subdomains,
+ Value::Domain(ref dom) => dom.domain.into_iter().count() == 126 && dom.proper_subdomains,
Value::Comment(_) | Value::Blank => false,
}));
// Test comment.
@@ -2144,9 +2321,9 @@ mod tests {
// We can have two labels each with one character,
// one label with one to three characters, or 0 labels.
// This is 1 + 52 + 52^2 + 52^3 + 52^2 = (1-52^4)/(1-52) + 52^2 = (52^4 - 1)/51 + 52^2 = 146069.
- assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| { let dom = val.unwrap_domain(); dom.domain.len().get() == 249 && dom.domain.label_count().get() == 125 && dom.domain_count() == BigUint::new(vec![146069]) }));
+ assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| { let dom = val.unwrap_domain(); dom.domain.len().get() == 249 && dom.domain.into_iter().count() == 125 && dom.domain_count() == BigUint::new(vec![146069]) }));
// A subdomain of length 252 or 253 gets converted to a domain.
- assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| { let dom = val.unwrap_domain(); dom.domain.label_count().get() == 127 && !dom.subdomains && dom.domain_count() == BigUint::new(vec![1]) }));
+ assert!(Adblock::parse_value("||a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| { let dom = val.unwrap_domain(); dom.domain.into_iter().count() == 127 && !dom.subdomains && dom.domain_count() == BigUint::new(vec![1]) }));
// Pre-calculated manually.
// This is the number of domains possible between 2 and 252 characters.
// The other check is to ensure that IPv4 address subdomains are not counted.
diff --git a/src/dom_count_auto_gen.rs b/src/dom_count_auto_gen.rs
@@ -20,31 +20,29 @@ pub fn proper_subdomain_count(dom: &Domain<&str>) -> BigUint {
#[inline]
fn ip_count(dom: &Domain<&str>) -> u32 {
// `Domain`s that have 4 or more `Label`s can't be an IPv4 address. Also `Domain`s must have at least one
- // `Label`, so `0 < 4 - dom.label_count < 4` and `(10 * 3 + 90 * 2 + 156 * 1)^3 = (30 + 180 + 156)^3
+ // `Label`, so 0 < 4 - label count < 4 and `(10 * 3 + 90 * 2 + 156 * 1)^3 = (30 + 180 + 156)^3
// = 366^3 <= u32::MAX`.
- if dom.label_count().get() < 4 {
- dom.into_iter()
- .try_fold((), |(), label| {
- // Only a sequence of 1 to 3 digits whose value is between 0 and 255 is a valid octet
- // in an IPv4 address. For `Domain`s that have such `Label`s, the total number of IPv4
- // addresses is simply 366^(4- label count).
- // 366 comes from the fact that there are 3 distinct ways to represents integers < 10,
- // 2 distinct ways to represent integers inclusively between 10 and 99, and 1 way
- // to represent integers greater than 99 giving (3 * 10) + (2 * 90) + (1 * 156) = 366
- // ways a `Label` can be a valid octet for an IPv4 address.
- //
- // Note that `Label`s always have a length of at least 1 and any `Label` longer than 3
- // cannot be a valid octet in an IPv4 address.
- if label.len() < 4 {
- label.as_str().parse::<u8>().map_or(Err(()), |_| Ok(()))
- } else {
- Err(())
- }
- })
- .map_or(0, |()| 366u32.pow(4 - dom.label_count().get() as u32))
- } else {
- 0
- }
+ dom.into_iter()
+ .try_fold(0, |count, label| {
+ // If this is the fourth `Label`, then it can't be an IPv4 address since it has too many
+ // `Label`s.
+ // Only a sequence of 1 to 3 digits whose value is between 0 and 255 is a valid octet
+ // in an IPv4 address. For `Domain`s that have such `Label`s, the total number of IPv4
+ // addresses is simply 366^(4 - label count).
+ // 366 comes from the fact that there are 3 distinct ways to represent integers < 10,
+ // 2 distinct ways to represent integers inclusively between 10 and 99, and 1 way
+ // to represent integers greater than 99 giving (3 * 10) + (2 * 90) + (1 * 156) = 366
+ // ways a `Label` can be a valid octet for an IPv4 address.
+ //
+ // Note that `Label`s always have a length of at least 1 and any `Label` longer than 3
+ // cannot be a valid octet in an IPv4 address.
+ if count < 4 && label.len() < 4 && label.as_str().parse::<u8>().is_ok() {
+ Ok(count + 1)
+ } else {
+ Err(())
+ }
+ })
+ .map_or(0, |count| 366u32.pow(4 - count))
}
// The commented out code at the end of the function was used to calculate the cardinalities
// for each possible value of domain length allowing IPv4 addresses; however it takes