commit 9fd5ba3850cf6fe8d46d38ed1e30512bfc2eeb81
parent 06b89378239dbcc1eb0a2679bf8e62171e9d78e8
Author: Zack Newman <zack@philomathiclife.com>
Date: Mon, 5 Feb 2024 12:00:00 -0700
refactor to use ascii_domain
Diffstat:
11 files changed, 556 insertions(+), 1689 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0"
name = "rpz"
readme = "README.md"
repository = "https://git.philomathiclife.com/repos/rpz/"
-version = "0.2.1"
+version = "0.3.0"
[lib]
name = "rpz"
@@ -20,6 +20,7 @@ name = "rpz"
path = "src/main.rs"
[dependencies]
+ascii_domain = { version = "0.3.0", default-features = false }
num-bigint = { version = "0.4.4", default-features = false }
reqwest = { version = "0.11.23", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] }
serde = { version = "1.0.196", default-features = false }
diff --git a/README.md b/README.md
@@ -62,7 +62,7 @@ All ad-(un)block files must be valid UTF-8; however for a given domain, each lab
`!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `{`, `}`, and `~`. Labels must be delimited
by `.`. Domains in the file must be delimited by a line feed or carriage return and line feed. A domain must be less than 254 characters in length
including the `.` label separator. Domains are treated as case-insensitive with uppercase letters treated as lowercase. Domains must not be an
-[Ipv4Addr](https://doc.rust-lang.org/std/net/struct.Ipv4Addr.html).
+IPv4 address.
### Adblock-style
@@ -71,8 +71,9 @@ with the requirement that the rule conforms to the following extended regex:
`^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$`
-where `<domain>` conforms to a valid [`Domain`](https://docs.rs/rpz/latest/rpz/dom/struct.Domain.html) with the added requirement that it does not contain `$`, and
-`<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
+where `<domain>` conforms to a valid [`Domain`](https://docs.rs/ascii_domain/latest/ascii_domain/dom/struct.Domain.html) based on
+[`ASCII_FIREFOX`](https://docs.rs/ascii_domain/latest/ascii_domain/char_set/constant.ASCII_FIREFOX.html) with the added requirements
+that it does not have the form of an IPv4 address and does not contain `$`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
Lines that begin with `||` cause all subdomains to be blocked (i.e., the domain itself and all proper subdomains); without
`||`, only the specific domain is blocked.
@@ -89,7 +90,7 @@ with the requirement that the rule conforms to the following regex:
`^<ws>*<domain><ws>*(#.*)?$`
-where `<domain>` conforms to a valid `Domain`, and `<ws>` is any sequence of ASCII whitespace.
+where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX` but does not have the form of an IPv4 address, and `<ws>` is any sequence of ASCII whitespace.
Domains only represent themselves (i.e., proper subdomains will not be blocked).
@@ -100,7 +101,7 @@ with the requirement that the rule conforms to the following extended regex:
`^<ws>*<ip><ws>+<domain><ws>*(#.*)?$`
-where `<domain>` conforms to a valid `Domain`, `<ws>` is any sequence of ASCII whitespace, and `<ip>` is one of the following:
+where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX` but does not have the form of an IPv4 address, `<ws>` is any sequence of ASCII whitespace, and `<ip>` is one of the following:
`::`, `::1`, `0.0.0.0`, or `127.0.0.1`.
@@ -113,7 +114,7 @@ with the requirement that the rule conforms to the following extended regex:
`^<ws>*(\*\.)?<domain><ws>*(#.*)?$`
-where `<domain>` conforms to a valid `Domain`, and `<ws>` is any sequence of ASCII whitespace.
+where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX` but does not have the form of an IPv4 address, and `<ws>` is any sequence of ASCII whitespace.
If `domain` begins with `*.`, then `domain` must have length less than 252 and all proper subdomains are blocked—this
does _not_ include the domain itself; otherwise, only the `domain` is blocked.
@@ -180,7 +181,7 @@ Parsing errors are ignored; all other errors are written to `stderr` before prog
### Status
-This package will be actively maintained until it is deemed “feature complete”.
+This package is actively maintained.
The crates are only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but
they should work on any [Tier 1 with Host Tools](https://doc.rust-lang.org/beta/rustc/platform-support.html)
@@ -188,5 +189,4 @@ target.
Nightly `rustc` is required. Once `BTreeMap` [cursors are stabilized](https://github.com/rust-lang/rust/issues/107540), stable `rustc` will work.
On OpenBSD-stable, one can use the `rust` port as long as `RUSTC_BOOTSTRAP` is `export`ed with a value of `1` before invoking
-`cargo build --all-features --release` or `cargo install --all-features rpz`. Note that the `rust-ring` port must also be installed with
-the `[patch]` section of `Cargo.toml` or `~/.cargo/config.toml` configured appropriately.
+`cargo build --all-features --release` or `cargo install --all-features rpz`.
diff --git a/src/app.rs b/src/app.rs
@@ -1,17 +1,14 @@
-#![allow(
- clippy::into_iter_on_ref,
- clippy::ref_patterns,
- clippy::single_char_lifetime_names,
- clippy::unseparated_literal_suffix
-)]
use core::convert;
use rpz::dom::{
- Adblock, DomainErr, DomainOnly, Hosts, ParsedDomain, RpzAction, RpzDomain, Value, Wildcard,
+ Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzAction, RpzDomain, Value,
+ Wildcard,
};
use rpz::file::{AbsFilePath, File, Files, Kind, LocalFiles, Summary};
-use std::collections::HashMap;
-use std::fs;
-use std::io::{self, Error, Write};
+use std::{
+ collections::HashMap,
+ fs,
+ io::{self, Error, Write},
+};
use superset_map::SupersetSet;
/// Helper that returns the `Kind` of a file.
pub trait Helper {
@@ -69,7 +66,7 @@ impl<'unblock, 'block> Domains<'unblock, 'block> {
#[inline]
pub fn new_with<'c: 'unblock + 'block>(
local: &'c LocalFiles,
- ) -> (Self, Vec<Summary<'c, DomainErr>>) {
+ ) -> (Self, Vec<Summary<'c, FirefoxDomainErr>>) {
let mut val = Self {
unblock: SupersetSet::new(),
block: SupersetSet::new(),
@@ -85,11 +82,11 @@ impl<'unblock, 'block> Domains<'unblock, 'block> {
#[inline]
fn add_block_file<
'c: 'block,
- T: Into<RpzDomain<'block>> + ParsedDomain<'block, Error = DomainErr> + Helper,
+ T: Into<RpzDomain<'block>> + ParsedDomain<'block, Error = FirefoxDomainErr> + Helper,
>(
&mut self,
file: &'c File,
- summaries: &mut Vec<Summary<'c, DomainErr>>,
+ summaries: &mut Vec<Summary<'c, FirefoxDomainErr>>,
) {
let mut summary = Summary {
file,
@@ -133,23 +130,24 @@ impl<'unblock, 'block> Domains<'unblock, 'block> {
pub fn add_block_files<'c: 'block>(
&mut self,
files: &'c Files,
- summaries: &mut Vec<Summary<'c, DomainErr>>,
+ summaries: &mut Vec<Summary<'c, FirefoxDomainErr>>,
) {
/// Parses each line in the `String`s in `files` as an `RpzDomain` before
/// adding it to `Domains::block` iff `Domains::unblock` does not contain
/// a superset of it.
///
/// All parsing errors are ignored.
+ #[allow(clippy::into_iter_on_ref)]
#[inline]
fn add_files<
'unblock,
'block,
'c: 'block,
- T: Into<RpzDomain<'block>> + ParsedDomain<'block, Error = DomainErr> + Helper,
+ T: Into<RpzDomain<'block>> + ParsedDomain<'block, Error = FirefoxDomainErr> + Helper,
>(
doms: &mut Domains<'unblock, 'block>,
files: &'c [File],
- summaries: &mut Vec<Summary<'c, DomainErr>>,
+ summaries: &mut Vec<Summary<'c, FirefoxDomainErr>>,
) {
files
.into_iter()
@@ -170,7 +168,7 @@ impl<'unblock, 'block> Domains<'unblock, 'block> {
fn add_local_files<'c: 'unblock + 'block>(
&mut self,
files: &'c LocalFiles,
- ) -> Vec<Summary<'c, DomainErr>> {
+ ) -> Vec<Summary<'c, FirefoxDomainErr>> {
let mut summaries = files.unblock.add_to_superset(&mut self.unblock);
self.add_block_files(&files.block, &mut summaries);
summaries
diff --git a/src/args.rs b/src/args.rs
@@ -1,8 +1,9 @@
-#![allow(clippy::question_mark_used, clippy::ref_patterns)]
use core::fmt::{self, Display, Formatter};
use rpz::file::AbsFilePath;
-use std::env::{self, Args};
-use std::error::Error;
+use std::{
+ env::{self, Args},
+ error::Error,
+};
/// Error returned when parsing arguments passed to the application.
#[allow(clippy::exhaustive_enums, clippy::module_name_repetitions)]
#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
@@ -26,6 +27,7 @@ pub enum ArgsErr {
QuietAndVerbose,
}
impl Display for ArgsErr {
+ #[allow(clippy::ref_patterns)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
diff --git a/src/config.rs b/src/config.rs
@@ -1,15 +1,9 @@
-#![allow(
- clippy::exhaustive_structs,
- clippy::implicit_return,
- clippy::into_iter_on_ref,
- clippy::missing_trait_methods,
- clippy::question_mark_used,
- clippy::single_char_lifetime_names
-)]
extern crate alloc;
use alloc::borrow::Cow;
-use core::fmt::{self, Display, Formatter};
-use core::time::Duration;
+use core::{
+ fmt::{self, Display, Formatter},
+ time::Duration,
+};
use rpz::file::{AbsFilePath, HttpUrl};
use serde::de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Unexpected, Visitor};
use std::collections::HashSet;
@@ -36,11 +30,10 @@ pub struct Config {
pub wildcard: HashSet<HttpUrl>,
}
impl Display for Config {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
/// Helper function that writes the `Url`s in a `HashSet<HttpUrl>`.
- #[allow(clippy::min_ident_chars)]
+ #[allow(clippy::into_iter_on_ref)]
#[inline]
fn keys(set: &HashSet<HttpUrl>, f: &mut Formatter<'_>, name: &str) -> fmt::Result {
write!(f, "{name}: [").and_then(|()| {
@@ -112,7 +105,6 @@ impl<'de> Deserialize<'de> for Config {
"'timeout', 'rpz', 'local_dir', 'adblock', 'domain', 'hosts', or 'wildcard'",
)
}
- #[allow(clippy::min_ident_chars)]
#[inline]
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
@@ -148,7 +140,11 @@ impl<'de> Deserialize<'de> for Config {
A: MapAccess<'d>,
{
/// Verifies that the `HashSet`s are pairwise disjoint.
- #[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)]
+ #[allow(
+ clippy::arithmetic_side_effects,
+ clippy::indexing_slicing,
+ clippy::into_iter_on_ref
+ )]
#[inline]
fn hash_overlap<E: Error>(maps: &[&HashSet<HttpUrl>]) -> Result<(), E> {
/// Verifies the intersection of `left` and `right` is empty.
diff --git a/src/dom.rs b/src/dom.rs
@@ -1,479 +1,33 @@
-#![allow(
- clippy::arithmetic_side_effects,
- clippy::missing_trait_methods,
- clippy::implicit_return,
- clippy::into_iter_on_ref,
- clippy::question_mark_used,
- clippy::ref_patterns,
- clippy::single_char_lifetime_names,
- clippy::unseparated_literal_suffix,
- clippy::wildcard_enum_match_arm
-)]
use crate::dom_count_auto_gen::proper_subdomain_count;
-use core::borrow::Borrow;
-use core::cmp::Ordering;
-use core::convert::{self, AsRef};
-use core::fmt::{self, Display, Formatter};
-use core::hash::{Hash, Hasher};
-use core::iter::FusedIterator;
-use core::net::Ipv4Addr;
-use core::num::NonZeroU8;
-use core::ops::Deref;
-use core::str;
+use ascii_domain::{
+ char_set::{AllowedAscii, ASCII_FIREFOX},
+ dom::{Domain, DomainErr, DomainOrdering},
+};
+use core::{
+ borrow::Borrow,
+ cmp::Ordering,
+ convert,
+ fmt::{self, Display, Formatter},
+ hash::{Hash, Hasher},
+ num::NonZeroU8,
+ ops::Deref,
+ str,
+};
use num_bigint::BigUint;
-use std::error;
-use std::io::{Error, Write};
+use std::{
+ error,
+ io::{Error, Write},
+};
use superset_map::SetOrd;
use zfc::{BoundedCardinality, Cardinality, Set};
-/// A flag used to indicate information about the characters
-/// in a `Domain`. This flag is used to perform more efficient
-/// comparisons that can potentially avoid temporary
-/// memory allocations to treat uppercase letters as if they
-/// were lowercase.
-#[allow(clippy::exhaustive_enums)]
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
-enum CharFlag {
- /// No letters, ticks, or underscores.
- None,
- /// Lowercase letters but no uppercase letters, ticks, or underscores.
- Lower,
- /// Uppercase letters but no lowercase letters, ticks, or underscores.
- Upper,
- /// Ticks or underscores but no letters.
- Ticks,
- /// Uppercase and lowercase letters but no ticks or underscores.
- LowerUpper,
- /// Lowercase letters and ticks or underscores, but no uppercase letters.
- LowerTicks,
- /// Uppercase letters and ticks or underscores, but no lowercase letters.
- UpperTicks,
- /// Uppercase letters, lowercase letters, and ticks or underscores.
- All,
-}
-impl CharFlag {
- /// Returns a `bool` that indicates
- /// if equivalence must be done in a case
- /// insensitive way.
- #[inline]
- const fn eq_ignore_case(self, other: Self) -> bool {
- match self {
- Self::None | Self::Ticks => false,
- Self::Lower | Self::LowerTicks => !matches!(
- other,
- Self::None | Self::Ticks | Self::Lower | Self::LowerTicks
- ),
- Self::Upper | Self::UpperTicks => !matches!(
- other,
- Self::None | Self::Ticks | Self::Upper | Self::UpperTicks
- ),
- Self::LowerUpper | Self::All => !matches!(other, Self::None | Self::Ticks),
- }
- }
-}
-/// A domain that consists of at least one [`Label`].
-/// The total length of a `Domain` is at most 253
-/// characters in length including the `.` seperator.
-/// The trailing `.`, if one exists, is always removed.
-///
-/// This is more restrictive than what a domain is allowed to be
-/// per the [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181),
-/// but it is more permissive than what [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123)
-/// and [RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) allow.
-/// In particular only the ASCII/UTF-8 encoding of the following Unicode scalar values is allowed in a `Label`:
-///
-/// `!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `{`, `}`, `~`.
-///
-/// with each `Label` delimited by `.`. Uppercase letters are treated as lowercase;
-/// however for better comparison performance that doesn't lead to intermediate memory allocations,
-/// two `Domain`s should consist entirely of the same case. `Domain`s must not be an [`Ipv4Addr`].
-///
-/// Those Unicode scalar values were chosen based on what [Firefox](https://www.mozilla.org/en-US/firefox/)
-/// allows as of 2023-09-03T20:50+00:00.
-#[derive(Clone, Debug)]
-pub struct Domain<'a> {
- /// The domain value.
- /// Guaranteed to have length between 1 and 253.
- /// Guaranteed to be the UTF-8 encoding of a
- /// sequence of Unicode scalar values from the set:
- /// `!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`,
- /// `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `.`, `{`, `}`, and `~`.
- /// This is stored as a slice of bytes to
- /// allow for easier construction of `Label`s.
- value: &'a [u8],
- /// The lengths of each label.
- /// Guaranteed to have length between 1 and 127
- /// with each value being between 1 and 63.
- label_lens: Vec<NonZeroU8>,
- /// Flag that contain information about the kind of
- /// characters in `value`.
- flag: CharFlag,
-}
-impl<'a> Domain<'a> {
- /// The maximum length of a `Domain`.
- /// `Domain`s don't include the trailing `.` nor the 0-octet root label,
- /// so this is 253.
- // SAFETY: 0 < 253 < 256.
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
- pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(253) };
- /// The minimum length of a `Domain`.
- /// `Domain`s don't include the trailing `.` nor the 0-octet root label,
- /// so this is 1.
- // SAFETY: 0 < 1 < 256.
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
- pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) };
- /// The domain which always has at least one label.
- /// The last label is _not_ trailed by `.`
- #[allow(unsafe_code)]
- #[inline]
- #[must_use]
- pub const fn as_str(&self) -> &'a str {
- // SAFETY:
- // The only way to construct a `Domain` is via
- // `try_from` which only uses a valid ASCII
- // substring from the original `str` input.
- // `Domain` is immutable ensuring such invariants are kept.
- unsafe { str::from_utf8_unchecked(self.value) }
- }
- /// Returns the count of [`Label`]s.
- /// Due to length requirements of `Label` and `Domain`,
- /// this is less than `128`.
- #[inline]
- #[must_use]
- #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)]
- pub fn label_count(&self) -> NonZeroU8 {
- // SAFETY:
- // The only way to construct a `Domain` is via
- // `try_from` which only uses a valid ASCII
- // substring from the original `str` input.
- // `Domain` is immutable ensuring such invariants are kept.
- // Due to the length requirements of both `Domain` and `Label`,
- // `label_lens` has between 1 and 127 values which is a valid
- // `NonZeroU8`.
- unsafe { NonZeroU8::new_unchecked(self.label_lens.len() as u8) }
- }
- /// The length of the `Domain`.
- /// This is the same as `self.as_str().len()`.
- #[inline]
- #[must_use]
- #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)]
- pub const fn len(&self) -> NonZeroU8 {
- // SAFETY:
- // The only way to construct a `Domain` is via
- // `try_from` which only uses a valid ASCII
- // substring from the original `str` input.
- // `Domain` is immutable ensuring such invariants are kept.
- // Due to the length requirements of `Domain` ,
- // `self.value` is guaranteed to have length > 0 and < 254.
- unsafe { NonZeroU8::new_unchecked(self.value.len() as u8) }
- }
- /// Returns an [`Iterator`] of [`Label`]s without
- /// consuming the `Domain`.
- #[inline]
- #[must_use]
- pub fn iter(&self) -> LabelIter<'a, '_> {
- self.into_iter()
- }
- /// Recursively checks from the TLD if each label is the same
- /// until one domain has no more labels.
- #[inline]
- fn same_labels(&self, other: &Domain<'_>) -> bool {
- self.into_iter()
- .zip(other)
- .try_fold((), |(), (label, label2)| {
- if label.value == label2.value {
- Ok(())
- } else {
- Err(())
- }
- })
- .map_or(false, |()| true)
- }
- /// Recursively checks from the TLD if each label is the same ignoring case
- /// until one domain has no more labels.
- #[inline]
- fn same_labels_ignore_case(&self, other: &Domain<'_>) -> bool {
- self.into_iter()
- .zip(other)
- .try_fold((), |(), (label, label2)| {
- if label.value.eq_ignore_ascii_case(label2.value) {
- Ok(())
- } else {
- Err(())
- }
- })
- .map_or(false, |()| true)
- }
- /// Function that transforms a slice of bytes into a `Domain`.
- /// It is not public since we only want to allow valid `str`s.
- /// Trailing `.` is removed first.
- #[allow(
- clippy::as_conversions,
- clippy::indexing_slicing,
- clippy::option_if_let_else,
- clippy::unreachable
- )]
- #[inline]
- fn try_from_slice<'b: 'a>(mut value: &'b [u8]) -> Result<Self, DomainErr> {
- value = match value.last() {
- None => return Err(DomainErr::Empty),
- Some(byt) => {
- if *byt == b'.' {
- &value[..value.len() - 1]
- } else {
- value
- }
- }
- };
- if value.len() > Self::MAX_LEN.get() as usize {
- Err(DomainErr::LenExceeds253(value.len()))
- } else {
- let mut label_lens = Vec::with_capacity(3);
- let mut label_len = 0;
- // Bitwise flag that means:
- // 0 => no letters, ticks, or underscores,
- // 1 => lowercase letters; but no uppercase letters, ticks, or underscores.
- // 2 => uppercase letters; but no lowercase letters, ticks, or underscores.
- // 4 => ticks or underscores, but no letters.
- let mut flag = 0u8;
- value
- .into_iter()
- .try_fold((), |(), byt| {
- match *byt {
- b'.' => {
- return NonZeroU8::new(label_len).map_or(
- Err(DomainErr::EmptyLabel),
- |length| {
- label_lens.push(length);
- label_len = 0;
- Ok(())
- },
- )
- }
- b'A'..=b'Z' => flag |= 2,
- b'`' | b'_' => flag |= 4,
- b'a'..=b'z' => flag |= 1,
- 0..=b' '
- | b'"'..=b'#'
- | b'%'
- | b'*'
- | b'/'
- | b':'
- | b'<'
- | b'>'..=b'@'
- | b'['..=b'^'
- | b'|'
- | 127.. => return Err(DomainErr::InvalidByte(*byt)),
- _ => (),
- }
- if label_len == 63 {
- Err(DomainErr::LabelLenExceeds63)
- } else {
- label_len += 1;
- Ok(())
- }
- })
- .and_then(|()| {
- NonZeroU8::new(label_len)
- .ok_or(DomainErr::EmptyLabel)
- .and_then(|length| {
- Ipv4Addr::parse_ascii(value).map_or_else(
- |_| {
- label_lens.push(length);
- Ok(Self {
- value,
- label_lens,
- flag: match flag {
- 0 => CharFlag::None,
- 1 => CharFlag::Lower,
- 2 => CharFlag::Upper,
- 3 => CharFlag::LowerUpper,
- 4 => CharFlag::Ticks,
- 5 => CharFlag::LowerTicks,
- 6 => CharFlag::UpperTicks,
- 7 => CharFlag::All,
- _ => unreachable!(
- "there is a bug in Domain::try_from_slice"
- ),
- },
- })
- },
- |_| Err(DomainErr::Ipv4),
- )
- })
- })
- }
- }
-}
-impl PartialEq<Domain<'_>> for Domain<'_> {
- #[inline]
- fn eq(&self, other: &Domain<'_>) -> bool {
- if self.flag.eq_ignore_case(other.flag) {
- self.value.eq_ignore_ascii_case(other.value)
- } else {
- self.value == other.value
- }
- }
-}
-impl Eq for Domain<'_> {}
-impl PartialOrd<Domain<'_>> for Domain<'_> {
- #[inline]
- fn partial_cmp(&self, other: &Domain<'_>) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-impl<'a> AsRef<str> for Domain<'a> {
- #[inline]
- fn as_ref(&self) -> &'a str {
- self.as_str()
- }
-}
-impl<'a> Deref for Domain<'a> {
- type Target = str;
- #[inline]
- fn deref(&self) -> &Self::Target {
- self.as_str()
- }
-}
-/// `Ok(())` is returned iff the `Domain`s have the same labels
-/// with one `Domain` having more labels than the other.
-#[inline]
-fn cmp_doms(left: &Domain<'_>, right: &Domain<'_>) -> Result<(), Ordering> {
- let left_input: Vec<u8>;
- let right_input: Vec<u8>;
- let left_dom: Domain<'_>;
- let right_dom: Domain<'_>;
- let (left_ref, right_ref) = match (left.flag, right.flag) {
- (CharFlag::None, _)
- | (_, CharFlag::None)
- | (
- CharFlag::Lower | CharFlag::Ticks | CharFlag::LowerTicks,
- CharFlag::Lower | CharFlag::Ticks | CharFlag::LowerTicks,
- )
- | (CharFlag::Upper, CharFlag::Upper) => (left, right),
- (CharFlag::Lower | CharFlag::LowerTicks | CharFlag::Ticks, _) => {
- right_input = right.value.to_ascii_lowercase();
- right_dom = Domain {
- value: right_input.as_slice(),
- label_lens: right.label_lens.clone(),
- flag: CharFlag::LowerTicks,
- };
- (left, &right_dom)
- }
- (CharFlag::Upper, CharFlag::LowerUpper) => {
- right_input = right.value.to_ascii_uppercase();
- right_dom = Domain {
- value: right_input.as_slice(),
- label_lens: right.label_lens.clone(),
- flag: CharFlag::Upper,
- };
- (left, &right_dom)
- }
- (_, CharFlag::Lower | CharFlag::Ticks | CharFlag::LowerTicks) => {
- left_input = left.value.to_ascii_lowercase();
- left_dom = Domain {
- value: left_input.as_slice(),
- label_lens: left.label_lens.clone(),
- flag: CharFlag::LowerTicks,
- };
- (&left_dom, right)
- }
- (CharFlag::LowerUpper, CharFlag::Upper) => {
- left_input = left.value.to_ascii_uppercase();
- left_dom = Domain {
- value: left_input.as_slice(),
- label_lens: left.label_lens.clone(),
- flag: CharFlag::Upper,
- };
- (&left_dom, right)
- }
- (_, _) => {
- left_input = left.value.to_ascii_lowercase();
- left_dom = Domain {
- value: left_input.as_slice(),
- label_lens: left.label_lens.clone(),
- flag: CharFlag::LowerTicks,
- };
- right_input = right.value.to_ascii_lowercase();
- right_dom = Domain {
- value: right_input.as_slice(),
- label_lens: right.label_lens.clone(),
- flag: CharFlag::LowerTicks,
- };
- (&left_dom, &right_dom)
- }
- };
- // Faster to compare the entire value when we can instead of each label.
- if left_ref.value == right_ref.value {
- Err(Ordering::Equal)
- } else {
- left_ref
- .into_iter()
- .zip(right_ref)
- .try_fold((), |(), (label, label2)| {
- match label.value.cmp(label2.value) {
- Ordering::Less => Err(Ordering::Less),
- Ordering::Equal => Ok(()),
- Ordering::Greater => Err(Ordering::Greater),
- }
- })
- }
-}
-impl Ord for Domain<'_> {
- /// The total order that is defined follows the following hierarchy:
- /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs.
- /// 2. If 1. evaluates as not equivalent, then return the result.
- /// 3. Return the comparison of `Label` counts.
- ///
- /// For example, `com` `<` `example.com` `<` `net` `<` `example.net`.
- ///
- /// This is the same as the [canonical DNS name order](https://datatracker.ietf.org/doc/html/rfc4034#section-6.1).
- #[inline]
- fn cmp(&self, other: &Self) -> Ordering {
- cmp_doms(self, other).map_or_else(convert::identity, |()| {
- self.label_count().cmp(&other.label_count())
- })
- }
-}
-impl Hash for Domain<'_> {
- #[inline]
- fn hash<H: Hasher>(&self, state: &mut H) {
- match self.flag {
- CharFlag::None | CharFlag::Lower | CharFlag::Ticks | CharFlag::LowerTicks => {
- self.value.hash(state);
- }
- _ => self.value.to_ascii_lowercase().hash(state),
- }
- }
-}
-impl Display for Domain<'_> {
- #[inline]
- #[allow(clippy::min_ident_chars)]
- fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
- f.write_str(self.as_str())
- }
-}
-/// Error returned when an invalid string
-/// is passed to [`Domain::try_from`], [`Adblock::parse_value`],
-/// [`DomainOnly::parse_value`], [`Hosts::parse_value`], [`Wildcard::parse_value`],
-/// or [`RpzDomain::parse_value`].
+/// Error returned when an invalid string is passed to [`Adblock::parse_value`], [`DomainOnly::parse_value`],
+/// [`Hosts::parse_value`], [`Wildcard::parse_value`], or [`RpzDomain::parse_value`].
#[allow(clippy::exhaustive_enums)]
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)]
-pub enum DomainErr {
- /// The domain was empty.
- Empty,
- /// The length of the string had length greater than 253
- /// not counting a terminating `.` if there was one.
- LenExceeds253(usize),
- /// The domain contained at least one empty label.
- EmptyLabel,
- /// The domain contained at least one label whose length exceeded 63.
- LabelLenExceeds63,
- /// The domain contained an invalid byte value.
- ///
- /// Note the contained `u8` is ASCII iff it is `<= 127`; otherwise
- /// it is the first UTF-8 code unit of a multi-byte Unicode scalar value.
- InvalidByte(u8),
- /// The domain was an [`Ipv4Addr`].
+pub enum FirefoxDomainErr {
+ /// The domain is invalid based on [`Domain`] using [`ASCII_FIREFOX`].
+ InvalidDomain(DomainErr),
+ /// The domain was an IPv4 address.
Ipv4,
/// The string passed to [`Adblock::parse_value`] contained `$`.
InvalidAdblockDomain,
@@ -485,32 +39,11 @@ pub enum DomainErr {
/// no proper subdomains.
InvalidWildcardDomain,
}
-impl Display for DomainErr {
+impl Display for FirefoxDomainErr {
#[inline]
- #[allow(unsafe_code, clippy::min_ident_chars)]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
- Self::Empty => f.write_str("domain is empty"),
- Self::LenExceeds253(len) => write!(
- f,
- "domain has length {len} which is greater than the max length of 253"
- ),
- Self::EmptyLabel => f.write_str("domain has an empty label"),
- Self::LabelLenExceeds63 => {
- f.write_str("domain has a label that exceeds the max length of 63")
- }
- Self::InvalidByte(byt) => {
- if byt > 31 && byt < 127 {
- let utf8 = [byt];
- // SAFETY:
- // `byt` is inclusively between 32 and 126 which is valid ASCII which
- // in turn is valid UTF-8.
- let printable_ascii = unsafe { str::from_utf8_unchecked(utf8.as_slice()) };
- write!(f, "domain has a label with the invalid character '{printable_ascii}'")
- } else {
- write!(f, "domain has a label with the invalid byte value {byt}")
- }
- }
+ Self::InvalidDomain(err) => err.fmt(f),
Self::Ipv4 => f.write_str("domain was an IPv4 address"),
Self::InvalidAdblockDomain => f.write_str("Adblock-style domain contained a '$'"),
Self::InvalidHostsIP => f.write_str("hosts-style domain does not begin with the IP '::', '::1', '0.0.0.0', or '127.0.0.1' followed by at least one space or tab"),
@@ -518,363 +51,61 @@ impl Display for DomainErr {
}
}
}
-impl error::Error for DomainErr {}
-impl<'a: 'b, 'b> From<Domain<'a>> for &'b str {
- #[inline]
- fn from(value: Domain<'a>) -> Self {
- value.as_str()
- }
-}
-impl<'a: 'c, 'b, 'c> From<&'b Domain<'a>> for &'c str {
- #[inline]
- fn from(value: &'b Domain<'a>) -> Self {
- value.as_str()
- }
-}
-impl<'a: 'b, 'b> TryFrom<&'a str> for Domain<'b> {
- type Error = DomainErr;
- #[inline]
- fn try_from(val: &'a str) -> Result<Self, Self::Error> {
- Self::try_from_slice(val.as_bytes())
- }
-}
-/// A label of a [`Domain`].
-/// The total length of a `Label` is between 1 and 63
-/// bytes with each Unicode scalar value being one of the following:
-///
-/// `!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `{`, `}`, `~`.
-///
-/// Note that the uppercase letters are treated as lowercase.
-#[derive(Debug, Clone, Copy, Hash)]
-pub struct Label<'a> {
- /// The label value.
- value: &'a str,
-}
-impl<'a> Label<'a> {
- /// The maximum length of a `Label` which is 63.
- // SAFETY: 0 < 63 < 256.
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
- pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(63) };
- /// The minimum length of a `Label` which is 1.
- // SAFETY: 0 < 1 < 256.
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)]
- pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) };
- /// The label.
- #[inline]
- #[must_use]
- pub const fn as_str(self) -> &'a str {
- self.value
- }
-}
-impl Display for Label<'_> {
- #[inline]
- #[allow(clippy::min_ident_chars)]
- fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
- f.write_str(self.value)
- }
-}
-impl PartialEq<Label<'_>> for Label<'_> {
- #[inline]
- fn eq(&self, other: &Label<'_>) -> bool {
- self.value.eq_ignore_ascii_case(other.value)
- }
-}
-impl Eq for Label<'_> {}
-impl PartialOrd<Label<'_>> for Label<'_> {
- #[inline]
- fn partial_cmp(&self, other: &Label<'_>) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-impl Ord for Label<'_> {
- #[inline]
- fn cmp(&self, other: &Self) -> Ordering {
- self.value
- .to_ascii_lowercase()
- .cmp(&other.value.to_ascii_lowercase())
- }
-}
-impl<'a: 'b, 'b> From<Label<'a>> for &'b str {
- #[inline]
- fn from(value: Label<'a>) -> Self {
- value.as_str()
- }
-}
-impl<'a> AsRef<str> for Label<'a> {
- #[inline]
- fn as_ref(&self) -> &'a str {
- self.as_str()
- }
-}
-impl Deref for Label<'_> {
- type Target = str;
- #[inline]
- fn deref(&self) -> &Self::Target {
- self.as_str()
- }
-}
-/// [`Iterator`] that iterates [`Label`]s
-/// from a [`Domain`] starting from the TLD
-/// down.
-pub struct IntoLabelIter<'a> {
- /// Domain that contains `Label`s to iterate.
- domain: Domain<'a>,
- /// Starts at domain.label_count().get() - 1
- /// which is valid since domain.label_count().get() > 0.
- /// idx is 255 when the iterator is exhausted.
- /// Since idx is decremented each time and it starts
- /// at a value less than 254, this is a valid value to use
- /// as a flag.
- idx: u8,
- /// This is used to mark the start of a label before
- /// the length of the label has been subtracted.
- /// After a label is read, 1 must be subtracted
- /// to account for '.'.
- start: u8,
- /// Starts at 0 which is valid since domain.label_count().get() > 0.
- /// idx_back is 255 when the iterator is exhausted.
- /// Since idx_back is incremented each time and the max label count
- /// is 127, this is a valid value to use as a flag.
- idx_back: u8,
- /// This is used to mark the start of a label before
- /// the length of the label has been added.
- /// After a label is read, 1 must be added
- /// to account for '.'.
- start_back: u8,
-}
-impl<'a> IntoLabelIter<'a> {
- /// Helper function to construct an instance.
- #[inline]
- #[must_use]
- fn new(domain: Domain<'a>) -> IntoLabelIter<'a> {
- Self {
- idx: domain.label_count().get() - 1,
- start: domain.len().get(),
- idx_back: 0,
- start_back: 0,
- domain,
- }
- }
-}
-impl<'a> Iterator for IntoLabelIter<'a> {
- type Item = Label<'a>;
- #[inline]
- #[allow(
- unsafe_code,
- clippy::as_conversions,
- clippy::cast_possible_truncation,
- clippy::indexing_slicing
- )]
- fn next(&mut self) -> Option<Self::Item> {
- self.domain.label_lens.get(self.idx as usize).map(|len| {
- self.start -= len.get();
- let lbl = &self.domain.value[self.start as usize..(self.start + len.get()) as usize];
- let label = Label {
- // SAFETY:
- // The only way to construct a `Domain` is via
- // `try_from` which only uses a valid ASCII
- // substring from the original `str` input.
- // `Domain` is immutable ensuring such invariants are kept.
- value: unsafe { str::from_utf8_unchecked(lbl) },
- };
- if self.idx == 0 || self.idx <= self.idx_back {
- self.idx = 255;
- self.idx_back = 255;
- } else {
- self.idx -= 1;
- self.start -= 1;
- }
- label
- })
- }
-}
-impl FusedIterator for IntoLabelIter<'_> {}
-impl ExactSizeIterator for IntoLabelIter<'_> {
- #[allow(clippy::as_conversions)]
- #[inline]
- fn len(&self) -> usize {
- if self.idx == 255 {
- 0
- } else {
- (self.idx - self.idx_back + 1) as usize
- }
- }
-}
-impl DoubleEndedIterator for IntoLabelIter<'_> {
- #[inline]
- #[allow(
- unsafe_code,
- clippy::as_conversions,
- clippy::cast_possible_truncation,
- clippy::indexing_slicing
- )]
- fn next_back(&mut self) -> Option<Self::Item> {
- self.domain
- .label_lens
- .get(self.idx_back as usize)
- .map(|len| {
- let lbl = &self.domain.value
- [self.start_back as usize..(self.start_back + len.get()) as usize];
- let label = Label {
- // SAFETY:
- // The only way to construct a `Domain` is via
- // `try_from` which only uses a valid ASCII
- // substring from the original `str` input.
- // `Domain` is immutable ensuring such invariants are kept.
- value: unsafe { str::from_utf8_unchecked(lbl) },
- };
- if self.idx_back + 1 == self.domain.label_count().get() || self.idx_back >= self.idx
- {
- self.idx = 255;
- self.idx_back = 255;
- } else {
- self.idx_back += 1;
- self.start_back += len.get() + 1;
- }
- label
- })
- }
-}
-/// [`Iterator`] that iterates [`Label`]s
-/// from a borrowed [`Domain`] starting from the TLD
-/// down.
-pub struct LabelIter<'a, 'b> {
- /// Domain that contains `Label`s to iterate.
- domain: &'b Domain<'a>,
- /// Starts at domain.label_count().get() - 1
- /// which is valid since domain.label_count().get() > 0.
- /// idx is 255 when the iterator is exhausted.
- /// Since idx is decremented each time and it starts
- /// at a value less than 254, this is a valid value to use
- /// as a flag.
- idx: u8,
- /// This is used to mark the start of a label before
- /// the length of the label has been subtracted.
- /// After a label is read, 1 must be subtracted
- /// to account for '.'.
- start: u8,
- /// Starts at 0 which is valid since domain.label_count().get() > 0.
- /// idx_back is 255 when the iterator is exhausted.
- /// Since idx_back is incremented each time and the max label count
- /// is 127, this is a valid value to use as a flag.
- idx_back: u8,
- /// This is used to mark the start of a label before
- /// the length of the label has been added.
- /// After a label is read, 1 must be added
- /// to account for '.'.
- start_back: u8,
-}
-impl<'a, 'b> LabelIter<'a, 'b> {
- /// Helper function to construct an instance.
- #[inline]
- #[must_use]
- fn new(domain: &'b Domain<'a>) -> LabelIter<'a, 'b> {
- Self {
- idx: domain.label_count().get() - 1,
- start: domain.len().get(),
- idx_back: 0,
- start_back: 0,
- domain,
- }
- }
-}
-impl<'a> Iterator for LabelIter<'a, '_> {
- type Item = Label<'a>;
- #[inline]
- #[allow(
- unsafe_code,
- clippy::as_conversions,
- clippy::cast_possible_truncation,
- clippy::indexing_slicing
- )]
- fn next(&mut self) -> Option<Self::Item> {
- self.domain.label_lens.get(self.idx as usize).map(|len| {
- self.start -= len.get();
- let lbl = &self.domain.value[self.start as usize..(self.start + len.get()) as usize];
- let label = Label {
- // SAFETY:
- // The only way to construct a `Domain` is via
- // `try_from` which only uses a valid ASCII
- // substring from the original `str` input.
- // `Domain` is immutable ensuring such invariants are kept.
- value: unsafe { str::from_utf8_unchecked(lbl) },
- };
- if self.idx == 0 || self.idx <= self.idx_back {
- self.idx = 255;
- self.idx_back = 255;
+impl error::Error for FirefoxDomainErr {}
+/// The ASCII we allow domains to have.
+const CHARS: &AllowedAscii<[u8; 78]> = &ASCII_FIREFOX;
+/// Parses a `[u8]` into a `Domain` using `CHARS` with the added restriction that the `Domain` does not
+/// have the format of an IPv4 address.
+#[allow(
+ clippy::arithmetic_side_effects,
+ clippy::as_conversions,
+ clippy::cast_lossless,
+ clippy::into_iter_on_ref
+)]
+#[inline]
+fn domain_no_ip<'a: 'b, 'b>(val: &'a [u8]) -> Result<Domain<&'b str>, FirefoxDomainErr> {
+ Domain::try_from_bytes(val, CHARS)
+ .map_err(FirefoxDomainErr::InvalidDomain)
+ .and_then(|dom| {
+ // Faster to consult the metadata first to hopefully avoid parsing as an IPv4 address.
+ if dom.len().get() < 16
+ && dom.label_count().get() == 4
+ // We don't use `std::net::Ipv4Addr::from_str` since that does not consider octets with leading
+ // 0s as valid. This means something like `0.0.0.01` is not considered an IPv4 address, but we
+ // want to consider that as an IP.
+ && dom
+ .into_iter()
+ .try_fold((), |(), label| {
+ if label.len() < 4 {
+ label
+ .as_bytes()
+ .into_iter()
+ .try_fold(0u16, |octet, byt| {
+ if byt.is_ascii_digit() {
+ // We already verified the length is at most 3, and we only perform
+ // this arithmetic on integers between 0 and 9. This means the max value
+ // of these operations is 999 which is smaller than `u16::MAX`. We verified
+ // `byt` is an ASCII digit so we know `byt - b'0'` will be inclusively between
+ // 0 and 9. So no overflow, underflow, or truncation will occur.
+ Ok(octet * 10 + (byt - b'0') as u16)
+ } else {
+ Err(())
+ }
+ })
+ .and_then(|int| u8::try_from(int).map_or(Err(()), |_| Ok(())))
+ } else {
+ Err(())
+ }
+ })
+ .is_ok()
+ {
+ Err(FirefoxDomainErr::Ipv4)
} else {
- self.idx -= 1;
- self.start -= 1;
+ Ok(dom.into())
}
- label
})
- }
-}
-impl FusedIterator for LabelIter<'_, '_> {}
-impl ExactSizeIterator for LabelIter<'_, '_> {
- #[allow(clippy::as_conversions)]
- #[inline]
- fn len(&self) -> usize {
- if self.idx == 255 {
- 0
- } else {
- (self.idx - self.idx_back + 1) as usize
- }
- }
}
-impl DoubleEndedIterator for LabelIter<'_, '_> {
- #[inline]
- #[allow(
- unsafe_code,
- clippy::as_conversions,
- clippy::cast_possible_truncation,
- clippy::indexing_slicing
- )]
- fn next_back(&mut self) -> Option<Self::Item> {
- self.domain
- .label_lens
- .get(self.idx_back as usize)
- .map(|len| {
- let lbl = &self.domain.value
- [self.start_back as usize..(self.start_back + len.get()) as usize];
- let label = Label {
- // SAFETY:
- // The only way to construct a `Domain` is via
- // `try_from` which only uses a valid ASCII
- // substring from the original `str` input.
- // `Domain` is immutable ensuring such invariants are kept.
- value: unsafe { str::from_utf8_unchecked(lbl) },
- };
- if self.idx_back + 1 == self.domain.label_count().get() || self.idx_back >= self.idx
- {
- self.idx = 255;
- self.idx_back = 255;
- } else {
- self.idx_back += 1;
- self.start_back += len.get() + 1;
- }
- label
- })
- }
-}
-impl<'a> IntoIterator for Domain<'a> {
- type Item = Label<'a>;
- type IntoIter = IntoLabelIter<'a>;
- #[inline]
- fn into_iter(self) -> Self::IntoIter {
- IntoLabelIter::new(self)
- }
-}
-impl<'a, 'b> IntoIterator for &'b Domain<'a> {
- type Item = Label<'a>;
- type IntoIter = LabelIter<'a, 'b>;
- #[inline]
- fn into_iter(self) -> Self::IntoIter {
- LabelIter::new(self)
- }
-}
-/// Action taken by a DNS server when a domain
-/// matches.
+/// Action taken by a DNS server when a domain matches.
#[allow(clippy::exhaustive_enums)]
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub enum RpzAction {
@@ -890,7 +121,6 @@ pub enum RpzAction {
TcpOnly,
}
impl Display for RpzAction {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
@@ -915,12 +145,15 @@ impl Display for RpzAction {
///
/// Returns [`Error`] iff [`writeln`] does.
#[inline]
-pub fn write_rpz_line<W: Write>(
+pub fn write_rpz_line<W: Write, T>(
mut writer: W,
- dom: &Domain<'_>,
+ dom: &Domain<T>,
action: RpzAction,
wildcard: bool,
-) -> Result<(), Error> {
+) -> Result<(), Error>
+where
+ Domain<T>: Display,
+{
writeln!(
writer,
"{}{} CNAME {}.",
@@ -935,8 +168,7 @@ pub fn write_rpz_line<W: Write>(
}
)
}
-/// Type that can be returned by [`Domain`]-like
-/// parsers (e.g., [`Adblock`]).
+/// Type that can be returned by [`Domain`]-like parsers (e.g., [`Adblock`]).
#[allow(clippy::exhaustive_enums)]
#[derive(Clone, Copy, Debug)]
pub enum Value<'a, T: ParsedDomain<'a>> {
@@ -1015,10 +247,9 @@ impl<'a, T: ParsedDomain<'a>> Value<'a, T> {
}
}
}
-/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s.
-/// When parsed into a [`Value::Domain`], the domain can
-/// be written to a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone)
-/// file.
+/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. When parsed into a
+/// [`Value::Domain`], the domain can be written to a
+/// [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file.
pub trait ParsedDomain<'a>: Sized {
type Error;
/// Parses a `str` into a `Value`.
@@ -1027,7 +258,7 @@ pub trait ParsedDomain<'a>: Sized {
/// Errors iff `val` is unable to be parsed into a `Value`.
fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error>;
/// Reference to the contained `Domain`.
- fn domain(&self) -> &Domain<'a>;
+ fn domain(&self) -> &Domain<&'a str>;
/// Writes `self` as RPZ lines via `writer`.
///
/// # Errors
@@ -1035,63 +266,62 @@ pub trait ParsedDomain<'a>: Sized {
/// Errors iff `writer` errors.
fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error>;
}
-/// Domain constructed from an [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax)
-/// with the requirement that the rule conforms to the following extended regex:
+/// Domain constructed from an
+/// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax) with the
+/// requirement that the rule conforms to the following extended regex:
///
/// `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$`
///
-/// where `<domain>` conforms to a valid [`Domain`] with the added requirement that it does not contain `$`, and
-/// `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
+/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`] with the added requirements that
+/// it does not contain `$` and is not of the form of an IPv4 address, and `<ws>` is any sequence of
+/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
///
-/// Comments are any lines that start with `!` or `#` (ignoring whitespace). Any in-line comments
-/// after a valid domain are ignored and will be parsed into a [`Value::Domain`].
+/// Comments are any lines that start with `!` or `#` (ignoring whitespace). Any in-line comments after a valid
+/// domain are ignored and will be parsed into a [`Value::Domain`].
///
-/// Note that this means some valid Adblock-style rules are not considered valid since
-/// such rules often contain path information or modifiers (e.g., “third-party”), but this only
-/// considers domain-only rules.
+/// Note that this means some valid Adblock-style rules are not considered valid since such rules often contain
+/// path information or modifiers (e.g., “third-party”), but this only considers domain-only rules.
#[derive(Clone, Debug)]
pub struct Adblock<'a> {
/// The `Domain`.
- domain: Domain<'a>,
- /// `true` iff `domain` represents all subdomains.
- /// Note that this includes `domain` itself.
+ domain: Domain<&'a str>,
+ /// `true` iff `domain` represents all subdomains. Note that this includes `domain` itself.
subdomains: bool,
}
impl<'a> Adblock<'a> {
- /// Returns `true` iff the contained [`Domain`] represents all subdomains.
- /// Note this includes the `Domain` itself.
+ /// Returns `true` iff the contained [`Domain`] represents all subdomains. Note this includes the
+ /// `Domain` itself.
#[inline]
#[must_use]
pub const fn is_subdomains(&self) -> bool {
self.subdomains
}
- /// Since `DomainOnly` and `Hosts` are treated the same,
- /// we have this helper function that can be used for both.
+ /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used
+ /// for both.
#[inline]
#[must_use]
- fn cmp_dom(&self, other: &Domain<'_>) -> Ordering {
- cmp_doms(&self.domain, other).map_or_else(
- |ord| {
- if ord == Ordering::Equal && self.subdomains {
+ fn cmp_dom(&self, other: &Domain<&str>) -> Ordering {
+ match self.domain.cmp_by_domain_ordering(other) {
+ DomainOrdering::Less => Ordering::Less,
+ DomainOrdering::Shorter => {
+ if self.subdomains {
Ordering::Greater
} else {
- ord
+ Ordering::Less
}
- },
- |()| {
- // At this point `self` and `other` have different number of labels;
- // otherwise they would have the same domain which was already
- // checked for in `cmp_doms`.
+ }
+ DomainOrdering::Equal => {
if self.subdomains {
Ordering::Greater
} else {
- self.domain.label_count().cmp(&other.label_count())
+ Ordering::Equal
}
- },
- )
+ }
+ DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater,
+ }
}
/// The total order that is defined follows the following hierarchy:
- /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs.
+ /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
/// 2. If 1. evaluates as not equivalent, then return the result.
/// 3. If `self` represents a single `Domain` (i.e., `!self.is_subdomains()`),
/// then return the comparison of label counts.
@@ -1110,7 +340,7 @@ impl<'a> Adblock<'a> {
self.cmp_dom(&other.domain)
}
/// The total order that is defined follows the following hierarchy:
- /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs.
+ /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
/// 2. If 1. evaluates as not equivalent, then return the result.
/// 3. If both domains represent a single `Domain`, then return the comparison
/// of label counts.
@@ -1125,48 +355,45 @@ impl<'a> Adblock<'a> {
#[inline]
#[must_use]
pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering {
- cmp_doms(&self.domain, &other.domain).map_or_else(
- |ord| {
- if ord == Ordering::Equal {
- if self.subdomains {
- Ordering::Greater
- } else if other.proper_subdomains {
- Ordering::Less
- } else {
- ord
- }
+ match self.domain.cmp_by_domain_ordering(&other.domain) {
+ DomainOrdering::Less => Ordering::Less,
+ DomainOrdering::Shorter => {
+ if self.subdomains {
+ Ordering::Greater
} else {
- ord
+ Ordering::Less
}
- },
- |()| {
- // At this point `self` and `other` have different number of labels;
- // otherwise they would have the same domain which was already
- // checked for in `cmp_doms`.
+ }
+ DomainOrdering::Equal => {
if self.subdomains {
- if !other.proper_subdomains
- || self.domain.label_count() < other.domain.label_count()
- {
- Ordering::Greater
- } else {
+ Ordering::Greater
+ } else if other.proper_subdomains {
+ Ordering::Less
+ } else {
+ Ordering::Equal
+ }
+ }
+ DomainOrdering::Longer => {
+ if self.subdomains {
+ if other.proper_subdomains {
Ordering::Less
+ } else {
+ Ordering::Greater
}
- } else if other.proper_subdomains
- || self.domain.label_count() < other.domain.label_count()
- {
+ } else if other.proper_subdomains {
Ordering::Less
} else {
Ordering::Greater
}
- },
- )
+ }
+ DomainOrdering::Greater => Ordering::Greater,
+ }
}
- /// Same as [`Adblock::cardinality`] except
- /// that a `BigUint` is returned.
- /// Note the count _includes_ the `Domain` itself
- /// when `self.is_subdomains()`.
+ /// Same as [`Adblock::cardinality`] except that a `BigUint` is returned. Note the count _includes_
+ /// the `Domain` itself when `self.is_subdomains()`.
///
/// `!self.is_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`.
+ #[allow(clippy::arithmetic_side_effects)]
#[inline]
#[must_use]
pub fn domain_count(&self) -> BigUint {
@@ -1178,7 +405,6 @@ impl<'a> Adblock<'a> {
}
}
impl Display for Adblock<'_> {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
@@ -1229,7 +455,7 @@ impl PartialOrd<Adblock<'_>> for Adblock<'_> {
}
impl Ord for Adblock<'_> {
/// The total order that is defined follows the following hierarchy:
- /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs.
+ /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
/// 2. If 1. evaluates as not equivalent, then return the result.
/// 3. If both domains represent a single `Domain`, then return the comparison
/// of label counts.
@@ -1240,42 +466,43 @@ impl Ord for Adblock<'_> {
#[inline]
#[must_use]
fn cmp(&self, other: &Self) -> Ordering {
- cmp_doms(&self.domain, &other.domain).map_or_else(
- |ord| {
- if ord == Ordering::Equal {
- if self.subdomains {
- if other.subdomains {
- ord
- } else {
- Ordering::Greater
- }
- } else if other.subdomains {
- Ordering::Less
+ match self.domain.cmp_by_domain_ordering(&other.domain) {
+ DomainOrdering::Less => Ordering::Less,
+ DomainOrdering::Shorter => {
+ if self.subdomains {
+ Ordering::Greater
+ } else {
+ Ordering::Less
+ }
+ }
+ DomainOrdering::Equal => {
+ if self.subdomains {
+ if other.subdomains {
+ Ordering::Equal
} else {
- ord
+ Ordering::Greater
}
+ } else if other.subdomains {
+ Ordering::Less
} else {
- ord
+ Ordering::Equal
}
- },
- |()| {
- // At this point `self` and `other` have different number of labels;
- // otherwise they would have the same domain which was already
- // checked for in `cmp_doms`.
+ }
+ DomainOrdering::Longer => {
if self.subdomains {
- if !other.subdomains || self.domain.label_count() < other.domain.label_count() {
- Ordering::Greater
- } else {
+ if other.subdomains {
Ordering::Less
+ } else {
+ Ordering::Greater
}
- } else if other.subdomains || self.domain.label_count() < other.domain.label_count()
- {
+ } else if other.subdomains {
Ordering::Less
} else {
Ordering::Greater
}
- },
- )
+ }
+ DomainOrdering::Greater => Ordering::Greater,
+ }
}
}
impl PartialOrd<DomainOnly<'_>> for Adblock<'_> {
@@ -1297,7 +524,7 @@ impl PartialOrd<Wildcard<'_>> for Adblock<'_> {
}
}
impl<'a> Set for Adblock<'a> {
- type Elem = Domain<'a>;
+ type Elem = Domain<&'a str>;
#[inline]
fn bounded_cardinality(&self) -> BoundedCardinality {
BoundedCardinality::from_biguint_exact(self.domain_count())
@@ -1313,43 +540,20 @@ impl<'a> Set for Adblock<'a> {
{
if self.subdomains {
let dom2 = elem.borrow();
- self.domain.label_count() <= dom2.label_count()
- && if self.domain.flag.eq_ignore_case(dom2.flag) {
- self.domain.same_labels_ignore_case(dom2)
- } else {
- self.domain.same_labels(dom2)
- }
+ self.domain.label_count() <= dom2.label_count() && self.domain.same_branch(dom2)
} else {
self.domain == *elem.borrow()
}
}
#[inline]
fn is_proper_subset(&self, val: &Self) -> bool {
- // A single domain can never be a proper superset.
- // Subdomains` cannot be a proper superset
- // if it has more labels or the same number of labels
- // as another subdomains.
- // In all other cases, we need to recursively check from the TLD
- // that the labels are the same.
+ // A single domain can never be a proper superset. Subdomains cannot be a proper superset if it has
+ // more labels or the same number of labels as another subdomains. In all other cases, we need to
+ // recursively check from the TLD that the labels are the same.
val.subdomains
&& match val.domain.label_count().cmp(&self.domain.label_count()) {
- Ordering::Less => {
- if self.domain.flag.eq_ignore_case(val.domain.flag) {
- val.domain.same_labels_ignore_case(&self.domain)
- } else {
- val.domain.same_labels(&self.domain)
- }
- }
- Ordering::Equal => {
- !self.subdomains
- && if val.domain.flag.eq_ignore_case(self.domain.flag) {
- val.domain.value.eq_ignore_ascii_case(self.domain.value)
- || val.domain.same_labels_ignore_case(&self.domain)
- } else {
- val.domain.value == self.domain.value
- || val.domain.same_labels(&self.domain)
- }
- }
+ Ordering::Less => val.domain.same_branch(&self.domain),
+ Ordering::Equal => !self.subdomains && val.domain.same_branch(&self.domain),
Ordering::Greater => false,
}
}
@@ -1359,46 +563,30 @@ impl<'a> Set for Adblock<'a> {
}
}
impl SetOrd for Adblock<'_> {}
-impl<'a> AsRef<Domain<'a>> for Adblock<'a> {
- #[inline]
- fn as_ref(&self) -> &Domain<'a> {
- &self.domain
- }
-}
-impl<'a> AsRef<str> for Adblock<'a> {
- #[inline]
- fn as_ref(&self) -> &'a str {
- self.as_str()
- }
-}
impl<'a> Deref for Adblock<'a> {
- type Target = Domain<'a>;
+ type Target = Domain<&'a str>;
#[inline]
fn deref(&self) -> &Self::Target {
&self.domain
}
}
impl<'a> ParsedDomain<'a> for Adblock<'a> {
- type Error = DomainErr;
- #[allow(unsafe_code, clippy::indexing_slicing)]
+ type Error = FirefoxDomainErr;
+ #[allow(unsafe_code, clippy::indexing_slicing, clippy::into_iter_on_ref)]
#[inline]
fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
- // First remove leading whitepace.
- // Then check for comments via '#' and '!'.
- // Return Blank iff empty.
- // Return Comment iff '#' or '!' is the first character.
- // Remove trailing whitespace.
- // Next remove the last byte if it is '^' as well as whitespace before.
- // Next track and remove '||' at the beginning and any subsequent whitespace.
+ // First remove leading whitespace. Then check for comments via '#' and '!'. Return Blank iff empty.
+ // Return Comment iff '#' or '!' is the first character. Remove trailing whitespace. Next remove the
+ // last byte if it is '^' as well as whitespace before. Next track and remove '||' at the beginning
+ // and any subsequent whitespace.
let mut value = val.as_bytes().trim_ascii_start();
value.first().map_or_else(
|| Ok(Value::Blank),
|byt| {
if *byt == b'#' || *byt == b'!' {
// SAFETY:
- // `value` came from `val` with leading ASCII whitespace removed
- // which is still valid UTF-8. Since the first byte is '#' or '$'
- // the remaining bytes is still valid UTF-8.
+ // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8.
+ // Since the first byte is '#' or '!', the remaining bytes are still valid UTF-8.
let comment = unsafe { str::from_utf8_unchecked(&value[1..]) };
Ok(Value::Comment(comment))
} else {
@@ -1421,20 +609,19 @@ impl<'a> ParsedDomain<'a> for Adblock<'a> {
}
},
);
- // `Domain`s allow `$`, but we don't want to allow that symbol
- // for Adblock-style rules.
+ // `Domain`s allow `$`, but we don't want to allow that symbol for Adblock-style rules.
val2.into_iter()
.try_fold((), |(), byt2| {
if *byt2 == b'$' {
- Err(DomainErr::InvalidAdblockDomain)
+ Err(FirefoxDomainErr::InvalidAdblockDomain)
} else {
Ok(())
}
})
.and_then(|()| {
- Domain::try_from_slice(val2).map(|domain| {
- // A domain of length 252 or 253 can't have subdomains
- // due to there not being enough characters.
+ domain_no_ip(val2).map(|domain| {
+ // A domain of length 252 or 253 can't have subdomains due to there not being enough
+ // characters.
Value::Domain(Self {
subdomains: if domain.len().get() > 251 {
false
@@ -1450,7 +637,7 @@ impl<'a> ParsedDomain<'a> for Adblock<'a> {
)
}
#[inline]
- fn domain(&self) -> &Domain<'a> {
+ fn domain(&self) -> &Domain<&'a str> {
&self.domain
}
#[inline]
@@ -1464,20 +651,21 @@ impl<'a> ParsedDomain<'a> for Adblock<'a> {
})
}
}
-/// Domain constructed from a [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax)
-/// with the requirement that the rule conforms to the following regex:
+/// Domain constructed from a
+/// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax) with the
+/// requirement that the rule conforms to the following regex:
///
/// `^<ws>*<domain><ws>*(#.*)?$`
///
-/// where `<domain>` conforms to a valid [`Domain`], and `<ws>` is any sequence of
-/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
+/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], is not of the form of an IPv4
+/// address, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
///
-/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments
-/// after a valid domain are ignored and will be parsed into a [`Value::Domain`].
+/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain
+/// are ignored and will be parsed into a [`Value::Domain`].
#[derive(Clone, Debug)]
pub struct DomainOnly<'a> {
/// The `Domain`.
- domain: Domain<'a>,
+ domain: Domain<&'a str>,
}
impl<'a> DomainOnly<'a> {
/// Read [`Adblock::cmp_domain_only`].
@@ -1498,8 +686,7 @@ impl<'a> DomainOnly<'a> {
pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering {
other.cmp_domain_only(self).reverse()
}
- /// Same as [`DomainOnly::cardinality`] except
- /// that a `NonZeroU8` is returned.
+ /// Same as [`DomainOnly::cardinality`] except that a `NonZeroU8` is returned.
///
/// The value is always 1.
#[allow(unsafe_code)]
@@ -1574,14 +761,13 @@ impl PartialOrd<Wildcard<'_>> for DomainOnly<'_> {
}
}
impl Display for DomainOnly<'_> {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
self.domain.fmt(f)
}
}
impl<'a> Set for DomainOnly<'a> {
- type Elem = Domain<'a>;
+ type Elem = Domain<&'a str>;
#[inline]
fn bounded_cardinality(&self) -> BoundedCardinality {
BoundedCardinality::from_biguint_exact(self.domain_count().get().into())
@@ -1607,34 +793,21 @@ impl<'a> Set for DomainOnly<'a> {
}
}
impl SetOrd for DomainOnly<'_> {}
-impl<'a> AsRef<Domain<'a>> for DomainOnly<'a> {
- #[inline]
- fn as_ref(&self) -> &Domain<'a> {
- &self.domain
- }
-}
-impl<'a> AsRef<str> for DomainOnly<'a> {
- #[inline]
- fn as_ref(&self) -> &'a str {
- self.as_str()
- }
-}
-impl<'a> Borrow<Domain<'a>> for DomainOnly<'a> {
- #[inline]
- fn borrow(&self) -> &Domain<'a> {
- &self.domain
- }
-}
impl<'a> Deref for DomainOnly<'a> {
- type Target = Domain<'a>;
+ type Target = Domain<&'a str>;
#[inline]
fn deref(&self) -> &Self::Target {
&self.domain
}
}
impl<'a> ParsedDomain<'a> for DomainOnly<'a> {
- type Error = DomainErr;
- #[allow(unsafe_code, clippy::indexing_slicing)]
+ type Error = FirefoxDomainErr;
+ #[allow(
+ unsafe_code,
+ clippy::arithmetic_side_effects,
+ clippy::indexing_slicing,
+ clippy::into_iter_on_ref
+ )]
#[inline]
fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
let value = val.as_bytes().trim_ascii_start();
@@ -1643,13 +816,12 @@ impl<'a> ParsedDomain<'a> for DomainOnly<'a> {
|byt| {
if *byt == b'#' {
// SAFETY:
- // `value` came from `val` with leading ASCII whitespace removed
- // which is still valid UTF-8. Since the first byte is '#' or '$'
- // the remaining bytes is still valid UTF-8.
+ // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8.
+ // Since the first byte is '#', the remaining bytes are still valid UTF-8.
let comment = unsafe { str::from_utf8_unchecked(&value[1..]) };
Ok(Value::Comment(comment))
} else {
- Domain::try_from_slice(
+ domain_no_ip(
value[..value
.into_iter()
.try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) })
@@ -1662,7 +834,7 @@ impl<'a> ParsedDomain<'a> for DomainOnly<'a> {
)
}
#[inline]
- fn domain(&self) -> &Domain<'a> {
+ fn domain(&self) -> &Domain<&'a str> {
&self.domain
}
#[inline]
@@ -1670,23 +842,24 @@ impl<'a> ParsedDomain<'a> for DomainOnly<'a> {
write_rpz_line(&mut writer, self.domain(), action, false)
}
}
-/// Domain constructed from a [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax)
-/// with the requirement that the rule conforms to the following extended regex:
+/// Domain constructed from a
+/// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax) with the
+/// requirement that the rule conforms to the following extended regex:
///
/// `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$`
///
-/// where `<domain>` conforms to a valid [`Domain`], `<ws>` is any sequence of
-/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace), and
+/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`] and is not of the form of an IPv4
+/// address, `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace), and
/// `<ip>` is one of the following:
///
/// `::`, `::1`, `0.0.0.0`, or `127.0.0.1`.
///
-/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments
-/// after a valid domain are ignored and will be parsed into a [`Value::Domain`].
+/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain
+/// are ignored and will be parsed into a [`Value::Domain`].
#[derive(Clone, Debug)]
pub struct Hosts<'a> {
/// The `Domain`.
- domain: Domain<'a>,
+ domain: Domain<&'a str>,
}
impl<'a> Hosts<'a> {
/// Read [`Adblock::cmp_hosts`].
@@ -1707,8 +880,7 @@ impl<'a> Hosts<'a> {
pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering {
other.cmp_hosts(self).reverse()
}
- /// Same as [`Hosts::cardinality`] except
- /// that a `NonZeroU8` is returned.
+ /// Same as [`Hosts::cardinality`] except that a `NonZeroU8` is returned.
///
/// The value is always 1.
#[allow(unsafe_code)]
@@ -1783,14 +955,13 @@ impl PartialOrd<Wildcard<'_>> for Hosts<'_> {
}
}
impl Display for Hosts<'_> {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
self.domain.fmt(f)
}
}
impl<'a> Set for Hosts<'a> {
- type Elem = Domain<'a>;
+ type Elem = Domain<&'a str>;
#[inline]
fn bounded_cardinality(&self) -> BoundedCardinality {
BoundedCardinality::from_biguint_exact(self.domain_count().get().into())
@@ -1816,34 +987,21 @@ impl<'a> Set for Hosts<'a> {
}
}
impl SetOrd for Hosts<'_> {}
-impl<'a> AsRef<Domain<'a>> for Hosts<'a> {
- #[inline]
- fn as_ref(&self) -> &Domain<'a> {
- &self.domain
- }
-}
-impl<'a> AsRef<str> for Hosts<'a> {
- #[inline]
- fn as_ref(&self) -> &'a str {
- self.as_str()
- }
-}
-impl<'a> Borrow<Domain<'a>> for Hosts<'a> {
- #[inline]
- fn borrow(&self) -> &Domain<'a> {
- &self.domain
- }
-}
impl<'a> Deref for Hosts<'a> {
- type Target = Domain<'a>;
+ type Target = Domain<&'a str>;
#[inline]
fn deref(&self) -> &Self::Target {
&self.domain
}
}
impl<'a> ParsedDomain<'a> for Hosts<'a> {
- type Error = DomainErr;
- #[allow(unsafe_code, clippy::indexing_slicing)]
+ type Error = FirefoxDomainErr;
+ #[allow(
+ unsafe_code,
+ clippy::arithmetic_side_effects,
+ clippy::indexing_slicing,
+ clippy::into_iter_on_ref
+ )]
#[inline]
fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
let mut value = val.as_bytes().trim_ascii_start();
@@ -1852,15 +1010,14 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> {
|byt| {
if *byt == b'#' {
// SAFETY:
- // `value` came from `val` with leading ASCII whitespace removed
- // which is still valid UTF-8. Since the first byte is '#' or '$'
- // the remaining bytes is still valid UTF-8.
+ // `value` came from `val` with leading ASCII whitespace removed, which is still valid UTF-8.
+ // Since the first byte is '#' or '$', the remaining bytes are still valid UTF-8.
let comment = unsafe { str::from_utf8_unchecked(&value[1..]) };
Ok(Value::Comment(comment))
} else {
value = value
.get(..3)
- .ok_or(DomainErr::InvalidHostsIP)
+ .ok_or(FirefoxDomainErr::InvalidHostsIP)
.and_then(|fst| {
if fst == b"::1" {
Ok(&value[3..])
@@ -1869,19 +1026,19 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> {
} else {
value
.get(..7)
- .ok_or(DomainErr::InvalidHostsIP)
+ .ok_or(FirefoxDomainErr::InvalidHostsIP)
.and_then(|fst2| {
if fst2 == b"0.0.0.0" {
Ok(&value[7..])
} else {
value
.get(..9)
- .ok_or(DomainErr::InvalidHostsIP)
+ .ok_or(FirefoxDomainErr::InvalidHostsIP)
.and_then(|fst3| {
if fst3 == b"127.0.0.1" {
Ok(&value[9..])
} else {
- Err(DomainErr::InvalidHostsIP)
+ Err(FirefoxDomainErr::InvalidHostsIP)
}
})
}
@@ -1891,11 +1048,10 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> {
let len = value.len();
value = value.trim_ascii_start();
if len == value.len() {
- // There has to be at least one space or tab between
- // the IP and domain.
- Err(DomainErr::InvalidHostsIP)
+ // There has to be at least one space or tab between the IP and domain.
+ Err(FirefoxDomainErr::InvalidHostsIP)
} else {
- Domain::try_from_slice(
+ domain_no_ip(
value[..value
.into_iter()
.try_fold(
@@ -1912,7 +1068,7 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> {
)
}
#[inline]
- fn domain(&self) -> &Domain<'a> {
+ fn domain(&self) -> &Domain<&'a str> {
&self.domain
}
#[inline]
@@ -1920,29 +1076,29 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> {
write_rpz_line(&mut writer, self.domain(), action, false)
}
}
-/// Domain constructed from a [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext)
+/// Domain constructed from a
+/// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext)
/// with the requirement that the rule conforms to the following extended regex:
///
/// `^<ws>*(\*\.)?<domain><ws>*(#.*)?$`
///
-/// where `<domain>` conforms to a valid [`Domain`], and `<ws>` is any sequence of
-/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
+/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`] and is not of the form of an IPv4
+/// address, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
///
/// If `domain` begins with `*.`, then `domain` must have length less than 252.
///
-/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments
-/// after a valid domain are ignored and will be parsed into a [`Value::Domain`].
+/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain
+/// are ignored and will be parsed into a [`Value::Domain`].
#[derive(Clone, Debug)]
pub struct Wildcard<'a> {
/// The `Domain`.
- domain: Domain<'a>,
- /// `true` iff `domain` represents all proper subdomains.
- /// Note that this does _not_ include `domain` itself.
+ domain: Domain<&'a str>,
+ /// `true` iff `domain` represents all proper subdomains. Note that this does _not_ include `domain` itself.
proper_subdomains: bool,
}
impl<'a> Wildcard<'a> {
- /// Returns `true` iff the contained [`Domain`] represents all proper subdomains.
- /// Note this does _not_ include the `Domain` itself.
+ /// Returns `true` iff the contained [`Domain`] represents all proper subdomains. Note this does _not_
+ /// include the `Domain` itself.
#[inline]
#[must_use]
pub const fn is_proper_subdomains(&self) -> bool {
@@ -1954,33 +1110,32 @@ impl<'a> Wildcard<'a> {
pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering {
other.cmp_wildcard(self).reverse()
}
- /// Since `DomainOnly` and `Hosts` are treated the same,
- /// we have this helper function that can be used for both.
+ /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used
+ /// for both.
#[inline]
#[must_use]
- fn cmp_dom(&self, other: &Domain<'_>) -> Ordering {
- cmp_doms(&self.domain, other).map_or_else(
- |ord| {
- if ord == Ordering::Equal && self.proper_subdomains {
+ fn cmp_dom(&self, other: &Domain<&str>) -> Ordering {
+ match self.domain.cmp_by_domain_ordering(other) {
+ DomainOrdering::Less => Ordering::Less,
+ DomainOrdering::Shorter => {
+ if self.proper_subdomains {
Ordering::Greater
} else {
- ord
+ Ordering::Less
}
- },
- |()| {
- // At this point `self` and `other` have different number of labels;
- // otherwise they would have the same domain which was already
- // checked for in `cmp_doms`.
+ }
+ DomainOrdering::Equal => {
if self.proper_subdomains {
Ordering::Greater
} else {
- self.domain.label_count().cmp(&other.label_count())
+ Ordering::Equal
}
- },
- )
+ }
+ DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater,
+ }
}
/// The total order that is defined follows the following hierarchy:
- /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs.
+ /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
/// 2. If 1. evaluates as not equivalent, then return the result.
/// 3. If `self` represents a single `Domain` (i.e., `!self.is_proper_subdomains()`),
/// then return the comparison of label counts.
@@ -1998,10 +1153,8 @@ impl<'a> Wildcard<'a> {
pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering {
self.cmp_dom(&other.domain)
}
- /// Same as [`Wildcard::cardinality`] except
- /// that a `BigUint` is returned.
- /// Note the count does _not_ include the `Domain` itself
- /// when `self.is_proper_subdomains()`.
+ /// Same as [`Wildcard::cardinality`] except that a `BigUint` is returned. Note the count does _not_ include
+ /// the `Domain` itself when `self.is_proper_subdomains()`.
///
/// `!self.is_proper_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`.
#[inline]
@@ -2053,7 +1206,7 @@ impl PartialOrd<Wildcard<'_>> for Wildcard<'_> {
}
impl Ord for Wildcard<'_> {
/// The total order that is defined follows the following hierarchy:
- /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs.
+ /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
/// 2. If 1. evaluates as not equivalent, then return the result.
/// 3. If both domains represent a single `Domain`, then return the comparison
/// of label counts.
@@ -2064,45 +1217,43 @@ impl Ord for Wildcard<'_> {
#[inline]
#[must_use]
fn cmp(&self, other: &Self) -> Ordering {
- cmp_doms(&self.domain, &other.domain).map_or_else(
- |ord| {
- if ord == Ordering::Equal {
- if self.proper_subdomains {
- if other.proper_subdomains {
- ord
- } else {
- Ordering::Greater
- }
- } else if other.proper_subdomains {
- Ordering::Less
+ match self.domain.cmp_by_domain_ordering(&other.domain) {
+ DomainOrdering::Less => Ordering::Less,
+ DomainOrdering::Shorter => {
+ if self.proper_subdomains {
+ Ordering::Greater
+ } else {
+ Ordering::Less
+ }
+ }
+ DomainOrdering::Equal => {
+ if self.proper_subdomains {
+ if other.proper_subdomains {
+ Ordering::Equal
} else {
- ord
+ Ordering::Greater
}
+ } else if other.proper_subdomains {
+ Ordering::Less
} else {
- ord
+ Ordering::Equal
}
- },
- |()| {
- // At this point `self` and `other` have different number of labels;
- // otherwise they would have the same domain which was already
- // checked for in `cmp_doms`.
+ }
+ DomainOrdering::Longer => {
if self.proper_subdomains {
- if !other.proper_subdomains
- || self.domain.label_count() < other.domain.label_count()
- {
- Ordering::Greater
- } else {
+ if other.proper_subdomains {
Ordering::Less
+ } else {
+ Ordering::Greater
}
- } else if other.proper_subdomains
- || self.domain.label_count() < other.domain.label_count()
- {
+ } else if other.proper_subdomains {
Ordering::Less
} else {
Ordering::Greater
}
- },
- )
+ }
+ DomainOrdering::Greater => Ordering::Greater,
+ }
}
}
impl PartialOrd<Adblock<'_>> for Wildcard<'_> {
@@ -2124,7 +1275,6 @@ impl PartialOrd<Hosts<'_>> for Wildcard<'_> {
}
}
impl Display for Wildcard<'_> {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
@@ -2136,7 +1286,7 @@ impl Display for Wildcard<'_> {
}
}
impl<'a> Set for Wildcard<'a> {
- type Elem = Domain<'a>;
+ type Elem = Domain<&'a str>;
#[inline]
fn bounded_cardinality(&self) -> BoundedCardinality {
BoundedCardinality::from_biguint_exact(self.domain_count())
@@ -2152,31 +1302,19 @@ impl<'a> Set for Wildcard<'a> {
{
if self.proper_subdomains {
let dom2 = elem.borrow();
- self.domain.label_count() < dom2.label_count()
- && if self.domain.flag.eq_ignore_case(dom2.flag) {
- self.domain.same_labels_ignore_case(dom2)
- } else {
- self.domain.same_labels(dom2)
- }
+ self.domain.label_count() < dom2.label_count() && self.domain.same_branch(dom2)
} else {
self.domain == *elem.borrow()
}
}
#[inline]
fn is_proper_subset(&self, val: &Self) -> bool {
- // A single domain can never be a proper superset.
- // Proper subdomains cannot be a proper superset
- // if it has more labels or the same number of labels
- // as another domain.
- // In all other cases, we need to recursively check from the TLD
- // that the labels are the same.
+ // A single domain can never be a proper superset. Proper subdomains cannot be a proper superset if they
+ // have more labels or the same number of labels as another domain. In all other cases, we need to
+ // recursively check from the TLD that the labels are the same.
val.proper_subdomains
&& val.domain.label_count() < self.domain.label_count()
- && if self.domain.flag.eq_ignore_case(val.domain.flag) {
- val.domain.same_labels_ignore_case(&self.domain)
- } else {
- val.domain.same_labels(&self.domain)
- }
+ && val.domain.same_branch(&self.domain)
}
#[inline]
fn is_subset(&self, val: &Self) -> bool {
@@ -2184,28 +1322,21 @@ impl<'a> Set for Wildcard<'a> {
}
}
impl SetOrd for Wildcard<'_> {}
-impl<'a> AsRef<Domain<'a>> for Wildcard<'a> {
- #[inline]
- fn as_ref(&self) -> &Domain<'a> {
- &self.domain
- }
-}
-impl<'a> AsRef<str> for Wildcard<'a> {
- #[inline]
- fn as_ref(&self) -> &'a str {
- self.as_str()
- }
-}
impl<'a> Deref for Wildcard<'a> {
- type Target = Domain<'a>;
+ type Target = Domain<&'a str>;
#[inline]
fn deref(&self) -> &Self::Target {
&self.domain
}
}
impl<'a> ParsedDomain<'a> for Wildcard<'a> {
- type Error = DomainErr;
- #[allow(unsafe_code, clippy::indexing_slicing)]
+ type Error = FirefoxDomainErr;
+ #[allow(
+ unsafe_code,
+ clippy::arithmetic_side_effects,
+ clippy::indexing_slicing,
+ clippy::into_iter_on_ref
+ )]
#[inline]
fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> {
let value = val.as_bytes().trim_ascii_start();
@@ -2214,9 +1345,8 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> {
|byt| {
if *byt == b'#' {
// SAFETY:
- // `value` came from `val` with leading ASCII whitespace removed
- // which is still valid UTF-8. Since the first byte is '#' or '$'
- // the remaining bytes is still valid UTF-8.
+ // `value` came from `val` with leading ASCII whitespace removed, which is still valid UTF-8.
+ // Since the first byte is '#' or '$', the remaining bytes are still valid UTF-8.
let comment = unsafe { str::from_utf8_unchecked(&value[1..]) };
Ok(Value::Comment(comment))
} else {
@@ -2230,7 +1360,7 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> {
}
},
);
- Domain::try_from_slice(
+ domain_no_ip(
val2[..val2
.into_iter()
.try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) })
@@ -2240,7 +1370,7 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> {
.and_then(|domain| {
if proper_subdomains {
if domain.len().get() > 251 {
- Err(DomainErr::InvalidWildcardDomain)
+ Err(FirefoxDomainErr::InvalidWildcardDomain)
} else {
Ok(Value::Domain(Self {
domain,
@@ -2259,7 +1389,7 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> {
)
}
#[inline]
- fn domain(&self) -> &Domain<'a> {
+ fn domain(&self) -> &Domain<&'a str> {
&self.domain
}
#[inline]
@@ -2267,8 +1397,7 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> {
write_rpz_line(&mut writer, self.domain(), action, self.proper_subdomains)
}
}
-/// A [`Domain`] in a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone)
-/// file.
+/// A [`Domain`] in a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file.
#[allow(clippy::exhaustive_enums)]
#[derive(Clone, Debug)]
pub enum RpzDomain<'a> {
@@ -2283,6 +1412,7 @@ pub enum RpzDomain<'a> {
}
impl<'a> RpzDomain<'a> {
/// Returns `true` iff `self` represents a single [`Domain`].
+ #[allow(clippy::ref_patterns)]
#[inline]
#[must_use]
pub const fn is_domain(&self) -> bool {
@@ -2292,9 +1422,9 @@ impl<'a> RpzDomain<'a> {
Self::Wildcard(ref dom) => !dom.proper_subdomains,
}
}
- /// Returns `true` iff `self` represents proper subdomains of
- /// the contained [`Domain`] (i.e., is a [`Wildcard`] such that
- /// [`Wildcard::is_proper_subdomains`]).
+ /// Returns `true` iff `self` represents proper subdomains of the contained [`Domain`] (i.e.,
+ /// is a [`Wildcard`] such that [`Wildcard::is_proper_subdomains`]).
+ #[allow(clippy::ref_patterns)]
#[inline]
#[must_use]
pub const fn is_proper_subdomains(&self) -> bool {
@@ -2303,9 +1433,9 @@ impl<'a> RpzDomain<'a> {
Self::Wildcard(ref dom) => dom.proper_subdomains,
}
}
- /// Returns `true` iff `self` represents subdomains of
- /// the contained [`Domain`] (i.e., is an [`Adblock`] such that
- /// [`Adblock::is_subdomains`]).
+ /// Returns `true` iff `self` represents subdomains of the contained [`Domain`] (i.e., is an
+ /// [`Adblock`] such that [`Adblock::is_subdomains`]).
+ #[allow(clippy::ref_patterns)]
#[inline]
#[must_use]
pub const fn is_subdomains(&self) -> bool {
@@ -2314,9 +1444,9 @@ impl<'a> RpzDomain<'a> {
Self::DomainOnly(_) | Self::Hosts(_) | Self::Wildcard(_) => false,
}
}
- /// Returns the count of [`Domain`]s represented by `self`.
- /// This function is the same as [`RpzDomain::cardinality`]
- /// except that it returns a `BigUint`.
+ /// Returns the count of [`Domain`]s represented by `self`. This function is the same as
+ /// [`RpzDomain::cardinality`] except that it returns a `BigUint`.
+ #[allow(clippy::ref_patterns)]
#[inline]
#[must_use]
pub fn domain_count(&self) -> BigUint {
@@ -2329,6 +1459,7 @@ impl<'a> RpzDomain<'a> {
}
}
impl PartialEq<RpzDomain<'_>> for RpzDomain<'_> {
+ #[allow(clippy::ref_patterns)]
#[inline]
fn eq(&self, other: &RpzDomain<'_>) -> bool {
match *self {
@@ -2374,7 +1505,7 @@ impl PartialOrd<RpzDomain<'_>> for RpzDomain<'_> {
}
impl Ord for RpzDomain<'_> {
/// The total order that is defined follows the following hierarchy:
- /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs.
+ /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs.
/// 2. If 1. evaluates as not equivalent, then return the result.
/// 3. If both domains represent a single `Domain`, then return the comparison
/// of label counts.
@@ -2386,6 +1517,7 @@ impl Ord for RpzDomain<'_> {
/// ascending order:
///
/// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net`
+ #[allow(clippy::ref_patterns)]
#[inline]
fn cmp(&self, other: &Self) -> Ordering {
match *self {
@@ -2417,7 +1549,7 @@ impl Ord for RpzDomain<'_> {
}
}
impl Display for RpzDomain<'_> {
- #[allow(clippy::min_ident_chars)]
+ #[allow(clippy::ref_patterns)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
@@ -2429,7 +1561,7 @@ impl Display for RpzDomain<'_> {
}
}
impl<'a> Set for RpzDomain<'a> {
- type Elem = Domain<'a>;
+ type Elem = Domain<&'a str>;
#[inline]
fn bounded_cardinality(&self) -> BoundedCardinality {
BoundedCardinality::from_biguint_exact(self.domain_count())
@@ -2438,6 +1570,7 @@ impl<'a> Set for RpzDomain<'a> {
fn cardinality(&self) -> Option<Cardinality> {
Some(Cardinality::Finite(self.domain_count()))
}
+ #[allow(clippy::ref_patterns)]
#[inline]
fn contains<Q>(&self, elem: &Q) -> bool
where
@@ -2450,17 +1583,13 @@ impl<'a> Set for RpzDomain<'a> {
Self::Wildcard(ref dom) => dom.contains(elem),
}
}
+ #[allow(clippy::ref_patterns)]
#[inline]
fn is_proper_subset(&self, val: &Self) -> bool {
/// Helper function that verifies all labels are the same.
#[inline]
- fn helper(left: &Domain<'_>, right: &Domain<'_>) -> bool {
- left.label_count() <= right.label_count()
- && if left.flag.eq_ignore_case(right.flag) {
- left.same_labels_ignore_case(right)
- } else {
- left.same_labels(right)
- }
+ fn helper(left: &Domain<&str>, right: &Domain<&str>) -> bool {
+ left.label_count() <= right.label_count() && left.same_branch(right)
}
match *val {
Self::Adblock(ref dom) => {
@@ -2529,23 +1658,17 @@ impl<'a> Set for RpzDomain<'a> {
}
}
impl SetOrd for RpzDomain<'_> {}
-impl<'a> AsRef<Domain<'a>> for RpzDomain<'a> {
- #[inline]
- fn as_ref(&self) -> &Domain<'a> {
- self.domain()
- }
-}
-impl<'a> AsRef<str> for RpzDomain<'a> {
- #[inline]
- fn as_ref(&self) -> &'a str {
- self.as_str()
- }
-}
impl<'a> Deref for RpzDomain<'a> {
- type Target = Domain<'a>;
+ type Target = Domain<&'a str>;
+ #[allow(clippy::ref_patterns)]
#[inline]
fn deref(&self) -> &Self::Target {
- self.domain()
+ match *self {
+ Self::Adblock(ref dom) => &dom.domain,
+ Self::DomainOnly(ref dom) => &dom.domain,
+ Self::Hosts(ref dom) => &dom.domain,
+ Self::Wildcard(ref dom) => &dom.domain,
+ }
}
}
impl<'a: 'b, 'b> From<Adblock<'a>> for RpzDomain<'b> {
@@ -2573,7 +1696,7 @@ impl<'a: 'b, 'b> From<Wildcard<'a>> for RpzDomain<'b> {
}
}
impl<'a> ParsedDomain<'a> for RpzDomain<'a> {
- type Error = DomainErr;
+ type Error = FirefoxDomainErr;
#[inline]
fn parse_value<'b: 'a>(value: &'b str) -> Result<Value<'a, Self>, Self::Error> {
DomainOnly::parse_value(value).map_or_else(
@@ -2615,15 +1738,17 @@ impl<'a> ParsedDomain<'a> for RpzDomain<'a> {
},
)
}
+ #[allow(clippy::ref_patterns)]
#[inline]
- fn domain(&self) -> &Domain<'a> {
+ fn domain(&self) -> &Domain<&'a str> {
match *self {
- Self::Adblock(ref dom) => dom.domain(),
- Self::DomainOnly(ref dom) => dom.domain(),
- Self::Hosts(ref dom) => dom.domain(),
- Self::Wildcard(ref dom) => dom.domain(),
+ Self::Adblock(ref dom) => &dom.domain,
+ Self::DomainOnly(ref dom) => &dom.domain,
+ Self::Hosts(ref dom) => &dom.domain,
+ Self::Wildcard(ref dom) => &dom.domain,
}
}
+ #[allow(clippy::ref_patterns)]
#[inline]
fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error> {
match *self {
@@ -2637,289 +1762,18 @@ impl<'a> ParsedDomain<'a> for RpzDomain<'a> {
#[cfg(test)]
mod tests {
use super::{
- Adblock, Domain, DomainErr, DomainOnly, Hosts, ParsedDomain, RpzDomain, Value, Wildcard,
+ Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard,
};
- use core::cmp::Ordering;
+ use ascii_domain::dom::DomainErr;
use num_bigint::BigUint;
use superset_map::SupersetSet;
#[test]
- fn test_dom_parse() {
- // Test Ipv4Addr is error.
- assert!(Domain::try_from("1.1.1.1").map_or_else(|e| e == DomainErr::Ipv4, |_| false));
- // Test empty is error.
- assert!(Domain::try_from("").map_or_else(|e| e == DomainErr::Empty, |_| false));
- // Test empty label is error.
- assert!(Domain::try_from("a..com").map_or_else(|e| e == DomainErr::EmptyLabel, |_| false));
- // Test label too long.
- let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com";
- // 4 + 64 + 4
- assert!(val.len() == 72);
- assert!(Domain::try_from(val).map_or_else(|e| e == DomainErr::LabelLenExceeds63, |_| false));
- assert!(Domain::try_from(
- "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"
- )
- .map_or(false, |d| d.len().get() == 71));
- // Test domain too long.
- assert!(Domain::try_from("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or_else(|e| e == DomainErr::LenExceeds253(254), |_| false));
- assert!(Domain::try_from("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or(false, |d| d.len().get() == 253 ));
- // Test max labels.
- assert!(Domain::try_from("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == DomainErr::LenExceeds253(255), |_| false));
- assert!(Domain::try_from("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |d| d.label_count().get() == 127 && d.len().get() == 253));
- assert!(Domain::try_from("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.").map_or(false, |d| d.label_count().get() == 127 && d.len().get() == 253));
- // Test removal of trailing '.'.
- assert!(Domain::try_from("com").map_or(false, |d| d.value == b"com"));
- // Test case-insensitivity.
- assert!(Domain::try_from("wwW.ExAMple.COm")
- .map_or(false, |d| Domain::try_from("www.example.com")
- .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal)));
- assert!(
- Domain::try_from("ww_W.com").map_or(false, |d| Domain::try_from("Ww_w.com")
- .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal))
- );
- // Test valid bytes
- let mut vec = Vec::new();
- let mut counter = 0;
- for i in 0u8..=255 {
- vec.push(i);
- match i {
- b'!'
- | b'$'
- | b'&'..=b')'
- | b'+'..=b'-'
- | b'0'..=b'9'
- | b';'
- | b'='
- | b'A'..=b'Z'
- | b'_'..=b'{'
- | b'}'..=b'~' => {
- counter += 1;
- assert!(Domain::try_from_slice(vec.as_slice())
- .map_or(false, |d| d.value.len() == 1 && d.value[0] == i))
- }
- b'.' => {
- vec.push(i);
- vec[0] = b'a';
- assert!(
- Domain::try_from_slice(vec.as_slice())
- .map_or(false, |d| d.value.len() == 1 && d.value[0] == b'a')
- && vec.pop() == Some(i)
- )
- }
- _ => assert!(Domain::try_from_slice(vec.as_slice())
- .map_or_else(|e| e == DomainErr::InvalidByte(i), |_| false)),
- }
- vec.pop();
- }
- assert!(counter == 78);
- }
- #[test]
- fn test_dom_into_iter() {
- assert!(Domain::try_from("www.example.com").map_or(false, |d| {
- let mut iter = d.into_iter();
- if iter.len() != 3 {
- return false;
- }
- let Some(l) = iter.next() else {
- return false;
- };
- if l.value != "com" {
- return false;
- }
- if iter.len() != 2 {
- return false;
- }
- let Some(l) = iter.next() else { return false };
- if l.value != "example" {
- return false;
- }
- if iter.len() != 1 {
- return false;
- }
- let Some(l) = iter.next() else {
- return false;
- };
- if iter.len() != 0 {
- return false;
- }
- if l.value != "www" {
- return false;
- }
- iter.next().is_none()
- }));
- assert!(Domain::try_from("www.example.com").map_or(false, |d| {
- let mut iter = d.into_iter();
- if iter.len() != 3 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "www" {
- return false;
- }
- if iter.len() != 2 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "example" {
- return false;
- }
- if iter.len() != 1 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "com" {
- return false;
- }
- if iter.len() != 0 {
- return false;
- }
- iter.next_back().is_none()
- }));
- assert!(Domain::try_from("www.example.com").map_or(false, |d| {
- let mut iter = d.into_iter();
- if iter.len() != 3 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "www" {
- return false;
- }
- if iter.len() != 2 {
- return false;
- }
- let Some(l) = iter.next() else { return false };
- if l.value != "com" {
- return false;
- }
- if iter.len() != 1 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "example" {
- return false;
- }
- if iter.len() != 0 {
- return false;
- }
- iter.next().is_none() && iter.next_back().is_none()
- }));
- }
- #[test]
- fn test_dom_iter() {
- assert!(Domain::try_from("www.example.com").map_or(false, |d| {
- let mut iter = d.iter();
- if iter.len() != 3 {
- return false;
- }
- let Some(l) = iter.next() else {
- return false;
- };
- if l.value != "com" {
- return false;
- }
- if iter.len() != 2 {
- return false;
- }
- let Some(l) = iter.next() else { return false };
- if l.value != "example" {
- return false;
- }
- if iter.len() != 1 {
- return false;
- }
- let Some(l) = iter.next() else {
- return false;
- };
- if iter.len() != 0 {
- return false;
- }
- if l.value != "www" {
- return false;
- }
- iter.next().is_none()
- }));
- assert!(Domain::try_from("www.example.com").map_or(false, |d| {
- let mut iter = d.iter();
- if iter.len() != 3 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "www" {
- return false;
- }
- if iter.len() != 2 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "example" {
- return false;
- }
- if iter.len() != 1 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "com" {
- return false;
- }
- if iter.len() != 0 {
- return false;
- }
- iter.next_back().is_none()
- }));
- assert!(Domain::try_from("www.example.com").map_or(false, |d| {
- let mut iter = d.iter();
- if iter.len() != 3 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "www" {
- return false;
- }
- if iter.len() != 2 {
- return false;
- }
- let Some(l) = iter.next() else { return false };
- if l.value != "com" {
- return false;
- }
- if iter.len() != 1 {
- return false;
- }
- let Some(l) = iter.next_back() else {
- return false;
- };
- if l.value != "example" {
- return false;
- }
- if iter.len() != 0 {
- return false;
- }
- iter.next().is_none() && iter.next_back().is_none()
- }));
- }
- #[test]
fn test_adblock_parse() {
// Test subdomains.
assert!(
Adblock::parse_value("||www.example.com").map_or(false, |val| match val {
- Value::Domain(ref dom) => dom.subdomains && dom.domain.value == b"www.example.com",
+ Value::Domain(ref dom) =>
+ dom.subdomains && dom.domain.as_bytes() == b"www.example.com",
Value::Comment(_) | Value::Blank => false,
})
);
@@ -2929,7 +1783,7 @@ mod tests {
false,
|val| match val {
Value::Domain(ref dom) =>
- dom.subdomains && dom.domain.value == b"www.example.com",
+ dom.subdomains && dom.domain.as_bytes() == b"www.example.com",
Value::Comment(_) | Value::Blank => false,
}
)
@@ -2938,14 +1792,16 @@ mod tests {
Adblock::parse_value("\t\t \twww.example.com \t\t \t\t ").map_or(false, |val| {
match val {
Value::Domain(ref dom) => {
- !dom.subdomains && dom.domain.value == b"www.example.com"
+ !dom.subdomains && dom.domain.as_bytes() == b"www.example.com"
}
Value::Comment(_) | Value::Blank => false,
}
})
);
- assert!(Adblock::parse_value("www .example.com")
- .map_or_else(|err| err == DomainErr::InvalidByte(b' '), |_| false));
+ assert!(Adblock::parse_value("www .example.com").map_or_else(
+ |err| err == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')),
+ |_| false
+ ));
assert!(
Adblock::parse_value("||www.ExAMPle.COm").map_or(false, |val| {
match val {
@@ -2986,13 +1842,16 @@ mod tests {
assert!(
DomainOnly::parse_value(" \t\t \t\t \twww.example.com#asdflkj asdf alskdfj ")
.map_or(false, |val| match val {
- Value::Domain(ref dom) => dom.domain.value == b"www.example.com",
+ Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com",
Value::Comment(_) | Value::Blank => false,
})
);
assert!(
DomainOnly::parse_value(" \t\t \t\t \twww.example.com \t\t ^ \t\t ")
- .map_or_else(|e| e == DomainErr::InvalidByte(b' '), |_| false)
+ .map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')),
+ |_| false
+ )
);
// Test case-insensitivity.
assert!(
@@ -3023,24 +1882,29 @@ mod tests {
" \t\t 127.0.0.1\t\t \twww.example.com#asdflkj asdf alskdfj "
)
.map_or(false, |val| match val {
- Value::Domain(ref dom) => dom.domain.value == b"www.example.com",
+ Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com",
Value::Comment(_) | Value::Blank => false,
}));
assert!(
Hosts::parse_value(" \t\t 0.0.0.0\t\t \twww.example.com \t\t ^ \t\t ")
- .map_or_else(|e| e == DomainErr::InvalidByte(b' '), |_| false)
+ .map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')),
+ |_| false
+ )
);
- assert!(Hosts::parse_value("::1\twww .example.com")
- .map_or_else(|e| e == DomainErr::InvalidByte(b' '), |_| false));
+ assert!(Hosts::parse_value("::1\twww .example.com").map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')),
+ |_| false
+ ));
// Test invalid IP
assert!(Hosts::parse_value("::2 www.example.com")
- .map_or_else(|e| e == DomainErr::InvalidHostsIP, |_| false));
+ .map_or_else(|e| e == FirefoxDomainErr::InvalidHostsIP, |_| false));
assert!(Hosts::parse_value(":2 www.example.com")
- .map_or_else(|e| e == DomainErr::InvalidHostsIP, |_| false));
+ .map_or_else(|e| e == FirefoxDomainErr::InvalidHostsIP, |_| false));
assert!(Hosts::parse_value("www.example.com")
- .map_or_else(|e| e == DomainErr::InvalidHostsIP, |_| false));
+ .map_or_else(|e| e == FirefoxDomainErr::InvalidHostsIP, |_| false));
assert!(Hosts::parse_value("10.4.2.256 www.example.com")
- .map_or_else(|e| e == DomainErr::InvalidHostsIP, |_| false));
+ .map_or_else(|e| e == FirefoxDomainErr::InvalidHostsIP, |_| false));
// Test case-insensitivity.
assert!(
Hosts::parse_value(":: www.ExAMPle.Com").map_or(false, |val| match val {
@@ -3067,19 +1931,30 @@ mod tests {
#[test]
fn test_wildcard_parse_value() {
// Test bad asterisk.
- assert!(Wildcard::parse_value("*")
- .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false));
- assert!(Wildcard::parse_value("www*.example.com")
- .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false));
- assert!(Wildcard::parse_value("www.*.com")
- .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false));
- assert!(
- Wildcard::parse_value("*..com").map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)
- );
- assert!(Wildcard::parse_value("www.com*")
- .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false));
- assert!(Wildcard::parse_value("ww*w.com")
- .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false));
+ assert!(Wildcard::parse_value("*").map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')),
+ |_| false
+ ));
+ assert!(Wildcard::parse_value("www*.example.com").map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')),
+ |_| false
+ ));
+ assert!(Wildcard::parse_value("www.*.com").map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')),
+ |_| false
+ ));
+ assert!(Wildcard::parse_value("*..com").map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::EmptyLabel),
+ |_| false
+ ));
+ assert!(Wildcard::parse_value("www.com*").map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')),
+ |_| false
+ ));
+ assert!(Wildcard::parse_value("ww*w.com").map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')),
+ |_| false
+ ));
// Test case-insensitivity.
assert!(
Wildcard::parse_value("*.wWw.ExamPLE.com").map_or(false, |val| match val {
@@ -3099,7 +1974,7 @@ mod tests {
assert!(
Wildcard::parse_value("*.www.example.com").map_or(false, |val| match val {
Value::Domain(ref dom) =>
- dom.domain.value == b"www.example.com" && dom.proper_subdomains,
+ dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains,
Value::Comment(_) | Value::Blank => false,
})
);
@@ -3108,7 +1983,7 @@ mod tests {
Wildcard::parse_value(" \t\t \t\t \t*.www.example.com#asdflkj asdf alskdfj ")
.map_or(false, |val| match val {
Value::Domain(ref dom) =>
- dom.domain.value == b"www.example.com" && dom.proper_subdomains,
+ dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains,
Value::Comment(_) | Value::Blank => false,
})
);
@@ -3116,7 +1991,7 @@ mod tests {
Wildcard::parse_value(" \t\t \t\t \twww.example.com #asdflkj asdf alskdfj ")
.map_or(false, |val| match val {
Value::Domain(ref dom) =>
- dom.domain.value == b"www.example.com" && !dom.proper_subdomains,
+ dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains,
Value::Comment(_) | Value::Blank => false,
})
);
@@ -3125,7 +2000,7 @@ mod tests {
Wildcard::parse_value(" \t\t *.www.example.com \t\t \t ").map_or(false, |val| {
match val {
Value::Domain(ref dom) => {
- dom.domain.value == b"www.example.com" && dom.proper_subdomains
+ dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains
}
Value::Comment(_) | Value::Blank => false,
}
@@ -3135,16 +2010,18 @@ mod tests {
Wildcard::parse_value("\t\t \twww.example.com \t\t \t\t ").map_or(false, |val| {
match val {
Value::Domain(ref dom) => {
- dom.domain.value == b"www.example.com" && !dom.proper_subdomains
+ dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains
}
Value::Comment(_) | Value::Blank => false,
}
})
);
- assert!(Wildcard::parse_value("www .example.com")
- .map_or_else(|e| e == DomainErr::InvalidByte(b' '), |_| false));
+ assert!(Wildcard::parse_value("www .example.com").map_or_else(
+ |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')),
+ |_| false
+ ));
// Test 127 labels after wildcard error.
- assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == DomainErr::InvalidWildcardDomain, |_| false));
+ assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == FirefoxDomainErr::InvalidWildcardDomain, |_| false));
// Test 126 labels after wildcard is ok.
assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| match val {
Value::Domain(ref dom) => dom.domain.label_count().get() == 126 && dom.proper_subdomains,
@@ -3166,27 +2043,27 @@ mod tests {
assert!(
RpzDomain::parse_value("*.www.example.com").map_or(false, |val| {
let dom = val.unwrap_domain();
- dom.is_proper_subdomains() && dom.domain().value == b"www.example.com"
+ dom.is_proper_subdomains() && dom.domain().as_bytes() == b"www.example.com"
})
);
assert!(
RpzDomain::parse_value("||www.example.com").map_or(false, |val| {
let dom = val.unwrap_domain();
- dom.is_subdomains() && dom.domain().value == b"www.example.com"
+ dom.is_subdomains() && dom.domain().as_bytes() == b"www.example.com"
})
);
assert!(
RpzDomain::parse_value("0.0.0.0 www.example.com").map_or(false, |val| {
let dom = val.unwrap_domain();
!(dom.is_subdomains() || dom.is_proper_subdomains())
- && dom.domain().value == b"www.example.com"
+ && dom.domain().as_bytes() == b"www.example.com"
})
);
assert!(
RpzDomain::parse_value("www.example.com").map_or(false, |val| {
let dom = val.unwrap_domain();
!(dom.is_subdomains() || dom.is_proper_subdomains())
- && dom.domain().value == b"www.example.com"
+ && dom.domain().as_bytes() == b"www.example.com"
})
);
// Test case-insensitivity.
@@ -3245,19 +2122,19 @@ mod tests {
doms
}).into_iter();
assert!(iter.next().map_or(false, |d| {
- d.domain().value == b"aawww.abc" && d.is_subdomains()
+ d.domain().as_bytes() == b"aawww.abc" && d.is_subdomains()
}));
assert!(iter.next().map_or(false, |d| {
- d.domain().value == b"abc.abc" && d.is_domain()
+ d.domain().as_bytes() == b"abc.abc" && d.is_domain()
}));
assert!(iter.next().map_or(false, |d| {
- d.domain().value == b"abc.abc" && d.is_proper_subdomains()
+ d.domain().as_bytes() == b"abc.abc" && d.is_proper_subdomains()
}));
assert!(iter.next().map_or(false, |d| {
- d.domain().value == b"com" && d.is_proper_subdomains()
+ d.domain().as_bytes() == b"com" && d.is_proper_subdomains()
}));
assert!(iter.next().map_or(false, |d| {
- d.domain().value == b"NeT" && d.is_proper_subdomains()
+ d.domain().as_bytes() == b"NeT" && d.is_proper_subdomains()
}));
assert!(iter.next().is_none());
}
@@ -3284,7 +2161,7 @@ mod tests {
2621943843, 4223295645, 1753858368, 130924388, 965594304, 3942586845, 1573844087,
4237886128, 481383133, 56931017,
]) && Wildcard::parse_value("*.1").map_or(false, |val2| {
- val2.unwrap_domain().domain_count() == (val - BigUint::new(vec![256u32.pow(3)]))
+ val2.unwrap_domain().domain_count() == (val - BigUint::new(vec![366u32.pow(3)]))
})
}));
}
diff --git a/src/dom_count_auto_gen.rs b/src/dom_count_auto_gen.rs
@@ -1,61 +1,47 @@
-use crate::dom::Domain;
+use ascii_domain::dom::Domain;
use num_bigint::BigUint;
/// The count of proper subdomains for both `Adblock` and `Wildcard` `Domain` when subdomains
/// and proper subdomains are represented respectively.
#[allow(
clippy::arithmetic_side_effects,
clippy::as_conversions,
- clippy::implicit_return,
clippy::indexing_slicing,
clippy::too_many_lines,
- clippy::unreadable_literal,
- clippy::unseparated_literal_suffix
+ clippy::unreadable_literal
)]
#[inline]
-pub fn proper_subdomain_count(dom: &Domain<'_>) -> BigUint {
- /// Returns how many proper subdomains are IPv4 addresses.
- /// We need to calculate this so that we can subtract this value
- /// from the cached cardinalities. Note that we don't have to worry
- /// about the `Domain` itself since an IPv4 address can't be parsed into
- /// a `Domain`.
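+// Octet spellings without leading zeros: 10 + 90 + 156 = 256.
+// Octet spellings with leading zeros allowed: (3 * 10) + (2 * 90) + (1 * 156) = 366.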
+pub fn proper_subdomain_count(dom: &Domain<&str>) -> BigUint {
+ /// Returns how many proper subdomains are IPv4 addresses. We need to calculate this so that we can
+ /// subtract this value from the cached cardinalities. Note that we don't have to worry about the `Domain`
+ /// itself since an IPv4 address can't be parsed into a `Domain` via `crate::dom::domain_no_ip`.
#[allow(clippy::cast_lossless)]
#[inline]
- fn ip_count(dom: &Domain<'_>) -> u32 {
- // `Domain`s that have 4 or more `Label`s can't be an IPv4 address.
- // Also `Domain`s must have at least one `Label`, so
- // `0 < 4 - dom.label_count < 4` and `256^3 <= u32::MAX`.
+ fn ip_count(dom: &Domain<&str>) -> u32 {
+ // `Domain`s that have 4 or more `Label`s can't be an IPv4 address. Also `Domain`s must have at least one
+ // `Label`, so `0 < 4 - dom.label_count < 4` and `(10 * 3 + 90 * 2 + 156 * 1)^3 = (30 + 180 + 156)^3
+ // = 366^3 <= u32::MAX`.
if dom.label_count().get() < 4 {
dom.into_iter()
.try_fold((), |(), label| {
- // Only a sequence of decimal numbers between 0 and 255 without leading
- // 0s can be an IPv4 address. For `Domain`s that have such `Label`s,
- // the total number of IPv4 addresses is simply 256^(4- label count).
- // Note that `Label`s always have a length of at least 1 and
- // any `Label` longer than 3 cannot be a valid `u8` without leading
- // 0s.
- match label.len() {
- 1 => match label.as_str().as_bytes()[0] {
- b'0'..=b'9' => Ok(()),
- _ => Err(()),
- },
- 2 => label.as_str().parse::<u8>().map_or(Err(()), |val| {
- if val < 10 {
- Err(())
- } else {
- Ok(())
- }
- }),
- 3 => label.as_str().parse::<u8>().map_or(Err(()), |val| {
- if val < 100 {
- Err(())
- } else {
- Ok(())
- }
- }),
- _ => Err(()),
+ // Only a sequence of 1 to 3 ASCII digits whose value is between 0 and 255 is a valid octet
+ // in an IPv4 address. For `Domain`s that have such `Label`s, the total number of IPv4
+ // addresses is simply 366^(4 - label count).
+ // 366 comes from the fact that there are 3 distinct ways to represent integers < 10,
+ // 2 distinct ways to represent integers inclusively between 10 and 99, and 1 way
+ // to represent integers greater than 99 giving (3 * 10) + (2 * 90) + (1 * 156) = 366
+ // ways a `Label` can be a valid octet for an IPv4 address.
+ //
+ // `Label`s are never empty and any `Label` longer than 3 cannot be a valid octet. The digit
+ // check is required since `u8::from_str` also accepts a leading `+` (e.g., `+1`).
+ if label.len() < 4 && label.as_str().bytes().all(|b| b.is_ascii_digit()) {
+ label.as_str().parse::<u8>().map_or(Err(()), |_| Ok(()))
+ } else {
+ Err(())
}
})
- .map_or(0, |()| 256u32.pow(4 - dom.label_count().get() as u32))
+ .map_or(0, |()| 366u32.pow(4 - dom.label_count().get() as u32))
} else {
0
}
diff --git a/src/file.rs b/src/file.rs
@@ -1,32 +1,29 @@
-#![allow(
- clippy::exhaustive_structs,
- clippy::implicit_return,
- clippy::into_iter_on_ref,
- clippy::missing_trait_methods,
- clippy::multiple_unsafe_ops_per_block,
- clippy::question_mark_used,
- clippy::ref_patterns,
- clippy::single_char_lifetime_names,
- clippy::wildcard_enum_match_arm
-)]
extern crate alloc;
-use crate::dom::{Adblock, DomainErr, DomainOnly, Hosts, ParsedDomain, RpzDomain, Value, Wildcard};
+use crate::dom::{
+ Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard,
+};
use alloc::string::FromUtf8Error;
-use core::borrow::Borrow;
-use core::fmt::{self, Display, Formatter};
-use core::hash::Hash;
-use core::ops::Deref;
-use core::time::Duration;
+use core::{
+ borrow::Borrow,
+ fmt::{self, Display, Formatter},
+ hash::Hash,
+ ops::Deref,
+ time::Duration,
+};
use reqwest::Client;
use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor};
-use std::collections::{HashMap, HashSet};
-use std::error::Error;
-use std::fs;
-use std::io::{self, ErrorKind};
-use std::path::{Path, PathBuf};
+use std::{
+ collections::{HashMap, HashSet},
+ error::Error,
+ fs,
+ io::{self, ErrorKind},
+ path::{Path, PathBuf},
+};
use superset_map::SupersetSet;
-use tokio::task::{JoinError, JoinSet};
-use tokio::time::{self, error::Elapsed};
+use tokio::{
+ task::{JoinError, JoinSet},
+ time::{self, error::Elapsed},
+};
use url::Url;
/// Wrapper around an absolute [`PathBuf`] to a directory or file depending on `IS_DIR`.
///
@@ -148,7 +145,7 @@ impl<'de, const IS_DIR: bool> Deserialize<'de> for AbsFilePath<IS_DIR> {
fn expecting(&self, formatter: &mut Formatter) -> fmt::Result {
formatter.write_str("struct AbsFilePath")
}
- #[allow(clippy::arithmetic_side_effects, clippy::min_ident_chars)]
+ #[allow(clippy::arithmetic_side_effects)]
#[inline]
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
@@ -207,7 +204,6 @@ impl<'de, const IS_DIR: bool> Deserialize<'de> for AbsFilePath<IS_DIR> {
},
)
}
- #[allow(clippy::min_ident_chars)]
#[inline]
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
@@ -266,7 +262,6 @@ impl<'de, const IS_DIR: bool> Deserialize<'de> for AbsFilePath<IS_DIR> {
}
}
impl<const IS_DIR: bool> Display for AbsFilePath<IS_DIR> {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
f.write_str(&self.path.to_string_lossy())
@@ -313,7 +308,6 @@ impl Deref for HttpUrl {
}
}
impl Display for HttpUrl {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
self.url.fmt(f)
@@ -339,7 +333,6 @@ impl<'de> Deserialize<'de> for HttpUrl {
fn expecting(&self, formatter: &mut Formatter) -> fmt::Result {
formatter.write_str("struct HttpUrl")
}
- #[allow(clippy::min_ident_chars)]
#[inline]
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
@@ -383,7 +376,6 @@ pub enum Kind {
Wildcard,
}
impl Display for Kind {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
@@ -404,7 +396,7 @@ pub enum Name {
Url(HttpUrl),
}
impl Display for Name {
- #[allow(clippy::min_ident_chars)]
+ #[allow(clippy::ref_patterns)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
@@ -422,7 +414,6 @@ pub struct File {
pub data: String,
}
impl Display for File {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
self.name.fmt(f)
@@ -446,7 +437,6 @@ pub struct Summary<'a, E: Eq + Hash> {
pub errors: HashMap<E, usize>,
}
impl<E: Display + Eq + Hash> Display for Summary<'_, E> {
- #[allow(clippy::min_ident_chars)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
write!(
@@ -586,23 +576,23 @@ impl Files {
///
/// Returns a `Vec` containing `Summary` information for each
/// [`File`] that was parsed.
- #[allow(clippy::arithmetic_side_effects)]
+ #[allow(clippy::arithmetic_side_effects, clippy::into_iter_on_ref)]
#[inline]
pub fn add_to_superset<'a: 'b, 'b>(
&'a self,
doms: &mut SupersetSet<RpzDomain<'b>>,
- ) -> Vec<Summary<'a, DomainErr>> {
+ ) -> Vec<Summary<'a, FirefoxDomainErr>> {
/// Iterates each `String` from `files` and transforms each line
/// into `T` before adding it as an `RpzDomain` into `doms`.
#[inline]
fn insert<
'a,
'b: 'a,
- T: Into<RpzDomain<'a>> + ParsedDomain<'a, Error = DomainErr> + Helper,
+ T: Into<RpzDomain<'a>> + ParsedDomain<'a, Error = FirefoxDomainErr> + Helper,
>(
doms: &mut SupersetSet<RpzDomain<'a>>,
files: &'b [File],
- summaries: &mut Vec<Summary<'b, DomainErr>>,
+ summaries: &mut Vec<Summary<'b, FirefoxDomainErr>>,
) {
let kind = T::kind();
files.into_iter().fold((), |(), file| {
@@ -672,6 +662,7 @@ impl LocalFiles {
///
/// Returns [`io::Error`] iff reading said files causes an error. Note that
/// it is _not_ an error if a directory does not exist.
+ #[allow(clippy::wildcard_enum_match_arm)]
#[inline]
pub fn from_path(dir: AbsFilePath<true>) -> Result<Option<Self>, io::Error> {
/// Checks if `path` exists.
@@ -758,7 +749,7 @@ pub enum ExtFileErr {
InvalidUtf8(FromUtf8Error),
}
impl Display for ExtFileErr {
- #[allow(clippy::min_ident_chars)]
+ #[allow(clippy::ref_patterns)]
#[inline]
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match *self {
diff --git a/src/lib.rs b/src/lib.rs
@@ -9,7 +9,6 @@
//! The purpose of these types is to make fetching, parsing, and transforming
//! ad-blocking files into a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone)
//! file easier.
-#![feature(addr_parse_ascii)]
#![feature(btree_cursors)]
#![feature(byte_slice_trim_ascii)]
#![feature(io_error_more)]
@@ -31,8 +30,15 @@
)]
#![allow(
clippy::blanket_clippy_restriction_lints,
+ clippy::exhaustive_structs,
+ clippy::implicit_return,
+ clippy::min_ident_chars,
+ clippy::missing_trait_methods,
clippy::multiple_crate_versions,
- clippy::single_call_fn
+ clippy::question_mark_used,
+ clippy::single_call_fn,
+ clippy::single_char_lifetime_names,
+ clippy::unseparated_literal_suffix
)]
/// Module for hostname-like domains including parsing [`str`]s
/// from a variety of formats.
diff --git a/src/main.rs b/src/main.rs
@@ -26,6 +26,7 @@
clippy::multiple_crate_versions,
clippy::question_mark_used,
clippy::single_call_fn,
+ clippy::single_char_lifetime_names,
clippy::unseparated_literal_suffix
)]
/// Contains a wrapper of block and unblock `RpzDomain`s
@@ -39,21 +40,29 @@ mod config;
/// Contains functions for `pledge(2)` and `unveil(2)` on OpenBSD platforms when compiled
/// with the `priv_sep` feature; otherwise almost all functions are no-ops.
mod priv_sep;
-use crate::app::Domains;
-use crate::args::{ArgsErr, ConfigPath, Opts};
-use crate::config::Config;
-use core::fmt::{self, Display, Formatter};
-use core::time::Duration;
+use crate::{
+ app::Domains,
+ args::{ArgsErr, ConfigPath, Opts},
+ config::Config,
+};
+use core::{
+ fmt::{self, Display, Formatter},
+ time::Duration,
+};
#[cfg(all(feature = "priv_sep", target_os = "openbsd"))]
use priv_sep::UnveilErr;
use reqwest::Client;
-use rpz::dom::DomainErr;
-use rpz::file::{AbsFilePath, ExtFileErr, ExternalFiles, Files, HttpUrl, LocalFiles, Summary};
-use std::collections::HashSet;
-use std::error::Error;
-use std::fs;
-use std::io::{self, Read, Write};
-use std::sync::OnceLock;
+use rpz::{
+ dom::FirefoxDomainErr,
+ file::{AbsFilePath, ExtFileErr, ExternalFiles, Files, HttpUrl, LocalFiles, Summary},
+};
+use std::{
+ collections::HashSet,
+ error::Error,
+ fs,
+ io::{self, Read, Write},
+ sync::OnceLock,
+};
use tokio::runtime::Builder;
use toml::{self, de};
/// The HTTP(S) client that is used to download all files.
@@ -262,7 +271,7 @@ enum Verbosity {
/// option was not passed.
#[inline]
fn write_summary(
- summaries: Vec<Summary<'_, DomainErr>>,
+ summaries: Vec<Summary<'_, FirefoxDomainErr>>,
verbose: bool,
unblock_count: usize,
block_count: usize,
diff --git a/src/priv_sep.rs b/src/priv_sep.rs
@@ -1,4 +1,3 @@
-#![allow(clippy::implicit_return, clippy::pub_use, clippy::ref_patterns)]
#[cfg(all(feature = "priv_sep", target_os = "openbsd"))]
pub use priv_sep::UnveilErr;
#[cfg(all(feature = "priv_sep", target_os = "openbsd"))]
@@ -7,8 +6,10 @@ use priv_sep::{self, Permissions, Promise, Promises};
use std::env;
#[cfg(not(all(feature = "priv_sep", target_os = "openbsd")))]
use std::fs;
-use std::io::{Error, ErrorKind};
-use std::path::Path;
+use std::{
+ io::{Error, ErrorKind},
+ path::Path,
+};
/// Used instead of `()` for the parameter
/// in the `pledge` functions. This allows
/// one to avoid having to disable certain lints.