rpz

Response policy zone (RPZ) file generator.
git clone https://git.philomathiclife.com/repos/rpz
Log | Files | Refs | README

commit 9fd5ba3850cf6fe8d46d38ed1e30512bfc2eeb81
parent 06b89378239dbcc1eb0a2679bf8e62171e9d78e8
Author: Zack Newman <zack@philomathiclife.com>
Date:   Mon,  5 Feb 2024 12:00:00 -0700

refactor to use ascii_domain

Diffstat:
M Cargo.toml | 3 ++-
M README.md | 18 +++++++++---------
M src/app.rs | 32 +++++++++++++++-----------------
M src/args.rs | 8 +++++---
M src/config.rs | 24 ++++++++++--------------
M src/dom.rs | 1973 +++++++++++++++++--------------------------------------------------------------
M src/dom_count_auto_gen.rs | 68 +++++++++++++++++++++++++++-----------------------------------------
M src/file.rs | 67 +++++++++++++++++++++++++++++--------------------------------------
M src/lib.rs | 10 ++++++++--
M src/main.rs | 35 ++++++++++++++++++++++-------------
M src/priv_sep.rs | 7 ++++---
11 files changed, 556 insertions(+), 1689 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0" name = "rpz" readme = "README.md" repository = "https://git.philomathiclife.com/repos/rpz/" -version = "0.2.1" +version = "0.3.0" [lib] name = "rpz" @@ -20,6 +20,7 @@ name = "rpz" path = "src/main.rs" [dependencies] +ascii_domain = { version = "0.3.0", default-features = false } num-bigint = { version = "0.4.4", default-features = false } reqwest = { version = "0.11.23", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] } serde = { version = "1.0.196", default-features = false } diff --git a/README.md b/README.md @@ -62,7 +62,7 @@ All ad-(un)block files must be valid UTF-8; however for a given domain, each lab `!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `{`, `}`, and `~`. Labels must be delimited by `.`. Domains in the file must be delimited by a line feed or carriage return and line feed. A domain must be less than 254 characters in length including the `.` label separator. Domains are treated as case-insensitive with uppercase letters treated as lowercase. Domains must not be an -[Ipv4Addr](https://doc.rust-lang.org/std/net/struct.Ipv4Addr.html). +IPv4 address. ### Adblock-style @@ -71,8 +71,9 @@ with the requirement that the rule conforms to the following extended regex: `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$` -where `<domain>` conforms to a valid [`Domain`](https://docs.rs/rpz/latest/rpz/dom/struct.Domain.html) with the added requirement that it does not contain `$`, and -`<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). 
+where `<domain>` conforms to a valid [`Domain`](https://docs.rs/ascii_domain/latest/ascii_domain/dom/struct.Domain.html) based on +[`ASCII_FIREFOX`](https://docs.rs/ascii_domain/latest/ascii_domain/char_set/constant.ASCII_FIREFOX.html) with the added requirements +that it does not have the form of an IPv4 address and does not contain `$`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). Lines that begin with `||` cause all subdomains to be blocked (i.e., the domain itself and all proper subdomains); without `||`, only the specific domain is blocked. @@ -89,7 +90,7 @@ with the requirement that the rule conforms to the following regex: `^<ws>*<domain><ws>*(#.*)?$` -where `<domain>` conforms to a valid `Domain`, and `<ws>` is any sequence of ASCII whitespace. +where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX` but does not have the form of an IPv4 address, and `<ws>` is any sequence of ASCII whitespace. Domains only represent themselves (i.e., proper subdomains will not be blocked). @@ -100,7 +101,7 @@ with the requirement that the rule conforms to the following extended regex: `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` -where `<domain>` conforms to a valid `Domain`, `<ws>` is any sequence of ASCII whitespace, and `<ip>` is one of the following: +where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX` but does not have the form of an IPv4 address, `<ws>` is any sequence of ASCII whitespace, and `<ip>` is one of the following: `::`, `::1`, `0.0.0.0`, or `127.0.0.1`. @@ -113,7 +114,7 @@ with the requirement that the rule conforms to the following extended regex: `^<ws>*(\*\.)?<domain><ws>*(#.*)?$` -where `<domain>` conforms to a valid `Domain`, and `<ws>` is any sequence of ASCII whitespace. +where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX` but does not have the form of an IPv4 address, and `<ws>` is any sequence of ASCII whitespace. 
If `domain` begins with `*.`, then `domain` must have length less than 252 and all proper subdomains are blocked—this does _not_ include the domain itself; otherwise, only the `domain` is blocked. @@ -180,7 +181,7 @@ Parsing errors are ignored; all other errors are written to `stderr` before prog ### Status -This package will be actively maintained until it is deemed “feature complete”. +This package is actively maintained. The crates are only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but they should work on any [Tier 1 with Host Tools](https://doc.rust-lang.org/beta/rustc/platform-support.html) @@ -188,5 +189,4 @@ target. Nightly `rustc` is required. Once `BTreeMap` [cursors are stabilized](https://github.com/rust-lang/rust/issues/107540), stable `rustc` will work. On OpenBSD-stable, one can use the `rust` port as long as `RUSTC_BOOTSTRAP` is `export`ed with a value of `1` before invoking -`cargo build --all-features --release` or `cargo install --all-features rpz`. Note that the `rust-ring` port must also be installed with -the `[patch]` section of `Cargo.toml` or `~/.cargo/config.toml` configured appropriately. +`cargo build --all-features --release` or `cargo install --all-features rpz`. diff --git a/src/app.rs b/src/app.rs @@ -1,17 +1,14 @@ -#![allow( - clippy::into_iter_on_ref, - clippy::ref_patterns, - clippy::single_char_lifetime_names, - clippy::unseparated_literal_suffix -)] use core::convert; use rpz::dom::{ - Adblock, DomainErr, DomainOnly, Hosts, ParsedDomain, RpzAction, RpzDomain, Value, Wildcard, + Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzAction, RpzDomain, Value, + Wildcard, }; use rpz::file::{AbsFilePath, File, Files, Kind, LocalFiles, Summary}; -use std::collections::HashMap; -use std::fs; -use std::io::{self, Error, Write}; +use std::{ + collections::HashMap, + fs, + io::{self, Error, Write}, +}; use superset_map::SupersetSet; /// Helper that returns the `Kind` of a file. 
pub trait Helper { @@ -69,7 +66,7 @@ impl<'unblock, 'block> Domains<'unblock, 'block> { #[inline] pub fn new_with<'c: 'unblock + 'block>( local: &'c LocalFiles, - ) -> (Self, Vec<Summary<'c, DomainErr>>) { + ) -> (Self, Vec<Summary<'c, FirefoxDomainErr>>) { let mut val = Self { unblock: SupersetSet::new(), block: SupersetSet::new(), @@ -85,11 +82,11 @@ impl<'unblock, 'block> Domains<'unblock, 'block> { #[inline] fn add_block_file< 'c: 'block, - T: Into<RpzDomain<'block>> + ParsedDomain<'block, Error = DomainErr> + Helper, + T: Into<RpzDomain<'block>> + ParsedDomain<'block, Error = FirefoxDomainErr> + Helper, >( &mut self, file: &'c File, - summaries: &mut Vec<Summary<'c, DomainErr>>, + summaries: &mut Vec<Summary<'c, FirefoxDomainErr>>, ) { let mut summary = Summary { file, @@ -133,23 +130,24 @@ impl<'unblock, 'block> Domains<'unblock, 'block> { pub fn add_block_files<'c: 'block>( &mut self, files: &'c Files, - summaries: &mut Vec<Summary<'c, DomainErr>>, + summaries: &mut Vec<Summary<'c, FirefoxDomainErr>>, ) { /// Parses each line in the `String`s in `files` as an `RpzDomain` before /// adding it to `Domains::block` iff `Domains::unblock` does not contain /// a superset of it. /// /// All parsing errors are ignored. 
+ #[allow(clippy::into_iter_on_ref)] #[inline] fn add_files< 'unblock, 'block, 'c: 'block, - T: Into<RpzDomain<'block>> + ParsedDomain<'block, Error = DomainErr> + Helper, + T: Into<RpzDomain<'block>> + ParsedDomain<'block, Error = FirefoxDomainErr> + Helper, >( doms: &mut Domains<'unblock, 'block>, files: &'c [File], - summaries: &mut Vec<Summary<'c, DomainErr>>, + summaries: &mut Vec<Summary<'c, FirefoxDomainErr>>, ) { files .into_iter() @@ -170,7 +168,7 @@ impl<'unblock, 'block> Domains<'unblock, 'block> { fn add_local_files<'c: 'unblock + 'block>( &mut self, files: &'c LocalFiles, - ) -> Vec<Summary<'c, DomainErr>> { + ) -> Vec<Summary<'c, FirefoxDomainErr>> { let mut summaries = files.unblock.add_to_superset(&mut self.unblock); self.add_block_files(&files.block, &mut summaries); summaries diff --git a/src/args.rs b/src/args.rs @@ -1,8 +1,9 @@ -#![allow(clippy::question_mark_used, clippy::ref_patterns)] use core::fmt::{self, Display, Formatter}; use rpz::file::AbsFilePath; -use std::env::{self, Args}; -use std::error::Error; +use std::{ + env::{self, Args}, + error::Error, +}; /// Error returned when parsing arguments passed to the application. 
#[allow(clippy::exhaustive_enums, clippy::module_name_repetitions)] #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] @@ -26,6 +27,7 @@ pub enum ArgsErr { QuietAndVerbose, } impl Display for ArgsErr { + #[allow(clippy::ref_patterns)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { diff --git a/src/config.rs b/src/config.rs @@ -1,15 +1,9 @@ -#![allow( - clippy::exhaustive_structs, - clippy::implicit_return, - clippy::into_iter_on_ref, - clippy::missing_trait_methods, - clippy::question_mark_used, - clippy::single_char_lifetime_names -)] extern crate alloc; use alloc::borrow::Cow; -use core::fmt::{self, Display, Formatter}; -use core::time::Duration; +use core::{ + fmt::{self, Display, Formatter}, + time::Duration, +}; use rpz::file::{AbsFilePath, HttpUrl}; use serde::de::{Deserialize, Deserializer, Error, MapAccess, SeqAccess, Unexpected, Visitor}; use std::collections::HashSet; @@ -36,11 +30,10 @@ pub struct Config { pub wildcard: HashSet<HttpUrl>, } impl Display for Config { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { /// Helper function that writes the `Url`s in a `HashSet<HttpUrl>`. - #[allow(clippy::min_ident_chars)] + #[allow(clippy::into_iter_on_ref)] #[inline] fn keys(set: &HashSet<HttpUrl>, f: &mut Formatter<'_>, name: &str) -> fmt::Result { write!(f, "{name}: [").and_then(|()| { @@ -112,7 +105,6 @@ impl<'de> Deserialize<'de> for Config { "'timeout', 'rpz', 'local_dir', 'adblock', 'domain', 'hosts', or 'wildcard'", ) } - #[allow(clippy::min_ident_chars)] #[inline] fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> where @@ -148,7 +140,11 @@ impl<'de> Deserialize<'de> for Config { A: MapAccess<'d>, { /// Verifies that the `HashSet`s are pairwise disjoint. 
- #[allow(clippy::arithmetic_side_effects, clippy::indexing_slicing)] + #[allow( + clippy::arithmetic_side_effects, + clippy::indexing_slicing, + clippy::into_iter_on_ref + )] #[inline] fn hash_overlap<E: Error>(maps: &[&HashSet<HttpUrl>]) -> Result<(), E> { /// Verifies the intersection of `left` and `right` is empty. diff --git a/src/dom.rs b/src/dom.rs @@ -1,479 +1,33 @@ -#![allow( - clippy::arithmetic_side_effects, - clippy::missing_trait_methods, - clippy::implicit_return, - clippy::into_iter_on_ref, - clippy::question_mark_used, - clippy::ref_patterns, - clippy::single_char_lifetime_names, - clippy::unseparated_literal_suffix, - clippy::wildcard_enum_match_arm -)] use crate::dom_count_auto_gen::proper_subdomain_count; -use core::borrow::Borrow; -use core::cmp::Ordering; -use core::convert::{self, AsRef}; -use core::fmt::{self, Display, Formatter}; -use core::hash::{Hash, Hasher}; -use core::iter::FusedIterator; -use core::net::Ipv4Addr; -use core::num::NonZeroU8; -use core::ops::Deref; -use core::str; +use ascii_domain::{ + char_set::{AllowedAscii, ASCII_FIREFOX}, + dom::{Domain, DomainErr, DomainOrdering}, +}; +use core::{ + borrow::Borrow, + cmp::Ordering, + convert, + fmt::{self, Display, Formatter}, + hash::{Hash, Hasher}, + num::NonZeroU8, + ops::Deref, + str, +}; use num_bigint::BigUint; -use std::error; -use std::io::{Error, Write}; +use std::{ + error, + io::{Error, Write}, +}; use superset_map::SetOrd; use zfc::{BoundedCardinality, Cardinality, Set}; -/// A flag used to indicate information about the characters -/// in a `Domain`. This flag is used to perform more efficient -/// comparisons that can potentially avoid temporary -/// memory allocations to treat uppercase letters as if they -/// were lowercase. -#[allow(clippy::exhaustive_enums)] -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -enum CharFlag { - /// No letters, ticks, or underscores. - None, - /// Lowercase letters but no uppercase letters, ticks, or underscores. 
- Lower, - /// Uppercase letters but no lowercase letters, ticks, or underscores. - Upper, - /// Ticks or underscores but no letters. - Ticks, - /// Uppercase and lowercase letters but no ticks or underscores. - LowerUpper, - /// Lowercase letters and ticks or underscores, but no uppercase letters. - LowerTicks, - /// Uppercase letters and ticks or underscores, but no lowercase letters. - UpperTicks, - /// Uppercase letters, lowercase letters, and ticks or underscores. - All, -} -impl CharFlag { - /// Returns a `bool` that indicates - /// if equivalence must be done in a case - /// insensitive way. - #[inline] - const fn eq_ignore_case(self, other: Self) -> bool { - match self { - Self::None | Self::Ticks => false, - Self::Lower | Self::LowerTicks => !matches!( - other, - Self::None | Self::Ticks | Self::Lower | Self::LowerTicks - ), - Self::Upper | Self::UpperTicks => !matches!( - other, - Self::None | Self::Ticks | Self::Upper | Self::UpperTicks - ), - Self::LowerUpper | Self::All => !matches!(other, Self::None | Self::Ticks), - } - } -} -/// A domain that consists of at least one [`Label`]. -/// The total length of a `Domain` is at most 253 -/// characters in length including the `.` seperator. -/// The trailing `.`, if one exists, is always removed. -/// -/// This is more restrictive than what a domain is allowed to be -/// per the [Domain Name System (DNS)](https://www.rfc-editor.org/rfc/rfc2181), -/// but it is more permissive than what [RFC 1123](https://www.rfc-editor.org/rfc/rfc1123) -/// and [RFC 5891](https://datatracker.ietf.org/doc/html/rfc5891) allow. -/// In particular only the ASCII/UTF-8 encoding of the following Unicode scalar values is allowed in a `Label`: -/// -/// `!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `{`, `}`, `~`. -/// -/// with each `Label` delimited by `.`. 
Uppercase letters are treated as lowercase; -/// however for better comparison performance that doesn't lead to intermediate memory allocations, -/// two `Domain`s should consist entirely of the same case. `Domain`s must not be an [`Ipv4Addr`]. -/// -/// Those Unicode scalar values were chosen based on what [Firefox](https://www.mozilla.org/en-US/firefox/) -/// allows as of 2023-09-03T20:50+00:00. -#[derive(Clone, Debug)] -pub struct Domain<'a> { - /// The domain value. - /// Guaranteed to have length between 1 and 253. - /// Guaranteed to be the UTF-8 encoding of a - /// sequence of Unicode scalar values from the set: - /// `!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, - /// `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `.`, `{`, `}`, and `~`. - /// This is stored as a slice of bytes to - /// allow for easier construction of `Label`s. - value: &'a [u8], - /// The lengths of each label. - /// Guaranteed to have length between 1 and 127 - /// with each value being between 1 and 63. - label_lens: Vec<NonZeroU8>, - /// Flag that contain information about the kind of - /// characters in `value`. - flag: CharFlag, -} -impl<'a> Domain<'a> { - /// The maximum length of a `Domain`. - /// `Domain`s don't include the trailing `.` nor the 0-octet root label, - /// so this is 253. - // SAFETY: 0 < 253 < 256. - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(253) }; - /// The minimum length of a `Domain`. - /// `Domain`s don't include the trailing `.` nor the 0-octet root label, - /// so this is 1. - // SAFETY: 0 < 1 < 256. - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) }; - /// The domain which always has at least one label. 
- /// The last label is _not_ trailed by `.` - #[allow(unsafe_code)] - #[inline] - #[must_use] - pub const fn as_str(&self) -> &'a str { - // SAFETY: - // The only way to construct a `Domain` is via - // `try_from` which only uses a valid ASCII - // substring from the original `str` input. - // `Domain` is immutable ensuring such invariants are kept. - unsafe { str::from_utf8_unchecked(self.value) } - } - /// Returns the count of [`Label`]s. - /// Due to length requirements of `Label` and `Domain`, - /// this is less than `128`. - #[inline] - #[must_use] - #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)] - pub fn label_count(&self) -> NonZeroU8 { - // SAFETY: - // The only way to construct a `Domain` is via - // `try_from` which only uses a valid ASCII - // substring from the original `str` input. - // `Domain` is immutable ensuring such invariants are kept. - // Due to the length requirements of both `Domain` and `Label`, - // `label_lens` has between 1 and 127 values which is a valid - // `NonZeroU8`. - unsafe { NonZeroU8::new_unchecked(self.label_lens.len() as u8) } - } - /// The length of the `Domain`. - /// This is the same as `self.as_str().len()`. - #[inline] - #[must_use] - #[allow(unsafe_code, clippy::as_conversions, clippy::cast_possible_truncation)] - pub const fn len(&self) -> NonZeroU8 { - // SAFETY: - // The only way to construct a `Domain` is via - // `try_from` which only uses a valid ASCII - // substring from the original `str` input. - // `Domain` is immutable ensuring such invariants are kept. - // Due to the length requirements of `Domain` , - // `self.value` is guaranteed to have length > 0 and < 254. - unsafe { NonZeroU8::new_unchecked(self.value.len() as u8) } - } - /// Returns an [`Iterator`] of [`Label`]s without - /// consuming the `Domain`. 
- #[inline] - #[must_use] - pub fn iter(&self) -> LabelIter<'a, '_> { - self.into_iter() - } - /// Recursively checks from the TLD if each label is the same - /// until one domain has no more labels. - #[inline] - fn same_labels(&self, other: &Domain<'_>) -> bool { - self.into_iter() - .zip(other) - .try_fold((), |(), (label, label2)| { - if label.value == label2.value { - Ok(()) - } else { - Err(()) - } - }) - .map_or(false, |()| true) - } - /// Recursively checks from the TLD if each label is the same ignoring case - /// until one domain has no more labels. - #[inline] - fn same_labels_ignore_case(&self, other: &Domain<'_>) -> bool { - self.into_iter() - .zip(other) - .try_fold((), |(), (label, label2)| { - if label.value.eq_ignore_ascii_case(label2.value) { - Ok(()) - } else { - Err(()) - } - }) - .map_or(false, |()| true) - } - /// Function that transforms a slice of bytes into a `Domain`. - /// It is not public since we only want to allow valid `str`s. - /// Trailing `.` is removed first. - #[allow( - clippy::as_conversions, - clippy::indexing_slicing, - clippy::option_if_let_else, - clippy::unreachable - )] - #[inline] - fn try_from_slice<'b: 'a>(mut value: &'b [u8]) -> Result<Self, DomainErr> { - value = match value.last() { - None => return Err(DomainErr::Empty), - Some(byt) => { - if *byt == b'.' { - &value[..value.len() - 1] - } else { - value - } - } - }; - if value.len() > Self::MAX_LEN.get() as usize { - Err(DomainErr::LenExceeds253(value.len())) - } else { - let mut label_lens = Vec::with_capacity(3); - let mut label_len = 0; - // Bitwise flag that means: - // 0 => no letters, ticks, or underscores, - // 1 => lowercase letters; but no uppercase letters, ticks, or underscores. - // 2 => uppercase letters; but no lowercase letters, ticks, or underscores. - // 4 => ticks or underscores, but no letters. - let mut flag = 0u8; - value - .into_iter() - .try_fold((), |(), byt| { - match *byt { - b'.' 
=> { - return NonZeroU8::new(label_len).map_or( - Err(DomainErr::EmptyLabel), - |length| { - label_lens.push(length); - label_len = 0; - Ok(()) - }, - ) - } - b'A'..=b'Z' => flag |= 2, - b'`' | b'_' => flag |= 4, - b'a'..=b'z' => flag |= 1, - 0..=b' ' - | b'"'..=b'#' - | b'%' - | b'*' - | b'/' - | b':' - | b'<' - | b'>'..=b'@' - | b'['..=b'^' - | b'|' - | 127.. => return Err(DomainErr::InvalidByte(*byt)), - _ => (), - } - if label_len == 63 { - Err(DomainErr::LabelLenExceeds63) - } else { - label_len += 1; - Ok(()) - } - }) - .and_then(|()| { - NonZeroU8::new(label_len) - .ok_or(DomainErr::EmptyLabel) - .and_then(|length| { - Ipv4Addr::parse_ascii(value).map_or_else( - |_| { - label_lens.push(length); - Ok(Self { - value, - label_lens, - flag: match flag { - 0 => CharFlag::None, - 1 => CharFlag::Lower, - 2 => CharFlag::Upper, - 3 => CharFlag::LowerUpper, - 4 => CharFlag::Ticks, - 5 => CharFlag::LowerTicks, - 6 => CharFlag::UpperTicks, - 7 => CharFlag::All, - _ => unreachable!( - "there is a bug in Domain::try_from_slice" - ), - }, - }) - }, - |_| Err(DomainErr::Ipv4), - ) - }) - }) - } - } -} -impl PartialEq<Domain<'_>> for Domain<'_> { - #[inline] - fn eq(&self, other: &Domain<'_>) -> bool { - if self.flag.eq_ignore_case(other.flag) { - self.value.eq_ignore_ascii_case(other.value) - } else { - self.value == other.value - } - } -} -impl Eq for Domain<'_> {} -impl PartialOrd<Domain<'_>> for Domain<'_> { - #[inline] - fn partial_cmp(&self, other: &Domain<'_>) -> Option<Ordering> { - Some(self.cmp(other)) - } -} -impl<'a> AsRef<str> for Domain<'a> { - #[inline] - fn as_ref(&self) -> &'a str { - self.as_str() - } -} -impl<'a> Deref for Domain<'a> { - type Target = str; - #[inline] - fn deref(&self) -> &Self::Target { - self.as_str() - } -} -/// `Ok(())` is returned iff the `Domain`s have the same labels -/// with one `Domain` having more labels than the other. 
-#[inline] -fn cmp_doms(left: &Domain<'_>, right: &Domain<'_>) -> Result<(), Ordering> { - let left_input: Vec<u8>; - let right_input: Vec<u8>; - let left_dom: Domain<'_>; - let right_dom: Domain<'_>; - let (left_ref, right_ref) = match (left.flag, right.flag) { - (CharFlag::None, _) - | (_, CharFlag::None) - | ( - CharFlag::Lower | CharFlag::Ticks | CharFlag::LowerTicks, - CharFlag::Lower | CharFlag::Ticks | CharFlag::LowerTicks, - ) - | (CharFlag::Upper, CharFlag::Upper) => (left, right), - (CharFlag::Lower | CharFlag::LowerTicks | CharFlag::Ticks, _) => { - right_input = right.value.to_ascii_lowercase(); - right_dom = Domain { - value: right_input.as_slice(), - label_lens: right.label_lens.clone(), - flag: CharFlag::LowerTicks, - }; - (left, &right_dom) - } - (CharFlag::Upper, CharFlag::LowerUpper) => { - right_input = right.value.to_ascii_uppercase(); - right_dom = Domain { - value: right_input.as_slice(), - label_lens: right.label_lens.clone(), - flag: CharFlag::Upper, - }; - (left, &right_dom) - } - (_, CharFlag::Lower | CharFlag::Ticks | CharFlag::LowerTicks) => { - left_input = left.value.to_ascii_lowercase(); - left_dom = Domain { - value: left_input.as_slice(), - label_lens: left.label_lens.clone(), - flag: CharFlag::LowerTicks, - }; - (&left_dom, right) - } - (CharFlag::LowerUpper, CharFlag::Upper) => { - left_input = left.value.to_ascii_uppercase(); - left_dom = Domain { - value: left_input.as_slice(), - label_lens: left.label_lens.clone(), - flag: CharFlag::Upper, - }; - (&left_dom, right) - } - (_, _) => { - left_input = left.value.to_ascii_lowercase(); - left_dom = Domain { - value: left_input.as_slice(), - label_lens: left.label_lens.clone(), - flag: CharFlag::LowerTicks, - }; - right_input = right.value.to_ascii_lowercase(); - right_dom = Domain { - value: right_input.as_slice(), - label_lens: right.label_lens.clone(), - flag: CharFlag::LowerTicks, - }; - (&left_dom, &right_dom) - } - }; - // Faster to compare the entire value when we can instead 
of each label. - if left_ref.value == right_ref.value { - Err(Ordering::Equal) - } else { - left_ref - .into_iter() - .zip(right_ref) - .try_fold((), |(), (label, label2)| { - match label.value.cmp(label2.value) { - Ordering::Less => Err(Ordering::Less), - Ordering::Equal => Ok(()), - Ordering::Greater => Err(Ordering::Greater), - } - }) - } -} -impl Ord for Domain<'_> { - /// The total order that is defined follows the following hierarchy: - /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. - /// 2. If 1. evaluates as not equivalent, then return the result. - /// 3. Return the comparison of `Label` counts. - /// - /// For example, `com` `<` `example.com` `<` `net` `<` `example.net`. - /// - /// This is the same as the [canonical DNS name order](https://datatracker.ietf.org/doc/html/rfc4034#section-6.1). - #[inline] - fn cmp(&self, other: &Self) -> Ordering { - cmp_doms(self, other).map_or_else(convert::identity, |()| { - self.label_count().cmp(&other.label_count()) - }) - } -} -impl Hash for Domain<'_> { - #[inline] - fn hash<H: Hasher>(&self, state: &mut H) { - match self.flag { - CharFlag::None | CharFlag::Lower | CharFlag::Ticks | CharFlag::LowerTicks => { - self.value.hash(state); - } - _ => self.value.to_ascii_lowercase().hash(state), - } - } -} -impl Display for Domain<'_> { - #[inline] - #[allow(clippy::min_ident_chars)] - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.write_str(self.as_str()) - } -} -/// Error returned when an invalid string -/// is passed to [`Domain::try_from`], [`Adblock::parse_value`], -/// [`DomainOnly::parse_value`], [`Hosts::parse_value`], [`Wildcard::parse_value`], -/// or [`RpzDomain::parse_value`]. +/// Error returned when an invalid string is passed to [`Adblock::parse_value`], [`DomainOnly::parse_value`], +/// [`Hosts::parse_value`], [`Wildcard::parse_value`], or [`RpzDomain::parse_value`]. 
#[allow(clippy::exhaustive_enums)] #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] -pub enum DomainErr { - /// The domain was empty. - Empty, - /// The length of the string had length greater than 253 - /// not counting a terminating `.` if there was one. - LenExceeds253(usize), - /// The domain contained at least one empty label. - EmptyLabel, - /// The domain contained at least one label whose length exceeded 63. - LabelLenExceeds63, - /// The domain contained an invalid byte value. - /// - /// Note the contained `u8` is ASCII iff it is `<= 127`; otherwise - /// it is the first UTF-8 code unit of a multi-byte Unicode scalar value. - InvalidByte(u8), - /// The domain was an [`Ipv4Addr`]. +pub enum FirefoxDomainErr { + /// The domain is invalid based on [`Domain`] using [`ASCII_FIREFOX`]. + InvalidDomain(DomainErr), + /// The domain was an IPv4 address. Ipv4, /// The string passed to [`Adblock::parse_value`] contained `$`. InvalidAdblockDomain, @@ -485,32 +39,11 @@ pub enum DomainErr { /// no proper subdomains. InvalidWildcardDomain, } -impl Display for DomainErr { +impl Display for FirefoxDomainErr { #[inline] - #[allow(unsafe_code, clippy::min_ident_chars)] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { - Self::Empty => f.write_str("domain is empty"), - Self::LenExceeds253(len) => write!( - f, - "domain has length {len} which is greater than the max length of 253" - ), - Self::EmptyLabel => f.write_str("domain has an empty label"), - Self::LabelLenExceeds63 => { - f.write_str("domain has a label that exceeds the max length of 63") - } - Self::InvalidByte(byt) => { - if byt > 31 && byt < 127 { - let utf8 = [byt]; - // SAFETY: - // `byt` is inclusively between 32 and 126 which is valid ASCII which - // in turn is valid UTF-8. 
- let printable_ascii = unsafe { str::from_utf8_unchecked(utf8.as_slice()) }; - write!(f, "domain has a label with the invalid character '{printable_ascii}'") - } else { - write!(f, "domain has a label with the invalid byte value {byt}") - } - } + Self::InvalidDomain(err) => err.fmt(f), Self::Ipv4 => f.write_str("domain was an IPv4 address"), Self::InvalidAdblockDomain => f.write_str("Adblock-style domain contained a '$'"), Self::InvalidHostsIP => f.write_str("hosts-style domain does not begin with the IP '::', '::1', '0.0.0.0', or '127.0.0.1' followed by at least one space or tab"), @@ -518,363 +51,61 @@ impl Display for DomainErr { } } } -impl error::Error for DomainErr {} -impl<'a: 'b, 'b> From<Domain<'a>> for &'b str { - #[inline] - fn from(value: Domain<'a>) -> Self { - value.as_str() - } -} -impl<'a: 'c, 'b, 'c> From<&'b Domain<'a>> for &'c str { - #[inline] - fn from(value: &'b Domain<'a>) -> Self { - value.as_str() - } -} -impl<'a: 'b, 'b> TryFrom<&'a str> for Domain<'b> { - type Error = DomainErr; - #[inline] - fn try_from(val: &'a str) -> Result<Self, Self::Error> { - Self::try_from_slice(val.as_bytes()) - } -} -/// A label of a [`Domain`]. -/// The total length of a `Label` is between 1 and 63 -/// bytes with each Unicode scalar value being one of the following: -/// -/// `!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `{`, `}`, `~`. -/// -/// Note that the uppercase letters are treated as lowercase. -#[derive(Debug, Clone, Copy, Hash)] -pub struct Label<'a> { - /// The label value. - value: &'a str, -} -impl<'a> Label<'a> { - /// The maximum length of a `Label` which is 63. - // SAFETY: 0 < 63 < 256. - #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - pub const MAX_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(63) }; - /// The minimum length of a `Label` which is 1. - // SAFETY: 0 < 1 < 256. 
- #[allow(unsafe_code, clippy::undocumented_unsafe_blocks)] - pub const MIN_LEN: NonZeroU8 = unsafe { NonZeroU8::new_unchecked(1) }; - /// The label. - #[inline] - #[must_use] - pub const fn as_str(self) -> &'a str { - self.value - } -} -impl Display for Label<'_> { - #[inline] - #[allow(clippy::min_ident_chars)] - fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { - f.write_str(self.value) - } -} -impl PartialEq<Label<'_>> for Label<'_> { - #[inline] - fn eq(&self, other: &Label<'_>) -> bool { - self.value.eq_ignore_ascii_case(other.value) - } -} -impl Eq for Label<'_> {} -impl PartialOrd<Label<'_>> for Label<'_> { - #[inline] - fn partial_cmp(&self, other: &Label<'_>) -> Option<Ordering> { - Some(self.cmp(other)) - } -} -impl Ord for Label<'_> { - #[inline] - fn cmp(&self, other: &Self) -> Ordering { - self.value - .to_ascii_lowercase() - .cmp(&other.value.to_ascii_lowercase()) - } -} -impl<'a: 'b, 'b> From<Label<'a>> for &'b str { - #[inline] - fn from(value: Label<'a>) -> Self { - value.as_str() - } -} -impl<'a> AsRef<str> for Label<'a> { - #[inline] - fn as_ref(&self) -> &'a str { - self.as_str() - } -} -impl Deref for Label<'_> { - type Target = str; - #[inline] - fn deref(&self) -> &Self::Target { - self.as_str() - } -} -/// [`Iterator`] that iterates [`Label`]s -/// from a [`Domain`] starting from the TLD -/// down. -pub struct IntoLabelIter<'a> { - /// Domain that contains `Label`s to iterate. - domain: Domain<'a>, - /// Starts at domain.label_count().get() - 1 - /// which is valid since domain.label_count().get() > 0. - /// idx is 255 when the iterator is exhausted. - /// Since idx is decremented each time and it starts - /// at a value less than 254, this is a valid value to use - /// as a flag. - idx: u8, - /// This is used to mark the start of a label before - /// the length of the label has been subtracted. - /// After a label is read, 1 must be subtracted - /// to account for '.'. 
- start: u8, - /// Starts at 0 which is valid since domain.label_count().get() > 0. - /// idx_back is 255 when the iterator is exhausted. - /// Since idx_back is incremented each time and the max label count - /// is 127, this is a valid value to use as a flag. - idx_back: u8, - /// This is used to mark the start of a label before - /// the length of the label has been added. - /// After a label is read, 1 must be added - /// to account for '.'. - start_back: u8, -} -impl<'a> IntoLabelIter<'a> { - /// Helper function to construct an instance. - #[inline] - #[must_use] - fn new(domain: Domain<'a>) -> IntoLabelIter<'a> { - Self { - idx: domain.label_count().get() - 1, - start: domain.len().get(), - idx_back: 0, - start_back: 0, - domain, - } - } -} -impl<'a> Iterator for IntoLabelIter<'a> { - type Item = Label<'a>; - #[inline] - #[allow( - unsafe_code, - clippy::as_conversions, - clippy::cast_possible_truncation, - clippy::indexing_slicing - )] - fn next(&mut self) -> Option<Self::Item> { - self.domain.label_lens.get(self.idx as usize).map(|len| { - self.start -= len.get(); - let lbl = &self.domain.value[self.start as usize..(self.start + len.get()) as usize]; - let label = Label { - // SAFETY: - // The only way to construct a `Domain` is via - // `try_from` which only uses a valid ASCII - // substring from the original `str` input. - // `Domain` is immutable ensuring such invariants are kept. 
- value: unsafe { str::from_utf8_unchecked(lbl) }, - }; - if self.idx == 0 || self.idx <= self.idx_back { - self.idx = 255; - self.idx_back = 255; - } else { - self.idx -= 1; - self.start -= 1; - } - label - }) - } -} -impl FusedIterator for IntoLabelIter<'_> {} -impl ExactSizeIterator for IntoLabelIter<'_> { - #[allow(clippy::as_conversions)] - #[inline] - fn len(&self) -> usize { - if self.idx == 255 { - 0 - } else { - (self.idx - self.idx_back + 1) as usize - } - } -} -impl DoubleEndedIterator for IntoLabelIter<'_> { - #[inline] - #[allow( - unsafe_code, - clippy::as_conversions, - clippy::cast_possible_truncation, - clippy::indexing_slicing - )] - fn next_back(&mut self) -> Option<Self::Item> { - self.domain - .label_lens - .get(self.idx_back as usize) - .map(|len| { - let lbl = &self.domain.value - [self.start_back as usize..(self.start_back + len.get()) as usize]; - let label = Label { - // SAFETY: - // The only way to construct a `Domain` is via - // `try_from` which only uses a valid ASCII - // substring from the original `str` input. - // `Domain` is immutable ensuring such invariants are kept. - value: unsafe { str::from_utf8_unchecked(lbl) }, - }; - if self.idx_back + 1 == self.domain.label_count().get() || self.idx_back >= self.idx - { - self.idx = 255; - self.idx_back = 255; - } else { - self.idx_back += 1; - self.start_back += len.get() + 1; - } - label - }) - } -} -/// [`Iterator`] that iterates [`Label`]s -/// from a borrowed [`Domain`] starting from the TLD -/// down. -pub struct LabelIter<'a, 'b> { - /// Domain that contains `Label`s to iterate. - domain: &'b Domain<'a>, - /// Starts at domain.label_count().get() - 1 - /// which is valid since domain.label_count().get() > 0. - /// idx is 255 when the iterator is exhausted. - /// Since idx is decremented each time and it starts - /// at a value less than 254, this is a valid value to use - /// as a flag. 
- idx: u8, - /// This is used to mark the start of a label before - /// the length of the label has been subtracted. - /// After a label is read, 1 must be subtracted - /// to account for '.'. - start: u8, - /// Starts at 0 which is valid since domain.label_count().get() > 0. - /// idx_back is 255 when the iterator is exhausted. - /// Since idx_back is incremented each time and the max label count - /// is 127, this is a valid value to use as a flag. - idx_back: u8, - /// This is used to mark the start of a label before - /// the length of the label has been added. - /// After a label is read, 1 must be added - /// to account for '.'. - start_back: u8, -} -impl<'a, 'b> LabelIter<'a, 'b> { - /// Helper function to construct an instance. - #[inline] - #[must_use] - fn new(domain: &'b Domain<'a>) -> LabelIter<'a, 'b> { - Self { - idx: domain.label_count().get() - 1, - start: domain.len().get(), - idx_back: 0, - start_back: 0, - domain, - } - } -} -impl<'a> Iterator for LabelIter<'a, '_> { - type Item = Label<'a>; - #[inline] - #[allow( - unsafe_code, - clippy::as_conversions, - clippy::cast_possible_truncation, - clippy::indexing_slicing - )] - fn next(&mut self) -> Option<Self::Item> { - self.domain.label_lens.get(self.idx as usize).map(|len| { - self.start -= len.get(); - let lbl = &self.domain.value[self.start as usize..(self.start + len.get()) as usize]; - let label = Label { - // SAFETY: - // The only way to construct a `Domain` is via - // `try_from` which only uses a valid ASCII - // substring from the original `str` input. - // `Domain` is immutable ensuring such invariants are kept. - value: unsafe { str::from_utf8_unchecked(lbl) }, - }; - if self.idx == 0 || self.idx <= self.idx_back { - self.idx = 255; - self.idx_back = 255; +impl error::Error for FirefoxDomainErr {} +/// The ASCII we allow domains to have. 
+const CHARS: &AllowedAscii<[u8; 78]> = &ASCII_FIREFOX; +/// Parses a `[u8]` into a `Domain` using `CHARS` with the added restriction that the `Domain` does not +/// have the format of an IPv4 address. +#[allow( + clippy::arithmetic_side_effects, + clippy::as_conversions, + clippy::cast_lossless, + clippy::into_iter_on_ref +)] +#[inline] +fn domain_no_ip<'a: 'b, 'b>(val: &'a [u8]) -> Result<Domain<&'b str>, FirefoxDomainErr> { + Domain::try_from_bytes(val, CHARS) + .map_err(FirefoxDomainErr::InvalidDomain) + .and_then(|dom| { + // Faster to consult the metadata first to hopefully avoid parsing as an IPv4 address. + if dom.len().get() < 16 + && dom.label_count().get() == 4 + // We don't use `std::net::Ipv4Addr::from_str` since that does not consider octets with leading + // 0s as valid. This means something like `0.0.0.01` is not considered an IPv4 address, but we + // want to consider that as an IP. + && dom + .into_iter() + .try_fold((), |(), label| { + if label.len() < 4 { + label + .as_bytes() + .into_iter() + .try_fold(0u16, |octet, byt| { + if byt.is_ascii_digit() { + // We already verified the length is at most 3, and we only perform + // this arithmetic on integers between 0 and 9. This means the max value + // of these operations is 999 which is smaller than `u16::MAX`. We verified + // `byt` is an ASCII digit so we know `byt - b'0'` will be inclusively between + // 0 and 9. So no overflow, underflow, or truncation will occur. 
+ Ok(octet * 10 + (byt - b'0') as u16) + } else { + Err(()) + } + }) + .and_then(|int| u8::try_from(int).map_or(Err(()), |_| Ok(()))) + } else { + Err(()) + } + }) + .is_ok() + { + Err(FirefoxDomainErr::Ipv4) } else { - self.idx -= 1; - self.start -= 1; + Ok(dom.into()) } - label }) - } -} -impl FusedIterator for LabelIter<'_, '_> {} -impl ExactSizeIterator for LabelIter<'_, '_> { - #[allow(clippy::as_conversions)] - #[inline] - fn len(&self) -> usize { - if self.idx == 255 { - 0 - } else { - (self.idx - self.idx_back + 1) as usize - } - } } -impl DoubleEndedIterator for LabelIter<'_, '_> { - #[inline] - #[allow( - unsafe_code, - clippy::as_conversions, - clippy::cast_possible_truncation, - clippy::indexing_slicing - )] - fn next_back(&mut self) -> Option<Self::Item> { - self.domain - .label_lens - .get(self.idx_back as usize) - .map(|len| { - let lbl = &self.domain.value - [self.start_back as usize..(self.start_back + len.get()) as usize]; - let label = Label { - // SAFETY: - // The only way to construct a `Domain` is via - // `try_from` which only uses a valid ASCII - // substring from the original `str` input. - // `Domain` is immutable ensuring such invariants are kept. - value: unsafe { str::from_utf8_unchecked(lbl) }, - }; - if self.idx_back + 1 == self.domain.label_count().get() || self.idx_back >= self.idx - { - self.idx = 255; - self.idx_back = 255; - } else { - self.idx_back += 1; - self.start_back += len.get() + 1; - } - label - }) - } -} -impl<'a> IntoIterator for Domain<'a> { - type Item = Label<'a>; - type IntoIter = IntoLabelIter<'a>; - #[inline] - fn into_iter(self) -> Self::IntoIter { - IntoLabelIter::new(self) - } -} -impl<'a, 'b> IntoIterator for &'b Domain<'a> { - type Item = Label<'a>; - type IntoIter = LabelIter<'a, 'b>; - #[inline] - fn into_iter(self) -> Self::IntoIter { - LabelIter::new(self) - } -} -/// Action taken by a DNS server when a domain -/// matches. +/// Action taken by a DNS server when a domain matches. 
#[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum RpzAction { @@ -890,7 +121,6 @@ pub enum RpzAction { TcpOnly, } impl Display for RpzAction { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { @@ -915,12 +145,15 @@ impl Display for RpzAction { /// /// Returns [`Error`] iff [`writeln`] does. #[inline] -pub fn write_rpz_line<W: Write>( +pub fn write_rpz_line<W: Write, T>( mut writer: W, - dom: &Domain<'_>, + dom: &Domain<T>, action: RpzAction, wildcard: bool, -) -> Result<(), Error> { +) -> Result<(), Error> +where + Domain<T>: Display, +{ writeln!( writer, "{}{} CNAME {}.", @@ -935,8 +168,7 @@ pub fn write_rpz_line<W: Write>( } ) } -/// Type that can be returned by [`Domain`]-like -/// parsers (e.g., [`Adblock`]). +/// Type that can be returned by [`Domain`]-like parsers (e.g., [`Adblock`]). #[allow(clippy::exhaustive_enums)] #[derive(Clone, Copy, Debug)] pub enum Value<'a, T: ParsedDomain<'a>> { @@ -1015,10 +247,9 @@ impl<'a, T: ParsedDomain<'a>> Value<'a, T> { } } } -/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. -/// When parsed into a [`Value::Domain`], the domain can -/// be written to a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) -/// file. +/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. When parsed into a +/// [`Value::Domain`], the domain can be written to a +/// [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. pub trait ParsedDomain<'a>: Sized { type Error; /// Parses a `str` into a `Value`. @@ -1027,7 +258,7 @@ pub trait ParsedDomain<'a>: Sized { /// Errors iff `val` is unable to be parsed into a `Value`. fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error>; /// Reference to the contained `Domain`. 
- fn domain(&self) -> &Domain<'a>; + fn domain(&self) -> &Domain<&'a str>; /// Writes `self` as RPZ lines via `writer`. /// /// # Errors @@ -1035,63 +266,62 @@ pub trait ParsedDomain<'a>: Sized { /// Errors iff `writer` errors. fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error>; } -/// Domain constructed from an [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax) -/// with the requirement that the rule conforms to the following extended regex: +/// Domain constructed from an +/// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax) with the +/// requirement that the rule conforms to the following extended regex: /// /// `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$` /// -/// where `<domain>` conforms to a valid [`Domain`] with the added requirement that it does not contain `$`, and -/// `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). +/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`] with the added requirement that it +/// does not contain `$`, is not of the form of an IPv4 address, and `<ws>` is any sequence of +/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). /// -/// Comments are any lines that start with `!` or `#` (ignoring whitespace). Any in-line comments -/// after a valid domain are ignored and will be parsed into a [`Value::Domain`]. +/// Comments are any lines that start with `!` or `#` (ignoring whitespace). Any in-line comments after a valid +/// domain are ignored and will be parsed into a [`Value::Domain`]. /// -/// Note that this means some valid Adblock-style rules are not considered valid since -/// such rules often contain path information or modifiers (e.g., “third-party”), but this only -/// considers domain-only rules. 
+/// Note that this means some valid Adblock-style rules are not considered valid since such rules often contain +/// path information or modifiers (e.g., “third-party”), but this only considers domain-only rules. #[derive(Clone, Debug)] pub struct Adblock<'a> { /// The `Domain`. - domain: Domain<'a>, - /// `true` iff `domain` represents all subdomains. - /// Note that this includes `domain` itself. + domain: Domain<&'a str>, + /// `true` iff `domain` represents all subdomains. Note that this includes `domain` itself. subdomains: bool, } impl<'a> Adblock<'a> { - /// Returns `true` iff the contained [`Domain`] represents all subdomains. - /// Note this includes the `Domain` itself. + /// Returns `true` iff the contained [`Domain`] represents all subdomains. Note this includes the + /// `Domain` itself. #[inline] #[must_use] pub const fn is_subdomains(&self) -> bool { self.subdomains } - /// Since `DomainOnly` and `Hosts` are treated the same, - /// we have this helper function that can be used for both. + /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used + /// for both. #[inline] #[must_use] - fn cmp_dom(&self, other: &Domain<'_>) -> Ordering { - cmp_doms(&self.domain, other).map_or_else( - |ord| { - if ord == Ordering::Equal && self.subdomains { + fn cmp_dom(&self, other: &Domain<&str>) -> Ordering { + match self.domain.cmp_by_domain_ordering(other) { + DomainOrdering::Less => Ordering::Less, + DomainOrdering::Shorter => { + if self.subdomains { Ordering::Greater } else { - ord + Ordering::Less } - }, - |()| { - // At this point `self` and `other` have different number of labels; - // otherwise they would have the same domain which was already - // checked for in `cmp_doms`. 
+ } + DomainOrdering::Equal => { if self.subdomains { Ordering::Greater } else { - self.domain.label_count().cmp(&other.label_count()) + Ordering::Equal } - }, - ) + } + DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater, + } } /// The total order that is defined follows the following hierarchy: - /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. + /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. /// 2. If 1. evaluates as not equivalent, then return the result. /// 3. If `self` represents a single `Domain` (i.e., `!self.is_subdomains()`), /// then return the comparison of label counts. @@ -1110,7 +340,7 @@ impl<'a> Adblock<'a> { self.cmp_dom(&other.domain) } /// The total order that is defined follows the following hierarchy: - /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. + /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. /// 2. If 1. evaluates as not equivalent, then return the result. /// 3. If both domains represent a single `Domain`, then return the comparison /// of label counts. @@ -1125,48 +355,45 @@ impl<'a> Adblock<'a> { #[inline] #[must_use] pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { - cmp_doms(&self.domain, &other.domain).map_or_else( - |ord| { - if ord == Ordering::Equal { - if self.subdomains { - Ordering::Greater - } else if other.proper_subdomains { - Ordering::Less - } else { - ord - } + match self.domain.cmp_by_domain_ordering(&other.domain) { + DomainOrdering::Less => Ordering::Less, + DomainOrdering::Shorter => { + if self.subdomains { + Ordering::Greater } else { - ord + Ordering::Less } - }, - |()| { - // At this point `self` and `other` have different number of labels; - // otherwise they would have the same domain which was already - // checked for in `cmp_doms`. 
+ } + DomainOrdering::Equal => { if self.subdomains { - if !other.proper_subdomains - || self.domain.label_count() < other.domain.label_count() - { - Ordering::Greater - } else { + Ordering::Greater + } else if other.proper_subdomains { + Ordering::Less + } else { + Ordering::Equal + } + } + DomainOrdering::Longer => { + if self.subdomains { + if other.proper_subdomains { Ordering::Less + } else { + Ordering::Greater } - } else if other.proper_subdomains - || self.domain.label_count() < other.domain.label_count() - { + } else if other.proper_subdomains { Ordering::Less } else { Ordering::Greater } - }, - ) + } + DomainOrdering::Greater => Ordering::Greater, + } } - /// Same as [`Adblock::cardinality`] except - /// that a `BigUint` is returned. - /// Note the count _includes_ the `Domain` itself - /// when `self.is_subdomains()`. + /// Same as [`Adblock::cardinality`] except that a `BigUint` is returned. Note the count _includes_ + /// the `Domain` itself when `self.is_subdomains()`. /// /// `!self.is_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`. + #[allow(clippy::arithmetic_side_effects)] #[inline] #[must_use] pub fn domain_count(&self) -> BigUint { @@ -1178,7 +405,6 @@ impl<'a> Adblock<'a> { } } impl Display for Adblock<'_> { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( @@ -1229,7 +455,7 @@ impl PartialOrd<Adblock<'_>> for Adblock<'_> { } impl Ord for Adblock<'_> { /// The total order that is defined follows the following hierarchy: - /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. + /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. /// 2. If 1. evaluates as not equivalent, then return the result. /// 3. If both domains represent a single `Domain`, then return the comparison /// of label counts. 
@@ -1240,42 +466,43 @@ impl Ord for Adblock<'_> { #[inline] #[must_use] fn cmp(&self, other: &Self) -> Ordering { - cmp_doms(&self.domain, &other.domain).map_or_else( - |ord| { - if ord == Ordering::Equal { - if self.subdomains { - if other.subdomains { - ord - } else { - Ordering::Greater - } - } else if other.subdomains { - Ordering::Less + match self.domain.cmp_by_domain_ordering(&other.domain) { + DomainOrdering::Less => Ordering::Less, + DomainOrdering::Shorter => { + if self.subdomains { + Ordering::Greater + } else { + Ordering::Less + } + } + DomainOrdering::Equal => { + if self.subdomains { + if other.subdomains { + Ordering::Equal } else { - ord + Ordering::Greater } + } else if other.subdomains { + Ordering::Less } else { - ord + Ordering::Equal } - }, - |()| { - // At this point `self` and `other` have different number of labels; - // otherwise they would have the same domain which was already - // checked for in `cmp_doms`. + } + DomainOrdering::Longer => { if self.subdomains { - if !other.subdomains || self.domain.label_count() < other.domain.label_count() { - Ordering::Greater - } else { + if other.subdomains { Ordering::Less + } else { + Ordering::Greater } - } else if other.subdomains || self.domain.label_count() < other.domain.label_count() - { + } else if other.subdomains { Ordering::Less } else { Ordering::Greater } - }, - ) + } + DomainOrdering::Greater => Ordering::Greater, + } } } impl PartialOrd<DomainOnly<'_>> for Adblock<'_> { @@ -1297,7 +524,7 @@ impl PartialOrd<Wildcard<'_>> for Adblock<'_> { } } impl<'a> Set for Adblock<'a> { - type Elem = Domain<'a>; + type Elem = Domain<&'a str>; #[inline] fn bounded_cardinality(&self) -> BoundedCardinality { BoundedCardinality::from_biguint_exact(self.domain_count()) @@ -1313,43 +540,20 @@ impl<'a> Set for Adblock<'a> { { if self.subdomains { let dom2 = elem.borrow(); - self.domain.label_count() <= dom2.label_count() - && if self.domain.flag.eq_ignore_case(dom2.flag) { - 
self.domain.same_labels_ignore_case(dom2) - } else { - self.domain.same_labels(dom2) - } + self.domain.label_count() <= dom2.label_count() && self.domain.same_branch(dom2) } else { self.domain == *elem.borrow() } } #[inline] fn is_proper_subset(&self, val: &Self) -> bool { - // A single domain can never be a proper superset. - // Subdomains` cannot be a proper superset - // if it has more labels or the same number of labels - // as another subdomains. - // In all other cases, we need to recursively check from the TLD - // that the labels are the same. + // A single domain can never be a proper superset. Subdomains` cannot be a proper superset if it has + // more labels or the same number of labels as another subdomains. In all other cases, we need to + // recursively check from the TLD that the labels are the same. val.subdomains && match val.domain.label_count().cmp(&self.domain.label_count()) { - Ordering::Less => { - if self.domain.flag.eq_ignore_case(val.domain.flag) { - val.domain.same_labels_ignore_case(&self.domain) - } else { - val.domain.same_labels(&self.domain) - } - } - Ordering::Equal => { - !self.subdomains - && if val.domain.flag.eq_ignore_case(self.domain.flag) { - val.domain.value.eq_ignore_ascii_case(self.domain.value) - || val.domain.same_labels_ignore_case(&self.domain) - } else { - val.domain.value == self.domain.value - || val.domain.same_labels(&self.domain) - } - } + Ordering::Less => val.domain.same_branch(&self.domain), + Ordering::Equal => !self.subdomains && val.domain.same_branch(&self.domain), Ordering::Greater => false, } } @@ -1359,46 +563,30 @@ impl<'a> Set for Adblock<'a> { } } impl SetOrd for Adblock<'_> {} -impl<'a> AsRef<Domain<'a>> for Adblock<'a> { - #[inline] - fn as_ref(&self) -> &Domain<'a> { - &self.domain - } -} -impl<'a> AsRef<str> for Adblock<'a> { - #[inline] - fn as_ref(&self) -> &'a str { - self.as_str() - } -} impl<'a> Deref for Adblock<'a> { - type Target = Domain<'a>; + type Target = Domain<&'a str>; #[inline] fn 
deref(&self) -> &Self::Target { &self.domain } } impl<'a> ParsedDomain<'a> for Adblock<'a> { - type Error = DomainErr; - #[allow(unsafe_code, clippy::indexing_slicing)] + type Error = FirefoxDomainErr; + #[allow(unsafe_code, clippy::indexing_slicing, clippy::into_iter_on_ref)] #[inline] fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { - // First remove leading whitepace. - // Then check for comments via '#' and '!'. - // Return Blank iff empty. - // Return Comment iff '#' or '!' is the first character. - // Remove trailing whitespace. - // Next remove the last byte if it is '^' as well as whitespace before. - // Next track and remove '||' at the beginning and any subsequent whitespace. + // First remove leading whitepace. Then check for comments via '#' and '!'. Return Blank iff empty. + // Return Comment iff '#' or '!' is the first character. Remove trailing whitespace. Next remove the + // last byte if it is '^' as well as whitespace before. Next track and remove '||' at the beginning + // and any subsequent whitespace. let mut value = val.as_bytes().trim_ascii_start(); value.first().map_or_else( || Ok(Value::Blank), |byt| { if *byt == b'#' || *byt == b'!' { // SAFETY: - // `value` came from `val` with leading ASCII whitespace removed - // which is still valid UTF-8. Since the first byte is '#' or '$' - // the remaining bytes is still valid UTF-8. + // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 + // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; Ok(Value::Comment(comment)) } else { @@ -1421,20 +609,19 @@ impl<'a> ParsedDomain<'a> for Adblock<'a> { } }, ); - // `Domain`s allow `$`, but we don't want to allow that symbol - // for Adblock-style rules. + // `Domain`s allow `$`, but we don't want to allow that symbol for Adblock-style rules. 
val2.into_iter() .try_fold((), |(), byt2| { if *byt2 == b'$' { - Err(DomainErr::InvalidAdblockDomain) + Err(FirefoxDomainErr::InvalidAdblockDomain) } else { Ok(()) } }) .and_then(|()| { - Domain::try_from_slice(val2).map(|domain| { - // A domain of length 252 or 253 can't have subdomains - // due to there not being enough characters. + domain_no_ip(val2).map(|domain| { + // A domain of length 252 or 253 can't have subdomains due to there not being enough + // characters. Value::Domain(Self { subdomains: if domain.len().get() > 251 { false @@ -1450,7 +637,7 @@ impl<'a> ParsedDomain<'a> for Adblock<'a> { ) } #[inline] - fn domain(&self) -> &Domain<'a> { + fn domain(&self) -> &Domain<&'a str> { &self.domain } #[inline] @@ -1464,20 +651,21 @@ impl<'a> ParsedDomain<'a> for Adblock<'a> { }) } } -/// Domain constructed from a [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax) -/// with the requirement that the rule conforms to the following regex: +/// Domain constructed from a +/// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax) with the +/// requirement that the rule conforms to the following regex: /// /// `^<ws>*<domain><ws>*(#.*)?$` /// -/// where `<domain>` conforms to a valid [`Domain`], and `<ws>` is any sequence of -/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). +/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], is not of the form of an IPv4 +/// address, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). /// -/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments -/// after a valid domain are ignored and will be parsed into a [`Value::Domain`]. +/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain +/// are ignored and will be parsed into a [`Value::Domain`]. 
#[derive(Clone, Debug)] pub struct DomainOnly<'a> { /// The `Domain`. - domain: Domain<'a>, + domain: Domain<&'a str>, } impl<'a> DomainOnly<'a> { /// Read [`Adblock::cmp_domain_only`]. @@ -1498,8 +686,7 @@ impl<'a> DomainOnly<'a> { pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { other.cmp_domain_only(self).reverse() } - /// Same as [`DomainOnly::cardinality`] except - /// that a `NonZeroU8` is returned. + /// Same as [`DomainOnly::cardinality`] except that a `NonZeroU8` is returned. /// /// The value is always 1. #[allow(unsafe_code)] @@ -1574,14 +761,13 @@ impl PartialOrd<Wildcard<'_>> for DomainOnly<'_> { } } impl Display for DomainOnly<'_> { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { self.domain.fmt(f) } } impl<'a> Set for DomainOnly<'a> { - type Elem = Domain<'a>; + type Elem = Domain<&'a str>; #[inline] fn bounded_cardinality(&self) -> BoundedCardinality { BoundedCardinality::from_biguint_exact(self.domain_count().get().into()) @@ -1607,34 +793,21 @@ impl<'a> Set for DomainOnly<'a> { } } impl SetOrd for DomainOnly<'_> {} -impl<'a> AsRef<Domain<'a>> for DomainOnly<'a> { - #[inline] - fn as_ref(&self) -> &Domain<'a> { - &self.domain - } -} -impl<'a> AsRef<str> for DomainOnly<'a> { - #[inline] - fn as_ref(&self) -> &'a str { - self.as_str() - } -} -impl<'a> Borrow<Domain<'a>> for DomainOnly<'a> { - #[inline] - fn borrow(&self) -> &Domain<'a> { - &self.domain - } -} impl<'a> Deref for DomainOnly<'a> { - type Target = Domain<'a>; + type Target = Domain<&'a str>; #[inline] fn deref(&self) -> &Self::Target { &self.domain } } impl<'a> ParsedDomain<'a> for DomainOnly<'a> { - type Error = DomainErr; - #[allow(unsafe_code, clippy::indexing_slicing)] + type Error = FirefoxDomainErr; + #[allow( + unsafe_code, + clippy::arithmetic_side_effects, + clippy::indexing_slicing, + clippy::into_iter_on_ref + )] #[inline] fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { let value = 
val.as_bytes().trim_ascii_start(); @@ -1643,13 +816,12 @@ impl<'a> ParsedDomain<'a> for DomainOnly<'a> { |byt| { if *byt == b'#' { // SAFETY: - // `value` came from `val` with leading ASCII whitespace removed - // which is still valid UTF-8. Since the first byte is '#' or '$' - // the remaining bytes is still valid UTF-8. + // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 + // since the first byte is '#' or '$' the remaining bytes are still valid UTF-8. let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; Ok(Value::Comment(comment)) } else { - Domain::try_from_slice( + domain_no_ip( value[..value .into_iter() .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) @@ -1662,7 +834,7 @@ impl<'a> ParsedDomain<'a> for DomainOnly<'a> { ) } #[inline] - fn domain(&self) -> &Domain<'a> { + fn domain(&self) -> &Domain<&'a str> { &self.domain } #[inline] @@ -1670,23 +842,24 @@ impl<'a> ParsedDomain<'a> for DomainOnly<'a> { write_rpz_line(&mut writer, self.domain(), action, false) } } -/// Domain constructed from a [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax) -/// with the requirement that the rule conforms to the following extended regex: +/// Domain constructed from a +/// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax) with the +/// requirement that the rule conforms to the following extended regex: /// /// `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` /// -/// where `<domain>` conforms to a valid [`Domain`], `<ws>` is any sequence of -/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace), and +/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], is not of the form of an IPv4 +/// address, `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace), and /// `<ip>` is one of the following: /// /// `::`, `::1`, `0.0.0.0`, or 
`127.0.0.1`. /// -/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments -/// after a valid domain are ignored and will be parsed into a [`Value::Domain`]. +/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain +/// are ignored and will be parsed into a [`Value::Domain`]. #[derive(Clone, Debug)] pub struct Hosts<'a> { /// The `Domain`. - domain: Domain<'a>, + domain: Domain<&'a str>, } impl<'a> Hosts<'a> { /// Read [`Adblock::cmp_hosts`]. @@ -1707,8 +880,7 @@ impl<'a> Hosts<'a> { pub fn cmp_wildcard(&self, other: &Wildcard<'_>) -> Ordering { other.cmp_hosts(self).reverse() } - /// Same as [`Hosts::cardinality`] except - /// that a `NonZeroU8` is returned. + /// Same as [`Hosts::cardinality`] except that a `NonZeroU8` is returned. /// /// The value is always 1. #[allow(unsafe_code)] @@ -1783,14 +955,13 @@ impl PartialOrd<Wildcard<'_>> for Hosts<'_> { } } impl Display for Hosts<'_> { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { self.domain.fmt(f) } } impl<'a> Set for Hosts<'a> { - type Elem = Domain<'a>; + type Elem = Domain<&'a str>; #[inline] fn bounded_cardinality(&self) -> BoundedCardinality { BoundedCardinality::from_biguint_exact(self.domain_count().get().into()) @@ -1816,34 +987,21 @@ impl<'a> Set for Hosts<'a> { } } impl SetOrd for Hosts<'_> {} -impl<'a> AsRef<Domain<'a>> for Hosts<'a> { - #[inline] - fn as_ref(&self) -> &Domain<'a> { - &self.domain - } -} -impl<'a> AsRef<str> for Hosts<'a> { - #[inline] - fn as_ref(&self) -> &'a str { - self.as_str() - } -} -impl<'a> Borrow<Domain<'a>> for Hosts<'a> { - #[inline] - fn borrow(&self) -> &Domain<'a> { - &self.domain - } -} impl<'a> Deref for Hosts<'a> { - type Target = Domain<'a>; + type Target = Domain<&'a str>; #[inline] fn deref(&self) -> &Self::Target { &self.domain } } impl<'a> ParsedDomain<'a> for Hosts<'a> { - type Error = DomainErr; - #[allow(unsafe_code, 
clippy::indexing_slicing)] + type Error = FirefoxDomainErr; + #[allow( + unsafe_code, + clippy::arithmetic_side_effects, + clippy::indexing_slicing, + clippy::into_iter_on_ref + )] #[inline] fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { let mut value = val.as_bytes().trim_ascii_start(); @@ -1852,15 +1010,14 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> { |byt| { if *byt == b'#' { // SAFETY: - // `value` came from `val` with leading ASCII whitespace removed - // which is still valid UTF-8. Since the first byte is '#' or '$' - // the remaining bytes is still valid UTF-8. + // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 + // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; Ok(Value::Comment(comment)) } else { value = value .get(..3) - .ok_or(DomainErr::InvalidHostsIP) + .ok_or(FirefoxDomainErr::InvalidHostsIP) .and_then(|fst| { if fst == b"::1" { Ok(&value[3..]) @@ -1869,19 +1026,19 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> { } else { value .get(..7) - .ok_or(DomainErr::InvalidHostsIP) + .ok_or(FirefoxDomainErr::InvalidHostsIP) .and_then(|fst2| { if fst2 == b"0.0.0.0" { Ok(&value[7..]) } else { value .get(..9) - .ok_or(DomainErr::InvalidHostsIP) + .ok_or(FirefoxDomainErr::InvalidHostsIP) .and_then(|fst3| { if fst3 == b"127.0.0.1" { Ok(&value[9..]) } else { - Err(DomainErr::InvalidHostsIP) + Err(FirefoxDomainErr::InvalidHostsIP) } }) } @@ -1891,11 +1048,10 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> { let len = value.len(); value = value.trim_ascii_start(); if len == value.len() { - // There has to be at least one space or tab between - // the IP and domain. - Err(DomainErr::InvalidHostsIP) + // There has to be at least one space or tab between the IP and domain. 
+ Err(FirefoxDomainErr::InvalidHostsIP) } else { - Domain::try_from_slice( + domain_no_ip( value[..value .into_iter() .try_fold( @@ -1912,7 +1068,7 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> { ) } #[inline] - fn domain(&self) -> &Domain<'a> { + fn domain(&self) -> &Domain<&'a str> { &self.domain } #[inline] @@ -1920,29 +1076,29 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> { write_rpz_line(&mut writer, self.domain(), action, false) } } -/// Domain constructed from a [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext) +/// Domain constructed from a +/// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext) /// with the requirement that the rule conforms to the following extended regex: /// /// `^<ws>*(\*\.)?<domain><ws>*(#.*)?$` /// -/// where `<domain>` conforms to a valid [`Domain`], and `<ws>` is any sequence of -/// [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). +/// where `<domain>` conforms to a valid [`Domain`] based on [`ASCII_FIREFOX`], is not of the form of an IPv4 +/// address, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). /// /// If `domain` begins with `*.`, then `domain` must have length less than 252. /// -/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments -/// after a valid domain are ignored and will be parsed into a [`Value::Domain`]. +/// Comments are any lines that start with `#` (ignoring whitespace). Any in-line comments after a valid domain +/// are ignored and will be parsed into a [`Value::Domain`]. #[derive(Clone, Debug)] pub struct Wildcard<'a> { /// The `Domain`. - domain: Domain<'a>, - /// `true` iff `domain` represents all proper subdomains. - /// Note that this does _not_ include `domain` itself. + domain: Domain<&'a str>, + /// `true` iff `domain` represents all proper subdomains. 
Note that this does _not_ include `domain` itself. proper_subdomains: bool, } impl<'a> Wildcard<'a> { - /// Returns `true` iff the contained [`Domain`] represents all proper subdomains. - /// Note this does _not_ include the `Domain` itself. + /// Returns `true` iff the contained [`Domain`] represents all proper subdomains. Note this does _not_ + /// include the `Domain` itself. #[inline] #[must_use] pub const fn is_proper_subdomains(&self) -> bool { @@ -1954,33 +1110,32 @@ impl<'a> Wildcard<'a> { pub fn cmp_adblock(&self, other: &Adblock<'_>) -> Ordering { other.cmp_wildcard(self).reverse() } - /// Since `DomainOnly` and `Hosts` are treated the same, - /// we have this helper function that can be used for both. + /// Since `DomainOnly` and `Hosts` are treated the same, we have this helper function that can be used + /// for both. #[inline] #[must_use] - fn cmp_dom(&self, other: &Domain<'_>) -> Ordering { - cmp_doms(&self.domain, other).map_or_else( - |ord| { - if ord == Ordering::Equal && self.proper_subdomains { + fn cmp_dom(&self, other: &Domain<&str>) -> Ordering { + match self.domain.cmp_by_domain_ordering(other) { + DomainOrdering::Less => Ordering::Less, + DomainOrdering::Shorter => { + if self.proper_subdomains { Ordering::Greater } else { - ord + Ordering::Less } - }, - |()| { - // At this point `self` and `other` have different number of labels; - // otherwise they would have the same domain which was already - // checked for in `cmp_doms`. + } + DomainOrdering::Equal => { if self.proper_subdomains { Ordering::Greater } else { - self.domain.label_count().cmp(&other.label_count()) + Ordering::Equal } - }, - ) + } + DomainOrdering::Longer | DomainOrdering::Greater => Ordering::Greater, + } } /// The total order that is defined follows the following hierarchy: - /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. + /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. /// 2. If 1. 
evaluates as not equivalent, then return the result. /// 3. If `self` represents a single `Domain` (i.e., `!self.is_proper_subdomains()`), /// then return the comparison of label counts. @@ -1998,10 +1153,8 @@ impl<'a> Wildcard<'a> { pub fn cmp_hosts(&self, other: &Hosts<'_>) -> Ordering { self.cmp_dom(&other.domain) } - /// Same as [`Wildcard::cardinality`] except - /// that a `BigUint` is returned. - /// Note the count does _not_ include the `Domain` itself - /// when `self.is_proper_subdomains()`. + /// Same as [`Wildcard::cardinality`] except that a `BigUint` is returned. Note the count does _not_ include + /// the `Domain` itself when `self.is_proper_subdomains()`. /// /// `!self.is_proper_subdomains()` ⇔ `self.domain_count() == BigUint::new(vec![1])`. #[inline] @@ -2053,7 +1206,7 @@ impl PartialOrd<Wildcard<'_>> for Wildcard<'_> { } impl Ord for Wildcard<'_> { /// The total order that is defined follows the following hierarchy: - /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. + /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. /// 2. If 1. evaluates as not equivalent, then return the result. /// 3. If both domains represent a single `Domain`, then return the comparison /// of label counts. 
@@ -2064,45 +1217,43 @@ impl Ord for Wildcard<'_> { #[inline] #[must_use] fn cmp(&self, other: &Self) -> Ordering { - cmp_doms(&self.domain, &other.domain).map_or_else( - |ord| { - if ord == Ordering::Equal { - if self.proper_subdomains { - if other.proper_subdomains { - ord - } else { - Ordering::Greater - } - } else if other.proper_subdomains { - Ordering::Less + match self.domain.cmp_by_domain_ordering(&other.domain) { + DomainOrdering::Less => Ordering::Less, + DomainOrdering::Shorter => { + if self.proper_subdomains { + Ordering::Greater + } else { + Ordering::Less + } + } + DomainOrdering::Equal => { + if self.proper_subdomains { + if other.proper_subdomains { + Ordering::Equal } else { - ord + Ordering::Greater } + } else if other.proper_subdomains { + Ordering::Less } else { - ord + Ordering::Equal } - }, - |()| { - // At this point `self` and `other` have different number of labels; - // otherwise they would have the same domain which was already - // checked for in `cmp_doms`. 
+ } + DomainOrdering::Longer => { if self.proper_subdomains { - if !other.proper_subdomains - || self.domain.label_count() < other.domain.label_count() - { - Ordering::Greater - } else { + if other.proper_subdomains { Ordering::Less + } else { + Ordering::Greater } - } else if other.proper_subdomains - || self.domain.label_count() < other.domain.label_count() - { + } else if other.proper_subdomains { Ordering::Less } else { Ordering::Greater } - }, - ) + } + DomainOrdering::Greater => Ordering::Greater, + } } } impl PartialOrd<Adblock<'_>> for Wildcard<'_> { @@ -2124,7 +1275,6 @@ impl PartialOrd<Hosts<'_>> for Wildcard<'_> { } } impl Display for Wildcard<'_> { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( @@ -2136,7 +1286,7 @@ impl Display for Wildcard<'_> { } } impl<'a> Set for Wildcard<'a> { - type Elem = Domain<'a>; + type Elem = Domain<&'a str>; #[inline] fn bounded_cardinality(&self) -> BoundedCardinality { BoundedCardinality::from_biguint_exact(self.domain_count()) @@ -2152,31 +1302,19 @@ impl<'a> Set for Wildcard<'a> { { if self.proper_subdomains { let dom2 = elem.borrow(); - self.domain.label_count() < dom2.label_count() - && if self.domain.flag.eq_ignore_case(dom2.flag) { - self.domain.same_labels_ignore_case(dom2) - } else { - self.domain.same_labels(dom2) - } + self.domain.label_count() < dom2.label_count() && self.domain.same_branch(dom2) } else { self.domain == *elem.borrow() } } #[inline] fn is_proper_subset(&self, val: &Self) -> bool { - // A single domain can never be a proper superset. - // Proper subdomains cannot be a proper superset - // if it has more labels or the same number of labels - // as another domain. - // In all other cases, we need to recursively check from the TLD - // that the labels are the same. + // A single domain can never be a proper superset. Proper subdomains cannot be a proper superset if it + // has more labels or the same number of labels as another domain. 
In all other cases, we need to + // recursively check from the TLD that the labels are the same. val.proper_subdomains && val.domain.label_count() < self.domain.label_count() - && if self.domain.flag.eq_ignore_case(val.domain.flag) { - val.domain.same_labels_ignore_case(&self.domain) - } else { - val.domain.same_labels(&self.domain) - } + && val.domain.same_branch(&self.domain) } #[inline] fn is_subset(&self, val: &Self) -> bool { @@ -2184,28 +1322,21 @@ impl<'a> Set for Wildcard<'a> { } } impl SetOrd for Wildcard<'_> {} -impl<'a> AsRef<Domain<'a>> for Wildcard<'a> { - #[inline] - fn as_ref(&self) -> &Domain<'a> { - &self.domain - } -} -impl<'a> AsRef<str> for Wildcard<'a> { - #[inline] - fn as_ref(&self) -> &'a str { - self.as_str() - } -} impl<'a> Deref for Wildcard<'a> { - type Target = Domain<'a>; + type Target = Domain<&'a str>; #[inline] fn deref(&self) -> &Self::Target { &self.domain } } impl<'a> ParsedDomain<'a> for Wildcard<'a> { - type Error = DomainErr; - #[allow(unsafe_code, clippy::indexing_slicing)] + type Error = FirefoxDomainErr; + #[allow( + unsafe_code, + clippy::arithmetic_side_effects, + clippy::indexing_slicing, + clippy::into_iter_on_ref + )] #[inline] fn parse_value<'b: 'a>(val: &'b str) -> Result<Value<'a, Self>, Self::Error> { let value = val.as_bytes().trim_ascii_start(); @@ -2214,9 +1345,8 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> { |byt| { if *byt == b'#' { // SAFETY: - // `value` came from `val` with leading ASCII whitespace removed - // which is still valid UTF-8. Since the first byte is '#' or '$' - // the remaining bytes is still valid UTF-8. + // `value` came from `val` with leading ASCII whitespace removed which is still valid UTF-8 + // since the first byte is '#' or '$' the remaining bytes is still valid UTF-8. 
let comment = unsafe { str::from_utf8_unchecked(&value[1..]) }; Ok(Value::Comment(comment)) } else { @@ -2230,7 +1360,7 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> { } }, ); - Domain::try_from_slice( + domain_no_ip( val2[..val2 .into_iter() .try_fold(0, |i, byt2| if *byt2 == b'#' { Err(i) } else { Ok(i + 1) }) @@ -2240,7 +1370,7 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> { .and_then(|domain| { if proper_subdomains { if domain.len().get() > 251 { - Err(DomainErr::InvalidWildcardDomain) + Err(FirefoxDomainErr::InvalidWildcardDomain) } else { Ok(Value::Domain(Self { domain, @@ -2259,7 +1389,7 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> { ) } #[inline] - fn domain(&self) -> &Domain<'a> { + fn domain(&self) -> &Domain<&'a str> { &self.domain } #[inline] @@ -2267,8 +1397,7 @@ impl<'a> ParsedDomain<'a> for Wildcard<'a> { write_rpz_line(&mut writer, self.domain(), action, self.proper_subdomains) } } -/// A [`Domain`] in a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) -/// file. +/// A [`Domain`] in a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. #[allow(clippy::exhaustive_enums)] #[derive(Clone, Debug)] pub enum RpzDomain<'a> { @@ -2283,6 +1412,7 @@ pub enum RpzDomain<'a> { } impl<'a> RpzDomain<'a> { /// Returns `true` iff `self` represents a single [`Domain`]. + #[allow(clippy::ref_patterns)] #[inline] #[must_use] pub const fn is_domain(&self) -> bool { @@ -2292,9 +1422,9 @@ impl<'a> RpzDomain<'a> { Self::Wildcard(ref dom) => !dom.proper_subdomains, } } - /// Returns `true` iff `self` represents proper subdomains of - /// the contained [`Domain`] (i.e., is a [`Wildcard`] such that - /// [`Wildcard::is_proper_subdomains`]). + /// Returns `true` iff `self` represents proper subdomains of the contained [`Domain`] (i.e., + /// is a [`Wildcard`] such that [`Wildcard::is_proper_subdomains`]). 
+ #[allow(clippy::ref_patterns)] #[inline] #[must_use] pub const fn is_proper_subdomains(&self) -> bool { @@ -2303,9 +1433,9 @@ impl<'a> RpzDomain<'a> { Self::Wildcard(ref dom) => dom.proper_subdomains, } } - /// Returns `true` iff `self` represents subdomains of - /// the contained [`Domain`] (i.e., is an [`Adblock`] such that - /// [`Adblock::is_subdomains`]). + /// Returns `true` iff `self` represents subdomains of the contained [`Domain`] (i.e., is an + /// [`Adblock`] such that [`Adblock::is_subdomains`]). + #[allow(clippy::ref_patterns)] #[inline] #[must_use] pub const fn is_subdomains(&self) -> bool { @@ -2314,9 +1444,9 @@ impl<'a> RpzDomain<'a> { Self::DomainOnly(_) | Self::Hosts(_) | Self::Wildcard(_) => false, } } - /// Returns the count of [`Domain`]s represented by `self`. - /// This function is the same as [`RpzDomain::cardinality`] - /// except that it returns a `BigUint`. + /// Returns the count of [`Domain`]s represented by `self`. This function is the same as + /// [`RpzDomain::cardinality`] except that it returns a `BigUint`. + #[allow(clippy::ref_patterns)] #[inline] #[must_use] pub fn domain_count(&self) -> BigUint { @@ -2329,6 +1459,7 @@ impl<'a> RpzDomain<'a> { } } impl PartialEq<RpzDomain<'_>> for RpzDomain<'_> { + #[allow(clippy::ref_patterns)] #[inline] fn eq(&self, other: &RpzDomain<'_>) -> bool { match *self { @@ -2374,7 +1505,7 @@ impl PartialOrd<RpzDomain<'_>> for RpzDomain<'_> { } impl Ord for RpzDomain<'_> { /// The total order that is defined follows the following hierarchy: - /// 1. Pairwise comparisons of each [`Label`] starting from the TLDs. + /// 1. Pairwise comparisons of each [`ascii_domain::dom::Label`] starting from the TLDs. /// 2. If 1. evaluates as not equivalent, then return the result. /// 3. If both domains represent a single `Domain`, then return the comparison /// of label counts. 
@@ -2386,6 +1517,7 @@ impl Ord for RpzDomain<'_> { /// ascending order: /// /// `bar.com`, `www.bar.com`, `*.www.bar.com`, `||www.bar.com`, `*.bar.com`, `||bar.com`, `example.com`, `www.example.com`, `*.www.example.com`, `||www.example.com`, `*.example.com`, `||example.com`, `foo.com`, `www.foo.com`, `*.foo.com`, `*.com`, `example.net`, `*.net` + #[allow(clippy::ref_patterns)] #[inline] fn cmp(&self, other: &Self) -> Ordering { match *self { @@ -2417,7 +1549,7 @@ impl Ord for RpzDomain<'_> { } } impl Display for RpzDomain<'_> { - #[allow(clippy::min_ident_chars)] + #[allow(clippy::ref_patterns)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { @@ -2429,7 +1561,7 @@ impl Display for RpzDomain<'_> { } } impl<'a> Set for RpzDomain<'a> { - type Elem = Domain<'a>; + type Elem = Domain<&'a str>; #[inline] fn bounded_cardinality(&self) -> BoundedCardinality { BoundedCardinality::from_biguint_exact(self.domain_count()) @@ -2438,6 +1570,7 @@ impl<'a> Set for RpzDomain<'a> { fn cardinality(&self) -> Option<Cardinality> { Some(Cardinality::Finite(self.domain_count())) } + #[allow(clippy::ref_patterns)] #[inline] fn contains<Q>(&self, elem: &Q) -> bool where @@ -2450,17 +1583,13 @@ impl<'a> Set for RpzDomain<'a> { Self::Wildcard(ref dom) => dom.contains(elem), } } + #[allow(clippy::ref_patterns)] #[inline] fn is_proper_subset(&self, val: &Self) -> bool { /// Helper function that verifies all labels are the same. 
#[inline] - fn helper(left: &Domain<'_>, right: &Domain<'_>) -> bool { - left.label_count() <= right.label_count() - && if left.flag.eq_ignore_case(right.flag) { - left.same_labels_ignore_case(right) - } else { - left.same_labels(right) - } + fn helper(left: &Domain<&str>, right: &Domain<&str>) -> bool { + left.label_count() <= right.label_count() && left.same_branch(right) } match *val { Self::Adblock(ref dom) => { @@ -2529,23 +1658,17 @@ impl<'a> Set for RpzDomain<'a> { } } impl SetOrd for RpzDomain<'_> {} -impl<'a> AsRef<Domain<'a>> for RpzDomain<'a> { - #[inline] - fn as_ref(&self) -> &Domain<'a> { - self.domain() - } -} -impl<'a> AsRef<str> for RpzDomain<'a> { - #[inline] - fn as_ref(&self) -> &'a str { - self.as_str() - } -} impl<'a> Deref for RpzDomain<'a> { - type Target = Domain<'a>; + type Target = Domain<&'a str>; + #[allow(clippy::ref_patterns)] #[inline] fn deref(&self) -> &Self::Target { - self.domain() + match *self { + Self::Adblock(ref dom) => &dom.domain, + Self::DomainOnly(ref dom) => &dom.domain, + Self::Hosts(ref dom) => &dom.domain, + Self::Wildcard(ref dom) => &dom.domain, + } } } impl<'a: 'b, 'b> From<Adblock<'a>> for RpzDomain<'b> { @@ -2573,7 +1696,7 @@ impl<'a: 'b, 'b> From<Wildcard<'a>> for RpzDomain<'b> { } } impl<'a> ParsedDomain<'a> for RpzDomain<'a> { - type Error = DomainErr; + type Error = FirefoxDomainErr; #[inline] fn parse_value<'b: 'a>(value: &'b str) -> Result<Value<'a, Self>, Self::Error> { DomainOnly::parse_value(value).map_or_else( @@ -2615,15 +1738,17 @@ impl<'a> ParsedDomain<'a> for RpzDomain<'a> { }, ) } + #[allow(clippy::ref_patterns)] #[inline] - fn domain(&self) -> &Domain<'a> { + fn domain(&self) -> &Domain<&'a str> { match *self { - Self::Adblock(ref dom) => dom.domain(), - Self::DomainOnly(ref dom) => dom.domain(), - Self::Hosts(ref dom) => dom.domain(), - Self::Wildcard(ref dom) => dom.domain(), + Self::Adblock(ref dom) => &dom.domain, + Self::DomainOnly(ref dom) => &dom.domain, + Self::Hosts(ref dom) => 
&dom.domain, + Self::Wildcard(ref dom) => &dom.domain, } } + #[allow(clippy::ref_patterns)] #[inline] fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error> { match *self { @@ -2637,289 +1762,18 @@ impl<'a> ParsedDomain<'a> for RpzDomain<'a> { #[cfg(test)] mod tests { use super::{ - Adblock, Domain, DomainErr, DomainOnly, Hosts, ParsedDomain, RpzDomain, Value, Wildcard, + Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard, }; - use core::cmp::Ordering; + use ascii_domain::dom::DomainErr; use num_bigint::BigUint; use superset_map::SupersetSet; #[test] - fn test_dom_parse() { - // Test Ipv4Addr is error. - assert!(Domain::try_from("1.1.1.1").map_or_else(|e| e == DomainErr::Ipv4, |_| false)); - // Test empty is error. - assert!(Domain::try_from("").map_or_else(|e| e == DomainErr::Empty, |_| false)); - // Test empty label is error. - assert!(Domain::try_from("a..com").map_or_else(|e| e == DomainErr::EmptyLabel, |_| false)); - // Test label too long. - let val = "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com"; - // 4 + 64 + 4 - assert!(val.len() == 72); - assert!(Domain::try_from(val).map_or_else(|e| e == DomainErr::LabelLenExceeds63, |_| false)); - assert!(Domain::try_from( - "www.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.com" - ) - .map_or(false, |d| d.len().get() == 71)); - // Test domain too long. 
- assert!(Domain::try_from("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or_else(|e| e == DomainErr::LenExceeds253(254), |_| false)); - assert!(Domain::try_from("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa").map_or(false, |d| d.len().get() == 253 )); - // Test max labels. - assert!(Domain::try_from("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == DomainErr::LenExceeds253(255), |_| false)); - assert!(Domain::try_from("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |d| d.label_count().get() == 127 && d.len().get() == 253)); - assert!(Domain::try_from("a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.").map_or(false, |d| d.label_count().get() == 127 && d.len().get() == 253)); - // Test removal of trailing '.'. - assert!(Domain::try_from("com").map_or(false, |d| d.value == b"com")); - // Test case-insensitivity. 
- assert!(Domain::try_from("wwW.ExAMple.COm") - .map_or(false, |d| Domain::try_from("www.example.com") - .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal))); - assert!( - Domain::try_from("ww_W.com").map_or(false, |d| Domain::try_from("Ww_w.com") - .map_or(false, |d2| d == d2 && d.cmp(&d2) == Ordering::Equal)) - ); - // Test valid bytes - let mut vec = Vec::new(); - let mut counter = 0; - for i in 0u8..=255 { - vec.push(i); - match i { - b'!' - | b'$' - | b'&'..=b')' - | b'+'..=b'-' - | b'0'..=b'9' - | b';' - | b'=' - | b'A'..=b'Z' - | b'_'..=b'{' - | b'}'..=b'~' => { - counter += 1; - assert!(Domain::try_from_slice(vec.as_slice()) - .map_or(false, |d| d.value.len() == 1 && d.value[0] == i)) - } - b'.' => { - vec.push(i); - vec[0] = b'a'; - assert!( - Domain::try_from_slice(vec.as_slice()) - .map_or(false, |d| d.value.len() == 1 && d.value[0] == b'a') - && vec.pop() == Some(i) - ) - } - _ => assert!(Domain::try_from_slice(vec.as_slice()) - .map_or_else(|e| e == DomainErr::InvalidByte(i), |_| false)), - } - vec.pop(); - } - assert!(counter == 78); - } - #[test] - fn test_dom_into_iter() { - assert!(Domain::try_from("www.example.com").map_or(false, |d| { - let mut iter = d.into_iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next() else { - return false; - }; - if l.value != "com" { - return false; - } - if iter.len() != 2 { - return false; - } - let Some(l) = iter.next() else { return false }; - if l.value != "example" { - return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next() else { - return false; - }; - if iter.len() != 0 { - return false; - } - if l.value != "www" { - return false; - } - iter.next().is_none() - })); - assert!(Domain::try_from("www.example.com").map_or(false, |d| { - let mut iter = d.into_iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "www" { - return false; - } - if iter.len() != 2 { - return 
false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "example" { - return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "com" { - return false; - } - if iter.len() != 0 { - return false; - } - iter.next_back().is_none() - })); - assert!(Domain::try_from("www.example.com").map_or(false, |d| { - let mut iter = d.into_iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "www" { - return false; - } - if iter.len() != 2 { - return false; - } - let Some(l) = iter.next() else { return false }; - if l.value != "com" { - return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "example" { - return false; - } - if iter.len() != 0 { - return false; - } - iter.next().is_none() && iter.next_back().is_none() - })); - } - #[test] - fn test_dom_iter() { - assert!(Domain::try_from("www.example.com").map_or(false, |d| { - let mut iter = d.iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next() else { - return false; - }; - if l.value != "com" { - return false; - } - if iter.len() != 2 { - return false; - } - let Some(l) = iter.next() else { return false }; - if l.value != "example" { - return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next() else { - return false; - }; - if iter.len() != 0 { - return false; - } - if l.value != "www" { - return false; - } - iter.next().is_none() - })); - assert!(Domain::try_from("www.example.com").map_or(false, |d| { - let mut iter = d.iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "www" { - return false; - } - if iter.len() != 2 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "example" { - 
return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "com" { - return false; - } - if iter.len() != 0 { - return false; - } - iter.next_back().is_none() - })); - assert!(Domain::try_from("www.example.com").map_or(false, |d| { - let mut iter = d.iter(); - if iter.len() != 3 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "www" { - return false; - } - if iter.len() != 2 { - return false; - } - let Some(l) = iter.next() else { return false }; - if l.value != "com" { - return false; - } - if iter.len() != 1 { - return false; - } - let Some(l) = iter.next_back() else { - return false; - }; - if l.value != "example" { - return false; - } - if iter.len() != 0 { - return false; - } - iter.next().is_none() && iter.next_back().is_none() - })); - } - #[test] fn test_adblock_parse() { // Test subdomains. assert!( Adblock::parse_value("||www.example.com").map_or(false, |val| match val { - Value::Domain(ref dom) => dom.subdomains && dom.domain.value == b"www.example.com", + Value::Domain(ref dom) => + dom.subdomains && dom.domain.as_bytes() == b"www.example.com", Value::Comment(_) | Value::Blank => false, }) ); @@ -2929,7 +1783,7 @@ mod tests { false, |val| match val { Value::Domain(ref dom) => - dom.subdomains && dom.domain.value == b"www.example.com", + dom.subdomains && dom.domain.as_bytes() == b"www.example.com", Value::Comment(_) | Value::Blank => false, } ) @@ -2938,14 +1792,16 @@ mod tests { Adblock::parse_value("\t\t \twww.example.com \t\t \t\t ").map_or(false, |val| { match val { Value::Domain(ref dom) => { - !dom.subdomains && dom.domain.value == b"www.example.com" + !dom.subdomains && dom.domain.as_bytes() == b"www.example.com" } Value::Comment(_) | Value::Blank => false, } }) ); - assert!(Adblock::parse_value("www .example.com") - .map_or_else(|err| err == DomainErr::InvalidByte(b' '), |_| false)); + assert!(Adblock::parse_value("www 
.example.com").map_or_else( + |err| err == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')), + |_| false + )); assert!( Adblock::parse_value("||www.ExAMPle.COm").map_or(false, |val| { match val { @@ -2986,13 +1842,16 @@ mod tests { assert!( DomainOnly::parse_value(" \t\t \t\t \twww.example.com#asdflkj asdf alskdfj ") .map_or(false, |val| match val { - Value::Domain(ref dom) => dom.domain.value == b"www.example.com", + Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com", Value::Comment(_) | Value::Blank => false, }) ); assert!( DomainOnly::parse_value(" \t\t \t\t \twww.example.com \t\t ^ \t\t ") - .map_or_else(|e| e == DomainErr::InvalidByte(b' '), |_| false) + .map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')), + |_| false + ) ); // Test case-insensitivity. assert!( @@ -3023,24 +1882,29 @@ mod tests { " \t\t 127.0.0.1\t\t \twww.example.com#asdflkj asdf alskdfj " ) .map_or(false, |val| match val { - Value::Domain(ref dom) => dom.domain.value == b"www.example.com", + Value::Domain(ref dom) => dom.domain.as_bytes() == b"www.example.com", Value::Comment(_) | Value::Blank => false, })); assert!( Hosts::parse_value(" \t\t 0.0.0.0\t\t \twww.example.com \t\t ^ \t\t ") - .map_or_else(|e| e == DomainErr::InvalidByte(b' '), |_| false) + .map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')), + |_| false + ) ); - assert!(Hosts::parse_value("::1\twww .example.com") - .map_or_else(|e| e == DomainErr::InvalidByte(b' '), |_| false)); + assert!(Hosts::parse_value("::1\twww .example.com").map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')), + |_| false + )); // Test invalid IP assert!(Hosts::parse_value("::2 www.example.com") - .map_or_else(|e| e == DomainErr::InvalidHostsIP, |_| false)); + .map_or_else(|e| e == FirefoxDomainErr::InvalidHostsIP, |_| false)); assert!(Hosts::parse_value(":2 www.example.com") - .map_or_else(|e| e == 
DomainErr::InvalidHostsIP, |_| false)); + .map_or_else(|e| e == FirefoxDomainErr::InvalidHostsIP, |_| false)); assert!(Hosts::parse_value("www.example.com") - .map_or_else(|e| e == DomainErr::InvalidHostsIP, |_| false)); + .map_or_else(|e| e == FirefoxDomainErr::InvalidHostsIP, |_| false)); assert!(Hosts::parse_value("10.4.2.256 www.example.com") - .map_or_else(|e| e == DomainErr::InvalidHostsIP, |_| false)); + .map_or_else(|e| e == FirefoxDomainErr::InvalidHostsIP, |_| false)); // Test case-insensitivity. assert!( Hosts::parse_value(":: www.ExAMPle.Com").map_or(false, |val| match val { @@ -3067,19 +1931,30 @@ mod tests { #[test] fn test_wildcard_parse_value() { // Test bad asterisk. - assert!(Wildcard::parse_value("*") - .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false)); - assert!(Wildcard::parse_value("www*.example.com") - .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false)); - assert!(Wildcard::parse_value("www.*.com") - .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false)); - assert!( - Wildcard::parse_value("*..com").map_or_else(|e| e == DomainErr::EmptyLabel, |_| false) - ); - assert!(Wildcard::parse_value("www.com*") - .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false)); - assert!(Wildcard::parse_value("ww*w.com") - .map_or_else(|e| e == DomainErr::InvalidByte(b'*'), |_| false)); + assert!(Wildcard::parse_value("*").map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')), + |_| false + )); + assert!(Wildcard::parse_value("www*.example.com").map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')), + |_| false + )); + assert!(Wildcard::parse_value("www.*.com").map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')), + |_| false + )); + assert!(Wildcard::parse_value("*..com").map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::EmptyLabel), + |_| false + )); + 
assert!(Wildcard::parse_value("www.com*").map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')), + |_| false + )); + assert!(Wildcard::parse_value("ww*w.com").map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b'*')), + |_| false + )); // Test case-insensitivity. assert!( Wildcard::parse_value("*.wWw.ExamPLE.com").map_or(false, |val| match val { @@ -3099,7 +1974,7 @@ mod tests { assert!( Wildcard::parse_value("*.www.example.com").map_or(false, |val| match val { Value::Domain(ref dom) => - dom.domain.value == b"www.example.com" && dom.proper_subdomains, + dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains, Value::Comment(_) | Value::Blank => false, }) ); @@ -3108,7 +1983,7 @@ mod tests { Wildcard::parse_value(" \t\t \t\t \t*.www.example.com#asdflkj asdf alskdfj ") .map_or(false, |val| match val { Value::Domain(ref dom) => - dom.domain.value == b"www.example.com" && dom.proper_subdomains, + dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains, Value::Comment(_) | Value::Blank => false, }) ); @@ -3116,7 +1991,7 @@ mod tests { Wildcard::parse_value(" \t\t \t\t \twww.example.com #asdflkj asdf alskdfj ") .map_or(false, |val| match val { Value::Domain(ref dom) => - dom.domain.value == b"www.example.com" && !dom.proper_subdomains, + dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains, Value::Comment(_) | Value::Blank => false, }) ); @@ -3125,7 +2000,7 @@ mod tests { Wildcard::parse_value(" \t\t *.www.example.com \t\t \t ").map_or(false, |val| { match val { Value::Domain(ref dom) => { - dom.domain.value == b"www.example.com" && dom.proper_subdomains + dom.domain.as_bytes() == b"www.example.com" && dom.proper_subdomains } Value::Comment(_) | Value::Blank => false, } @@ -3135,16 +2010,18 @@ mod tests { Wildcard::parse_value("\t\t \twww.example.com \t\t \t\t ").map_or(false, |val| { match val { Value::Domain(ref dom) => { - dom.domain.value == b"www.example.com" && 
!dom.proper_subdomains + dom.domain.as_bytes() == b"www.example.com" && !dom.proper_subdomains } Value::Comment(_) | Value::Blank => false, } }) ); - assert!(Wildcard::parse_value("www .example.com") - .map_or_else(|e| e == DomainErr::InvalidByte(b' '), |_| false)); + assert!(Wildcard::parse_value("www .example.com").map_or_else( + |e| e == FirefoxDomainErr::InvalidDomain(DomainErr::InvalidByte(b' ')), + |_| false + )); // Test 127 labels after wildcard error. - assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == DomainErr::InvalidWildcardDomain, |_| false)); + assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or_else(|e| e == FirefoxDomainErr::InvalidWildcardDomain, |_| false)); // Test 126 labels after wildcard is ok. 
assert!(Wildcard::parse_value("*.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a").map_or(false, |val| match val { Value::Domain(ref dom) => dom.domain.label_count().get() == 126 && dom.proper_subdomains, @@ -3166,27 +2043,27 @@ mod tests { assert!( RpzDomain::parse_value("*.www.example.com").map_or(false, |val| { let dom = val.unwrap_domain(); - dom.is_proper_subdomains() && dom.domain().value == b"www.example.com" + dom.is_proper_subdomains() && dom.domain().as_bytes() == b"www.example.com" }) ); assert!( RpzDomain::parse_value("||www.example.com").map_or(false, |val| { let dom = val.unwrap_domain(); - dom.is_subdomains() && dom.domain().value == b"www.example.com" + dom.is_subdomains() && dom.domain().as_bytes() == b"www.example.com" }) ); assert!( RpzDomain::parse_value("0.0.0.0 www.example.com").map_or(false, |val| { let dom = val.unwrap_domain(); !(dom.is_subdomains() || dom.is_proper_subdomains()) - && dom.domain().value == b"www.example.com" + && dom.domain().as_bytes() == b"www.example.com" }) ); assert!( RpzDomain::parse_value("www.example.com").map_or(false, |val| { let dom = val.unwrap_domain(); !(dom.is_subdomains() || dom.is_proper_subdomains()) - && dom.domain().value == b"www.example.com" + && dom.domain().as_bytes() == b"www.example.com" }) ); // Test case-insensitivity. 
@@ -3245,19 +2122,19 @@ mod tests { doms }).into_iter(); assert!(iter.next().map_or(false, |d| { - d.domain().value == b"aawww.abc" && d.is_subdomains() + d.domain().as_bytes() == b"aawww.abc" && d.is_subdomains() })); assert!(iter.next().map_or(false, |d| { - d.domain().value == b"abc.abc" && d.is_domain() + d.domain().as_bytes() == b"abc.abc" && d.is_domain() })); assert!(iter.next().map_or(false, |d| { - d.domain().value == b"abc.abc" && d.is_proper_subdomains() + d.domain().as_bytes() == b"abc.abc" && d.is_proper_subdomains() })); assert!(iter.next().map_or(false, |d| { - d.domain().value == b"com" && d.is_proper_subdomains() + d.domain().as_bytes() == b"com" && d.is_proper_subdomains() })); assert!(iter.next().map_or(false, |d| { - d.domain().value == b"NeT" && d.is_proper_subdomains() + d.domain().as_bytes() == b"NeT" && d.is_proper_subdomains() })); assert!(iter.next().is_none()); } @@ -3284,7 +2161,7 @@ mod tests { 2621943843, 4223295645, 1753858368, 130924388, 965594304, 3942586845, 1573844087, 4237886128, 481383133, 56931017, ]) && Wildcard::parse_value("*.1").map_or(false, |val2| { - val2.unwrap_domain().domain_count() == (val - BigUint::new(vec![256u32.pow(3)])) + val2.unwrap_domain().domain_count() == (val - BigUint::new(vec![366u32.pow(3)])) }) })); } diff --git a/src/dom_count_auto_gen.rs b/src/dom_count_auto_gen.rs @@ -1,61 +1,47 @@ -use crate::dom::Domain; +use ascii_domain::dom::Domain; use num_bigint::BigUint; /// The count of proper subdomains for both `Adblock` and `Wildcard` `Domain` when subdomains /// and proper subdomains are represented respectively. #[allow( clippy::arithmetic_side_effects, clippy::as_conversions, - clippy::implicit_return, clippy::indexing_slicing, clippy::too_many_lines, - clippy::unreadable_literal, - clippy::unseparated_literal_suffix + clippy::unreadable_literal )] #[inline] -pub fn proper_subdomain_count(dom: &Domain<'_>) -> BigUint { - /// Returns how many proper subdomains are IPv4 addresses. 
- /// We need to calculate this so that we can subtract this value - /// from the cached cardinalities. Note that we don't have to worry - /// about the `Domain` itself since an IPv4 address can't be parsed into - /// a `Domain`. +// 10 + 90 + 156 = 256 +// 30 + 180 + 156 = 366 +pub fn proper_subdomain_count(dom: &Domain<&str>) -> BigUint { + /// Returns how many proper subdomains are IPv4 addresses. We need to calculate this so that we can + /// subtract this value from the cached cardinalities. Note that we don't have to worry about the `Domain` + /// itself since an IPv4 address can't be parsed into a `Domain` via `crate::dom::domain_no_ip`. #[allow(clippy::cast_lossless)] #[inline] - fn ip_count(dom: &Domain<'_>) -> u32 { - // `Domain`s that have 4 or more `Label`s can't be an IPv4 address. - // Also `Domain`s must have at least one `Label`, so - // `0 < 4 - dom.label_count < 4` and `256^3 <= u32::MAX`. + fn ip_count(dom: &Domain<&str>) -> u32 { + // `Domain`s that have 4 or more `Label`s can't be an IPv4 address. Also `Domain`s must have at least one + // `Label`, so `0 < 4 - dom.label_count < 4` and `(10 * 3 + 90 * 2 + 156 * 1)^3 = (30 + 180 + 156)^3 + // = 366^3 <= u32::MAX`. if dom.label_count().get() < 4 { dom.into_iter() .try_fold((), |(), label| { - // Only a sequence of decimal numbers between 0 and 255 without leading - // 0s can be an IPv4 address. For `Domain`s that have such `Label`s, - // the total number of IPv4 addresses is simply 256^(4- label count). - // Note that `Label`s always have a length of at least 1 and - // any `Label` longer than 3 cannot be a valid `u8` without leading - // 0s. 
- match label.len() { - 1 => match label.as_str().as_bytes()[0] { - b'0'..=b'9' => Ok(()), - _ => Err(()), - }, - 2 => label.as_str().parse::<u8>().map_or(Err(()), |val| { - if val < 10 { - Err(()) - } else { - Ok(()) - } - }), - 3 => label.as_str().parse::<u8>().map_or(Err(()), |val| { - if val < 100 { - Err(()) - } else { - Ok(()) - } - }), - _ => Err(()), + // Only a sequence of 1 to 3 digits whose value is between 0 and 255 is a valid octet + // in an IPv4 address. For `Domain`s that have such `Label`s, the total number of IPv4 + // addresses is simply 366^(4 - label count). + // 366 comes from the fact that there are 3 distinct ways to represent integers < 10, + // 2 distinct ways to represent integers inclusively between 10 and 99, and 1 way + // to represent integers greater than 99 giving (3 * 10) + (2 * 90) + (1 * 156) = 366 + // ways a `Label` can be a valid octet for an IPv4 address. + // + // Note that `Label`s always have a length of at least 1 and any `Label` longer than 3 + // cannot be a valid octet in an IPv4 address. 
+ if label.len() < 4 { + label.as_str().parse::<u8>().map_or(Err(()), |_| Ok(())) + } else { + Err(()) } }) - .map_or(0, |()| 256u32.pow(4 - dom.label_count().get() as u32)) + .map_or(0, |()| 366u32.pow(4 - dom.label_count().get() as u32)) } else { 0 } diff --git a/src/file.rs b/src/file.rs @@ -1,32 +1,29 @@ -#![allow( - clippy::exhaustive_structs, - clippy::implicit_return, - clippy::into_iter_on_ref, - clippy::missing_trait_methods, - clippy::multiple_unsafe_ops_per_block, - clippy::question_mark_used, - clippy::ref_patterns, - clippy::single_char_lifetime_names, - clippy::wildcard_enum_match_arm -)] extern crate alloc; -use crate::dom::{Adblock, DomainErr, DomainOnly, Hosts, ParsedDomain, RpzDomain, Value, Wildcard}; +use crate::dom::{ + Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard, +}; use alloc::string::FromUtf8Error; -use core::borrow::Borrow; -use core::fmt::{self, Display, Formatter}; -use core::hash::Hash; -use core::ops::Deref; -use core::time::Duration; +use core::{ + borrow::Borrow, + fmt::{self, Display, Formatter}, + hash::Hash, + ops::Deref, + time::Duration, +}; use reqwest::Client; use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor}; -use std::collections::{HashMap, HashSet}; -use std::error::Error; -use std::fs; -use std::io::{self, ErrorKind}; -use std::path::{Path, PathBuf}; +use std::{ + collections::{HashMap, HashSet}, + error::Error, + fs, + io::{self, ErrorKind}, + path::{Path, PathBuf}, +}; use superset_map::SupersetSet; -use tokio::task::{JoinError, JoinSet}; -use tokio::time::{self, error::Elapsed}; +use tokio::{ + task::{JoinError, JoinSet}, + time::{self, error::Elapsed}, +}; use url::Url; /// Wrapper around an absolute [`PathBuf`] to a directory or file depending on `IS_DIR`. 
/// @@ -148,7 +145,7 @@ impl<'de, const IS_DIR: bool> Deserialize<'de> for AbsFilePath<IS_DIR> { fn expecting(&self, formatter: &mut Formatter) -> fmt::Result { formatter.write_str("struct AbsFilePath") } - #[allow(clippy::arithmetic_side_effects, clippy::min_ident_chars)] + #[allow(clippy::arithmetic_side_effects)] #[inline] fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> where @@ -207,7 +204,6 @@ impl<'de, const IS_DIR: bool> Deserialize<'de> for AbsFilePath<IS_DIR> { }, ) } - #[allow(clippy::min_ident_chars)] #[inline] fn visit_string<E>(self, v: String) -> Result<Self::Value, E> where @@ -266,7 +262,6 @@ impl<'de, const IS_DIR: bool> Deserialize<'de> for AbsFilePath<IS_DIR> { } } impl<const IS_DIR: bool> Display for AbsFilePath<IS_DIR> { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { f.write_str(&self.path.to_string_lossy()) @@ -313,7 +308,6 @@ impl Deref for HttpUrl { } } impl Display for HttpUrl { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { self.url.fmt(f) @@ -339,7 +333,6 @@ impl<'de> Deserialize<'de> for HttpUrl { fn expecting(&self, formatter: &mut Formatter) -> fmt::Result { formatter.write_str("struct HttpUrl") } - #[allow(clippy::min_ident_chars)] #[inline] fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> where @@ -383,7 +376,6 @@ pub enum Kind { Wildcard, } impl Display for Kind { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { @@ -404,7 +396,7 @@ pub enum Name { Url(HttpUrl), } impl Display for Name { - #[allow(clippy::min_ident_chars)] + #[allow(clippy::ref_patterns)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { @@ -422,7 +414,6 @@ pub struct File { pub data: String, } impl Display for File { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { self.name.fmt(f) @@ -446,7 +437,6 @@ 
pub struct Summary<'a, E: Eq + Hash> { pub errors: HashMap<E, usize>, } impl<E: Display + Eq + Hash> Display for Summary<'_, E> { - #[allow(clippy::min_ident_chars)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { write!( @@ -586,23 +576,23 @@ impl Files { /// /// Returns a `Vec` containing `Summary` information for each /// [`File`] that was parsed. - #[allow(clippy::arithmetic_side_effects)] + #[allow(clippy::arithmetic_side_effects, clippy::into_iter_on_ref)] #[inline] pub fn add_to_superset<'a: 'b, 'b>( &'a self, doms: &mut SupersetSet<RpzDomain<'b>>, - ) -> Vec<Summary<'a, DomainErr>> { + ) -> Vec<Summary<'a, FirefoxDomainErr>> { /// Iterates each `String` from `files` and transforms each line /// into `T` before adding it as an `RpzDomain` into `doms`. #[inline] fn insert< 'a, 'b: 'a, - T: Into<RpzDomain<'a>> + ParsedDomain<'a, Error = DomainErr> + Helper, + T: Into<RpzDomain<'a>> + ParsedDomain<'a, Error = FirefoxDomainErr> + Helper, >( doms: &mut SupersetSet<RpzDomain<'a>>, files: &'b [File], - summaries: &mut Vec<Summary<'b, DomainErr>>, + summaries: &mut Vec<Summary<'b, FirefoxDomainErr>>, ) { let kind = T::kind(); files.into_iter().fold((), |(), file| { @@ -672,6 +662,7 @@ impl LocalFiles { /// /// Returns [`io::Error`] iff reading said files causes an error. Note that /// it is _not_ an error if a directory does not exist. + #[allow(clippy::wildcard_enum_match_arm)] #[inline] pub fn from_path(dir: AbsFilePath<true>) -> Result<Option<Self>, io::Error> { /// Checks if `path` exists. @@ -758,7 +749,7 @@ pub enum ExtFileErr { InvalidUtf8(FromUtf8Error), } impl Display for ExtFileErr { - #[allow(clippy::min_ident_chars)] + #[allow(clippy::ref_patterns)] #[inline] fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match *self { diff --git a/src/lib.rs b/src/lib.rs @@ -9,7 +9,6 @@ //! The purpose of these types is to make fetching, parsing, and transforming //! 
ad-blocking files into a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) //! file easier. -#![feature(addr_parse_ascii)] #![feature(btree_cursors)] #![feature(byte_slice_trim_ascii)] #![feature(io_error_more)] @@ -31,8 +30,15 @@ )] #![allow( clippy::blanket_clippy_restriction_lints, + clippy::exhaustive_structs, + clippy::implicit_return, + clippy::min_ident_chars, + clippy::missing_trait_methods, clippy::multiple_crate_versions, - clippy::single_call_fn + clippy::question_mark_used, + clippy::single_call_fn, + clippy::single_char_lifetime_names, + clippy::unseparated_literal_suffix )] /// Module for hostname-like domains including parsing [`str`]s /// from a variety of formats. diff --git a/src/main.rs b/src/main.rs @@ -26,6 +26,7 @@ clippy::multiple_crate_versions, clippy::question_mark_used, clippy::single_call_fn, + clippy::single_char_lifetime_names, clippy::unseparated_literal_suffix )] /// Contains a wrapper of block and unblock `RpzDomain`s @@ -39,21 +40,29 @@ mod config; /// Contains functions for `pledge(2)` and `unveil(2)` on OpenBSD platforms when compiled /// with the `priv_sep` feature; otherwise almost all functions are no-ops. 
mod priv_sep; -use crate::app::Domains; -use crate::args::{ArgsErr, ConfigPath, Opts}; -use crate::config::Config; -use core::fmt::{self, Display, Formatter}; -use core::time::Duration; +use crate::{ + app::Domains, + args::{ArgsErr, ConfigPath, Opts}, + config::Config, +}; +use core::{ + fmt::{self, Display, Formatter}, + time::Duration, +}; #[cfg(all(feature = "priv_sep", target_os = "openbsd"))] use priv_sep::UnveilErr; use reqwest::Client; -use rpz::dom::DomainErr; -use rpz::file::{AbsFilePath, ExtFileErr, ExternalFiles, Files, HttpUrl, LocalFiles, Summary}; -use std::collections::HashSet; -use std::error::Error; -use std::fs; -use std::io::{self, Read, Write}; -use std::sync::OnceLock; +use rpz::{ + dom::FirefoxDomainErr, + file::{AbsFilePath, ExtFileErr, ExternalFiles, Files, HttpUrl, LocalFiles, Summary}, +}; +use std::{ + collections::HashSet, + error::Error, + fs, + io::{self, Read, Write}, + sync::OnceLock, +}; use tokio::runtime::Builder; use toml::{self, de}; /// The HTTP(S) client that is used to download all files. @@ -262,7 +271,7 @@ enum Verbosity { /// option was not passed. #[inline] fn write_summary( - summaries: Vec<Summary<'_, DomainErr>>, + summaries: Vec<Summary<'_, FirefoxDomainErr>>, verbose: bool, unblock_count: usize, block_count: usize, diff --git a/src/priv_sep.rs b/src/priv_sep.rs @@ -1,4 +1,3 @@ -#![allow(clippy::implicit_return, clippy::pub_use, clippy::ref_patterns)] #[cfg(all(feature = "priv_sep", target_os = "openbsd"))] pub use priv_sep::UnveilErr; #[cfg(all(feature = "priv_sep", target_os = "openbsd"))] @@ -7,8 +6,10 @@ use priv_sep::{self, Permissions, Promise, Promises}; use std::env; #[cfg(not(all(feature = "priv_sep", target_os = "openbsd")))] use std::fs; -use std::io::{Error, ErrorKind}; -use std::path::Path; +use std::{ + io::{Error, ErrorKind}, + path::Path, +}; /// Used instead of `()` for the parameter /// in the `pledge` functions. This allows /// one to avoid having to disable certain lints.