commit 16f45137001497d044b253edbf352b0fd1eb9c64
parent fde07fe0c0038c76e6ddaa3deeb37db2ce94ab91
Author: Zack Newman <zack@philomathiclife.com>
Date: Sat, 7 Sep 2024 11:54:19 -0600
upgrade deps. handle lints
Diffstat:
4 files changed, 128 insertions(+), 105 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,29 +9,34 @@ license = "MIT OR Apache-2.0"
name = "rpz"
readme = "README.md"
repository = "https://git.philomathiclife.com/repos/rpz/"
-version = "1.0.0"
+version = "1.0.1"
[badges]
maintenance = { status = "actively-developed" }
[dependencies]
-ascii_domain = { version = "0.6.1", default-features = false }
+ascii_domain = { version = "0.6.2", default-features = false }
num-bigint = { version = "0.4.6", default-features = false }
reqwest = { version = "0.12.7", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] }
-serde = { version = "1.0.208", default-features = false }
-superset_map = { version = "0.2.3", default-features = false }
-tokio = { version = "1.39.3", default-features = false, features = ["rt", "time"] }
+serde = { version = "1.0.210", default-features = false }
+superset_map = { version = "0.3.0", default-features = false }
+tokio = { version = "1.40.0", default-features = false, features = ["rt", "time"] }
toml = { version = "0.8.19", default-features = false, features = ["parse"] }
url = { version = "2.5.2", default-features = false, features = ["serde"] }
-zfc = { version = "0.3.2", default-features = false }
+zfc = { version = "0.4.0", default-features = false }
[target.'cfg(target_os = "openbsd")'.dependencies]
-priv_sep = { version = "2.0.0", default-features = false, features = ["openbsd"], optional = true }
+priv_sep = { version = "2.1.0", default-features = false, features = ["openbsd"], optional = true }
+
+
+### FEATURES #################################################################
[features]
-priv_sep = ["dep:priv_sep"]
default = ["priv_sep"]
+# Provide pledge and unveil for the binary crate on OpenBSD platforms.
+priv_sep = ["dep:priv_sep"]
+
[profile.release]
lto = true
panic = 'abort'
diff --git a/README.md b/README.md
@@ -1,16 +1,16 @@
# rpz
-
+
`rpz` consists of a binary crate and [library crate](https://docs.rs/rpz/latest/rpz).
The binary crate, `rpz`, is an application that downloads, parses, and transforms ad-(un)block files from
URLs and local file paths into a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone)
file. This RPZ file can be consumed by a DNS server that supports such files
-(e.g., [Unbound](https://nlnetlabs.nl/projects/unbound/about/)).
-
-## rpz in action
-
+(e.g., [Unbound](https://nlnetlabs.nl/projects/unbound/about/)).
+
+## rpz in action
+
In this example it is assumed [`unbound.conf(5)`](https://unbound.docs.nlnetlabs.nl/en/latest/manpages/unbound.conf.html) is properly configured
and has `name` and `zonefile` in the `rpz` section set to `.` and `/var/unbound/db/rpz` respectively in addition to `control-enable` set to `true`
-in the `remote-control` section.
+in the `remote-control` section.
```bash
[zack@laptop ~]$ cat<<EOF>/usr/local/etc/rpz/config
@@ -56,71 +56,71 @@ www.pandi.co.zw CNAME .
[zack@laptop ~]$ unbound-control -q auth_zone_reload . && unbound-control -q flush_zone . && unbound-control -q flush_negative
```
-## Ad-(un)block file format and encoding
-
+## Ad-(un)block file format and encoding
+
All ad-(un)block files must be valid UTF-8; however for a given domain, each label must only contain 1–63 Unicode scalar values from the set:
`!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `{`, `}`, and `~`. Labels must be delimited
by `.`. Domains in the file must be delimited by a line feed or carriage return and line feed. A domain must be less than 254 characters in length
including the `.` label separator. Domains are treated as case-insensitive with uppercase letters treated as lowercase. Domains must not be an
-IPv4 address.
-
-### Adblock-style
-
+IPv4 address.
+
+### Adblock-style
+
Domain constructed from an [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax)
-with the requirement that the rule conforms to the following extended regex:
-
-`^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$`
-
+with the requirement that the rule conforms to the following extended regex:
+
+`^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$`
+
where `<domain>` conforms to a valid [`Domain`](https://docs.rs/ascii_domain/latest/ascii_domain/dom/struct.Domain.html) based on
[`ASCII_FIREFOX`](https://docs.rs/ascii_domain/latest/ascii_domain/char_set/constant.ASCII_FIREFOX.html) with the added requirements
-that the TLD is either all letters or at least length five and begins with `xn--` and does not contain `$`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
-
+that the TLD is either all letters or at least length five and begins with `xn--` and does not contain `$`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace).
+
Lines that begin with `||` cause all subdomains to be blocked (i.e., the domain itself and all proper subdomains); without
-`||`, only the specific domain is blocked.
-
+`||`, only the specific domain is blocked.
+
Due to the conservative nature in how these files are processed, one is encouraged to still use an application-level
ad blocker (e.g., [uBlock Origin](https://ublockorigin.com/)). Adblock-style files often contain paths as well as
additional information (e.g., “third-party”) that require application-level information to process correctly as such
-entries will be considered “parsing errors” by `rpz`.
-
-### Domain-style
-
+entries will be considered “parsing errors” by `rpz`.
+
+### Domain-style
+
Domain constructed from a [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax)
-with the requirement that the rule conforms to the following regex:
-
-`^<ws>*<domain><ws>*(#.*)?$`
-
-where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of ASCII whitespace.
-
-Domains only represent themselves (i.e., proper subdomains will not be blocked).
-
-### Hosts-style
-
+with the requirement that the rule conforms to the following extended regex:
+
+`^<ws>*<domain><ws>*(#.*)?$`
+
+where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of ASCII whitespace.
+
+Domains only represent themselves (i.e., proper subdomains will not be blocked).
+
+### Hosts-style
+
Domain constructed from a [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax)
-with the requirement that the rule conforms to the following extended regex:
-
-`^<ws>*<ip><ws>+<domain><ws>*(#.*)?$`
-
-where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, `<ws>` is any sequence of ASCII whitespace, and `<ip>` is one of the following:
-
-`::`, `::1`, `0.0.0.0`, or `127.0.0.1`.
-
-Domains only represent themselves (i.e., proper subdomains will not be blocked).
-
-### Wildcard-style
-
+with the requirement that the rule conforms to the following extended regex:
+
+`^<ws>*<ip><ws>+<domain><ws>*(#.*)?$`
+
+where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, `<ws>` is any sequence of ASCII whitespace, and `<ip>` is one of the following:
+
+`::`, `::1`, `0.0.0.0`, or `127.0.0.1`.
+
+Domains only represent themselves (i.e., proper subdomains will not be blocked).
+
+### Wildcard-style
+
Domain constructed from a [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext)
-with the requirement that the rule conforms to the following extended regex:
-
-`^<ws>*(\*\.)?<domain><ws>*(#.*)?$`
-
-where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of ASCII whitespace.
-
+with the requirement that the rule conforms to the following extended regex:
+
+`^<ws>*(\*\.)?<domain><ws>*(#.*)?$`
+
+where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of ASCII whitespace.
+
If `domain` begins with `*.`, then `domain` must have length less than 252 and all proper subdomains are blocked—this
-does _not_ include the domain itself; otherwise, only the `domain` is blocked.
-
-## Config file
-
+does _not_ include the domain itself; otherwise, only the `domain` is blocked.
+
+## Config file
+
Either `-` or the absolute path to the TOML config file must be passed via the `-f`/`--file` CLI option. If `-` is passed, then `stdin` will be read. The
format of this file must conform to the following:
@@ -136,35 +136,35 @@ wildcard = [<HTTP(S)_URLs>]
If `rpz` does not exist, then the file will be written to `stdout`. If `local_dir` is specified, `block/` and `unblock/` subdirectories are searched; and for each of those subdirectories,
`adblock/`, `domain/`, `hosts/`, and `wildcard/` subdirectories are searched for files which are parsed according to the directory they are in. It is not
-an error if any of the directories do not exist.
-
+an error if any of the directories do not exist.
+
In the event keys are specified corresponding to arrays, URLs must be unique across all arrays. The files these URLs
-point to are interpreted as block files (i.e., unblock files are only allowed on the local file system).
-
+point to are interpreted as block files (i.e., unblock files are only allowed on the local file system).
+
The `timeout` corresponds to the maximum _seconds_ allowed for an HTTP(S) file to be downloaded.
If it does not exist or has a value of 0, then a timeout of one hour will be used. If the value specified exceeds one hour,
-then it will be truncated to one hour.
-
-## RPZ file
-
+then it will be truncated to one hour.
+
+## RPZ file
+
Unless `stdout` is the destination, a temporary RPZ file is written in the same location as the `rpz` value in the config file except with `tmp` appended to the name. Upon success, this file
is renamed to the `rpz` value in the config file. The contents of this file contain the minimum number of lines possible with unblock entries taking precedence
-over block entries.
-
-In the event there are no block entries or the temp file already exists, the program will abort.
-
-## Options
-
+over block entries.
+
+In the event there are no block entries or the temp file already exists, the program will abort.
+
+## Options
+
When `rpz` is passed `-V`/`--version`, the version of `rpz` will be printed to `stdout`. When passed `-h`/`--help`,
information about the program and its options will be printed to `stdout`. When passed `-f`/`--file` along with
`-` or the absolute path to the TOML config file, `rpz` will run normally printing summary information to `stdout`
upon completion. One can additionally pass `-q`/`--quiet` along with `-f`/`--file` in order to suppress summary
information from being printed to `stdout`. When `-v`/`--verbose` is passed along with `-f`/`--file`, in addition to
the normal summary information being printed to `stdout`, itemized summary information for each input file
-including the kinds of errors and counts of errors will be printed to `stdout`.
-
-### Example
-
+including the kinds of errors and counts of errors will be printed to `stdout`.
+
+### Example
+
If `www.example.com`, `*.example.com`, and `foo.com` are to be blocked while `foo.example.com` and `||foo.com` are to be unblocked, the RPZ file would look like the following:
```bash
@@ -173,19 +173,33 @@ foo.example.com CNAME rpz-passthru.
```
Upon success, the quantity of unblock, block, and total lines written is written to `stdout` in addition
-to the total number of domains, comments, blanks, and parsing errors.
-
-## Errors
-
-Parsing errors are ignored; all other errors are written to `stderr` before program abortion.
-
-### Status
-
-This package is actively maintained.
-
+to the total number of domains, comments, blanks, and parsing errors.
+
+## Errors
+
+Parsing errors are ignored; all other errors are written to `stderr` before program abortion.
+
+## License
+
+Licensed under either of
+
+* Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0).
+* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT).
+
+at your option.
+
+## Contribution
+
+Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you,
+as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.
+
+## Status
+
+This package is actively maintained.
+
The crates are only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but
-they should work on platform.
-
+they should work on any platform.
+
Nightly `rustc` is required. Once `BTreeMap` [cursors are stabilized](https://github.com/rust-lang/rust/issues/107540), stable `rustc` will work.
On OpenBSD-stable, one can use the `rust` port as long as `RUSTC_BOOTSTRAP` is `export`ed with a value of `1` before invoking
`cargo build --all-features --release` or `cargo install --all-features rpz`.
diff --git a/src/dom.rs b/src/dom.rs
@@ -221,8 +221,9 @@ impl<'a, T: ParsedDomain<'a>> Value<'a, T> {
}
}
}
-/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. When parsed into a
-/// [`Value::Domain`], the domain can be written to a
+/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s.
+///
+/// When parsed into a [`Value::Domain`], the domain can be written to a
/// [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file.
pub trait ParsedDomain<'a>: Sized {
/// The error returned from [`Self::parse_value`].
@@ -242,8 +243,9 @@ pub trait ParsedDomain<'a>: Sized {
fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error>;
}
/// Domain constructed from an
-/// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax) with the
-/// requirement that the rule conforms to the following extended regex:
+/// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax).
+///
+/// Specifically, the rule must conform to the following extended regex:
///
/// `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$`
///
@@ -716,8 +718,9 @@ impl<'a> ParsedDomain<'a> for Adblock<'a> {
}
}
/// Domain constructed from a
-/// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax) with the
-/// requirement that the rule conforms to the following regex:
+/// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax).
+///
+/// Specifically, the rule must conform to the following extended regex:
///
/// `^<ws>*<domain><ws>*(#.*)?$`
///
@@ -967,8 +970,9 @@ impl<'a> ParsedDomain<'a> for DomainOnly<'a> {
}
}
/// Domain constructed from a
-/// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax) with the
-/// requirement that the rule conforms to the following extended regex:
+/// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax).
+///
+/// Specifically, the rule must conform to the following extended regex:
///
/// `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$`
///
@@ -1237,8 +1241,9 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> {
}
}
/// Domain constructed from a
-/// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext)
-/// with the requirement that the rule conforms to the following extended regex:
+/// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext).
+///
+/// Specifically, the rule must conform to the following extended regex:
///
/// `^<ws>*(\*\.)?<domain><ws>*(#.*)?$`
///
diff --git a/src/lib.rs b/src/lib.rs
@@ -11,7 +11,6 @@
//! file easier.
#![feature(btree_cursors)]
#![feature(io_error_more)]
-#![cfg_attr(doc, feature(doc_auto_cfg))]
#![deny(
future_incompatible,
let_underscore,