rpz

Response policy zone (RPZ) file generator.
git clone https://git.philomathiclife.com/repos/rpz
Log | Files | Refs | README

commit 16f45137001497d044b253edbf352b0fd1eb9c64
parent fde07fe0c0038c76e6ddaa3deeb37db2ce94ab91
Author: Zack Newman <zack@philomathiclife.com>
Date:   Sat,  7 Sep 2024 11:54:19 -0600

upgrade deps. handle lints

Diffstat:
M Cargo.toml | 21 +++++++++++++--------
M README.md | 186 ++++++++++++++++++++++++++++++++++++++++++-------------------------------------
M src/dom.rs | 25 +++++++++++++++----------
M src/lib.rs | 1 -
4 files changed, 128 insertions(+), 105 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -9,29 +9,34 @@ license = "MIT OR Apache-2.0" name = "rpz" readme = "README.md" repository = "https://git.philomathiclife.com/repos/rpz/" -version = "1.0.0" +version = "1.0.1" [badges] maintenance = { status = "actively-developed" } [dependencies] -ascii_domain = { version = "0.6.1", default-features = false } +ascii_domain = { version = "0.6.2", default-features = false } num-bigint = { version = "0.4.6", default-features = false } reqwest = { version = "0.12.7", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] } -serde = { version = "1.0.208", default-features = false } -superset_map = { version = "0.2.3", default-features = false } -tokio = { version = "1.39.3", default-features = false, features = ["rt", "time"] } +serde = { version = "1.0.210", default-features = false } +superset_map = { version = "0.3.0", default-features = false } +tokio = { version = "1.40.0", default-features = false, features = ["rt", "time"] } toml = { version = "0.8.19", default-features = false, features = ["parse"] } url = { version = "2.5.2", default-features = false, features = ["serde"] } -zfc = { version = "0.3.2", default-features = false } +zfc = { version = "0.4.0", default-features = false } [target.'cfg(target_os = "openbsd")'.dependencies] -priv_sep = { version = "2.0.0", default-features = false, features = ["openbsd"], optional = true } +priv_sep = { version = "2.1.0", default-features = false, features = ["openbsd"], optional = true } + + +### FEATURES ################################################################# [features] -priv_sep = ["dep:priv_sep"] default = ["priv_sep"] +# Provide pledge and unveil for the binary crate on OpenBSD platforms. +priv_sep = ["dep:priv_sep"] + [profile.release] lto = true panic = 'abort' diff --git a/README.md b/README.md @@ -1,16 +1,16 @@ # rpz - + `rpz` consists of a binary crate and [library crate](https://docs.rs/rpz/latest/rpz). 
The binary crate, `rpz`, is an application that downloads, parses, and transforms ad-(un)block files from URLs and local file paths into a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. This RPZ file can be consumed by a DNS server that supports such files -(e.g., [Unbound](https://nlnetlabs.nl/projects/unbound/about/)). - -## rpz in action - +(e.g., [Unbound](https://nlnetlabs.nl/projects/unbound/about/)). + +## rpz in action + In this example it is assumed [`unbound.conf(5)`](https://unbound.docs.nlnetlabs.nl/en/latest/manpages/unbound.conf.html) is properly configured and has `name` and `zonefile` in the `rpz` section set to `.` and `/var/unbound/db/rpz` respectively in addition to `control-enable` set to `true` -in the `remote-control` section. +in the `remote-control` section. ```bash [zack@laptop ~]$ cat<<EOF>/usr/local/etc/rpz/config @@ -56,71 +56,71 @@ www.pandi.co.zw CNAME . [zack@laptop ~]$ unbound-control -q auth_zone_reload . && unbound-control -q flush_zone . && unbound-control -q flush_negative ``` -## Ad-(un)block file format and encoding - +## Ad-(un)block file format and encoding + All ad-(un)block files must be valid UTF-8; however for a given domain, each label must only contain 1–63 Unicode scalar values from the set: `!`, `$`, `&`, `'`, `(`, `)`, `+`, `,`, `-`, `0`–`9`, `;`, `=`, `_`, `` ` ``, `A`–`Z`, `a`–`z`, `{`, `}`, and `~`. Labels must be delimited by `.`. Domains in the file must be delimited by a line feed or carriage return and line feed. A domain must be less than 254 characters in length including the `.` label separator. Domains are treated as case-insensitive with uppercase letters treated as lowercase. Domains must not be an -IPv4 address. - -### Adblock-style - +IPv4 address. 
+ +### Adblock-style + Domain constructed from an [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax) -with the requirement that the rule conforms to the following extended regex: - -`^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$` - +with the requirement that the rule conforms to the following extended regex: + +`^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$` + where `<domain>` conforms to a valid [`Domain`](https://docs.rs/ascii_domain/latest/ascii_domain/dom/struct.Domain.html) based on [`ASCII_FIREFOX`](https://docs.rs/ascii_domain/latest/ascii_domain/char_set/constant.ASCII_FIREFOX.html) with the added requirements -that the TLD is either all letters or at least length five and begins with `xn--` and does not contain `$`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). - +that the TLD is either all letters or at least length five and begins with `xn--` and does not contain `$`, and `<ws>` is any sequence of [ASCII whitespace](https://infra.spec.whatwg.org/#ascii-whitespace). + Lines that begin with `||` cause all subdomains to be blocked (i.e., the domain itself and all proper subdomains); without -`||`, only the specific domain is blocked. - +`||`, only the specific domain is blocked. + Due to the conservative nature in how these files are processed, one is encouraged to still use an application-level ad blocker (e.g., [uBlock Origin](https://ublockorigin.com/)). Adblock-style files often contain paths as well as additional information (e.g., “third-party”) that require application-level information to process correctly as such -entries will be considered “parsing errors” by `rpz`. - -### Domain-style - +entries will be considered “parsing errors” by `rpz`. 
+ +### Domain-style + Domain constructed from a [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax) -with the requirement that the rule conforms to the following regex: - -`^<ws>*<domain><ws>*(#.*)?$` - -where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of ASCII whitespace. - -Domains only represent themselves (i.e., proper subdomains will not be blocked). - -### Hosts-style - +with the requirement that the rule conforms to the following regex: + +`^<ws>*<domain><ws>*(#.*)?$` + +where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of ASCII whitespace. + +Domains only represent themselves (i.e., proper subdomains will not be blocked). + +### Hosts-style + Domain constructed from a [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax) -with the requirement that the rule conforms to the following extended regex: - -`^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` - -where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, `<ws>` is any sequence of ASCII whitespace, and `<ip>` is one of the following: - -`::`, `::1`, `0.0.0.0`, or `127.0.0.1`. - -Domains only represent themselves (i.e., proper subdomains will not be blocked). - -### Wildcard-style - +with the requirement that the rule conforms to the following extended regex: + +`^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` + +where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, `<ws>` is any sequence of ASCII whitespace, and `<ip>` is one of the following: + +`::`, `::1`, `0.0.0.0`, or `127.0.0.1`. 
+ +Domains only represent themselves (i.e., proper subdomains will not be blocked). + +### Wildcard-style + Domain constructed from a [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext) -with the requirement that the rule conforms to the following extended regex: - -`^<ws>*(\*\.)?<domain><ws>*(#.*)?$` - -where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of ASCII whitespace. - +with the requirement that the rule conforms to the following extended regex: + +`^<ws>*(\*\.)?<domain><ws>*(#.*)?$` + +where `<domain>` conforms to a valid `Domain` based on `ASCII_FIREFOX`, the TLD is either all letters or at least length five and begins with `xn--`, and `<ws>` is any sequence of ASCII whitespace. + If `domain` begins with `*.`, then `domain` must have length less than 252 and all proper subdomains are blocked—this -does _not_ include the domain itself; otherwise, only the `domain` is blocked. - -## Config file - +does _not_ include the domain itself; otherwise, only the `domain` is blocked. + +## Config file + Either `-` or the absolute path to the TOML config file must be passed via the `-f`/`--file` CLI option. If `-` is passed, then `stdin` will be read. The format of this file must conform to the following: @@ -136,35 +136,35 @@ wildcard = [<HTTP(S)_URLs>] If `rpz` does not exist, then the file will be written to `stdout`. If `local_dir` is specified, `block/` and `unblock/` subdirectories are searched; and for each of those subdirectories, `adblock/`, `domain/`, `hosts/`, and `wildcard/` subdirectories are searched for files which are parsed according to the directory they are in. It is not -an error if any of the directories do not exist. - +an error if any of the directories do not exist. 
+ In the event keys are specified corresponding to arrays, URLs must be unique across all arrays. The files these URLs -point to are interpreted as block files (i.e., unblock files are only allowed on the local file system). - +point to are interpreted as block files (i.e., unblock files are only allowed on the local file system). + The `timeout` corresponds to the maximum _seconds_ allowed for an HTTP(S) file to be downloaded. If it does not exist or has a value of 0, then a timeout of one hour will be used. If the value specified exceeds one hour, -then it will be truncated to one hour. - -## RPZ file - +then it will be truncated to one hour. + +## RPZ file + Unless `stdout` is the destination, a temporary RPZ file is written in the same location as the `rpz` value in the config file except with `tmp` appended to the name. Upon success, this file is renamed to the `rpz` value in the config file. The contents of this file contain the minimum number of lines possible with unblock entries taking precedence -over block entries. - -In the event there are no block entries or the temp file already exists, the program will abort. - -## Options - +over block entries. + +In the event there are no block entries or the temp file already exists, the program will abort. + +## Options + When `rpz` is passed `-V`/`--version`, the version of `rpz` will be printed to `stdout`. When passed `-h`/`--help`, information about the program and its options will be printed to `stdout`. When passed `-f`/`--file` along with `-` or the absolute path to the TOML config file, `rpz` will run normally printing summary information to `stdout` upon completion. One can additionally pass `-q`/`--quiet` along with `-f`/`--file` in order to suppress summary information from being printed to `stdout`. 
When `-v`/`--verbose` is passed along with `-f`/`--file`, in addition to the normal summary information being printed to `stdout`, itemized summary information for each input file -including the kinds of errors and counts of errors will be printed to `stdout`. - -### Example - +including the kinds of errors and counts of errors will be printed to `stdout`. + +### Example + If `www.example.com`, `*.example.com`, and `foo.com` are to be blocked while `foo.example.com` and `||foo.com` are to be unblocked, the RPZ file would look like the following: ```bash @@ -173,19 +173,33 @@ foo.example.com CNAME rpz-passthru. ``` Upon success, the quantity of unblock, block, and total lines written is written to `stdout` in addition -to the total number of domains, comments, blanks, and parsing errors. - -## Errors - -Parsing errors are ignored; all other errors are written to `stderr` before program abortion. - -### Status - -This package is actively maintained. - +to the total number of domains, comments, blanks, and parsing errors. + +## Errors + +Parsing errors are ignored; all other errors are written to `stderr` before program abortion. + +## License + +Licensed under either of + +* Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0). +* MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT). + +at your option. + +## Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, +as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. + +### Status + +This package is actively maintained. + The crates are only tested on the `x86_64-unknown-linux-gnu` and `x86_64-unknown-openbsd` targets, but -they should work on platform. - +they should work on platform. + Nightly `rustc` is required. 
Once `BTreeMap` [cursors are stabilized](https://github.com/rust-lang/rust/issues/107540), stable `rustc` will work. On OpenBSD-stable, one can use the `rust` port as long as `RUSTC_BOOTSTRAP` is `export`ed with a value of `1` before invoking `cargo build --all-features --release` or `cargo install --all-features rpz`. diff --git a/src/dom.rs b/src/dom.rs @@ -221,8 +221,9 @@ impl<'a, T: ParsedDomain<'a>> Value<'a, T> { } } } -/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. When parsed into a -/// [`Value::Domain`], the domain can be written to a +/// Structure of a [`Domain`]-like type that can parse [`prim@str`]s into [`Value`]s. +/// +/// When parsed into a [`Value::Domain`], the domain can be written to a /// [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) file. pub trait ParsedDomain<'a>: Sized { /// The error returned from [`Self::parse_value`]. @@ -242,8 +243,9 @@ pub trait ParsedDomain<'a>: Sized { fn write_to_rpz<W: Write>(&self, action: RpzAction, writer: W) -> Result<(), Error>; } /// Domain constructed from an -/// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax) with the -/// requirement that the rule conforms to the following extended regex: +/// [Adblock-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#adblock-style-syntax). +/// +/// Specifically the domain must conform to the following extended regex: /// /// `^<ws>*(\|\|)?<ws>*<domain><ws>*\^?<ws>*$` /// @@ -716,8 +718,9 @@ impl<'a> ParsedDomain<'a> for Adblock<'a> { } } /// Domain constructed from a -/// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax) with the -/// requirement that the rule conforms to the following regex: +/// [domains-only rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#domains-only-syntax). 
+/// +/// Specifically the domain must conform to the following extended regex: /// /// `^<ws>*<domain><ws>*(#.*)?$` /// @@ -967,8 +970,9 @@ impl<'a> ParsedDomain<'a> for DomainOnly<'a> { } } /// Domain constructed from a -/// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax) with the -/// requirement that the rule conforms to the following extended regex: +/// [`hosts(5)`-style rule](https://adguard-dns.io/kb/general/dns-filtering-syntax/#etc-hosts-syntax). +/// +/// Specifically the domain must conform to the following extended regex: /// /// `^<ws>*<ip><ws>+<domain><ws>*(#.*)?$` /// @@ -1237,8 +1241,9 @@ impl<'a> ParsedDomain<'a> for Hosts<'a> { } } /// Domain constructed from a -/// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext) -/// with the requirement that the rule conforms to the following extended regex: +/// [wildcard domain rule](https://pgl.yoyo.org/adservers/serverlist.php?hostformat=adblock&showintro=0&mimetype=plaintext). +/// +/// Specifically the domain must conform to the following extended regex: /// /// `^<ws>*(\*\.)?<domain><ws>*(#.*)?$` /// diff --git a/src/lib.rs b/src/lib.rs @@ -11,7 +11,6 @@ //! file easier. #![feature(btree_cursors)] #![feature(io_error_more)] -#![cfg_attr(doc, feature(doc_auto_cfg))] #![deny( future_incompatible, let_underscore,