rpz

Response policy zone (RPZ) file generator.
git clone https://git.philomathiclife.com/repos/rpz
Log | Files | Refs | README

commit 80e6b40b45a7ae88e0360a888dc661d5aa504ab4
parent 25565a5e2001cd4169d0c1e372d92bd47088ffe5
Author: Zack Newman <zack@philomathiclife.com>
Date:   Wed,  8 Nov 2023 15:55:55 -0700

fix http timeout so that it occurs per file. update deps. remove priv_sep dep on non-openbsd

Diffstat:
M Cargo.toml | 10 ++++++----
M README.md | 5 +++--
M src/file.rs | 153 +++++++++++++++++++++++++------------------------------------------------------
M src/main.rs | 41 ++++++++++++++++++++++-------------------
4 files changed, 79 insertions(+), 130 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml @@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0" name = "rpz" readme = "README.md" repository = "https://git.philomathiclife.com/repos/rpz/" -version = "0.1.0" +version = "0.2.0" [lib] name = "rpz" @@ -21,15 +21,17 @@ path = "src/main.rs" [dependencies] num-bigint = { version = "0.4.4", default-features = false } -priv_sep = { version = "0.8.0", default-features = false, features = ["openbsd"], optional = true } reqwest = { version = "0.11.22", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] } -serde = { version = "1.0.190", default-features = false } +serde = { version = "1.0.192", default-features = false } superset_map = { version = "0.2.1", default-features = false } tokio = { version = "1.33.0", default-features = false, features = ["rt", "time"] } -toml = { version = "0.8.6", default-features = false, features = ["parse"] } +toml = { version = "0.8.8", default-features = false, features = ["parse"] } url = { version = "2.4.1", default-features = false, features = ["serde"] } zfc = { version = "0.3.1", default-features = false } +[target.'cfg(target_os = "openbsd")'.dependencies] +priv_sep = { version = "0.8.1", default-features = false, features = ["openbsd"], optional = true } + [build-dependencies] rustc_version = "0.4.0" diff --git a/README.md b/README.md @@ -2,8 +2,9 @@ `rpz` consists of a binary crate and [library crate](https://docs.rs/rpz/latest/rpz). The binary crate, `rpz`, is an application that downloads, parses, and transforms ad-(un)block files from -URLs and local file paths into a response policy zone (RPZ) file. This RPZ file can be consumed -by a DNS server that supports such files (e.g., [Unbound](https://nlnetlabs.nl/projects/unbound/about/)). +URLs and local file paths into a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone) +file. 
This RPZ file can be consumed by a DNS server that supports such files +(e.g., [Unbound](https://nlnetlabs.nl/projects/unbound/about/)). ## rpz in action diff --git a/src/file.rs b/src/file.rs @@ -521,37 +521,52 @@ impl Helper for Wildcard<'_> { } } impl Files { - /// Attempts to convert an `ExternalFiles` into a `Files`. + /// Attempts to convert an `ExternalFiles` into a `Files` + /// with `timeout_per_file` as the maximum amount of time allowed + /// for a file to finish downloading. + /// + /// Note that if `timeout_per_file` is 0 or larger than one hour, then + /// it'll be set to one hour. Similarly, if `timeout_per_file`, + /// is less than 1 second, it'll be be set to one second. /// /// # Errors /// /// Returns `ExtFileErr` iff any of the files in `ExternalFiles` /// errors when downloading them and transforming them into a [`String`]. #[inline] - pub async fn from_external(ext: ExternalFiles) -> Result<Self, ExtFileErr> { + pub async fn from_external( + ext: ExternalFiles, + mut timeout_per_file: Duration, + ) -> Result<Self, ExtFileErr> { /// `await`s each `Result` in `set` propagating any errors; otherwise /// the `String` is `push`ed into `files`. /// /// If an error occurs, then the function immediately returns. async fn add_file( files: &mut Vec<File>, - mut set: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>, + timeout: Duration, + mut set: JoinSet<Result<File, ExtFileErr>>, ) -> Result<(), ExtFileErr> { - while let Some(val) = set.join_next().await { - files.push(val???); + while let Some(file) = time::timeout(timeout, set.join_next()).await? 
{ + files.push(file??); } Ok(()) } + timeout_per_file = if timeout_per_file == Duration::ZERO { + Duration::from_secs(3600) + } else { + timeout_per_file.clamp(Duration::from_secs(1), Duration::from_secs(3600)) + }; let mut files = Self { adblock: Vec::with_capacity(ext.adblock.len()), domain: Vec::with_capacity(ext.domain.len()), hosts: Vec::with_capacity(ext.hosts.len()), wildcard: Vec::with_capacity(ext.wildcard.len()), }; - add_file(&mut files.adblock, ext.adblock).await?; - add_file(&mut files.domain, ext.domain).await?; - add_file(&mut files.hosts, ext.hosts).await?; - add_file(&mut files.wildcard, ext.wildcard).await?; + add_file(&mut files.adblock, timeout_per_file, ext.adblock).await?; + add_file(&mut files.domain, timeout_per_file, ext.domain).await?; + add_file(&mut files.hosts, timeout_per_file, ext.hosts).await?; + add_file(&mut files.wildcard, timeout_per_file, ext.wildcard).await?; Ok(files) } /// Creates an empty instance of `Files`. @@ -783,22 +798,17 @@ impl Error for ExtFileErr {} #[derive(Debug)] pub struct ExternalFiles { /// [`Adblock`]-based files. - pub adblock: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>, + pub adblock: JoinSet<Result<File, ExtFileErr>>, /// [`DomainOnly`]-based files. - pub domain: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>, + pub domain: JoinSet<Result<File, ExtFileErr>>, /// [`Hosts`]-based files. - pub hosts: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>, + pub hosts: JoinSet<Result<File, ExtFileErr>>, /// [`Wildcard`]-based files. - pub wildcard: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>, + pub wildcard: JoinSet<Result<File, ExtFileErr>>, } impl ExternalFiles { - /// The maximum timeout for a task. - pub const MAX_TIMEOUT: Duration = Duration::from_secs(3600); - /// The minimum timeout for a task. 
- pub const MIN_TIMEOUT: Duration = Duration::from_secs(1); /// Returns `ExternalFiles` containing files downloaded - /// from the `HashSet`s with a timeout of [`Self::MAX_TIMEOUT`] - /// for each task. + /// from the `HashSet`s. #[inline] #[must_use] pub fn new_with_urls( @@ -808,36 +818,11 @@ impl ExternalFiles { hosts_urls: HashSet<HttpUrl>, wildcard_urls: HashSet<HttpUrl>, ) -> Self { - Self::new_with_urls_and_timeout( - client, - Self::MAX_TIMEOUT, - adblock_urls, - domain_urls, - hosts_urls, - wildcard_urls, - ) - } - /// Returns `ExternalFiles` containing files downloaded - /// from the `HashSet`s with a timeout for each task set to `timeout`. - /// - /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to - /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set - /// to `MAX_TIMEOUT`. - #[inline] - #[must_use] - pub fn new_with_urls_and_timeout( - client: &'static Client, - timeout: Duration, - adblock_urls: HashSet<HttpUrl>, - domain_urls: HashSet<HttpUrl>, - hosts_urls: HashSet<HttpUrl>, - wildcard_urls: HashSet<HttpUrl>, - ) -> Self { let mut val = Self::new(); - val.add_adblock(client, timeout, adblock_urls); - val.add_domain(client, timeout, domain_urls); - val.add_hosts(client, timeout, hosts_urls); - val.add_wildcard(client, timeout, wildcard_urls); + val.add_adblock(client, adblock_urls); + val.add_domain(client, domain_urls); + val.add_hosts(client, hosts_urls); + val.add_wildcard(client, wildcard_urls); val } /// Returns an empty `ExternalFiles`. @@ -851,79 +836,37 @@ impl ExternalFiles { wildcard: JoinSet::new(), } } - /// Downloads the [`Adblock`] files from `urls` and adds them to [`ExternalFiles::adblock`] - /// with a timeout for each task set to `timeout`. - /// - /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to - /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set - /// to `MAX_TIMEOUT`. 
+ /// Downloads the [`Adblock`] files from `urls` and adds them to [`ExternalFiles::adblock`]. #[inline] - pub fn add_adblock( - &mut self, - client: &'static Client, - timeout: Duration, - urls: HashSet<HttpUrl>, - ) { - Self::get_external_files(&mut self.adblock, client, timeout, urls); + pub fn add_adblock(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) { + Self::get_external_files(&mut self.adblock, client, urls); } - /// Downloads the [`DomainOnly`] files from `urls` and adds them to [`ExternalFiles::domain`] - /// with a timeout for each task set to `timeout`. - /// - /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to - /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set - /// to `MAX_TIMEOUT`. + /// Downloads the [`DomainOnly`] files from `urls` and adds them to [`ExternalFiles::domain`]. #[inline] - pub fn add_domain( - &mut self, - client: &'static Client, - timeout: Duration, - urls: HashSet<HttpUrl>, - ) { - Self::get_external_files(&mut self.domain, client, timeout, urls); + pub fn add_domain(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) { + Self::get_external_files(&mut self.domain, client, urls); } - /// Downloads the [`Hosts`] files from `urls` and adds them to [`ExternalFiles::hosts`] - /// with a timeout for each task set to `timeout`. - /// - /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to - /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set - /// to `MAX_TIMEOUT`. + /// Downloads the [`Hosts`] files from `urls` and adds them to [`ExternalFiles::hosts`]. 
#[inline] - pub fn add_hosts( - &mut self, - client: &'static Client, - timeout: Duration, - urls: HashSet<HttpUrl>, - ) { - Self::get_external_files(&mut self.hosts, client, timeout, urls); + pub fn add_hosts(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) { + Self::get_external_files(&mut self.hosts, client, urls); } - /// Downloads the [`Wildcard`] files from `urls` and adds them to [`ExternalFiles::wildcard`] - /// with a timeout for each task set to `timeout`. - /// - /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to - /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set - /// to `MAX_TIMEOUT`. + /// Downloads the [`Wildcard`] files from `urls` and adds them to [`ExternalFiles::wildcard`]. #[inline] - pub fn add_wildcard( - &mut self, - client: &'static Client, - timeout: Duration, - urls: HashSet<HttpUrl>, - ) { - Self::get_external_files(&mut self.wildcard, client, timeout, urls); + pub fn add_wildcard(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) { + Self::get_external_files(&mut self.wildcard, client, urls); } /// Downloads the files from `urls` and converts them to `String`s adding the /// tasks to `set`. 
#[inline] fn get_external_files( - set: &mut JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>, + set: &mut JoinSet<Result<File, ExtFileErr>>, client: &'static Client, - mut timeout: Duration, urls: HashSet<HttpUrl>, ) { - timeout = timeout.clamp(Self::MIN_TIMEOUT, Self::MAX_TIMEOUT); for url in Into::<HashSet<HttpUrl>>::into(urls) { let url_clone = url.clone(); - set.spawn(time::timeout(timeout, async { + set.spawn(async { let resp = client .get::<Url>(url_clone.into()) .send() @@ -940,7 +883,7 @@ impl ExternalFiles { }) }, ) - })); + }); } } } diff --git a/src/main.rs b/src/main.rs @@ -221,24 +221,27 @@ fn get_external_files( |e| Err(E::Io(e)), |runtime| { runtime.block_on(async { - Files::from_external({ - let mut files = ExternalFiles::new(); - CLIENT - .set( - Client::builder() - .user_agent(USER_AGENT) - .use_rustls_tls() - .build() - .map_err(ExtFileErr::Http)?, - ) - .unwrap(); - let client = CLIENT.get().unwrap(); - files.add_adblock(client, timeout, adblock); - files.add_domain(client, timeout, domain); - files.add_hosts(client, timeout, hosts); - files.add_wildcard(client, timeout, wildcard); - files - }) + Files::from_external( + { + let mut files = ExternalFiles::new(); + CLIENT + .set( + Client::builder() + .user_agent(USER_AGENT) + .use_rustls_tls() + .build() + .map_err(ExtFileErr::Http)?, + ) + .unwrap(); + let client = CLIENT.get().unwrap(); + files.add_adblock(client, adblock); + files.add_domain(client, domain); + files.add_hosts(client, hosts); + files.add_wildcard(client, wildcard); + files + }, + timeout, + ) .await .map_err(E::ExtFile) }) @@ -361,7 +364,7 @@ fn main() -> Result<(), E> { .map_err(E::from) .and_then(|()| { get_external_files( - config.timeout.unwrap_or(ExternalFiles::MAX_TIMEOUT), + config.timeout.unwrap_or(Duration::from_secs(3600)), config.adblock, config.domain, config.hosts,