commit 80e6b40b45a7ae88e0360a888dc661d5aa504ab4
parent 25565a5e2001cd4169d0c1e372d92bd47088ffe5
Author: Zack Newman <zack@philomathiclife.com>
Date: Wed, 8 Nov 2023 15:55:55 -0700
fix http timeout so that it occurs per file. update deps. remove priv_sep dep on non-openbsd
Diffstat:
4 files changed, 79 insertions(+), 130 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
@@ -9,7 +9,7 @@ license = "MIT OR Apache-2.0"
name = "rpz"
readme = "README.md"
repository = "https://git.philomathiclife.com/repos/rpz/"
-version = "0.1.0"
+version = "0.2.0"
[lib]
name = "rpz"
@@ -21,15 +21,17 @@ path = "src/main.rs"
[dependencies]
num-bigint = { version = "0.4.4", default-features = false }
-priv_sep = { version = "0.8.0", default-features = false, features = ["openbsd"], optional = true }
reqwest = { version = "0.11.22", default-features = false, features = ["brotli", "deflate", "gzip", "rustls-tls-native-roots", "trust-dns"] }
-serde = { version = "1.0.190", default-features = false }
+serde = { version = "1.0.192", default-features = false }
superset_map = { version = "0.2.1", default-features = false }
tokio = { version = "1.33.0", default-features = false, features = ["rt", "time"] }
-toml = { version = "0.8.6", default-features = false, features = ["parse"] }
+toml = { version = "0.8.8", default-features = false, features = ["parse"] }
url = { version = "2.4.1", default-features = false, features = ["serde"] }
zfc = { version = "0.3.1", default-features = false }
+[target.'cfg(target_os = "openbsd")'.dependencies]
+priv_sep = { version = "0.8.1", default-features = false, features = ["openbsd"], optional = true }
+
[build-dependencies]
rustc_version = "0.4.0"
diff --git a/README.md b/README.md
@@ -2,8 +2,9 @@
`rpz` consists of a binary crate and [library crate](https://docs.rs/rpz/latest/rpz).
The binary crate, `rpz`, is an application that downloads, parses, and transforms ad-(un)block files from
-URLs and local file paths into a response policy zone (RPZ) file. This RPZ file can be consumed
-by a DNS server that supports such files (e.g., [Unbound](https://nlnetlabs.nl/projects/unbound/about/)).
+URLs and local file paths into a [response policy zone (RPZ)](https://en.wikipedia.org/wiki/Response_policy_zone)
+file. This RPZ file can be consumed by a DNS server that supports such files
+(e.g., [Unbound](https://nlnetlabs.nl/projects/unbound/about/)).
## rpz in action
diff --git a/src/file.rs b/src/file.rs
@@ -521,37 +521,52 @@ impl Helper for Wildcard<'_> {
}
}
impl Files {
- /// Attempts to convert an `ExternalFiles` into a `Files`.
+ /// Attempts to convert an `ExternalFiles` into a `Files`
+ /// with `timeout_per_file` as the maximum amount of time allowed
+ /// for a file to finish downloading.
+ ///
+ /// Note that if `timeout_per_file` is 0 or larger than one hour, then
+ /// it'll be set to one hour. Similarly, if `timeout_per_file` is
+ /// non-zero but less than one second, it'll be set to one second.
///
/// # Errors
///
/// Returns `ExtFileErr` iff any of the files in `ExternalFiles`
/// errors when downloading them and transforming them into a [`String`].
#[inline]
- pub async fn from_external(ext: ExternalFiles) -> Result<Self, ExtFileErr> {
+ pub async fn from_external(
+ ext: ExternalFiles,
+ mut timeout_per_file: Duration,
+ ) -> Result<Self, ExtFileErr> {
/// `await`s each `Result` in `set` propagating any errors; otherwise
/// the `String` is `push`ed into `files`.
///
/// If an error occurs, then the function immediately returns.
async fn add_file(
files: &mut Vec<File>,
- mut set: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>,
+ timeout: Duration,
+ mut set: JoinSet<Result<File, ExtFileErr>>,
) -> Result<(), ExtFileErr> {
- while let Some(val) = set.join_next().await {
- files.push(val???);
+ while let Some(file) = time::timeout(timeout, set.join_next()).await? {
+ files.push(file??);
}
Ok(())
}
+ timeout_per_file = if timeout_per_file == Duration::ZERO {
+ Duration::from_secs(3600)
+ } else {
+ timeout_per_file.clamp(Duration::from_secs(1), Duration::from_secs(3600))
+ };
let mut files = Self {
adblock: Vec::with_capacity(ext.adblock.len()),
domain: Vec::with_capacity(ext.domain.len()),
hosts: Vec::with_capacity(ext.hosts.len()),
wildcard: Vec::with_capacity(ext.wildcard.len()),
};
- add_file(&mut files.adblock, ext.adblock).await?;
- add_file(&mut files.domain, ext.domain).await?;
- add_file(&mut files.hosts, ext.hosts).await?;
- add_file(&mut files.wildcard, ext.wildcard).await?;
+ add_file(&mut files.adblock, timeout_per_file, ext.adblock).await?;
+ add_file(&mut files.domain, timeout_per_file, ext.domain).await?;
+ add_file(&mut files.hosts, timeout_per_file, ext.hosts).await?;
+ add_file(&mut files.wildcard, timeout_per_file, ext.wildcard).await?;
Ok(files)
}
/// Creates an empty instance of `Files`.
@@ -783,22 +798,17 @@ impl Error for ExtFileErr {}
#[derive(Debug)]
pub struct ExternalFiles {
/// [`Adblock`]-based files.
- pub adblock: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>,
+ pub adblock: JoinSet<Result<File, ExtFileErr>>,
/// [`DomainOnly`]-based files.
- pub domain: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>,
+ pub domain: JoinSet<Result<File, ExtFileErr>>,
/// [`Hosts`]-based files.
- pub hosts: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>,
+ pub hosts: JoinSet<Result<File, ExtFileErr>>,
/// [`Wildcard`]-based files.
- pub wildcard: JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>,
+ pub wildcard: JoinSet<Result<File, ExtFileErr>>,
}
impl ExternalFiles {
- /// The maximum timeout for a task.
- pub const MAX_TIMEOUT: Duration = Duration::from_secs(3600);
- /// The minimum timeout for a task.
- pub const MIN_TIMEOUT: Duration = Duration::from_secs(1);
/// Returns `ExternalFiles` containing files downloaded
- /// from the `HashSet`s with a timeout of [`Self::MAX_TIMEOUT`]
- /// for each task.
+ /// from the `HashSet`s.
#[inline]
#[must_use]
pub fn new_with_urls(
@@ -808,36 +818,11 @@ impl ExternalFiles {
hosts_urls: HashSet<HttpUrl>,
wildcard_urls: HashSet<HttpUrl>,
) -> Self {
- Self::new_with_urls_and_timeout(
- client,
- Self::MAX_TIMEOUT,
- adblock_urls,
- domain_urls,
- hosts_urls,
- wildcard_urls,
- )
- }
- /// Returns `ExternalFiles` containing files downloaded
- /// from the `HashSet`s with a timeout for each task set to `timeout`.
- ///
- /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to
- /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set
- /// to `MAX_TIMEOUT`.
- #[inline]
- #[must_use]
- pub fn new_with_urls_and_timeout(
- client: &'static Client,
- timeout: Duration,
- adblock_urls: HashSet<HttpUrl>,
- domain_urls: HashSet<HttpUrl>,
- hosts_urls: HashSet<HttpUrl>,
- wildcard_urls: HashSet<HttpUrl>,
- ) -> Self {
let mut val = Self::new();
- val.add_adblock(client, timeout, adblock_urls);
- val.add_domain(client, timeout, domain_urls);
- val.add_hosts(client, timeout, hosts_urls);
- val.add_wildcard(client, timeout, wildcard_urls);
+ val.add_adblock(client, adblock_urls);
+ val.add_domain(client, domain_urls);
+ val.add_hosts(client, hosts_urls);
+ val.add_wildcard(client, wildcard_urls);
val
}
/// Returns an empty `ExternalFiles`.
@@ -851,79 +836,37 @@ impl ExternalFiles {
wildcard: JoinSet::new(),
}
}
- /// Downloads the [`Adblock`] files from `urls` and adds them to [`ExternalFiles::adblock`]
- /// with a timeout for each task set to `timeout`.
- ///
- /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to
- /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set
- /// to `MAX_TIMEOUT`.
+ /// Downloads the [`Adblock`] files from `urls` and adds them to [`ExternalFiles::adblock`].
#[inline]
- pub fn add_adblock(
- &mut self,
- client: &'static Client,
- timeout: Duration,
- urls: HashSet<HttpUrl>,
- ) {
- Self::get_external_files(&mut self.adblock, client, timeout, urls);
+ pub fn add_adblock(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) {
+ Self::get_external_files(&mut self.adblock, client, urls);
}
- /// Downloads the [`DomainOnly`] files from `urls` and adds them to [`ExternalFiles::domain`]
- /// with a timeout for each task set to `timeout`.
- ///
- /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to
- /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set
- /// to `MAX_TIMEOUT`.
+ /// Downloads the [`DomainOnly`] files from `urls` and adds them to [`ExternalFiles::domain`].
#[inline]
- pub fn add_domain(
- &mut self,
- client: &'static Client,
- timeout: Duration,
- urls: HashSet<HttpUrl>,
- ) {
- Self::get_external_files(&mut self.domain, client, timeout, urls);
+ pub fn add_domain(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) {
+ Self::get_external_files(&mut self.domain, client, urls);
}
- /// Downloads the [`Hosts`] files from `urls` and adds them to [`ExternalFiles::hosts`]
- /// with a timeout for each task set to `timeout`.
- ///
- /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to
- /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set
- /// to `MAX_TIMEOUT`.
+ /// Downloads the [`Hosts`] files from `urls` and adds them to [`ExternalFiles::hosts`].
#[inline]
- pub fn add_hosts(
- &mut self,
- client: &'static Client,
- timeout: Duration,
- urls: HashSet<HttpUrl>,
- ) {
- Self::get_external_files(&mut self.hosts, client, timeout, urls);
+ pub fn add_hosts(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) {
+ Self::get_external_files(&mut self.hosts, client, urls);
}
- /// Downloads the [`Wildcard`] files from `urls` and adds them to [`ExternalFiles::wildcard`]
- /// with a timeout for each task set to `timeout`.
- ///
- /// Note that when `timeout` is `<` [`Self::MIN_TIMEOUT`], it is set to
- /// `MIN_TIMEOUT`; similarly when it is `>` [`Self::MAX_TIMEOUT`], it is set
- /// to `MAX_TIMEOUT`.
+ /// Downloads the [`Wildcard`] files from `urls` and adds them to [`ExternalFiles::wildcard`].
#[inline]
- pub fn add_wildcard(
- &mut self,
- client: &'static Client,
- timeout: Duration,
- urls: HashSet<HttpUrl>,
- ) {
- Self::get_external_files(&mut self.wildcard, client, timeout, urls);
+ pub fn add_wildcard(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) {
+ Self::get_external_files(&mut self.wildcard, client, urls);
}
/// Downloads the files from `urls` and converts them to `String`s adding the
/// tasks to `set`.
#[inline]
fn get_external_files(
- set: &mut JoinSet<Result<Result<File, ExtFileErr>, Elapsed>>,
+ set: &mut JoinSet<Result<File, ExtFileErr>>,
client: &'static Client,
- mut timeout: Duration,
urls: HashSet<HttpUrl>,
) {
- timeout = timeout.clamp(Self::MIN_TIMEOUT, Self::MAX_TIMEOUT);
for url in Into::<HashSet<HttpUrl>>::into(urls) {
let url_clone = url.clone();
- set.spawn(time::timeout(timeout, async {
+ set.spawn(async {
let resp = client
.get::<Url>(url_clone.into())
.send()
@@ -940,7 +883,7 @@ impl ExternalFiles {
})
},
)
- }));
+ });
}
}
}
diff --git a/src/main.rs b/src/main.rs
@@ -221,24 +221,27 @@ fn get_external_files(
|e| Err(E::Io(e)),
|runtime| {
runtime.block_on(async {
- Files::from_external({
- let mut files = ExternalFiles::new();
- CLIENT
- .set(
- Client::builder()
- .user_agent(USER_AGENT)
- .use_rustls_tls()
- .build()
- .map_err(ExtFileErr::Http)?,
- )
- .unwrap();
- let client = CLIENT.get().unwrap();
- files.add_adblock(client, timeout, adblock);
- files.add_domain(client, timeout, domain);
- files.add_hosts(client, timeout, hosts);
- files.add_wildcard(client, timeout, wildcard);
- files
- })
+ Files::from_external(
+ {
+ let mut files = ExternalFiles::new();
+ CLIENT
+ .set(
+ Client::builder()
+ .user_agent(USER_AGENT)
+ .use_rustls_tls()
+ .build()
+ .map_err(ExtFileErr::Http)?,
+ )
+ .unwrap();
+ let client = CLIENT.get().unwrap();
+ files.add_adblock(client, adblock);
+ files.add_domain(client, domain);
+ files.add_hosts(client, hosts);
+ files.add_wildcard(client, wildcard);
+ files
+ },
+ timeout,
+ )
.await
.map_err(E::ExtFile)
})
@@ -361,7 +364,7 @@ fn main() -> Result<(), E> {
.map_err(E::from)
.and_then(|()| {
get_external_files(
- config.timeout.unwrap_or(ExternalFiles::MAX_TIMEOUT),
+ config.timeout.unwrap_or(Duration::from_secs(3600)),
config.adblock,
config.domain,
config.hosts,