rpz

Response policy zone (RPZ) file generator.
git clone https://git.philomathiclife.com/repos/rpz
Log | Files | Refs | README

file.rs (31042B)


      1 extern crate alloc;
      2 use crate::dom::{
      3     Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard,
      4 };
      5 use alloc::string::FromUtf8Error;
      6 use core::{
      7     borrow::Borrow,
      8     error::Error,
      9     fmt::{self, Display, Formatter},
     10     hash::Hash,
     11     ops::Deref,
     12     time::Duration,
     13 };
     14 use reqwest::Client;
     15 use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor};
     16 use std::{
     17     collections::{HashMap, HashSet},
     18     fs,
     19     io::{self, ErrorKind},
     20     path::{Path, PathBuf},
     21 };
     22 use superset_map::SupersetSet;
     23 use tokio::{
     24     task::{JoinError, JoinSet},
     25     time::{self, error::Elapsed},
     26 };
     27 use url::Url;
     28 /// Wrapper around an absolute [`PathBuf`] to a directory or file depending on `IS_DIR`.
     29 ///
     30 /// Note that `IS_DIR` iff the wrapped `PathBuf` ends with `/`.
     31 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
     32 pub struct AbsFilePath<const IS_DIR: bool> {
     33     /// The file or directory.
     34     path: PathBuf,
     35 }
     36 impl<const IS_DIR: bool> AbsFilePath<IS_DIR> {
     37     /// Returns `true` iff `val` was appended to `self`.
     38     #[inline]
     39     pub fn append(&mut self, val: &str) -> bool {
     40         let bytes = val.as_bytes();
     41         if IS_DIR {
     42             self.path.as_mut_os_string().push(val);
     43             bytes.last().map_or(true, |byt| {
     44                 if *byt != b'/' {
     45                     self.path.as_mut_os_string().push("/");
     46                 }
     47                 true
     48             })
     49             // When `!IS_DIR`, we have to verify `val` does not end
     50             // with `/` as well as verify the last component is not
     51             // `..` which is true iff `val == ".."` or the last 3
     52             // characters of `val` is `/..`.
     53         } else if bytes.last().map_or(false, |byt| {
     54             *byt == b'/'
     55                 || bytes
     56                     .get(bytes.len().wrapping_sub(2)..)
     57                     .map_or(false, |byts| {
     58                         byts == b".."
     59                             && bytes
     60                                 .get(bytes.len().wrapping_sub(3))
     61                                 .map_or(true, |byt2| *byt2 == b'/')
     62                     })
     63         }) {
     64             false
     65         } else {
     66             self.path.as_mut_os_string().push(val);
     67             true
     68         }
     69     }
     70     /// Returns `self` as a [`Path`] reference.
     71     #[inline]
     72     #[must_use]
     73     pub fn as_path(&self) -> &Path {
     74         self.path.as_path()
     75     }
     76     /// Returns an `AbsFilePath` iff `val` conforms to the following:
     77     /// * `PathBuf::from(val).is_absolute()`.
     78     /// * `PathBuf::from(val).as_bytes().last().unwrap() == b'/'` ⇒ `IS_DIR`.
     79     /// * `PathBuf::from(val).file_name().is_none()` ⇒ `IS_DIR`.
     80     ///
     81     /// If `IS_DIR` and `PathBuf::from(val).as_bytes().last().unwrap() != b'/'`, `val`
     82     /// will have `/` appended to it.
     83     #[expect(clippy::option_if_let_else, reason = "map will not work")]
     84     #[inline]
     85     #[must_use]
     86     pub fn from_string(val: String) -> Option<Self> {
     87         match val.as_bytes().last() {
     88             Some(byt) => {
     89                 let last = *byt;
     90                 let mut path = PathBuf::from(val);
     91                 if path.is_absolute() {
     92                     if last == b'/' {
     93                         IS_DIR.then_some(Self { path })
     94                     } else if IS_DIR {
     95                         path.as_mut_os_string().push("/");
     96                         Some(Self { path })
     97                     } else {
     98                         path.file_name().is_some().then_some(Self { path })
     99                     }
    100                 } else {
    101                     None
    102                 }
    103             }
    104             None => None,
    105         }
    106     }
    107 }
    108 impl<const IS_DIR: bool> AsRef<Path> for AbsFilePath<IS_DIR> {
    109     #[inline]
    110     fn as_ref(&self) -> &Path {
    111         self.as_path()
    112     }
    113 }
    114 impl<const IS_DIR: bool> Borrow<Path> for AbsFilePath<IS_DIR> {
    115     #[inline]
    116     fn borrow(&self) -> &Path {
    117         self.as_path()
    118     }
    119 }
    120 impl<const IS_DIR: bool> Deref for AbsFilePath<IS_DIR> {
    121     type Target = Path;
    122     #[inline]
    123     fn deref(&self) -> &Self::Target {
    124         self.as_path()
    125     }
    126 }
    127 impl<const IS_DIR: bool> From<AbsFilePath<IS_DIR>> for PathBuf {
    128     #[inline]
    129     fn from(value: AbsFilePath<IS_DIR>) -> Self {
    130         value.path
    131     }
    132 }
    133 impl<'de, const IS_DIR: bool> Deserialize<'de> for AbsFilePath<IS_DIR> {
    134     #[expect(clippy::too_many_lines, reason = "this is fine")]
    135     #[inline]
    136     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    137     where
    138         D: Deserializer<'de>,
    139     {
    140         /// `Visitor` for `AbsFilePath`.
    141         struct FilePathVisitor<const IS_DIR: bool>;
    142         impl<'de, const IS_DIR: bool> Visitor<'de> for FilePathVisitor<IS_DIR> {
    143             type Value = AbsFilePath<IS_DIR>;
    144             fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
    145                 formatter.write_str("struct AbsFilePath")
    146             }
    147             #[expect(clippy::arithmetic_side_effects, reason = "math has been verified")]
    148             fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    149             where
    150                 E: de::Error,
    151             {
    152                 v.as_bytes().last().map_or_else(
    153                     || {
    154                         Err(E::invalid_value(
    155                             Unexpected::Str(v),
    156                             &"an absolute file path",
    157                         ))
    158                     },
    159                     |byt| {
    160                         if *byt == b'/' {
    161                             if IS_DIR {
    162                                 let path = PathBuf::with_capacity(v.len()).join(v);
    163                                 if path.is_absolute() {
    164                                     Ok(AbsFilePath { path })
    165                                 } else {
    166                                     Err(E::invalid_value(
    167                                         Unexpected::Str(v),
    168                                         &"an absolute file path to a directory",
    169                                     ))
    170                                 }
    171                             } else {
    172                                 Err(E::invalid_value(
    173                                     Unexpected::Str(v),
    174                                     &"an absolute file path to a file",
    175                                 ))
    176                             }
    177                         } else if IS_DIR {
    178                             let mut path = PathBuf::with_capacity(v.len() + 1).join(v);
    179                             path.as_mut_os_string().push("/");
    180                             if path.is_absolute() {
    181                                 Ok(AbsFilePath { path })
    182                             } else {
    183                                 Err(E::invalid_value(
    184                                     Unexpected::Str(v),
    185                                     &"an absolute file path to a directory",
    186                                 ))
    187                             }
    188                         } else {
    189                             let path = PathBuf::with_capacity(v.len()).join(v);
    190                             if path.is_absolute() && path.file_name().is_some() {
    191                                 Ok(AbsFilePath { path })
    192                             } else {
    193                                 Err(E::invalid_value(
    194                                     Unexpected::Str(v),
    195                                     &"an absolute file path to a file",
    196                                 ))
    197                             }
    198                         }
    199                     },
    200                 )
    201             }
    202             fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
    203             where
    204                 E: de::Error,
    205             {
    206                 match v.as_bytes().last() {
    207                     Some(byt) => {
    208                         if *byt == b'/' {
    209                             if IS_DIR {
    210                                 let path = PathBuf::from(v);
    211                                 if path.is_absolute() {
    212                                     Ok(AbsFilePath { path })
    213                                 } else {
    214                                     Err(E::invalid_value(
    215                                         Unexpected::Str(path.to_string_lossy().as_ref()),
    216                                         &"an absolute file path to a directory",
    217                                     ))
    218                                 }
    219                             } else {
    220                                 Err(E::invalid_value(
    221                                     Unexpected::Str(v.as_str()),
    222                                     &"an absolute file path to a file",
    223                                 ))
    224                             }
    225                         } else if IS_DIR {
    226                             let mut path = PathBuf::from(v);
    227                             path.as_mut_os_string().push("/");
    228                             if path.is_absolute() {
    229                                 Ok(AbsFilePath { path })
    230                             } else {
    231                                 Err(E::invalid_value(
    232                                     Unexpected::Str(path.to_string_lossy().as_ref()),
    233                                     &"an absolute file path to a directory",
    234                                 ))
    235                             }
    236                         } else {
    237                             let path = PathBuf::from(v);
    238                             if path.is_absolute() && path.file_name().is_some() {
    239                                 Ok(AbsFilePath { path })
    240                             } else {
    241                                 Err(E::invalid_value(
    242                                     Unexpected::Str(path.to_string_lossy().as_ref()),
    243                                     &"an absolute file path to a file",
    244                                 ))
    245                             }
    246                         }
    247                     }
    248                     None => Err(E::invalid_value(
    249                         Unexpected::Str(v.as_str()),
    250                         &"an absolute file path",
    251                     )),
    252                 }
    253             }
    254         }
    255         deserializer.deserialize_string(FilePathVisitor)
    256     }
    257 }
    258 impl<const IS_DIR: bool> Display for AbsFilePath<IS_DIR> {
    259     #[inline]
    260     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
    261         f.write_str(&self.path.to_string_lossy())
    262     }
    263 }
    264 /// Wrapper around an absolute HTTP(S) [`Url`].
    265 #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
    266 pub struct HttpUrl {
    267     /// The HTTP(S) `Url`.
    268     url: Url,
    269 }
    270 impl HttpUrl {
    271     /// Returns `self` as a [`Url`] reference.
    272     #[inline]
    273     #[must_use]
    274     pub const fn as_url(&self) -> &Url {
    275         &self.url
    276     }
    277     /// Returns an `HttpUrl` iff `url` has a host and HTTP(S) scheme.
    278     #[inline]
    279     #[must_use]
    280     pub fn from_url(url: Url) -> Option<Self> {
    281         (url.has_host() && (url.scheme() == "http" || url.scheme() == "https"))
    282             .then_some(Self { url })
    283     }
    284 }
    285 impl AsRef<Url> for HttpUrl {
    286     #[inline]
    287     fn as_ref(&self) -> &Url {
    288         self.as_url()
    289     }
    290 }
    291 impl Borrow<Url> for HttpUrl {
    292     #[inline]
    293     fn borrow(&self) -> &Url {
    294         self.as_url()
    295     }
    296 }
    297 impl Deref for HttpUrl {
    298     type Target = Url;
    299     #[inline]
    300     fn deref(&self) -> &Self::Target {
    301         self.as_url()
    302     }
    303 }
    304 impl Display for HttpUrl {
    305     #[inline]
    306     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
    307         self.url.fmt(f)
    308     }
    309 }
    310 impl From<HttpUrl> for Url {
    311     #[inline]
    312     fn from(value: HttpUrl) -> Self {
    313         value.url
    314     }
    315 }
    316 impl<'de> Deserialize<'de> for HttpUrl {
    317     #[inline]
    318     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    319     where
    320         D: Deserializer<'de>,
    321     {
    322         /// `Visitor` for `HttpUrl`.
    323         struct UrlVisitor;
    324         impl<'d> Visitor<'d> for UrlVisitor {
    325             type Value = HttpUrl;
    326             fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result {
    327                 formatter.write_str("struct HttpUrl")
    328             }
    329             fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
    330             where
    331                 E: de::Error,
    332             {
    333                 Url::parse(v).map_or_else(
    334                     |_| {
    335                         Err(E::invalid_type(
    336                             Unexpected::Str(v),
    337                             &"an absolute URL with HTTP(S) scheme",
    338                         ))
    339                     },
    340                     |url| {
    341                         if url.has_host() && (url.scheme() == "http" || url.scheme() == "https") {
    342                             Ok(HttpUrl { url })
    343                         } else {
    344                             Err(E::invalid_value(
    345                                 Unexpected::Other(v),
    346                                 &"an absolute URL with HTTP(S) scheme",
    347                             ))
    348                         }
    349                     },
    350                 )
    351             }
    352         }
    353         deserializer.deserialize_str(UrlVisitor)
    354     }
    355 }
    356 /// Represents the kind of [`ParsedDomain`]s a [`File`]
    357 /// contains.
    358 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    359 pub enum Kind {
    360     /// [`Adblock`] domains.
    361     Adblock,
    362     /// [`DomainOnly`] domains.
    363     DomainOnly,
    364     /// [`Hosts`] domains.
    365     Hosts,
    366     /// [`Wildcard`] domains.
    367     Wildcard,
    368 }
    369 impl Display for Kind {
    370     #[inline]
    371     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
    372         match *self {
    373             Self::Adblock => f.write_str("Adblock"),
    374             Self::DomainOnly => f.write_str("Domain-only"),
    375             Self::Hosts => f.write_str("Hosts"),
    376             Self::Wildcard => f.write_str("Wildcard"),
    377         }
    378     }
    379 }
    380 /// The name where a [`File`] was sourced from.
    381 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    382 pub enum Name {
    383     /// The `File` came from the contained `AbsFilePath`.
    384     Path(AbsFilePath<false>),
    385     /// The `File` came from the contained `HttpUrl`.
    386     Url(HttpUrl),
    387 }
    388 impl Display for Name {
    389     #[inline]
    390     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
    391         match *self {
    392             Self::Path(ref path) => path.fmt(f),
    393             Self::Url(ref url) => url.fmt(f),
    394         }
    395     }
    396 }
    397 /// An in-memory file sourced from [`Self::name`];
    398 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
    399 pub struct File {
    400     /// The name/origin of the file.
    401     pub name: Name,
    402     /// The contents of the file.
    403     pub data: String,
    404 }
    405 impl Display for File {
    406     #[inline]
    407     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
    408         self.name.fmt(f)
    409     }
    410 }
/// A summary of a [`File`] after it has been parsed
/// into [`Value`]s.
///
/// `E` is the parsing error type; identical errors are tallied
/// in [`Self::errors`] instead of being stored individually.
#[derive(Clone, Debug)]
pub struct Summary<'a, E: Eq + Hash> {
    /// The `File` that was parsed.
    pub file: &'a File,
    /// The kind of file.
    pub kind: Kind,
    /// The quantity of domains parsed.
    pub domain_count: usize,
    /// The quantity of comments parsed.
    pub comment_count: usize,
    /// The quantity of blank lines parsed.
    pub blank_count: usize,
    /// Parsing errors and their counts.
    pub errors: HashMap<E, usize>,
}
    428 impl<E: Display + Eq + Hash> Display for Summary<'_, E> {
    429     #[inline]
    430     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
    431         write!(
    432             f,
    433             "({}) {} - domains parsed: {}, comments parsed: {}, blanks parsed: {}, parsing errors: {}",
    434             self.kind, self.file, self.domain_count, self.comment_count, self.blank_count, self.errors.values().sum::<usize>()
    435         ).and_then(|()| {
    436             if self.errors.is_empty() {
    437                 Ok(())
    438             } else {
    439                 f.write_str(", errors: [").and_then(|()| {
    440                     self.errors.iter().try_fold((), |(), tup| {
    441                         write!(f, "{}: {}, ", tup.0, tup.1)
    442                     })
    443                 }).and_then(|()| f.write_str("]"))
    444             }
    445         })
    446     }
    447 }
    448 impl<E: Eq + Hash> PartialEq<Summary<'_, E>> for Summary<'_, E> {
    449     #[inline]
    450     fn eq(&self, other: &Summary<'_, E>) -> bool {
    451         self.file == other.file
    452             && self.kind == other.kind
    453             && self.domain_count == other.domain_count
    454             && self.comment_count == other.comment_count
    455             && self.blank_count == other.blank_count
    456             && self.errors == other.errors
    457     }
    458 }
    459 impl<E: Eq + Hash> Eq for Summary<'_, E> {}
/// Container of [`Adblock`], [`DomainOnly`], [`Hosts`], and [`Wildcard`]
/// files.
///
/// The field a [`File`] is stored in determines which domain type its
/// lines are parsed as.
#[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)]
pub struct Files {
    /// [`Adblock`] files.
    pub adblock: Vec<File>,
    /// [`DomainOnly`] files.
    pub domain: Vec<File>,
    /// [`Hosts`] files.
    pub hosts: Vec<File>,
    /// [`Wildcard`] files.
    pub wildcard: Vec<File>,
}
/// Helper trait to track what kind of file.
///
/// Lets code that is generic over the parsed domain type recover the
/// matching [`Kind`] without needing a value of that type.
trait Helper {
    /// Returns the `Kind`.
    fn kind() -> Kind;
}
/// [`Adblock`] domains correspond to [`Kind::Adblock`].
impl Helper for Adblock<'_> {
    fn kind() -> Kind {
        Kind::Adblock
    }
}
/// [`DomainOnly`] domains correspond to [`Kind::DomainOnly`].
impl Helper for DomainOnly<'_> {
    fn kind() -> Kind {
        Kind::DomainOnly
    }
}
/// [`Hosts`] domains correspond to [`Kind::Hosts`].
impl Helper for Hosts<'_> {
    fn kind() -> Kind {
        Kind::Hosts
    }
}
/// [`Wildcard`] domains correspond to [`Kind::Wildcard`].
impl Helper for Wildcard<'_> {
    fn kind() -> Kind {
        Kind::Wildcard
    }
}
    498 impl Files {
    499     /// Attempts to convert an `ExternalFiles` into a `Files`
    500     /// with `timeout_per_file` as the maximum amount of time allowed
    501     /// for a file to finish downloading.
    502     ///
    503     /// Note that if `timeout_per_file` is 0 or larger than one hour, then
    504     /// it'll be set to one hour. Similarly, if `timeout_per_file`,
    505     /// is less than 1 second, it'll be be set to one second.
    506     ///
    507     /// # Errors
    508     ///
    509     /// Returns `ExtFileErr` iff any of the files in `ExternalFiles`
    510     /// errors when downloading them and transforming them into a [`String`].
    511     #[inline]
    512     pub async fn from_external(
    513         ext: ExternalFiles,
    514         mut timeout_per_file: Duration,
    515     ) -> Result<Self, ExtFileErr> {
    516         /// `await`s each `Result` in `set` propagating any errors; otherwise
    517         /// the `String` is `push`ed into `files`.
    518         ///
    519         /// If an error occurs, then the function immediately returns.
    520         async fn add_file(
    521             files: &mut Vec<File>,
    522             timeout: Duration,
    523             mut set: JoinSet<Result<File, ExtFileErr>>,
    524         ) -> Result<(), ExtFileErr> {
    525             while let Some(file) = time::timeout(timeout, set.join_next()).await? {
    526                 files.push(file??);
    527             }
    528             Ok(())
    529         }
    530         timeout_per_file = if timeout_per_file == Duration::ZERO {
    531             Duration::from_secs(3600)
    532         } else {
    533             timeout_per_file.clamp(Duration::from_secs(1), Duration::from_secs(3600))
    534         };
    535         let mut files = Self {
    536             adblock: Vec::with_capacity(ext.adblock.len()),
    537             domain: Vec::with_capacity(ext.domain.len()),
    538             hosts: Vec::with_capacity(ext.hosts.len()),
    539             wildcard: Vec::with_capacity(ext.wildcard.len()),
    540         };
    541         add_file(&mut files.adblock, timeout_per_file, ext.adblock).await?;
    542         add_file(&mut files.domain, timeout_per_file, ext.domain).await?;
    543         add_file(&mut files.hosts, timeout_per_file, ext.hosts).await?;
    544         add_file(&mut files.wildcard, timeout_per_file, ext.wildcard).await?;
    545         Ok(files)
    546     }
    /// Creates an empty instance of `Files`.
    ///
    /// Every field is an empty `Vec`.
    #[inline]
    #[must_use]
    pub const fn new() -> Self {
        Self {
            adblock: Vec::new(),
            domain: Vec::new(),
            hosts: Vec::new(),
            wildcard: Vec::new(),
        }
    }
    558     /// Reads each [`String`] from each field and attempts
    559     /// to transform each line into an `RpzDomain` before adding
    560     /// the domain into `doms`.
    561     ///
    562     /// Returns a `Vec` containing `Summary` information for each
    563     /// [`File`] that was parsed.
    564     #[expect(clippy::arithmetic_side_effects, reason = "math is verified")]
    565     #[inline]
    566     pub fn add_to_superset<'a: 'b, 'b>(
    567         &'a self,
    568         doms: &mut SupersetSet<RpzDomain<'b>>,
    569     ) -> Vec<Summary<'a, FirefoxDomainErr>> {
    570         /// Iterates each `String` from `files` and transforms each line
    571         /// into `T` before adding it as an `RpzDomain` into `doms`.
    572         fn insert<
    573             'a,
    574             'b: 'a,
    575             T: Into<RpzDomain<'a>> + ParsedDomain<'a, Error = FirefoxDomainErr> + Helper,
    576         >(
    577             doms: &mut SupersetSet<RpzDomain<'a>>,
    578             files: &'b [File],
    579             summaries: &mut Vec<Summary<'b, FirefoxDomainErr>>,
    580         ) {
    581             let kind = T::kind();
    582             files.iter().fold((), |(), file| {
    583                 let mut summary = Summary {
    584                     file,
    585                     kind,
    586                     domain_count: 0,
    587                     comment_count: 0,
    588                     blank_count: 0,
    589                     errors: HashMap::new(),
    590                 };
    591                 file.data
    592                     .lines()
    593                     .fold((), |(), line| match T::parse_value(line) {
    594                         Ok(val) => match val {
    595                             Value::Domain(dom) => {
    596                                 summary.domain_count = summary.domain_count.saturating_add(1);
    597                                 doms.insert(dom.into());
    598                             }
    599                             Value::Comment(_) => {
    600                                 summary.comment_count = summary.comment_count.saturating_add(1);
    601                             }
    602                             Value::Blank => {
    603                                 summary.blank_count = summary.blank_count.saturating_add(1);
    604                             }
    605                         },
    606                         Err(err) => {
    607                             let count = summary.errors.entry(err).or_insert(0);
    608                             *count = count.saturating_add(1);
    609                         }
    610                     });
    611                 summaries.push(summary);
    612             });
    613         }
    614         let mut summaries = Vec::with_capacity(
    615             self.adblock.len() + self.domain.len() + self.hosts.len() + self.wildcard.len(),
    616         );
    617         insert::<Adblock<'_>>(doms, self.adblock.as_slice(), &mut summaries);
    618         insert::<DomainOnly<'_>>(doms, self.domain.as_slice(), &mut summaries);
    619         insert::<Hosts<'_>>(doms, self.hosts.as_slice(), &mut summaries);
    620         insert::<Wildcard<'_>>(doms, self.wildcard.as_slice(), &mut summaries);
    621         summaries
    622     }
    623     /// Returns `true` iff there are no files.
    624     #[inline]
    625     #[must_use]
    626     pub fn is_empty(&self) -> bool {
    627         self.adblock.is_empty()
    628             && self.domain.is_empty()
    629             && self.hosts.is_empty()
    630             && self.wildcard.is_empty()
    631     }
    632 }
/// The default `Files` is the empty one returned by [`Files::new`].
impl Default for Files {
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}
/// Block and unblock [`Files`] stored on the local file system.
///
/// Instances are built by [`LocalFiles::from_path`].
#[derive(Debug)]
pub struct LocalFiles {
    /// Local file system files containing domains to not block.
    pub unblock: Files,
    /// Local file system files containing domains to block.
    pub block: Files,
}
    647 impl LocalFiles {
    648     /// Reads all block and unblock files in the `adblock/`, `domain/`, `hosts/`, and `wildcard/` directories
    649     /// under `dir/block/` and `dir/unblock/` respectively.
    650     ///
    651     /// # Errors
    652     ///
    653     /// Returns [`io::Error`] iff reading said files causes an error. Note that
    654     /// it is _not_ an error if a directory does not exist.
    655     #[expect(
    656         clippy::wildcard_enum_match_arm,
    657         reason = "too many to enumerate manually"
    658     )]
    659     #[inline]
    660     pub fn from_path(dir: AbsFilePath<true>) -> Result<Option<Self>, io::Error> {
    661         /// Checks if `path` exists.
    662         fn exists<P: AsRef<Path>>(path: P) -> Result<bool, io::Error> {
    663             fs::metadata(path).map_or_else(
    664                 |err| match err.kind() {
    665                     ErrorKind::NotFound => Ok(false),
    666                     _ => Err(err),
    667                 },
    668                 |_| Ok(true),
    669             )
    670         }
    671         /// Reads all files stored in `adblock/`, `domain/`, `hosts/`, and `wildcard/`
    672         /// directories under `dir/name/`.
    673         fn get_files(
    674             mut dir: PathBuf,
    675             files: &mut Files,
    676             name: &str,
    677         ) -> Result<PathBuf, io::Error> {
    678             /// Reads all files under `dir/name/`.
    679             fn get_file(
    680                 files: &mut Vec<File>,
    681                 mut dir: PathBuf,
    682                 name: &str,
    683             ) -> Result<PathBuf, io::Error> {
    684                 dir.push(name);
    685                 if exists(dir.as_path())? {
    686                     for entry in fs::read_dir(dir.as_path())? {
    687                         let file = entry?;
    688                         if !file.file_type()?.is_dir() {
    689                             let path = file.path();
    690                             files.push(fs::read_to_string(path.as_path()).map(|data| File {
    691                                 name: Name::Path(AbsFilePath::<false> { path }),
    692                                 data,
    693                             })?);
    694                         }
    695                     }
    696                 }
    697                 dir.pop();
    698                 Ok(dir)
    699             }
    700             dir.push(name);
    701             if exists(dir.as_path())? {
    702                 get_file(&mut files.adblock, dir, "adblock/")
    703                     .and_then(|dir2| get_file(&mut files.domain, dir2, "domain/"))
    704                     .and_then(|dir2| get_file(&mut files.hosts, dir2, "hosts/"))
    705                     .and_then(|dir2| get_file(&mut files.wildcard, dir2, "wildcard/"))
    706                     .map(|mut dir2| {
    707                         dir2.pop();
    708                         dir2
    709                     })
    710             } else {
    711                 dir.pop();
    712                 Ok(dir)
    713             }
    714         }
    715         let mut unblock = Files::new();
    716         let mut block = Files::new();
    717         get_files(PathBuf::from(dir), &mut unblock, "unblock/").and_then(|dir2| {
    718             get_files(dir2, &mut block, "block/").map(|_| {
    719                 if unblock.is_empty() && block.is_empty() {
    720                     None
    721                 } else {
    722                     Some(Self { unblock, block })
    723                 }
    724             })
    725         })
    726     }
    727 }
    728 /// Error returned when downloading text files from HTTP(S) servers.
    729 #[derive(Debug)]
    730 pub enum ExtFileErr {
    731     /// Error when a task exceeds the specified timeout.
    732     Timeout(Elapsed),
    733     /// HTTP(S) error when attempting to download a file.
    734     Http(reqwest::Error),
    735     /// Error when a task fails to complete.
    736     Join(JoinError),
    737     /// Error when a file is not valid UTF-8.
    738     InvalidUtf8(FromUtf8Error),
    739 }
    740 impl Display for ExtFileErr {
    741     #[inline]
    742     fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
    743         match *self {
    744             Self::Timeout(ref err) => err.fmt(f),
    745             Self::Http(ref err) => err.fmt(f),
    746             Self::Join(ref err) => err.fmt(f),
    747             Self::InvalidUtf8(ref err) => err.fmt(f),
    748         }
    749     }
    750 }
    751 impl From<Elapsed> for ExtFileErr {
    752     #[inline]
    753     fn from(value: Elapsed) -> Self {
    754         Self::Timeout(value)
    755     }
    756 }
    757 impl From<reqwest::Error> for ExtFileErr {
    758     #[inline]
    759     fn from(value: reqwest::Error) -> Self {
    760         Self::Http(value)
    761     }
    762 }
    763 impl From<JoinError> for ExtFileErr {
    764     #[inline]
    765     fn from(value: JoinError) -> Self {
    766         Self::Join(value)
    767     }
    768 }
    769 impl From<FromUtf8Error> for ExtFileErr {
    770     #[inline]
    771     fn from(value: FromUtf8Error) -> Self {
    772         Self::InvalidUtf8(value)
    773     }
    774 }
    775 impl Error for ExtFileErr {}
    776 /// Tasks of downloaded files from HTTP(S) servers.
    777 #[derive(Debug)]
    778 pub struct ExternalFiles {
    779     /// [`Adblock`]-based files.
    780     pub adblock: JoinSet<Result<File, ExtFileErr>>,
    781     /// [`DomainOnly`]-based files.
    782     pub domain: JoinSet<Result<File, ExtFileErr>>,
    783     /// [`Hosts`]-based files.
    784     pub hosts: JoinSet<Result<File, ExtFileErr>>,
    785     /// [`Wildcard`]-based files.
    786     pub wildcard: JoinSet<Result<File, ExtFileErr>>,
    787 }
    788 impl ExternalFiles {
    789     /// Returns `ExternalFiles` containing files downloaded
    790     /// from the `HashSet`s.
    791     #[inline]
    792     #[must_use]
    793     pub fn new_with_urls(
    794         client: &'static Client,
    795         adblock_urls: HashSet<HttpUrl>,
    796         domain_urls: HashSet<HttpUrl>,
    797         hosts_urls: HashSet<HttpUrl>,
    798         wildcard_urls: HashSet<HttpUrl>,
    799     ) -> Self {
    800         let mut val = Self::new();
    801         val.add_adblock(client, adblock_urls);
    802         val.add_domain(client, domain_urls);
    803         val.add_hosts(client, hosts_urls);
    804         val.add_wildcard(client, wildcard_urls);
    805         val
    806     }
    807     /// Returns an empty `ExternalFiles`.
    808     #[inline]
    809     #[must_use]
    810     pub fn new() -> Self {
    811         Self {
    812             adblock: JoinSet::new(),
    813             domain: JoinSet::new(),
    814             hosts: JoinSet::new(),
    815             wildcard: JoinSet::new(),
    816         }
    817     }
    818     /// Downloads the [`Adblock`] files from `urls` and adds them to [`ExternalFiles::adblock`].
    819     #[inline]
    820     pub fn add_adblock(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) {
    821         Self::get_external_files(&mut self.adblock, client, urls);
    822     }
    823     /// Downloads the [`DomainOnly`] files from `urls` and adds them to [`ExternalFiles::domain`].
    824     #[inline]
    825     pub fn add_domain(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) {
    826         Self::get_external_files(&mut self.domain, client, urls);
    827     }
    828     /// Downloads the [`Hosts`] files from `urls` and adds them to [`ExternalFiles::hosts`].
    829     #[inline]
    830     pub fn add_hosts(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) {
    831         Self::get_external_files(&mut self.hosts, client, urls);
    832     }
    833     /// Downloads the [`Wildcard`] files from `urls` and adds them to [`ExternalFiles::wildcard`].
    834     #[inline]
    835     pub fn add_wildcard(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) {
    836         Self::get_external_files(&mut self.wildcard, client, urls);
    837     }
    838     /// Downloads the files from `urls` and converts them to `String`s adding the
    839     /// tasks to `set`.
    840     #[expect(clippy::iter_over_hash_type, reason = "order does not matter")]
    841     fn get_external_files(
    842         set: &mut JoinSet<Result<File, ExtFileErr>>,
    843         client: &'static Client,
    844         urls: HashSet<HttpUrl>,
    845     ) {
    846         for url in Into::<HashSet<HttpUrl>>::into(urls) {
    847             let url_clone = url.clone();
    848             set.spawn(async {
    849                 let resp = client
    850                     .get::<Url>(url_clone.into())
    851                     .send()
    852                     .await?
    853                     .error_for_status()?;
    854                 resp.bytes().await.map_or_else(
    855                     |err| Err(ExtFileErr::Http(err)),
    856                     |bytes| {
    857                         String::from_utf8(bytes.into())
    858                             .map_err(ExtFileErr::InvalidUtf8)
    859                             .map(|data| File {
    860                                 name: Name::Url(url),
    861                                 data,
    862                             })
    863                     },
    864                 )
    865             });
    866         }
    867     }
    868 }
    869 impl Default for ExternalFiles {
    870     #[inline]
    871     fn default() -> Self {
    872         Self::new()
    873     }
    874 }