file.rs (31042B)
1 extern crate alloc; 2 use crate::dom::{ 3 Adblock, DomainOnly, FirefoxDomainErr, Hosts, ParsedDomain, RpzDomain, Value, Wildcard, 4 }; 5 use alloc::string::FromUtf8Error; 6 use core::{ 7 borrow::Borrow, 8 error::Error, 9 fmt::{self, Display, Formatter}, 10 hash::Hash, 11 ops::Deref, 12 time::Duration, 13 }; 14 use reqwest::Client; 15 use serde::de::{self, Deserialize, Deserializer, Unexpected, Visitor}; 16 use std::{ 17 collections::{HashMap, HashSet}, 18 fs, 19 io::{self, ErrorKind}, 20 path::{Path, PathBuf}, 21 }; 22 use superset_map::SupersetSet; 23 use tokio::{ 24 task::{JoinError, JoinSet}, 25 time::{self, error::Elapsed}, 26 }; 27 use url::Url; 28 /// Wrapper around an absolute [`PathBuf`] to a directory or file depending on `IS_DIR`. 29 /// 30 /// Note that `IS_DIR` iff the wrapped `PathBuf` ends with `/`. 31 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 32 pub struct AbsFilePath<const IS_DIR: bool> { 33 /// The file or directory. 34 path: PathBuf, 35 } 36 impl<const IS_DIR: bool> AbsFilePath<IS_DIR> { 37 /// Returns `true` iff `val` was appended to `self`. 38 #[inline] 39 pub fn append(&mut self, val: &str) -> bool { 40 let bytes = val.as_bytes(); 41 if IS_DIR { 42 self.path.as_mut_os_string().push(val); 43 bytes.last().map_or(true, |byt| { 44 if *byt != b'/' { 45 self.path.as_mut_os_string().push("/"); 46 } 47 true 48 }) 49 // When `!IS_DIR`, we have to verify `val` does not end 50 // with `/` as well as verify the last component is not 51 // `..` which is true iff `val == ".."` or the last 3 52 // characters of `val` is `/..`. 53 } else if bytes.last().map_or(false, |byt| { 54 *byt == b'/' 55 || bytes 56 .get(bytes.len().wrapping_sub(2)..) 57 .map_or(false, |byts| { 58 byts == b".." 59 && bytes 60 .get(bytes.len().wrapping_sub(3)) 61 .map_or(true, |byt2| *byt2 == b'/') 62 }) 63 }) { 64 false 65 } else { 66 self.path.as_mut_os_string().push(val); 67 true 68 } 69 } 70 /// Returns `self` as a [`Path`] reference. 71 #[inline] 72 #[must_use] 73 pub fn as_path(&self) -> &Path { 74 self.path.as_path() 75 } 76 /// Returns an `AbsFilePath` iff `val` conforms to the following: 77 /// * `PathBuf::from(val).is_absolute()`. 78 /// * `PathBuf::from(val).as_bytes().last().unwrap() == b'/'` ⇒ `IS_DIR`. 79 /// * `PathBuf::from(val).file_name().is_none()` ⇒ `IS_DIR`. 80 /// 81 /// If `IS_DIR` and `PathBuf::from(val).as_bytes().last().unwrap() != b'/'`, `val` 82 /// will have `/` appended to it. 83 #[expect(clippy::option_if_let_else, reason = "map will not work")] 84 #[inline] 85 #[must_use] 86 pub fn from_string(val: String) -> Option<Self> { 87 match val.as_bytes().last() { 88 Some(byt) => { 89 let last = *byt; 90 let mut path = PathBuf::from(val); 91 if path.is_absolute() { 92 if last == b'/' { 93 IS_DIR.then_some(Self { path }) 94 } else if IS_DIR { 95 path.as_mut_os_string().push("/"); 96 Some(Self { path }) 97 } else { 98 path.file_name().is_some().then_some(Self { path }) 99 } 100 } else { 101 None 102 } 103 } 104 None => None, 105 } 106 } 107 } 108 impl<const IS_DIR: bool> AsRef<Path> for AbsFilePath<IS_DIR> { 109 #[inline] 110 fn as_ref(&self) -> &Path { 111 self.as_path() 112 } 113 } 114 impl<const IS_DIR: bool> Borrow<Path> for AbsFilePath<IS_DIR> { 115 #[inline] 116 fn borrow(&self) -> &Path { 117 self.as_path() 118 } 119 } 120 impl<const IS_DIR: bool> Deref for AbsFilePath<IS_DIR> { 121 type Target = Path; 122 #[inline] 123 fn deref(&self) -> &Self::Target { 124 self.as_path() 125 } 126 } 127 impl<const IS_DIR: bool> From<AbsFilePath<IS_DIR>> for PathBuf { 128 #[inline] 129 fn from(value: AbsFilePath<IS_DIR>) -> Self { 130 value.path 131 } 132 } 133 impl<'de, const IS_DIR: bool> Deserialize<'de> for AbsFilePath<IS_DIR> { 134 #[expect(clippy::too_many_lines, reason = "this is fine")] 135 #[inline] 136 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 137 where 138 D: Deserializer<'de>, 139 { 140 /// `Visitor` for `AbsFilePath`. 141 struct FilePathVisitor<const IS_DIR: bool>; 142 impl<'de, const IS_DIR: bool> Visitor<'de> for FilePathVisitor<IS_DIR> { 143 type Value = AbsFilePath<IS_DIR>; 144 fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { 145 formatter.write_str("struct AbsFilePath") 146 } 147 #[expect(clippy::arithmetic_side_effects, reason = "math has been verified")] 148 fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> 149 where 150 E: de::Error, 151 { 152 v.as_bytes().last().map_or_else( 153 || { 154 Err(E::invalid_value( 155 Unexpected::Str(v), 156 &"an absolute file path", 157 )) 158 }, 159 |byt| { 160 if *byt == b'/' { 161 if IS_DIR { 162 let path = PathBuf::with_capacity(v.len()).join(v); 163 if path.is_absolute() { 164 Ok(AbsFilePath { path }) 165 } else { 166 Err(E::invalid_value( 167 Unexpected::Str(v), 168 &"an absolute file path to a directory", 169 )) 170 } 171 } else { 172 Err(E::invalid_value( 173 Unexpected::Str(v), 174 &"an absolute file path to a file", 175 )) 176 } 177 } else if IS_DIR { 178 let mut path = PathBuf::with_capacity(v.len() + 1).join(v); 179 path.as_mut_os_string().push("/"); 180 if path.is_absolute() { 181 Ok(AbsFilePath { path }) 182 } else { 183 Err(E::invalid_value( 184 Unexpected::Str(v), 185 &"an absolute file path to a directory", 186 )) 187 } 188 } else { 189 let path = PathBuf::with_capacity(v.len()).join(v); 190 if path.is_absolute() && path.file_name().is_some() { 191 Ok(AbsFilePath { path }) 192 } else { 193 Err(E::invalid_value( 194 Unexpected::Str(v), 195 &"an absolute file path to a file", 196 )) 197 } 198 } 199 }, 200 ) 201 } 202 fn visit_string<E>(self, v: String) -> Result<Self::Value, E> 203 where 204 E: de::Error, 205 { 206 match v.as_bytes().last() { 207 Some(byt) => { 208 if *byt == b'/' { 209 if IS_DIR { 210 let path = PathBuf::from(v); 211 if path.is_absolute() { 212 Ok(AbsFilePath { path }) 213 } else { 214 Err(E::invalid_value( 215 Unexpected::Str(path.to_string_lossy().as_ref()), 216 &"an absolute file path to a directory", 217 )) 218 } 219 } else { 220 Err(E::invalid_value( 221 Unexpected::Str(v.as_str()), 222 &"an absolute file path to a file", 223 )) 224 } 225 } else if IS_DIR { 226 let mut path = PathBuf::from(v); 227 path.as_mut_os_string().push("/"); 228 if path.is_absolute() { 229 Ok(AbsFilePath { path }) 230 } else { 231 Err(E::invalid_value( 232 Unexpected::Str(path.to_string_lossy().as_ref()), 233 &"an absolute file path to a directory", 234 )) 235 } 236 } else { 237 let path = PathBuf::from(v); 238 if path.is_absolute() && path.file_name().is_some() { 239 Ok(AbsFilePath { path }) 240 } else { 241 Err(E::invalid_value( 242 Unexpected::Str(path.to_string_lossy().as_ref()), 243 &"an absolute file path to a file", 244 )) 245 } 246 } 247 } 248 None => Err(E::invalid_value( 249 Unexpected::Str(v.as_str()), 250 &"an absolute file path", 251 )), 252 } 253 } 254 } 255 deserializer.deserialize_string(FilePathVisitor) 256 } 257 } 258 impl<const IS_DIR: bool> Display for AbsFilePath<IS_DIR> { 259 #[inline] 260 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 261 f.write_str(&self.path.to_string_lossy()) 262 } 263 } 264 /// Wrapper around an absolute HTTP(S) [`Url`]. 265 #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] 266 pub struct HttpUrl { 267 /// The HTTP(S) `Url`. 268 url: Url, 269 } 270 impl HttpUrl { 271 /// Returns `self` as a [`Url`] reference. 272 #[inline] 273 #[must_use] 274 pub const fn as_url(&self) -> &Url { 275 &self.url 276 } 277 /// Returns an `HttpUrl` iff `url` has a host and HTTP(S) scheme. 278 #[inline] 279 #[must_use] 280 pub fn from_url(url: Url) -> Option<Self> { 281 (url.has_host() && (url.scheme() == "http" || url.scheme() == "https")) 282 .then_some(Self { url }) 283 } 284 } 285 impl AsRef<Url> for HttpUrl { 286 #[inline] 287 fn as_ref(&self) -> &Url { 288 self.as_url() 289 } 290 } 291 impl Borrow<Url> for HttpUrl { 292 #[inline] 293 fn borrow(&self) -> &Url { 294 self.as_url() 295 } 296 } 297 impl Deref for HttpUrl { 298 type Target = Url; 299 #[inline] 300 fn deref(&self) -> &Self::Target { 301 self.as_url() 302 } 303 } 304 impl Display for HttpUrl { 305 #[inline] 306 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 307 self.url.fmt(f) 308 } 309 } 310 impl From<HttpUrl> for Url { 311 #[inline] 312 fn from(value: HttpUrl) -> Self { 313 value.url 314 } 315 } 316 impl<'de> Deserialize<'de> for HttpUrl { 317 #[inline] 318 fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> 319 where 320 D: Deserializer<'de>, 321 { 322 /// `Visitor` for `HttpUrl`. 323 struct UrlVisitor; 324 impl<'d> Visitor<'d> for UrlVisitor { 325 type Value = HttpUrl; 326 fn expecting(&self, formatter: &mut Formatter<'_>) -> fmt::Result { 327 formatter.write_str("struct HttpUrl") 328 } 329 fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> 330 where 331 E: de::Error, 332 { 333 Url::parse(v).map_or_else( 334 |_| { 335 Err(E::invalid_type( 336 Unexpected::Str(v), 337 &"an absolute URL with HTTP(S) scheme", 338 )) 339 }, 340 |url| { 341 if url.has_host() && (url.scheme() == "http" || url.scheme() == "https") { 342 Ok(HttpUrl { url }) 343 } else { 344 Err(E::invalid_value( 345 Unexpected::Other(v), 346 &"an absolute URL with HTTP(S) scheme", 347 )) 348 } 349 }, 350 ) 351 } 352 } 353 deserializer.deserialize_str(UrlVisitor) 354 } 355 } 356 /// Represents the kind of [`ParsedDomain`]s a [`File`] 357 /// contains. 358 #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 359 pub enum Kind { 360 /// [`Adblock`] domains. 361 Adblock, 362 /// [`DomainOnly`] domains. 363 DomainOnly, 364 /// [`Hosts`] domains. 365 Hosts, 366 /// [`Wildcard`] domains. 367 Wildcard, 368 } 369 impl Display for Kind { 370 #[inline] 371 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 372 match *self { 373 Self::Adblock => f.write_str("Adblock"), 374 Self::DomainOnly => f.write_str("Domain-only"), 375 Self::Hosts => f.write_str("Hosts"), 376 Self::Wildcard => f.write_str("Wildcard"), 377 } 378 } 379 } 380 /// The name where a [`File`] was sourced from. 381 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 382 pub enum Name { 383 /// The `File` came from the contained `AbsFilePath`. 384 Path(AbsFilePath<false>), 385 /// The `File` came from the contained `HttpUrl`. 386 Url(HttpUrl), 387 } 388 impl Display for Name { 389 #[inline] 390 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 391 match *self { 392 Self::Path(ref path) => path.fmt(f), 393 Self::Url(ref url) => url.fmt(f), 394 } 395 } 396 } 397 /// An in-memory file sourced from [`Self::name`]; 398 #[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] 399 pub struct File { 400 /// The name/origin of the file. 401 pub name: Name, 402 /// The contents of the file. 403 pub data: String, 404 } 405 impl Display for File { 406 #[inline] 407 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 408 self.name.fmt(f) 409 } 410 } 411 /// A summary of a [`File`] after it has been parsed 412 /// into [`Value`]s. 413 #[derive(Clone, Debug)] 414 pub struct Summary<'a, E: Eq + Hash> { 415 /// The `File` that was parsed. 416 pub file: &'a File, 417 /// The kind of file. 418 pub kind: Kind, 419 /// The quantity of domains parsed. 420 pub domain_count: usize, 421 /// The quantity of comments parsed. 422 pub comment_count: usize, 423 /// The quantity of blank lines parsed. 424 pub blank_count: usize, 425 /// Parsing errors and their counts. 426 pub errors: HashMap<E, usize>, 427 } 428 impl<E: Display + Eq + Hash> Display for Summary<'_, E> { 429 #[inline] 430 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 431 write!( 432 f, 433 "({}) {} - domains parsed: {}, comments parsed: {}, blanks parsed: {}, parsing errors: {}", 434 self.kind, self.file, self.domain_count, self.comment_count, self.blank_count, self.errors.values().sum::<usize>() 435 ).and_then(|()| { 436 if self.errors.is_empty() { 437 Ok(()) 438 } else { 439 f.write_str(", errors: [").and_then(|()| { 440 self.errors.iter().try_fold((), |(), tup| { 441 write!(f, "{}: {}, ", tup.0, tup.1) 442 }) 443 }).and_then(|()| f.write_str("]")) 444 } 445 }) 446 } 447 } 448 impl<E: Eq + Hash> PartialEq<Summary<'_, E>> for Summary<'_, E> { 449 #[inline] 450 fn eq(&self, other: &Summary<'_, E>) -> bool { 451 self.file == other.file 452 && self.kind == other.kind 453 && self.domain_count == other.domain_count 454 && self.comment_count == other.comment_count 455 && self.blank_count == other.blank_count 456 && self.errors == other.errors 457 } 458 } 459 impl<E: Eq + Hash> Eq for Summary<'_, E> {} 460 /// Container of [`Adblock`], [`DomainOnly`], [`Hosts`], and [`Wildcard`] 461 /// files. 462 #[derive(Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord)] 463 pub struct Files { 464 /// [`Adblock`] files. 465 pub adblock: Vec<File>, 466 /// [`DomainOnly`] files. 467 pub domain: Vec<File>, 468 /// [`Hosts`] files. 469 pub hosts: Vec<File>, 470 /// [`Wildcard`] files. 471 pub wildcard: Vec<File>, 472 } 473 /// Helper trait to track what kind of file. 474 trait Helper { 475 /// Returns the `Kind`. 476 fn kind() -> Kind; 477 } 478 impl Helper for Adblock<'_> { 479 fn kind() -> Kind { 480 Kind::Adblock 481 } 482 } 483 impl Helper for DomainOnly<'_> { 484 fn kind() -> Kind { 485 Kind::DomainOnly 486 } 487 } 488 impl Helper for Hosts<'_> { 489 fn kind() -> Kind { 490 Kind::Hosts 491 } 492 } 493 impl Helper for Wildcard<'_> { 494 fn kind() -> Kind { 495 Kind::Wildcard 496 } 497 } 498 impl Files { 499 /// Attempts to convert an `ExternalFiles` into a `Files` 500 /// with `timeout_per_file` as the maximum amount of time allowed 501 /// for a file to finish downloading. 502 /// 503 /// Note that if `timeout_per_file` is 0 or larger than one hour, then 504 /// it'll be set to one hour. Similarly, if `timeout_per_file`, 505 /// is less than 1 second, it'll be be set to one second. 506 /// 507 /// # Errors 508 /// 509 /// Returns `ExtFileErr` iff any of the files in `ExternalFiles` 510 /// errors when downloading them and transforming them into a [`String`]. 511 #[inline] 512 pub async fn from_external( 513 ext: ExternalFiles, 514 mut timeout_per_file: Duration, 515 ) -> Result<Self, ExtFileErr> { 516 /// `await`s each `Result` in `set` propagating any errors; otherwise 517 /// the `String` is `push`ed into `files`. 518 /// 519 /// If an error occurs, then the function immediately returns. 520 async fn add_file( 521 files: &mut Vec<File>, 522 timeout: Duration, 523 mut set: JoinSet<Result<File, ExtFileErr>>, 524 ) -> Result<(), ExtFileErr> { 525 while let Some(file) = time::timeout(timeout, set.join_next()).await? { 526 files.push(file??); 527 } 528 Ok(()) 529 } 530 timeout_per_file = if timeout_per_file == Duration::ZERO { 531 Duration::from_secs(3600) 532 } else { 533 timeout_per_file.clamp(Duration::from_secs(1), Duration::from_secs(3600)) 534 }; 535 let mut files = Self { 536 adblock: Vec::with_capacity(ext.adblock.len()), 537 domain: Vec::with_capacity(ext.domain.len()), 538 hosts: Vec::with_capacity(ext.hosts.len()), 539 wildcard: Vec::with_capacity(ext.wildcard.len()), 540 }; 541 add_file(&mut files.adblock, timeout_per_file, ext.adblock).await?; 542 add_file(&mut files.domain, timeout_per_file, ext.domain).await?; 543 add_file(&mut files.hosts, timeout_per_file, ext.hosts).await?; 544 add_file(&mut files.wildcard, timeout_per_file, ext.wildcard).await?; 545 Ok(files) 546 } 547 /// Creates an empty instance of `Files`. 548 #[inline] 549 #[must_use] 550 pub const fn new() -> Self { 551 Self { 552 adblock: Vec::new(), 553 domain: Vec::new(), 554 hosts: Vec::new(), 555 wildcard: Vec::new(), 556 } 557 } 558 /// Reads each [`String`] from each field and attempts 559 /// to transform each line into an `RpzDomain` before adding 560 /// the domain into `doms`. 561 /// 562 /// Returns a `Vec` containing `Summary` information for each 563 /// [`File`] that was parsed. 564 #[expect(clippy::arithmetic_side_effects, reason = "math is verified")] 565 #[inline] 566 pub fn add_to_superset<'a: 'b, 'b>( 567 &'a self, 568 doms: &mut SupersetSet<RpzDomain<'b>>, 569 ) -> Vec<Summary<'a, FirefoxDomainErr>> { 570 /// Iterates each `String` from `files` and transforms each line 571 /// into `T` before adding it as an `RpzDomain` into `doms`. 572 fn insert< 573 'a, 574 'b: 'a, 575 T: Into<RpzDomain<'a>> + ParsedDomain<'a, Error = FirefoxDomainErr> + Helper, 576 >( 577 doms: &mut SupersetSet<RpzDomain<'a>>, 578 files: &'b [File], 579 summaries: &mut Vec<Summary<'b, FirefoxDomainErr>>, 580 ) { 581 let kind = T::kind(); 582 files.iter().fold((), |(), file| { 583 let mut summary = Summary { 584 file, 585 kind, 586 domain_count: 0, 587 comment_count: 0, 588 blank_count: 0, 589 errors: HashMap::new(), 590 }; 591 file.data 592 .lines() 593 .fold((), |(), line| match T::parse_value(line) { 594 Ok(val) => match val { 595 Value::Domain(dom) => { 596 summary.domain_count = summary.domain_count.saturating_add(1); 597 doms.insert(dom.into()); 598 } 599 Value::Comment(_) => { 600 summary.comment_count = summary.comment_count.saturating_add(1); 601 } 602 Value::Blank => { 603 summary.blank_count = summary.blank_count.saturating_add(1); 604 } 605 }, 606 Err(err) => { 607 let count = summary.errors.entry(err).or_insert(0); 608 *count = count.saturating_add(1); 609 } 610 }); 611 summaries.push(summary); 612 }); 613 } 614 let mut summaries = Vec::with_capacity( 615 self.adblock.len() + self.domain.len() + self.hosts.len() + self.wildcard.len(), 616 ); 617 insert::<Adblock<'_>>(doms, self.adblock.as_slice(), &mut summaries); 618 insert::<DomainOnly<'_>>(doms, self.domain.as_slice(), &mut summaries); 619 insert::<Hosts<'_>>(doms, self.hosts.as_slice(), &mut summaries); 620 insert::<Wildcard<'_>>(doms, self.wildcard.as_slice(), &mut summaries); 621 summaries 622 } 623 /// Returns `true` iff there are no files. 624 #[inline] 625 #[must_use] 626 pub fn is_empty(&self) -> bool { 627 self.adblock.is_empty() 628 && self.domain.is_empty() 629 && self.hosts.is_empty() 630 && self.wildcard.is_empty() 631 } 632 } 633 impl Default for Files { 634 #[inline] 635 fn default() -> Self { 636 Self::new() 637 } 638 } 639 /// Block and unblock [`Files`] stored on the local file system. 640 #[derive(Debug)] 641 pub struct LocalFiles { 642 /// Local file system files containing domains to not block. 643 pub unblock: Files, 644 /// Local file system files containing domains to block. 645 pub block: Files, 646 } 647 impl LocalFiles { 648 /// Reads all block and unblock files in the `adblock/`, `domain/`, `hosts/`, and `wildcard/` directories 649 /// under `dir/block/` and `dir/unblock/` respectively. 650 /// 651 /// # Errors 652 /// 653 /// Returns [`io::Error`] iff reading said files causes an error. Note that 654 /// it is _not_ an error if a directory does not exist. 655 #[expect( 656 clippy::wildcard_enum_match_arm, 657 reason = "too many to enumerate manually" 658 )] 659 #[inline] 660 pub fn from_path(dir: AbsFilePath<true>) -> Result<Option<Self>, io::Error> { 661 /// Checks if `path` exists. 662 fn exists<P: AsRef<Path>>(path: P) -> Result<bool, io::Error> { 663 fs::metadata(path).map_or_else( 664 |err| match err.kind() { 665 ErrorKind::NotFound => Ok(false), 666 _ => Err(err), 667 }, 668 |_| Ok(true), 669 ) 670 } 671 /// Reads all files stored in `adblock/`, `domain/`, `hosts/`, and `wildcard/` 672 /// directories under `dir/name/`. 673 fn get_files( 674 mut dir: PathBuf, 675 files: &mut Files, 676 name: &str, 677 ) -> Result<PathBuf, io::Error> { 678 /// Reads all files under `dir/name/`. 679 fn get_file( 680 files: &mut Vec<File>, 681 mut dir: PathBuf, 682 name: &str, 683 ) -> Result<PathBuf, io::Error> { 684 dir.push(name); 685 if exists(dir.as_path())? { 686 for entry in fs::read_dir(dir.as_path())? { 687 let file = entry?; 688 if !file.file_type()?.is_dir() { 689 let path = file.path(); 690 files.push(fs::read_to_string(path.as_path()).map(|data| File { 691 name: Name::Path(AbsFilePath::<false> { path }), 692 data, 693 })?); 694 } 695 } 696 } 697 dir.pop(); 698 Ok(dir) 699 } 700 dir.push(name); 701 if exists(dir.as_path())? { 702 get_file(&mut files.adblock, dir, "adblock/") 703 .and_then(|dir2| get_file(&mut files.domain, dir2, "domain/")) 704 .and_then(|dir2| get_file(&mut files.hosts, dir2, "hosts/")) 705 .and_then(|dir2| get_file(&mut files.wildcard, dir2, "wildcard/")) 706 .map(|mut dir2| { 707 dir2.pop(); 708 dir2 709 }) 710 } else { 711 dir.pop(); 712 Ok(dir) 713 } 714 } 715 let mut unblock = Files::new(); 716 let mut block = Files::new(); 717 get_files(PathBuf::from(dir), &mut unblock, "unblock/").and_then(|dir2| { 718 get_files(dir2, &mut block, "block/").map(|_| { 719 if unblock.is_empty() && block.is_empty() { 720 None 721 } else { 722 Some(Self { unblock, block }) 723 } 724 }) 725 }) 726 } 727 } 728 /// Error returned when downloading text files from HTTP(S) servers. 729 #[derive(Debug)] 730 pub enum ExtFileErr { 731 /// Error when a task exceeds the specified timeout. 732 Timeout(Elapsed), 733 /// HTTP(S) error when attempting to download a file. 734 Http(reqwest::Error), 735 /// Error when a task fails to complete. 736 Join(JoinError), 737 /// Error when a file is not valid UTF-8. 738 InvalidUtf8(FromUtf8Error), 739 } 740 impl Display for ExtFileErr { 741 #[inline] 742 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { 743 match *self { 744 Self::Timeout(ref err) => err.fmt(f), 745 Self::Http(ref err) => err.fmt(f), 746 Self::Join(ref err) => err.fmt(f), 747 Self::InvalidUtf8(ref err) => err.fmt(f), 748 } 749 } 750 } 751 impl From<Elapsed> for ExtFileErr { 752 #[inline] 753 fn from(value: Elapsed) -> Self { 754 Self::Timeout(value) 755 } 756 } 757 impl From<reqwest::Error> for ExtFileErr { 758 #[inline] 759 fn from(value: reqwest::Error) -> Self { 760 Self::Http(value) 761 } 762 } 763 impl From<JoinError> for ExtFileErr { 764 #[inline] 765 fn from(value: JoinError) -> Self { 766 Self::Join(value) 767 } 768 } 769 impl From<FromUtf8Error> for ExtFileErr { 770 #[inline] 771 fn from(value: FromUtf8Error) -> Self { 772 Self::InvalidUtf8(value) 773 } 774 } 775 impl Error for ExtFileErr {} 776 /// Tasks of downloaded files from HTTP(S) servers. 777 #[derive(Debug)] 778 pub struct ExternalFiles { 779 /// [`Adblock`]-based files. 780 pub adblock: JoinSet<Result<File, ExtFileErr>>, 781 /// [`DomainOnly`]-based files. 782 pub domain: JoinSet<Result<File, ExtFileErr>>, 783 /// [`Hosts`]-based files. 784 pub hosts: JoinSet<Result<File, ExtFileErr>>, 785 /// [`Wildcard`]-based files. 786 pub wildcard: JoinSet<Result<File, ExtFileErr>>, 787 } 788 impl ExternalFiles { 789 /// Returns `ExternalFiles` containing files downloaded 790 /// from the `HashSet`s. 791 #[inline] 792 #[must_use] 793 pub fn new_with_urls( 794 client: &'static Client, 795 adblock_urls: HashSet<HttpUrl>, 796 domain_urls: HashSet<HttpUrl>, 797 hosts_urls: HashSet<HttpUrl>, 798 wildcard_urls: HashSet<HttpUrl>, 799 ) -> Self { 800 let mut val = Self::new(); 801 val.add_adblock(client, adblock_urls); 802 val.add_domain(client, domain_urls); 803 val.add_hosts(client, hosts_urls); 804 val.add_wildcard(client, wildcard_urls); 805 val 806 } 807 /// Returns an empty `ExternalFiles`. 808 #[inline] 809 #[must_use] 810 pub fn new() -> Self { 811 Self { 812 adblock: JoinSet::new(), 813 domain: JoinSet::new(), 814 hosts: JoinSet::new(), 815 wildcard: JoinSet::new(), 816 } 817 } 818 /// Downloads the [`Adblock`] files from `urls` and adds them to [`ExternalFiles::adblock`]. 819 #[inline] 820 pub fn add_adblock(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) { 821 Self::get_external_files(&mut self.adblock, client, urls); 822 } 823 /// Downloads the [`DomainOnly`] files from `urls` and adds them to [`ExternalFiles::domain`]. 824 #[inline] 825 pub fn add_domain(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) { 826 Self::get_external_files(&mut self.domain, client, urls); 827 } 828 /// Downloads the [`Hosts`] files from `urls` and adds them to [`ExternalFiles::hosts`]. 829 #[inline] 830 pub fn add_hosts(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) { 831 Self::get_external_files(&mut self.hosts, client, urls); 832 } 833 /// Downloads the [`Wildcard`] files from `urls` and adds them to [`ExternalFiles::wildcard`]. 834 #[inline] 835 pub fn add_wildcard(&mut self, client: &'static Client, urls: HashSet<HttpUrl>) { 836 Self::get_external_files(&mut self.wildcard, client, urls); 837 } 838 /// Downloads the files from `urls` and converts them to `String`s adding the 839 /// tasks to `set`. 840 #[expect(clippy::iter_over_hash_type, reason = "order does not matter")] 841 fn get_external_files( 842 set: &mut JoinSet<Result<File, ExtFileErr>>, 843 client: &'static Client, 844 urls: HashSet<HttpUrl>, 845 ) { 846 for url in Into::<HashSet<HttpUrl>>::into(urls) { 847 let url_clone = url.clone(); 848 set.spawn(async { 849 let resp = client 850 .get::<Url>(url_clone.into()) 851 .send() 852 .await? 853 .error_for_status()?; 854 resp.bytes().await.map_or_else( 855 |err| Err(ExtFileErr::Http(err)), 856 |bytes| { 857 String::from_utf8(bytes.into()) 858 .map_err(ExtFileErr::InvalidUtf8) 859 .map(|data| File { 860 name: Name::Url(url), 861 data, 862 }) 863 }, 864 ) 865 }); 866 } 867 } 868 } 869 impl Default for ExternalFiles { 870 #[inline] 871 fn default() -> Self { 872 Self::new() 873 } 874 }