commit 54729f3c1e800caad03fafc9debdacbba4207c64
parent 6b6ea3c8bf267a99ae44202d71b8c0ecf9acd7a6
Author: Daniel García <dani-garcia@users.noreply.github.com>
Date: Sun, 26 Jun 2022 21:54:10 +0200
Merge branch 'BlackDex-optimize-icon-html-parsing' into main
Diffstat:
M | Cargo.lock | | | 155 | +++++++++++++++++++++++++++++++++---------------------------------------------- |
M | Cargo.toml | | | 6 | +++--- |
M | src/api/icons.rs | | | 47 | ++++++++++++++++++++++++++++++++++------------- |
3 files changed, 102 insertions(+), 106 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -378,7 +378,7 @@ dependencies = [
"rand",
"sha2",
"subtle",
- "time 0.3.9",
+ "time 0.3.11",
"version_check",
]
@@ -394,7 +394,7 @@ dependencies = [
"publicsuffix",
"serde",
"serde_json",
- "time 0.3.9",
+ "time 0.3.11",
"url 2.2.2",
]
@@ -445,12 +445,12 @@ dependencies = [
[[package]]
name = "crossbeam-utils"
-version = "0.8.8"
+version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38"
+checksum = "7d82ee10ce34d7bc12c2122495e7593a9c41347ecdd64185af4ecf72cb1a7f83"
dependencies = [
"cfg-if",
- "lazy_static",
+ "once_cell",
]
[[package]]
@@ -526,7 +526,7 @@ dependencies = [
"cfg-if",
"hashbrown 0.12.1",
"lock_api",
- "parking_lot_core 0.9.3",
+ "parking_lot_core",
]
[[package]]
@@ -967,7 +967,7 @@ dependencies = [
"futures-timer",
"no-std-compat",
"nonzero_ext",
- "parking_lot 0.12.1",
+ "parking_lot",
"quanta",
"rand",
"smallvec",
@@ -1071,9 +1071,9 @@ dependencies = [
[[package]]
name = "html5gum"
-version = "0.4.0"
+version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2dad48b66db55322add2819ae1d7bda0c32f3415269a08330679dbc8b0afeb30"
+checksum = "3404cc217cc3e11d09c8ac9ccf8b1e540f64477c253d6dc70b5a5074782d934d"
dependencies = [
"jetscii",
]
@@ -1179,12 +1179,12 @@ dependencies = [
[[package]]
name = "indexmap"
-version = "1.8.2"
+version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6012d540c5baa3589337a98ce73408de9b5a25ec9fc2c6fd6be8f0d39e0ca5a"
+checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e"
dependencies = [
"autocfg",
- "hashbrown 0.11.2",
+ "hashbrown 0.12.1",
"serde",
]
@@ -1246,18 +1246,18 @@ dependencies = [
[[package]]
name = "js-sys"
-version = "0.3.57"
+version = "0.3.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "671a26f820db17c2a2750743f1dd03bafd15b98c9f30c7c2628c024c05d73397"
+checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27"
dependencies = [
"wasm-bindgen",
]
[[package]]
name = "jsonwebtoken"
-version = "8.1.0"
+version = "8.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc9051c17f81bae79440afa041b3a278e1de71bfb96d32454b477fd4703ccb6f"
+checksum = "1aa4b4af834c6cfd35d8763d359661b90f2e45d8f750a0849156c7f4671af09c"
dependencies = [
"base64",
"pem",
@@ -1466,9 +1466,9 @@ dependencies = [
[[package]]
name = "mio"
-version = "0.8.3"
+version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "713d550d9b44d89174e066b7a6217ae06234c10cb47819a88290d2b353c31799"
+checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf"
dependencies = [
"libc",
"log",
@@ -1678,9 +1678,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf"
[[package]]
name = "openssl-src"
-version = "111.20.0+1.1.1o"
+version = "111.21.0+1.1.1p"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92892c4f87d56e376e469ace79f1128fdaded07646ddf73aa0be4706ff712dec"
+checksum = "6d0a8313729211913936f1b95ca47a5fc7f2e04cd658c115388287f8a8361008"
dependencies = [
"cc",
]
@@ -1701,37 +1701,12 @@ dependencies = [
[[package]]
name = "parking_lot"
-version = "0.11.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d17b78036a60663b797adeaee46f5c9dfebb86948d1255007a1d6be0271ff99"
-dependencies = [
- "instant",
- "lock_api",
- "parking_lot_core 0.8.5",
-]
-
-[[package]]
-name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
- "parking_lot_core 0.9.3",
-]
-
-[[package]]
-name = "parking_lot_core"
-version = "0.8.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d76e8e1493bcac0d2766c42737f34458f1c8c50c0d23bcb24ea953affb273216"
-dependencies = [
- "cfg-if",
- "instant",
- "libc",
- "redox_syscall",
- "smallvec",
- "winapi",
+ "parking_lot_core",
]
[[package]]
@@ -1947,9 +1922,9 @@ checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
-version = "1.0.39"
+version = "1.0.40"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f"
+checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7"
dependencies = [
"unicode-ident",
]
@@ -2009,9 +1984,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0"
[[package]]
name = "quote"
-version = "1.0.18"
+version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1"
+checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804"
dependencies = [
"proc-macro2",
]
@@ -2024,12 +1999,12 @@ checksum = "3fee2dce59f7a43418e3382c766554c614e06a552d53a8f07ef499ea4b332c0f"
[[package]]
name = "r2d2"
-version = "0.8.9"
+version = "0.8.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "545c5bc2b880973c9c10e4067418407a0ccaa3091781d1671d46eb35107cb26f"
+checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93"
dependencies = [
"log",
- "parking_lot 0.11.2",
+ "parking_lot",
"scheduled-thread-pool",
]
@@ -2257,7 +2232,7 @@ dependencies = [
"memchr",
"multer",
"num_cpus",
- "parking_lot 0.12.1",
+ "parking_lot",
"pin-project-lite",
"rand",
"ref-cast",
@@ -2267,7 +2242,7 @@ dependencies = [
"serde_json",
"state",
"tempfile",
- "time 0.3.9",
+ "time 0.3.11",
"tokio",
"tokio-stream",
"tokio-util 0.7.3",
@@ -2316,7 +2291,7 @@ dependencies = [
"smallvec",
"stable-pattern",
"state",
- "time 0.3.9",
+ "time 0.3.11",
"tokio",
"tokio-rustls",
"uncased",
@@ -2351,9 +2326,9 @@ dependencies = [
[[package]]
name = "rustversion"
-version = "1.0.6"
+version = "1.0.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f"
+checksum = "a0a5f7c728f5d284929a1cccb5bc19884422bfe6ef4d6c409da2c41838983fcf"
[[package]]
name = "ryu"
@@ -2386,7 +2361,7 @@ version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "977a7519bff143a44f842fd07e80ad1329295bd71686457f18e496736f4bf9bf"
dependencies = [
- "parking_lot 0.12.1",
+ "parking_lot",
]
[[package]]
@@ -2559,7 +2534,7 @@ dependencies = [
"num-bigint",
"num-traits",
"thiserror",
- "time 0.3.9",
+ "time 0.3.11",
]
[[package]]
@@ -2576,9 +2551,9 @@ checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32"
[[package]]
name = "smallvec"
-version = "1.8.0"
+version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83"
+checksum = "cc88c725d61fc6c3132893370cac4a0200e3fedf5da8331c570664b1987f5ca2"
[[package]]
name = "socket2"
@@ -2634,9 +2609,9 @@ checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601"
[[package]]
name = "syn"
-version = "1.0.96"
+version = "1.0.98"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0748dd251e24453cb8717f0354206b91557e4ec8703673a4b30208f2abaf1ebf"
+checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd"
dependencies = [
"proc-macro2",
"quote",
@@ -2653,7 +2628,7 @@ dependencies = [
"hostname",
"libc",
"log",
- "time 0.3.9",
+ "time 0.3.11",
]
[[package]]
@@ -2720,9 +2695,9 @@ dependencies = [
[[package]]
name = "time"
-version = "0.3.9"
+version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2702e08a7a860f005826c6815dcac101b19b5eb330c27fe4a5928fec1d20ddd"
+checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217"
dependencies = [
"itoa",
"libc",
@@ -2763,7 +2738,7 @@ dependencies = [
"mio",
"num_cpus",
"once_cell",
- "parking_lot 0.12.1",
+ "parking_lot",
"pin-project-lite",
"signal-hook-registry",
"socket2",
@@ -2889,9 +2864,9 @@ dependencies = [
[[package]]
name = "tower-service"
-version = "0.3.1"
+version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "360dfd1d6d30e05fda32ace2c8c70e9c0a9da713275777f5a4dbb8a1893930c6"
+checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52"
[[package]]
name = "tracing"
@@ -2919,9 +2894,9 @@ dependencies = [
[[package]]
name = "tracing-core"
-version = "0.1.27"
+version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7709595b8878a4965ce5e87ebf880a7d39c9afc6837721b21a5a816a8117d921"
+checksum = "7b7358be39f2f274f322d2aaed611acc57f382e8eb1e5b48cb9ae30933495ce7"
dependencies = [
"once_cell",
"valuable",
@@ -2993,7 +2968,7 @@ dependencies = [
"lazy_static",
"log",
"lru-cache",
- "parking_lot 0.12.1",
+ "parking_lot",
"resolv-conf",
"smallvec",
"thiserror",
@@ -3071,9 +3046,9 @@ checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c"
[[package]]
name = "unicode-normalization"
-version = "0.1.19"
+version = "0.1.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
+checksum = "81dee68f85cab8cf68dec42158baf3a79a1cdc065a8b103025965d6ccb7f6cbd"
dependencies = [
"tinyvec",
]
@@ -3190,7 +3165,7 @@ dependencies = [
"serde",
"serde_json",
"syslog",
- "time 0.3.9",
+ "time 0.3.11",
"tokio",
"tokio-tungstenite",
"totp-lite",
@@ -3248,9 +3223,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "wasm-bindgen"
-version = "0.2.80"
+version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "27370197c907c55e3f1a9fbe26f44e937fe6451368324e009cba39e139dc08ad"
+checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994"
dependencies = [
"cfg-if",
"wasm-bindgen-macro",
@@ -3258,9 +3233,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
-version = "0.2.80"
+version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53e04185bfa3a779273da532f5025e33398409573f348985af9a1cbf3774d3f4"
+checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a"
dependencies = [
"bumpalo",
"lazy_static",
@@ -3273,9 +3248,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
-version = "0.4.30"
+version = "0.4.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f741de44b75e14c35df886aff5f1eb73aa114fa5d4d00dcd37b5e01259bf3b2"
+checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f"
dependencies = [
"cfg-if",
"js-sys",
@@ -3285,9 +3260,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
-version = "0.2.80"
+version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17cae7ff784d7e83a2fe7611cfe766ecf034111b49deb850a3dc7699c08251f5"
+checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa"
dependencies = [
"quote",
"wasm-bindgen-macro-support",
@@ -3295,9 +3270,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
-version = "0.2.80"
+version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99ec0dc7a4756fffc231aab1b9f2f578d23cd391390ab27f952ae0c9b3ece20b"
+checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048"
dependencies = [
"proc-macro2",
"quote",
@@ -3308,15 +3283,15 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
-version = "0.2.80"
+version = "0.2.81"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d554b7f530dee5964d9a9468d95c1f8b8acae4f282807e7d27d4b03099a46744"
+checksum = "6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be"
[[package]]
name = "web-sys"
-version = "0.3.57"
+version = "0.3.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b17e741662c70c8bd24ac5c5b18de314a2c26c32bf8346ee1e6f53de919c283"
+checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90"
dependencies = [
"js-sys",
"wasm-bindgen",
diff --git a/Cargo.toml b/Cargo.toml
@@ -84,7 +84,7 @@ uuid = { version = "1.1.2", features = ["v4"] }
# Date and time libraries
chrono = { version = "0.4.19", features = ["clock", "serde"], default-features = false }
chrono-tz = "0.6.1"
-time = "0.3.9"
+time = "0.3.11"
# Job scheduler
job_scheduler_ng = "2.0.1"
@@ -93,7 +93,7 @@ job_scheduler_ng = "2.0.1"
data-encoding = "2.3.2"
# JWT library
-jsonwebtoken = "8.1.0"
+jsonwebtoken = "8.1.1"
# TOTP library
totp-lite = "2.0.0"
@@ -118,7 +118,7 @@ handlebars = { version = "4.3.1", features = ["dir_source"] }
reqwest = { version = "0.11.11", features = ["stream", "json", "gzip", "brotli", "socks", "cookies", "trust-dns"] }
# For favicon extraction from main website
-html5gum = "0.4.0"
+html5gum = "0.5.2"
regex = { version = "1.5.6", features = ["std", "perf", "unicode-perl"], default-features = false }
data-url = "0.1.1"
bytes = "1.1.0"
diff --git a/src/api/icons.rs b/src/api/icons.rs
@@ -19,7 +19,7 @@ use tokio::{
net::lookup_host,
};
-use html5gum::{Emitter, EndTag, InfallibleTokenizer, Readable, StartTag, StringReader, Tokenizer};
+use html5gum::{Emitter, EndTag, HtmlString, InfallibleTokenizer, Readable, StartTag, StringReader, Tokenizer};
use crate::{
error::Error,
@@ -433,7 +433,7 @@ async fn get_favicons_node(
for token in dom {
match token {
FaviconToken::StartTag(tag) => {
- if tag.name == TAG_LINK
+ if *tag.name == TAG_LINK
&& tag.attributes.contains_key(ATTR_REL)
&& tag.attributes.contains_key(ATTR_HREF)
{
@@ -443,7 +443,7 @@ async fn get_favicons_node(
if rel_value.contains("icon") && !rel_value.contains("mask-icon") {
icon_tags.push(tag);
}
- } else if tag.name == TAG_BASE && tag.attributes.contains_key(ATTR_HREF) {
+ } else if *tag.name == TAG_BASE && tag.attributes.contains_key(ATTR_HREF) {
let href = std::str::from_utf8(tag.attributes.get(ATTR_HREF).unwrap()).unwrap_or_default();
debug!("Found base href: {href}");
base_url = match base_url.join(href) {
@@ -453,7 +453,7 @@ async fn get_favicons_node(
}
}
FaviconToken::EndTag(tag) => {
- if tag.name == TAG_HEAD {
+ if *tag.name == TAG_HEAD {
break;
}
}
@@ -830,17 +830,18 @@ impl reqwest::cookie::CookieStore for Jar {
/// Therefore parsing the HTML content is faster.
use std::collections::{BTreeSet, VecDeque};
+#[derive(Debug)]
enum FaviconToken {
StartTag(StartTag),
EndTag(EndTag),
}
-#[derive(Default)]
+#[derive(Default, Debug)]
struct FaviconEmitter {
current_token: Option<FaviconToken>,
- last_start_tag: Vec<u8>,
- current_attribute: Option<(Vec<u8>, Vec<u8>)>,
- seen_attributes: BTreeSet<Vec<u8>>,
+ last_start_tag: HtmlString,
+ current_attribute: Option<(HtmlString, HtmlString)>,
+ seen_attributes: BTreeSet<HtmlString>,
emitted_tokens: VecDeque<FaviconToken>,
}
@@ -887,18 +888,38 @@ impl Emitter for FaviconEmitter {
self.seen_attributes.clear();
}
- fn emit_current_tag(&mut self) {
+ fn emit_current_tag(&mut self) -> Option<html5gum::State> {
self.flush_current_attribute();
let mut token = self.current_token.take().unwrap();
+ let mut emit = false;
match token {
- FaviconToken::EndTag(_) => {
+ FaviconToken::EndTag(ref mut tag) => {
+ // Always clean seen attributes
self.seen_attributes.clear();
+
+ // Only trigger an emit for the </head> tag.
+ // This is matched, and will break the for-loop.
+ if *tag.name == b"head" {
+ emit = true;
+ }
}
FaviconToken::StartTag(ref mut tag) => {
- self.set_last_start_tag(Some(&tag.name));
+ // Only trigger an emit for <link> and <base> tags.
+ // These are the only tags we want to parse.
+ if *tag.name == b"link" || *tag.name == b"base" {
+ self.set_last_start_tag(Some(&tag.name));
+ emit = true;
+ } else {
+ self.set_last_start_tag(None);
+ }
}
}
- self.emit_token(token);
+
+ // Only emit the tags we want to parse.
+ if emit {
+ self.emit_token(token);
+ }
+ None
}
fn push_tag_name(&mut self, s: &[u8]) {
@@ -921,7 +942,7 @@ impl Emitter for FaviconEmitter {
fn init_attribute(&mut self) {
self.flush_current_attribute();
- self.current_attribute = Some((Vec::new(), Vec::new()));
+ self.current_attribute = Some(Default::default());
}
fn push_attribute_name(&mut self, s: &[u8]) {