commit f270f2ed652459cec2d5251b998eef17a88ea49e
parent aba5b234af4ab1b9a2b7cf60ef02dcf84ba24a46
Author: BlackDex <black.dex@gmail.com>
Date: Sun, 16 May 2021 15:29:13 +0200
Updated icon fetching and crates.
- Updated some crates
- Updated icon fetching code:
+ Use a cookie jar and set Max-Age to 2 minutes for all cookies
+ Locate the base href tag to fix some locations
+ Changed User-Agent (Helps on some sites to get HTML instead of JS)
+ Reduced HTML code limit from 512KB to 384KB
+ Allow some large icons higher-up in the sort
+ Allow GIF images
+ Ignore cookie_store and hyper::client debug messages
Diffstat:
M | Cargo.lock | | | 198 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------- |
M | Cargo.toml | | | 14 | ++++++++++---- |
M | src/api/icons.rs | | | 169 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------- |
M | src/main.rs | | | 3 | +++ |
4 files changed, 272 insertions(+), 112 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
@@ -322,6 +322,49 @@ dependencies = [
]
[[package]]
+name = "cookie"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ffdf8865bac3d9a3bde5bde9088ca431b11f5d37c7a578b8086af77248b76627"
+dependencies = [
+ "percent-encoding 2.1.0",
+ "time 0.2.26",
+ "version_check 0.9.3",
+]
+
+[[package]]
+name = "cookie_store"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3818dfca4b0cb5211a659bbcbb94225b7127407b2b135e650d717bfb78ab10d3"
+dependencies = [
+ "cookie 0.14.4",
+ "idna 0.2.3",
+ "log 0.4.14",
+ "publicsuffix 1.5.6",
+ "serde",
+ "serde_json",
+ "time 0.2.26",
+ "url 2.2.2",
+]
+
+[[package]]
+name = "cookie_store"
+version = "0.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "55b4ac5559dd39f7bdc516f769cb412b151585d8886d216871a8435ed7f862cd"
+dependencies = [
+ "cookie 0.15.0",
+ "idna 0.2.3",
+ "log 0.4.14",
+ "publicsuffix 2.1.0",
+ "serde",
+ "serde_json",
+ "time 0.2.26",
+ "url 2.2.2",
+]
+
+[[package]]
name = "core-foundation"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -339,9 +382,9 @@ checksum = "ea221b5284a47e40033bf9b66f35f984ec0ea2931eb03505246cd27a963f981b"
[[package]]
name = "cpufeatures"
-version = "0.1.1"
+version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dec1028182c380cc45a2e2c5ec841134f2dfd0f8f5f0a5bcd68004f81b5efdf4"
+checksum = "ed00c67cb5d0a7d64a44f6ad2668db7e7530311dd53ea79bcd4fb022c64911c8"
dependencies = [
"libc",
]
@@ -395,7 +438,7 @@ checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57"
[[package]]
name = "data-url"
version = "0.1.0"
-source = "git+https://github.com/servo/rust-url?rev=540ede02d0771824c0c80ff9f57fe8eff38b1291#540ede02d0771824c0c80ff9f57fe8eff38b1291"
+source = "git+https://github.com/servo/rust-url?rev=eb7330b5296c0d43816d1346211b74182bb4ae37#eb7330b5296c0d43816d1346211b74182bb4ae37"
dependencies = [
"matches",
]
@@ -648,9 +691,9 @@ dependencies = [
[[package]]
name = "futures"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253"
+checksum = "0e7e43a803dae2fa37c1f6a8fe121e1f7bf9548b4dfc0522a42f34145dadfc27"
dependencies = [
"futures-channel",
"futures-core",
@@ -663,9 +706,9 @@ dependencies = [
[[package]]
name = "futures-channel"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25"
+checksum = "e682a68b29a882df0545c143dc3646daefe80ba479bcdede94d5a703de2871e2"
dependencies = [
"futures-core",
"futures-sink",
@@ -673,15 +716,15 @@ dependencies = [
[[package]]
name = "futures-core"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815"
+checksum = "0402f765d8a89a26043b889b26ce3c4679d268fa6bb22cd7c6aad98340e179d1"
[[package]]
name = "futures-executor"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d"
+checksum = "badaa6a909fac9e7236d0620a2f57f7664640c56575b71a7552fbd68deafab79"
dependencies = [
"futures-core",
"futures-task",
@@ -690,16 +733,17 @@ dependencies = [
[[package]]
name = "futures-io"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04"
+checksum = "acc499defb3b348f8d8f3f66415835a9131856ff7714bf10dadfc4ec4bdb29a1"
[[package]]
name = "futures-macro"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b"
+checksum = "a4c40298486cdf52cc00cd6d6987892ba502c7656a16a4192a9992b1ccedd121"
dependencies = [
+ "autocfg",
"proc-macro-hack",
"proc-macro2 1.0.26",
"quote 1.0.9",
@@ -708,22 +752,23 @@ dependencies = [
[[package]]
name = "futures-sink"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23"
+checksum = "a57bead0ceff0d6dde8f465ecd96c9338121bb7717d3e7b108059531870c4282"
[[package]]
name = "futures-task"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc"
+checksum = "8a16bef9fc1a4dddb5bee51c989e3fbba26569cbb0e31f5b303c184e3dd33dae"
[[package]]
name = "futures-util"
-version = "0.3.14"
+version = "0.3.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025"
+checksum = "feb5c238d27e2bf94ffdfd27b2c29e3df4a68c4193bb6427384259e2bf191967"
dependencies = [
+ "autocfg",
"futures-channel",
"futures-core",
"futures-io",
@@ -842,6 +887,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
[[package]]
+name = "hashbrown"
+version = "0.11.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
+
+[[package]]
name = "hermit-abi"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -920,9 +971,9 @@ dependencies = [
[[package]]
name = "httparse"
-version = "1.4.0"
+version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4a1ce40d6fc9764887c2fdc7305c3dcc429ba11ff981c1509416afd5697e4437"
+checksum = "f3a87b616e37e93c22fb19bcd386f02f3af5ea98a25670ad0fce773de23c5e68"
[[package]]
name = "httpdate"
@@ -1027,7 +1078,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3"
dependencies = [
"autocfg",
- "hashbrown",
+ "hashbrown 0.9.1",
]
[[package]]
@@ -1072,9 +1123,9 @@ dependencies = [
[[package]]
name = "js-sys"
-version = "0.3.50"
+version = "0.3.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d99f9e3e84b8f67f846ef5b4cbbc3b1c29f6c759fcbce6f01aa0e73d932a24c"
+checksum = "83bdfbace3a0e81a4253f73b49e960b053e396a11012cbd49b9b74d6a2b67062"
dependencies = [
"wasm-bindgen",
]
@@ -1123,9 +1174,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
[[package]]
name = "lettre"
-version = "0.10.0-beta.4"
+version = "0.10.0-rc.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2b7fd0c394e97e38d87bd2dfdf91983ab406b044a0bfd4e5b5c82bdfa0324526"
+checksum = "4be4ff7e8bcb0e0c6902815554a286889b0e99b4ea6e898afb7b9f53174b1929"
dependencies = [
"base64 0.13.0",
"fastrand",
@@ -1572,9 +1623,9 @@ dependencies = [
[[package]]
name = "openssl-probe"
-version = "0.1.2"
+version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de"
+checksum = "28988d872ab76095a6e6ac88d99b54fd267702734fd7ffe610ca27f533ddb95a"
[[package]]
name = "openssl-src"
@@ -1910,6 +1961,34 @@ dependencies = [
]
[[package]]
+name = "psl-types"
+version = "2.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "66b398073e7cdd6f05934389a8f5961e3aabfa66675b6f440df4e2c793d51a4f"
+
+[[package]]
+name = "publicsuffix"
+version = "1.5.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95b4ce31ff0a27d93c8de1849cf58162283752f065a90d508f1105fa6c9a213f"
+dependencies = [
+ "idna 0.2.3",
+ "url 2.2.2",
+]
+
+[[package]]
+name = "publicsuffix"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3ac055aef7cc7a1caefbc65144be879e862467dcd9b8a8d57b64a13e7dce15d"
+dependencies = [
+ "byteorder",
+ "hashbrown 0.11.2",
+ "idna 0.2.3",
+ "psl-types",
+]
+
+[[package]]
name = "quick-error"
version = "1.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2134,6 +2213,8 @@ dependencies = [
"async-compression",
"base64 0.13.0",
"bytes 1.0.1",
+ "cookie 0.14.4",
+ "cookie_store 0.12.0",
"encoding_rs",
"futures-core",
"futures-util",
@@ -2152,6 +2233,7 @@ dependencies = [
"serde",
"serde_json",
"serde_urlencoded",
+ "time 0.2.26",
"tokio",
"tokio-native-tls",
"tokio-socks",
@@ -2248,7 +2330,7 @@ name = "rocket_http"
version = "0.5.0-dev"
source = "git+https://github.com/SergioBenitez/Rocket?rev=263e39b5b429de1913ce7e3036575a7b4d88b6d7#263e39b5b429de1913ce7e3036575a7b4d88b6d7"
dependencies = [
- "cookie",
+ "cookie 0.14.4",
"hyper 0.10.16",
"hyper-sync-rustls",
"indexmap",
@@ -2391,18 +2473,18 @@ checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
[[package]]
name = "serde"
-version = "1.0.125"
+version = "1.0.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "558dc50e1a5a5fa7112ca2ce4effcb321b0300c0d4ccf0776a9f60cd89031171"
+checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
-version = "1.0.125"
+version = "1.0.126"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b093b7a2bb58203b5da3056c05b4ec1fed827dcfdb37347a8841695263b3d06d"
+checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43"
dependencies = [
"proc-macro2 1.0.26",
"quote 1.0.9",
@@ -2459,9 +2541,9 @@ dependencies = [
[[package]]
name = "sha-1"
-version = "0.9.5"
+version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b659df5fc3ce22274daac600ffb845300bd2125bcfaec047823075afdab81c00"
+checksum = "8c4cfa741c5832d0ef7fab46cabed29c2aae926db0b11bb2069edd8db5e64e16"
dependencies = [
"block-buffer 0.9.0",
"cfg-if 1.0.0",
@@ -2804,9 +2886,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
[[package]]
name = "tokio"
-version = "1.5.0"
+version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5"
+checksum = "bd3076b5c8cc18138b8f8814895c11eb4de37114a5d127bafdc5e55798ceef37"
dependencies = [
"autocfg",
"bytes 1.0.1",
@@ -2841,9 +2923,9 @@ dependencies = [
[[package]]
name = "tokio-util"
-version = "0.6.6"
+version = "0.6.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "940a12c99365c31ea8dd9ba04ec1be183ffe4920102bb7122c2f515437601e8e"
+checksum = "1caa0b0c8d94a049db56b5acf8cba99dc0623aab1b26d5b5f5e2d945846b3592"
dependencies = [
"bytes 1.0.1",
"futures-core",
@@ -3054,9 +3136,12 @@ name = "vaultwarden"
version = "1.0.0"
dependencies = [
"backtrace",
+ "bytes 1.0.1",
"chashmap",
"chrono",
"chrono-tz",
+ "cookie 0.15.0",
+ "cookie_store 0.15.0",
"data-encoding",
"data-url",
"diesel",
@@ -3095,6 +3180,7 @@ dependencies = [
"time 0.2.26",
"tracing",
"u2f",
+ "url 2.2.2",
"uuid",
"yubico",
]
@@ -3152,9 +3238,9 @@ checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "wasm-bindgen"
-version = "0.2.73"
+version = "0.2.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83240549659d187488f91f33c0f8547cbfef0b2088bc470c116d1d260ef623d9"
+checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd"
dependencies = [
"cfg-if 1.0.0",
"serde",
@@ -3164,9 +3250,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-backend"
-version = "0.2.73"
+version = "0.2.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ae70622411ca953215ca6d06d3ebeb1e915f0f6613e3b495122878d7ebec7dae"
+checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900"
dependencies = [
"bumpalo",
"lazy_static",
@@ -3179,9 +3265,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-futures"
-version = "0.4.23"
+version = "0.4.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81b8b767af23de6ac18bf2168b690bed2902743ddf0fb39252e36f9e2bfc63ea"
+checksum = "5fba7978c679d53ce2d0ac80c8c175840feb849a161664365d1287b41f2e67f1"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
@@ -3191,9 +3277,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro"
-version = "0.2.73"
+version = "0.2.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e734d91443f177bfdb41969de821e15c516931c3c3db3d318fa1b68975d0f6f"
+checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4"
dependencies = [
"quote 1.0.9",
"wasm-bindgen-macro-support",
@@ -3201,9 +3287,9 @@ dependencies = [
[[package]]
name = "wasm-bindgen-macro-support"
-version = "0.2.73"
+version = "0.2.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d53739ff08c8a68b0fdbcd54c372b8ab800b1449ab3c9d706503bc7dd1621b2c"
+checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97"
dependencies = [
"proc-macro2 1.0.26",
"quote 1.0.9",
@@ -3214,15 +3300,15 @@ dependencies = [
[[package]]
name = "wasm-bindgen-shared"
-version = "0.2.73"
+version = "0.2.74"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9a543ae66aa233d14bb765ed9af4a33e81b8b58d1584cf1b47ff8cd0b9e4489"
+checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f"
[[package]]
name = "web-sys"
-version = "0.3.50"
+version = "0.3.51"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a905d57e488fec8861446d3393670fb50d27a262344013181c2cdf9fff5481be"
+checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582"
dependencies = [
"js-sys",
"wasm-bindgen",
@@ -3345,7 +3431,7 @@ dependencies = [
"hmac 0.10.1",
"rand 0.8.3",
"reqwest",
- "sha-1 0.9.5",
+ "sha-1 0.9.6",
"threadpool",
"url 1.7.2",
]
diff --git a/Cargo.toml b/Cargo.toml
@@ -32,7 +32,13 @@ rocket = { version = "0.5.0-dev", features = ["tls"], default-features = false }
rocket_contrib = "0.5.0-dev"
# HTTP client
-reqwest = { version = "0.11.3", features = ["blocking", "json", "gzip", "brotli", "socks"] }
+reqwest = { version = "0.11.3", features = ["blocking", "json", "gzip", "brotli", "socks", "cookies"] }
+
+# Used for custom short lived cookie jar
+cookie = "0.15.0"
+cookie_store = "0.15.0"
+bytes = "1.0.1"
+url = "2.2.2"
# multipart/form-data support
multipart = { version = "0.17.1", features = ["server"], default-features = false }
@@ -47,7 +53,7 @@ rmpv = "0.4.7"
chashmap = "2.2.2"
# A generic serialization/deserialization framework
-serde = { version = "1.0.125", features = ["derive"] }
+serde = { version = "1.0.126", features = ["derive"] }
serde_json = "1.0.64"
# Logging
@@ -103,7 +109,7 @@ num-derive = "0.3.3"
# Email libraries
tracing = { version = "0.1.26", features = ["log"] } # Needed to have lettre trace logging used when SMTP_DEBUG is enabled.
-lettre = { version = "0.10.0-beta.4", features = ["smtp-transport", "builder", "serde", "native-tls", "hostname", "tracing"], default-features = false }
+lettre = { version = "0.10.0-rc.1", features = ["smtp-transport", "builder", "serde", "native-tls", "hostname", "tracing"], default-features = false }
# Template library
handlebars = { version = "3.5.5", features = ["dir_source"] }
@@ -137,7 +143,7 @@ rocket = { git = 'https://github.com/SergioBenitez/Rocket', rev = '263e39b5b429d
rocket_contrib = { git = 'https://github.com/SergioBenitez/Rocket', rev = '263e39b5b429de1913ce7e3036575a7b4d88b6d7' }
# For favicon extraction from main website
-data-url = { git = 'https://github.com/servo/rust-url', package="data-url", rev = '540ede02d0771824c0c80ff9f57fe8eff38b1291' }
+data-url = { git = 'https://github.com/servo/rust-url', package="data-url", rev = 'eb7330b5296c0d43816d1346211b74182bb4ae37' }
# The maintainer of the `job_scheduler` crate doesn't seem to have responded
# to any issues or PRs for almost a year (as of April 2021). This hopefully
diff --git a/src/api/icons.rs b/src/api/icons.rs
@@ -3,14 +3,14 @@ use std::{
fs::{create_dir_all, remove_file, symlink_metadata, File},
io::prelude::*,
net::{IpAddr, ToSocketAddrs},
- sync::RwLock,
+ sync::{Arc, RwLock},
time::{Duration, SystemTime},
};
use once_cell::sync::Lazy;
use regex::Regex;
-use reqwest::{blocking::Client, blocking::Response, header, Url};
-use rocket::{http::ContentType, http::Cookie, response::Content, Route};
+use reqwest::{blocking::Client, blocking::Response, header};
+use rocket::{http::ContentType, response::Content, Route};
use crate::{
error::Error,
@@ -25,19 +25,17 @@ pub fn routes() -> Vec<Route> {
static CLIENT: Lazy<Client> = Lazy::new(|| {
// Generate the default headers
let mut default_headers = header::HeaderMap::new();
- default_headers.insert(header::USER_AGENT, header::HeaderValue::from_static("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15"));
- default_headers.insert(header::ACCEPT_LANGUAGE, header::HeaderValue::from_static("en-US,en;q=0.8"));
+ default_headers
+ .insert(header::USER_AGENT, header::HeaderValue::from_static("Links (2.22; Linux X86_64; GNU C; text)"));
+ default_headers
+ .insert(header::ACCEPT, header::HeaderValue::from_static("text/html, text/*;q=0.5, image/*, */*;q=0.1"));
+ default_headers.insert(header::ACCEPT_LANGUAGE, header::HeaderValue::from_static("en,*;q=0.1"));
default_headers.insert(header::CACHE_CONTROL, header::HeaderValue::from_static("no-cache"));
default_headers.insert(header::PRAGMA, header::HeaderValue::from_static("no-cache"));
- default_headers.insert(
- header::ACCEPT,
- header::HeaderValue::from_static(
- "text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,image/apng,*/*;q=0.8",
- ),
- );
// Reuse the client between requests
get_reqwest_client_builder()
+ .cookie_provider(Arc::new(Jar::default()))
.timeout(Duration::from_secs(CONFIG.icon_download_timeout()))
.default_headers(default_headers)
.build()
@@ -80,7 +78,7 @@ fn is_valid_domain(domain: &str) -> bool {
const ALLOWED_CHARS: &str = "_-.";
// If parsing the domain fails using Url, it will not work with reqwest.
- if let Err(parse_error) = Url::parse(format!("https://{}", domain).as_str()) {
+ if let Err(parse_error) = url::Url::parse(format!("https://{}", domain).as_str()) {
debug!("Domain parse error: '{}' - {:?}", domain, parse_error);
return false;
} else if domain.is_empty()
@@ -360,7 +358,51 @@ impl Icon {
}
}
-fn get_favicons_node(node: &std::rc::Rc<markup5ever_rcdom::Node>, icons: &mut Vec<Icon>, url: &Url) {
+/// Walks the HTML document looking for a <base href="http://domain.tld"> tag.
+/// When found, the walk stops and the href is joined onto `base_href`, which is updated in place (left unchanged if the join fails).
+///
+/// # Arguments
+/// * `node` - A node of the HTML document parsed via html5ever::parse_document()
+/// * `base_href` - a mutable url::Url which will be overwritten when a base href tag has been found.
+/// Returns `true` as soon as a <base> element is seen (with or without an href attribute), `false` if there is none.
+fn get_base_href(node: &std::rc::Rc<markup5ever_rcdom::Node>, base_href: &mut url::Url) -> bool {
+ if let markup5ever_rcdom::NodeData::Element {
+ name,
+ attrs,
+ ..
+ } = &node.data
+ {
+ if name.local.as_ref() == "base" {
+ let attrs = attrs.borrow();
+ for attr in attrs.iter() {
+ let attr_name = attr.name.local.as_ref();
+ let attr_value = attr.value.as_ref();
+
+ if attr_name == "href" {
+ debug!("Found base href: {}", attr_value);
+ *base_href = match base_href.join(attr_value) {
+ Ok(href) => href,
+ _ => base_href.clone(),
+ };
+ return true;
+ }
+ }
+ return true;
+ }
+ }
+
+ // TODO: Might want to limit the recursion depth?
+ for child in node.children.borrow().iter() {
+ // A `true` result means a <base> tag was found somewhere below this child,
+ // so stop iterating here and stop processing the rest of the html.
+ if get_base_href(child, base_href) {
+ return true;
+ }
+ }
+ false
+}
+
+fn get_favicons_node(node: &std::rc::Rc<markup5ever_rcdom::Node>, icons: &mut Vec<Icon>, url: &url::Url) {
if let markup5ever_rcdom::NodeData::Element {
name,
attrs,
@@ -406,12 +448,11 @@ fn get_favicons_node(node: &std::rc::Rc<markup5ever_rcdom::Node>, icons: &mut Ve
struct IconUrlResult {
iconlist: Vec<Icon>,
- cookies: String,
referer: String,
}
-/// Returns a Result/Tuple which holds a Vector IconList and a string which holds the cookies from the last response.
-/// There will always be a result with a string which will contain https://example.com/favicon.ico and an empty string for the cookies.
+/// Returns an IconUrlResult which holds a Vector IconList and a string which holds the referer.
+/// There will always be two items within the iconlist which hold http(s)://domain.tld/favicon.ico.
/// This does not mean that that location does exists, but it is the default location browser use.
///
/// # Argument
@@ -419,8 +460,8 @@ struct IconUrlResult {
///
/// # Example
/// ```
-/// let (mut iconlist, cookie_str) = get_icon_url("github.com")?;
-/// let (mut iconlist, cookie_str) = get_icon_url("gitlab.com")?;
+/// let icon_result = get_icon_url("github.com")?;
+/// let icon_result = get_icon_url("vaultwarden.discourse.group")?;
/// ```
fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
// Default URL with secure and insecure schemes
@@ -468,32 +509,12 @@ fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
// Create the iconlist
let mut iconlist: Vec<Icon> = Vec::new();
-
- // Create the cookie_str to fill it all the cookies from the response
- // These cookies can be used to request/download the favicon image.
- // Some sites have extra security in place with for example XSRF Tokens.
- let mut cookie_str = "".to_string();
- let mut referer = "".to_string();
+ let mut referer = String::from("");
if let Ok(content) = resp {
// Extract the URL from the respose in case redirects occured (like @ gitlab.com)
let url = content.url().clone();
- // Get all the cookies and pass it on to the next function.
- // Needed for XSRF Cookies for example (like @ mijn.ing.nl)
- let raw_cookies = content.headers().get_all("set-cookie");
- cookie_str = raw_cookies
- .iter()
- .filter_map(|raw_cookie| raw_cookie.to_str().ok())
- .map(|cookie_str| {
- if let Ok(cookie) = Cookie::parse(cookie_str) {
- format!("{}={}; ", cookie.name(), cookie.value())
- } else {
- String::new()
- }
- })
- .collect::<String>();
-
// Set the referer to be used on the final request, some sites check this.
// Mostly used to prevent direct linking and other security resons.
referer = url.as_str().to_string();
@@ -501,16 +522,17 @@ fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
// Add the default favicon.ico to the list with the domain the content responded from.
iconlist.push(Icon::new(35, String::from(url.join("/favicon.ico").unwrap())));
- // 512KB should be more than enough for the HTML, though as we only really need
- // the HTML header, it could potentially be reduced even further
- let mut limited_reader = content.take(512 * 1024);
+ // 384KB should be more than enough for the HTML, as we only really need the HTML header.
+ let mut limited_reader = content.take(384 * 1024);
use html5ever::tendril::TendrilSink;
let dom = html5ever::parse_document(markup5ever_rcdom::RcDom::default(), Default::default())
.from_utf8()
.read_from(&mut limited_reader)?;
- get_favicons_node(&dom.document, &mut iconlist, &url);
+ let mut base_url: url::Url = url;
+ get_base_href(&dom.document, &mut base_url);
+ get_favicons_node(&dom.document, &mut iconlist, &base_url);
} else {
// Add the default favicon.ico to the list with just the given domain
iconlist.push(Icon::new(35, format!("{}/favicon.ico", ssldomain)));
@@ -523,24 +545,20 @@ fn get_icon_url(domain: &str) -> Result<IconUrlResult, Error> {
// There always is an icon in the list, so no need to check if it exists, and just return the first one
Ok(IconUrlResult {
iconlist,
- cookies: cookie_str,
referer,
})
}
fn get_page(url: &str) -> Result<Response, Error> {
- get_page_with_cookies(url, "", "")
+ get_page_with_referer(url, "")
}
-fn get_page_with_cookies(url: &str, cookie_str: &str, referer: &str) -> Result<Response, Error> {
- if is_domain_blacklisted(Url::parse(url).unwrap().host_str().unwrap_or_default()) {
+fn get_page_with_referer(url: &str, referer: &str) -> Result<Response, Error> {
+ if is_domain_blacklisted(url::Url::parse(url).unwrap().host_str().unwrap_or_default()) {
err!("Favicon rel linked to a blacklisted domain!");
}
let mut client = CLIENT.get(url);
- if !cookie_str.is_empty() {
- client = client.header("Cookie", cookie_str)
- }
if !referer.is_empty() {
client = client.header("Referer", referer)
}
@@ -573,7 +591,7 @@ fn get_icon_priority(href: &str, sizes: Option<&str>) -> u8 {
1
} else if width == 64 {
2
- } else if (24..=128).contains(&width) {
+ } else if (24..=192).contains(&width) {
3
} else if width == 16 {
4
@@ -661,7 +679,7 @@ fn download_icon(domain: &str) -> Result<(Vec<u8>, Option<&str>), Error> {
_ => warn!("Extracted icon from data:image uri is invalid"),
};
} else {
- match get_page_with_cookies(&icon.href, &icon_result.cookies, &icon_result.referer) {
+ match get_page_with_referer(&icon.href, &icon_result.referer) {
Ok(mut res) => {
res.copy_to(&mut buffer)?;
// Check if the icon type is allowed, else try an icon from the list.
@@ -706,7 +724,54 @@ fn get_icon_type(bytes: &[u8]) -> Option<&'static str> {
[0, 0, 1, 0, ..] => Some("x-icon"),
[82, 73, 70, 70, ..] => Some("webp"),
[255, 216, 255, ..] => Some("jpeg"),
+ [71, 73, 70, 56, ..] => Some("gif"),
[66, 77, ..] => Some("bmp"),
_ => None,
}
}
+
+/// A cookie jar modelled on the default Cookie Jar from Reqwest and on reqwest_cookie_store, built by pfernie.
+/// The default cookie jar used by Reqwest keeps cookies for as long as their Max-Age or Expires allows, which could be a long time.
+/// That could be used for tracking; to prevent this, `set_cookies` clears Expires and forces Max-Age to two minutes on every cookie it stores.
+/// A Cookie Jar is needed because some sites force a redirect with cookies to verify if a request uses cookies or not.
+use cookie_store::CookieStore;
+#[derive(Default)]
+pub struct Jar(RwLock<CookieStore>);
+
+impl reqwest::cookie::CookieStore for Jar {
+ fn set_cookies(&self, cookie_headers: &mut dyn Iterator<Item = &header::HeaderValue>, url: &url::Url) {
+ use cookie::{Cookie as RawCookie, ParseError as RawCookieParseError};
+ use time::Duration;
+
+ let mut cookie_store = self.0.write().unwrap();
+ let cookies = cookie_headers.filter_map(|val| {
+ std::str::from_utf8(val.as_bytes())
+ .map_err(RawCookieParseError::from)
+ .and_then(RawCookie::parse)
+ .map(|mut c| {
+ c.set_expires(None);
+ c.set_max_age(Some(Duration::minutes(2)));
+ c.into_owned()
+ })
+ .ok()
+ });
+ cookie_store.store_response_cookies(cookies, url);
+ }
+
+ fn cookies(&self, url: &url::Url) -> Option<header::HeaderValue> {
+ use bytes::Bytes;
+
+ let cookie_store = self.0.read().unwrap();
+ let s = cookie_store
+ .get_request_values(url)
+ .map(|(name, value)| format!("{}={}", name, value))
+ .collect::<Vec<_>>()
+ .join("; ");
+
+ if s.is_empty() {
+ return None;
+ }
+
+ header::HeaderValue::from_maybe_shared(Bytes::from(s)).ok()
+ }
+}
diff --git a/src/main.rs b/src/main.rs
@@ -122,6 +122,9 @@ fn init_logging(level: log::LevelFilter) -> Result<(), fern::InitError> {
// Never show html5ever and hyper::proto logs, too noisy
.level_for("html5ever", log::LevelFilter::Off)
.level_for("hyper::proto", log::LevelFilter::Off)
+ .level_for("hyper::client", log::LevelFilter::Off)
+ // Prevent cookie_store logs
+ .level_for("cookie_store", log::LevelFilter::Off)
.chain(std::io::stdout());
// Enable smtp debug logging only specifically for smtp when need.