commit d3921b973b48cb2eb6746c714b93d18ad0644ca4
parent cf6ad3cb158cd42c730938838f67f4ddbd5ba47b
Author: Daniel GarcĂa <dani-garcia@users.noreply.github.com>
Date: Tue, 6 Apr 2021 19:35:51 +0200
Merge pull request #1583 from BlackDex/icon-updates
Updated icon fetching.
Diffstat:
M | src/api/icons.rs | | | 58 | +++++++++++++++++++++++++++++++++++++++++++++------------- |
1 file changed, 45 insertions(+), 13 deletions(-)
diff --git a/src/api/icons.rs b/src/api/icons.rs
@@ -37,6 +37,7 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
// Build Regex only once since this takes a lot of time.
static ICON_REL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)icon$|apple.*icon").unwrap());
+static ICON_REL_BLACKLIST: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)mask-icon").unwrap());
static ICON_SIZE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?x)(\d+)\D*(\d+)").unwrap());
// Special HashMap which holds the user defined Regex to speedup matching the regex.
@@ -52,7 +53,9 @@ fn icon(domain: String) -> Cached<Content<Vec<u8>>> {
}
match get_icon(&domain) {
- Some(i) => Cached::ttl(Content(ContentType::new("image", "x-icon"), i), CONFIG.icon_cache_ttl()),
+ Some((icon, icon_type)) => {
+ Cached::ttl(Content(ContentType::new("image", icon_type), icon), CONFIG.icon_cache_ttl())
+ },
_ => Cached::ttl(Content(ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), CONFIG.icon_cache_negttl()),
}
}
@@ -243,7 +246,7 @@ fn is_domain_blacklisted(domain: &str) -> bool {
is_blacklisted
}
-fn get_icon(domain: &str) -> Option<Vec<u8>> {
+fn get_icon(domain: &str) -> Option<(Vec<u8>, String)> {
let path = format!("{}/{}.png", CONFIG.icon_cache_folder(), domain);
// Check for expiration of negatively cached copy
@@ -252,7 +255,11 @@ fn get_icon(domain: &str) -> Option<Vec<u8>> {
}
if let Some(icon) = get_cached_icon(&path) {
- return Some(icon);
+ let icon_type = match get_icon_type(&icon) {
+ Some(x) => x,
+ _ => "x-icon",
+ };
+ return Some((icon, icon_type.to_string()));
}
if CONFIG.disable_icon_download() {
@@ -261,9 +268,9 @@ fn get_icon(domain: &str) -> Option<Vec<u8>> {
// Get the icon, or None in case of error
match download_icon(&domain) {
- Ok(icon) => {
+ Ok((icon, icon_type)) => {
save_icon(&path, &icon);
- Some(icon)
+ Some((icon, icon_type.unwrap_or("x-icon").to_string()))
}
Err(e) => {
error!("Error downloading icon: {:?}", e);
@@ -324,7 +331,6 @@ fn icon_is_expired(path: &str) -> bool {
expired.unwrap_or(true)
}
-#[derive(Debug)]
struct Icon {
priority: u8,
href: String,
@@ -348,7 +354,7 @@ fn get_favicons_node(node: &std::rc::Rc<markup5ever_rcdom::Node>, icons: &mut Ve
let attr_name = attr.name.local.as_ref();
let attr_value = attr.value.as_ref();
- if attr_name == "rel" && ICON_REL_REGEX.is_match(attr_value) {
+ if attr_name == "rel" && ICON_REL_REGEX.is_match(attr_value) && !ICON_REL_BLACKLIST.is_match(attr_value) {
has_rel = true;
} else if attr_name == "href" {
href = Some(attr_value);
@@ -597,7 +603,7 @@ fn parse_sizes(sizes: Option<&str>) -> (u16, u16) {
(width, height)
}
-fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
+fn download_icon(domain: &str) -> Result<(Vec<u8>, Option<&str>), Error> {
if is_domain_blacklisted(domain) {
err!("Domain is blacklisted", domain)
}
@@ -605,6 +611,7 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
let icon_result = get_icon_url(&domain)?;
let mut buffer = Vec::new();
+ let mut icon_type: Option<&str> = None;
use data_url::DataUrl;
@@ -616,17 +623,31 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
Ok((body, _fragment)) => {
// Also check if the size is atleast 67 bytes, which seems to be the smallest png i could create
if body.len() >= 67 {
+ // Check if the icon type is allowed, else try an icon from the list.
+ icon_type = get_icon_type(&body);
+ if icon_type.is_none() {
+ debug!("Icon from {} data:image uri, is not a valid image type", domain);
+ continue;
+ }
+ info!("Extracted icon from data:image uri for {}", domain);
buffer = body;
break;
}
}
- _ => warn!("data uri is invalid"),
+ _ => warn!("Extracted icon from data:image uri is invalid"),
};
} else {
match get_page_with_cookies(&icon.href, &icon_result.cookies, &icon_result.referer) {
Ok(mut res) => {
- info!("Downloaded icon from {}", icon.href);
res.copy_to(&mut buffer)?;
+ // Check if the icon type is allowed, else try an icon from the list.
+ icon_type = get_icon_type(&buffer);
+ if icon_type.is_none() {
+ buffer.clear();
+ debug!("Icon from {}, is not a valid image type", icon.href);
+ continue;
+ }
+ info!("Downloaded icon from {}", icon.href);
break;
}
_ => warn!("Download failed for {}", icon.href),
@@ -635,10 +656,10 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
}
if buffer.is_empty() {
- err!("Empty response")
+ err!("Empty response downloading icon")
}
- Ok(buffer)
+ Ok((buffer, icon_type))
}
fn save_icon(path: &str, icon: &[u8]) {
@@ -650,7 +671,18 @@ fn save_icon(path: &str, icon: &[u8]) {
create_dir_all(&CONFIG.icon_cache_folder()).expect("Error creating icon cache");
}
Err(e) => {
- info!("Icon save error: {:?}", e);
+ warn!("Icon save error: {:?}", e);
}
}
}
+
+fn get_icon_type(bytes: &[u8]) -> Option<&'static str> {
+ match bytes {
+ [137, 80, 78, 71, ..] => Some("png"),
+ [0, 0, 1, 0, ..] => Some("x-icon"),
+ [82, 73, 70, 70, ..] => Some("webp"),
+ [255, 216, 255, ..] => Some("jpeg"),
+ [66, 77, ..] => Some("bmp"),
+ _ => None
+ }
+}