Merge pull request #1583 from BlackDex/icon-updates

Updated icon fetching.
This commit is contained in:
Daniel García 2021-04-06 19:35:51 +02:00 committed by GitHub
commit d3921b973b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -37,6 +37,7 @@ static CLIENT: Lazy<Client> = Lazy::new(|| {
// Build Regex only once since this takes a lot of time.
static ICON_REL_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)icon$|apple.*icon").unwrap());
static ICON_REL_BLACKLIST: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?i)mask-icon").unwrap());
static ICON_SIZE_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"(?x)(\d+)\D*(\d+)").unwrap());
// Special HashMap which holds the user defined Regex to speedup matching the regex.
@ -52,7 +53,9 @@ fn icon(domain: String) -> Cached<Content<Vec<u8>>> {
}
match get_icon(&domain) {
Some(i) => Cached::ttl(Content(ContentType::new("image", "x-icon"), i), CONFIG.icon_cache_ttl()),
Some((icon, icon_type)) => {
Cached::ttl(Content(ContentType::new("image", icon_type), icon), CONFIG.icon_cache_ttl())
},
_ => Cached::ttl(Content(ContentType::new("image", "png"), FALLBACK_ICON.to_vec()), CONFIG.icon_cache_negttl()),
}
}
@ -243,7 +246,7 @@ fn is_domain_blacklisted(domain: &str) -> bool {
is_blacklisted
}
fn get_icon(domain: &str) -> Option<Vec<u8>> {
fn get_icon(domain: &str) -> Option<(Vec<u8>, String)> {
let path = format!("{}/{}.png", CONFIG.icon_cache_folder(), domain);
// Check for expiration of negatively cached copy
@ -252,7 +255,11 @@ fn get_icon(domain: &str) -> Option<Vec<u8>> {
}
if let Some(icon) = get_cached_icon(&path) {
return Some(icon);
let icon_type = match get_icon_type(&icon) {
Some(x) => x,
_ => "x-icon",
};
return Some((icon, icon_type.to_string()));
}
if CONFIG.disable_icon_download() {
@ -261,9 +268,9 @@ fn get_icon(domain: &str) -> Option<Vec<u8>> {
// Get the icon, or None in case of error
match download_icon(&domain) {
Ok(icon) => {
Ok((icon, icon_type)) => {
save_icon(&path, &icon);
Some(icon)
Some((icon, icon_type.unwrap_or("x-icon").to_string()))
}
Err(e) => {
error!("Error downloading icon: {:?}", e);
@ -324,7 +331,6 @@ fn icon_is_expired(path: &str) -> bool {
expired.unwrap_or(true)
}
#[derive(Debug)]
struct Icon {
priority: u8,
href: String,
@ -348,7 +354,7 @@ fn get_favicons_node(node: &std::rc::Rc<markup5ever_rcdom::Node>, icons: &mut Ve
let attr_name = attr.name.local.as_ref();
let attr_value = attr.value.as_ref();
if attr_name == "rel" && ICON_REL_REGEX.is_match(attr_value) {
if attr_name == "rel" && ICON_REL_REGEX.is_match(attr_value) && !ICON_REL_BLACKLIST.is_match(attr_value) {
has_rel = true;
} else if attr_name == "href" {
href = Some(attr_value);
@ -597,7 +603,7 @@ fn parse_sizes(sizes: Option<&str>) -> (u16, u16) {
(width, height)
}
fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
fn download_icon(domain: &str) -> Result<(Vec<u8>, Option<&str>), Error> {
if is_domain_blacklisted(domain) {
err!("Domain is blacklisted", domain)
}
@ -605,6 +611,7 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
let icon_result = get_icon_url(&domain)?;
let mut buffer = Vec::new();
let mut icon_type: Option<&str> = None;
use data_url::DataUrl;
@ -616,17 +623,31 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
Ok((body, _fragment)) => {
// Also check if the size is atleast 67 bytes, which seems to be the smallest png i could create
if body.len() >= 67 {
// Check if the icon type is allowed, else try an icon from the list.
icon_type = get_icon_type(&body);
if icon_type.is_none() {
debug!("Icon from {} data:image uri, is not a valid image type", domain);
continue;
}
info!("Extracted icon from data:image uri for {}", domain);
buffer = body;
break;
}
}
_ => warn!("data uri is invalid"),
_ => warn!("Extracted icon from data:image uri is invalid"),
};
} else {
match get_page_with_cookies(&icon.href, &icon_result.cookies, &icon_result.referer) {
Ok(mut res) => {
info!("Downloaded icon from {}", icon.href);
res.copy_to(&mut buffer)?;
// Check if the icon type is allowed, else try an icon from the list.
icon_type = get_icon_type(&buffer);
if icon_type.is_none() {
buffer.clear();
debug!("Icon from {}, is not a valid image type", icon.href);
continue;
}
info!("Downloaded icon from {}", icon.href);
break;
}
_ => warn!("Download failed for {}", icon.href),
@ -635,10 +656,10 @@ fn download_icon(domain: &str) -> Result<Vec<u8>, Error> {
}
if buffer.is_empty() {
err!("Empty response")
err!("Empty response downloading icon")
}
Ok(buffer)
Ok((buffer, icon_type))
}
fn save_icon(path: &str, icon: &[u8]) {
@ -650,7 +671,18 @@ fn save_icon(path: &str, icon: &[u8]) {
create_dir_all(&CONFIG.icon_cache_folder()).expect("Error creating icon cache");
}
Err(e) => {
info!("Icon save error: {:?}", e);
warn!("Icon save error: {:?}", e);
}
}
}
fn get_icon_type(bytes: &[u8]) -> Option<&'static str> {
match bytes {
[137, 80, 78, 71, ..] => Some("png"),
[0, 0, 1, 0, ..] => Some("x-icon"),
[82, 73, 70, 70, ..] => Some("webp"),
[255, 216, 255, ..] => Some("jpeg"),
[66, 77, ..] => Some("bmp"),
_ => None
}
}