Fix image extraction for URLs with query strings
Previously, image extraction wouldn't catch images with a query string (or anything else) appended. This fixes that by parsing extracted URLs and only checking the path for what looks like an image file.
This commit is contained in:
parent
34d196376e
commit
563ea5b25b
15
posts.go
15
posts.go
|
@@ -16,6 +16,7 @@ import (
|
|||
"fmt"
|
||||
"html/template"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
@@ -1543,7 +1544,7 @@ func (rp *RawPost) Created8601() string {
|
|||
return rp.Created.Format("2006-01-02T15:04:05Z")
|
||||
}
|
||||
|
||||
var imageURLRegex = regexp.MustCompile(`(?i)^https?:\/\/[^ ]*\.(gif|png|jpg|jpeg|image)$`)
|
||||
var imageURLRegex = regexp.MustCompile(`(?i)[^ ]+\.(gif|png|jpg|jpeg|image)$`)
|
||||
|
||||
func (p *Post) extractImages() {
|
||||
p.Images = extractImages(p.Content)
|
||||
|
@@ -1553,11 +1554,17 @@ func extractImages(content string) []string {
|
|||
matches := extract.ExtractUrls(content)
|
||||
urls := map[string]bool{}
|
||||
for i := range matches {
|
||||
u := matches[i].Text
|
||||
if !imageURLRegex.MatchString(u) {
|
||||
uRaw := matches[i].Text
|
||||
// Parse the extracted text so we can examine the path
|
||||
u, err := url.Parse(uRaw)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
urls[u] = true
|
||||
// Ensure the path looks like it leads to an image file
|
||||
if !imageURLRegex.MatchString(u.Path) {
|
||||
continue
|
||||
}
|
||||
urls[uRaw] = true
|
||||
}
|
||||
|
||||
resURLs := make([]string, 0)
|
||||
|
|
Loading…
Reference in New Issue