Fix image extraction for URLs with query strings

Previously, image extraction wouldn't catch images with a query string (or anything else) appended. This fixes that by parsing extracted URLs and only checking the path for what looks like an image file.
2020-02-19 17:07:02 -05:00 · 2020-02-19 17:07:02 -05:00 · 563ea5b25b
parent 34d196376e
commit 563ea5b25b
1 changed file with 11 additions and 4 deletions
--- a/posts.go
+++ b/posts.go
@@ -16,6 +16,7 @@ import (
 	"fmt"
 	"html/template"
 	"net/http"
+	"net/url"
 	"regexp"
 	"strings"
 	"time"
@@ -1543,7 +1544,7 @@ func (rp *RawPost) Created8601() string {
 	return rp.Created.Format("2006-01-02T15:04:05Z")
 }

-var imageURLRegex = regexp.MustCompile(`(?i)^https?:\/\/[^ ]*\.(gif|png|jpg|jpeg|image)$`)
+var imageURLRegex = regexp.MustCompile(`(?i)[^ ]+\.(gif|png|jpg|jpeg|image)$`)

 func (p *Post) extractImages() {
 	p.Images = extractImages(p.Content)
@@ -1553,11 +1554,17 @@ func extractImages(content string) []string {
 	matches := extract.ExtractUrls(content)
 	urls := map[string]bool{}
 	for i := range matches {
-		u := matches[i].Text
-		if !imageURLRegex.MatchString(u) {
+		uRaw := matches[i].Text
+		// Parse the extracted text so we can examine the path
+		u, err := url.Parse(uRaw)
+		if err != nil {
 			continue
 		}
-		urls[u] = true
+		// Ensure the path looks like it leads to an image file
+		if !imageURLRegex.MatchString(u.Path) {
+			continue
+		}
+		urls[uRaw] = true
 	}

 	resURLs := make([]string, 0)