From 34d196376ead77fdce3ad93f575e12b99905dbf9 Mon Sep 17 00:00:00 2001 From: Matt Baer Date: Wed, 19 Feb 2020 16:38:50 -0500 Subject: [PATCH 1/2] Include extracted images in draft social metadata Previously, we didn't extract images for draft posts. This fixes that. --- posts.go | 10 ++++++++-- templates/post.tmpl | 4 ++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/posts.go b/posts.go index a0e4588..a9c8c11 100644 --- a/posts.go +++ b/posts.go @@ -62,6 +62,7 @@ type ( Description string Author string Views int64 + Images []string IsPlainText bool IsCode bool IsLinkable bool @@ -381,6 +382,7 @@ func handleViewPost(app *App, w http.ResponseWriter, r *http.Request) error { } if !isRaw { post.HTMLContent = template.HTML(applyMarkdown([]byte(content), "", app.cfg)) + post.Images = extractImages(post.Content) } } @@ -1544,7 +1546,11 @@ func (rp *RawPost) Created8601() string { var imageURLRegex = regexp.MustCompile(`(?i)^https?:\/\/[^ ]*\.(gif|png|jpg|jpeg|image)$`) func (p *Post) extractImages() { - matches := extract.ExtractUrls(p.Content) + p.Images = extractImages(p.Content) +} + +func extractImages(content string) []string { + matches := extract.ExtractUrls(content) urls := map[string]bool{} for i := range matches { u := matches[i].Text @@ -1558,5 +1564,5 @@ func (p *Post) extractImages() { for k := range urls { resURLs = append(resURLs, k) } - p.Images = resURLs + return resURLs } diff --git a/templates/post.tmpl b/templates/post.tmpl index e9edfed..15b479d 100644 --- a/templates/post.tmpl +++ b/templates/post.tmpl @@ -23,13 +23,13 @@ {{if gt .Views 1}} {{end}} - + {{if gt (len .Images) 0}}{{else}}{{end}} - + {{range .Images}}{{else}}{{end}} {{if .Author}}{{end}} {{template "highlighting" .}} From 563ea5b25b306ce0cde59f156a10eb4369a70c64 Mon Sep 17 00:00:00 2001 From: Matt Baer Date: Wed, 19 Feb 2020 17:07:02 -0500 Subject: [PATCH 2/2] Fix image extraction for URLs with query strings Previously, image extraction wouldn't catch images with a query string (or anything else) appended. This fixes that by parsing extracted URLs and only checking the path for what looks like an image file. --- posts.go | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/posts.go b/posts.go index a9c8c11..35e9bd3 100644 --- a/posts.go +++ b/posts.go @@ -16,6 +16,7 @@ import ( "fmt" "html/template" "net/http" + "net/url" "regexp" "strings" "time" @@ -1543,7 +1544,7 @@ func (rp *RawPost) Created8601() string { return rp.Created.Format("2006-01-02T15:04:05Z") } -var imageURLRegex = regexp.MustCompile(`(?i)^https?:\/\/[^ ]*\.(gif|png|jpg|jpeg|image)$`) +var imageURLRegex = regexp.MustCompile(`(?i)[^ ]+\.(gif|png|jpg|jpeg|image)$`) func (p *Post) extractImages() { p.Images = extractImages(p.Content) @@ -1553,11 +1554,17 @@ func extractImages(content string) []string { matches := extract.ExtractUrls(content) urls := map[string]bool{} for i := range matches { - u := matches[i].Text - if !imageURLRegex.MatchString(u) { + uRaw := matches[i].Text + // Parse the extracted text so we can examine the path + u, err := url.Parse(uRaw) + if err != nil { continue } - urls[u] = true + // Ensure the path looks like it leads to an image file + if !imageURLRegex.MatchString(u.Path) { + continue + } + urls[uRaw] = true } resURLs := make([]string, 0)