From 34d196376ead77fdce3ad93f575e12b99905dbf9 Mon Sep 17 00:00:00 2001
From: Matt Baer <matt@write.as>
Date: Wed, 19 Feb 2020 16:38:50 -0500
Subject: [PATCH 1/2] Include extracted images in draft social metadata

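Previously, we didn't extract images for draft posts, so their social
metadata always pointed at the default site image. This extracts images
when a draft is rendered and uses them for the twitter:image and
og:image tags, falling back to the default image when none are found.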
---
 posts.go            | 10 ++++++++--
 templates/post.tmpl |  4 ++--
 2 files changed, 10 insertions(+), 4 deletions(-)
diff --git a/posts.go b/posts.go
index a0e4588..a9c8c11 100644
--- a/posts.go
+++ b/posts.go
@@ -62,6 +62,7 @@ type (
 		Description string
 		Author      string
 		Views       int64
+		Images      []string
 		IsPlainText bool
 		IsCode      bool
 		IsLinkable  bool
@@ -381,6 +382,7 @@ func handleViewPost(app *App, w http.ResponseWriter, r *http.Request) error {
 		}
 		if !isRaw {
 			post.HTMLContent = template.HTML(applyMarkdown([]byte(content), "", app.cfg))
+			post.Images = extractImages(post.Content)
 		}
 	}
 
@@ -1544,7 +1546,11 @@ func (rp *RawPost) Created8601() string {
 var imageURLRegex = regexp.MustCompile(`(?i)^https?:\/\/[^ ]*\.(gif|png|jpg|jpeg|image)$`)
 
 func (p *Post) extractImages() {
-	matches := extract.ExtractUrls(p.Content)
+	p.Images = extractImages(p.Content)
+}
+
+func extractImages(content string) []string {
+	matches := extract.ExtractUrls(content)
 	urls := map[string]bool{}
 	for i := range matches {
 		u := matches[i].Text
@@ -1558,5 +1564,5 @@ func (p *Post) extractImages() {
 	for k := range urls {
 		resURLs = append(resURLs, k)
 	}
-	p.Images = resURLs
+	return resURLs
 }
diff --git a/templates/post.tmpl b/templates/post.tmpl
index e9edfed..15b479d 100644
--- a/templates/post.tmpl
+++ b/templates/post.tmpl
@@ -23,13 +23,13 @@
 		<meta name="twitter:description" content="{{.Description}}">
 		{{if gt .Views 1}}<meta name="twitter:label1" value="Views">
 		<meta name="twitter:data1" value="{{largeNumFmt .Views}}">{{end}}
-		<meta name="twitter:image" content="{{.Host}}/img/wf-sq.png">
+		{{if gt (len .Images) 0}}<meta name="twitter:image" content="{{index .Images 0}}">{{else}}<meta name="twitter:image" content="{{.Host}}/img/wf-sq.png">{{end}}
 		<meta property="og:title" content="{{if .Title}}{{.Title}}{{else}}{{.GenTitle}}{{end}}" />
 		<meta property="og:site_name" content="{{.SiteName}}" />
 		<meta property="og:type" content="article" />
 		<meta property="og:url" content="{{.Host}}/{{if .SingleUser}}d/{{end}}{{.ID}}" />
 		<meta property="og:description" content="{{.Description}}" />
-		<meta property="og:image" content="{{.Host}}/img/wf-sq.png">
+		{{range .Images}}<meta property="og:image" content="{{.}}" />{{else}}<meta property="og:image" content="{{.Host}}/img/wf-sq.png">{{end}}
 		{{if .Author}}<meta property="article:author" content="https://{{.Author}}" />{{end}}
 		<!-- Add highlighting logic -->
 		{{template "highlighting" .}}

From 563ea5b25b306ce0cde59f156a10eb4369a70c64 Mon Sep 17 00:00:00 2001
From: Matt Baer <matt@write.as>
Date: Wed, 19 Feb 2020 17:07:02 -0500
Subject: [PATCH 2/2] Fix image extraction for URLs with query strings

Previously, image extraction wouldn't catch image URLs with a query
string (or anything else, such as a fragment) appended, because the
regex was matched against the entire URL. This fixes that by parsing
each extracted URL and only checking its path for what looks like an
image file extension.
---
 posts.go | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/posts.go b/posts.go
index a9c8c11..35e9bd3 100644
--- a/posts.go
+++ b/posts.go
@@ -16,6 +16,7 @@ import (
 	"fmt"
 	"html/template"
 	"net/http"
+	"net/url"
 	"regexp"
 	"strings"
 	"time"
@@ -1543,7 +1544,7 @@ func (rp *RawPost) Created8601() string {
 	return rp.Created.Format("2006-01-02T15:04:05Z")
 }
 
-var imageURLRegex = regexp.MustCompile(`(?i)^https?:\/\/[^ ]*\.(gif|png|jpg|jpeg|image)$`)
+var imageURLRegex = regexp.MustCompile(`(?i)[^ ]+\.(gif|png|jpg|jpeg|image)$`)
 
 func (p *Post) extractImages() {
 	p.Images = extractImages(p.Content)
@@ -1553,11 +1554,17 @@ func extractImages(content string) []string {
 	matches := extract.ExtractUrls(content)
 	urls := map[string]bool{}
 	for i := range matches {
-		u := matches[i].Text
-		if !imageURLRegex.MatchString(u) {
+		uRaw := matches[i].Text
+		// Parse the extracted text so we can examine the path
+		u, err := url.Parse(uRaw)
+		if err != nil {
 			continue
 		}
-		urls[u] = true
+		// Ensure the path looks like it leads to an image file
+		if !imageURLRegex.MatchString(u.Path) {
+			continue
+		}
+		urls[uRaw] = true
 	}
 
 	resURLs := make([]string, 0)