diff --git a/plugin/httpgetter/html_meta.go b/plugin/httpgetter/html_meta.go
index f69bad61..24968d20 100644
--- a/plugin/httpgetter/html_meta.go
+++ b/plugin/httpgetter/html_meta.go
@@ -3,6 +3,7 @@ package httpgetter
import (
"errors"
"io"
+ "net"
"net/http"
"net/url"
@@ -17,7 +18,7 @@ type HTMLMeta struct {
}
func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
- if _, err := url.Parse(urlStr); err != nil {
+ if err := validateURL(urlStr); err != nil {
return nil, err
}
@@ -35,6 +36,8 @@ func GetHTMLMeta(urlStr string) (*HTMLMeta, error) {
return nil, errors.New("not a HTML page")
}
+ // TODO: limit the size of the response body
+
htmlMeta := extractHTMLMeta(response.Body)
return htmlMeta, nil
}
@@ -96,3 +99,51 @@ func extractMetaProperty(token html.Token, prop string) (content string, ok bool
}
return content, ok
}
+
+// validateURL checks that urlStr is acceptable as a remote fetch target.
+//
+// It returns an error when urlStr cannot be parsed, when its scheme is not
+// http or https, when it has no host, or when the host is an IP literal in a
+// loopback, private, unspecified, link-local, or multicast range.
+//
+// Only IP literals are inspected: hostnames are not resolved here, so a name
+// that resolves to an internal address (including "localhost") still passes.
+func validateURL(urlStr string) error {
+	u, err := url.Parse(urlStr)
+	if err != nil {
+		return errors.New("invalid URL format")
+	}
+
+	if u.Scheme != "http" && u.Scheme != "https" {
+		return errors.New("only http/https protocols are allowed")
+	}
+
+	host := u.Hostname()
+	if host == "" {
+		return errors.New("URL must include a host")
+	}
+
+	// Hostname keeps an IPv6 zone ("fe80::1%eth0"), which net.ParseIP
+	// rejects; drop it so zoned literals are still checked below.
+	for i := 0; i < len(host); i++ {
+		if host[i] == '%' {
+			host = host[:i]
+			break
+		}
+	}
+
+	ip := net.ParseIP(host)
+	if ip == nil {
+		// Not an IP literal; nothing more can be checked without resolving it.
+		return nil
+	}
+
+	// The unspecified address (0.0.0.0, ::) is included because dialing it
+	// reaches the local host on many systems.
+	if ip.IsLoopback() || ip.IsPrivate() || ip.IsUnspecified() ||
+		ip.IsLinkLocalUnicast() || ip.IsMulticast() {
+		return errors.New("internal IP addresses are not allowed")
+	}
+
+	return nil
+}