Snippets/ShioriFeed.py

530 lines
22 KiB
Python
Executable File

#!/usr/bin/env python3
# *----------------------------------------------------------------------* #
# | [ ShioriFeed 🔖 (OctoSpacc) ] | #
# | Simple service for getting an Atom/RSS feed from your Shiori profile | #
# *----------------------------------------------------------------------* #
# Release identifier: shown on the home page and embedded in the User-Agent and the feed <generator> tag
Version = '2023-02-28'
# *----------------------------------------------------------------------* #
# *-------------------------------------------* #
# | Configuration | #
# *-------------------------------------------* #
# (address, port) pair handed to the HTTP server in Serve()
Host = ('localhost', 8176)
# When True, error responses also include the Python traceback (see RetDebugIf)
Debug = False
# User-Agent header sent with every request made to the Shiori server
UserAgent = f'ShioriFeed v{Version} at {Host[0]}'
# Feed format ('atom' or 'rss') used when the request does not name one
DefFeedType = 'atom'
# *-------------------------------------------* #
# External Requirements: none (only the Python standard library, e.g. urllib, is used)
# TODO:
# - Checking if Content mode content is actually present, otherwise fall back to Archive mode or original link (using API data is unreliable it seems)
# - HTML proxy (direct access to web UI, without JS)
# - Actually valid RSS
# - XML stylesheet
# - Filtering (tags, etc.)
# - Write privacy policy
# - Fix the URL copy thing
# - Minification (?)
# *-------------------------------------------------------------------------* #
import json
import threading
import traceback
from base64 import urlsafe_b64decode as b64UrlDecode, urlsafe_b64encode as b64UrlEncode, standard_b64encode as b64Encode
from html import escape as HtmlEscape
from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
from urllib.parse import unquote as UrlUnquote
from urllib.request import urlopen, Request
# Shared <head> markup (charset, viewport, icon, title, description and OpenGraph tags).
# It is substituted for the '{{HtmlHead}}' placeholder in both HomeTemplate and XmlStyle.
# NOTE(review): the icon <link> has an empty href here — confirm whether a data URI was meant to be inlined.
HtmlHead = '''
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1.0"/>
<!--
"bookmark" Emoji icon - Copyright 2021 Google Inc. All Rights Reserved.
<https://fonts.google.com/noto/specimen/Noto+Color+Emoji/about>
<https://scripts.sil.org/cms/scripts/page.php?item_id=OFL_web>
-->
<link rel="shortcut icon" type="image/png" href=""/>
<title>ShioriFeed 🔖</title>
<meta name="description" content="Simple service for getting an Atom/RSS feed from your Shiori profile"/>
<meta property="og:title" content="ShioriFeed 🔖"/>
<meta property="og:description" content="Simple service for getting an Atom/RSS feed from your Shiori profile"/>
'''
# Home page markup (HTML + CSS + JS) served for GET / and, with a result box added, for POST /.
# '{{HtmlHead}}' and '{{Version}}' are substituted right here, when the module is loaded.
# '<!-- {{PostResult}} -->' and '/* {{PostCss}} */' are deliberately left in place: they are
# the markers that Handler.do_POST replaces with the generated feed URL box and extra CSS.
# The inline script rewrites the placeholder server address in the first <textarea> with
# location.origin and copies the URL to the clipboard on click.
HomeTemplate = '''\
<!DOCTYPE html>
<html lang="en">
<head>
{{HtmlHead}}
<style>
:root {
--cFore0: #232323;
--cFore1: #292929;
--cAccent: #f44336;
--cBack0: #e9e9e9;
--cBack1: #ffffff;
/*--cGray: #c9c9c9;*/
}
@media (prefers-color-scheme: dark) {
:root {
--cFore0: #ffffff;
--cFore1: #eeeeee;
--cBack0: #292929;
--cBack1: #1f1f1f;
--cGray: #606060;
}
}
* { box-sizing: border-box; }
.Underline { text-decoration: underline; }
.NoSelect {
user-select: none;
-ms-user-select: none;
-moz-user-select: none;
-khtml-user-select: none;
-webkit-user-select: none;
-webkit-touch-callout: none;
}
body {
color: var(--cFore0);
background: var(--cBack0);
font-family: "Source Sans Pro", sans-serif;
margin: 0px;
padding-top: 24px;
padding-bottom: 24px;
padding-left: 10%;
padding-right: 10%;
word-break: break-word;
}
a { color: var(--cAccent); }
form > label { padding: 8px; }
form > label > span { padding-bottom: 4px; }
form > label, form > label > span {
display: inline-block;
width: 100%;
}
textarea {
width: 100%;
height: 5em;
font-size: large;
resize: none;
}
input { height: 2em; }
input[type="submit"], button { font-size: large; }
input, textarea, details {
width: 100%;
border-radius: 2px;
}
input, textarea, button {
color: var(--cFore1);
background: var(--cBack1);
border: none;
}
details {
background: var(--cBack1)/*var(--cGray)*/;
padding: 8px;
}
details > summary > h4 { display: inline; }
span.Separator {
display: inline-block;
width: 0.25em;
height: 0.25em;
margin: 0.25em;
vertical-align: middle;
background: var(--cFore1);
}
/* {{PostCss}} */
</style>
</head>
<body>
<div class="NoSelect">
<h2>ShioriFeed 🔖</h2>
<p class="PostObscure">
Enter the details of your account on a
<a href="https://github.com/go-shiori/">Shiori</a>
server to get an Atom/RSS feed link.
</p>
<p class="PostObscure">
<small>Note: still a work-in-progress!</small>
</p>
<br />
<!-- {{PostResult}} -->
<p class="PostObscure">
<form action="./" method="POST">
<label class="PostObscure">
<span>Server <small>(must start with protocol prefix)</small>:</span>
<input type="text" name="Remote" placeholder="http[s]://..."/>
</label>
<br />
<label class="PostObscure">
<span>Username:</span>
<input type="text" name="Username" placeholder="erre"/>
</label>
<br />
<label class="PostObscure">
<span>Password:</span>
<input type="password" name="Password" placeholder="**********"/>
</label>
<br />
<label class="PostObscure">
<span>&nbsp;</span>
<input type="submit" value="Submit"/>
</label>
</form>
</p>
<br />
</div>
<!--
NOTE TO SELF-HOSTERS:
You should probably either adjust or remove this :)
For sure you should at least write your own domain.
-->
<p>
<details>
<summary class="NoSelect">
<!-- Change the domain if self-hosting! -->
<h4>Privacy Policy</h4>
(applies to <em class="Underline">ShioriFeed.Octt.eu.org</em>)
</summary>
<p><!--
By using this service
(doing any action that sends/requests data to/from the server),
you understand and agree to the following:
<ul>
<li>
</li>
</ul>-->
<!--<ul>
<li>-->
I still have to write this... tough luck.
I'm not yet actively inviting anyone to use this instance right now,
if you're worried about your security then just host the software yourself.
<!-- </li>
</ul>-->
</p>
</details>
</p>
<p class="NoSelect">
<span>v. {{Version}}</span>
<span class="Separator"></span>
<a href="https://gitlab.com/octospacc/Snippets/-/blob/main/ShioriFeed.py">Source Code</a>
</p>
<script>
var Box = document.querySelector('textarea');
if (Box) {
//BoxFocused = false;
Box.value = location.origin + Box.value.substring('http[s]://<THIS SHIORIFEED SERVER ADDRESS>'.length);
//Box.onfocusout = function() { console.log(1); BoxFocused = false; };
//Box.onfocusin = function() { console.log(2); BoxFocused = true; };
Box.onclick = function() {
try {
//if (BoxFocused) {
navigator.clipboard.writeText(Box.value);
alert('Copied to clipboard!');
//};
} catch(e) {};
};
BtnChangeFeed.onclick = function() {
var CurType = Box.value.split('/').slice(-1)[0].split('?')[0].split('.')[0];
if (CurType == 'atom') {
} else
if (CurType == 'rss') {
};
};
};
</script>
</body>
</html>
'''.replace('{{HtmlHead}}', HtmlHead).replace('{{Version}}', Version)
# XML prolog placed at the top of every generated feed (see MkFeed).
# The xml-stylesheet instruction points at '#stylesheet', i.e. the <xsl:stylesheet id="stylesheet">
# element from XmlStyle, and the DOCTYPE declares that 'id' attribute as an XML ID.
XmlHead = '''\
<?xml version="1.0" encoding="utf-8"?>
<?xml-stylesheet type="text/xsl" href="#stylesheet"?>
<!DOCTYPE xml [<!ATTLIST xsl:stylesheet id ID #REQUIRED>]>
'''
# XSLT stylesheet that MkFeed embeds in each generated feed.
# It renders every atom:entry as a collapsible <details> element (title link, date, summary, content).
# '{{HtmlHead}}' is substituted with the shared head markup when the module is loaded.
# Only templates for the Atom namespace are defined here.
XmlStyle='''
<!-- Partially derived from https://gist.github.com/andrewstiefel/57a0a400aa2deb6c9fe18c6da4e16e0f -->
<xsl:stylesheet
id="stylesheet"
version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:atom="http://www.w3.org/2005/Atom"
exclude-result-prefixes="atom"
>
<xsl:output method="html" version="1.0" encoding="UTF-8" indent="yes"/>
<xsl:template match="/">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
{{HtmlHead}}
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<style type="text/css">
* { max-width: 100%; height: auto; box-sizing: border-box; }
body { overflow-wrap: break-word; background: #ffffff; color: #000000; }
details { border: 2px solid gray; margin: 16px; }
details > summary { padding: 8px; background: #dddddd; }
details > summary > h2 { display: inline; }
details > summary img { max-height: 50vh; }
details > div { padding: 8px; }
</style>
<script type="application/javascript">
// TODO: Output escaping doesn't work on Firefox so we must parse XML via scripts to properly display it
//var Req = new XMLHttpRequest();
//Req.open('GET', window.location, false);
//Req.send();
//var Xml = Req.responseXML;
//var XsltProc = new XSLTProcessor(); // Get only the stylesheet from the XML
//XsltProc.importStylesheet(Xsl);
//var Result = XsltProc.transformToFragment(Xml, document);
//body.innerHTML = '';
//document.body.appendChild(Result);
//alert(1);
</script>
</head>
<body>
<section>
<xsl:apply-templates select="atom:feed" />
</section>
<section>
<xsl:apply-templates select="atom:feed/atom:entry" />
</section>
</body>
</html>
</xsl:template>
<xsl:template match="atom:feed"></xsl:template>
<xsl:template match="atom:entry">
<details class="entry">
<summary>
<h2>
<a>
<xsl:attribute name="href">
<xsl:value-of select="atom:id"/>
</xsl:attribute>
<xsl:value-of select="atom:title"/>
</a>
</h2>
<p>
<small>
Date: <xsl:value-of select="atom:updated"/>
</small>
<xsl:value-of select="atom:summary" disable-output-escaping="yes"/>
</p>
</summary>
<div>
<p>
<xsl:value-of select="atom:content" disable-output-escaping="yes"/>
</p>
</div>
</details>
</xsl:template>
</xsl:stylesheet>
'''.replace('{{HtmlHead}}', HtmlHead)
def RetDebugIf():
    """Return the text to append to an error message.

    When the Debug flag is off this is an empty string; otherwise it is the
    traceback of the exception being handled, preceded by a blank line.
    """
    if not Debug:
        return ''
    return '\n\n' + traceback.format_exc()
def SessionHash(Remote, Username, Password):
    """Return the key under which a login is stored in the Sessions cache.

    The key is built from the hash() of each of the three values. The parts
    are joined with ':' so that the boundaries between them are unambiguous;
    plain concatenation would let different triples (e.g. hashes 1|23 and
    12|3) collapse into the same key.

    Note that hash() of strings is salted per process, so keys are only
    meaningful within the running process.
    """
    return ':'.join(str(hash(Part)) for Part in (Remote, Username, Password))
def MkFeed(Data, Remote, Username, Session, Type=DefFeedType):
    """Render a Shiori bookmarks listing as an Atom or RSS XML document.

    Parameters:
        Data: parsed JSON of the bookmarks API response. Its 'bookmarks' list is
            read, and for each item the keys 'id', 'title', 'author', 'imageURL',
            'excerpt' and 'modified'.
        Remote: base URL of the Shiori server, used to build entry/thumbnail links.
        Username: account name shown in the feed title.
        Session: Shiori session ID, handed to GetContent() to fetch each article body.
        Type: 'atom' or 'rss'.

    Returns:
        The whole feed as a string, or None when Type is neither 'atom' nor 'rss'.
    """
    Entries = []
    FeedTitle = f'<title>ShioriFeed ({HtmlEscape(Username)}) 🔖</title>'
    Generator = f'<generator uri="https://gitlab.com/octospacc/Snippets/-/blob/main/ShioriFeed.py" version="{Version}">ShioriFeed</generator>'
    # The first bookmark's date doubles as the feed date; empty when there are no bookmarks
    FeedDate = HtmlEscape(str(Data['bookmarks'][0]['modified'])) if Data['bookmarks'] else ''
    for Mark in Data['bookmarks']:
        Id = Mark['id']
        Modified = HtmlEscape(str(Mark['modified']))
        EntryTitle = f'<title>{HtmlEscape(Mark["title"])}</title>'
        EntryAuthor = f'<author>{HtmlEscape(Mark["author"])}</author>' if Mark['author'] else ''
        # Remote comes from the request path and ends up in XML attributes and text,
        # so the links must be escaped or a stray quote/&/< would break (or inject into) the feed
        EntryLink = HtmlEscape(f'{Remote}/bookmark/{Id}/content')
        EntryThumb = HtmlEscape(f'{Remote}/bookmark/{Id}/thumb')
        # NOTE: when shiori issue #578 is fixed, this should use a thumb URL from the original article HTML to cope with private bookmarks
        # Cover images are linked rather than downloaded and embedded as data URIs:
        # embedding every cover was measured as slow (~8s per request) and traffic-hungry
        EntryCover = f'<p><a href="{EntryLink}"><img src="{EntryThumb}"/></a></p>' if Mark['imageURL'] else ''
        EntryPreview = f'<![CDATA[{EntryCover}<p>{HtmlEscape(Mark["excerpt"])}</p>]]>'
        EntryContent = HtmlEscape(GetContent(Remote, f'bookmark/{Id}/content', Session)['Body'].decode())
        if Type == 'atom':
            Entries.append(f'''
<entry>
{EntryTitle}
{EntryAuthor}
<summary>{EntryPreview}</summary>
<content type="text/html">{EntryContent}</content>
<link rel="alternate" href="{EntryLink}"/>
<published>{Modified}</published>
<updated>{Modified}</updated>
<id>{EntryLink}</id>
</entry>
''')
        elif Type == 'rss':
            Entries.append(f'''
<item>
{EntryTitle}
{EntryAuthor}
<description>{EntryPreview}</description>
<content:encoded type="text/html">{EntryContent}</content:encoded>
<link>{EntryLink}</link>
<pubDate>{Modified}</pubDate>
<guid isPermaLink="false">{EntryLink}</guid>
</item>
''')
    # Join once at the end instead of growing a string inside the loop
    Feed = ''.join(Entries)
    if Type == 'atom':
        return f'''\
{XmlHead}
<feed xmlns="http://www.w3.org/2005/Atom">
{XmlStyle}
{FeedTitle}
{Generator}
<updated>{FeedDate}</updated>
{Feed}
</feed>
'''
    elif Type == 'rss':
        return f'''\
{XmlHead}
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:media="http://search.yahoo.com/mrss/">
{XmlStyle}
<channel>
{FeedTitle}
{Generator}
<pubDate>{FeedDate}</pubDate>
<lastBuildDate>{FeedDate}</lastBuildDate>
{Feed}
</channel>
</rss>
'''
    return None
def MkUrl(Post, Type=DefFeedType):
    """Build the feed URL shown to the user from a submitted login form.

    Parameters:
        Post: the raw application/x-www-form-urlencoded request body; it must
            carry the 'Remote', 'Username' and 'Password' fields.
        Type: feed format, used as the file name of the URL ('atom' or 'rss').

    Returns:
        A URL string whose host part is the literal placeholder
        'http[s]://<THIS SHIORIFEED SERVER ADDRESS>' (the home page script swaps
        it for the real origin), followed by the remote server address and the
        URL-safe base64 encoded username and password.

    Raises:
        KeyError: when one of the three form fields is missing.
    """
    # Imported locally because the file's top-level imports only bring in `unquote`.
    from urllib.parse import parse_qsl
    # parse_qsl implements proper form decoding: '+' becomes a space, percent
    # escapes are resolved, and fields without '=' no longer raise IndexError
    Args = dict(parse_qsl(Post, keep_blank_values=True))
    return (
        'http[s]://<THIS SHIORIFEED SERVER ADDRESS>'
        f"/{Args['Remote']}"
        f"/{b64UrlEncode(Args['Username'].encode()).decode()}"
        f"/{b64UrlEncode(Args['Password'].encode()).decode()}"
        f'/{Type}.xml'
    )
def GetSession(Remote, Username, Password):
    """Log into a Shiori server and cache the session ID it returns.

    Parameters:
        Remote: base URL of the Shiori server.
        Username, Password: account credentials, sent as JSON to `/api/login`.

    Returns:
        A dict with 'Code' and 'Body'. On success Code is 200, Body is the
        one-item {SessionHash(...): session} dict, and the same item is stored
        in the module-level Sessions cache. When the server answers with another
        status, Code is that status and Body an error text including the
        server's reply. Any other failure yields Code 500.
    """
    # Imported locally because the file's top-level imports do not include urllib.error.
    from urllib.error import HTTPError
    try:
        Payload = json.dumps({'username': Username, 'password': Password, 'remember': True, 'owner': True}).encode()
        Rq = Request(f'{Remote}/api/login', data=Payload, headers={'User-Agent': UserAgent})
        try:
            Rs = urlopen(Rq)
        except HTTPError as Err:
            # urlopen raises for 4xx/5xx answers; the exception is itself a
            # response object, so it can go through the status handling below
            Rs = Err
        # The context manager closes the connection on every path
        with Rs:
            if Rs.code == 200:
                Data = {SessionHash(Remote, Username, Password): json.loads(Rs.read().decode())['session']}
                Sessions.update(Data)
                return {'Code': 200, 'Body': Data}
            return {'Code': Rs.code, 'Body': f'[{Rs.code}] External Server Error\n\n{Rs.read().decode()}'}
    except Exception:
        return {'Code': 500, 'Body': f'[500] Internal Server Error{RetDebugIf()}'}
def GetContent(Remote, Path, Session):
    """Fetch a resource from the Shiori server using an existing session.

    Parameters:
        Remote: base URL of the Shiori server.
        Path: path of the resource, relative to Remote (no leading slash).
        Session: session ID, sent in the X-Session-Id header.

    Returns:
        A dict with 'Code' and a bytes 'Body'. On a 200 answer Body is the raw
        response and 'Content-Type' carries the server's header value. For any
        other status Body is an encoded error text that includes the server's
        reply; any other failure yields Code 500.
    """
    # Imported locally because the file's top-level imports do not include urllib.error.
    from urllib.error import HTTPError
    try:
        Rq = Request(f'{Remote}/{Path}', headers={'X-Session-Id': Session, 'User-Agent': UserAgent})
        try:
            Rs = urlopen(Rq)
        except HTTPError as Err:
            # urlopen raises for 4xx/5xx answers; the exception is itself a
            # response object, so it can go through the status handling below
            Rs = Err
        # The context manager closes the connection on every path
        with Rs:
            if Rs.code == 200:
                return {'Code': 200, 'Body': Rs.read(), 'Content-Type': Rs.headers['Content-Type']}
            return {'Code': Rs.code, 'Body': f'[{Rs.code}] External Server Error\n\n{Rs.read().decode()}'.encode()}
    except Exception:
        return {'Code': 500, 'Body': f'[500] Internal Server Error{RetDebugIf()}'.encode()}
def RqHandle(Path, Attempt=0):
    """Produce the response for a GET request.

    An empty path yields the home page. Any other path is read as
    `/<remote server URL>/<b64 username>/<b64 password>[/<feed type>]`, where
    the feed type is one of atom.xml, rss.xml, atom or rss and defaults to
    DefFeedType. The bookmarks are then fetched from the remote server (logging
    in first when no session is cached) and rendered with MkFeed().

    Parameters:
        Path: the raw request path.
        Attempt: number of session renewals already tried for this request;
            a renewal is attempted at most once.

    Returns:
        A dict with 'Code' and 'Body' (str), plus 'Content-Type' on success.
    """
    # Imported locally because the file's top-level imports do not include urllib.error.
    from urllib.error import HTTPError
    try:
        Rs = {}
        Args = Path.strip().removeprefix('/').removesuffix('/').strip().split('/')
        if Args[0] == '':
            return {'Code': 200, 'Body': HomeTemplate, 'Content-Type': 'text/html'}
        # The last path segment may name the feed type, optionally followed by a query string
        TypeCheck = Args[-1].lower().replace('?', '&').split('&')[0]
        if TypeCheck in ('atom.xml', 'rss.xml', 'atom', 'rss'):
            Shift = 1
            FeedType = TypeCheck.split('.')[0]
        else:
            Shift = 0
            FeedType = DefFeedType
        # Everything before the two credential segments is the remote server URL
        Remote = '/'.join(Args[:-(2+Shift)]).removesuffix('/')
        # Remote is untrusted and goes straight into urlopen, which also understands
        # schemes such as file:// — only allow plain HTTP(S) servers
        if not Remote.lower().startswith(('http://', 'https://')):
            return {'Code': 400, 'Body': '[400] Bad Request'}
        Username = b64UrlDecode(Args[-(2+Shift)]).decode()
        Password = b64UrlDecode(Args[-(1+Shift)]).decode()
        Key = SessionHash(Remote, Username, Password)
        if Key not in Sessions:
            TrySession = GetSession(Remote, Username, Password)
            if TrySession['Code'] != 200:
                return TrySession
        Session = Sessions[Key]
        try:
            Rq = urlopen(Request(f'{Remote}/api/bookmarks', headers={
                'X-Session-Id': Session,
                'User-Agent': UserAgent}))
        except HTTPError as Err:
            # urlopen raises for 4xx/5xx answers; the exception is itself a response
            # object, so keep it: otherwise the renewal branch below is never reached
            Rq = Err
        # Read everything up front so the connection is closed before the feed is built
        with Rq:
            Code = Rq.code
            Raw = Rq.read()
        Rs['Code'] = Code
        if Code == 200:
            # Shiori got us JSON data, parse it and return our result
            Rs['Body'] = MkFeed(json.loads(Raw.decode()), Remote, Username, Session, FeedType)
            Rs['Content-Type'] = 'application/xml'
        elif Code == 500 and Attempt < 1:
            # We probably got an expired Session-Id, let's renew it and retry
            TrySession = GetSession(Remote, Username, Password)
            if TrySession['Code'] != 200:
                return TrySession
            return RqHandle(Path, Attempt+1)
        else:
            Rs['Body'] = f'[{Code}] External Server Error\n\n{Raw.decode()}'
        return Rs
    except Exception:
        return {'Code': 500, 'Body': f'[500] Internal Server Error{RetDebugIf()}'}
class Handler(BaseHTTPRequestHandler):
    """HTTP request handler: GET serves the home page and feeds, POST / builds the feed URL."""

    def do_GET(self):
        """Answer a GET request with whatever RqHandle() produced for the path."""
        Rs = RqHandle(self.path)
        self.send_response(Rs['Code'])
        # Responses without an explicit type (the error texts) are plain text
        self.send_header('Content-Type', Rs.get('Content-Type', 'text/plain'))
        self.end_headers()
        self.wfile.write(Rs['Body'].encode())

    def do_POST(self):
        """Handle the login form: re-render the home page with the feed URLs filled in.

        Only the root path is accepted; any other path gets a 400 response and
        any failure while processing the form gets a 500 response.
        """
        try:
            if self.path == '/':
                Post = self.rfile.read(int(self.headers['Content-Length'])).decode()
                # The URLs contain user-submitted text, so escape them before putting
                # them into the page; otherwise a crafted "Remote" field could close
                # the <textarea> and inject markup or scripts
                AtomUrl = HtmlEscape(MkUrl(Post, 'atom'))
                RssUrl = HtmlEscape(MkUrl(Post, 'rss'))
                Body = HomeTemplate.replace('<!-- {{PostResult}} -->', f'''
<p>
Here's your <button id="BtnChangeFeed">Atom</button> feed:
<textarea class="Visible" readonly="true">{AtomUrl}</textarea>
<textarea class="Hidden" hidden="true" readonly="true">{RssUrl}</textarea>
</p>
<br />
''').replace('/* {{PostCss}} */', '.PostObscure { opacity: 0.5; }')
                self.send_response(200)
                self.send_header('Content-Type', 'text/html')
                self.end_headers()
                self.wfile.write(Body.encode())
            else:
                self.send_response(400)
                self.send_header('Content-Type', 'text/plain')
                self.end_headers()
                self.wfile.write(b'[400] Bad Request')
        except Exception:
            self.send_response(500)
            self.send_header('Content-Type', 'text/plain')
            self.end_headers()
            self.wfile.write((f'[500] Internal Server Error{RetDebugIf()}').encode())

    ##Prevent logging | https://stackoverflow.com/a/3389505
    #def log_message(self, format, *args):
    #    return
# Recipe from https://stackoverflow.com/a/51559006
class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn, so each request is served in its own thread."""
def Serve():
    """Start the threaded HTTP server on Host and block serving requests.

    The server is used as a context manager so that its listening socket is
    closed when serving stops, e.g. on KeyboardInterrupt.
    """
    with ThreadedHTTPServer(Host, Handler) as Server:
        Server.serve_forever()
# Cache of Shiori session IDs, keyed by SessionHash(Remote, Username, Password).
# It lives at module level (not inside the __main__ guard) so that GetSession and
# RqHandle also work when this file is imported instead of being run as a script.
Sessions = {}
if __name__ == '__main__':
    try:
        Serve()
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the server: exit quietly
        pass