#!/usr/bin/env python3
"""HTTP proxy that fetches a URL and serves a page summarising its HTML metadata.

The target URL is everything after the leading ``/`` of the request path,
for example ``GET /https://example.com/``.
"""

from html import escape as HtmlEscape
from http.server import HTTPServer, BaseHTTPRequestHandler
from socketserver import ThreadingMixIn
from urllib.error import HTTPError, URLError
from urllib.parse import urlsplit
from urllib.request import urlopen, Request

from bs4 import BeautifulSoup

# *--------------------* #
# |   Configuration    | #
# *--------------------* #
Host = ('localhost', 8080)
Debug = True
# *--------------------* #

# Headers sent with every upstream request. They imitate a desktop browser
# because some servers refuse or alter responses for non-browser clients.
ProxyHeaders = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
    "Sec-Fetch-Site": "same-origin",
}

# Metadata describing this application itself; used as the base for every
# generated page.
AppMeta = {
    "SiteName": "Web Meta Preview Proxy",
    "Title": "Web Meta Preview Proxy",
    "Description": "HTML metadata provided in a proxied and paginated form.",
}

# Only these URL schemes are fetched. urlopen() also understands file:, ftp:
# and data: URLs, which must never be reachable through a request path.
_AllowedSchemes = ('http', 'https')


class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer that handles each request in its own thread."""


class Handler(BaseHTTPRequestHandler):
    """Request handler that proxies the URL found in the request path."""

    def do_GET(self):
        """Fetch the requested URL and answer with its metadata page.

        Responds 200 with the metadata page when the upstream fetch succeeds,
        and 500 with the "no metadata" page when the URL is invalid, uses a
        disallowed scheme, or cannot be fetched.
        """
        Url = self.path[1:]  # strip the leading '/'
        try:
            # urlsplit() raises ValueError on malformed input, which is
            # handled below together with the explicit scheme rejection.
            if urlsplit(Url).scheme not in _AllowedSchemes:
                raise ValueError(f'Refusing to proxy non-HTTP(S) URL: {Url!r}')
            # The context manager closes the upstream connection even when
            # reading fails.
            with urlopen(Request(Url, headers=ProxyHeaders)) as Req:
                Code = Req.code
                # Undecodable bytes are replaced instead of aborting the
                # request: metadata usually survives a few broken characters.
                Html = Req.read().decode(errors='replace')
            Res = {
                'Code': 200,
                'Body': MakePage(Code=Code, Url=Url, Meta=HtmlToMeta(Html)),
            }
        except (HTTPError, URLError, ValueError) as e:
            print(e)
            Res = {
                'Code': 500,
                # HTTPError carries the upstream status; other errors do not.
                'Body': MakePage(Code=getattr(e, 'code', None), Url=Url),
            }
        self.send_response(Res['Code'])
        # The body is encoded with str.encode()'s default (UTF-8); say so.
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.end_headers()
        self.wfile.write(Res['Body'].encode())


def Serve():
    """Run the threaded HTTP server on ``Host`` until interrupted."""
    ThreadedHTTPServer(Host, Handler).serve_forever()


def DictKeyIf(Dict: dict, Key: str):
    """Return ``Dict[Key]`` if the key exists, otherwise ``None``."""
    return Dict.get(Key)


def SoupAttrIf(Obj, Attr: str, Else=None):
    """Return attribute ``Attr`` of a parsed element, or ``Else``.

    A truthy Python attribute of ``Obj`` (such as ``.text``) wins; otherwise
    the value comes from ``Obj.get(Attr)`` (such as a tag's ``content="..."``).
    ``Else`` is returned when ``Obj`` is falsy or the lookup yields ``None``.
    """
    if not Obj:
        return Else
    Value = getattr(Obj, Attr, None)
    if Value:
        return Value
    Value = Obj.get(Attr)
    return Else if Value is None else Value
def DictHtmlSafe(Dict: dict):
    """Return a copy of ``Dict`` holding only truthy values, HTML-escaped.

    Values are converted with ``str()`` first so that a non-string value
    cannot crash the escaping step. Quotes are escaped too, so the results
    are safe inside double-quoted attributes as well as element content.
    """
    return {Key: HtmlEscape(str(Value)) for Key, Value in Dict.items() if Value}


def MakePage(Meta: dict = None, Code: int = None, Url: str = None):
    """Build the complete HTML document for one proxied request.

    Meta: metadata of the proxied page; when falsy, a "could not retrieve"
        notice replaces the metadata listing.
    Code: HTTP status reported by the upstream server, if any.
    Url: the URL that was proxied.

    NOTE(review): the surrounding tags are a reconstruction; only the text
    content of this template was available. Confirm against the deployed page.
    """
    # Url comes straight from the request path, so it is untrusted and must
    # be escaped before it is placed in the page.
    SafeUrl = HtmlEscape(str(Url))
    SafeCode = HtmlEscape(str(Code))
    if Meta:
        Body = MetaToHtmlBody(Meta)
    else:
        Body = '<p>Could not retrieve any metadata from the requested URL.</p>\n'
    # NOTE(review): "[Source Code]" presumably linked to the project
    # repository; the link target is not known here, so it is plain text.
    return f'''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
{MetaToHtmlHead(ContextAppMeta(Meta))}</head>
<body>
<p>Proxied URL: {SafeUrl}</p>
<p>Proxied HTTP Response: {SafeCode}</p>
{Body}<p>[Source Code]</p>
</body>
</html>
'''


def ContextAppMeta(Meta: dict = None):
    """Merge a proxied page's metadata with this application's own.

    Starts from a copy of ``AppMeta``. For SiteName, Title and Description a
    non-empty page value is prefixed as ``"<page> | <app>"``; a non-empty
    Image is copied as is. Missing keys in ``Meta`` are ignored.
    """
    New = dict(AppMeta)
    if Meta:
        for Key in ('SiteName', 'Title', 'Description'):
            if Meta.get(Key):
                New[Key] = f"{Meta[Key]} | {AppMeta[Key]}"
        if Meta.get('Image'):
            New['Image'] = Meta['Image']
    return New


def MetaToHtmlHead(Meta: dict):
    """Render metadata as ``<head>`` elements, one per line.

    Emits only the entries that have a value; returns ``''`` when there are
    none. The tags mirror what ``HtmlToMeta`` reads back: og:site_name,
    <title>, og:description and og:image.

    NOTE(review): the exact original tag set is not known; confirm.
    """
    Meta = DictHtmlSafe(Meta)
    Lines = []
    if Meta.get('SiteName'):
        Lines.append(f'<meta property="og:site_name" content="{Meta["SiteName"]}"/>')
    if Meta.get('Title'):
        Lines.append(f'<title>{Meta["Title"]}</title>')
    if Meta.get('Description'):
        Lines.append(f'<meta property="og:description" content="{Meta["Description"]}"/>')
    if Meta.get('Image'):
        Lines.append(f'<meta property="og:image" content="{Meta["Image"]}"/>')
    return ''.join(Line + '\n' for Line in Lines)


def MetaToHtmlBody(Meta: dict):
    """Render metadata as a visible label/value listing.

    Entries appear in the order Site Name, Title, Description, Image, Type,
    Html, each only when it has a value. The raw page source ("Html") is
    shown only while ``Debug`` is enabled. All values are HTML-escaped.

    NOTE(review): the <dl>/<dt>/<dd> structure is a reconstruction; confirm.
    """
    Meta = DictHtmlSafe(Meta)
    Rows = []
    for Key, Label in (
        ('SiteName', 'Site Name'),
        ('Title', 'Title'),
        ('Description', 'Description'),
    ):
        if Meta.get(Key):
            Rows.append(f'<dt>{Label}</dt>\n<dd>{Meta[Key]}</dd>')
    if Meta.get('Image'):
        Rows.append(f'<dt>Image</dt>\n<dd><img src="{Meta["Image"]}" alt=""/></dd>')
    if Meta.get('Type'):
        Rows.append(f'<dt>Type</dt>\n<dd>{Meta["Type"]}</dd>')
    if Meta.get('_Html_') and Debug:
        Rows.append(f'<dt>Html</dt>\n<dd><pre>{Meta["_Html_"]}</pre></dd>')
    return '<dl>\n' + '\n'.join(Rows) + '\n</dl>\n'


def HtmlToMeta(Html: str):
    """Extract preview metadata from an HTML document.

    Returns a dict with the keys SiteName, Title, Description, Image, Type
    and _Html_ (the unmodified input). Title prefers og:title and falls back
    to the <title> text; Description prefers og:description and falls back
    to <meta name="description">. Values that are not found are ``None``.
    """
    Soup = BeautifulSoup(Html, 'html.parser')

    def MetaContent(AttrName, AttrValue):
        # content="..." of the first <meta> whose AttrName equals AttrValue.
        return SoupAttrIf(Soup.find('meta', attrs={AttrName: AttrValue}), 'content')

    Title = MetaContent('property', 'og:title')
    if not Title:
        Title = SoupAttrIf(Soup.find('title'), 'text')
    Description = MetaContent('property', 'og:description')
    if not Description:
        Description = MetaContent('name', 'description')
    return {
        "SiteName": MetaContent('property', 'og:site_name'),
        "Title": Title,
        "Description": Description,
        "Image": MetaContent('property', 'og:image'),
        "Type": MetaContent('property', 'og:type'),
        "_Html_": Html,
    }


if __name__ == '__main__':
    try:
        Serve()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server; exit without a traceback.
        pass