mirror of
https://github.com/ihabunek/toot
synced 2025-01-25 00:39:45 +01:00
Normalize unicode
This commit is contained in:
parent
cb1f7b4e61
commit
2ecc6a28c6
@ -2,6 +2,7 @@
|
||||
|
||||
import re
|
||||
import socket
|
||||
import unicodedata
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
@ -10,7 +11,9 @@ from toot.exceptions import ConsoleError
|
||||
|
||||
def get_text(html):
|
||||
"""Converts html to text, strips all tags."""
|
||||
return BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
|
||||
text = BeautifulSoup(html, "html.parser").get_text().replace('&apos;', "'")
|
||||
|
||||
return unicodedata.normalize('NFKC', text)
|
||||
|
||||
|
||||
def parse_html(html):
|
||||
|
Loading…
Reference in New Issue
Block a user