mirror of
https://github.com/searx/searx
synced 2025-09-02 15:35:55 +00:00
Replace every run of whitespace with a single space in HTML text
This commit is contained in:
@@ -119,6 +119,8 @@ class HTMLTextExtractor(HTMLParser):
|
|||||||
|
|
||||||
|
|
||||||
def html_to_text(html):
|
def html_to_text(html):
|
||||||
|
html = html.replace('\n', ' ')
|
||||||
|
html = ' '.join(html.split())
|
||||||
s = HTMLTextExtractor()
|
s = HTMLTextExtractor()
|
||||||
s.feed(html)
|
s.feed(html)
|
||||||
return s.get_text()
|
return s.get_text()
|
||||||
|
Reference in New Issue
Block a user