Browse Source

fix: remove popularize

master
Dnomd343 2 years ago
parent
commit
a30fd65ef9
  1. 7
      src/wxsy.net/extract.py

7
src/wxsy.net/extract.py

@ -15,6 +15,12 @@ from logger import logger
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
def clearContent(raw: str) -> str:  # remove popularize
    """Return only the first line of ``raw``.

    Everything from the first newline onward is discarded. The original
    comment calls the dropped tail "popularize" (presumably promotional text
    appended after the real paragraph -- TODO confirm against the site data).

    :param raw: One content entry, possibly spanning several lines.
    :return: The text before the first newline, or ``raw`` unchanged when it
        contains no newline. An entry starting with a newline yields ``''``.
    """
    # str.partition cannot fail, unlike the former `re.search(...)[1]`, which
    # raised TypeError when `raw` began with a newline (the pattern required
    # at least one character before the line break, so the search was None).
    return raw.partition('\n')[0]
def splitHtml(rawHtml: str) -> dict: # extract from raw html content def splitHtml(rawHtml: str) -> dict: # extract from raw html content
html = BeautifulSoup(rawHtml, 'lxml') html = BeautifulSoup(rawHtml, 'lxml')
script = html.select('script')[9].text # js code with chapter info script = html.select('script')[9].text # js code with chapter info
@ -27,6 +33,7 @@ def splitHtml(rawHtml: str) -> dict: # extract from raw html content
} }
if info['title'] != re.search(r'window\.chapterName = \'(.+)\'', script)[1]: # chapter title check if info['title'] != re.search(r'window\.chapterName = \'(.+)\'', script)[1]: # chapter title check
logger.error('Title error -> %s' % info['title']) logger.error('Title error -> %s' % info['title'])
info['content'] = [clearContent(x) for x in info['content']]
return info return info

Loading…
Cancel
Save