From 0876f504fb0a5cf470d3356ab6c6c9642f17116d Mon Sep 17 00:00:00 2001
From: Dnomd343
Date: Sat, 15 Oct 2022 21:04:40 +0800
Subject: [PATCH] update: change from mobile to PC

---
 src/{wap.ixsw.la => ixsw.la}/catalog.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
 rename src/{wap.ixsw.la => ixsw.la}/catalog.py (78%)

diff --git a/src/wap.ixsw.la/catalog.py b/src/ixsw.la/catalog.py
similarity index 78%
rename from src/wap.ixsw.la/catalog.py
rename to src/ixsw.la/catalog.py
index 419ec94..3025972 100644
--- a/src/wap.ixsw.la/catalog.py
+++ b/src/ixsw.la/catalog.py
@@ -32,14 +32,17 @@ def httpRequest(url: str) -> str: # fetch raw html content
 def extractCatalog(rawHtml: str) -> dict: # extract catalog from html content
     catalog = {}
     html = BeautifulSoup(rawHtml, 'lxml')
-    div = html.select('div[class="directoryArea"]')[0]
-    for item in div.select('a[style=""]'):
+    for item in html.select('dd'):
+        item = item.select('a')[0]
         name = re.search(r'^(第\d+章)(.*)$', item.text)
         pageId = item.attrs['href'].replace('/ks82668/', '').replace('.html', '')
         catalog['%s %s' % (name[1], name[2].strip())] = pageId
-    return catalog
+    catalog = sorted(catalog.items(), key = lambda d: int(
+        re.search(r'^第(\d+)章', d[0])[1] # sort by chapter
+    ))
+    return {x[0]: x[1] for x in catalog} # formatted output
 
 
 print(json.dumps(
-    extractCatalog(httpRequest('https://wap.ixsw.la/ks82668/all.html'))
+    extractCatalog(httpRequest('https://www.ixsw.la/ks82668/'))
 ))