From a5c3d481783d3bfbc1938dfa800107efa61ef219 Mon Sep 17 00:00:00 2001
From: Dnomd343 <i@343.re>
Date: Sat, 15 Oct 2022 20:41:07 +0800
Subject: [PATCH] fix: missing second page

---
 src/108shu.com/extract.py | 14 ++++++++++++--
 src/108shu.com/fetch.py   | 15 ++++++++-------
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/108shu.com/extract.py b/src/108shu.com/extract.py
index 0b7eb18..e0766e3 100644
--- a/src/108shu.com/extract.py
+++ b/src/108shu.com/extract.py
@@ -25,13 +25,23 @@ def splitHtml(rawHtml: str) -> dict:  # extract from raw html content
     }
 
 
+def combinePage(chapterId: str) -> dict:  # merge sub pages `<id>-1.html` and `<id>-2.html` into one chapter
+    pages = []  # parsed sub pages, in reading order
+    for subPage in (1, 2):
+        with open(os.path.join(sys.argv[2], '%s-%d.html' % (chapterId, subPage))) as htmlFile:  # closed on exit
+            pages.append(splitHtml(htmlFile.read()))
+    first, second = pages
+    if first['title'] != second['title']:  # only logged: a title mismatch does not abort the merge
+        logger.error('Title error -> `%s`' % first['title'])
+    return {'title': first['title'], 'content': first['content'] + second['content']}  # title taken from page 1
+
+
 result = {}
 catalog = json.loads(open(sys.argv[1]).read())  # load catalog
 
 for chapterName, chapterId in catalog.items():  # traverse all chapters
     logger.info('Analyse chapter `%s`' % chapterId)
-    htmlFile = os.path.join(sys.argv[2], '%s.html' % chapterId)
-    info = splitHtml(open(htmlFile).read())
+    info = combinePage(chapterId)  # title and content merged from both sub pages
     if chapterName != info['title']:
         logger.error('Title error -> %s' % info['title'])
     result[chapterName] = info['content']
diff --git a/src/108shu.com/fetch.py b/src/108shu.com/fetch.py
index a8ade3c..3fe9f3c 100644
--- a/src/108shu.com/fetch.py
+++ b/src/108shu.com/fetch.py
@@ -44,10 +44,11 @@ def httpRequest(fileUrl: str, fileName: str) -> bool:  # save html content
 catalog = json.loads(open(sys.argv[1]).read())  # load catalog
 
 for _, chapterId in catalog.items():  # traverse all chapters
-    pageUrl = 'http://www.108shu.com/book/54247/%s.html' % chapterId
-    pageFile = os.path.join(sys.argv[2], '%s.html' % chapterId)
-    if httpRequest(pageUrl, pageFile):  # save html content
-        logger.info('Page request success -> %s' % pageUrl)
-    else:
-        logger.error('Page request failed -> %s' % pageUrl)
-    time.sleep(1)  # avoid being blocked by the server
+    for subPage in [1, 2]:  # request sub pages 1 and 2 of every chapter
+        pageUrl = 'http://www.108shu.com/book/54247/%s_%d.html' % (chapterId, subPage)  # remote name uses `_`
+        pageFile = os.path.join(sys.argv[2], '%s-%d.html' % (chapterId, subPage))  # local name uses `-`
+        if httpRequest(pageUrl, pageFile):  # save html content
+            logger.info('Page request success -> %s' % pageUrl)
+        else:
+            logger.error('Page request failed -> %s' % pageUrl)  # failure is only logged, the loop continues
+        time.sleep(1)  # pause after every sub page request to avoid being blocked by the server