#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Fetch the chapter catalog of a novel from m.wxsy.net and print it as JSON."""

import sys
import json
import time
import requests
from bs4 import BeautifulSoup

basicUrl = 'https://m.wxsy.net/novel/57104/all.html'

userAgent = (  # default user-agent
    'Mozilla/5.0 (Linux; Android 10; moto g(7) play) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/100.0.4896.79 Mobile Safari/537.36'
)


def httpRequest(url: str) -> str:
    """Fetch `url` and return the response body as text.

    Raises RuntimeError when the HTTP status code is not 2xx.
    """
    response = requests.get(url, headers = {
        'user-agent': userAgent,  # with fake user-agent
        'accept-encoding': 'gzip, deflate',  # allow content compress
    }, timeout = 30)  # avoid hanging forever on a stalled server
    if response.status_code not in range(200, 300):  # http status code 2xx
        raise RuntimeError('Http request failed')
    return response.text


def analysePage(rawHtml: str) -> list:
    """Extract chapter entries from one catalog page.

    Returns a list of {'name': ..., 'url': ...} dicts, one per chapter
    link found in the page's catalog container.
    """
    soup = BeautifulSoup(rawHtml, 'lxml')
    div = soup.select('div[class="border-b"]')[0]  # catalog container
    return [{
        'name': row.attrs['title'],
        'url': row.attrs['href'],
    } for row in div.select('a[class="w100 flex-wrp flex-align-center flex-between pt10 pb10"]')]


def fetchCatalog(pageNum: int) -> list:
    """Fetch catalog pages 1..pageNum and return their chapter lists.

    NOTE: the result is a list of per-page lists (not flattened) —
    presumably downstream tooling expects this grouping; confirm before
    changing the shape.
    """
    catalog = []
    for pageIndex in range(1, pageNum + 1):
        print('Page: %d' % pageIndex, file = sys.stderr)  # progress on stderr
        pageUrl = '%s?sort=1&page=%d' % (basicUrl, pageIndex)
        catalog.append(analysePage(httpRequest(pageUrl)))
        if pageIndex != pageNum:  # no need to wait after the last page
            time.sleep(3)  # be polite to the server
    return catalog


if __name__ == '__main__':  # guard: don't fetch on import
    print(json.dumps(fetchCatalog(18)))