You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
52 lines
1.4 KiB
52 lines
1.4 KiB
2 years ago
|
#!/usr/bin/env python3
|
||
|
# -*- coding: utf-8 -*-
|
||
|
|
||
|
import sys
|
||
|
import json
|
||
|
import time
|
||
|
import requests
|
||
|
from bs4 import BeautifulSoup
|
||
|
|
||
|
basicUrl = 'https://m.wxsy.net/novel/57104/all.html'
|
||
|
|
||
|
userAgent = ( # default user-agent
|
||
|
'Mozilla/5.0 (Linux; Android 10; moto g(7) play) '
|
||
|
'AppleWebKit/537.36 (KHTML, like Gecko) '
|
||
|
'Chrome/100.0.4896.79 Mobile Safari/537.36'
|
||
|
)
|
||
|
|
||
|
|
||
|
def httpRequest(url: str) -> str:
|
||
|
request = requests.get(url, headers = {
|
||
|
'user-agent': userAgent, # with fake user-agent
|
||
|
'accept-encoding': 'gzip, deflate', # allow content compress
|
||
|
})
|
||
|
if request.status_code not in range(200, 300): # http status code 2xx
|
||
|
raise RuntimeError('Http request failed')
|
||
|
return request.text
|
||
|
|
||
|
|
||
|
def analysePage(rawHtml: str) -> list:
|
||
|
analyseRet = []
|
||
|
soup = BeautifulSoup(rawHtml, 'lxml')
|
||
|
div = soup.select('div[class="border-b"]')[0]
|
||
|
for row in div.select('a[class="w100 flex-wrp flex-align-center flex-between pt10 pb10"]'):
|
||
|
analyseRet.append({
|
||
|
'name': row.attrs['title'],
|
||
|
'url': row.attrs['href'],
|
||
|
})
|
||
|
return analyseRet
|
||
|
|
||
|
|
||
|
def fetchCatalog(pageNum: int) -> list:
|
||
|
catalog = []
|
||
|
for pageIndex in range(1, pageNum + 1):
|
||
|
print('Page: %d' % pageIndex, file = sys.stderr)
|
||
|
pageUrl = '%s?sort=1&page=%d' % (basicUrl, pageIndex)
|
||
|
catalog.append(analysePage(httpRequest(pageUrl)))
|
||
|
time.sleep(3)
|
||
|
return catalog
|
||
|
|
||
|
|
||
|
print(json.dumps(fetchCatalog(18)))
|