You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

51 lines
1.4 KiB

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import sys
import json
import time
import requests
from bs4 import BeautifulSoup
# Base URL that the paginated listing URLs are built from.
# NOTE(review): empty in this copy of the file -- confirm the intended value;
# page URLs formatted from an empty base have no scheme or host.
basicUrl = ''

# Default User-Agent header value sent with every request (a mobile Chrome
# browser string). Adjacent string literals are concatenated into one str.
userAgent = (  # default user-agent
    'Mozilla/5.0 (Linux; Android 10; moto g(7) play) '
    'AppleWebKit/537.36 (KHTML, like Gecko) '
    'Chrome/100.0.4896.79 Mobile Safari/537.36'
)
def httpRequest(url: str) -> str:
    """Fetch *url* with an HTTP GET and return the response body as text.

    The request carries the module-level ``userAgent`` string and an
    ``accept-encoding`` header allowing gzip/deflate compression.

    Args:
        url: Address to request.

    Returns:
        The decoded response body (``request.text``).

    Raises:
        RuntimeError: If the response status code is not in the 2xx range.
    """
    # NOTE(review): the closing brackets of the headers dict and of the
    # `requests.get(...)` call were missing; restored here. Confirm no further
    # headers or keyword arguments were lost with them.
    request = requests.get(url, headers = {
        'user-agent': userAgent,  # with fake user-agent
        'accept-encoding': 'gzip, deflate',  # allow content compress
    })
    if request.status_code not in range(200, 300):  # http status code 2xx
        raise RuntimeError('Http request failed')
    return request.text
def analysePage(rawHtml: str) -> list:
    """Extract the linked entries from one listing page.

    Args:
        rawHtml: HTML text of the page.

    Returns:
        A list of dicts, one per matched ``<a>`` element, each with the keys
        ``'name'`` (the anchor's ``title`` attribute) and ``'url'`` (its
        ``href`` attribute), in document order.

    Raises:
        IndexError: If no ``div[class="border-b"]`` element is found.
        KeyError: If a matched anchor lacks a ``title`` or ``href`` attribute.
    """
    soup = BeautifulSoup(rawHtml, 'lxml')
    # NOTE(review): the call expressions on the next statements were missing
    # (only the selector strings remained); rebuilt as CSS `select()` lookups,
    # first on the document and then inside the matched container -- confirm.
    div = soup.select('div[class="border-b"]')[0]
    # NOTE(review): the statement wrapping the two dict entries was also
    # missing; collecting one dict per anchor into the returned list is assumed.
    return [
        {
            'name': row.attrs['title'],
            'url': row.attrs['href'],
        }
        for row in div.select('a[class="w100 flex-wrp flex-align-center flex-between pt10 pb10"]')
    ]
def fetchCatalog(pageNum: int) -> list:
    """Collect the entries of listing pages 1 through *pageNum*.

    Each page URL is ``basicUrl`` followed by ``?sort=1&page=<index>``.
    Progress is reported on stderr, one line per page.

    Args:
        pageNum: Number of pages to fetch; pages are numbered from 1. A value
            below 1 fetches nothing.

    Returns:
        The entries from ``analysePage`` for every fetched page, in page order.

    Raises:
        RuntimeError: Propagated from ``httpRequest`` when a page request
            does not return a 2xx status.
    """
    catalog = []
    for pageIndex in range(1, pageNum + 1):
        print('Page: %d' % pageIndex, file = sys.stderr)
        pageUrl = '%s?sort=1&page=%d' % (basicUrl, pageIndex)
        # NOTE(review): the statement that consumed `pageUrl` was missing, so
        # the URL was never fetched and the result stayed empty. Fetching and
        # parsing the page into one flat list is assumed -- confirm `extend`
        # rather than `append`, and whether a delay between requests was
        # intended.
        catalog.extend(analysePage(httpRequest(pageUrl)))
    return catalog