You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 lines
779 B
33 lines
779 B
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
"""
|
|
Download raw html content as `.html` files.
|
|
|
|
USAGE: python3 fetch.py [CATALOG] [OUTPUT_DIR] [PROXY] [THREAD] [DELAY]
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import json
|
|
sys.path.append('..')
|
|
from utils import logger
|
|
from utils import htmlFetch
|
|
|
|
|
|
def loadChapter():
    """Yield one download task for every chapter listed in the catalog.

    The catalog path is taken from ``sys.argv[1]`` and is parsed as a JSON
    object; only its values (the chapter ids) are used. The output directory
    is taken from ``sys.argv[2]``.

    Yields:
        dict: ``{'url': <chapter page url>, 'file': <target .html path>}``,
        where ``file`` is ``<sys.argv[2]>/<chapterId>.html``.
    """
    # Use a context manager so the catalog file handle is closed as soon as
    # parsing is done instead of being left open until garbage collection.
    with open(sys.argv[1]) as catalogFile:
        catalog = json.load(catalogFile)  # load catalog
    for chapterId in catalog.values():  # traverse all chapters (keys unused)
        yield {
            'url': 'https://www.xswang.com/book/56718/%s.html' % chapterId,
            'file': os.path.join(sys.argv[2], '%s.html' % chapterId),
        }
|
|
|
|
|
|
# Script entry: log which site is being fetched, then pass the chapter task
# generator to `htmlFetch` along with the options read from the command line
# (argv[3] as-is, argv[4] converted to int, argv[5] converted to float).
logger.warning('Fetch html of `xswang.com`')
fetchOptions = {
    'proxy': sys.argv[3],
    'thread': int(sys.argv[4]),
    'delay': float(sys.argv[5]),
}
htmlFetch(loadChapter(), **fetchOptions)
|
|
|