#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Download raw html content as `.html` files.

    USAGE: python3 fetch.py [CATALOG] [OUTPUT_DIR] [PROXY] [THREAD] [DELAY]

    All five arguments are required and must be given in this order.
"""

import os
import sys
import json
sys.path.append('..')
from utils import logger
from utils import htmlFetch


def loadChapter():
    """Yield one download task per sub-page of every chapter in the catalog.

    The catalog is the JSON file named by ``sys.argv[1]``. It must decode to
    a mapping whose values are chapter ids; the keys are ignored. Because
    this is a generator, the catalog is only read when iteration starts.

    Yields:
        dict: ``{'url': ..., 'file': ...}`` where ``url`` is the address of
        the sub-page on 108shu.com and ``file`` is the destination path
        inside the directory named by ``sys.argv[2]``.
    """
    # Use a context manager so the catalog file handle is closed as soon as
    # parsing finishes instead of being left to the garbage collector.
    with open(sys.argv[1]) as catalogFile:
        catalog = json.load(catalogFile)

    for chapterId in catalog.values():  # only the ids are needed
        for subPage in (1, 2):  # two sub-pages in one chapter
            yield {
                'url': 'http://www.108shu.com/book/54247/%s_%d.html' % (chapterId, subPage),
                'file': os.path.join(sys.argv[2], '%s-%d.html' % (chapterId, subPage)),
            }


# Script entry: announce the job, then pass the chapter tasks to the fetcher.
logger.warning('Fetch html of `108shu.com`')

# Options taken from the command line (PROXY, THREAD, DELAY in the usage
# string), converted to the types that are passed on to htmlFetch.
fetchOptions = {
    'proxy': sys.argv[3],
    'thread': int(sys.argv[4]),
    'delay': float(sys.argv[5]),
}
htmlFetch(loadChapter(), **fetchOptions)