#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
Download raw html content as `.html` files.

    USAGE: python3 fetch.py [CATALOG] [OUTPUT_DIR] [PROXY] [THREAD] [DELAY]
"""

import os
import sys
import json
sys.path.append('..')
from utils import logger
from utils import htmlFetch


def loadChapter():
    catalog = json.loads(open(sys.argv[1]).read())  # load catalog
    for _, chapterId in catalog.items():  # traverse all chapters
        yield {
            'url': 'https://www.wxsy.net/novel/57104/read_%s.html' % chapterId,
            'file': os.path.join(sys.argv[2], '%s.html' % chapterId),
        }


logger.warning('Fetch html of `wxsy.net`')
htmlFetch(
    loadChapter(),
    proxy = sys.argv[3],
    thread = int(sys.argv[4]),
    delay = float(sys.argv[5]),
)