From de1759643bb39fb22f51f4b35969ac58c7e218da Mon Sep 17 00:00:00 2001
From: Dnomd343
Date: Mon, 17 Oct 2022 02:08:58 +0800
Subject: [PATCH] feat: raw json content of `zhihu.com`

---
 src/crawler/zhihu.com/fetch.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 src/crawler/zhihu.com/fetch.py

diff --git a/src/crawler/zhihu.com/fetch.py b/src/crawler/zhihu.com/fetch.py
new file mode 100644
index 0000000..5627189
--- /dev/null
+++ b/src/crawler/zhihu.com/fetch.py
@@ -0,0 +1,27 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+"""
+Download raw JSON content.
+
+Requests the item list of one zhihu.com column and writes whatever
+`httpRequest` returns to the given output file.
+
+  USAGE: python3 fetch.py [OUTPUT_JSON]
+"""
+
+import sys
+sys.path.append('..')
+from utils import logger
+from utils import httpRequest
+
+# Check the output path before any network traffic, so a missing argument gives the usage text instead of an IndexError.
+if len(sys.argv) < 2:
+    sys.exit('USAGE: python3 fetch.py [OUTPUT_JSON]')
+
+logger.warning('Fetch json of `zhihu.com`')
+# Single request: `limit` is filled in as 23 and `offset` is fixed at 0.
+jsonRaw = httpRequest('https://www.zhihu.com/api/v4/columns/c_1553471910075449344/items?limit=%d&offset=0' % 23)
+# Opened in binary mode, so `httpRequest` presumably returns bytes -- TODO confirm against utils.
+with open(sys.argv[1], 'wb') as fileObj:
+    fileObj.write(jsonRaw)