From 63dfca00f965c41f269f64100e6a802181292e1c Mon Sep 17 00:00:00 2001
From: Dnomd343
Date: Sat, 15 Oct 2022 16:49:05 +0800
Subject: [PATCH] feat: crawler script for `wxsy.net`

---
 src/wxsy.net/crawler.sh | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 src/wxsy.net/crawler.sh

diff --git a/src/wxsy.net/crawler.sh b/src/wxsy.net/crawler.sh
new file mode 100644
index 0000000..299b207
--- /dev/null
+++ b/src/wxsy.net/crawler.sh
@@ -0,0 +1,30 @@
+#!/usr/bin/env bash
+#
+# Crawler pipeline for `wxsy.net`: runs catalog.py, fetch.py, extract.py and
+# release.py in order, then compresses the results into ./archive/ beside
+# this script.
+
+# Stop at the first failing step so partial output is never archived.
+set -euo pipefail
+
+# Work from the script's own directory so the relative paths below resolve.
+cd "$(dirname -- "$0")"
+mkdir -p ./data/html/
+mkdir -p ./data/json/
+
+# Every later stage is handed the catalog file written by catalog.py.
+python3 catalog.py > ./data/catalog.json
+python3 fetch.py ./data/catalog.json ./data/html/
+python3 extract.py ./data/catalog.json ./data/html/ ./data/json
+python3 release.py ./data/catalog.json ./data/json/ > ./data/xxrs.json
+
+cd ./data/
+# -k keeps the uncompressed inputs in ./data/ after compression.
+xz -k9 catalog.json
+tar cJf html.tar.xz html/
+tar cJf json.tar.xz json/
+xz -k9 xxrs.json
+
+mkdir -p ../archive/
+mv ./*.xz ../archive/
+cd ../