#!/usr/bin/env bash
#
# Crawler pipeline for `108shu.com`.
#
# Provenance (recovered from the patch header this script was embedded in):
#   Commit:  bd483ff77c714f6047231e951513bacaf79335d6
#   Author:  Dnomd343
#   Date:    Sun, 16 Oct 2022 00:48:46 +0800
#   Subject: feat: crawler script for `108shu.com`
#   Path:    src/108shu.com/crawler.sh (new file, mode 100755)
#
# Usage: ./crawler.sh
#
# Steps (all paths are relative to the directory containing this script):
#   1. catalog.py  -- its stdout is saved as ./data/catalog.json
#   2. fetch.py    -- invoked with the catalog file and ./data/html/
#                     (presumably the download target; confirm in fetch.py)
#   3. extract.py  -- invoked with the same two paths; its stdout is saved
#                     as ./data/xxrs.json
#   4. catalog.json, html/ and xxrs.json are xz-compressed and the resulting
#      *.xz files are moved into ./archive/
#
# Requires: python3, xz, tar (with -J / xz support).

# Abort on the first failing step so that a broken crawl is never compressed
# and archived as if it had succeeded.
set -euo pipefail

# Run from the script's own directory so the relative paths below resolve no
# matter where the script is invoked from. Quoted so paths with spaces work.
cd -- "$(dirname -- "$0")"
mkdir -p ./data/html/

python3 catalog.py > ./data/catalog.json
python3 fetch.py ./data/catalog.json ./data/html/
python3 extract.py ./data/catalog.json ./data/html/ > ./data/xxrs.json

# Compress inside a subshell: the working directory change stays local and the
# script ends up back in its own directory, as the original `cd ../` did.
(
  cd ./data/

  # -k keeps the uncompressed inputs; -f overwrites a stale .xz left behind by
  # an earlier run that aborted before the move below.
  xz -kf9 catalog.json
  tar -cJf html.tar.xz html/
  xz -kf9 xxrs.json

  mkdir -p ../archive/
  # `./` prefix and `--` keep odd file names from being parsed as options.
  mv -- ./*.xz ../archive/
)