From 147df3f4c54ea61557c6c2d0a7b6afa3968ab310 Mon Sep 17 00:00:00 2001
From: Dnomd343
Date: Sun, 16 Oct 2022 02:02:14 +0800
Subject: [PATCH] feat: crawler script for `ixsw.la`

---
 src/ixsw.la/crawler.sh | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100755 src/ixsw.la/crawler.sh

diff --git a/src/ixsw.la/crawler.sh b/src/ixsw.la/crawler.sh
new file mode 100755
index 0000000..4f53aca
--- /dev/null
+++ b/src/ixsw.la/crawler.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Crawler pipeline for `ixsw.la`.
+#
+# Runs catalog.py, fetch.py and extract.py (expected next to this script) in
+# order, then compresses the results under ./data/ and moves the compressed
+# files into ./archive/.
+#
+# Usage: ./crawler.sh
+
+# Stop at the first failing step so that partial output is never archived.
+set -euo pipefail
+
+# All paths below are relative to the directory containing this script.
+cd -- "$(dirname -- "$0")"
+mkdir -p ./data/html/
+
+# catalog.py writes its stdout to catalog.json; fetch.py and extract.py are
+# both given that file and the html directory as arguments.
+python3 catalog.py > ./data/catalog.json
+python3 fetch.py ./data/catalog.json ./data/html/
+python3 extract.py ./data/catalog.json ./data/html/ > ./data/xxrs.json
+
+cd ./data/
+# -k keeps the uncompressed originals; -f replaces a stale .xz left behind by
+# an interrupted run instead of failing on it.
+xz -k9f catalog.json
+tar cJf html.tar.xz html/
+xz -k9f xxrs.json
+
+mkdir -p ../archive/
+mv -- ./*.xz ../archive/