# Patch summary (preamble lines before the first "diff --git" header are not part of any hunk):
#   * src/m.wxsy.net/crawler.sh - pass ./data/html/ and ./data/json/ with a trailing slash
#     to fetch.py, extract.py (html argument only) and release.py.
#   * src/wxsy.net/check.sh     - new executable script: diffs ./archive/catalog.json.xz and
#     ./archive/xxrs.json.xz (decompressed, normalised through jq) against the files in ./data/,
#     and diffs sorted sha1sum listings of ./data/html/ and ./data/json/ against
#     ./archive/html.sha1sum and ./archive/json.sha1sum.
#   * src/wxsy.net/crawler.sh   - remove the trailing xz/tar compression and the move of *.xz
#     into ../archive/.
#
# NOTE(review): extract.py still receives ./data/json without a trailing slash while every other
#   directory argument gained one - confirm this is intentional.
# NOTE(review): check.sh runs cd `dirname $0` unquoted and without checking the result, and never
#   inspects the exit status of its four diff commands; consider cd "$(dirname "$0")" || exit 1
#   and set -euo pipefail in a follow-up change.
# NOTE(review): "cat FILE | jq ." / "cat FILE | sort -u" in check.sh could read FILE directly.
diff --git a/src/m.wxsy.net/crawler.sh b/src/m.wxsy.net/crawler.sh
index e78cec6..d7d1850 100755
--- a/src/m.wxsy.net/crawler.sh
+++ b/src/m.wxsy.net/crawler.sh
@@ -5,6 +5,6 @@ mkdir -p ./data/html/
 mkdir -p ./data/json/
 
 python3 catalog.py > ./data/catalog.json
-python3 fetch.py ./data/catalog.json ./data/html
-python3 extract.py ./data/catalog.json ./data/html ./data/json
-python3 release.py ./data/catalog.json ./data/json > ./data/xxrs.json
+python3 fetch.py ./data/catalog.json ./data/html/
+python3 extract.py ./data/catalog.json ./data/html/ ./data/json
+python3 release.py ./data/catalog.json ./data/json/ > ./data/xxrs.json
diff --git a/src/wxsy.net/check.sh b/src/wxsy.net/check.sh
new file mode 100755
index 0000000..7eafab5
--- /dev/null
+++ b/src/wxsy.net/check.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+cd `dirname $0`
+
+diff <(xz -cdk ./archive/catalog.json.xz | jq .) <(cat ./data/catalog.json | jq .)
+diff <(cd ./data/html/ && sha1sum * | sort -u) <(cat ./archive/html.sha1sum | sort -u)
+diff <(cd ./data/json/ && sha1sum * | sort -u) <(cat ./archive/json.sha1sum | sort -u)
+diff <(xz -cdk ./archive/xxrs.json.xz | jq .) <(cat ./data/xxrs.json | jq .)
diff --git a/src/wxsy.net/crawler.sh b/src/wxsy.net/crawler.sh
index 299b207..d7d1850 100755
--- a/src/wxsy.net/crawler.sh
+++ b/src/wxsy.net/crawler.sh
@@ -8,13 +8,3 @@ python3 catalog.py > ./data/catalog.json
 python3 fetch.py ./data/catalog.json ./data/html/
 python3 extract.py ./data/catalog.json ./data/html/ ./data/json
 python3 release.py ./data/catalog.json ./data/json/ > ./data/xxrs.json
-
-cd ./data/
-xz -k9 catalog.json
-tar cJf html.tar.xz html/
-tar cJf json.tar.xz json/
-xz -k9 xxrs.json
-
-mkdir -p ../archive/
-mv *.xz ../archive/
-cd ../