diff --git a/src/crawler/utils/fetch.py b/src/crawler/utils/fetch.py index 755d397..c779789 100644 --- a/src/crawler/utils/fetch.py +++ b/src/crawler/utils/fetch.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import time import requests from retry import retry from .logger import logger diff --git a/src/crawler/wxsy.net/check.sh b/src/crawler/wxsy.net/check.sh index d25a5a5..3126d26 100755 --- a/src/crawler/wxsy.net/check.sh +++ b/src/crawler/wxsy.net/check.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash -cd `dirname $0` +cd "$(dirname "$0")" -diff <(xz -cdk ./archive/catalog.json.xz | jq .) <(cat ./data/catalog.json | jq .) -diff <(cd ./data/json/ && sha1sum * | sort -u) <(cat ./archive/json.sha1sum | sort -u) -diff <(xz -cdk ./archive/xxrs.json.xz | jq .) <(cat ./data/xxrs.json | jq .) +diff <(xz -cdk ./archive/catalog.json.xz | jq .) <(jq . ./data/catalog.json) +diff <(cd ./data/json/ && sha1sum -- * | sort -u) <(sort -u ./archive/json.sha1sum) +diff <(xz -cdk ./archive/xxrs.json.xz | jq .) <(jq . ./data/xxrs.json) diff --git a/src/crawler/wxsy.net/crawler.sh b/src/crawler/wxsy.net/crawler.sh index f701f12..0b8dd34 100755 --- a/src/crawler/wxsy.net/crawler.sh +++ b/src/crawler/wxsy.net/crawler.sh @@ -1,11 +1,11 @@ #!/usr/bin/env bash -cd `dirname $0` +cd "$(dirname "$0")" mkdir -p ./data/html/ mkdir -p ./data/json/ -[ -z ${DELAY} ] && DELAY=1 -[ -z ${THREAD} ] && THREAD=1 +[ -z "${DELAY}" ] && DELAY=1 +[ -z "${THREAD}" ] && THREAD=1 python3 catalog.py > ./data/catalog.json python3 fetch.py ./data/catalog.json ./data/html/ "${PROXY}" ${THREAD} ${DELAY}