From 39143922bf6eb8053327b2f9613d18bfe241ffa1 Mon Sep 17 00:00:00 2001
From: Dnomd343
Date: Thu, 15 Dec 2022 17:49:06 +0800
Subject: [PATCH] feat: punctuation ending check

---
 src/punctuation/check.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/punctuation/check.py b/src/punctuation/check.py
index f71d4af..47fb992 100755
--- a/src/punctuation/check.py
+++ b/src/punctuation/check.py
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import os
+import re
 import sys
 import json
 from itertools import product
@@ -13,6 +14,10 @@ punctuationPairs = [
     ('(', ')'),
 ]
 
+endingPunctuations = [
+    '。', '?', '!', '”', '’', '~', '……', '——',
+]
+
 defaultPath = os.path.join(
     os.path.dirname(os.path.realpath(__file__)), '../../release/'
 )
@@ -61,10 +66,29 @@ def pairsCheck(sentence: str) -> bool:
     return True  # no error match in sentence
 
 
+def endingCheck(sentence: str) -> bool:
+    if re.search(r'^第\d+章 \S*$', sentence) is not None:  # skip caption
+        return True
+    for endingPunctuation in endingPunctuations:
+        if sentence.endswith(endingPunctuation):  # match ending punctuation
+            return True
+    print('%s\033[0;31m_\033[0;39m' % sentence)
+    return False
+
+
 def contentCheck(content: list) -> None:
+    flag = True
     for row in content:  # pairs check
-        pairsCheck(row)
-    print('-' * 128)
+        flag &= pairsCheck(row)
+    if not flag:
+        print('-' * 128)  # split line
+
+    flag = True
+    for row in content:  # ending check
+        flag &= endingCheck(row)
+    if not flag:
+        print('-' * 128)  # split line
+
     # other check process
 
 