From 584ab0473ac519f4ce5a25cf8bcbca816fdfcc7f Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sun, 12 Mar 2023 23:18:27 +0800 Subject: [PATCH] update: enhance sentence punctuations check --- src/punctuation/sentence.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/src/punctuation/sentence.py b/src/punctuation/sentence.py index 93c195f..aa5cf19 100755 --- a/src/punctuation/sentence.py +++ b/src/punctuation/sentence.py @@ -23,6 +23,7 @@ duplicates = [ delimiter + '~', delimiter + '……', delimiter + '——', + delimiter + '~!', delimiter + ',' + delimiter + '!', delimiter + ',' + delimiter + '?', delimiter + ',' + delimiter + '、', @@ -44,7 +45,6 @@ def loadContent(filename: str) -> list: # load json content ).read()) combine = [] for (title, content) in raw.items(): - combine.append(title) combine += content return combine @@ -72,13 +72,23 @@ def removeDuplicate(sentence: str) -> str: return sentence -def sentenceType(content: list) -> list: - result = set() +def sentenceType(content: list) -> tuple[list, list]: + resultSingle = set() + resultSequence = set() for row in content: - result.add(removeDuplicate(abstract(row))) - return list(sorted(result)) + sType = removeDuplicate(abstract(row)) + if delimiter in sType: + resultSequence.add(sType) + else: + resultSingle.add(sType) + return list(sorted(resultSingle)), list(sorted(resultSequence)) -print('\n'.join( - sentenceType(loadContent(sys.argv[1])) -)) +def sentenceCheck(content: list) -> None: + single, sequence = sentenceType(content) + print('\n'.join(single)) + print('-' * 64) + print('\n'.join(sequence)) + + +sentenceCheck(loadContent(sys.argv[1]))