From c05f98fa4f77a11f65c136636a69627cc8813715 Mon Sep 17 00:00:00 2001
From: Dnomd343
Date: Wed, 15 Mar 2023 17:49:29 +0800
Subject: [PATCH] update: possible punctuation error

---
 src/punctuation/sentence.py | 54 ++++++++++++++++++++++++++++++++++---
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/src/punctuation/sentence.py b/src/punctuation/sentence.py
index 76cd3c1..14eb80d 100755
--- a/src/punctuation/sentence.py
+++ b/src/punctuation/sentence.py
@@ -92,10 +92,58 @@ def symbolConvert(sentence: str) -> str: # similar symbol replacement
 
 
 def sentenceType(content: list) -> tuple[list, list]: # analyze the type of all sentences
+
+    target = [
+        '“➕”➕“➕”➕,“➕”➕“➕”➕。',
+        '“➕”➕。',
+        '“➕”➕,➕“➕”➕。',
+
+        '“➕,➕”',
+
+        '➕“➕”➕“➕”➕。',
+        '➕“➕”➕。',
+        '➕“➕”➕,“➕。”',
+        '➕“➕”➕,➕“➕”➕。',
+        '➕“➕”➕,➕……',
+        '➕“➕”、“➕”,➕……',
+        '➕“➕”。',
+        '➕“➕”!',
+        '➕“➕”,➕“➕”➕。',
+        '➕“➕”,➕。',
+        '➕“➕”?',
+        '➕“➕”?!',
+        '➕“➕。”',
+
+        '➕,“➕”➕“➕”➕。',
+        '➕,“➕”➕。',
+        '➕,“➕”,➕,“➕”。',
+
+        '➕,➕“➕”……',
+        '➕,➕“➕”➕“➕”➕。',
+        '➕,➕“➕”➕。',
+        '➕,➕“➕”➕,“➕”➕。',
+        '➕,➕“➕”➕,“➕……”',
+        '➕,➕“➕”➕,“➕。”',
+        '➕,➕“➕”➕,➕——',
+        '➕,➕“➕”➕,➕“➕”➕。',
+        '➕,➕“➕”。',
+        '➕,➕“➕”!',
+        '➕,➕“➕”,➕“➕”。',
+        '➕,➕“➕”,➕……',
+        '➕,➕“➕”,➕……➕。',
+        '➕,➕“➕”,➕。',
+
+        '➕,《➕》➕“➕”、“➕”,➕。',
+    ]
+
     resultSingle = set()
     resultSequence = set()
     for row in content:
         sType = symbolConvert(removeDuplicate(abstract(row)))
+
+        if sType in target:
+            print(row)
+
         if delimiter in sType:
             resultSequence.add(sType)
         else:
@@ -105,9 +153,9 @@ def sentenceType(content: list) -> tuple[list, list]: # analyze the type of all
 
 def sentenceCheck(content: list) -> None:
     single, sequence = sentenceType(content)
-    print('\n'.join(single))
-    print('-' * 64)
-    print('\n'.join(sequence))
+    # print('\n'.join(single))
+    # print('-' * 64)
+    # print('\n'.join(sequence))
 
 
 sentenceCheck(loadContent(sys.argv[1]))