From 12938500f69c861189ab02ce1e8768c286a80c87 Mon Sep 17 00:00:00 2001
From: Dnomd343
Date: Mon, 13 Mar 2023 22:54:15 +0800
Subject: [PATCH] feat: add symbol convert

---
 src/punctuation/sentence.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/punctuation/sentence.py b/src/punctuation/sentence.py
index aa5cf19..6a5c501 100755
--- a/src/punctuation/sentence.py
+++ b/src/punctuation/sentence.py
@@ -24,6 +24,7 @@ duplicates = [
     delimiter + '……',
     delimiter + '——',
     delimiter + '~!',
+    delimiter + '!~',
     delimiter + ',' + delimiter + '!',
     delimiter + ',' + delimiter + '?',
     delimiter + ',' + delimiter + '、',
@@ -72,11 +73,23 @@ def removeDuplicate(sentence: str) -> str:
     return sentence
 
 
+def symbolConvert(sentence: str) -> str:
+    sentence = sentence.replace('➕?!', '➕。')
+    sentence = sentence.replace('➕!?', '➕。')
+    sentence = sentence.replace('➕?', '➕。')
+    sentence = sentence.replace('➕!~', '➕。')
+    sentence = sentence.replace('➕~!', '➕。')
+    if '➕!……' not in sentence:
+        sentence = sentence.replace('➕!', '➕。')
+    sentence = sentence.replace('➕、', '➕,')
+    return removeDuplicate(sentence)
+
+
 def sentenceType(content: list) -> tuple[list, list]:
     resultSingle = set()
     resultSequence = set()
     for row in content:
-        sType = removeDuplicate(abstract(row))
+        sType = symbolConvert(removeDuplicate(abstract(row)))
         if delimiter in sType:
             resultSequence.add(sType)
         else: