Browse Source

feat: add symbol convert

master
Dnomd343 2 years ago
parent
commit
12938500f6
  1. 15
      src/punctuation/sentence.py

15
src/punctuation/sentence.py

@@ -24,6 +24,7 @@ duplicates = [
delimiter + '……', delimiter + '……',
delimiter + '——', delimiter + '——',
delimiter + '~!', delimiter + '~!',
delimiter + '!~',
delimiter + '' + delimiter + '', delimiter + '' + delimiter + '',
delimiter + '' + delimiter + '', delimiter + '' + delimiter + '',
delimiter + '' + delimiter + '', delimiter + '' + delimiter + '',
@@ -72,11 +73,23 @@ def removeDuplicate(sentence: str) -> str:
return sentence return sentence
def symbolConvert(sentence: str) -> str:
    """Rewrite the punctuation that directly follows each '➕' marker.

    The sequences '?!', '!?', '?', '!~' and '~!' after a '➕' are replaced
    by '。'; a lone '!' after a '➕' is replaced by '。' as well, and '、'
    after a '➕' is replaced by ','. The converted string is then passed
    through ``removeDuplicate`` and that result is returned.
    """
    # Order matters: the two-character patterns must be consumed before
    # the single-character '➕?' (and the later '➕!') can match their
    # prefixes.
    for pattern in ('➕?!', '➕!?', '➕?', '➕!~', '➕~!'):
        sentence = sentence.replace(pattern, '➕。')

    # The guard inspects the whole string: one '➕!……' anywhere leaves
    # every '➕!' untouched, including those not followed by '……'.
    # NOTE(review): indentation was lost in the viewed diff; only the
    # '➕!' replacement is assumed to be guarded — confirm.
    if '➕!……' not in sentence:
        sentence = sentence.replace('➕!', '➕。')

    sentence = sentence.replace('➕、', '➕,')
    return removeDuplicate(sentence)
def sentenceType(content: list) -> tuple[list, list]: def sentenceType(content: list) -> tuple[list, list]:
resultSingle = set() resultSingle = set()
resultSequence = set() resultSequence = set()
for row in content: for row in content:
sType = removeDuplicate(abstract(row)) sType = symbolConvert(removeDuplicate(abstract(row)))
if delimiter in sType: if delimiter in sType:
resultSequence.add(sType) resultSequence.add(sType)
else: else:

Loading…
Cancel
Save