Browse Source

update: enhance sentence punctuations check

master
Dnomd343 2 years ago
parent
commit
584ab0473a
  1. 26
      src/punctuation/sentence.py

26
src/punctuation/sentence.py

@@ -23,6 +23,7 @@ duplicates = [
     delimiter + '~',
     delimiter + '……',
     delimiter + '——',
+    delimiter + '~!',
     delimiter + '' + delimiter + '',
     delimiter + '' + delimiter + '',
     delimiter + '' + delimiter + '',
@@ -44,7 +45,6 @@ def loadContent(filename: str) -> list: # load json content
     ).read())
     combine = []
     for (title, content) in raw.items():
-        combine.append(title)
         combine += content
     return combine
@@ -72,13 +72,23 @@ def removeDuplicate(sentence: str) -> str:
     return sentence
 
 
-def sentenceType(content: list) -> list:
-    result = set()
+def sentenceType(content: list) -> tuple[list, list]:
+    resultSingle = set()
+    resultSequence = set()
     for row in content:
-        result.add(removeDuplicate(abstract(row)))
-    return list(sorted(result))
+        sType = removeDuplicate(abstract(row))
+        if delimiter in sType:
+            resultSequence.add(sType)
+        else:
+            resultSingle.add(sType)
+    return list(sorted(resultSingle)), list(sorted(resultSequence))
 
 
-print('\n'.join(
-    sentenceType(loadContent(sys.argv[1]))
-))
+def sentenceCheck(content: list) -> None:
+    single, sequence = sentenceType(content)
+    print('\n'.join(single))
+    print('-' * 64)
+    print('\n'.join(sequence))
+
+
+sentenceCheck(loadContent(sys.argv[1]))

Loading…
Cancel
Save