Browse Source

update: Chinese content check

master
Dnomd343 2 years ago
parent
commit
f09817218d
  1. 54
      src/character/chinese.py

54
src/character/chinese.py

@ -1,15 +1,25 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os
import sys
import json
from enum import Enum from enum import Enum
# Directory searched for the JSON files to check: `../../release/` resolved
# against the real (symlink-free) directory that contains this source file.
defaultPath = os.path.join(
os.path.dirname(os.path.realpath(__file__)), '../../release/'
)
# Extra characters accepted as simplified Chinese before the gb2312 encode test.
# NOTE(review): all five entries show up as empty strings in this view --
# presumably the original glyphs were lost in rendering; confirm against the
# repository before relying on this list.
simplifiedChineseReplenish = ['', '', '', '', '']
class Chinese(str, Enum):
    """Classification assigned to a single character.

    Members are real ``Chinese`` instances, so the ``-> Chinese`` return
    annotations used in this module are accurate. Mixing in ``str`` keeps
    every member equal to (and hashing like) its plain string value, so
    existing comparisons such as ``result == 'simplified'`` keep working.
    """

    OK = 'simplified'  # simplified Chinese
    WARN = 'traditional'  # traditional Chinese
    ERROR = 'unknown'  # unknown character

    def __str__(self) -> str:
        # Render as the bare value ('simplified', ...) rather than
        # 'Chinese.OK' so printing/formatting matches the old string constants.
        return self.value
def is_traditional(character: str) -> bool: # whether character is traditional chinese def isTraditional(character: str) -> bool: # whether character is traditional chinese
character = character[0] character = character[0]
try: try:
character.encode('big5hkscs') character.encode('big5hkscs')
@ -18,8 +28,10 @@ def is_traditional(character: str) -> bool: # whether character is traditional
return True return True
def is_simplified(character: str) -> bool: # whether character is simplified chinese def isSimplified(character: str) -> bool: # whether character is simplified chinese
character = character[0] character = character[0]
if character in simplifiedChineseReplenish:
return True
try: try:
character.encode('gb2312') character.encode('gb2312')
except: except:
@ -27,22 +39,24 @@ def is_simplified(character: str) -> bool: # whether character is simplified ch
return True return True
def characterCheck(character: str) -> Chinese:
    """Classify the first character of ``character``.

    Returns ``Chinese.OK`` when the character is white-listed or accepted by
    ``isSimplified``, ``Chinese.WARN`` when ``isTraditional`` accepts it, and
    ``Chinese.ERROR`` otherwise. Only ``character[0]`` is examined.
    """
    head = character[0]
    # White list, consulted before any encoding-based test.
    # NOTE(review): the single entry appears empty in this view -- presumably
    # the glyph was lost in rendering; confirm against the repository.
    whitelisted = head in ['']
    if whitelisted or isSimplified(head):  # simplified chinese case
        return Chinese.OK
    # traditional chinese case, else unknown case
    return Chinese.WARN if isTraditional(head) else Chinese.ERROR
def sentence_check(sentence: str) -> (bool, str): # chinese sentence check def sentenceCheck(sentence: str) -> (bool, str): # chinese sentence check
flag = False flag = False
characters = [] characters = []
for character in sentence: for character in sentence:
if chinese_check(character) == Chinese.OK: # normal case if characterCheck(character) == Chinese.OK: # normal case
characters.append(character) characters.append(character)
elif chinese_check(character) == Chinese.WARN: # warning case elif characterCheck(character) == Chinese.WARN: # warning case
flag = True flag = True
characters.append('\033[0;33m%s\033[0;39m' % character) characters.append('\033[0;33m%s\033[0;39m' % character)
else: else:
@ -51,5 +65,25 @@ def sentence_check(sentence: str) -> (bool, str): # chinese sentence check
return not flag, ''.join(characters) return not flag, ''.join(characters)
def chineseCheck(content: list) -> None:
    """Run ``sentenceCheck`` on every row and print the ones it flags.

    ``sentenceCheck`` yields a ``(status, text)`` pair; rows whose status is
    truthy are considered normal and produce no output, every other row has
    its returned text printed.
    """
    for line in content:
        passed, rendered = sentenceCheck(line)
        if not passed:  # only flagged sentences are reported
            print(rendered)
def loadContent(filename: str, basePath: 'str | None' = None) -> list:  # load json content
    """Load a JSON file and flatten it into one list of entries to check.

    The file is expected to hold a JSON object. For each key/value pair, in
    file order, the key is appended to the result followed by every element
    of the value (the value is extended onto the list, so it is presumably
    a list of strings -- confirm against the data files).

    Args:
        filename: File name relative to the base directory; ``.json`` is
            appended when the name does not already end with it.
        basePath: Directory to load from. Defaults to the module-level
            ``defaultPath`` when omitted.

    Returns:
        The flattened list of titles and their content entries.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    if not filename.endswith('.json'):
        filename += '.json'  # add file suffix
    if basePath is None:
        basePath = defaultPath
    # Decode explicitly as UTF-8 (the locale default can mis-read Chinese
    # text) and close the handle deterministically.
    with open(os.path.join(basePath, filename), encoding='utf-8') as fp:
        raw = json.load(fp)
    combine = []
    for title, content in raw.items():
        combine.append(title)
        combine += content
    return combine
# Script entry: load the JSON file named by the first command-line argument
# and print every row that the sentence check flags.
# NOTE(review): no `__main__` guard is visible here, so this also runs when
# the module is imported -- confirm that is intended.
chineseCheck(loadContent(sys.argv[1]))

Loading…
Cancel
Save