#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""Demo: highlight the traditional Chinese characters found in a sentence."""

# Recovered from patch c64943eb ("feat: traditional chinese convert demo"),
# which adds this file as src/chinese/convert.py.

from snownlp import SnowNLP

# ANSI SGR templates: 33 = yellow foreground, 32 = green foreground,
# 39 = restore the default foreground colour afterwards.
_TRADITIONAL_MARK = '\033[0;33m%s\033[0;39m'
_SIMPLIFIED_MARK = '\033[0;32m%s\033[0;39m'
_RULER = '-' * 128


def traditionalCheck(sentence: str) -> None:
    """Print *sentence* next to its simplified form, highlighting differences.

    The sentence is converted with ``SnowNLP(sentence).han``. When the
    converted text equals the input, the sentence is treated as already
    simplified and nothing is printed. Otherwise both versions are printed
    between two ruler lines, with every position where they differ coloured
    yellow in the original and green in the converted text.

    Args:
        sentence: Text to inspect.

    Returns:
        None. The report is written to standard output.
    """
    simplified = SnowNLP(sentence).han  # convert into simplified chinese
    if simplified == sentence:  # simplified chinese already
        return

    # Work on per-character copies so the `sentence` parameter keeps its
    # declared `str` type and single characters can be swapped for coloured
    # versions in place.
    original_chars = list(sentence)
    simplified_chars = list(simplified)

    # zip() stops at the shorter text, so any unmatched tail of either list
    # is left as-is (same as the former min(len, len) bound).
    for index, (before, after) in enumerate(zip(sentence, simplified)):
        if before != after:  # found different character
            original_chars[index] = _TRADITIONAL_MARK % before
            simplified_chars[index] = _SIMPLIFIED_MARK % after

    print('%s\n> %s\n> %s\n%s' % (
        _RULER, ''.join(original_chars), ''.join(simplified_chars), _RULER
    ))


if __name__ == '__main__':
    # Run the demo only when executed as a script, not on import.
    traditionalCheck('繁體中文的叫法在臺灣亦很常見')