diff --git a/gvlk/fetch.py b/gvlk/fetch.py
index 3a07b16..a1e8921 100755
--- a/gvlk/fetch.py
+++ b/gvlk/fetch.py
@@ -9,69 +9,51 @@ LANG = yaml.full_load(open('config.yml').read())['lang']
URL = 'https://learn.microsoft.com/%s/windows-server/get-started/kms-client-activation-keys'
-def extractKeys(items: list) -> dict: # detached from original html elements
- def splitHeader(header) -> tuple[str, str]:
- return header['id'], header.text
-
- def splitTable(table) -> dict: # split from html table
- dat = {}
- for item in [x for x in table.tbody if x.name == 'tr']:
- name, key = item.select('td')
- dat[str(name)[4:-5].replace('<br/>', '\n')] = key.text
- return dat
-
- result = {}
- for index in range(len(items)):
- if items[index].name == 'table':
- keyContent = splitTable(items[index]) # GVLK content
- keyId, keyName = splitHeader(items[index - 1])
- result[keyId] = {
- 'name': keyName,
- 'content': keyContent
- }
- return result
-
-
-def fetchGvlk(lang: str) -> dict: # fetch GVLKs of the specified language
+def fetchGvlks(lang: str) -> dict: # fetch GVLKs of the specified language
request = requests.get(URL % lang, timeout = 15)
request.raise_for_status() # only http-code 2xx
request.encoding = 'utf-8'
content = BeautifulSoup(request.text, 'lxml').select('.content')[0] # html parsing
- result = []
- for element in content.children:
- try:
- if element['id'] == 'generic-volume-license-keys-gvlk':
- result = [] # GVLK record begin
- except: pass
- if element.name in ['h3', 'h4', 'table']: # match target DOM
- result.append(element)
- return extractKeys(result)
+ items = [x for x in content.children if x.name in ['h2', 'h3', 'h4', 'table']] # match target DOMs
+ htmlIds = [x['id'] if 'id' in x.attrs else '' for x in items]
+ items = items[htmlIds.index('generic-volume-license-keys-gvlk'):] # located GVLKs section
+
+ gvlks = {}
+ for index in range(len(items)):
+ if items[index].name == 'table':
+ header = items[index - 1] # last h3/h4 DOM
+ table = [x for x in items[index].tbody if x.name == 'tr'] # current table DOM
+ text = lambda x: str(x)[4:-5].replace('<br/>', '\n') # extract DOM text
+ gvlks[header['id']] = {
+ 'name': header.text, # GVLKs title
+ 'content': {
+ text(x.select('td')[0]): x.select('td')[1].text for x in table # extract GVLKs
+ }
+ }
+ return gvlks
-def combineGvlk(rawData: dict) -> dict: # merge multiple languages
+def combineGvlks(rawData: dict) -> dict: # merge multiple languages
firstVal = lambda x: list(x.values())[0]
flipDict = lambda x: {v: k for k, v in x.items()}
- def release(version: str) -> dict:
+ def combined(version: str) -> dict:
keys = [x for _, x in firstVal(rawData)[version]['content'].items()]
- gvlkItem = {
+ gvlksItem = {
'name': {lang: data[version]['name'] for (lang, data) in rawData.items()},
'content': [{'name': {}, 'key': x} for x in keys]
}
for index in range(len(keys)):
for (lang, data) in rawData.items():
data = flipDict(data[version]['content'])
- gvlkItem['content'][index]['name'][lang] = data[keys[index]]
- return gvlkItem
+ gvlksItem['content'][index]['name'][lang] = data[keys[index]]
+ return gvlksItem
- result = {}
- for gvlkVersion in list(firstVal(rawData)):
- result[gvlkVersion] = release(gvlkVersion)
- return result
+ return {x: combined(x) for x in list(firstVal(rawData))}
if __name__ == '__main__':
- gvlkData = combineGvlk({x: fetchGvlk(x) for x in LANG})
- with open('raw.json', 'w') as fp: # output as `raw.json`
- fp.write(json.dumps(gvlkData, indent = 2, ensure_ascii = False) + '\n')
+ gvlksData = combineGvlks({x: fetchGvlks(x) for x in LANG})
+ with open('raw.json', 'w') as fp: # output at `raw.json`
+ fp.write(json.dumps(gvlksData, indent = 2, ensure_ascii = False) + '\n')