#!/usr/bin/env python3 import json import yaml import requests from bs4 import BeautifulSoup LANG = yaml.full_load(open('config.yml').read())['lang'] URL = 'https://learn.microsoft.com/%s/windows-server/get-started/kms-client-activation-keys' def extractKeys(items: list) -> dict: # detached from original html elements def splitHeader(header) -> tuple[str, str]: return header['id'], header.text def splitTable(table) -> dict: # split from html table dat = {} for item in [x for x in table.tbody if x.name == 'tr']: name, key = item.select('td') dat[str(name)[4:-5].replace('
', '\n')] = key.text return dat result = {} for index in range(len(items)): if items[index].name == 'table': keyContent = splitTable(items[index]) # GVLK content keyId, keyName = splitHeader(items[index - 1]) result[keyId] = { 'name': keyName, 'content': keyContent } return result def fetchGvlk(lang: str) -> dict: # fetch GVLKs of the specified language request = requests.get(URL % lang, timeout = 15) request.raise_for_status() # only http-code 2xx request.encoding = 'utf-8' content = BeautifulSoup(request.text, 'lxml').select('.content')[0] # html parsing result = [] for element in content.children: try: if element['id'] == 'generic-volume-license-keys-gvlk': result = [] # GVLK record begin except: pass if element.name in ['h3', 'h4', 'table']: # match target DOM result.append(element) return extractKeys(result) def combineGvlk(rawData: dict) -> dict: # merge multiple languages firstVal = lambda x: list(x.values())[0] flipDict = lambda x: {v: k for k, v in x.items()} def release(version: str) -> dict: keys = [x for _, x in firstVal(rawData)[version]['content'].items()] gvlkItem = { 'name': {lang: data[version]['name'] for (lang, data) in rawData.items()}, 'content': [{'name': {}, 'key': x} for x in keys] } for index in range(len(keys)): for (lang, data) in rawData.items(): data = flipDict(data[version]['content']) gvlkItem['content'][index]['name'][lang] = data[keys[index]] return gvlkItem result = {} for gvlkVersion in list(firstVal(rawData)): result[gvlkVersion] = release(gvlkVersion) return result gvlkData = combineGvlk({x: fetchGvlk(x) for x in LANG}) with open('raw.json', 'w') as fp: # output as `raw.json` fp.write(json.dumps(gvlkData, indent = 2, ensure_ascii = False)) fp.write('\n')