From 35c545bf409cb831d437165c4f2cf79e728ce917 Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Sun, 22 Jun 2025 17:25:02 +0800 Subject: [PATCH] perf: optimize igraph pickle builder --- misc/all-graph/01-build_json.py | 52 +++++++---------- misc/all-graph/02-dump_igraph.py | 97 +++++++++++++++++++++++--------- misc/all-graph/compare.py | 28 +++++++++ 3 files changed, 118 insertions(+), 59 deletions(-) create mode 100644 misc/all-graph/compare.py diff --git a/misc/all-graph/01-build_json.py b/misc/all-graph/01-build_json.py index b35292b..8799571 100755 --- a/misc/all-graph/01-build_json.py +++ b/misc/all-graph/01-build_json.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 +import os import json +import multiprocessing from klotski import Group, Layout, FastCal @@ -29,41 +31,22 @@ def build_graph(group: Group, targets: set[Layout], ignores: set[Layout]) -> dic for layout in steps: assert len(steps[layout]) == len(targets) step = min(x for _, x in steps[layout]) - pivots = set(x for x, y in steps[layout] if y == step) - graph[layout] = {'step': step, 'pivots': pivots, 'next': set()} + pivots = [x for x, y in steps[layout] if y == step] + graph[layout] = {'step': step, 'pivots': pivots, 'next': []} for layout, info in graph.items(): for x in layout.next_cases(): if graph[x]['step'] == info['step'] + 1: - info['next'].add(x) - - # for ignore in ignores: - # assert ignore in graph - # for x in graph[ignore]['next']: - # assert x in ignores - # - # for layout, info in graph.items(): - # if ignore in info['next'] and layout not in ignores: - # assert layout in targets + info['next'].append(x) for ignore in ignores: del graph[ignore] for layout, info in graph.items(): - need_remove = [] - for x in info['next']: - if x in ignores: - assert layout in targets - need_remove.append(x) - for x in need_remove: - info['next'].remove(x) - - # for layout, info in graph.items(): - # for x in info['next']: - # assert x in graph + info['next'] = list(filter(lambda x: x not in ignores, info['next'])) - assert sorted(graph) == list(graph) assert len(set(graph)) == len(graph) + assert sorted(graph) == list(graph) return graph @@ -78,7 +61,8 @@ def dump_json(group_info: dict, graph: dict[Layout, dict]) -> str: return json.dumps({**group_info, 'graph': data}, separators=(',', ':')) -def load_and_dump(info: dict, path_prefix: str) -> None: +def build_and_dump(info: dict, output: str) -> None: + print(f'Start building: {output}') targets = sorted(Layout(x) for x in info['solutions']['valid']) ignores = sorted(Layout(x) for x in info['solutions']['invalid']) @@ -92,13 +76,19 @@ def load_and_dump(info: dict, path_prefix: str) -> None: 'targets': {layout_to_str(x): target_map[x] for x in targets}, 'ignores': [layout_to_str(x) for x in ignores], } - with open(f'{path_prefix}.json', 'w') as fp: + with open(output, 'w') as fp: fp.write(dump_json(group_info, graph)) -if __name__ == '__main__': - raw = json.loads(open('data.json').read()) +def build_all(path: str, group_info: dict[str, dict]) -> None: + pool = multiprocessing.Pool() + for name, info in group_info.items(): + output = os.path.join(path, f'{name}.json') + pool.apply_async(build_and_dump, args=(info, output)) + pool.close() + pool.join() + - for name, info in raw.items(): - print(name) - load_and_dump(info, f'./output-json/{name}') +if __name__ == '__main__': + os.makedirs('./output-json/', exist_ok=True) + build_all('./output-json/', json.loads(open('data.json').read())) diff --git a/misc/all-graph/02-dump_igraph.py b/misc/all-graph/02-dump_igraph.py index 9a15cf0..7009a84 100755 --- a/misc/all-graph/02-dump_igraph.py +++ b/misc/all-graph/02-dump_igraph.py @@ -3,50 +3,91 @@ import os import json import igraph as ig +import multiprocessing -def dump_graph(graph: dict[str, dict]) -> ig.Graph: - index_map = {x: i for i, x in enumerate(graph)} - g = ig.Graph(len(graph)) - for index, (layout, info) in enumerate(graph.items()): - g.vs[index]['code'] = layout - g.vs[index]['step'] = info['step'] - g.add_edges([(index, index_map[x]) for x in info['next']]) - return g +def save_graph(graph: ig.Graph, path: str) -> None: + edges = graph.get_edgelist() + for edge in edges: + assert edge[0] < edge[1] + graph.delete_edges() + graph.add_edges(sorted(edges)) + graph.write_pickle(path) -def dump_sub_graph(graph: dict[str, dict], target: str) -> ig.Graph: - cases = [x for x, info in graph.items() if target in info['pivots']] + +def dump_graph(data: dict[str, dict]) -> ig.Graph: + graph = ig.Graph(len(data)) + index_map = {x: i for i, x in enumerate(data)} + + edges = [] + for index, (layout, info) in enumerate(data.items()): + graph.vs[index]['code'] = layout + graph.vs[index]['step'] = info['step'] + edges.extend((index, index_map[x]) for x in info['next']) + + graph.add_edges(edges) + return graph + + +def dump_sub_graph(data: dict[str, dict], target: str) -> ig.Graph: + cases = [x for x, info in data.items() if target in info['pivots']] assert sorted(cases) == cases + graph = ig.Graph(len(cases)) index_map = {x: i for i, x in enumerate(cases)} - g = ig.Graph(len(cases)) + edges = [] for index, layout in enumerate(cases): - info = graph[layout] - g.vs[index]['code'] = layout - g.vs[index]['step'] = info['step'] + info = data[layout] + graph.vs[index]['code'] = layout + graph.vs[index]['step'] = data[layout]['step'] + edges.extend((index, index_map[x]) for x in info['next']) + + graph.add_edges(edges) + return graph - for x in info['next']: - assert x in index_map - g.add_edge(index, index_map[x]) - return g +def dump_sub_graph_pro(g_all: ig.Graph, target: str) -> ig.Graph: # without pivot needed + cases = set() + queue = [g_all.vs.select(code=target)[0].index] + while queue: + layout = queue.pop(0) + if layout in cases: + continue + cases.add(layout) + step = g_all.vs[layout]['step'] + for neigh in g_all.neighbors(layout): + if neigh not in cases and g_all.vs[neigh]['step'] == step + 1: + queue.append(neigh) + + return g_all.induced_subgraph(sorted(cases)) + + +def convert_and_dump(file: str, output_prefix: str) -> None: + print(f'Start coverting: {file}') -def convert_ig(file: str, output: str) -> None: raw = json.loads(open(file).read()) - g_main = dump_graph(raw['graph']) - g_main.write_pickle(f'{output}.pkl') + graph = dump_graph(raw['graph']) + save_graph(graph, f'{output_prefix}.pkl') for layout, sub_tag in raw['targets'].items(): - print(layout, sub_tag) - g = dump_sub_graph(raw['graph'], layout) - g.write_pickle(f'{output}-{sub_tag}_{layout}.pkl') + sub_graph = dump_sub_graph(raw['graph'], layout) + # sub_graph = dump_sub_graph_pro(graph, layout) + save_graph(sub_graph, f'{output_prefix}-{sub_tag}_{layout}.pkl') + + +def convert_all(json_dir: str, output_dir: str) -> None: + pool = multiprocessing.Pool() + for name in sorted(os.listdir(json_dir)): + json_file = f'{json_dir}/{name}' + output_prefix = f"{output_dir}/{name.removesuffix('.json')}" + pool.apply_async(convert_and_dump, args=(json_file, output_prefix)) + pool.close() + pool.join() if __name__ == '__main__': - for name in sorted(os.listdir('output-json')): - name = name.removesuffix('.json') - print(name) - convert_ig(f'output-json/{name}.json', f'output-ig/{name}') + os.makedirs('output-ig', exist_ok=True) + convert_all('output-json', 'output-ig') diff --git a/misc/all-graph/compare.py b/misc/all-graph/compare.py new file mode 100644 index 0000000..726e31a --- /dev/null +++ b/misc/all-graph/compare.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 + +import os +import igraph as ig + + +def compare(file_1: str, file_2: str) -> None: + print(f'{file_1} vs {file_2}') + g1 = ig.Graph.Read_Pickle(file_1) + g2 = ig.Graph.Read_Pickle(file_2) + + assert g1.vcount() == g2.vcount() + assert g1.ecount() == g2.ecount() + assert g1.isomorphic(g2) + + for edge in g1.es: + assert edge.attributes() == {} + + for edge in g2.es: + assert edge.attributes() == {} + + for i in range(g1.vcount()): + assert g1.vs[i].attributes() == g2.vs[i].attributes() + + +if __name__ == '__main__': + for name in sorted(os.listdir('output-ig-raw')): + compare(f'output-ig/{name}', f'output-ig-raw/{name}')