
perf: optimize igraph pickle builder

master
Dnomd343 committed 6 days ago
commit 35c545bf40
  1. misc/all-graph/01-build_json.py (52 changed lines)
  2. misc/all-graph/02-dump_igraph.py (97 changed lines)
  3. misc/all-graph/compare.py (28 changed lines)

misc/all-graph/01-build_json.py (52 changed lines)

@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
+import os
 import json
+import multiprocessing
 from klotski import Group, Layout, FastCal
@@ -29,41 +31,22 @@ def build_graph(group: Group, targets: set[Layout], ignores: set[Layout]) -> dict
     for layout in steps:
         assert len(steps[layout]) == len(targets)
         step = min(x for _, x in steps[layout])
-        pivots = set(x for x, y in steps[layout] if y == step)
-        graph[layout] = {'step': step, 'pivots': pivots, 'next': set()}
+        pivots = [x for x, y in steps[layout] if y == step]
+        graph[layout] = {'step': step, 'pivots': pivots, 'next': []}
     for layout, info in graph.items():
         for x in layout.next_cases():
             if graph[x]['step'] == info['step'] + 1:
-                info['next'].add(x)
+                info['next'].append(x)
-    # for ignore in ignores:
-    #     assert ignore in graph
-    #     for x in graph[ignore]['next']:
-    #         assert x in ignores
-    #
-    #     for layout, info in graph.items():
-    #         if ignore in info['next'] and layout not in ignores:
-    #             assert layout in targets
     for ignore in ignores:
         del graph[ignore]
     for layout, info in graph.items():
-        need_remove = []
-        for x in info['next']:
-            if x in ignores:
-                assert layout in targets
-                need_remove.append(x)
-        for x in need_remove:
-            info['next'].remove(x)
+        info['next'] = list(filter(lambda x: x not in ignores, info['next']))
-    # for layout, info in graph.items():
-    #     for x in info['next']:
-    #         assert x in graph
-    assert sorted(graph) == list(graph)
     assert len(set(graph)) == len(graph)
+    assert sorted(graph) == list(graph)
     return graph
@@ -78,7 +61,8 @@ def dump_json(group_info: dict, graph: dict[Layout, dict]) -> str:
     return json.dumps({**group_info, 'graph': data}, separators=(',', ':'))


-def load_and_dump(info: dict, path_prefix: str) -> None:
+def build_and_dump(info: dict, output: str) -> None:
+    print(f'Start building: {output}')
     targets = sorted(Layout(x) for x in info['solutions']['valid'])
     ignores = sorted(Layout(x) for x in info['solutions']['invalid'])
@@ -92,13 +76,19 @@ def load_and_dump(info: dict, path_prefix: str) -> None:
         'targets': {layout_to_str(x): target_map[x] for x in targets},
         'ignores': [layout_to_str(x) for x in ignores],
     }
-    with open(f'{path_prefix}.json', 'w') as fp:
+    with open(output, 'w') as fp:
         fp.write(dump_json(group_info, graph))


-if __name__ == '__main__':
-    raw = json.loads(open('data.json').read())
-    for name, info in raw.items():
-        print(name)
-        load_and_dump(info, f'./output-json/{name}')
+def build_all(path: str, group_info: dict[str, dict]) -> None:
+    pool = multiprocessing.Pool()
+    for name, info in group_info.items():
+        output = os.path.join(path, f'{name}.json')
+        pool.apply_async(build_and_dump, args=(info, output))
+    pool.close()
+    pool.join()
+
+
+if __name__ == '__main__':
+    os.makedirs('./output-json/', exist_ok=True)
+    build_all('./output-json/', json.loads(open('data.json').read()))
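The new build_all entry point fans each group out to a separate worker process. Below is a minimal, self-contained sketch of the same multiprocessing.Pool.apply_async fan-out pattern; build_one and the group names are placeholders for illustration, not code from this repository.

#!/usr/bin/env python3
# Sketch only: shows the Pool.apply_async fan-out used by build_all.
# The task body and names are hypothetical stand-ins for build_and_dump.
import multiprocessing


def build_one(name: str, output: str) -> None:
    print(f'building {name} -> {output}')  # placeholder for the real per-group work


if __name__ == '__main__':
    pool = multiprocessing.Pool()  # defaults to one worker per CPU core
    for name in ['group_a', 'group_b', 'group_c']:
        pool.apply_async(build_one, args=(name, f'./output-json/{name}.json'))
    pool.close()  # no further tasks will be submitted
    pool.join()   # block until every queued task has finished

One caveat of this pattern: apply_async only raises a worker's exception when .get() is called on the returned AsyncResult, so a failed task surfaces only as a missing output file.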

misc/all-graph/02-dump_igraph.py (97 changed lines)

@@ -3,50 +3,91 @@
 import os
 import json
 import igraph as ig
+import multiprocessing


-def dump_graph(graph: dict[str, dict]) -> ig.Graph:
-    index_map = {x: i for i, x in enumerate(graph)}
-    g = ig.Graph(len(graph))
-    for index, (layout, info) in enumerate(graph.items()):
-        g.vs[index]['code'] = layout
-        g.vs[index]['step'] = info['step']
-        g.add_edges([(index, index_map[x]) for x in info['next']])
-    return g
+def save_graph(graph: ig.Graph, path: str) -> None:
+    edges = graph.get_edgelist()
+    for edge in edges:
+        assert edge[0] < edge[1]
+    graph.delete_edges()
+    graph.add_edges(sorted(edges))
+    graph.write_pickle(path)
+
+
+def dump_graph(data: dict[str, dict]) -> ig.Graph:
+    graph = ig.Graph(len(data))
+    index_map = {x: i for i, x in enumerate(data)}
+    edges = []
+    for index, (layout, info) in enumerate(data.items()):
+        graph.vs[index]['code'] = layout
+        graph.vs[index]['step'] = info['step']
+        edges.extend((index, index_map[x]) for x in info['next'])
+    graph.add_edges(edges)
+    return graph


-def dump_sub_graph(graph: dict[str, dict], target: str) -> ig.Graph:
-    cases = [x for x, info in graph.items() if target in info['pivots']]
+def dump_sub_graph(data: dict[str, dict], target: str) -> ig.Graph:
+    cases = [x for x, info in data.items() if target in info['pivots']]
     assert sorted(cases) == cases
+    graph = ig.Graph(len(cases))
     index_map = {x: i for i, x in enumerate(cases)}
-    g = ig.Graph(len(cases))
+    edges = []
     for index, layout in enumerate(cases):
-        info = graph[layout]
-        g.vs[index]['code'] = layout
-        g.vs[index]['step'] = info['step']
-        for x in info['next']:
-            assert x in index_map
-            g.add_edge(index, index_map[x])
-    return g
+        info = data[layout]
+        graph.vs[index]['code'] = layout
+        graph.vs[index]['step'] = data[layout]['step']
+        edges.extend((index, index_map[x]) for x in info['next'])
+    graph.add_edges(edges)
+    return graph
+
+
+def dump_sub_graph_pro(g_all: ig.Graph, target: str) -> ig.Graph:  # without pivot needed
+    cases = set()
+    queue = [g_all.vs.select(code=target)[0].index]
+    while queue:
+        layout = queue.pop(0)
+        if layout in cases:
+            continue
+        cases.add(layout)
+        step = g_all.vs[layout]['step']
+        for neigh in g_all.neighbors(layout):
+            if neigh not in cases and g_all.vs[neigh]['step'] == step + 1:
+                queue.append(neigh)
+    return g_all.induced_subgraph(sorted(cases))


-def convert_ig(file: str, output: str) -> None:
+def convert_and_dump(file: str, output_prefix: str) -> None:
+    print(f'Start converting: {file}')
     raw = json.loads(open(file).read())
-    g_main = dump_graph(raw['graph'])
-    g_main.write_pickle(f'{output}.pkl')
+    graph = dump_graph(raw['graph'])
+    save_graph(graph, f'{output_prefix}.pkl')
     for layout, sub_tag in raw['targets'].items():
-        print(layout, sub_tag)
-        g = dump_sub_graph(raw['graph'], layout)
-        g.write_pickle(f'{output}-{sub_tag}_{layout}.pkl')
+        sub_graph = dump_sub_graph(raw['graph'], layout)
+        # sub_graph = dump_sub_graph_pro(graph, layout)
+        save_graph(sub_graph, f'{output_prefix}-{sub_tag}_{layout}.pkl')
+
+
+def convert_all(json_dir: str, output_dir: str) -> None:
+    pool = multiprocessing.Pool()
+    for name in sorted(os.listdir(json_dir)):
+        json_file = f'{json_dir}/{name}'
+        output_prefix = f"{output_dir}/{name.removesuffix('.json')}"
+        pool.apply_async(convert_and_dump, args=(json_file, output_prefix))
+    pool.close()
+    pool.join()


 if __name__ == '__main__':
-    for name in sorted(os.listdir('output-json')):
-        name = name.removesuffix('.json')
-        print(name)
-        convert_ig(f'output-json/{name}.json', f'output-ig/{name}')
+    os.makedirs('output-ig', exist_ok=True)
+    convert_all('output-json', 'output-ig')
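Most of the speed-up here comes from collecting edges in a list and inserting them with one add_edges call instead of calling add_edge once per edge: python-igraph mutates its internal structures on every call, so a single batched insert is far cheaper than thousands of single inserts. A rough, self-contained sketch of the difference (graph and edge counts are arbitrary, timings machine-dependent):

# Sketch only: compares per-edge insertion with one batched add_edges call.
import time
import random
import igraph as ig

n, m = 20000, 50000
pairs = [(random.randrange(n), random.randrange(n)) for _ in range(m)]

g1 = ig.Graph(n)
t0 = time.time()
for a, b in pairs:
    g1.add_edge(a, b)          # one graph mutation per edge
print(f'add_edge loop:  {time.time() - t0:.2f}s')

g2 = ig.Graph(n)
t0 = time.time()
g2.add_edges(pairs)            # single batched mutation
print(f'add_edges once: {time.time() - t0:.2f}s')

save_graph additionally re-inserts the edge list in sorted order before write_pickle, presumably so the pickled output is deterministic regardless of the order in which the edges were generated.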

misc/all-graph/compare.py (28 added lines)

@@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+import os
+import igraph as ig
+
+
+def compare(file_1: str, file_2: str) -> None:
+    print(f'{file_1} vs {file_2}')
+    g1 = ig.Graph.Read_Pickle(file_1)
+    g2 = ig.Graph.Read_Pickle(file_2)
+
+    assert g1.vcount() == g2.vcount()
+    assert g1.ecount() == g2.ecount()
+    assert g1.isomorphic(g2)
+
+    for edge in g1.es:
+        assert edge.attributes() == {}
+    for edge in g2.es:
+        assert edge.attributes() == {}
+
+    for i in range(g1.vcount()):
+        assert g1.vs[i].attributes() == g2.vs[i].attributes()
+
+
+if __name__ == '__main__':
+    for name in sorted(os.listdir('output-ig-raw')):
+        compare(f'output-ig/{name}', f'output-ig-raw/{name}')
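Note that Graph.isomorphic() ignores attributes, so on its own it only confirms the two pickles have the same shape; the per-index loop is what checks that both builders emitted the vertices in the same order with the same attributes. A small illustration of the distinction, using made-up two-vertex graphs:

# Sketch only: two structurally identical graphs whose vertex attributes are
# assigned in a different order pass isomorphic() but fail the per-index check.
import igraph as ig

a = ig.Graph(2)
a.vs[0]['code'] = 'X'
a.vs[1]['code'] = 'Y'

b = ig.Graph(2)
b.vs[0]['code'] = 'Y'
b.vs[1]['code'] = 'X'

assert a.isomorphic(b)                               # structure matches
assert a.vs[0].attributes() != b.vs[0].attributes()  # vertex ordering does not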