|
@ -5,132 +5,97 @@ import igraph as ig |
|
|
import multiprocessing |
|
|
import multiprocessing |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def split_layer(graph: ig.Graph, step_a: int, step_b: int) -> tuple[list[set[int]], list[set[int]]]: |
|
|
def split_adjacent_layers(graph: ig.Graph, step: int) -> tuple[list[set[int]], list[set[int]]]: |
|
|
|
|
|
layouts = graph.vs.select(step_in=[step, step + 1]) |
|
|
def extend_from(node: ig.Vertex) -> tuple[set[ig.Vertex], set[ig.Vertex]]: |
|
|
code_map = {x['code']: x.index for x in layouts} |
|
|
assert node['step'] == step_a |
|
|
to_index = lambda iter: {code_map[x['code']] for x in iter} |
|
|
|
|
|
|
|
|
scan_a2b = True |
|
|
layer_curr, layer_next = [], [] |
|
|
union_a, union_b = set(), set() |
|
|
g_focus = graph.subgraph(layouts) |
|
|
curr_set, next_set = set([node]), set() |
|
|
isolated = g_focus.vs.select(_degree=0) |
|
|
|
|
|
if isolated: |
|
|
while curr_set: |
|
|
assert {x['step'] for x in isolated} == {step} |
|
|
for layout in curr_set: |
|
|
layer_curr = [to_index(isolated)] |
|
|
for neigh in layout.neighbors(): |
|
|
g_focus.delete_vertices(isolated) |
|
|
if scan_a2b and neigh['step'] == step_b and neigh not in union_b: |
|
|
|
|
|
next_set.add(neigh) |
|
|
for component in g_focus.connected_components(): |
|
|
elif not scan_a2b and neigh['step'] == step_a and neigh not in union_a: |
|
|
component = [g_focus.vs[x] for x in component] |
|
|
next_set.add(neigh) |
|
|
layer_curr.append(to_index(x for x in component if x['step'] == step)) |
|
|
|
|
|
layer_next.append(to_index(x for x in component if x['step'] == step + 1)) |
|
|
union_a.update(curr_set if scan_a2b else next_set) |
|
|
return layer_curr, layer_next |
|
|
union_b.update(next_set if scan_a2b else curr_set) |
|
|
|
|
|
scan_a2b = not scan_a2b |
|
|
|
|
|
curr_set = next_set |
|
|
def apply_layer_unions(unions_a: list[set[int]], unions_b: list[set[int]]) -> list[set[int]]: |
|
|
next_set = set() |
|
|
layer_data = {x for u in unions_a for x in u} |
|
|
|
|
|
assert layer_data == {x for u in unions_b for x in u} |
|
|
return union_a, union_b |
|
|
|
|
|
|
|
|
unions = [] |
|
|
assert step_a + 1 == step_b |
|
|
for curr_union in unions_a: |
|
|
|
|
|
for other_union in unions_b: |
|
|
layer_a = set(x for x in graph.vs if x['step'] == step_a) |
|
|
if union := curr_union.intersection(other_union): |
|
|
layer_b = set(x for x in graph.vs if x['step'] == step_b) |
|
|
unions.append(union) |
|
|
layer_num_a, layer_num_b = len(layer_a), len(layer_b) |
|
|
curr_union -= union |
|
|
assert layer_num_a > 0 and layer_num_b > 0 |
|
|
other_union -= union |
|
|
|
|
|
assert len(curr_union) == 0 |
|
|
data_a: list[set[int]] = [] |
|
|
|
|
|
data_b: list[set[int]] = [] |
|
|
|
|
|
special_set = set() |
|
|
|
|
|
while layer_a: |
|
|
|
|
|
union_a, union_b = extend_from(layer_a.pop()) |
|
|
|
|
|
if len(union_b) == 0: |
|
|
|
|
|
assert len(union_a) == 1 |
|
|
|
|
|
special_set.update(union_a) |
|
|
|
|
|
continue |
|
|
|
|
|
layer_a -= union_a |
|
|
|
|
|
layer_b -= union_b |
|
|
|
|
|
data_a.append(set(x.index for x in union_a)) |
|
|
|
|
|
data_b.append(set(x.index for x in union_b)) |
|
|
|
|
|
data_a.append(set(x.index for x in special_set)) |
|
|
|
|
|
|
|
|
|
|
|
assert len(layer_a) == 0 and len(layer_b) == 0 |
|
|
|
|
|
assert sum(len(x) for x in data_a) == layer_num_a |
|
|
|
|
|
assert sum(len(x) for x in data_b) == layer_num_b |
|
|
|
|
|
return data_a, [x for x in data_b if x] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def build_multi_set(unions_a: list[set[int]], unions_b: list[set[int]]) -> list[set[int]]: |
|
|
|
|
|
assert set(y for x in unions_a for y in x) == set(y for x in unions_b for y in x) |
|
|
|
|
|
|
|
|
|
|
|
release = [] |
|
|
|
|
|
for curr in unions_a: |
|
|
|
|
|
for other in unions_b: |
|
|
|
|
|
mid = curr.intersection(other) |
|
|
|
|
|
if mid: |
|
|
|
|
|
release.append(mid) |
|
|
|
|
|
curr -= mid |
|
|
|
|
|
other -= mid |
|
|
|
|
|
assert len(curr) == 0 |
|
|
|
|
|
|
|
|
|
|
|
assert set(len(x) for x in unions_a) == {0} |
|
|
assert set(len(x) for x in unions_a) == {0} |
|
|
assert set(len(x) for x in unions_b) == {0} |
|
|
assert set(len(x) for x in unions_b) == {0} |
|
|
return release |
|
|
assert layer_data == {x for u in unions for x in u} |
|
|
|
|
|
return unions |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def do_split(g: ig.Graph) -> ig.Graph: |
|
|
def build_all_unions(graph: ig.Graph) -> list[set[int]]: |
|
|
max_step = max(x['step'] for x in g.vs) |
|
|
max_step = max(graph.vs['step']) |
|
|
|
|
|
layer_unions = [[{x.index for x in graph.vs if x['step'] == 0}]] |
|
|
layer_data = [[] for _ in range(max_step + 1)] |
|
|
|
|
|
layer_data[0].append([set(x.index for x in g.vs if x['step'] == 0)]) |
|
|
|
|
|
for step in range(0, max_step): |
|
|
for step in range(0, max_step): |
|
|
data_a, data_b = split_layer(g, step, step + 1) |
|
|
layer_unions.extend(list(split_adjacent_layers(graph, step))) |
|
|
layer_data[step].append(data_a) |
|
|
layer_unions.append([{x.index for x in graph.vs if x['step'] == max_step}]) |
|
|
layer_data[step + 1].append(data_b) |
|
|
assert len(layer_unions) == (max_step + 1) * 2 |
|
|
layer_data[max_step].append([set(x.index for x in g.vs if x['step'] == max_step)]) |
|
|
|
|
|
|
|
|
|
|
|
assert len(layer_data) == max_step + 1 |
|
|
all_unions = [] |
|
|
assert set(len(x) for x in layer_data) == {2} |
|
|
for idx in range(0, len(layer_unions), 2): |
|
|
|
|
|
all_unions.extend(apply_layer_unions(*layer_unions[idx:idx + 2])) |
|
|
|
|
|
for unions in all_unions: |
|
|
|
|
|
assert len(unions) > 0 |
|
|
|
|
|
assert len(set(graph.vs[x]['step'] for x in unions)) == 1 |
|
|
|
|
|
return sorted(all_unions, key=lambda u: min(graph.vs[x]['code'] for x in u)) |
|
|
|
|
|
|
|
|
unions = {} |
|
|
|
|
|
for step in range(0, max_step + 1): |
|
|
|
|
|
layer_unions = build_multi_set(layer_data[step][0], layer_data[step][1]) |
|
|
|
|
|
for union in layer_unions: |
|
|
|
|
|
assert len(set(g.vs[x]['step'] for x in union)) == 1 |
|
|
|
|
|
codes = [g.vs[x]['code'] for x in union] |
|
|
|
|
|
unions[min(codes)] = union |
|
|
|
|
|
|
|
|
|
|
|
assert sorted(y for x in unions.values() for y in x) == list(range(g.vcount())) |
|
|
def combine_graph(graph: ig.Graph) -> ig.Graph: |
|
|
|
|
|
unions = build_all_unions(graph) |
|
|
|
|
|
union_idx = sorted((x, idx) for idx, u in enumerate(unions) for x in u) |
|
|
|
|
|
|
|
|
combine_info = [-1 for _ in range(g.vcount())] |
|
|
combine_idx = [x for _, x in union_idx] |
|
|
for index, key in enumerate(sorted(unions)): |
|
|
assert len(combine_idx) == graph.vcount() |
|
|
for x in unions[key]: |
|
|
assert set(combine_idx) == set(range(len(unions))) |
|
|
combine_info[x] = index |
|
|
|
|
|
|
|
|
|
|
|
assert len(combine_info) == g.vcount() |
|
|
id_len = len(str(len(unions) - 1)) |
|
|
assert set(combine_info) == set(range(len(unions))) |
|
|
graph.vs['id'] = [f'U{x:0{id_len}}' for x in combine_idx] |
|
|
|
|
|
|
|
|
g.contract_vertices(combine_info, combine_attrs={'step': 'first', 'code': list}) |
|
|
graph.contract_vertices(combine_idx, combine_attrs={'id': 'first', 'step': 'first', 'code': list}) |
|
|
assert set(x.is_loop() for x in g.es) == {False} |
|
|
assert [int(x.removeprefix('U')) for x in graph.vs['id']] == list(range(len(unions))) |
|
|
g.simplify(multiple=True) |
|
|
assert not any(x.is_loop() for x in graph.es) |
|
|
return g |
|
|
graph.simplify(multiple=True) |
|
|
|
|
|
return graph |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def do_combine(input: str, output: str) -> None: |
|
|
def do_combine(input: str, output: str) -> None: |
|
|
print(f'Start combining: {input}') |
|
|
print(f'Start combining: {input}') |
|
|
g = do_split(ig.Graph.Read_Pickle(input)) |
|
|
|
|
|
g.write_pickle(output) |
|
|
|
|
|
|
|
|
|
|
|
g_mod = g.copy() |
|
|
g_raw = (graph := combine_graph(ig.Graph.Read_Pickle(input))).copy() |
|
|
for x in g_mod.vs: |
|
|
graph.vs['codes'] = graph.vs['code'] |
|
|
x['code'] = '+'.join(x['code']) |
|
|
del graph.vs['code'] |
|
|
g_mod = do_split(g_mod) |
|
|
graph.write_pickle(output) # save combined graph |
|
|
|
|
|
|
|
|
|
|
|
g_raw.vs['code'] = g_raw.vs['id'] # modify as origin format |
|
|
|
|
|
g_mod = combine_graph(g_raw.copy()) |
|
|
|
|
|
|
|
|
assert g.vcount() == g_mod.vcount() |
|
|
assert g_raw.vcount() == g_mod.vcount() |
|
|
assert g.ecount() == g_mod.ecount() |
|
|
assert g_raw.ecount() == g_mod.ecount() |
|
|
for index in range(g.vcount()): |
|
|
assert all(x['code'] == [x['id']] for x in g_mod.vs) |
|
|
assert len(g_mod.vs[index]['code']) == 1 |
|
|
assert g_raw.vs['step'] == g_mod.vs['step'] |
|
|
assert g.vs[index]['step'] == g_mod.vs[index]['step'] |
|
|
assert g_raw.vs['code'] == g_mod.vs['id'] |
|
|
assert '+'.join(g.vs[index]['code']) == g_mod.vs[index]['code'][0] |
|
|
assert g_raw.isomorphic(g_mod) |
|
|
assert g.isomorphic(g_mod) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def combine_all(ig_dir: str, output_dir: str) -> None: |
|
|
def combine_all(ig_dir: str, output_dir: str) -> None: |
|
@ -141,6 +106,6 @@ def combine_all(ig_dir: str, output_dir: str) -> None: |
|
|
pool.join() |
|
|
pool.join() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
if __name__ == '__main__': |
|
|
os.makedirs('output-combine', exist_ok=True) |
|
|
os.makedirs('output-combine', exist_ok=True) |
|
|
combine_all('output-ig', 'output-combine') |
|
|
combine_all('output-ig', 'output-combine') |
|
|