Explore Reactome#

Exploring Reactome from a network perspective

All species#

Loading the node and edge info#

import importlib
import lipinet.parse_reactome as parse_reactome_module

importlib.reload(parse_reactome_module)
from lipinet.parse_reactome import parse_reactome_data
from lipinet.parse_reactome import parse_reactome_data 

# Parse Reactome for every species (human_only=False) with caching enabled.
reactome_results = parse_reactome_data(
    verbose=True,
    use_cache=True,
    human_only=False,
)
# The parser returns a dict; pull out the node and edge tables.
df_reactome_nodes, df_reactome_edges = (
    reactome_results['df_nodes'],
    reactome_results['df_edges'],
)
⏬ loading Reactome raw tables …
Fetching ChEBI2Reactome_PE_All_Levels.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ChEBI2Reactome_PE_All_Levels.tsv. Loading data...
Fetching ChEBI2Reactome_PE_Reactions.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ChEBI2Reactome_PE_Reactions.tsv. Loading data...
Fetching ReactomePathways.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ReactomePathways.tsv. Loading data...
Fetching ReactomePathwaysRelation.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ReactomePathwaysRelation.tsv. Loading data...
Returning ['ChEBI2Reactome_PE_All_Levels.tsv', 'ChEBI2Reactome_PE_Reactions.tsv', 'ReactomePathways.tsv', 'ReactomePathwaysRelation.tsv'] as a dict of dfs
[reactome] edges: (673294, 14)
[reactome] nodes: (147756, 12)
↪ caching Reactome (processed) as reactome_all_nb
df_reactome_nodes
node_id layer source_db_identifier reactome_pe_name species pe_name pe_location human url event_name_pathway_or_reaction evidence_code name
0 10033 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
1 10036 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
2 10055 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
3 10093 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
4 10100 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ...
147751 autophagosome membrane reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
147752 lamellar body reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
147753 lamellar body membrane reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
147754 clathrin-sculpted gamma-aminobutyric acid tran... reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
147755 endosome reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN

147756 rows × 12 columns

df_reactome_edges
source_id target_id source_layer target_layer source_db_identifier reactome_pe_name url event_name_pathway_or_reaction evidence_code species human pe_name pe_location interlayer
0 10033 R-ALL-9014945 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
1 10036 R-ALL-5696412 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
2 10055 R-ALL-9611688 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
3 10093 R-ALL-9648287 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
4 10093 R-ALL-3296452 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
673289 R-XTR-9958790 R-XTR-427652 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False
673290 R-XTR-9958790 R-XTR-433137 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False
673291 R-XTR-9958863 R-XTR-352230 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False
673292 R-XTR-9958863 R-XTR-428559 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False
673293 R-XTR-9959399 R-XTR-427975 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False

673294 rows × 14 columns

from graph_tool.all import Graph, GraphView, graph_draw
import graph_tool as gt

from onionnet import OnionNet
import onionnet.visualisation

import pandas as pd

import onionnet.exporter 

Building the network with OnionNet#

# Fresh OnionNet: every dataframe column is passed as a node/edge property,
# with NA dropping and duplicate dropping switched on.
onion = OnionNet()
onion.grow_onion(
    df_nodes=df_reactome_nodes,
    df_edges=df_reactome_edges,
    node_prop_cols=list(df_reactome_nodes.columns),
    edge_prop_cols=list(df_reactome_edges.columns),
    drop_na=True,
    drop_duplicates=True,
)
Nodes: in=147756, dropped_na=0, deduped=43592 → final=104164
Edges: in=673294, dropped_invalid=0, deduped=9996 → final=662980
# NOTE(review): this cell repeats the previous build verbatim and rebuilds the
# same network a second time - confirm whether it is still needed.
onion = OnionNet()
onion.grow_onion(
    df_nodes=df_reactome_nodes,
    df_edges=df_reactome_edges,
    node_prop_cols=list(df_reactome_nodes.columns),
    edge_prop_cols=list(df_reactome_edges.columns),
    drop_na=True,
    drop_duplicates=True,
)
Nodes: in=147756, dropped_na=0, deduped=43592 → final=104164
Edges: in=673294, dropped_invalid=0, deduped=9996 → final=662980

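Note that some rows were removed as duplicates (`deduped` above); we need to check carefully whether this deduplication has caused any loss of information or corruption. The edge counts also do not fully reconcile: 673294 in − 9996 deduped = 663298, yet only 662980 edges remain, so 318 edges were dropped without being reported.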

onion.core.graph
<Graph object, directed, with 104164 vertices and 662980 edges, 14 internal vertex properties, 14 internal edge properties, at 0x342a7d5e0>
list(onion.core.graph.vp)
['layer_hash',
 'node_id_hash',
 'node_id',
 'layer',
 'source_db_identifier',
 'reactome_pe_name',
 'species',
 'pe_name',
 'pe_location',
 'human',
 'url',
 'event_name_pathway_or_reaction',
 'evidence_code',
 'name']
df_reactome_nodes.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147756 entries, 0 to 147755
Data columns (total 12 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   node_id                         147756 non-null  object 
 1   layer                           147756 non-null  object 
 2   source_db_identifier            49593 non-null   float64
 3   reactome_pe_name                49593 non-null   object 
 4   species                         134597 non-null  object 
 5   pe_name                         55515 non-null   string 
 6   pe_location                     55515 non-null   string 
 7   human                           134597 non-null  object 
 8   url                             61847 non-null   object 
 9   event_name_pathway_or_reaction  61847 non-null   object 
 10  evidence_code                   61847 non-null   object 
 11  name                            23157 non-null   object 
dtypes: float64(1), object(9), string(2)
memory usage: 13.5+ MB

Decode property labels that were encoded

# onion.decode_property_labels_bulk(df=df_reactome_nodes, encoded_prop_type='v')
# onion.decode_property_labels_bulk(df=df_reactome_edges, encoded_prop_type='e')
df_reactome_edges.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 673294 entries, 0 to 673293
Data columns (total 14 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   source_id                       673294 non-null  object 
 1   target_id                       673294 non-null  object 
 2   source_layer                    673294 non-null  object 
 3   target_layer                    673294 non-null  object 
 4   source_db_identifier            625837 non-null  float64
 5   reactome_pe_name                625837 non-null  object 
 6   url                             625837 non-null  object 
 7   event_name_pathway_or_reaction  625837 non-null  object 
 8   evidence_code                   625837 non-null  object 
 9   species                         625837 non-null  object 
 10  human                           625837 non-null  object 
 11  pe_name                         397097 non-null  string 
 12  pe_location                     397097 non-null  string 
 13  interlayer                      23259 non-null   object 
dtypes: float64(1), object(11), string(2)
memory usage: 71.9+ MB
onion.core.layer_code_to_name
{0: 'reactome_chebi',
 1: 'reactome_physicalent',
 2: 'reactome_physicalent_nameloc',
 3: 'reactome_physicalent_name',
 4: 'reactome_physicalent_loc',
 5: 'reactome_pathway',
 6: 'reactome_reactions'}

Inspecting LipiNet metagraph for Reactome#

from onionnet.analytics import layer_stats, plot_layer_metagraph

# Summarise the tables per layer; print_tables=True also prints the counts.
nodes_by_layer, edges_by_pair = layer_stats(
    print_tables=True,
    df_nodes=df_reactome_nodes,
    df_edges=df_reactome_edges,
)
Node counts by layer:
count
layer
reactome_reactions 61847
reactome_physicalent 49593
reactome_pathway 23157
reactome_physicalent_nameloc 5922
reactome_physicalent_name 4084
reactome_chebi 3068
reactome_physicalent_loc 85
Interlayer edge count: 0
Edge counts by (source_layer, target_layer):
edges
source_layer target_layer
reactome_physicalent reactome_pathway 391096
reactome_reactions 234741
reactome_pathway reactome_pathway 23259
reactome_chebi reactome_physicalent 6353
reactome_physicalent_nameloc reactome_physicalent 6001
reactome_physicalent_loc reactome_physicalent_nameloc 5922
reactome_physicalent_name reactome_physicalent_nameloc 5922
# Layer metagraph for all species: layer labels only, no counts.
mg, mg_pos = plot_layer_metagraph(
    edges_by_pair,
    nodes_by_layer=nodes_by_layer,
    # canvas and size ranges
    output_size=(1000, 1000),
    node_size_range=(20, 30),
    node_text_size_range=(12, 16),
    edge_width_range=(6, 12),
    # both nodes and edges use the 'log' scaler
    node_scaler='log',
    edge_scaler='log',
    # labelling
    show_labels=True,
    node_text_position=1,
    return_graph=True,
    # optional extras, currently disabled:
    # pad_label_string=True,
    # vertex_font='consolas',
    # family_extractor=my_family,
    # family_colors={"sl": (0.2,0.6,0.9,0.9), "rhea": (0.9,0.4,0.1,0.9)}
)
# Same metagraph again, this time with node and edge counts shown.
mg, mg_pos = plot_layer_metagraph(
    edges_by_pair,
    nodes_by_layer=nodes_by_layer,
    # canvas and size ranges
    output_size=(1000, 1000),
    node_size_range=(20, 30),
    node_text_size_range=(12, 16),
    edge_width_range=(6, 12),
    # both nodes and edges use the 'log' scaler
    node_scaler='log',
    edge_scaler='log',
    # labelling
    show_labels=True,
    show_node_counts=True,
    show_edge_counts=True,
    node_text_position=1,
    return_graph=True,
    # optional extras, currently disabled:
    # pad_label_string=True,
    # vertex_font='consolas',
    # family_extractor=my_family,
    # family_colors={"sl": (0.2,0.6,0.9,0.9), "rhea": (0.9,0.4,0.1,0.9)}
)
df_reactome_edges[df_reactome_edges['source_layer']=='reactome_chebi']
source_id target_id source_layer target_layer source_db_identifier reactome_pe_name url event_name_pathway_or_reaction evidence_code species human pe_name pe_location interlayer
0 10033 R-ALL-9014945 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
1 10036 R-ALL-5696412 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
2 10055 R-ALL-9611688 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
3 10093 R-ALL-9648287 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
4 10093 R-ALL-3296452 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6348 9884 R-ALL-9713792 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
6349 9927 R-ALL-9615299 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
6350 9943 R-ALL-9714401 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
6351 9948 R-ALL-9660998 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
6352 9948 R-ALL-9614135 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN

6353 rows × 14 columns

# Number of outgoing edges per ChEBI source node (single .loc lookup instead of chained indexing)
df_reactome_edges.loc[df_reactome_edges['source_layer'] == 'reactome_chebi', 'source_id'].value_counts()
source_id
36080    308
16991    129
61120    103
33697     87
17843     61
        ... 
32223      1
3223       1
32246      1
32269      1
43966      1
Name: count, Length: 3068, dtype: int64

Human (Homo sapiens) only#

Loading the node and edge info#

from lipinet.parse_reactome import parse_reactome_data 

# Parse Reactome again, this time restricted with human_only=True (caching enabled).
reactome_results = parse_reactome_data(
    verbose=True,
    use_cache=True,
    human_only=True,
)
# The parser returns a dict; pull out the node and edge tables.
df_reactome_nodes, df_reactome_edges = (
    reactome_results['df_nodes'],
    reactome_results['df_edges'],
)
⏬ loading Reactome raw tables …
Fetching ChEBI2Reactome_PE_All_Levels.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ChEBI2Reactome_PE_All_Levels.tsv. Loading data...
Fetching ChEBI2Reactome_PE_Reactions.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ChEBI2Reactome_PE_Reactions.tsv. Loading data...
Fetching ReactomePathways.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ReactomePathways.tsv. Loading data...
Fetching ReactomePathwaysRelation.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ReactomePathwaysRelation.tsv. Loading data...
Returning ['ChEBI2Reactome_PE_All_Levels.tsv', 'ChEBI2Reactome_PE_Reactions.tsv', 'ReactomePathways.tsv', 'ReactomePathwaysRelation.tsv'] as a dict of dfs
[reactome] edges: (140889, 14)
[reactome] nodes: (31717, 12)
↪ caching Reactome (processed) as reactome_human_nb
df_reactome_nodes
node_id layer source_db_identifier reactome_pe_name species pe_name pe_location human url event_name_pathway_or_reaction evidence_code name
0 10033 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
1 10036 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
2 10055 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
3 10093 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
4 10100 reactome_chebi NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ...
147751 autophagosome membrane reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
147752 lamellar body reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
147753 lamellar body membrane reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
147754 clathrin-sculpted gamma-aminobutyric acid tran... reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN
147755 endosome reactome_physicalent_loc NaN NaN NaN <NA> <NA> NaN NaN NaN NaN NaN

31717 rows × 12 columns

df_reactome_edges
source_id target_id source_layer target_layer source_db_identifier reactome_pe_name url event_name_pathway_or_reaction evidence_code species human pe_name pe_location interlayer
0 10033 R-ALL-9014945 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
1 10036 R-ALL-5696412 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
2 10055 R-ALL-9611688 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
3 10093 R-ALL-9648287 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
4 10093 R-ALL-3296452 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
673289 R-XTR-9958790 R-XTR-427652 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False
673290 R-XTR-9958790 R-XTR-433137 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False
673291 R-XTR-9958863 R-XTR-352230 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False
673292 R-XTR-9958863 R-XTR-428559 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False
673293 R-XTR-9959399 R-XTR-427975 reactome_pathway reactome_pathway NaN NaN NaN NaN NaN NaN NaN <NA> <NA> False

140889 rows × 14 columns

Building the network with OnionNet#

# Fresh OnionNet for the human-only tables: every dataframe column is passed as
# a node/edge property, with NA dropping and duplicate dropping switched on.
onion = OnionNet()
onion.grow_onion(
    df_nodes=df_reactome_nodes,
    df_edges=df_reactome_edges,
    node_prop_cols=list(df_reactome_nodes.columns),
    edge_prop_cols=list(df_reactome_edges.columns),
    drop_na=True,
    drop_duplicates=True,
)
Nodes: in=31717, dropped_na=0, deduped=352 → final=31365
Edges: in=140889, dropped_invalid=0, deduped=7565 → final=112753
# NOTE(review): this cell repeats the previous build verbatim and rebuilds the
# same network a second time - confirm whether it is still needed.
onion = OnionNet()
onion.grow_onion(
    df_nodes=df_reactome_nodes,
    df_edges=df_reactome_edges,
    node_prop_cols=list(df_reactome_nodes.columns),
    edge_prop_cols=list(df_reactome_edges.columns),
    drop_na=True,
    drop_duplicates=True,
)
Nodes: in=31717, dropped_na=0, deduped=352 → final=31365
Edges: in=140889, dropped_invalid=0, deduped=7565 → final=112753

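Note that some rows were removed as duplicates (`deduped` above); we need to check carefully whether this deduplication has caused any loss of information or corruption. The edge counts also do not reconcile: 140889 in − 7565 deduped = 133324, yet only 112753 edges remain, so 20571 edges were dropped without being reported (possibly edges whose endpoints are missing from the human-only node table, e.g. the non-human pathway–pathway edges still present in `df_reactome_edges` — to be confirmed).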

onion.core.graph
<Graph object, directed, with 31365 vertices and 112753 edges, 14 internal vertex properties, 14 internal edge properties, at 0x35a513bf0>
list(onion.core.graph.vp)
['layer_hash',
 'node_id_hash',
 'node_id',
 'layer',
 'source_db_identifier',
 'reactome_pe_name',
 'species',
 'pe_name',
 'pe_location',
 'human',
 'url',
 'event_name_pathway_or_reaction',
 'evidence_code',
 'name']
df_reactome_nodes.info()
<class 'pandas.core.frame.DataFrame'>
Index: 31717 entries, 0 to 147755
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   node_id                         31717 non-null  object 
 1   layer                           31717 non-null  object 
 2   source_db_identifier            6277 non-null   float64
 3   reactome_pe_name                6277 non-null   object 
 4   species                         18558 non-null  object 
 5   pe_name                         12199 non-null  string 
 6   pe_location                     12199 non-null  string 
 7   human                           18558 non-null  object 
 8   url                             9456 non-null   object 
 9   event_name_pathway_or_reaction  9456 non-null   object 
 10  evidence_code                   9456 non-null   object 
 11  name                            2825 non-null   object 
dtypes: float64(1), object(9), string(2)
memory usage: 3.1+ MB

Decode property labels that were encoded

onion.decode_property_labels_bulk(df=df_reactome_nodes, encoded_prop_type='v')
V property 'node_id_decoded' created successfully.
V property 'layer_decoded' created successfully.
source_db_identifier prop left as is, no decoding needed (not an object type)
V property 'reactome_pe_name_decoded' created successfully.
V property 'species_decoded' created successfully.
pe_name prop left as is, no decoding needed (not an object type)
pe_location prop left as is, no decoding needed (not an object type)
V property 'human_decoded' created successfully.
V property 'url_decoded' created successfully.
V property 'event_name_pathway_or_reaction_decoded' created successfully.
V property 'evidence_code_decoded' created successfully.
V property 'name_decoded' created successfully.
onion.decode_property_labels_bulk(df=df_reactome_edges, encoded_prop_type='e')
E property 'source_id_decoded' created successfully.
E property 'target_id_decoded' created successfully.
E property 'source_layer_decoded' created successfully.
E property 'target_layer_decoded' created successfully.
source_db_identifier prop left as is, no decoding needed (not an object type)
E property 'reactome_pe_name_decoded' created successfully.
E property 'url_decoded' created successfully.
E property 'event_name_pathway_or_reaction_decoded' created successfully.
E property 'evidence_code_decoded' created successfully.
E property 'species_decoded' created successfully.
E property 'human_decoded' created successfully.
pe_name prop left as is, no decoding needed (not an object type)
pe_location prop left as is, no decoding needed (not an object type)
E property 'interlayer_decoded' created successfully.
df_reactome_edges.info()
<class 'pandas.core.frame.DataFrame'>
Index: 140889 entries, 0 to 673293
Data columns (total 14 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   source_id                       140889 non-null  object 
 1   target_id                       140889 non-null  object 
 2   source_layer                    140889 non-null  object 
 3   target_layer                    140889 non-null  object 
 4   source_db_identifier            93432 non-null   float64
 5   reactome_pe_name                93432 non-null   object 
 6   url                             93432 non-null   object 
 7   event_name_pathway_or_reaction  93432 non-null   object 
 8   evidence_code                   93432 non-null   object 
 9   species                         93432 non-null   object 
 10  human                           93432 non-null   object 
 11  pe_name                         63729 non-null   string 
 12  pe_location                     63729 non-null   string 
 13  interlayer                      23259 non-null   object 
dtypes: float64(1), object(11), string(2)
memory usage: 16.1+ MB
onion.core.layer_code_to_name
{0: 'reactome_chebi',
 1: 'reactome_physicalent',
 2: 'reactome_physicalent_nameloc',
 3: 'reactome_physicalent_name',
 4: 'reactome_physicalent_loc',
 5: 'reactome_pathway',
 6: 'reactome_reactions'}

Inspecting LipiNet metagraph for Reactome#

from onionnet.analytics import layer_stats, plot_layer_metagraph

# Summarise the human-only tables per layer; print_tables=True also prints the counts.
nodes_by_layer, edges_by_pair = layer_stats(
    print_tables=True,
    df_nodes=df_reactome_nodes,
    df_edges=df_reactome_edges,
)
Node counts by layer:
count
layer
reactome_reactions 9456
reactome_physicalent 6277
reactome_physicalent_nameloc 5922
reactome_physicalent_name 4084
reactome_chebi 3068
reactome_pathway 2825
reactome_physicalent_loc 85
Interlayer edge count: 0
Edge counts by (source_layer, target_layer):
edges
source_layer target_layer
reactome_physicalent reactome_pathway 57728
reactome_reactions 35704
reactome_pathway reactome_pathway 23259
reactome_chebi reactome_physicalent 6353
reactome_physicalent_nameloc reactome_physicalent 6001
reactome_physicalent_loc reactome_physicalent_nameloc 5922
reactome_physicalent_name reactome_physicalent_nameloc 5922
# Layer metagraph for the human-only network: layer labels only, no counts.
mg, mg_pos = plot_layer_metagraph(
    edges_by_pair,
    nodes_by_layer=nodes_by_layer,
    # canvas and size ranges
    output_size=(1000, 1000),
    node_size_range=(20, 30),
    node_text_size_range=(12, 16),
    edge_width_range=(6, 12),
    # both nodes and edges use the 'log' scaler
    node_scaler='log',
    edge_scaler='log',
    # labelling
    show_labels=True,
    node_text_position=1,
    return_graph=True,
    # optional extras, currently disabled:
    # pad_label_string=True,
    # vertex_font='consolas',
    # family_extractor=my_family,
    # family_colors={"sl": (0.2,0.6,0.9,0.9), "rhea": (0.9,0.4,0.1,0.9)}
)
# Same human-only metagraph again, this time with node and edge counts shown.
mg, mg_pos = plot_layer_metagraph(
    edges_by_pair,
    nodes_by_layer=nodes_by_layer,
    # canvas and size ranges
    output_size=(1000, 1000),
    node_size_range=(20, 30),
    node_text_size_range=(12, 16),
    edge_width_range=(6, 12),
    # both nodes and edges use the 'log' scaler
    node_scaler='log',
    edge_scaler='log',
    # labelling
    show_labels=True,
    show_node_counts=True,
    show_edge_counts=True,
    node_text_position=1,
    return_graph=True,
    # optional extras, currently disabled:
    # pad_label_string=True,
    # vertex_font='consolas',
    # family_extractor=my_family,
    # family_colors={"sl": (0.2,0.6,0.9,0.9), "rhea": (0.9,0.4,0.1,0.9)}
)
df_reactome_edges[df_reactome_edges['source_layer']=='reactome_chebi']
source_id target_id source_layer target_layer source_db_identifier reactome_pe_name url event_name_pathway_or_reaction evidence_code species human pe_name pe_location interlayer
0 10033 R-ALL-9014945 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
1 10036 R-ALL-5696412 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
2 10055 R-ALL-9611688 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
3 10093 R-ALL-9648287 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
4 10093 R-ALL-3296452 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
6348 9884 R-ALL-9713792 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
6349 9927 R-ALL-9615299 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
6350 9943 R-ALL-9714401 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
6351 9948 R-ALL-9660998 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN
6352 9948 R-ALL-9614135 reactome_chebi reactome_physicalent NaN NaN NaN NaN NaN NaN NaN <NA> <NA> NaN

6353 rows × 14 columns

# Number of outgoing edges per ChEBI source node (single .loc lookup instead of chained indexing)
df_reactome_edges.loc[df_reactome_edges['source_layer'] == 'reactome_chebi', 'source_id'].value_counts()
source_id
36080    308
16991    129
61120    103
33697     87
17843     61
        ... 
32223      1
3223       1
32246      1
32269      1
43966      1
Name: count, Length: 3068, dtype: int64
import matplotlib.pyplot as plt

# Colormap handed to color_nodes for the categorical species colouring
cm20 = plt.get_cmap('tab20')

# Nodes: colour and shape are both keyed on the decoded species label
color_result = onionnet.visualisation.color_nodes(
    g=onion.core.graph,
    prop_name="species_decoded",
    method="categorical",
    generate_legend=True,
    custom_colormap=cm20,
)
shape_result = onionnet.visualisation.shape_nodes(
    g=onion.core.graph,
    prop_name="species_decoded",
    shape_method="categorical",
    generate_legend=True,
)

# Merge both result dicts for convenience (shape entries win on a key clash)
graphic_styles = dict(color_result)
graphic_styles.update(shape_result)

# Store the style maps on the graph so later cells can reuse them
onion.core.graph.vp['v_color_level'] = graphic_styles['v_color']
onion.core.graph.vp['v_shape_layer'] = graphic_styles['v_shape']
print(onion.view_layers('reactome_pathway'))

# NOTE(review): the maps are read back through `onion.g` although they were
# assigned through `onion.core.graph` - presumably the same graph; confirm.
graph_draw(
    onion.view_layers('reactome_pathway'),
    vertex_shape=onion.g.vp['v_shape_layer'],
    vertex_fill_color=onion.g.vp['v_color_level'],
)
<GraphView object, directed, with 2825 vertices and 2841 edges, 25 internal vertex properties, 25 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x175375d60, at 0x17572c5f0>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x175375d60, at 0x175349e80>, at 0x175375d60>
../_images/8a065f18929419026ec865658ee155a536820b8d40c7d4594162980b7c19a713.png
<VertexPropertyMap object with value type 'vector<double>', for Graph 0x35a513bf0, at 0x1757185c0>
import plotly.express as px
net = onion.view_layers('reactome_pathway')

# NOTE: everything below works on the Homo sapiens-only network built above
homo_decoded_pathway_names = onionnet.exporter.export_info(net, mode='v')['name_decoded']
# First token of each name. Split on runs of space, hyphen or slash so these
# tokens agree with the `splitter` regex used for the vertex annotations
# further down; with a different delimiter set, the most frequent words counted
# here may never match the words that are later looked up.
homo_decoded_pathway_names_firstword = homo_decoded_pathway_names.str.split(r'[ \-/]+').str[0]
# or just use the first word (splitting on spaces only)
# homo_decoded_pathway_names_firstword = homo_decoded_pathway_names.str.split(' ').str[0]

# The title names the species actually plotted (this is the human-only section)
px.bar(
    homo_decoded_pathway_names_firstword.value_counts().head(50),
    text_auto=True,
    title='Counts of the first word in the decoded pathway name for Homo sapiens',
)
# def top20_anno(word): lambda word: word in homo_decoded_pathway_names_firstword.value_counts()[:20].index.to_list()

# Keep the 19 most frequent first words; top20_anno() maps everything else to
# 'other', which gives 20 categories in total.
top20 = homo_decoded_pathway_names_firstword.value_counts().head(19).index.tolist()

def top20_anno(word):
    """Map *word* to itself when it appears in ``top20``, otherwise to 'other'."""
    if word in top20:
        return word
    return 'other'

# now apply it to the first token of each decoded name
# annotated_other = [
#     top20_anno(vp.split(' ', 1)[0])
#     for vp in net.vp['name_decoded']
# ]

import re
# Tokeniser: a run of spaces, dashes or slashes counts as one delimiter
splitter = re.compile(r'[ \-/]+')

# First word of every decoded pathway name, in the property map's iteration order
annotated_fw = [
    splitter.split(decoded_name, 1)[0]
    for decoded_name in net.vp['name_decoded']
]

# The same first words after passing each through top20_anno
annotated_other = [top20_anno(first_word) for first_word in annotated_fw]

annotated_fw[:10]
['2', '3', '3', '3', '5', 'ABC', 'ABC', 'ABC', 'ABO', 'ADORA2B']

Now we create vertex properties from the lists

# One string property per annotation list
v_label_other = net.new_vertex_property("string")
v_label_fw = net.new_vertex_property("string")

# Fill both maps in a single pass over the vertices
for vertex, grouped_label, full_label in zip(net.vertices(), annotated_other, annotated_fw):
    v_label_other[vertex] = grouped_label
    v_label_fw[vertex] = full_label

# Register the maps on the view under the names used by the plots below
net.vertex_properties["first_word_annot"] = v_label_other
net.vertex_properties["first_word_annotfull"] = v_label_fw
# NOTE(review): the earlier TODO here asked for this call to be changed to the exact line below, so it
# appears to be done already. The captured output reports a different file name; re-run the cell to refresh it.
pos_sfdp_pathwayont_homo = onionnet.visualisation.load_or_compute_layout(net, filename='.data/.explore_reactome_pos_sfdp_pathwayont_homo.tsv')
[Computed] Saved layout for 2825 vertices → .data/.explore_reactome_pos_sfdp_fv_homo.tsv
# Colour vertices by the grouped first-word annotation
color_result = onionnet.visualisation.color_nodes(
    g=net,
    prop_name="first_word_annot",
    method="categorical",
    generate_legend=True,
    custom_colormap=plt.get_cmap('tab20'),
)

# Fixed-size vertices, labelled with the grouped annotation
graph_draw(
    net,
    pos=pos_sfdp_pathwayont_homo,
    output_size=(1500, 1500),
    vertex_size=5,
    vertex_fill_color=color_result['v_color'],
    vertex_text=net.vp['first_word_annot'],
)

onionnet.visualisation.get_legend(
    source=color_result['legend_node_color'],
    title='Legend: Reactome first-word of pathway',
    custom_cmap=plt.get_cmap('tab20'),
)
# Same layout, now labelled with the full first word; vertex size and font
# size both follow the out-degree
graph_draw(
    net,
    pos=pos_sfdp_pathwayont_homo,
    output_size=(1500, 1500),
    vertex_size=net.degree_property_map("out"),
    vertex_font_size=net.degree_property_map("out"),
    vertex_fill_color=color_result['v_color'],
    vertex_text=net.vp['first_word_annotfull'],
    vertex_text_position=-2,
)
../_images/f443e8f2997564faa967a05977460381070d01f1c2941ba8eab57a847849cf6b.png
<VertexPropertyMap object with value type 'vector<double>', for Graph 0x1698cfc20, at 0x169123fe0>
from graph_tool.centrality import betweenness
from graph_tool.draw import graph_draw

def plot_reactome_ont(graph, pos, betweenness):
    """Draw a pathway graph with vertices sized by a betweenness property map.

    The original body ignored ``graph`` and always drew the module-level
    ``net``; the graph passed by the caller is now the one that is drawn.

    Fill colours and the legend still come from the module-level
    ``color_result``, so ``graph`` is expected to be the graph (or a view of
    it) that ``color_result`` was computed for.

    Args:
        graph: Graph or GraphView to draw. It must carry the
            ``first_word_annotfull`` vertex property, used as vertex text.
        pos: Vertex position property map handed to ``graph_draw``.
        betweenness: Vertex property map used for both vertex size and font
            size. Inside this function the name shadows the imported
            ``betweenness`` function.

    Returns:
        The value returned by ``onionnet.visualisation.get_legend`` for the
        node-colour legend.
    """
    graph_draw(
        graph,
        pos=pos,
        vertex_fill_color=color_result['v_color'],
        vertex_text=graph.vp['first_word_annotfull'],
        vertex_text_position=-2,
        vertex_font_size=betweenness,
        vertex_size=betweenness,  # callers pre-scale this map to pixel sizes
        output_size=(1500, 1500),
    )

    return onionnet.visualisation.get_legend(
        source=color_result['legend_node_color'],
        title='Legend: Reactome first-word of pathway',
        custom_cmap=plt.get_cmap('tab20'),
    )


# Directed betweenness: rescale so the maximum maps to 30, then add 3 so
# zero-betweenness vertices still get a size > 0.
bv, _ = betweenness(net)
bv.a = bv.a / (bv.a.max() / 30) + 3

plot_reactome_ont(net, pos_sfdp_pathwayont_homo, bv)

Now inspecting undirected betweenness

# Betweenness on an undirected view of the same graph: rescale so the maximum
# maps to 30, then add 3 so zero-betweenness vertices still get a size > 0.
bv, _ = betweenness(GraphView(net, directed=False))
bv.a = bv.a / (bv.a.max() / 30) + 3

plot_reactome_ont(net, pos_sfdp_pathwayont_homo, bv)
# Undirected betweenness again, but with a fixed linear mapping rather than a
# max-based rescale: size = 700 * betweenness + 5.
undirected_view = GraphView(net, directed=False)
bv, _ = betweenness(undirected_view)
bv.a = 5 + bv.a * 7e2

plot_reactome_ont(net, pos_sfdp_pathwayont_homo, bv)
# Directed betweenness once more (same computation as the first betweenness
# plot): rescale so the maximum maps to 30, then add 3 so zero-betweenness
# vertices still get a size > 0.
bv, _ = betweenness(net)
bv.a = bv.a / (bv.a.max() / 30) + 3

plot_reactome_ont(net, pos_sfdp_pathwayont_homo, bv)

More TODO - notebook in progress#