Explore Reactome#
Exploring Reactome from a network perspective
All species#
Loading the node and edge info#
import importlib

import lipinet.parse_reactome as parse_reactome_module

# Reload the parser module first so that edits made to it during this session
# are picked up, then bind the function from the freshly reloaded module.
# (The import below used to be written twice; once is sufficient.)
importlib.reload(parse_reactome_module)
from lipinet.parse_reactome import parse_reactome_data

# All-species run (human_only=False). The parser returns a dict; the node and
# edge tables are stored under 'df_nodes' and 'df_edges'.
reactome_results = parse_reactome_data(verbose=True, use_cache=True, human_only=False)
df_reactome_nodes = reactome_results['df_nodes']
df_reactome_edges = reactome_results['df_edges']
⏬ loading Reactome raw tables …
Fetching ChEBI2Reactome_PE_All_Levels.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ChEBI2Reactome_PE_All_Levels.tsv. Loading data...
Fetching ChEBI2Reactome_PE_Reactions.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ChEBI2Reactome_PE_Reactions.tsv. Loading data...
Fetching ReactomePathways.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ReactomePathways.tsv. Loading data...
Fetching ReactomePathwaysRelation.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ReactomePathwaysRelation.tsv. Loading data...
Returning ['ChEBI2Reactome_PE_All_Levels.tsv', 'ChEBI2Reactome_PE_Reactions.tsv', 'ReactomePathways.tsv', 'ReactomePathwaysRelation.tsv'] as a dict of dfs
[reactome] edges: (673294, 14)
[reactome] nodes: (147756, 12)
↪ caching Reactome (processed) as reactome_all_nb
df_reactome_nodes
| node_id | layer | source_db_identifier | reactome_pe_name | species | pe_name | pe_location | human | url | event_name_pathway_or_reaction | evidence_code | name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10033 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 1 | 10036 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 2 | 10055 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 3 | 10093 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 4 | 10100 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 147751 | autophagosome membrane | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 147752 | lamellar body | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 147753 | lamellar body membrane | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 147754 | clathrin-sculpted gamma-aminobutyric acid tran... | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 147755 | endosome | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
147756 rows × 12 columns
df_reactome_edges
| source_id | target_id | source_layer | target_layer | source_db_identifier | reactome_pe_name | url | event_name_pathway_or_reaction | evidence_code | species | human | pe_name | pe_location | interlayer | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10033 | R-ALL-9014945 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 1 | 10036 | R-ALL-5696412 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 2 | 10055 | R-ALL-9611688 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 3 | 10093 | R-ALL-9648287 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 4 | 10093 | R-ALL-3296452 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 673289 | R-XTR-9958790 | R-XTR-427652 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
| 673290 | R-XTR-9958790 | R-XTR-433137 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
| 673291 | R-XTR-9958863 | R-XTR-352230 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
| 673292 | R-XTR-9958863 | R-XTR-428559 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
| 673293 | R-XTR-9959399 | R-XTR-427975 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
673294 rows × 14 columns
from graph_tool.all import Graph, GraphView, graph_draw
import graph_tool as gt
from onionnet import OnionNet
import onionnet.visualisation
import pandas as pd
import onionnet.exporter
Building the network with OnionNet#
# Build the multilayer network from the all-species tables. Every dataframe
# column is handed over as a vertex/edge property column.
node_columns = list(df_reactome_nodes.columns)
edge_columns = list(df_reactome_edges.columns)

onion = OnionNet()
onion.grow_onion(
    df_nodes=df_reactome_nodes,
    df_edges=df_reactome_edges,
    node_prop_cols=node_columns,
    edge_prop_cols=edge_columns,
    drop_na=True,
    drop_duplicates=True,
)
Nodes: in=147756, dropped_na=0, deduped=43592 → final=104164
Edges: in=673294, dropped_invalid=0, deduped=9996 → final=662980
# Rebuild the OnionNet from the same node/edge tables; this repeats the
# previous build with identical arguments and replaces `onion`.
# NOTE(review): drop_na / drop_duplicates presumably drive the
# "dropped_na" / "deduped" counts printed by grow_onion — confirm in OnionNet.
onion = OnionNet()
onion.grow_onion(df_nodes=df_reactome_nodes,
df_edges=df_reactome_edges,
node_prop_cols=df_reactome_nodes.columns.to_list(),
edge_prop_cols=df_reactome_edges.columns.to_list(),
drop_na=True,
drop_duplicates=True)
Nodes: in=147756, dropped_na=0, deduped=43592 → final=104164
Edges: in=673294, dropped_invalid=0, deduped=9996 → final=662980
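Note that deduplication removed rows (43,592 nodes and 9,996 edges here), and the edge totals do not fully reconcile (673,294 − 9,996 = 663,298, yet 662,980 edges were kept); we need to check carefully whether this has led to a loss of information or corruption.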
onion.core.graph
<Graph object, directed, with 104164 vertices and 662980 edges, 14 internal vertex properties, 14 internal edge properties, at 0x342a7d5e0>
list(onion.core.graph.vp)
['layer_hash',
'node_id_hash',
'node_id',
'layer',
'source_db_identifier',
'reactome_pe_name',
'species',
'pe_name',
'pe_location',
'human',
'url',
'event_name_pathway_or_reaction',
'evidence_code',
'name']
df_reactome_nodes.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147756 entries, 0 to 147755
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 node_id 147756 non-null object
1 layer 147756 non-null object
2 source_db_identifier 49593 non-null float64
3 reactome_pe_name 49593 non-null object
4 species 134597 non-null object
5 pe_name 55515 non-null string
6 pe_location 55515 non-null string
7 human 134597 non-null object
8 url 61847 non-null object
9 event_name_pathway_or_reaction 61847 non-null object
10 evidence_code 61847 non-null object
11 name 23157 non-null object
dtypes: float64(1), object(9), string(2)
memory usage: 13.5+ MB
Decode property labels that were encoded
# Decoding of encoded property labels is left commented out for the
# all-species graph; the human-only section further down runs these same calls.
# onion.decode_property_labels_bulk(df=df_reactome_nodes, encoded_prop_type='v')
# onion.decode_property_labels_bulk(df=df_reactome_edges, encoded_prop_type='e')
# Column dtypes and non-null counts of the edge table.
df_reactome_edges.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 673294 entries, 0 to 673293
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 source_id 673294 non-null object
1 target_id 673294 non-null object
2 source_layer 673294 non-null object
3 target_layer 673294 non-null object
4 source_db_identifier 625837 non-null float64
5 reactome_pe_name 625837 non-null object
6 url 625837 non-null object
7 event_name_pathway_or_reaction 625837 non-null object
8 evidence_code 625837 non-null object
9 species 625837 non-null object
10 human 625837 non-null object
11 pe_name 397097 non-null string
12 pe_location 397097 non-null string
13 interlayer 23259 non-null object
dtypes: float64(1), object(11), string(2)
memory usage: 71.9+ MB
onion.core.layer_code_to_name
{0: 'reactome_chebi',
1: 'reactome_physicalent',
2: 'reactome_physicalent_nameloc',
3: 'reactome_physicalent_name',
4: 'reactome_physicalent_loc',
5: 'reactome_pathway',
6: 'reactome_reactions'}
Inspecting LipiNet metagraph for Reactome#
# Tabulate node counts per layer and edge counts per
# (source_layer, target_layer) pair; with print_tables=True both tables are
# also printed. The two results feed the metagraph plots.
from onionnet.analytics import layer_stats, plot_layer_metagraph
nodes_by_layer, edges_by_pair = layer_stats(
df_nodes=df_reactome_nodes,
df_edges=df_reactome_edges,
print_tables=True
)
Node counts by layer:
| count | |
|---|---|
| layer | |
| reactome_reactions | 61847 |
| reactome_physicalent | 49593 |
| reactome_pathway | 23157 |
| reactome_physicalent_nameloc | 5922 |
| reactome_physicalent_name | 4084 |
| reactome_chebi | 3068 |
| reactome_physicalent_loc | 85 |
Interlayer edge count: 0
Edge counts by (source_layer, target_layer):
| edges | ||
|---|---|---|
| source_layer | target_layer | |
| reactome_physicalent | reactome_pathway | 391096 |
| reactome_reactions | 234741 | |
| reactome_pathway | reactome_pathway | 23259 |
| reactome_chebi | reactome_physicalent | 6353 |
| reactome_physicalent_nameloc | reactome_physicalent | 6001 |
| reactome_physicalent_loc | reactome_physicalent_nameloc | 5922 |
| reactome_physicalent_name | reactome_physicalent_nameloc | 5922 |
# Layer metagraph without count annotations. The drawing options are
# collected in one mapping so they can be read at a glance.
metagraph_options = dict(
    nodes_by_layer=nodes_by_layer,
    node_size_range=(20, 30),
    node_text_size_range=(12, 16),
    edge_width_range=(6, 12),
    node_scaler="log",
    edge_scaler="log",
    show_labels=True,
    output_size=(1000, 1000),
    node_text_position=1,
    return_graph=True,
)
# Further options that are currently not passed:
#   pad_label_string=True
#   vertex_font='consolas'
#   family_extractor=my_family
#   family_colors={"sl": (0.2,0.6,0.9,0.9), "rhea": (0.9,0.4,0.1,0.9)}
mg, mg_pos = plot_layer_metagraph(edges_by_pair, **metagraph_options)
# Layer metagraph again, this time with node and edge counts shown.
metagraph_options = dict(
    nodes_by_layer=nodes_by_layer,
    node_size_range=(20, 30),
    node_text_size_range=(12, 16),
    edge_width_range=(6, 12),
    node_scaler="log",
    edge_scaler="log",
    show_labels=True,
    output_size=(1000, 1000),
    node_text_position=1,
    return_graph=True,
    show_edge_counts=True,
    show_node_counts=True,
)
# Further options that are currently not passed:
#   pad_label_string=True
#   vertex_font='consolas'
#   family_extractor=my_family
#   family_colors={"sl": (0.2,0.6,0.9,0.9), "rhea": (0.9,0.4,0.1,0.9)}
mg, mg_pos = plot_layer_metagraph(edges_by_pair, **metagraph_options)
df_reactome_edges[df_reactome_edges['source_layer']=='reactome_chebi']
| source_id | target_id | source_layer | target_layer | source_db_identifier | reactome_pe_name | url | event_name_pathway_or_reaction | evidence_code | species | human | pe_name | pe_location | interlayer | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10033 | R-ALL-9014945 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 1 | 10036 | R-ALL-5696412 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 2 | 10055 | R-ALL-9611688 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 3 | 10093 | R-ALL-9648287 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 4 | 10093 | R-ALL-3296452 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6348 | 9884 | R-ALL-9713792 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 6349 | 9927 | R-ALL-9615299 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 6350 | 9943 | R-ALL-9714401 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 6351 | 9948 | R-ALL-9660998 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 6352 | 9948 | R-ALL-9614135 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
6353 rows × 14 columns
df_reactome_edges[df_reactome_edges['source_layer']=='reactome_chebi']['source_id'].value_counts()
source_id
36080 308
16991 129
61120 103
33697 87
17843 61
...
32223 1
3223 1
32246 1
32269 1
43966 1
Name: count, Length: 3068, dtype: int64
Human (Homo sapiens) only#
Loading the node and edge info#
# Re-run the parser with human_only=True. The results are bound to the same
# variable names, so they replace the all-species node/edge tables from here on.
from lipinet.parse_reactome import parse_reactome_data
reactome_results = parse_reactome_data(verbose=True, use_cache=True, human_only=True)
df_reactome_nodes = reactome_results['df_nodes']
df_reactome_edges = reactome_results['df_edges']
⏬ loading Reactome raw tables …
Fetching ChEBI2Reactome_PE_All_Levels.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ChEBI2Reactome_PE_All_Levels.tsv. Loading data...
Fetching ChEBI2Reactome_PE_Reactions.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ChEBI2Reactome_PE_Reactions.tsv. Loading data...
Fetching ReactomePathways.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ReactomePathways.tsv. Loading data...
Fetching ReactomePathwaysRelation.tsv
File found locally at /Users/macsbook/Code/lipinet/lipinet/.data/downloaded/ReactomePathwaysRelation.tsv. Loading data...
Returning ['ChEBI2Reactome_PE_All_Levels.tsv', 'ChEBI2Reactome_PE_Reactions.tsv', 'ReactomePathways.tsv', 'ReactomePathwaysRelation.tsv'] as a dict of dfs
[reactome] edges: (140889, 14)
[reactome] nodes: (31717, 12)
↪ caching Reactome (processed) as reactome_human_nb
df_reactome_nodes
| node_id | layer | source_db_identifier | reactome_pe_name | species | pe_name | pe_location | human | url | event_name_pathway_or_reaction | evidence_code | name | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10033 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 1 | 10036 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 2 | 10055 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 3 | 10093 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 4 | 10100 | reactome_chebi | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 147751 | autophagosome membrane | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 147752 | lamellar body | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 147753 | lamellar body membrane | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 147754 | clathrin-sculpted gamma-aminobutyric acid tran... | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
| 147755 | endosome | reactome_physicalent_loc | NaN | NaN | NaN | <NA> | <NA> | NaN | NaN | NaN | NaN | NaN |
31717 rows × 12 columns
df_reactome_edges
| source_id | target_id | source_layer | target_layer | source_db_identifier | reactome_pe_name | url | event_name_pathway_or_reaction | evidence_code | species | human | pe_name | pe_location | interlayer | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10033 | R-ALL-9014945 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 1 | 10036 | R-ALL-5696412 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 2 | 10055 | R-ALL-9611688 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 3 | 10093 | R-ALL-9648287 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 4 | 10093 | R-ALL-3296452 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 673289 | R-XTR-9958790 | R-XTR-427652 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
| 673290 | R-XTR-9958790 | R-XTR-433137 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
| 673291 | R-XTR-9958863 | R-XTR-352230 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
| 673292 | R-XTR-9958863 | R-XTR-428559 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
| 673293 | R-XTR-9959399 | R-XTR-427975 | reactome_pathway | reactome_pathway | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | False |
140889 rows × 14 columns
Building the network with OnionNet#
# Build the multilayer network from the human-only tables. Every dataframe
# column is handed over as a vertex/edge property column.
node_columns = list(df_reactome_nodes.columns)
edge_columns = list(df_reactome_edges.columns)

onion = OnionNet()
onion.grow_onion(
    df_nodes=df_reactome_nodes,
    df_edges=df_reactome_edges,
    node_prop_cols=node_columns,
    edge_prop_cols=edge_columns,
    drop_na=True,
    drop_duplicates=True,
)
Nodes: in=31717, dropped_na=0, deduped=352 → final=31365
Edges: in=140889, dropped_invalid=0, deduped=7565 → final=112753
# Rebuild the OnionNet from the human-only tables; this repeats the previous
# build with identical arguments and replaces `onion`.
# NOTE(review): drop_na / drop_duplicates presumably drive the
# "dropped_na" / "deduped" counts printed by grow_onion — confirm in OnionNet.
onion = OnionNet()
onion.grow_onion(df_nodes=df_reactome_nodes,
df_edges=df_reactome_edges,
node_prop_cols=df_reactome_nodes.columns.to_list(),
edge_prop_cols=df_reactome_edges.columns.to_list(),
drop_na=True,
drop_duplicates=True)
Nodes: in=31717, dropped_na=0, deduped=352 → final=31365
Edges: in=140889, dropped_invalid=0, deduped=7565 → final=112753
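Note that deduplication removed rows here as well (352 nodes and 7,565 edges), and the edge totals do not reconcile (140,889 − 7,565 = 133,324, yet only 112,753 edges were kept); we need to check carefully whether this has led to a loss of information or corruption.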
onion.core.graph
<Graph object, directed, with 31365 vertices and 112753 edges, 14 internal vertex properties, 14 internal edge properties, at 0x35a513bf0>
list(onion.core.graph.vp)
['layer_hash',
'node_id_hash',
'node_id',
'layer',
'source_db_identifier',
'reactome_pe_name',
'species',
'pe_name',
'pe_location',
'human',
'url',
'event_name_pathway_or_reaction',
'evidence_code',
'name']
df_reactome_nodes.info()
<class 'pandas.core.frame.DataFrame'>
Index: 31717 entries, 0 to 147755
Data columns (total 12 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 node_id 31717 non-null object
1 layer 31717 non-null object
2 source_db_identifier 6277 non-null float64
3 reactome_pe_name 6277 non-null object
4 species 18558 non-null object
5 pe_name 12199 non-null string
6 pe_location 12199 non-null string
7 human 18558 non-null object
8 url 9456 non-null object
9 event_name_pathway_or_reaction 9456 non-null object
10 evidence_code 9456 non-null object
11 name 2825 non-null object
dtypes: float64(1), object(9), string(2)
memory usage: 3.1+ MB
Decode property labels that were encoded
onion.decode_property_labels_bulk(df=df_reactome_nodes, encoded_prop_type='v')
V property 'node_id_decoded' created successfully.
V property 'layer_decoded' created successfully.
source_db_identifier prop left as is, no decoding needed (not an object type)
V property 'reactome_pe_name_decoded' created successfully.
V property 'species_decoded' created successfully.
pe_name prop left as is, no decoding needed (not an object type)
pe_location prop left as is, no decoding needed (not an object type)
V property 'human_decoded' created successfully.
V property 'url_decoded' created successfully.
V property 'event_name_pathway_or_reaction_decoded' created successfully.
V property 'evidence_code_decoded' created successfully.
V property 'name_decoded' created successfully.
onion.decode_property_labels_bulk(df=df_reactome_edges, encoded_prop_type='e')
E property 'source_id_decoded' created successfully.
E property 'target_id_decoded' created successfully.
E property 'source_layer_decoded' created successfully.
E property 'target_layer_decoded' created successfully.
source_db_identifier prop left as is, no decoding needed (not an object type)
E property 'reactome_pe_name_decoded' created successfully.
E property 'url_decoded' created successfully.
E property 'event_name_pathway_or_reaction_decoded' created successfully.
E property 'evidence_code_decoded' created successfully.
E property 'species_decoded' created successfully.
E property 'human_decoded' created successfully.
pe_name prop left as is, no decoding needed (not an object type)
pe_location prop left as is, no decoding needed (not an object type)
E property 'interlayer_decoded' created successfully.
df_reactome_edges.info()
<class 'pandas.core.frame.DataFrame'>
Index: 140889 entries, 0 to 673293
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 source_id 140889 non-null object
1 target_id 140889 non-null object
2 source_layer 140889 non-null object
3 target_layer 140889 non-null object
4 source_db_identifier 93432 non-null float64
5 reactome_pe_name 93432 non-null object
6 url 93432 non-null object
7 event_name_pathway_or_reaction 93432 non-null object
8 evidence_code 93432 non-null object
9 species 93432 non-null object
10 human 93432 non-null object
11 pe_name 63729 non-null string
12 pe_location 63729 non-null string
13 interlayer 23259 non-null object
dtypes: float64(1), object(11), string(2)
memory usage: 16.1+ MB
onion.core.layer_code_to_name
{0: 'reactome_chebi',
1: 'reactome_physicalent',
2: 'reactome_physicalent_nameloc',
3: 'reactome_physicalent_name',
4: 'reactome_physicalent_loc',
5: 'reactome_pathway',
6: 'reactome_reactions'}
Inspecting LipiNet metagraph for Reactome#
# Tabulate node counts per layer and edge counts per
# (source_layer, target_layer) pair for the human-only tables; with
# print_tables=True both tables are also printed.
from onionnet.analytics import layer_stats, plot_layer_metagraph
nodes_by_layer, edges_by_pair = layer_stats(
df_nodes=df_reactome_nodes,
df_edges=df_reactome_edges,
print_tables=True
)
Node counts by layer:
| count | |
|---|---|
| layer | |
| reactome_reactions | 9456 |
| reactome_physicalent | 6277 |
| reactome_physicalent_nameloc | 5922 |
| reactome_physicalent_name | 4084 |
| reactome_chebi | 3068 |
| reactome_pathway | 2825 |
| reactome_physicalent_loc | 85 |
Interlayer edge count: 0
Edge counts by (source_layer, target_layer):
| edges | ||
|---|---|---|
| source_layer | target_layer | |
| reactome_physicalent | reactome_pathway | 57728 |
| reactome_reactions | 35704 | |
| reactome_pathway | reactome_pathway | 23259 |
| reactome_chebi | reactome_physicalent | 6353 |
| reactome_physicalent_nameloc | reactome_physicalent | 6001 |
| reactome_physicalent_loc | reactome_physicalent_nameloc | 5922 |
| reactome_physicalent_name | reactome_physicalent_nameloc | 5922 |
# Human-only layer metagraph without count annotations. The drawing options
# are collected in one mapping so they can be read at a glance.
metagraph_options = dict(
    nodes_by_layer=nodes_by_layer,
    node_size_range=(20, 30),
    node_text_size_range=(12, 16),
    edge_width_range=(6, 12),
    node_scaler="log",
    edge_scaler="log",
    show_labels=True,
    output_size=(1000, 1000),
    node_text_position=1,
    return_graph=True,
)
# Further options that are currently not passed:
#   pad_label_string=True
#   vertex_font='consolas'
#   family_extractor=my_family
#   family_colors={"sl": (0.2,0.6,0.9,0.9), "rhea": (0.9,0.4,0.1,0.9)}
mg, mg_pos = plot_layer_metagraph(edges_by_pair, **metagraph_options)
# Human-only layer metagraph again, with node and edge counts shown.
metagraph_options = dict(
    nodes_by_layer=nodes_by_layer,
    node_size_range=(20, 30),
    node_text_size_range=(12, 16),
    edge_width_range=(6, 12),
    node_scaler="log",
    edge_scaler="log",
    show_labels=True,
    output_size=(1000, 1000),
    node_text_position=1,
    return_graph=True,
    show_edge_counts=True,
    show_node_counts=True,
)
# Further options that are currently not passed:
#   pad_label_string=True
#   vertex_font='consolas'
#   family_extractor=my_family
#   family_colors={"sl": (0.2,0.6,0.9,0.9), "rhea": (0.9,0.4,0.1,0.9)}
mg, mg_pos = plot_layer_metagraph(edges_by_pair, **metagraph_options)
df_reactome_edges[df_reactome_edges['source_layer']=='reactome_chebi']
| source_id | target_id | source_layer | target_layer | source_db_identifier | reactome_pe_name | url | event_name_pathway_or_reaction | evidence_code | species | human | pe_name | pe_location | interlayer | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 10033 | R-ALL-9014945 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 1 | 10036 | R-ALL-5696412 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 2 | 10055 | R-ALL-9611688 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 3 | 10093 | R-ALL-9648287 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 4 | 10093 | R-ALL-3296452 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6348 | 9884 | R-ALL-9713792 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 6349 | 9927 | R-ALL-9615299 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 6350 | 9943 | R-ALL-9714401 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 6351 | 9948 | R-ALL-9660998 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
| 6352 | 9948 | R-ALL-9614135 | reactome_chebi | reactome_physicalent | NaN | NaN | NaN | NaN | NaN | NaN | NaN | <NA> | <NA> | NaN |
6353 rows × 14 columns
df_reactome_edges[df_reactome_edges['source_layer']=='reactome_chebi']['source_id'].value_counts()
source_id
36080 308
16991 129
61120 103
33697 87
17843 61
...
32223 1
3223 1
32246 1
32269 1
43966 1
Name: count, Length: 3068, dtype: int64
import matplotlib.pyplot as plt

cm20 = plt.get_cmap('tab20')

# Use one handle on the underlying graph throughout. The original mixed
# `onion.core.graph` (when storing the maps) with `onion.g` (when reading them
# back), which made it unclear whether the same graph was meant.
full_graph = onion.core.graph

# Nodes: colour and shape are both driven by the decoded species label.
color_result = onionnet.visualisation.color_nodes(
    g=full_graph,
    prop_name="species_decoded",
    method="categorical",
    generate_legend=True,
    custom_colormap=cm20,
)
shape_result = onionnet.visualisation.shape_nodes(
    g=full_graph,
    prop_name="species_decoded",
    shape_method="categorical",
    generate_legend=True,
)

# Create summary dict for convenience
graphic_styles = {**color_result, **shape_result}
graphic_styles

# Assign some of the properties that we will likely be using often back to the graph
full_graph.vp['v_color_level'] = graphic_styles['v_color']
full_graph.vp['v_shape_layer'] = graphic_styles['v_shape']

# Build the pathway-layer view once and reuse it for both the summary print
# and the drawing.
pathway_view = onion.view_layers('reactome_pathway')
print(pathway_view)
graph_draw(
    pathway_view,
    vertex_fill_color=full_graph.vp['v_color_level'],
    vertex_shape=full_graph.vp['v_shape_layer'],
)
<GraphView object, directed, with 2825 vertices and 2841 edges, 25 internal vertex properties, 25 internal edge properties, edges filtered by <EdgePropertyMap object with value type 'bool', for Graph 0x175375d60, at 0x17572c5f0>, vertices filtered by <VertexPropertyMap object with value type 'bool', for Graph 0x175375d60, at 0x175349e80>, at 0x175375d60>
<VertexPropertyMap object with value type 'vector<double>', for Graph 0x35a513bf0, at 0x1757185c0>
import plotly.express as px

net = onion.view_layers('reactome_pathway')

# note, for homo sapiens only, for now
homo_decoded_pathway_names = onionnet.exporter.export_info(net, mode='v')['name_decoded']

# First token of each pathway name. Split on runs of space, hyphen or slash:
# the same separators as the `splitter` regex used for the vertex annotations
# further down, so the frequency ranking and the annotations see the same
# tokens (previously slashes were not treated as separators here).
homo_decoded_pathway_names_firstword = homo_decoded_pathway_names.str.split(r'[ \-/]+').str[0]

# Bar chart of the 50 most frequent first words. The title now names the data
# actually plotted (human-only), not "all species".
px.bar(
    homo_decoded_pathway_names_firstword.value_counts().head(50),
    text_auto=True,
    title='Counts of the first word in the decoded pathway name for Homo sapiens',
)
# Frequency-ranked first words; only the 19 most common are kept.
# NOTE(review): presumably 19 so that, together with the 'other' bucket, there
# are 20 categories for the 20-colour 'tab20' map — confirm.
first_word_counts = homo_decoded_pathway_names_firstword.value_counts()
top20 = first_word_counts.index[:19].tolist()
def top20_anno(word, top_words=None):
    """Return *word* if it is one of the frequent first words, else ``'other'``.

    Args:
        word: Token to classify (the first word of a pathway name).
        top_words: Optional collection of words to keep. When omitted, the
            module-level ``top20`` list is used, exactly as before.

    Returns:
        ``word`` unchanged when it is in the collection, otherwise the
        string ``'other'``.
    """
    if top_words is None:
        # Backward-compatible default: fall back to the notebook-level list.
        top_words = top20
    return word if word in top_words else 'other'
import re

# compile a splitter that splits on space, dash or slash (one or more)
splitter = re.compile(r'[ \-/]+')

# First token of every decoded pathway name, in the iteration order of the
# 'name_decoded' vertex property.
annotated_fw = [
    splitter.split(name, 1)[0]
    for name in net.vp['name_decoded']
]

# The same tokens, with everything outside the frequent-word list collapsed
# to 'other'.
annotated_other = [top20_anno(token) for token in annotated_fw]

annotated_other[:10]
annotated_fw[:10]
['2', '3', '3', '3', '5', 'ABC', 'ABC', 'ABC', 'ABO', 'ADORA2B']
Now we create vertex properties from the lists
# Two string-valued vertex properties: the bucketed label (frequent word or
# 'other') and the plain first word. Both are filled in a single pass.
v_label_other = net.new_vertex_property("string")
v_label_fw = net.new_vertex_property("string")

for vertex, bucket_label, first_word in zip(net.vertices(), annotated_other, annotated_fw):
    v_label_other[vertex] = bucket_label
    v_label_fw[vertex] = first_word

net.vertex_properties["first_word_annot"] = v_label_other
net.vertex_properties["first_word_annotfull"] = v_label_fw
# NOTE(review): an earlier TODO asked for this call to take exactly the form
# used below, so it appears to be resolved. The recorded cell output names a
# different file ('.explore_reactome_pos_sfdp_fv_homo.tsv'), presumably from a
# run made before the file name was changed — re-run to refresh and confirm.
pos_sfdp_pathwayont_homo = onionnet.visualisation.load_or_compute_layout(net, filename='.data/.explore_reactome_pos_sfdp_pathwayont_homo.tsv')
[Computed] Saved layout for 2825 vertices → .data/.explore_reactome_pos_sfdp_fv_homo.tsv
# Colour the pathway vertices by their bucketed first-word label, draw the
# graph with those labels, then render the matching legend.
tab20_cmap = plt.get_cmap('tab20')

color_result = onionnet.visualisation.color_nodes(
    g=net,
    prop_name="first_word_annot",
    method="categorical",
    generate_legend=True,
    custom_colormap=tab20_cmap,
)

graph_draw(
    net,
    pos=pos_sfdp_pathwayont_homo,
    vertex_size=5,
    vertex_fill_color=color_result['v_color'],
    vertex_text=net.vp['first_word_annot'],
    output_size=(1500, 1500),
)

onionnet.visualisation.get_legend(
    source=color_result['legend_node_color'],
    title='Legend: Reactome first-word of pathway',
    custom_cmap=tab20_cmap,
)
# Same layout and colours, but vertex size and label font size both follow
# the out-degree, and the label is the plain first word.
out_degree = net.degree_property_map("out")

graph_draw(
    net,
    pos=pos_sfdp_pathwayont_homo,
    vertex_fill_color=color_result['v_color'],
    vertex_text=net.vp['first_word_annotfull'],
    vertex_text_position=-2,
    vertex_size=out_degree,
    vertex_font_size=out_degree,
    output_size=(1500, 1500),
)
from graph_tool.centrality import betweenness
from graph_tool.draw import graph_draw
def plot_reactome_ont(graph, pos, betweenness):
    """Draw a pathway graph sized by a per-vertex score and return its legend.

    Args:
        graph: Graph (or graph view) to draw. It must carry the
            ``first_word_annotfull`` vertex property, which is used as the
            vertex label.
        pos: Vertex layout, passed through to ``graph_draw``.
        betweenness: Vertex property map used for both the vertex size and
            the label font size. It is used as given, so callers scale it
            beforehand.

    Returns:
        The result of ``onionnet.visualisation.get_legend`` for the
        first-word colour legend.

    Note:
        Fill colours and the legend are taken from the module-level
        ``color_result``, so that must have been computed for the same
        vertices before calling this function.
    """
    graph_draw(
        graph,  # draw the graph that was passed in (the global `net` was used before)
        pos=pos,
        vertex_fill_color=color_result['v_color'],
        vertex_text=graph.vp['first_word_annotfull'],
        vertex_text_position=-2,
        vertex_font_size=betweenness,
        vertex_size=betweenness,
        output_size=(1500, 1500),
    )
    return onionnet.visualisation.get_legend(
        source=color_result['legend_node_color'],
        title='Legend: Reactome first-word of pathway',
        custom_cmap=plt.get_cmap('tab20'),
    )
# Compute directed betweenness, rescale it so the largest value becomes 30,
# add a floor of 3, and draw.
bv, _ = betweenness(net)  # raw betweenness prop-map
peak = bv.a.max()
if peak > 0:
    # Skip the rescale when every value is 0: dividing by 0 would turn the
    # whole map into NaNs and break the vertex sizes.
    bv.a /= peak / 30  # scale max→30px
bv.a = bv.a + 3  # so zero-betweenness nodes still get size >0
plot_reactome_ont(net, pos_sfdp_pathwayont_homo, bv)
Now inspecting undirected betweenness
# Compute betweenness on an undirected view of the same graph, rescale it so
# the largest value becomes 30, add a floor of 3, and draw.
bv, _ = betweenness(GraphView(net, directed=False))  # raw betweenness prop-map
peak = bv.a.max()
if peak > 0:
    # Skip the rescale when every value is 0: dividing by 0 would turn the
    # whole map into NaNs and break the vertex sizes.
    bv.a /= peak / 30  # scale max→30px
bv.a = bv.a + 3  # so zero-betweenness nodes still get size >0
plot_reactome_ont(net, pos_sfdp_pathwayont_homo, bv)
# 1) Compute betweenness on an undirected view of the graph
bv, _ = betweenness(GraphView(net, directed=False))
# 2) Scale & offset with fixed constants: multiply by 700, then add 5.
# NOTE(review): unlike the max-normalised variants, the largest resulting size
# depends on the largest betweenness value (about 70 only if that value is
# roughly 0.09) — confirm the intended size.
bv.a = bv.a * 7e2 + 5
plot_reactome_ont(net, pos_sfdp_pathwayont_homo, bv)