Source code for graphdot.graph

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""GraphDot's native graph container class

This module defines the class ``Graph`` that is used to store graphs across
this library, and provides conversion and importing methods from popular
graph formats.
"""
import itertools as it
import copy as cp
import numpy as np
import scipy.sparse
import warnings
from graphdot.codegen.typetool import common_min_type
from graphdot.minipandas import DataFrame
from graphdot.util.cookie import VolatileCookie
from ._from_ase import _from_ase
from ._from_networkx import _from_networkx
from ._from_pymatgen import _from_pymatgen
try:
    from ._from_rdkit import _from_rdkit
except ImportError:
    warnings.warn(
        'Cannot import RDKit, `graph.from_rdkit()` will be unavailable.\n'
    )
from ._to_networkx import _to_networkx


__all__ = ['Graph']


def _from_dict(d):
    """Return ``d`` as a ``DataFrame``.

    An object that already is a ``DataFrame`` is passed through untouched;
    anything else (e.g. a dict of column name-data pairs) is handed to the
    ``DataFrame`` constructor.
    """
    return d if isinstance(d, DataFrame) else DataFrame(d)


class Graph:
    """Container that stores a graph in GraphDot.

    Parameters
    ----------
    nodes: dataframe or dict
        Each row represents a node. A dict of column name-data pairs is
        converted into a dataframe. Must contain the column ``'!i'``.
    edges: dataframe or dict
        Each row represents an edge. A dict of column name-data pairs is
        converted into a dataframe. Must contain the columns ``'!i'`` and
        ``'!j'``.
    title: str
        A unique identifier of the graph.
    """

    def __init__(self, nodes, edges, title=''):
        self.title = str(title)
        self.nodes = _from_dict(nodes)
        self.edges = _from_dict(edges)
        assert '!i' in self.nodes
        assert '!i' in self.edges and '!j' in self.edges

    def __repr__(self):
        return (
            f'{type(self).__name__}(nodes={self.nodes!r}, '
            f'edges={self.edges!r}, title={self.title!r})'
        )

    @property
    def cookie(self):
        """The ``VolatileCookie`` attached to this graph, created lazily on
        first access. It is cleared by :py:meth:`permute` (in-place mode) and
        by :py:meth:`unify_datatype`."""
        try:
            return self.__cookie
        except AttributeError:
            self.__cookie = VolatileCookie()
            return self.__cookie

    def copy(self, deep=False):
        """Make a copy of an existing graph.

        Parameters
        ----------
        deep: boolean
            If deep=True, then real copies will be made for the node and edge
            dataframes as well as other user-specified attributes. Otherwise,
            only references to the dataframe columns and user-specified
            attributes will be inserted into the new graph.

        Returns
        -------
        g: Graph
            A new graph.
        """
        g = self.__class__(
            nodes=self.nodes.copy(deep=deep),
            edges=self.edges.copy(deep=deep),
            title=self.title
        )
        # Carry over every other instance attribute; the three constructor
        # arguments have already been handled above.
        for key, val in self.__dict__.items():
            if key not in ('nodes', 'edges', 'title'):
                g.__dict__[key] = cp.deepcopy(val) if deep else val
        return g

    def permute(self, perm, inplace=False):
        """Rearrange the node indices of a graph by a permutation array.

        Parameters
        ----------
        perm: sequence
            Array of permuted node indices
        inplace: boolean
            Whether to reorder the nodes in-place or to create a new graph.

        Returns
        -------
        permuted_graph: Graph
            The original graph object (inplace=True) or a new one
            (inplace=False) with the nodes permuted.
        """
        if inplace:
            g = self
            self.cookie.clear()
        else:
            g = self.copy(deep=True)
        # The inverse permutation maps each old index to its new position.
        iperm = np.argsort(perm)
        g.nodes['!i'][:] = iperm[g.nodes['!i']]
        g.edges['!i'][:] = iperm[g.edges['!i']]
        g.edges['!j'][:] = iperm[g.edges['!j']]
        return g

    @property
    def adjacency_matrix(self):
        """Get the adjacency matrix of the graph as a sparse matrix.

        The matrix is assembled from the ``'!i'``/``'!j'`` edge columns (and
        ``'!w'`` as weights when that column exists, otherwise ones) and then
        added to its own transpose.

        Returns
        -------
        adjacency_matrix: sparse matrix
            The adjacency matrix, either weighted or unweighted depending on
            the original graph.
        """
        N = len(self.nodes)
        i = self.edges['!i']
        j = self.edges['!j']
        w = self.edges['!w'] if '!w' in self.edges else np.ones_like(i)
        A = scipy.sparse.coo_matrix((w, (i, j)), shape=(N, N))
        return A + A.T

    @property
    def laplacian(self):
        """Get the graph Laplacian as a sparse matrix.

        Returns
        -------
        laplacian: sparse matrix
            The laplacian matrix, either weighted or unweighted depending on
            the original graph.
        """
        A = self.adjacency_matrix
        # Column sums come back as a 1-by-N matrix; flatten them into a plain
        # 1-D array before placing them on the main diagonal.
        degree = np.asarray(A.sum(axis=0)).ravel()
        return scipy.sparse.diags(degree, 0) - A

    @staticmethod
    def has_unified_types(graphs):
        """Check if all graphs have the same set of nodal/edge features.

        Returns
        -------
        True or tuple
            ``True`` if the node and edge row types of every graph equal
            those of the first graph. Otherwise a tuple
            ``(component, first, offending)`` where ``component`` is
            ``'nodes'`` or ``'edges'``. Note that the tuple is itself truthy,
            so compare the result against ``True`` explicitly.
        """
        first = next(iter(graphs))
        node_t = first.nodes.rowtype()
        edge_t = first.edges.rowtype()
        for second in graphs:
            if second.nodes.rowtype() != node_t:
                return ('nodes', first, second)
            elif second.edges.rowtype() != edge_t:
                return ('edges', first, second)
        return True

    @staticmethod
    def _is_scalar_type(t):
        """Tell whether ``t`` represents a non-object NumPy scalar data type.

        Stand-in for ``np.issctype``, which is no longer available in
        NumPy 2.0.
        """
        if not isinstance(t, (type, np.dtype)):
            return False
        if isinstance(t, type) and issubclass(t, np.generic):
            return t is not np.object_
        try:
            return np.dtype(t).type is not np.object_
        except TypeError:
            return False

    @classmethod
    def unify_datatype(cls, graphs, inplace=False):
        """Ensure that each attribute has the same data type across graphs.

        Parameters
        ----------
        graphs: list
            A list of graphs that have the same set of node and edge
            features. The types for each attribute will then be chosen to be
            the smallest scalar type that can safely hold all the values as
            found across the graphs.
        inplace: bool
            Whether or not to modify the graph features in-place.

        Returns
        -------
        None or list
            If inplace is True, the graphs will be modified in-place and
            nothing will be returned. Otherwise, a new list of graphs with
            type-unified features will be returned.

        Raises
        ------
        TypeError
            If the graphs do not share the same node/edge columns, or if no
            common type can be found for an attribute or for its elements.
        """
        # copy graphs if not editing in-place
        for g in graphs:
            g.cookie.clear()
        if inplace is not True:
            graphs = [g.copy(deep=False) for g in graphs]

        # ensure all graphs have the same node and edge features
        features = {}
        for component in ['nodes', 'edges']:
            first = None
            for g in graphs:
                second = set(getattr(g, component).columns)
                if first is None:
                    first = second
                if second != first:
                    raise TypeError(
                        f'Graph {g} with {component[:-1]} features {second} '
                        'does not match with the other graphs.'
                    )
            # An empty list of graphs simply has no features to unify.
            features[component] = first if first is not None else set()

        # unify data type for each feature
        for component in ['nodes', 'edges']:
            group = [getattr(g, component) for g in graphs]
            for key in features[component]:
                types = [g[key].concrete_type for g in group]
                t = common_min_type.of_types(types)
                # `np.object` was merely an alias of the builtin `object` and
                # is gone from NumPy >= 1.24.
                if t == object:
                    t = common_min_type.of_types(types, coerce=False)
                if t is None:
                    raise TypeError(
                        f'Cannot unify attribute {key} containing mixed '
                        'object types'
                    )

                if cls._is_scalar_type(t):
                    for g in group:
                        g[key] = g[key].astype(t)
                elif t in [list, tuple, np.ndarray]:
                    # Flatten graphs -> rows -> elements to find one element
                    # type shared by every sequence in this column.
                    t_sub = common_min_type.of_values(
                        it.chain.from_iterable(
                            it.chain.from_iterable([g[key] for g in group])
                        )
                    )
                    if t_sub is None:
                        raise TypeError(
                            f'Cannot find a common type for elements in {key}.'
                        )
                    for g in group:
                        g[key] = [np.array(seq, dtype=t_sub) for seq in g[key]]

        # only returns if not editing in-place
        if inplace is not True:
            return graphs

    @classmethod
    def from_networkx(cls, graph, weight=None):
        """Convert from NetworkX ``Graph``

        Parameters
        ----------
        graph: a NetworkX ``Graph`` instance
            an undirected graph with homogeneous node and edge features, i.e.
            carrying same features.
        weight: str
            name of the attribute that encode edge weights

        Returns
        -------
        graphdot.graph.Graph
            the converted graph
        """
        return _from_networkx(cls, graph, weight)

    @classmethod
    def from_ase(cls, atoms, adjacency='default', use_charge=False,
                 use_pbc=True):
        """Convert from ASE atoms to molecular graph

        Parameters
        ----------
        atoms: ASE Atoms object
            A molecule as represented by a collection of atoms in 3D space.
        adjacency: 'default' or object
            A functor that implements the rule for making edges between
            atoms.
        use_charge: boolean
            Forwarded unchanged to the ASE converter; presumably controls
            whether atomic charges are used (confirm against ``_from_ase``).
        use_pbc: boolean or list of 3 booleans
            Whether to use the periodic boundary condition as specified in
            the atoms object to create edges between atoms.

        Returns
        -------
        graphdot.Graph:
            a molecular graph where atoms become nodes while edges resemble
            short-range interatomic interactions.
        """
        return _from_ase(cls, atoms, adjacency, use_charge, use_pbc)

    @classmethod
    def from_pymatgen(cls, molecule, use_pbc=True, adjacency='default'):
        """Convert from pymatgen molecule to molecular graph

        Parameters
        ----------
        molecule: pymatgen Molecule object
            A molecule as represented by a collection of atoms in 3D space.
        use_pbc: boolean or list of 3 booleans
            Whether to use the periodic boundary condition as specified in
            the atoms object to create edges between atoms.
        adjacency: 'default' or object
            A functor that implements the rule for making edges between
            atoms.

        Returns
        -------
        graphdot.Graph:
            A molecular graph where atoms become nodes while edges resemble
            short-range interatomic interactions.
        """
        return _from_pymatgen(cls, molecule, use_pbc, adjacency)

    @classmethod
    def from_smiles(cls, smiles):
        """DEPRECATED and replaced by from_rdkit."""
        raise RuntimeError(
            'from_smiles has been removed, use from_rdkit instead.'
        )

    @classmethod
    def from_rdkit(cls, mol, title=None, bond_type='order',
                   set_ring_list=True, set_ring_stereo=True):
        """Convert a RDKit molecule to a graph

        The underlying converter is only imported when RDKit is installed;
        without it this method is unavailable.

        Parameters
        ----------
        mol: RDKit molecule object
            The molecule to be converted.
        title: str or None
            Forwarded unchanged to the RDKit converter; presumably used as
            the title of the resulting graph (confirm against
            ``_from_rdkit``).
        bond_type: 'order' or 'type'
            If 'order', an edge attribute 'order' will be populated with
            numeric values such as 1 for single bonds, 2 for double bonds,
            and 1.5 for aromatic bonds. If 'type', an attribute 'type' will
            be populated with :py:class:`rdkit.Chem.BondType` values.
        set_ring_list: bool
            if True, a nodal attribute 'ring_list' will be used to store a
            list of the size of the rings that the atom participates in.
        set_ring_stereo: bool
            If True, an edge attribute 'ring_stereo' will be used to store
            the E-Z stereo configuration of substitutes at the end of each
            bond along a ring.

        Returns
        -------
        graphdot.Graph:
            A graph where nodes represent atoms and edges represent bonds.
            Each node and edge carries an array of features as inferred from
            the chemical structure of the molecule.
        """
        return _from_rdkit(cls, mol,
                           title=title,
                           bond_type=bond_type,
                           set_ring_list=set_ring_list,
                           set_ring_stereo=set_ring_stereo)

    def to_networkx(self):
        """Convert the graph to a NetworkX ``Graph`` and copy the node and
        edge attributes."""
        return _to_networkx(self)