3D Molecular Graph¶
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 | #!/usr/bin/env python
# -*- coding: utf-8 -*-
""" Similarity comparison between molecular configurations in 3D.
The molecules are first converted to molecular graphs using an 'adjacency rule'
as described in Tang & de Jong https://doi.org/10.1063/1.5078640, and their
pairwise similarities are then computed using the marginalized graph kernel.
"""
import numpy as np
import pandas as pd
from ase.build import molecule, bulk
from graphdot import Graph
from graphdot.kernel.molecular import Tang2019MolecularKernel
# Build the sample structures: three isolated molecules plus two rock-salt
# NaCl crystals that differ only in the `a` argument (5.64 vs 5.66).
small_title = ['H2O', 'HCl', 'NaCl']
bulk_title = ['NaCl-bulk', 'NaCl-bulk2']
# The crystal list gets its own name so that the imported `bulk` function is
# not shadowed and stays callable after this point.
bulk_crystals = [bulk('NaCl', 'rocksalt', a=a) for a in (5.64, 5.66)]
molecules = [molecule(name) for name in small_title] + bulk_crystals

# Convert every ASE structure to a molecular graph.
graphs = [Graph.from_ase(m) for m in molecules]

# Use the pre-defined molecular kernel to obtain the raw pairwise
# similarity matrix R (one row/column per graph, in `molecules` order).
kernel = Tang2019MolecularKernel(edge_length_scale=0.1)
R = kernel(graphs)

# Normalize the similarity matrix so that every diagonal entry becomes 1:
#     K[i, j] = R[i, j] / sqrt(R[i, i] * R[j, j])
# Broadcasting scales the rows and then the columns of R directly, which is
# equivalent to diag(d) @ R @ diag(d) without materializing the two dense
# diagonal matrices.
d = np.diag(R)**-0.5
K = d[:, np.newaxis] * R * d[np.newaxis, :]

# Note the difference between the NaCl variants.
# The labels follow the same order in which `molecules` was assembled.
title = small_title + bulk_title
print(pd.DataFrame(K, columns=title, index=title))
|
Expected output:
H2O HCl NaCl NaCl-bulk NaCl-bulk2
H2O 1.000000 0.073903 0.031434 0.031434 0.031434
HCl 0.073903 1.000000 0.015842 0.015842 0.015841
NaCl 0.031434 0.015842 1.000000 0.023764 0.023764
NaCl-bulk 0.031434 0.015842 0.023764 1.000000 0.803760
NaCl-bulk2 0.031434 0.015841 0.023764 0.803760 1.000000