Highlighting changing atoms and bonds in reactions

tutorial
reactions
A compact view of what changed in a product molecule
Published

November 26, 2021

A while ago there was a question on Twitter about highlighting the bonds which changed in a reaction. I put together a quick bit of example code to answer that question and made a note to do a blog post on the topic. I’m finally getting around to doing that blog post.

Fair warning: This one is heavy on code and light on words. :-)

from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import rdChemReactions
import rdkit
print(rdkit.__version__)
2021.09.2

Here’s something similar to the reaction from the question:

rxn1 = rdChemReactions.ReactionFromRxnBlock('''$RXN

      Mrv2102  111820212128

  2  1
$MOL

  Mrv2102 11182121282D          

 13 13  0  0  0  0            999 V2000
   -7.5723    2.6505    0.0000 C   0  0  0  0  0  0  0  0  0  1  0  0
   -6.8579    2.2380    0.0000 O   0  0  0  0  0  0  0  0  0  2  0  0
   -6.8580    1.4130    0.0000 C   0  0  0  0  0  0  0  0  0  3  0  0
   -6.1435    1.0004    0.0000 O   0  0  0  0  0  0  0  0  0  4  0  0
   -7.5725    1.0005    0.0000 C   0  0  0  0  0  0  0  0  0  5  0  0
   -7.5725    0.1755    0.0000 N   0  0  0  0  0  0  0  0  0  6  0  0
   -8.2869   -0.2369    0.0000 C   0  0  0  0  0  0  0  0  0  7  0  0
   -8.2870   -1.0620    0.0000 C   0  0  0  0  0  0  0  0  0  8  0  0
   -9.0015   -1.4745    0.0000 C   0  0  0  0  0  0  0  0  0  9  0  0
   -9.0015   -2.2995    0.0000 C   0  0  0  0  0  0  0  0  0 10  0  0
   -8.2870   -2.7120    0.0000 C   0  0  0  0  0  0  0  0  0 11  0  0
   -7.5726   -2.2995    0.0000 C   0  0  0  0  0  0  0  0  0 12  0  0
   -7.5726   -1.4745    0.0000 C   0  0  0  0  0  0  0  0  0 13  0  0
  1  2  1  0  0  0  0
  2  3  1  0  0  0  0
  3  4  2  0  0  0  0
  3  5  1  0  0  0  0
  5  6  1  0  0  0  0
  6  7  2  0  0  0  0
  7  8  1  0  0  0  0
  8  9  1  0  0  0  0
  8 13  2  0  0  0  0
  9 10  2  0  0  0  0
 10 11  1  0  0  0  0
 11 12  2  0  0  0  0
 12 13  1  0  0  0  0
M  END
$MOL

  Mrv2102 11182121282D          

 12 11  0  0  0  0            999 V2000
   -3.7934    0.7703    0.0000 C   0  0  0  0  0  0  0  0  0 14  0  0
   -3.0790    1.1828    0.0000 C   0  0  0  0  0  0  0  0  0 15  0  0
   -2.3645    0.7703    0.0000 C   0  0  0  0  0  0  0  0  0 16  0  0
   -3.7934   -0.0547    0.0000 C   0  0  0  0  0  0  0  0  0 17  0  0
   -4.5078   -0.4672    0.0000 O   0  0  0  0  0  0  0  0  0 18  0  0
   -3.0789   -0.4671    0.0000 O   0  0  0  0  0  0  0  0  0 19  0  0
   -1.6500    1.1828    0.0000 O   0  0  0  0  0  0  0  0  0 20  0  0
   -2.3645   -0.0547    0.0000 O   0  0  0  0  0  0  0  0  0 21  0  0
   -3.0788   -1.2922    0.0000 C   0  0  0  0  0  0  0  0  0 22  0  0
   -1.6500   -0.4672    0.0000 C   0  0  0  0  0  0  0  0  0 23  0  0
   -2.3644   -1.7046    0.0000 C   0  0  0  0  0  0  0  0  0 24  0  0
   -1.6500   -1.2922    0.0000 C   0  0  0  0  0  0  0  0  0 25  0  0
  1  2  2  0  0  0  0
  1  4  1  0  0  0  0
  2  3  1  0  0  0  0
  3  7  2  0  0  0  0
  3  8  1  0  0  0  0
  4  5  2  0  0  0  0
  4  6  1  0  0  0  0
  6  9  1  0  0  0  0
  8 10  1  0  0  0  0
  9 11  1  0  0  0  0
 10 12  1  0  0  0  0
M  END
$MOL

  Mrv2102 11182121282D          

 25 26  0  0  0  0            999 V2000
    5.1328    0.9532    0.0000 C   0  0  0  0  0  0  0  0  0  5  0  0
    5.8002    0.4683    0.0000 N   0  0  0  0  0  0  0  0  0  6  0  0
    5.5453   -0.3163    0.0000 C   0  0  0  0  0  0  0  0  0  7  0  0
    4.7203   -0.3163    0.0000 C   0  0  0  0  0  0  0  0  0 14  0  0
    4.4654    0.4683    0.0000 C   0  0  0  0  0  0  0  0  0 15  0  0
    5.1328    1.7782    0.0000 C   0  0  0  0  0  0  0  0  0  3  0  0
    3.6807    0.7232    0.0000 C   0  0  0  0  0  0  0  0  0 16  0  0
    4.2354   -0.9838    0.0000 C   0  0  0  0  0  0  0  0  0 17  0  0
    6.0302   -0.9838    0.0000 C   0  0  0  0  0  0  0  0  0  8  0  0
    6.8507   -0.8975    0.0000 C   0  0  0  0  0  0  0  0  0  9  0  0
    7.3356   -1.5650    0.0000 C   0  0  0  0  0  0  0  0  0 10  0  0
    7.0001   -2.3187    0.0000 C   0  0  0  0  0  0  0  0  0 11  0  0
    6.1796   -2.4049    0.0000 C   0  0  0  0  0  0  0  0  0 12  0  0
    5.6947   -1.7375    0.0000 C   0  0  0  0  0  0  0  0  0 13  0  0
    3.4149   -0.8975    0.0000 O   0  0  0  0  0  0  0  0  0 18  0  0
    4.5709   -1.7375    0.0000 O   0  0  0  0  0  0  0  0  0 19  0  0
    4.0860   -2.4049    0.0000 C   0  0  0  0  0  0  0  0  0 22  0  0
    3.2655   -2.3187    0.0000 C   0  0  0  0  0  0  0  0  0 24  0  0
    3.5092    1.5302    0.0000 O   0  0  0  0  0  0  0  0  0 20  0  0
    3.0676    0.1712    0.0000 O   0  0  0  0  0  0  0  0  0 21  0  0
    2.2830    0.4261    0.0000 C   0  0  0  0  0  0  0  0  0 23  0  0
    1.6699   -0.1259    0.0000 C   0  0  0  0  0  0  0  0  0 25  0  0
    5.8473    2.1907    0.0000 O   0  0  0  0  0  0  0  0  0  4  0  0
    4.4183    2.1907    0.0000 O   0  0  0  0  0  0  0  0  0  2  0  0
    4.4183    3.0157    0.0000 C   0  0  0  0  0  0  0  0  0  1  0  0
  1  2  1  0  0  0  0
  2  3  1  0  0  0  0
  3  4  1  0  0  0  0
  4  5  1  0  0  0  0
  1  5  1  0  0  0  0
  1  6  1  0  0  0  0
  5  7  1  0  0  0  0
  4  8  1  0  0  0  0
  3  9  1  0  0  0  0
 10 11  2  0  0  0  0
 11 12  1  0  0  0  0
 12 13  2  0  0  0  0
 13 14  1  0  0  0  0
  9 10  1  0  0  0  0
  9 14  2  0  0  0  0
  8 15  2  0  0  0  0
  8 16  1  0  0  0  0
 16 17  1  0  0  0  0
 17 18  1  0  0  0  0
  7 19  2  0  0  0  0
  7 20  1  0  0  0  0
 20 21  1  0  0  0  0
 21 22  1  0  0  0  0
  6 23  2  0  0  0  0
  6 24  1  0  0  0  0
 24 25  1  0  0  0  0
M  END
''')

Let’s take a look at the reaction:

IPythonConsole.molSize = (600,250)
IPythonConsole.highlightByReactant = True
rxn1

We can ask the reaction to tell us which atoms in the reactants are modified in the reaction:

rxn1.Initialize()
rxn1.GetReactingAtoms()
((4, 5, 6), (0, 1))

The information about which atoms react is enough to figure out which bonds change, but we have to do some additional work for this:

from collections import namedtuple
AtomInfo = namedtuple('AtomInfo',('mapnum','reactant','reactantAtom','product','productAtom'))
def map_reacting_atoms_to_products(rxn,reactingAtoms):
    ''' figures out which atoms in the products each mapped atom in the reactants maps to '''
    res = []
    for ridx,reacting in enumerate(reactingAtoms):
        reactant = rxn.GetReactantTemplate(ridx)
        for raidx in reacting:
            mapnum = reactant.GetAtomWithIdx(raidx).GetAtomMapNum()
            foundit=False
            for pidx,product in enumerate(rxn.GetProducts()):
                for paidx,patom in enumerate(product.GetAtoms()):
                    if patom.GetAtomMapNum()==mapnum:
                        res.append(AtomInfo(mapnum,ridx,raidx,pidx,paidx))
                        foundit = True
                        break
                    if foundit:
                        break
    return res
def get_mapped_neighbors(atom):
    ''' test all mapped neighbors of a mapped atom'''
    res = {}
    amap = atom.GetAtomMapNum()
    if not amap:
        return res
    for nbr in atom.GetNeighbors():
        nmap = nbr.GetAtomMapNum()
        if nmap:
            if amap>nmap:
                res[(nmap,amap)] = (atom.GetIdx(),nbr.GetIdx())
            else:
                res[(amap,nmap)] = (nbr.GetIdx(),atom.GetIdx())
    return res

BondInfo = namedtuple('BondInfo',('product','productAtoms','productBond','status'))
def find_modifications_in_products(rxn):
    ''' returns a 2-tuple with the modified atoms and bonds from the reaction '''
    reactingAtoms = rxn.GetReactingAtoms()
    amap = map_reacting_atoms_to_products(rxn,reactingAtoms)
    res = []
    seen = set()
    # this is all driven from the list of reacting atoms:
    for _,ridx,raidx,pidx,paidx in amap:
        reactant = rxn.GetReactantTemplate(ridx)
        ratom = reactant.GetAtomWithIdx(raidx)
        product = rxn.GetProductTemplate(pidx)
        patom = product.GetAtomWithIdx(paidx)

        rnbrs = get_mapped_neighbors(ratom)
        pnbrs = get_mapped_neighbors(patom)
        for tpl in pnbrs:
            pbond = product.GetBondBetweenAtoms(*pnbrs[tpl])
            if (pidx,pbond.GetIdx()) in seen:
                continue
            seen.add((pidx,pbond.GetIdx()))
            if not tpl in rnbrs:
                # new bond in product
                res.append(BondInfo(pidx,pnbrs[tpl],pbond.GetIdx(),'New'))
            else:
                # present in both reactants and products, check to see if it changed
                rbond = reactant.GetBondBetweenAtoms(*rnbrs[tpl])
                if rbond.GetBondType()!=pbond.GetBondType():
                    res.append(BondInfo(pidx,pnbrs[tpl],pbond.GetIdx(),'Changed'))
    return amap,res

Let’s look at what that function returns for our reaction:

atms,bnds = find_modifications_in_products(rxn1)
print(atms)
print(bnds)
[AtomInfo(mapnum=5, reactant=0, reactantAtom=4, product=0, productAtom=0), AtomInfo(mapnum=6, reactant=0, reactantAtom=5, product=0, productAtom=1), AtomInfo(mapnum=7, reactant=0, reactantAtom=6, product=0, productAtom=2), AtomInfo(mapnum=14, reactant=1, reactantAtom=0, product=0, productAtom=3), AtomInfo(mapnum=15, reactant=1, reactantAtom=1, product=0, productAtom=4)]
[BondInfo(product=0, productAtoms=(4, 0), productBond=4, status='New'), BondInfo(product=0, productAtoms=(2, 1), productBond=1, status='Changed'), BondInfo(product=0, productAtoms=(3, 2), productBond=2, status='New'), BondInfo(product=0, productAtoms=(4, 3), productBond=3, status='Changed')]

Finally, define the funciton which we’ll use to draw the product molecule with highlights shown for bonds and atoms involved in the reaction:

from IPython.display import Image
def draw_product_with_modified_bonds(rxn,atms,bnds,productIdx=None,showAtomMaps=False):
    if productIdx is None:
        pcnts = [x.GetNumAtoms() for x in rxn.GetProducts()]
        largestProduct = list(sorted(zip(pcnts,range(len(pcnts))),reverse=True))[0][1]
        productIdx = largestProduct
    d2d = Draw.rdMolDraw2D.MolDraw2DCairo(350,300)
    pmol = Chem.Mol(rxn.GetProductTemplate(productIdx))
    Chem.SanitizeMol(pmol)
    if not showAtomMaps:
        for atom in pmol.GetAtoms():
            atom.SetAtomMapNum(0)
    bonds_to_highlight=[]
    highlight_bond_colors={}
    atoms_seen = set()
    for binfo in bnds:
        if binfo.product==productIdx and binfo.status=='New':
            bonds_to_highlight.append(binfo.productBond)
            atoms_seen.update(binfo.productAtoms)
            highlight_bond_colors[binfo.productBond] = (1,.4,.4)
        if binfo.product==productIdx and binfo.status=='Changed':
            bonds_to_highlight.append(binfo.productBond)
            atoms_seen.update(binfo.productAtoms)
            highlight_bond_colors[binfo.productBond] = (.4,.4,1)
    atoms_to_highlight=set()
    for ainfo in atms:
        if ainfo.product != productIdx or ainfo.productAtom in atoms_seen:
            continue
        atoms_to_highlight.add(ainfo.productAtom)

    d2d.drawOptions().useBWAtomPalette()
    d2d.drawOptions().continuousHighlight=False
    d2d.drawOptions().highlightBondWidthMultiplier = 24
    d2d.drawOptions().setHighlightColour((.9,.9,0))
    d2d.drawOptions().fillHighlights=False
    atoms_to_highlight.update(atoms_seen)
    d2d.DrawMolecule(pmol,highlightAtoms=atoms_to_highlight,highlightBonds=bonds_to_highlight,
                     highlightBondColors=highlight_bond_colors)
    d2d.FinishDrawing()
    return d2d.GetDrawingText()

Now we can draw the highlighted product molecule:

Image(draw_product_with_modified_bonds(rxn1,atms,bnds))

Let’s look at some other reactions. I will use the SI data from

import pandas as pd
df = pd.read_csv('../data/reaction_data_ci6b00564/dataSetB.csv')
df.head()
rxn_Class patentID rxnSmiles_Mapping_NameRxn reactantSet_NameRxn NameRxn_Mapping_Complete rxnSmiles_Mapping_IndigoTK reactantSet_IndigoTK IndigoTK_Mapping_Complete rxnSmiles_IndigoAutoMapperKNIME reactantSet_IndigoAutoMapperKNIME IndigoAutoMapperKNIME_Mapping_Complete
0 6 US05849732 C.CCCCCC.CO.O=C(OCc1ccccc1)[NH:1][CH2:2][CH2:3... set([3, 4]) True C(OC([NH:11][CH2:12][CH2:13][CH2:14][CH2:15][C... set([0, 2]) True C.CCCCCC.CO.[CH3:10][O:11][C:12]([C@@H:14]([NH... set([3, 4]) True
1 2 US20120114765A1 O[C:1](=[O:2])[c:3]1[cH:4][c:5]([N+:6](=[O:7])... set([0, 1]) True [Cl:1][c:2]1[cH:3][n:4][cH:5][c:6]([Cl:20])[c:... set([0, 1]) True [NH2:1][c:2]1[c:11]2[c:6]([cH:7][n:8][cH:9][cH... set([0, 1]) True
2 1 US08003648B2 Cl.O=[CH:1][c:2]1[cH:3][cH:4][c:5](-[c:6]2[n:7... set([1, 3]) True [CH2:1]([NH:3][CH2:4][CH3:5])[CH3:2].C([BH3-])... set([0, 3]) True [CH3:1][CH2:2][NH:3][CH2:4][CH3:5].[CH3:6][c:7... set([0, 1]) True
3 1 US09045475B2 CC(=O)O[BH-](OC(C)=O)OC(C)=O.ClCCl.O=[C:1]([CH... set([2, 3]) True [nH:1]1[c:5]2[n:6][cH:7][c:8]([O:10][c:11]3[cH... set([0, 3]) True CC(O[BH-](OC(=O)C)OC(=O)C)=O.[CH3:14][C:15]1([... set([1, 3]) True
4 2 US08188098B2 CCN(C(C)C)C(C)C.ClCCl.Cl[C:1](=[O:2])[O:3][CH:... set([2, 5]) True Cl[C:2]([O:4][CH:5]1[CH2:9][CH2:8][CH2:7][CH2:... set([0, 2]) True CCN(C(C)C)C(C)C.[CH3:10][CH2:11][O:12][c:13]1[... set([1, 4]) True
rxnclass = 1
class_smis = df[df['rxn_Class']==rxnclass].rxnSmiles_Mapping_NameRxn.to_list()
rxn = rdChemReactions.ReactionFromSmarts(class_smis[3],useSmiles=True)
rxn.Initialize()
atms,bnds = find_modifications_in_products(rxn)
print(atms)
print(bnds)
Image(draw_product_with_modified_bonds(rxn,atms,bnds))
[AtomInfo(mapnum=1, reactant=3, reactantAtom=1, product=0, productAtom=0), AtomInfo(mapnum=7, reactant=4, reactantAtom=0, product=0, productAtom=6)]
[BondInfo(product=0, productAtoms=(6, 0), productBond=5, status='New')]

rxnclass = 4
class_smis = df[df['rxn_Class']==rxnclass].rxnSmiles_Mapping_NameRxn.to_list()
rxn = rdChemReactions.ReactionFromSmarts(class_smis[1],useSmiles=True)
rxn.Initialize()
atms,bnds = find_modifications_in_products(rxn)
print(atms)
print(bnds)
Image(draw_product_with_modified_bonds(rxn,atms,bnds))
[AtomInfo(mapnum=1, reactant=1, reactantAtom=1, product=0, productAtom=0), AtomInfo(mapnum=2, reactant=1, reactantAtom=2, product=0, productAtom=9), AtomInfo(mapnum=16, reactant=2, reactantAtom=0, product=0, productAtom=18), AtomInfo(mapnum=17, reactant=2, reactantAtom=1, product=0, productAtom=16), AtomInfo(mapnum=19, reactant=2, reactantAtom=3, product=0, productAtom=15)]
[BondInfo(product=0, productAtoms=(9, 0), productBond=8, status='Changed'), BondInfo(product=0, productAtoms=(18, 0), productBond=18, status='New'), BondInfo(product=0, productAtoms=(15, 9), productBond=14, status='New'), BondInfo(product=0, productAtoms=(16, 18), productBond=17, status='Changed'), BondInfo(product=0, productAtoms=(15, 16), productBond=15, status='Changed')]

Look at an example where there are no changed bonds in the products but where there is a changed atom:

rxnclass = 7
class_smis = df[df['rxn_Class']==rxnclass].rxnSmiles_Mapping_NameRxn.to_list()
rxn = rdChemReactions.ReactionFromSmarts(class_smis[1],useSmiles=True)
rxn.Initialize()
atms,bnds = find_modifications_in_products(rxn)
print(atms)
print(bnds)
Image(draw_product_with_modified_bonds(rxn,atms,bnds))
[AtomInfo(mapnum=1, reactant=1, reactantAtom=1, product=0, productAtom=0)]
[]