OpenStructure
dockq.py
Go to the documentation of this file.
1 from ost import geom
2 from ost import mol
3 from ost import seq
4 
5 def _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
6  ch1_aln = None, ch2_aln = None):
7  """ Preprocesses *mdl* and *ref*
8 
9  Sets int properties to each residue in mdl_ch1, mdl_ch2 as well as
10  the respective reference chains.
11  dockq_mapped: 1 if a residues in mdl_ch1 is mapped to a residue in ref_ch1
12  and vice versa, 0 otherwise. Same is done for mdl_ch2 and
13  ref_ch2.
14  dockq_idx: If a pair of residue is mapped, the same index will be set
15  to both residues. The index is unique otherwise.
16 
17  By default, mapping happens with residue numbers but you can enforce a
18  mapping with an alignment. In the example of ch1_aln, the first sequence
19  corresponds to the ATOMSEQ of ref_ch1 in ref and the second sequence to
20  the ATOMSEQ of mdl_ch1 in mdl.
21  """
22 
23  # set default values for dockq_mapped and dockq_idx properties
24  # => makes sure that we have a clean slate if stuff has been set in
25  # previous runs
26  for cname in [ref_ch1, ref_ch2]:
27  ch = ref.FindChain(cname)
28  for r in ch.residues:
29  r.SetIntProp("dockq_mapped", 0)
30  r.SetIntProp("dockq_idx", -1)
31  for cname in [mdl_ch1, mdl_ch2]:
32  ch = mdl.FindChain(cname)
33  for r in ch.residues:
34  r.SetIntProp("dockq_mapped", 0)
35  r.SetIntProp("dockq_idx", -1)
36 
37  dockq_idx = 0
38  if ch1_aln is not None and ch2_aln is not None:
39  # there are potentially already views attached to the alns but
40  # we go for *mdl* and *ref* here
41  if ch1_aln.GetCount() != 2 or ch2_aln.GetCount() != 2:
42  raise RuntimeError("Expect exactly two sequences in alns provided "
43  "to DockQ!")
44  tmp = ch1_aln.GetSequence(0)
45  ref_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
46  ref_s1.SetOffset(tmp.GetOffset())
47  ref_s1.AttachView(ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)}"))
48  tmp = ch1_aln.GetSequence(1)
49  mdl_s1 = seq.CreateSequence(tmp.GetName(), str(tmp))
50  mdl_s1.SetOffset(tmp.GetOffset())
51  mdl_s1.AttachView(mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)}"))
52  new_ch1_aln = seq.CreateAlignment(ref_s1, mdl_s1)
53  for col in new_ch1_aln:
54  if col[0] != '-' and col[1] != '-':
55  ref_r = col.GetResidue(0)
56  mdl_r = col.GetResidue(1)
57  if not (ref_r.IsValid() and ref_r.one_letter_code == col[0]):
58  raise RuntimeError("DockQ: mismatch between provided "
59  "alignments and ATOMSEQ in structures")
60  if not (mdl_r.IsValid() and mdl_r.one_letter_code == col[1]):
61  raise RuntimeError("DockQ: mismatch between provided "
62  "alignments and ATOMSEQ in structures")
63  ref_r.SetIntProp("dockq_idx", dockq_idx)
64  mdl_r.SetIntProp("dockq_idx", dockq_idx)
65  ref_r.SetIntProp("dockq_mapped", 1)
66  mdl_r.SetIntProp("dockq_mapped", 1)
67  dockq_idx += 1
68 
69  tmp = ch2_aln.GetSequence(0)
70  ref_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
71  ref_s2.SetOffset(tmp.GetOffset())
72  ref_s2.AttachView(ref.Select(f"cname={mol.QueryQuoteName(ref_ch2)}"))
73  tmp = ch2_aln.GetSequence(1)
74  mdl_s2 = seq.CreateSequence(tmp.GetName(), str(tmp))
75  mdl_s2.SetOffset(tmp.GetOffset())
76  mdl_s2.AttachView(mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch2)}"))
77  new_ch2_aln = seq.CreateAlignment(ref_s2, mdl_s2)
78  for col in new_ch2_aln:
79  if col[0] != '-' and col[1] != '-':
80  ref_r = col.GetResidue(0)
81  mdl_r = col.GetResidue(1)
82  if not (ref_r.IsValid() and ref_r.one_letter_code == col[0]):
83  raise RuntimeError("DockQ: mismatch between provided "
84  "alignments and ATOMSEQ in structures")
85  if not (mdl_r.IsValid() and mdl_r.one_letter_code == col[1]):
86  raise RuntimeError("DockQ: mismatch between provided "
87  "alignments and ATOMSEQ in structures")
88  ref_r.SetIntProp("dockq_idx", dockq_idx)
89  mdl_r.SetIntProp("dockq_idx", dockq_idx)
90  ref_r.SetIntProp("dockq_mapped", 1)
91  mdl_r.SetIntProp("dockq_mapped", 1)
92  dockq_idx += 1
93  else:
94  # go by residue numbers
95  for mdl_r in mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)}").residues:
96  ref_r = ref.FindResidue(ref_ch1, mdl_r.GetNumber())
97  if ref_r.IsValid():
98  ref_r.SetIntProp("dockq_idx", dockq_idx)
99  mdl_r.SetIntProp("dockq_idx", dockq_idx)
100  ref_r.SetIntProp("dockq_mapped", 1)
101  mdl_r.SetIntProp("dockq_mapped", 1)
102  dockq_idx += 1
103  for mdl_r in mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch2)}").residues:
104  ref_r = ref.FindResidue(ref_ch2, mdl_r.GetNumber())
105  if ref_r.IsValid():
106  ref_r.SetIntProp("dockq_idx", dockq_idx)
107  mdl_r.SetIntProp("dockq_idx", dockq_idx)
108  ref_r.SetIntProp("dockq_mapped", 1)
109  mdl_r.SetIntProp("dockq_mapped", 1)
110  dockq_idx += 1
111 
112  # set unique dockq_idx property for all residues that are still -1
113  for cname in [ref_ch1, ref_ch2]:
114  ch = ref.FindChain(cname)
115  for r in ch.residues:
116  if r.GetIntProp("dockq_idx") == -1:
117  r.SetIntProp("dockq_idx", dockq_idx)
118  dockq_idx += 1
119  for cname in [mdl_ch1, mdl_ch2]:
120  ch = mdl.FindChain(cname)
121  for r in ch.residues:
122  if r.GetIntProp("dockq_idx") == -1:
123  r.SetIntProp("dockq_idx", dockq_idx)
124  dockq_idx += 1
125 
126 def _GetContacts(ent, ch1, ch2, dist_thresh):
127  int1 = ent.Select(f"cname={mol.QueryQuoteName(ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch2)}]")
128  int2 = ent.Select(f"cname={mol.QueryQuoteName(ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ch1)}]")
129  contacts = set()
130  int1_p = [geom.Vec3List([a.pos for a in r.atoms]) for r in int1.residues]
131  int2_p = [geom.Vec3List([a.pos for a in r.atoms]) for r in int2.residues]
132  for r1, p1 in zip(int1.residues, int1_p):
133  for r2, p2 in zip(int2.residues, int2_p):
134  if p1.IsWithin(p2, dist_thresh):
135  contacts.add((r1.GetIntProp("dockq_idx"),
136  r2.GetIntProp("dockq_idx")))
137  return contacts
138 
139 def _ContactScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
140  dist_thresh=5.0):
141  ref_contacts = _GetContacts(ref, ref_ch1, ref_ch2, dist_thresh)
142  mdl_contacts = _GetContacts(mdl, mdl_ch1, mdl_ch2, dist_thresh)
143 
144  nnat = len(ref_contacts)
145  nmdl = len(mdl_contacts)
146 
147  fnat = len(ref_contacts.intersection(mdl_contacts))
148  if nnat > 0:
149  fnat /= nnat
150 
151  fnonnat = len(mdl_contacts.difference(ref_contacts))
152  if len(mdl_contacts) > 0:
153  fnonnat /= len(mdl_contacts)
154 
155  return (nnat, nmdl, fnat, fnonnat)
156 
157 def _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, dist_thresh=10.0,
158  cb_mode=False):
159 
160  # backbone atoms used for superposition
161  sup_atoms = ['CA','C','N','O']
162 
163  # make mapped residues accessible by the dockq_idx property
164  mapped_mdl = mdl.Select(f"cname={mol.QueryQuoteName(mdl_ch1)},{mol.QueryQuoteName(mdl_ch2)} and grdockq_mapped=1")
165  mapped_ref = ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)},{mol.QueryQuoteName(ref_ch2)} and grdockq_mapped=1")
166  ch = mapped_mdl.FindChain(mdl_ch1)
167  mdl_ch1_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
168  ch = mapped_mdl.FindChain(mdl_ch2)
169  mdl_ch2_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
170  ch = mapped_ref.FindChain(ref_ch1)
171  ref_ch1_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
172  ch = mapped_ref.FindChain(ref_ch2)
173  ref_ch2_residues = {r.GetIntProp("dockq_idx"): r for r in ch.residues}
174 
175  # iRMSD
176 
177  i_ref = ref
178  if cb_mode:
179  i_ref = ref.Select("aname=CB or (rname=GLY and aname=CA)")
180  int1 = i_ref.Select(f"cname={mol.QueryQuoteName(ref_ch1)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch2)}]")
181  int2 = i_ref.Select(f"cname={mol.QueryQuoteName(ref_ch2)} and {dist_thresh} <> [cname={mol.QueryQuoteName(ref_ch1)}]")
182  ref_pos = geom.Vec3List()
183  mdl_pos = geom.Vec3List()
184  for r in int1.residues:
185  idx = r.GetIntProp("dockq_idx")
186  if idx in ref_ch1_residues and idx in mdl_ch1_residues:
187  ref_r = ref_ch1_residues[idx]
188  mdl_r = mdl_ch1_residues[idx]
189  for aname in sup_atoms:
190  ref_a = ref_r.FindAtom(aname)
191  mdl_a = mdl_r.FindAtom(aname)
192  if ref_a.IsValid() and mdl_a.IsValid():
193  ref_pos.append(ref_a.pos)
194  mdl_pos.append(mdl_a.pos)
195  for r in int2.residues:
196  idx = r.GetIntProp("dockq_idx")
197  if idx in ref_ch2_residues and idx in mdl_ch2_residues:
198  ref_r = ref_ch2_residues[idx]
199  mdl_r = mdl_ch2_residues[idx]
200  for aname in sup_atoms:
201  ref_a = ref_r.FindAtom(aname)
202  mdl_a = mdl_r.FindAtom(aname)
203  if ref_a.IsValid() and mdl_a.IsValid():
204  ref_pos.append(ref_a.pos)
205  mdl_pos.append(mdl_a.pos)
206 
207  if len(mdl_pos) >= 3:
208  sup_result = mol.alg.SuperposeSVD(mdl_pos, ref_pos)
209  irmsd = sup_result.rmsd
210  else:
211  irmsd = 0.0
212 
213  # lRMSD
214 
216  n_ch1 = len(ref.FindChain(ref_ch1).residues)
217  n_ch2 = len(ref.FindChain(ref_ch2).residues)
218  if n_ch1 > n_ch2:
219  ref_receptor_residues = ref_ch1_residues.values()
220  ref_ligand_residues = ref_ch2_residues.values()
221  mdl_receptor_residues = \
222  [mdl_ch1_residues[idx] for idx in ref_ch1_residues.keys()]
223  mdl_ligand_residues = \
224  [mdl_ch2_residues[idx] for idx in ref_ch2_residues.keys()]
225  else:
226  ref_receptor_residues = ref_ch2_residues.values()
227  ref_ligand_residues = ref_ch1_residues.values()
228  mdl_receptor_residues = \
229  [mdl_ch2_residues[idx] for idx in ref_ch2_residues.keys()]
230  mdl_ligand_residues = \
231  [mdl_ch1_residues[idx] for idx in ref_ch1_residues.keys()]
232  ref_receptor_positions = geom.Vec3List()
233  mdl_receptor_positions = geom.Vec3List()
234  ref_ligand_positions = geom.Vec3List()
235  mdl_ligand_positions = geom.Vec3List()
236  for ref_r, mdl_r in zip(ref_receptor_residues, mdl_receptor_residues):
237  for aname in sup_atoms:
238  ref_a = ref_r.FindAtom(aname)
239  mdl_a = mdl_r.FindAtom(aname)
240  if ref_a.IsValid() and mdl_a.IsValid():
241  ref_receptor_positions.append(ref_a.pos)
242  mdl_receptor_positions.append(mdl_a.pos)
243  for ref_r, mdl_r in zip(ref_ligand_residues, mdl_ligand_residues):
244  for aname in sup_atoms:
245  ref_a = ref_r.FindAtom(aname)
246  mdl_a = mdl_r.FindAtom(aname)
247  if ref_a.IsValid() and mdl_a.IsValid():
248  ref_ligand_positions.append(ref_a.pos)
249  mdl_ligand_positions.append(mdl_a.pos)
250 
251  if len(mdl_receptor_positions) >= 3:
252  sup_result = mol.alg.SuperposeSVD(mdl_receptor_positions,
253  ref_receptor_positions)
254  mdl_ligand_positions.ApplyTransform(sup_result.transformation)
255  lrmsd = mdl_ligand_positions.GetRMSD(ref_ligand_positions)
256  else:
257  lrmsd = 0.0
258 
259  return (irmsd, lrmsd)
260 
261 def _ScaleRMSD(rmsd, d):
262  return 1.0/(1+(rmsd/d)**2)
263 
264 def _DockQ(fnat, lrmsd, irmsd, d1, d2):
265  """ The final number chrunching as described in the DockQ manuscript
266  """
267  return (fnat + _ScaleRMSD(lrmsd, d1) + _ScaleRMSD(irmsd, d2))/3
268 
269 def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
270  ch1_aln=None, ch2_aln=None, contact_dist_thresh = 5.0,
271  interface_dist_thresh = 10.0, interface_cb = False):
272  """ Computes DockQ for specified interface
273 
274  DockQ is described in: Sankar Basu and Bjoern Wallner (2016), "DockQ: A
275  Quality Measure for Protein-Protein Docking Models", PLOS one
276 
277  Residues are mapped based on residue numbers by default. If you provide
278  *ch1_aln* and *ch2_aln* you can enforce an arbitrary mapping.
279 
280  :param mdl: Model structure
281  :type mdl: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
282  :param ref: Reference structure, i.e. native structure
283  :type ref: :class:`ost.mol.EntityView`/:class:`ost.mol.EntityHandle`
284  :param mdl_ch1: Specifies chain in model constituting first part of
285  interface
286  :type mdl_ch1: :class:`str`
287  :param mdl_ch2: Specifies chain in model constituting second part of
288  interface
289  :type mdl_ch2: :class:`str`
290  :param ref_ch1: ref equivalent of mdl_ch1
291  :type ref_ch1: :class:`str`
292  :param ref_ch2: ref equivalent of mdl_ch2
293  :type ref_ch2: :class:`str`
294  :param ch1_aln: Alignment with two sequences to map *ref_ch1* and *mdl_ch1*.
295  The first sequence must match the sequence in *ref_ch1* and
296  the second to *mdl_ch1*.
297  :type ch1_aln: :class:`ost.seq.AlignmentHandle`
298  :param ch2_aln: Alignment with two sequences to map *ref_ch2* and *mdl_ch2*.
299  The first sequence must match the sequence in *ref_ch2* and
300  the second to *mdl_ch2*.
301  :type ch2_aln: :class:`ost.seq.AlignmentHandle`
302  :param contact_dist_thresh: Residues with any atom within this threshold
303  are considered to be in contact. Affects contact
304  based scores fnat and fnonnat. CAPRI suggests
305  to lower the default of 5.0 to 4.0 for
306  protein-peptide interactions.
307  :type contact_dist_thresh: :class:`float`
308  :param interface_dist_thresh: Residues with any atom within this threshold
309  to another chain are considered interface
310  residues. Affects irmsd. CAPRI suggests to
311  lower the default of 10.0 to 8.0 in
312  combination with interface_cb=True for
313  protein-peptide interactions.
314  :type interface_dist_thresh: :class:`float`
315  :param interface_cb: Only use CB atoms (CA for GLY) to identify interface
316  residues for irmsd. CAPRI suggests to enable this
317  flag in combination with lowering
318  *interface_dist_thresh* to 8.0 for protein-peptide
319  interactions.
320  :type interface_cb: :class:`bool`
321  :returns: :class:`dict` with keys nnat, nmdl, fnat, fnonnat, irmsd, lrmsd,
322  DockQ which corresponds to the equivalent values in the original
323  DockQ implementation.
324  """
325  _PreprocessStructures(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
326  ch1_aln = ch1_aln, ch2_aln = ch2_aln)
327  nnat, nmdl, fnat, fnonnat = _ContactScores(mdl, ref, mdl_ch1, mdl_ch2,
328  ref_ch1, ref_ch2,
329  dist_thresh = contact_dist_thresh)
330  irmsd, lrmsd = _RMSDScores(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2,
331  dist_thresh = interface_dist_thresh,
332  cb_mode = interface_cb)
333  return {"nnat": nnat,
334  "nmdl": nmdl,
335  "fnat": fnat,
336  "fnonnat": fnonnat,
337  "irmsd": round(irmsd, 3),
338  "lrmsd": round(lrmsd, 3),
339  "DockQ": round(_DockQ(fnat, lrmsd, irmsd, 8.5, 1.5), 3)}
def DockQ(mdl, ref, mdl_ch1, mdl_ch2, ref_ch1, ref_ch2, ch1_aln=None, ch2_aln=None, contact_dist_thresh=5.0, interface_dist_thresh=10.0, interface_cb=False)
Definition: dockq.py:271