OpenStructure
remote.py
Go to the documentation of this file.
1 #------------------------------------------------------------------------------
2 # This file is part of the OpenStructure project <www.openstructure.org>
3 #
4 # Copyright (C) 2008-2020 by the OpenStructure authors
5 #
6 # This library is free software; you can redistribute it and/or modify it under
7 # the terms of the GNU Lesser General Public License as published by the Free
8 # Software Foundation; either version 3.0 of the License, or (at your option)
9 # any later version.
10 # This library is distributed in the hope that it will be useful, but WITHOUT
11 # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 # FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
13 # details.
14 #
15 # You should have received a copy of the GNU Lesser General Public License
16 # along with this library; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #------------------------------------------------------------------------------
19 
20 import urllib.request, urllib.error, urllib.parse
21 import tempfile
22 
23 from ost.io import LoadPDB, LoadMMCIF
24 
class RemoteRepository:
    """
    A remote repository represents a structural database accessible through the
    internet, e.g. the PDB or SWISS-MODEL template library.

    :param name: Name of the repository (only used in error messages)
    :param url_pattern: URL pattern for the repository. The required format is
                        described in :func:`URLForID`
    :param type: Data format to expect at the resolved URL. Must be one of
                 ('pdb', 'cif')
    :param id_transform: Transformation to apply to an ID before resolving the
                         URL in :func:`URLForID`. 'lower' and 'upper' are
                         recognized; any other value leaves the ID unchanged.

    :type name: :class:`str`
    :type url_pattern: :class:`str`
    :type type: :class:`str`
    :type id_transform: :class:`str`

    :raises: :class:`ValueError` if *type* is neither 'pdb' nor 'cif'
    """

    def __init__(self, name, url_pattern, type, id_transform='upper'):
        # Reject unsupported formats before any state is stored.
        if type not in ('cif', 'pdb'):
            raise ValueError('only cif and pdb types are supported')
        self.name = name
        self.url_pattern = url_pattern
        self.type = type
        self.id_transform = id_transform

    def URLForID(self, id):
        """
        Resolves URL given *url_pattern* and *id_transform* provided at object
        initialization.
        The *url_pattern* must contain substring '$ID'. Given *id*, the URL to
        the structure gets constructed by applying *id_transform* and inserting
        it at every location of '$ID'. e.g.
        'https://files.rcsb.org/view/$ID.pdb' given 1ake as *id* and 'upper' as
        *id_transform* resolves to: 'https://files.rcsb.org/view/1AKE.pdb'

        :param id: ID to resolve
        :type id: :class:`str`
        :returns: The resolved URL as :class:`str`
        """
        if self.id_transform == 'upper':
            id = id.upper()
        elif self.id_transform == 'lower':
            id = id.lower()
        return self.url_pattern.replace('$ID', id)

    def Get(self, id):
        """
        Resolves URL with :func:`URLForID`, dumps the content in a temporary
        file and returns the open temporary file object.

        The returned object is a :func:`tempfile.NamedTemporaryFile`; its path
        is available as the *name* attribute. The file is deleted as soon as
        the object is closed or garbage collected, so keep a reference to it
        for as long as the file is needed.

        :param id: ID to resolve
        :type id: :class:`str`
        :returns: Temporary file holding the downloaded content
        :raises: :class:`IOError` if the server answers with an HTTP status
                 other than 200
        """
        remote_url = self.URLForID(id)
        # Keep a trailing '.gz' in the suffix so the compression stays
        # recognizable from the temporary file name.
        tmp_file_suffix = '.%s' % self.type
        if remote_url.endswith('.gz'):
            tmp_file_suffix += '.gz'

        try:
            connection = urllib.request.urlopen(remote_url)
        except urllib.error.HTTPError as e:
            raise IOError('Could not load %s from %s (status code %d, url %s)'
                          % (id, self.name, e.code, remote_url)) from e

        try:
            if hasattr(connection, 'code'):
                status = connection.code
            else:
                status = connection.getcode()
            # Non-HTTP schemes (e.g. file://) report no status code at all;
            # urlopen() not raising is the only success indicator there.
            if status is not None and status != 200:
                raise IOError(
                    'Could not load %s from %s (status code %d, url %s)'
                    % (id, self.name, status, remote_url))
            tmp_file = tempfile.NamedTemporaryFile(suffix=tmp_file_suffix)
            try:
                tmp_file.write(connection.read())
                tmp_file.flush()
            except BaseException:
                # Do not leave a half-written temporary file behind.
                tmp_file.close()
                raise
        finally:
            # Always release the network connection / file handle.
            connection.close()
        return tmp_file

    def Load(self, id):
        """
        Resolves URL with :func:`URLForID` and directly loads/returns the
        according :class:`ost.mol.EntityHandle`. Loading invokes
        :func:`ost.io.LoadPDB`/:func:`ost.io.LoadMMCIF` with default
        parameterization. If you need custom settings, you might want to
        consider to call :func:`Get` and do the loading manually.

        :param id: ID to resolve
        :type id: :class:`str`
        :returns: Whatever the loader selected by the repository type returns
        """
        # The context manager removes the temporary file once loading is done,
        # also when the loader raises.
        with self.Get(id) as tmp_file:
            if self.type == 'pdb':
                return LoadPDB(tmp_file.name)
            if self.type == 'cif':
                return LoadMMCIF(tmp_file.name)
112 
# Predefined repositories, keyed by the short name accepted as *from_repo* by
# RemoteGet() and RemoteLoad().
REMOTE_REPOSITORIES = dict(
    pdb=RemoteRepository(
        name='rcsb.org (PDB)',
        url_pattern='https://files.rcsb.org/download/$ID.pdb.gz',
        type='pdb',
        id_transform='upper',
    ),
    smtl=RemoteRepository(
        name='SMTL',
        url_pattern='https://swissmodel.expasy.org/templates/$ID.pdb',
        type='pdb',
        id_transform='lower',
    ),
    cif=RemoteRepository(
        name='rcsb.org (mmCIF)',
        url_pattern='https://files.rcsb.org/download/$ID.cif.gz',
        type='cif',
        id_transform='lower',
    ),
    pdb_redo=RemoteRepository(
        name='pdbredo',
        url_pattern='https://pdb-redo.eu/db/$ID/$ID_besttls.pdb.gz',
        type='pdb',
        id_transform='lower',
    ),
)
123 
def RemoteGet(id, from_repo='pdb'):
    """
    Fetches *id* from one of the predefined repositories by calling
    :func:`RemoteRepository.Get` on it.

    :param id: ID to resolve in the selected repository
    :param from_repo: Key of a predefined repository in REMOTE_REPOSITORIES
                      ('pdb', 'smtl', 'cif', 'pdb_redo')
    :type from_repo: :class:`str`
    :returns: The result of :func:`RemoteRepository.Get`
    :raises: :class:`ValueError` if *from_repo* names no usable repository
    """
    selected = REMOTE_REPOSITORIES.get(from_repo)
    if selected:
        return selected.Get(id)
    raise ValueError('%s is not a valid repository' % from_repo)
136 
def RemoteLoad(id, from_repo='pdb'):
    """
    Loads *id* from one of the predefined repositories by calling
    :func:`RemoteRepository.Load` on it.

    :param id: ID to resolve in the selected repository
    :param from_repo: Key of a predefined repository in REMOTE_REPOSITORIES
                      ('pdb', 'smtl', 'cif', 'pdb_redo')
    :type from_repo: :class:`str`
    :returns: The result of :func:`RemoteRepository.Load`
    :raises: :class:`ValueError` if *from_repo* names no usable repository
    """
    selected = REMOTE_REPOSITORIES.get(from_repo)
    if selected:
        return selected.Load(id)
    raise ValueError('%s is not a valid repository' % from_repo)
def __init__(self, name, url_pattern, type, id_transform='upper')
Definition: remote.py:43
def RemoteLoad(id, from_repo='pdb')
Definition: remote.py:137
def RemoteGet(id, from_repo='pdb')
Definition: remote.py:124
Definition: io.dox:1
def LoadPDB(filename, restrict_chains="", no_hetatms=None, fault_tolerant=None, load_multi=False, join_spread_atom_records=None, calpha_only=None, profile='DEFAULT', remote=False, remote_repo='pdb', dialect=None, seqres=False, bond_feasibility_check=None, read_conect=False)
Definition: __init__.py:87
def LoadMMCIF(filename, fault_tolerant=None, calpha_only=None, profile='DEFAULT', remote=False, seqres=False, info=False)
Definition: __init__.py:349