Source code for schrodinger.protein.remediate
"""
Module for remediating names in PDB structures - converting PDBv2 residue and
atom names to PDBv3 convention.
Used by:
psp-src/python/modules/PXfiles.py
mmshare/python/scripts/residue_scanning_backend.py
Copyright Schrodinger, LLC. All rights reserved.
"""
import json
import os
import re
import sys
from schrodinger import structure
from schrodinger.infra import mm
# Name-exchange table, filled in by init(); None until the table is loaded.
atom_exch = None


def init():
    """
    Load the name-exchange table into the module-level ``atom_exch``.

    The table is read from ``remediate.json`` in the directory of this
    module. The file's ``"atom_exch"`` mapping becomes ``atom_exch`` and the
    entries of its ``"schrod_atom_exch"`` mapping are merged into it,
    replacing entries with the same key. Nothing is done when ``atom_exch``
    already holds a non-empty table.
    """
    global atom_exch
    if not atom_exch:
        module_dir = os.path.dirname(__file__)
        table_path = os.path.join(module_dir, "remediate.json")
        with open(table_path) as table_file:
            tables = json.load(table_file)
        atom_exch = tables["atom_exch"]
        atom_exch.update(tables["schrod_atom_exch"])


# Load the table once, when the module is imported.
init()
def remediate_ct(ct):
    """
    Rename PDBv2 residue and atom names in ``ct`` to the PDBv3 convention.

    The structure is modified in place. Nothing is changed when the
    structure's PDB format version property already starts with "3".

    Two passes are made:

    1. Residues named ``A``, ``C``, ``G`` or ``T`` that have no O2' atom are
       treated as DNA and renamed to ``DA``, ``DC``, ``DG`` or ``DT``.
    2. For every atom, ``*`` in the atom names of nucleotides becomes ``'``,
       left-justified one- and two-letter residue names are right-justified,
       and the resulting "<atom name> <residue name>" key is looked up in
       the module-level ``atom_exch`` table to obtain the new names.

    :param ct: structure to remediate; it must provide ``property``,
        ``residue`` and ``atom`` the way the object returned by
        ``structure.StructureReader.read`` does.
    :return: None
    """
    # A version string starting with "3" marks an already remediated
    # structure.
    if mm.M2IO_PDB_FORMAT_VERSION in list(ct.property):
        pdb_version = ct.property[mm.M2IO_PDB_FORMAT_VERSION]
        if len(pdb_version) > 0 and pdb_version[0] == "3":
            return

    # Fix residue names. They are four columns wide with the base letter in
    # the third column, which is what the ' D' + pdbres[2:] rename relies on.
    v2_nucleotides = ('  A ', '  C ', '  G ', '  T ')
    ribose_o2_names = (' O2*', " O2'")
    for residue in ct.residue:
        if residue.pdbres not in v2_nucleotides:
            continue
        if any(atom.pdbname in ribose_o2_names for atom in residue.atom):
            # It's RNA, not DNA.
            continue
        new_pdbres = ' D' + residue.pdbres[2:]
        for atom in residue.atom:
            atom.pdbres = new_pdbres

    # Fix atom names. Everything below works on the first three columns of
    # the residue name; the fourth column is re-added as a blank at the end.
    nucleotides = (' DA', ' DC', ' DG', ' DT', '  A', '  C', '  G', '  U')
    # One letter followed by two blanks, or two letters followed by one.
    left_justified = re.compile(r'[a-zA-Z]  |[a-zA-Z][a-zA-Z] ')
    for atom in ct.atom:
        pdbres = atom.pdbres[0:3]
        pdbname = atom.pdbname
        if pdbres in nucleotides:
            # Replace any * with '.
            pdbname = pdbname.replace('*', "'")

        # Make any left-justified residue name right-justified while keeping
        # it three columns wide, so the lookup key below stays aligned.
        if left_justified.match(pdbres):
            pdbres = pdbres.rstrip(' ').rjust(3)

        # Keys and values of atom_exch are "<4-char atom name> <3-char
        # residue name>".
        identity = pdbname + ' ' + pdbres
        if identity in atom_exch:
            new_identity = atom_exch[identity]
            pdbname = new_identity[0:4]
            pdbres = new_identity[5:]
        pdbres += ' '

        atom.pdbname = pdbname
        atom.pdbres = pdbres
if __name__ == '__main__':
    # Command-line use: read the structure from the first argument,
    # remediate it and write it to the second argument. Any further
    # arguments are ignored.
    if len(sys.argv) < 3:
        sys.exit('Usage: %s <input file> <output file>' % sys.argv[0])
    # Named *_file so the builtin input() is not shadowed.
    input_file = sys.argv[1]
    output_file = sys.argv[2]
    ct = structure.StructureReader.read(input_file)
    remediate_ct(ct)
    ct.write(output_file)