# Source code for schrodinger.protein.remediate
"""
Module for remediating names in PDB structures - converting PDBv2 residue and
atom names to PDBv3 convention.
Used by:
  psp-src/python/modules/PXfiles.py
  mmshare/python/scripts/residue_scanning_backend.py
Copyright Schrodinger, LLC. All rights reserved.
"""
import json
import os
import re
import sys
from schrodinger import structure
from schrodinger.infra import mm
# Exchange table keyed by "<4-char atom name> <3-char residue name>"; each
# value is the replacement identity in the same layout. It stays None until
# init() loads it from remediate.json.
atom_exch = None
def init():
    """
    Load the atom/residue name exchange table into the module-level
    `atom_exch` dictionary.

    The table is read from `remediate.json`, located in the same directory as
    this module. Entries under the "schrod_atom_exch" key are merged over the
    entries under "atom_exch". Once a table has been loaded, further calls
    return immediately without touching the file.

    :raises OSError: if `remediate.json` cannot be opened.
    :raises ValueError: if the file does not contain valid JSON.
    :raises KeyError: if either expected top-level key is missing.
    """
    global atom_exch
    if atom_exch is not None:
        return
    json_path = os.path.join(os.path.dirname(__file__), "remediate.json")
    with open(json_path, encoding="utf-8") as fh:
        data = json.load(fh)
    # Build the merged table locally and publish it only once it is complete,
    # so a failure part-way through never leaves a half-populated global that
    # later calls would mistake for a finished load.
    table = dict(data["atom_exch"])
    table.update(data["schrod_atom_exch"])
    atom_exch = table
# Load the exchange table at import time so that remediate_ct() can look up
# atom_exch without callers having to call init() themselves.
init()
# One-letter nucleotide residue names (4-char field) that do not by
# themselves say whether the residue is DNA or RNA.
_AMBIGUOUS_NUCLEOTIDE_RESNAMES = ('  A ', '  C ', '  G ', '  T ')

# Atom names of the ribose 2' oxygen; its presence marks a residue as RNA.
_RNA_O2_ATOM_NAMES = (' O2*', ' O2\'')

# 3-char nucleic acid residue names whose atom names get '*' replaced by "'".
_NUCLEIC_RESNAMES = (' DA', ' DC', ' DG', ' DT', '  A', '  C', '  G', '  U')


def _right_justify_resname(pdbres):
    """Return the 3-char residue name with a left-justified 1- or 2-letter name moved to the right."""
    if re.match(r'[a-zA-Z]  ', pdbres):
        return re.sub(r'(.)\s\s', r'  \g<1>', pdbres)
    if re.match(r'[a-zA-Z][a-zA-Z] ', pdbres):
        # The pattern must consume exactly the three characters available;
        # requiring a fourth character here would never match.
        return re.sub(r'(..)\s', r' \g<1>', pdbres)
    return pdbres


def remediate_ct(ct):
    """
    Convert the residue and atom names of a structure to PDBv3 convention,
    modifying the structure in place.

    Nothing is changed when the structure's `mm.M2IO_PDB_FORMAT_VERSION`
    property is present and starts with "3".

    Otherwise:

    - Residues named A, C, G or T that have no O2*/O2' atom are renamed to
      DA, DC, DG or DT.
    - For nucleic acid residues, every '*' in an atom name becomes "'".
    - Left-justified one- and two-letter residue names are right-justified.
    - Atom/residue name pairs found in the module-level `atom_exch` table are
      replaced by the table's value.

    Every atom's `pdbres` is rewritten as a 3-character residue name followed
    by a single space.

    :param ct: structure to remediate; it must provide `property`, `residue`
        and `atom` as used below.
    :type ct: schrodinger.structure.Structure
    :return: None
    """
    version_key = mm.M2IO_PDB_FORMAT_VERSION
    if version_key in list(ct.property):
        version = ct.property[version_key]
        if len(version) > 0 and version[0] == "3":
            # Already remediated.
            return

    # Fix residue names.
    for residue in ct.residue:
        if residue.pdbres not in _AMBIGUOUS_NUCLEOTIDE_RESNAMES:
            continue
        if any(atom.pdbname in _RNA_O2_ATOM_NAMES for atom in residue.atom):
            # It's RNA, not DNA.
            continue
        new_pdbres = ' D' + residue.pdbres[2:]
        for atom in residue.atom:
            atom.pdbres = new_pdbres

    # Fix atom names.
    for atom in ct.atom:
        pdbres = atom.pdbres[0:3]
        pdbname = atom.pdbname
        if pdbres in _NUCLEIC_RESNAMES:
            # Replace any * with '.
            pdbname = pdbname.replace('*', '\'')
        pdbres = _right_justify_resname(pdbres)
        identity = pdbname + ' ' + pdbres
        if identity in atom_exch:
            # Table values use the same layout as the key: a 4-char atom
            # name, one separator character, then the residue name.
            new_identity = atom_exch[identity]
            pdbname = new_identity[0:4]
            pdbres = new_identity[5:]
        atom.pdbname = pdbname
        # Pad the residue name back out to the 4-character field.
        atom.pdbres = pdbres + ' '
if __name__ == '__main__':
    # Command-line use: read the structure from the first argument, remediate
    # it, and write the result to the second argument.
    if len(sys.argv) < 3:
        sys.exit('Usage: %s <input_file> <output_file>' % sys.argv[0])
    # Avoid the names `input`/`output` here: at module scope `input` would
    # shadow the builtin for the rest of the module.
    input_path = sys.argv[1]
    output_path = sys.argv[2]
    ct = structure.StructureReader.read(input_path)
    remediate_ct(ct)
    ct.write(output_path)