I was sent the following code by a student in response to the Web Services practical:
## ~!/usr/bin/env python3
from urllib import request
import sys
import re
def ParseRST(result):
    """Extract the UniProt accession and residue number from a PDBSWS reply

    Input:  result --- The text returned by PDBSWS in which every newline
                       has been replaced by '#' (each pattern below relies
                       on '#' to mark the end of a field)
    Return: (ac, upresnum) --- The UniProt accession (string) and the
                       UniProt residue number (integer)
    Raises: ValueError if the AC: or UPCOUNT: field is missing, or if the
            UPCOUNT: value is not an integer
    """
    # Raw strings so that \s reaches the regex engine untouched; in an
    # ordinary string literal it is an invalid escape sequence
    acMatch = re.match(r'.*AC:\s+(.*?)#', result)
    countMatch = re.match(r'.*UPCOUNT:\s+(.*?)#', result)

    # Fail with a clear message rather than an AttributeError on None
    if acMatch is None or countMatch is None:
        raise ValueError('No AC: / UPCOUNT: field found in the PDBSWS result')

    return (acMatch.group(1), int(countMatch.group(1)))
def ReadPDBSWS(pdbcode, chain, resnum):
    """Query PDBSWS for a PDB residue and return the parsed result

    Input:  pdbcode --- A PDB code
            chain   --- A chain label; pass '' to leave it out of the query
            resnum  --- A residue number (string or integer)
    Return: The value returned by ParseRST() when the server sends a
            reply; otherwise a message is written to stderr and the
            empty string '' is returned
    """
    # Local import: only needed here, to URL-encode the query values
    from urllib import parse

    fields = [('id', pdbcode)]
    # The chain is optional. The original test compared the literal
    # '&chain' with '' (always true), so an empty chain was still sent.
    if chain != '':
        fields.append(('chain', chain))
    fields.append(('res', str(resnum)))

    url = 'http://www.bioinf.org.uk/servers/pdbsws/query.cgi?plain=1&qtype=pdb'
    url += '&' + parse.urlencode(fields)

    # The with-statement closes the connection once the reply is read
    with request.urlopen(url) as response:
        result = response.read().decode('utf-8')

    # Put the whole reply on one line, with '#' marking the old line ends
    result = result.replace('\n', '#')

    if result != '':
        return ParseRST(result)

    sys.stderr.write('Nothing was returned\n')
    return ''
## Main program ##
# Surrounding whitespace is stripped so a stray space typed at the prompt
# does not end up in the query
pdbcode = input('Enter the pdbcode: ').strip()
chain = input('Enter the chain label (optional): ').strip()
resnum = input('Enter the residue number: ').strip()

# ReadPDBSWS() returns the empty string (after reporting the problem on
# stderr) when the server sends nothing back. Unpacking that directly
# into two names raises a ValueError, so check the result first.
result = ReadPDBSWS(pdbcode, chain, resnum)
if result:
    (ac, upresnum) = result
    print('Accession: ' + ac)
    print('UniProt Resnum: ' + str(upresnum))
else:
    # Non-zero exit status tells the calling shell that the lookup failed
    sys.exit(1)
[Download]
This was a well-written functioning piece of code, but there are a number of things that can be done to make it an excellent piece of code that would score higher marks in the assessed coursework.
#!/usr/bin/env python3
# Config.py - configuration information

# Base URL of the PDBSWS query script. The assignment must sit on its own
# line: fused onto the comment line above it is never executed and
# config.baseurl does not exist.
baseurl = 'http://www.bioinf.org.uk/servers/pdbsws/query.cgi?plain=1&qtype=pdb'
[Download]
#!/usr/bin/env python3
"""
Program: pdb2sprot
File: pdb2sprot.py
Version: V1.0
Date: 01.03.18
Function: Obtain SwissProt information for a PDB code and residue
number using PDBSWS
Copyright: (c) Dr. Andrew C. R. Martin, UCL, 2018
Author: Dr. Andrew C. R. Martin
Address: Institute of Structural and Molecular Biology
Division of Biosciences
University College London
--------------------------------------------------------------------------
This program is released under the GNU Public Licence (GPL V3)
--------------------------------------------------------------------------
Description:
============
This program takes a PDB code and residue identifier and returns
the relevant UniProtKB/SwissProt (or trEMBL) accession together with
the residue number in the sequence entry
--------------------------------------------------------------------------
Usage:
======
pdb2sprot PDBID RESID
--------------------------------------------------------------------------
Revision History:
=================
V1.0 01.03.18 Original By: ACRM
"""
#*************************************************************************
# Import libraries
from urllib import request
import sys
import re
import config
#*************************************************************************
def ParseRST(result):
    """Parse the results from PDBSWS

    Input:  result --- The result returned by PDBSWS, with every newline
                       replaced by '#' (the patterns below rely on '#'
                       to mark the end of each field)
    Return: (ac, resnum) --- A tuple containing the UniProt accession
                       code and the UniProt residue number (an integer).
                       An empty tuple is returned if the AC: or UPCOUNT:
                       field is missing or UPCOUNT: is not an integer.

    01.03.18 Original By: ACRM
    """
    # Raw strings so that \s reaches the regex engine untouched; in an
    # ordinary string literal it is an invalid escape sequence
    acMatch = re.match(r'.*AC:\s+(.*?)#', result)
    countMatch = re.match(r'.*UPCOUNT:\s+(.*?)#', result)

    # A reply lacking these fields gives an empty (false) result rather
    # than an AttributeError from calling .group() on None
    if acMatch is None or countMatch is None:
        return ()

    try:
        resnum = int(countMatch.group(1))
    except ValueError:
        return ()

    return (acMatch.group(1), resnum)
#*************************************************************************
def ReadPDBSWS(pdbcode, resid):
    """Obtain the UniProt accession, and residue number from PDBSWS
    given a PDB code, and residue identifier

    Input:  pdbcode --- A PDB code (e.g. 8cat)
            resid   --- A residue identifier (e.g. A23): a chain label
                        of one or more letters, the residue number and,
                        optionally, trailing letters
    Return: The value returned by ParseRST() when the server sends a
            reply. An empty list is returned if resid does not have the
            form described above or the server returns nothing.

    01.03.18 Original By: ACRM
    """
    # Local import: only needed here, to URL-quote the PDB code
    from urllib import parse

    # Obtain the chain and residue number from the resid
    # e.g.                 A         23    C
    # The final class is a-zA-Z; the original 'A-z' range also let
    # through the punctuation that sits between 'Z' and 'a' in ASCII.
    # fullmatch() rejects identifiers with trailing junk instead of
    # silently ignoring it.
    match = re.fullmatch(r'([a-zA-Z]+)([0-9]+[a-zA-Z]*)', resid)
    if match is None:
        # Failure - malformed residue identifier, return a blank list
        return []
    chain, resnum = match.groups()

    url = config.baseurl
    url += '&id=' + parse.quote(pdbcode, safe='')
    url += '&chain=' + chain
    url += '&res=' + resnum

    # The with-statement closes the connection once the reply is read
    with request.urlopen(url) as response:
        result = response.read().decode('utf-8')

    # Put the whole reply on one line, with '#' marking the old line ends
    result = result.replace('\n', '#')

    # Success - parse and return the result
    if result != '':
        return ParseRST(result)

    # Failure - return a blank list
    return []
#*************************************************************************
def UsageDie():
    """Print a usage message and exit

    The message is written to standard output and the program is then
    terminated with exit status 0.

    01.03.18 Original By: ACRM
    """
    # The program name matches the one given in the file header
    print(
"""
pdb2sprot V1.0 (c) 2018 UCL, Dr. Andrew C.R. Martin
Usage: pdb2sprot pdbcode resid
Obtain the UniProtKB/SwissProt accession and residue number for a
PDB code (e.g. 8cat) and residue identifier (e.g. A23)
""")
    # sys.exit() rather than the bare exit(), which is installed by the
    # site module for interactive use and is not always available
    sys.exit(0)
#*************************************************************************
#*** Main program ***
#*************************************************************************
# Show the usage message when help is requested or an argument is missing
helpRequested = len(sys.argv) >= 2 and sys.argv[1] == '-h'
if len(sys.argv) < 3 or helpRequested:
    UsageDie()

pdbcode, resid = sys.argv[1], sys.argv[2]

# An empty result from ReadPDBSWS() means that the lookup failed
results = ReadPDBSWS(pdbcode, resid)
if not len(results):
    print('PDB code or residue identifier not found.')
else:
    ac, upresnum = results[0], results[1]
    print('Accession: ' + ac)
    print('UniProt Resnum: ' + str(upresnum))
[Download]
You should download some general guidance on what is expected. See the Good Code section for further help.