#!/usr/bin/perl -s
#*************************************************************************
#
#   Program:    prodoric
#   File:       prodoric.pl
#   
#   Version:    V1.0
#   Date:       09.06.11
#   Function:   Query the Prodoric web server for prokaryotic promoter
#               binding sites
#   
#   Copyright:  (c) Dr. Andrew C. R. Martin 2011
#   Author:     Dr. Andrew C. R. Martin
#   Address:    Institute of Structural and Molecular Biology
#               Division of Biosciences,
#               University College,
#               Gower Street,
#               London.
#               WC1E 6BT.
#   EMail:      andrew@bioinf.org.uk
#               
#*************************************************************************
#
#   This program is not in the public domain, but it may be copied
#   according to the conditions laid out in the accompanying file
#   COPYING.DOC
#
#   The code may be modified as required, but any modifications must be
#   documented so that the person responsible can be identified. If 
#   someone else breaks this code, I don't want to be blamed for code 
#   that does not work! 
#
#   The code may not be sold commercially or included as part of a 
#   commercial product except as described in the file COPYING.DOC.
#
#*************************************************************************
#
#   Description:
#   ============
#   Submits a sequence to the Prodoric bacterial promoter prediction
#   server at http://prodoric.tu-bs.de/vfp/vfp_promoter.php
#
#*************************************************************************
#
#   Usage:
#   ======
#   See Usage message with
#      prodoric -h
#*************************************************************************
#
#   Revision History:
#   =================
#   V1.0   09.06.11 Original   By: ACRM
#
#*************************************************************************
use LWP::UserAgent;
use strict;

# Create global arrays of list of species and list of PWMs
Setup();
# Parse the command line getting list of species indexes
my $species = ParseCmdLine();
# Build list of PWMs for these species. This comes back as an array of chunks
my @pwms = BuildPwms($species,1000);
# Read the sequence from the FASTA file
my $seq = ReadSeq();
# Print a results header
print "# PWM, start, stop, score, sequence\n";
# Run Prodoric
my $results = Prodoric($seq, @pwms);
# Print the results
PrintResults($results);

########################################################################
# Prints the results, obeying print options:
# -t=threshold  (specify threshold)
# -c            (just count those above the threshold)
# -u            (just count distinct promoters above the threshold)
#
# 09.06.11 Original   By: ACRM
sub PrintResults
{
    my($allResults) = (@_);
    my @results;
    my $threshold = 0;
    my %printed = ();

    @results = split(/\n/, $results);

    if(defined($::t))
    {
        $threshold = $::t;
    }

    my $count = 0;
    foreach my $result (@results)
    {
        my @fields = split(/\,/,$result); 
        if($fields[3] >= $threshold)
        {
            if(defined($::u))
            {
                if(!defined($printed{$fields[0]}))
                {
                    $printed{$fields[0]} = 1;
                    $count++;
                }
            }
            else
            {
                $count++;
            }

            if(!defined($::c) && !defined($::u))
            {
                print "$result\n";
            }
        }
    }

    if(defined($::u))
    {
        print "Number of unique PWM matches above threshold ($threshold) = $count\n";
    }
    elsif(defined($::c))
    {
        print "Number of matches above threshold ($threshold) = $count\n";
    }
}

########################################################################
# Called with the sequence and an array of Prodoric PWM name sets. Each
# name set uses a # to delimit each PWM name - i.e. name#name#name#
# (note trailing #)
# Returns a set of results in a string
#
# 09.06.11 Original   By: ACRM
sub Prodoric
{
    my($seq, @pwms) = @_;
    my $results = "";

    foreach my $pwms (@pwms)
    {
        if(defined($::v))
        {
            my $tmp = $pwms;
            $tmp =~ s/\#/\n   /g;
            print STDERR "Testing:\n   $tmp\n";
        }
        $results .= doProdoric($seq, $pwms);
    }
    return($results);
}



########################################################################
# Read the first sequence from a FASTA file
# 09.06.11 Original   By: ACRM
sub ReadSeq
{
    my $seq = "";
    while(<>)
    {
        chomp;
        if(/^\>/)
        {
            if($seq ne "")
            {
                return($seq);
            }
        }
        else
        {
            s/\s//g;
            $seq .= $_;
        }
    }
    return($seq);
}

########################################################################
# Does the work of running Prodoric on an individual PWM nameset
# 09.06.11 Original   By: ACRM
#
sub doProdoric
{
    my ($sequence, $pwms) = @_;

    my $url="http://prodoric.tu-bs.de/vfp/vfp_promoter_start.php";
    my $webproxy = "";

    my %options;
    $options{genome} = "PLEASE SELECT...";

    $options{gene_name} = "";
    $options{upstream} = 500;
    $options{genome_pos1} = "";
    $options{genome_pos2} = "";
    $options{userfile} = "";

    $options{sequence} = $sequence;
    $options{pwm_data} = $pwms;
    $options{seq_source} = 3; # paste raw sequence
    $options{msens1} = $::sensitivity; # Sensitivity 0.7-1.0 (default 0.8)
    $options{nop} = ($::nonop?0:1);    # Non-occurrence penalty (default 1)
    $options{csens1} = $::core;        # Core sensitivty 0.7-1.0 (default 0.9)
    $options{core1} = $::size;         # Core size 3-8 (default 5)
    $options{constraint} = 2; # Show all results for each PWM
#    $options{constraint} = 1; # Show top results for each PWM
    $options{max_results} = 3; # How many to show for each PWM
    $options{MAX_FILE_SIZE} = 100000000;

    # build options into post string
    my $post = "";
    foreach my $key (keys %options)
    {
        if($post eq "")
        {
            $post = "$key=$options{$key}";
        }
        else
        {
            $post .= "&";
            $post .= "$key=$options{$key}";
        }
    }

    if(defined($::v))
    {
        print STDERR "Submitting data to Prodoric\n";
    }

    # Submit
    my $ua = CreateUserAgent($webproxy);
    my $req = CreatePostRequest($url, $post);
    my $html = GetContent($ua, $req);

    # Find the URL for the results page from the html
    $html =~ /\/tmp\/result(\d+)\.html/;
    my $id = $1;

    # Keep refreshing the results page until we have the final data
    do
    {
        sleep 1;
 
        $url = "http://prodoric.tu-bs.de/tmp/result$id.html";
        if(defined($::v))
        {
            print STDERR "Regetting $url...\n";
        }

        $req = CreateGetRequest($url, "");
        $html = GetContent($ua, $req);
        # Find the URL for the results page from the html
        $id = "";
        if($html =~ /\/tmp\/result(\d+)\.html/)
        {
            $id = $1;
        }
    } while($id ne "");

    if(defined($::v))
    {
        print STDERR "Parsing results...\n";
    }
    my $results = ParseProdoricHTML($html);

    return($results);
}

########################################################################
# Get data from a website using LWP
sub GetContent
{
    my($ua, $req) = @_;
    my($res);

    $res = $ua->request($req);
    if($res->is_success)
    {
        return($res->content);
    }
    return(undef);
}

########################################################################
# Create a get request using LWP
sub CreateGetRequest
{
    my($url) = @_;
    my($req);
    $req = HTTP::Request->new('GET',$url);
    return($req);
}

########################################################################
# Create a post request using LWP
sub CreatePostRequest
{
    my($url, $params) = @_;
    my($req);
    $req = HTTP::Request->new(POST => $url);
    $req->content_type('application/x-www-form-urlencoded');
    $req->content($params);

    return($req);
}

########################################################################
# Create a web agent using LWP
sub CreateUserAgent
{                               
    my($webproxy) = @_;

    my($ua);
    $ua = LWP::UserAgent->new;
    if(length($webproxy))
    {
        $ua->proxy(['http', 'ftp'] => $webproxy);
    }
    return($ua);
}


########################################################################
# Parse the HTML that is returned by Prodoric
# 09.06.11 Original   By: ACRM
sub ParseProdoricHTML
{
    my($html) = @_;

    # Create line breaks at all table boundaries
    $html =~ s/<td /\n<td /g;
    $html =~ s/<tr>/\n<tr>/g;

    # Extract HTML from the table with "PWM (species)" in the header
    my @lines = split(/\n/, $html);
    my $inSection = 0;
    my @resultsTable;
    foreach my $line (@lines)
    {
        if($line =~ /PWM \(species\)/)
        {
            $inSection = 1;
        }
        elsif($line =~ /<\/table>/)
        {
            $inSection = 0;
        }
        if($inSection)
        {
            push @resultsTable, $line;
        }
    }

    # Parse each row from the table into an array, then assemble that into an output string
    my $results = "";
    my $count = 0;
    my @fields = ();
    foreach my $line (@resultsTable)
    {
        if($line =~ /\<tr\>/)
        {
            if($count)
            {
                # Extract data of interest
                if($fields[3] eq "+")
                {
                    $results .= "$fields[0], $fields[1], $fields[2], $fields[4], $fields[5]\n";
                }
            }
            $count = 0;
        }
        elsif($line =~ /tablerow_white\".*\>(.*)\<\/td\>/)
        {
            $fields[$count] = $1;
            $fields[$count] =~ s/\,//g; # Strip commas from fields
            $count++;
        }
        elsif($line =~ /seq\".*\>(.*)\<\/td\>/)
        {
            $fields[$count] = $1;
            $fields[$count] =~ s/\,//g; # Strip commas from fields
            $count++;
        }
    }

    # And the last one...
    if($count)
    {
        # Extract data of interest
        if($fields[3] eq "+")
        {
            $results .= "$fields[0], $fields[1], $fields[2], $fields[4], $fields[5]\n";
        }
    }

    return($results);
}
########################################################################
# Build an array containing PWM namesets - each nameset being a 
# #-delimited set of Prodoric PWM names
#
# 09.06.11 Original   By: ACRM
sub BuildPwms
{
    my ($specList, $chunkSize) = @_;
    my @species=split(/\,/, $specList);
    my @pwms = ();
    my $pwmSet = 0;
    my $count = 0;

    foreach my $spec (@species)
    {
        my $specName = $::speciesList[$spec];
        foreach my $pwm (@::pwmList)
        {
            my $pwm2 = $pwm;
            $pwm2 =~ s/.*\|\s+//;
            if($pwm2 eq $specName)
            {
                $count++;
                $pwms[$pwmSet] .= "$pwm#";
                if(!($count%$chunkSize))
                {
                    $pwmSet++;
                }
            }
        }
    }

    if(defined($::v))
    {
        print STDERR "Using $count PWMs\n";
    }
    return(@pwms);
}

########################################################################
# Parse the command line
#
# 09.06.11 Original   By: ACRM
sub ParseCmdLine
{
    my $species = "";

    if(defined($::h) || defined($::help))
    {
        Usage();
        exit 0;
    }
    
    if(!defined($::species))
    {
        if(defined($::all))
        {
            $species .= "1,2,3,4,5,6,7,8,9,10,11,12";
        }
        else
        {
            if(defined($::bac))
            {
                $species .= "0,1,";
            }
            if(defined($::bra))
            {
                $species .= "2,";
            }
            if(defined($::eco))
            {
                $species .= "3,4,";
            }
            if(defined($::hel))
            {
                $species .= "5,";
            }
            if(defined($::lis))
            {
                $species .= "6,";
            }
            if(defined($::pse))
            {
                $species .= "7,8,9,";
            }
            if(defined($::sal))
            {
                $species .= "10,";
            }
            if(defined($::str))
            {
                $species .= "11,12,";
            }
            $species =~ s/\,$//;
        }
    }
    else
    {
        $species = $::species;
    }

    # Default to E.coli
    if($species eq "")
    {
        $species = "3,4";
    }

    $::sensitivity = 0.8 if(!defined($::sensitivity));
    $::nonop = 0         if(!defined($::nonop));
    $::core = 0.9        if(!defined($::core));
    $::size = 5          if(!defined($::size));

    if(defined($::v))
    {
        print  STDERR "Sensitivity: $::sensitivity\n";
        printf STDERR "Nop: %s\n", $::nonop?"off":"on";
        print  STDERR "Core Sensitivity: $::core\n";
        print  STDERR "Core Size: $::size\n";
    }
    return($species);
}

########################################################################
# Print a usage message
#
# 09.06.11 Original   By: ACRM
sub Usage
{
    print <<__EOF;

prodoric (V1.0), (c) 2010, Dr. Andrew C.R. Martin, UCL

Usage: prodoric [-v] [-c] [-t=threshold] [-species=n[,n...]] 
                [-eco][-bac][-pse][-bra][-hel][-lis][-sal][-str] 
                [-sensitivity=n] [-nonop] [-core=n] [-size=n]
                file.faa

         -v Verbose
         -c Just return the count of matches found
         -u Just return the count of distinct TFBSs found
            (overrides -c)
         -t Only return results with score >= threshold
         -all Predict for all known species
         -eco Predict for Escherichia coli
         -bac Predict for Bacillus subtilis
         -bra Predict for Bradyrhizobium japonicum
         -hel Predict for Helicobacter pylori
         -lis Predict for Listeria monocytogenes
         -pse Predict for Pseudomonas aeruginosa and Pseudomonas syringae
         -sal Predict for Salmonella typhi
         -str Predict for Streptococcus pyogenes
         -species Predict for specific variants / strains
                  Use as -species=1,3,5
                  This overrides anything specified with -all, -eco, etc
__EOF

    my $count = 0;
    foreach my $spec (@::speciesList)
    {
        printf "                  %2d $spec\n", $count++;
    }

    print <<__EOF;
         (Specied defaults to E.coli if nothing specified.)
         -sensitivity Specify sensitivity (range: 0.7-1.0; default: 0.8)
         -nonop       Do not apply a non-occurence penalty
         -core        Specify the core sensitivity (range: 0.7-1.0; default: 0.9)
         -size        Specify the core size (range: 3-8; default: 5)

prodoric runs the Prodoric webserver at http://prodoric.tu-bs.de/vfp/vfp_promoter.php
from the command line.
See http://prodoric.tu-bs.de/vfp/vfp_help.php for the meaning of the -sensitivity,
-nonop, -core and -size flags
__EOF

}


########################################################################
# Define PWM list and species list from the Prodoric web site
# Note that 
# "Lrp + Leu (SELEX) | Escherichia coli (strain K12)"
# causes the program to freeze on uploading the data - probably the '+'
# sign does something special and needs masking
#
# 09.06.11 Original   By: ACRM
sub Setup
{
    @::pwmList = ("AbrB | Bacillus subtilis (strain 168)",
                  "Ada | Escherichia coli (strain K12)",
                  "AhrC | Bacillus subtilis (strain 168)",
                  "AlgR | Pseudomonas syringae (pathovar tomato, strain DC3000)",
                  "AlgU (-10) | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "AlgU (-35) | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "AlgU (N16) | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "AlgU (N17) | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "Anr | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "Anr_Dnr_37 | Pseudomonas aeruginosa",
                  "Anr_Dnr | Pseudomonas aeruginosa",
                  "Anr_Dnr_40 | Pseudomonas aeruginosa",
                  "AraC | Escherichia coli (strain K12)",
                  "AraR | Bacillus subtilis (strain 168)",
                  "ArcA | Escherichia coli (strain K12)",
                  "ArgR | Escherichia coli (strain K12)",
                  "ArgR | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "CRE | Bacillus subtilis (strain 168)",
                  "CTSR | Bacillus subtilis (strain 168)",
                  "CaiF | Escherichia coli (strain K12)",
                  "CcpC | Bacillus subtilis (strain 168)",
                  "CitT | Bacillus subtilis (strain 168)",
                  "CodY | Bacillus subtilis (strain 168)",
                  "ComA | Bacillus subtilis (strain 168)",
                  "ComK | Bacillus subtilis (strain 168)",
                  "CovR | Streptococcus pyogenes (serovar M18 / M18, strain MGAS8232)",
                  "CovR | Streptococcus pyogenes (serovar M18 / M18, strain MGAS8232)",
                  "CpxR | Escherichia coli (strain K12)",
                  "Crp | Escherichia coli (strain K12)",
                  "CspA | Escherichia coli (strain K12)",
                  "CtsR | Listeria monocytogenes (serovar 1/2a, strain EGD-e)",
                  "CynR | Escherichia coli (strain K12)",
                  "CysB | Escherichia coli (strain K12)",
                  "CytR | Escherichia coli (strain K12)",
                  "DegU | Bacillus subtilis (strain 168)",
                  "DeoR | Escherichia coli (strain K12)",
                  "DinR/LexA | Bacillus subtilis (strain 168)",
                  "DnaA | Escherichia coli (strain K12)",
                  "Dnr | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "ExsA | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "FadR | Escherichia coli (strain K12)",
                  "FhlA | Escherichia coli (strain K12)",
                  "Fis | Escherichia coli (strain K12)",
                  "FleQ | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "FlhD2C2 | Escherichia coli (strain K12)",
                  "FliA | Escherichia coli (strain K12)",
                  "Fnr | Bacillus subtilis (strain 168)",
                  "Fnr_neu | Bacillus subtilis",
                  "Fnr | Escherichia coli (strain K12)",
                  "FruR | Escherichia coli (strain K12)",
                  "Fur (18mer) | Escherichia coli (strain K12)",
                  "Fur (8mer)| Escherichia coli (strain K12)",
                  "Fur | Helicobacter pylori (strain ATCC 700392 / 26695)",
                  "Fur | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "GalR | Escherichia coli (strain K12)",
                  "GalS | Escherichia coli (strain K12)",
                  "GcvA | Escherichia coli (strain K12)",
                  "GerE | Bacillus subtilis (strain 168)",
                  "GlnG | Escherichia coli (strain K12)",
                  "GlnR | Bacillus subtilis (strain 168)",
                  "GlpR | Escherichia coli (strain K12)",
                  "GlpR | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "GltC | Bacillus subtilis (strain 168)",
                  "GltR | Bacillus subtilis (strain 168)",
                  "GntR | Escherichia coli (strain K12)",
                  "H-NS | Escherichia coli (strain K12)",
                  "Hpr | Bacillus subtilis (strain 168)",
                  "IHF | Escherichia coli (strain K12)",
                  "IHF | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "IciA | Escherichia coli (strain K12)",
                  "IlvY | Escherichia coli (strain K12)",
                  "LacI | Escherichia coli (strain K12)",
                  "LasR | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "LevR | Bacillus subtilis (strain 168)",
                  "LexA | Escherichia coli (strain K12)",
                  "LicT | Bacillus subtilis (strain 168)",
                  "Lrp | Escherichia coli (strain K12)",
                  "Lrp (SELEX) | Escherichia coli (strain K12)",
# Commented out as it seems to freeze on this
#                  "Lrp + Leu (SELEX) | Escherichia coli (strain K12)",
                  "MalI | Escherichia coli (strain K12)",
                  "MalT | Escherichia coli (strain K12)",
                  "MarR | Escherichia coli (strain K12)",
                  "MetJ | Escherichia coli (strain K12)",
                  "MetJ (Selex) | Escherichia coli (strain K12)",
                  "MetJ (Selex) | Escherichia coli (strain K12)",
                  "MetR | Escherichia coli (strain K12)",
                  "MexR | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "Mlc | Escherichia coli (strain K12)",
                  "Mlc (Selex) | Escherichia coli (strain K12)",
                  "MngR (former FarR) | Escherichia coli (strain K12)",
                  "MntR | Bacillus subtilis (strain 168)",
                  "ModE | Escherichia coli (strain K12)",
                  "Mta | Bacillus subtilis (strain 168)",
                  "MtrB | Bacillus subtilis (strain 168)",
                  "Nac | Escherichia coli (strain K12)",
                  "NagC | Escherichia coli (strain K12)",
                  "NagC (Selex) | Escherichia coli (strain K12)",
                  "NarL | Escherichia coli (strain K12)",
                  "NarL | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "NhaR | Escherichia coli (strain K12)",
                  "OmpR (C box)| Escherichia coli (strain K12)",
                  "OmpR (F box)| Escherichia coli (strain K12)",
                  "OmpR | Escherichia coli (strain K12)",
                  "OmpR | Helicobacter pylori (strain ATCC 700392 / 26695)",
                  "OxyR | Escherichia coli (strain K12)",
                  "OxyR | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "OxyR (SELEX) | Escherichia coli (strain K12)",
                  "PdhR | Escherichia coli (strain K12)",
                  "PhhR | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "PhoP | Escherichia coli (strain K12)",
                  "PrfA | Listeria monocytogenes (serovar 1/2a strain EGD-e)",
                  "PucR | Bacillus subtilis (strain 168)",
                  "PurR | Escherichia coli (strain K12)",
                  "PvdS | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "PyrR | Bacillus subtilis (strain 168)",
                  "RcsAB | Escherichia coli (strain K12)",
                  "RegR | Bradyrhizobium japonicum (strain USDA 110)",
                  "ResD | Bacillus subtilis (strain 168)",
                  "Rex | Bacillus subtilis (strain 168)",
                  "RhlR | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "RocR | Bacillus subtilis (strain 168)",
                  "RofA | Streptococcus pyogenes (serovar M1, strain SF370 / ATCC 700294)",
                  "RpoE-SigE | Escherichia coli (strain K12)",
                  "RpoE-SigE | Salmonella typhi (strain CT18)",
                  "RpoN | Bradyrhizobium japonicum (strain USDA 110)",
                  "RpoN | Escherichia coli (strain K12)",
                  "Sig70 (-10) | Escherichia coli",
                  "SigB (-10) | Bacillus subtilis (strain 168)",
                  "SigB (-35) | Bacillus subtilis (strain 168)",
                  "SigB (N12) | Bacillus subtilis (strain 168)",
                  "SigB (N13) | Bacillus subtilis (strain 168)",
                  "SigB (N14) | Bacillus subtilis (strain 168)",
                  "SigB (N15) | Bacillus subtilis (strain 168)",
                  "SigD | Bacillus subtilis (strain 168)",
                  "SigE (-10) | Bacillus subtilis (strain 168)",
                  "SigE (-35) | Bacillus subtilis (strain 168)",
                  "SigE (N13) | Bacillus subtilis (strain 168)",
                  "SigE (N14) | Bacillus subtilis (strain 168)",
                  "SigE (N15) | Bacillus subtilis (strain 168)",
                  "SigH (-10) | Bacillus subtilis (strain 168)",
                  "SigH (-35) | Bacillus subtilis (strain 168)",
                  "SigL | Bacillus subtilis (strain 168)",
                  "SigW (-10) | Bacillus subtilis (strain 168)",
                  "SigW (-35) | Bacillus subtilis (strain 168)",
                  "SigW (N16) | Bacillus subtilis (strain 168)",
                  "SigW (N17) | Bacillus subtilis (strain 168)",
                  "SigX (-10) | Bacillus subtilis (strain 168)",
                  "SigX (-35) | Bacillus subtilis (strain 168)",
                  "SigX (N15) | Bacillus subtilis (strain 168)",
                  "SinR | Bacillus subtilis (strain 168)",
                  "Spo0A | Bacillus subtilis (strain 168)",
                  "Spo0A (II) | Bacillus subtilis (strain 168)",
                  "SpoIIID | Bacillus subtilis (strain 168)",
                  "TnrA | Bacillus subtilis (strain 168)",
                  "TreR | Bacillus subtilis (strain 168)",
                  "TrpI | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "Vfr | Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                  "Xre | Bacillus subtilis (strain 168)",
                  "XylR | Bacillus subtilis (strain 168)",
                  "YhiX | Escherichia coli (strain K12)",
                  "Zur | Bacillus subtilis (strain 168)");
    @::speciesList = ("Bacillus subtilis",
                      "Bacillus subtilis (strain 168)",
                      "Bradyrhizobium japonicum (strain USDA 110)",
                      "Escherichia coli",
                      "Escherichia coli (strain K12)",
                      "Helicobacter pylori (strain ATCC 700392 / 26695)",
                      "Listeria monocytogenes (serovar 1/2a, strain EGD-e)",
                      "Pseudomonas aeruginosa",
                      "Pseudomonas aeruginosa (strain ATCC 15692 / PAO1)",
                      "Pseudomonas syringae (pathovar tomato, strain DC3000)",
                      "Salmonella typhi (strain CT18)",
                      "Streptococcus pyogenes (serovar M18 / M18, strain MGAS8232)",
                      "Streptococcus pyogenes (serovar M1, strain SF370 / ATCC 700294)");
}
