Source code for elaspic.call_tcoffee

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
from builtins import object

import os.path as op
from os import environ
import time
import logging
import shutil

from Bio import SeqIO

from . import conf, errors, helper, structure_tools
configs = conf.Configs()

logger = logging.getLogger(__name__)


class TCoffee(object):
    """
    Alignes sequences using t_coffee in expresso mode.
    """
    def __init__(self, alignment_fasta_file, mode, pdb_file=None):
        """
        """
        self.alignment_id = op.splitext(op.basename(alignment_fasta_file))[0]
        self.alignment_fasta_file = op.join(configs['tcoffee_dir'], self.alignment_id + '.fasta')
        if alignment_fasta_file != self.alignment_fasta_file:
            shutil.copy(alignment_fasta_file, self.alignment_fasta_file)

        self.target_seqrecord, self.template_seqrecord = (
            list(SeqIO.parse(self.alignment_fasta_file, 'fasta'))
        )
        self.mode = mode

        if pdb_file is not None:
            self.pdb_id, self.pdb_file = self._clean_pdb(pdb_file)

            # Write a template file for the sequences to be aligned
            self.alignment_template_file = (
                op.join(configs['tcoffee_dir'], '{}.template_list'.format(self.alignment_id))
            )
            with open(self.alignment_template_file, 'w') as fh:
                fh.writelines([
                    ">" + self.target_seqrecord.id + " _P_ " + self.pdb_id.upper() + "\n"
                    ">" + self.template_seqrecord.id + " _P_ " + self.pdb_id.upper() + "\n"
                ])

    def _clean_pdb(self, pdb_file):
        """Write a template PDB file in a format that is compatible with t_coffee.
        """
        message = (
            "Cleaning pdb {} to serve as a template for t_coffee...".format(pdb_file)
        )
        logger.debug(message)

        pdb_id = structure_tools.get_pdb_id(pdb_file)
        pdb_file_new = op.join(configs['tcoffee_dir'], pdb_id + '.pdb')

        system_command = (
            "t_coffee -other_pg extract_from_pdb {} > {}".format(pdb_file, pdb_file_new)
        )
        result, error_message, return_code = (
            helper.subprocess_check_output_locally(configs['tcoffee_dir'], system_command)
        )
        if return_code:
            logger.error("Error cleaning pdb!")
            logger.error("System command: '{}'".format(system_command))
            logger.error("Result: '{}'".format(result))
            logger.error("Error message: '{}'".format(error_message))
        time.sleep(0.2)
        return pdb_id, pdb_file_new

    def _get_tcoffee_system_command(
            self, alignment_fasta_file, alignment_template_file, alignment_output_file, mode):
        """

        Parameters
        ----------
        alignment_fasta_file : str
            Name of the file that contains the sequences to be aligned in fasta format.
        alignment_template_file : str
            Name of the file that contains the structureal templates to be used for
            structure-assisted alignments.
        alignment_output_file : str
            Name of the file where the alignment should be saved.
        mode : str
            T-coffee mode the should be run.

        Returns
        --------
        system_command : str
            System call that runs tcoffee.
        tcoffee_env : str
            A dictionary of environment variables to run tcoffee in a thread-safe manner.
        """
        # Environment variables
        # To be able to run parallel instances of t_coffee, the environment
        # variables have to be set to unique paths for each t_coffee call.
        # Also, make sure to change the directory to a unique one bevore
        # calling t_coffee.
        tcoffee_env = environ.copy()
        tcoffee_env['HOME_4_TCOFFEE'] = op.join(configs['tcoffee_dir'])
        tcoffee_env['TMP_4_TCOFFEE'] = op.join(configs['tcoffee_dir'], 'tmp')
        tcoffee_env['CACHE_4_TCOFFEE'] = op.join(configs['tcoffee_dir'], 'cache')
        tcoffee_env['LOCKDIR_4_TCOFFEE'] = op.join(configs['tcoffee_dir'], 'lck')
        tcoffee_env['ERRORFILE_4_TCOFFEE'] = (
            op.join(configs['tcoffee_dir'], 't_coffee.ErrorReport')
        )
        tcoffee_env['BLASTDB'] = configs['blast_db_dir']
        tcoffee_env['PDB_DIR'] = configs['pdb_dir']
        tcoffee_env['NO_REMOTE_PDB_DIR'] = '1'

        # Print a command that can be used to set environmental variables
        t_coffee_environment_variables = [
            'HOME_4_TCOFFEE', 'TMP_4_TCOFFEE', 'CACHE_4_TCOFFEE', 'LOCKDIR_4_TCOFFEE',
            'ERRORFILE_4_TCOFFEE', 'BLASTDB', 'PDB_DIR', 'NO_REMOTE_PDB_DIR']
        exports = [
            'export {}={}'.format(x, tcoffee_env.get(x, '$'+x))
            for x in t_coffee_environment_variables
        ]
        message = (
            "\nSystem command for setting environmental variables:\n" + ' && '.join(exports)
        )
        logger.debug(message)

        # ### System command
        # Use the following command to clean the pdb file (add headers, etc.)
        # 't_coffee -other_pg extract_from_pdb 32c2A.pdb > template.pdb '

        # Use the folllowing command to perform the most accurate alignment
        # 't_coffee -mode 3dcoffee -method sap_pair,mustang_pair,TMalign_pair
        # -blast_server=LOCAL -pdb_db=pdbaa -protein_db=nr -outorder=input
        # -output fasta_aln -outfile tcoffee_output.aln -seq seqfiles.fasta
        # -pdb_min_sim=20 -template_file seqfiles.template '

        multi_core_option = (
            '{}'.format(configs['n_cores'])
            if configs['n_cores'] and int(configs['n_cores']) > 1 else 'no'
        )
        n_core_option = '{}'.format(configs['n_cores']) if configs['n_cores'] else '1'
        protein_db = op.join(configs['blast_db_dir'], 'nr')
        pdb_db = op.join(configs['blast_db_dir'], 'pdbaa')
        if mode == '3dcoffee':
            system_command = (
                "t_coffee " +
                " -seq " + alignment_fasta_file +
                " -method=sap_pair,mustang_pair,TMalign_pair " +
                " -blast_server=LOCAL " +
                " -protein_db=" + protein_db +
                " -pdb_db=" + pdb_db +
                " -outorder=input" +
                " -output=fasta_aln" +
                " -pdb_min_sim=30" +
                # " -quiet" +
                # " -no_warning" +
                " -outfile=" + alignment_output_file +
                " -multi_core=no" +
                " -n_core=" + n_core_option +
                " -template_file=" + alignment_template_file
            )
        if mode == 'expresso':
            system_command = (
                't_coffee' +
                ' -mode expresso' +
                ' -method sap_pair' +
                ' -seq ' + alignment_fasta_file +
                ' -blast_server=LOCAL' +
                " -protein_db=" + protein_db +
                " -pdb_db=" + pdb_db +
                ' -outorder=input' +
                ' -output fasta_aln' +
                ' -quiet -no_warning' +
                ' -outfile=' + alignment_output_file +
                ' -cache ignore ' +
                ' -multi_core ' + multi_core_option +  # AS changed !!!
                ' -n_core ' + n_core_option  # AS changed !!!
            )
        if mode == 't_coffee':
            system_command = (
                't_coffee' +
                ' -mode expresso' +
                ' -method clustalw_pair,slow_pair' +
                ' -seq ' + alignment_fasta_file +
                ' -blast_server=LOCAL' +
                " -protein_db=" + protein_db +
                " -pdb_db=" + pdb_db +
                ' -outorder=input' +
                ' -output fasta_aln' +
                ' -quiet -no_warning' +
                ' -outfile=' + alignment_output_file +
                ' -multi_core ' + multi_core_option +  # AS changed !!!
                ' -n_core ' + n_core_option  # AS changed !!!
            )
        if mode == 'quick':
            system_command = (
                't_coffee' +
                ' -mode quickaln' +
                ' -method clustalw_pair,slow_pair' +
                ' -seq ' + alignment_fasta_file +
                ' -blast_server=LOCAL' +
                " -protein_db=" + protein_db +
                " -pdb_db=" + pdb_db +
                ' -outorder=input' +
                ' -output fasta_aln' +
                ' -quiet -no_warning' +
                ' -outfile=' + alignment_output_file +
                ' -multi_core ' + multi_core_option +  # AS changed !!!
                ' -n_core ' + n_core_option  # AS changed !!!
            )
        return system_command, tcoffee_env

[docs] def align(self, GAPOPEN=-0.0, GAPEXTEND=-0.0): """ Calls t_coffee (make sure BLAST is installed locally!). Parameters ---------- alignment_fasta_file : string A file containing the fasta sequences to be aligned alignment_template_file : string A file containing the structural templates for the fasta sequences described above GAPOPEN : int or str See t_coffee manual GAPEXTEND : int or str See t_coffee manual Returns -------- alignment_output_file : str Name of file which contains the alignment in fasta format. """ # try the alignment in expresso mode (structure based with sap alignment) alignment_output_file = op.join(configs['tcoffee_dir'], self.alignment_id + '.aln') system_command, tcoffee_env = self._get_tcoffee_system_command( self.alignment_fasta_file, self.alignment_template_file, alignment_output_file, self.mode) # Perform t_coffee alignment logger.debug("\nTCoffee system command:\n{}".format(system_command)) result, error_message, return_code = ( helper.subprocess_check_output_locally( configs['tcoffee_dir'], system_command, env=tcoffee_env) ) logger.debug("t_coffee results:\n{}".format(result.strip())) error_message_summary_idx = error_message.find( '* MESSAGES RECAPITULATION') if error_message_summary_idx == -1: error_message_summary = '' else: error_message_summary = error_message[error_message_summary_idx:] logger.debug("t_coffee errors:\n{}".format(error_message_summary.strip())) # Check if tcoffee had an unexpected exit and if not, create and return # the alignment object if return_code == 0: logger.info("Successfully made the alignment") return alignment_output_file else: logger.error( 'Structural alignment failed with the following error: {}' .format(error_message)) logger.error('Running quickalign alignment instead...') system_command, tcoffee_env = self._call_tcoffee_system_command( self.alignment_fasta_file, self.alignment_template_file, alignment_output_file, 'quick') result, error_message, return_code = ( helper.subprocess_check_output_locally( configs['tcoffee_dir'], system_command, env=tcoffee_env) ) if return_code == 0: return alignment_output_file else: logger.error('Even quickaln didn\'t work. Cannot create an alignment. Giving up.') raise errors.TcoffeeError(result, error_message, self.alignment_fasta_file, system_command)