Source code for elaspic.call_tcoffee

# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import unicode_literals
from builtins import object

import os.path as op
from os import environ
import time
import logging
import shutil

from Bio import SeqIO

from . import conf, errors, helper, structure_tools
configs = conf.Configs()

logger = logging.getLogger(__name__)


[docs]class TCoffee(object): """ Alignes sequences using t_coffee in expresso mode. """ def __init__(self, alignment_fasta_file, mode, pdb_file=None): """ """ self.alignment_id = op.splitext(op.basename(alignment_fasta_file))[0] self.alignment_fasta_file = op.join(configs['tcoffee_dir'], self.alignment_id + '.fasta') if alignment_fasta_file != self.alignment_fasta_file: shutil.copy(alignment_fasta_file, self.alignment_fasta_file) self.target_seqrecord, self.template_seqrecord = ( list(SeqIO.parse(self.alignment_fasta_file, 'fasta')) ) self.mode = mode if pdb_file is not None: self.pdb_id, self.pdb_file = self._clean_pdb(pdb_file) # Write a template file for the sequences to be aligned self.alignment_template_file = ( op.join(configs['tcoffee_dir'], '{}.template_list'.format(self.alignment_id)) ) with open(self.alignment_template_file, 'w') as fh: fh.writelines([ ">" + self.target_seqrecord.id + " _P_ " + self.pdb_id.upper() + "\n" ">" + self.template_seqrecord.id + " _P_ " + self.pdb_id.upper() + "\n" ]) def _clean_pdb(self, pdb_file): """Write a template PDB file in a format that is compatible with t_coffee. """ message = ( "Cleaning pdb {} to serve as a template for t_coffee...".format(pdb_file) ) logger.debug(message) pdb_id = structure_tools.get_pdb_id(pdb_file) pdb_file_new = op.join(configs['tcoffee_dir'], pdb_id + '.pdb') system_command = ( "t_coffee -other_pg extract_from_pdb {} > {}".format(pdb_file, pdb_file_new) ) result, error_message, return_code = ( helper.subprocess_check_output_locally(configs['tcoffee_dir'], system_command) ) if return_code: logger.error("Error cleaning pdb!") logger.error("System command: '{}'".format(system_command)) logger.error("Result: '{}'".format(result)) logger.error("Error message: '{}'".format(error_message)) time.sleep(0.2) return pdb_id, pdb_file_new def _get_tcoffee_system_command( self, alignment_fasta_file, alignment_template_file, alignment_output_file, mode): """ Parameters ---------- alignment_fasta_file : str Name of the file that contains the sequences to be aligned in fasta format. alignment_template_file : str Name of the file that contains the structureal templates to be used for structure-assisted alignments. alignment_output_file : str Name of the file where the alignment should be saved. mode : str T-coffee mode the should be run. Returns -------- system_command : str System call that runs tcoffee. tcoffee_env : str A dictionary of environment variables to run tcoffee in a thread-safe manner. """ # Environment variables # To be able to run parallel instances of t_coffee, the environment # variables have to be set to unique paths for each t_coffee call. # Also, make sure to change the directory to a unique one bevore # calling t_coffee. tcoffee_env = environ.copy() tcoffee_env['HOME_4_TCOFFEE'] = op.join(configs['tcoffee_dir']) tcoffee_env['TMP_4_TCOFFEE'] = op.join(configs['tcoffee_dir'], 'tmp') tcoffee_env['CACHE_4_TCOFFEE'] = op.join(configs['tcoffee_dir'], 'cache') tcoffee_env['LOCKDIR_4_TCOFFEE'] = op.join(configs['tcoffee_dir'], 'lck') tcoffee_env['ERRORFILE_4_TCOFFEE'] = ( op.join(configs['tcoffee_dir'], 't_coffee.ErrorReport') ) tcoffee_env['BLASTDB'] = configs['blast_db_dir'] tcoffee_env['PDB_DIR'] = configs['pdb_dir'] tcoffee_env['NO_REMOTE_PDB_DIR'] = '1' # Print a command that can be used to set environmental variables t_coffee_environment_variables = [ 'HOME_4_TCOFFEE', 'TMP_4_TCOFFEE', 'CACHE_4_TCOFFEE', 'LOCKDIR_4_TCOFFEE', 'ERRORFILE_4_TCOFFEE', 'BLASTDB', 'PDB_DIR', 'NO_REMOTE_PDB_DIR'] exports = [ 'export {}={}'.format(x, tcoffee_env.get(x, '$'+x)) for x in t_coffee_environment_variables ] message = ( "\nSystem command for setting environmental variables:\n" + ' && '.join(exports) ) logger.debug(message) # ### System command # Use the following command to clean the pdb file (add headers, etc.) # 't_coffee -other_pg extract_from_pdb 32c2A.pdb > template.pdb ' # Use the folllowing command to perform the most accurate alignment # 't_coffee -mode 3dcoffee -method sap_pair,mustang_pair,TMalign_pair # -blast_server=LOCAL -pdb_db=pdbaa -protein_db=nr -outorder=input # -output fasta_aln -outfile tcoffee_output.aln -seq seqfiles.fasta # -pdb_min_sim=20 -template_file seqfiles.template ' multi_core_option = ( '{}'.format(configs['n_cores']) if configs['n_cores'] and int(configs['n_cores']) > 1 else 'no' ) n_core_option = '{}'.format(configs['n_cores']) if configs['n_cores'] else '1' protein_db = op.join(configs['blast_db_dir'], 'nr') pdb_db = op.join(configs['blast_db_dir'], 'pdbaa') if mode == '3dcoffee': system_command = ( "t_coffee " + " -seq " + alignment_fasta_file + " -method=sap_pair,mustang_pair,TMalign_pair " + " -blast_server=LOCAL " + " -protein_db=" + protein_db + " -pdb_db=" + pdb_db + " -outorder=input" + " -output=fasta_aln" + " -pdb_min_sim=30" + # " -quiet" + # " -no_warning" + " -outfile=" + alignment_output_file + " -multi_core=no" + " -n_core=" + n_core_option + " -template_file=" + alignment_template_file ) if mode == 'expresso': system_command = ( 't_coffee' + ' -mode expresso' + ' -method sap_pair' + ' -seq ' + alignment_fasta_file + ' -blast_server=LOCAL' + " -protein_db=" + protein_db + " -pdb_db=" + pdb_db + ' -outorder=input' + ' -output fasta_aln' + ' -quiet -no_warning' + ' -outfile=' + alignment_output_file + ' -cache ignore ' + ' -multi_core ' + multi_core_option + # AS changed !!! ' -n_core ' + n_core_option # AS changed !!! ) if mode == 't_coffee': system_command = ( 't_coffee' + ' -mode expresso' + ' -method clustalw_pair,slow_pair' + ' -seq ' + alignment_fasta_file + ' -blast_server=LOCAL' + " -protein_db=" + protein_db + " -pdb_db=" + pdb_db + ' -outorder=input' + ' -output fasta_aln' + ' -quiet -no_warning' + ' -outfile=' + alignment_output_file + ' -multi_core ' + multi_core_option + # AS changed !!! ' -n_core ' + n_core_option # AS changed !!! ) if mode == 'quick': system_command = ( 't_coffee' + ' -mode quickaln' + ' -method clustalw_pair,slow_pair' + ' -seq ' + alignment_fasta_file + ' -blast_server=LOCAL' + " -protein_db=" + protein_db + " -pdb_db=" + pdb_db + ' -outorder=input' + ' -output fasta_aln' + ' -quiet -no_warning' + ' -outfile=' + alignment_output_file + ' -multi_core ' + multi_core_option + # AS changed !!! ' -n_core ' + n_core_option # AS changed !!! ) return system_command, tcoffee_env
[docs] def align(self, GAPOPEN=-0.0, GAPEXTEND=-0.0): """ Calls t_coffee (make sure BLAST is installed locally!). Parameters ---------- alignment_fasta_file : string A file containing the fasta sequences to be aligned alignment_template_file : string A file containing the structural templates for the fasta sequences described above GAPOPEN : int or str See t_coffee manual GAPEXTEND : int or str See t_coffee manual Returns -------- alignment_output_file : str Name of file which contains the alignment in fasta format. """ # try the alignment in expresso mode (structure based with sap alignment) alignment_output_file = op.join(configs['tcoffee_dir'], self.alignment_id + '.aln') system_command, tcoffee_env = self._get_tcoffee_system_command( self.alignment_fasta_file, self.alignment_template_file, alignment_output_file, self.mode) # Perform t_coffee alignment logger.debug("\nTCoffee system command:\n{}".format(system_command)) result, error_message, return_code = ( helper.subprocess_check_output_locally( configs['tcoffee_dir'], system_command, env=tcoffee_env) ) logger.debug("t_coffee results:\n{}".format(result.strip())) error_message_summary_idx = error_message.find( '* MESSAGES RECAPITULATION') if error_message_summary_idx == -1: error_message_summary = '' else: error_message_summary = error_message[error_message_summary_idx:] logger.debug("t_coffee errors:\n{}".format(error_message_summary.strip())) # Check if tcoffee had an unexpected exit and if not, create and return # the alignment object if return_code == 0: logger.info("Successfully made the alignment") return alignment_output_file else: logger.error( 'Structural alignment failed with the following error: {}' .format(error_message)) logger.error('Running quickalign alignment instead...') system_command, tcoffee_env = self._call_tcoffee_system_command( self.alignment_fasta_file, self.alignment_template_file, alignment_output_file, 'quick') result, error_message, return_code = ( helper.subprocess_check_output_locally( configs['tcoffee_dir'], system_command, env=tcoffee_env) ) if return_code == 0: return alignment_output_file else: logger.error('Even quickaln didn\'t work. Cannot create an alignment. Giving up.') raise errors.TcoffeeError(result, error_message, self.alignment_fasta_file, system_command)