Difference between revisions of "Applications/Ncbi-blast"
From HPC
(Created page with "==Application Details== * Description: NCBI-Blast finds regions of similarity between biological sequences. The program compares nucleotide or protein sequences to sequence d...") |
m |
||
| Line 10: | Line 10: | ||
===Command Set=== | ===Command Set=== | ||
| − | + | ncbi-blast provides the following commands: | |
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| + | * blast_formatter | ||
| + | * blastdb_aliastool | ||
| + | * blastdbcheck | ||
| + | * blastdbcmd | ||
| + | * blastn | ||
| + | * blastp | ||
| + | * blastx | ||
| + | * convert2blastmask | ||
| + | * deltablast | ||
| + | * dustmasker | ||
| + | * legacy_blast.pl | ||
| + | * makeblastdb | ||
| + | * makembindex | ||
| + | * makeprofiledb | ||
| + | * psiblast | ||
| + | * rpsblast | ||
| + | * rpstblastn | ||
| + | * segmasker | ||
| + | * tblastn | ||
| + | * tblastx | ||
| + | * update_blastdb.pl | ||
| + | * windowmasker | ||
| + | |||
| + | ===SLURM Script=== | ||
<pre style="background-color: #f5f5dc; color: black; font-family: monospace, sans-serif;"> | <pre style="background-color: #f5f5dc; color: black; font-family: monospace, sans-serif;"> | ||
| − | + | #!/bin/bash | |
| − | |||
| − | |||
| − | |||
| − | |||
| − | + | fasta=/home/user1/maker.all.proteins.fasta | |
| + | seqs_per_file=300 | ||
| + | files_per_dir=100 | ||
| + | BLAST_DB=/home/user1/database/nr_metazoa | ||
| + | prefix=Tcancriformis_maker2 | ||
| + | threads_per_job=28 | ||
| + | e=1e-5 | ||
| + | cul=10 | ||
| + | n_seqs=50 | ||
| + | fmt=5 #5=xml | ||
| + | basedir=$(pwd) | ||
| + | partition_script=/home/user1/ectools/partition.py | ||
| − | + | ########### | |
| − | + | echo -e "\nsplitting up files\n" | |
| − | + | #cat $fasta | sed 's/ .*//g' > $genome.fasta | |
| − | + | python $partition_script $seqs_per_file $files_per_dir $fasta | |
| − | |||
| − | |||
| − | |||
| − | == | + | count=$(ls -1 | grep -E "^[0-9]{4}" |wc -l) |
| + | for i in $(seq $count -1 1) | ||
| + | do | ||
| + | current=$(printf "%04d" $i) | ||
| + | echo -e "processing directory $current\n" | ||
| + | cd $current | ||
| − | + | for p in $(ls -1 | grep -E "p[0-9]{4}$" | sort -nr) | |
| + | do | ||
| − | + | echo -e "#!/bin/bash | |
| + | #SBATCH -J b-$current-$p-$prefix | ||
| + | #SBATCH -N 1 | ||
| + | #SBATCH --ntasks-per-node $threads_per_job | ||
| + | #SBATCH -o job-%j.out | ||
| + | #SBATCH -e job-%j.out | ||
| + | #SBATCH -p compute | ||
| − | + | #LOAD MODULE | |
| + | module load ncbi-blast/2.4.0 | ||
| + | # | ||
| + | date | ||
| + | cd $basedir/$current | ||
| − | '' | + | echo -e \"\\\nNumber of scaffolds to process:\\\t\$(cat $basedir/$current/${p} | grep \">\" | wc -l)\" |
| + | echo -e \"\\\nTotal length of scaffolds:\\\t\$(cat $basedir/$current/${p} | grep \">\" -v | perl -ne 'chomp; print \"\$_\"' | wc -m)\\\n\" | ||
| − | + | blastp -query ${p} -db $BLAST_DB -outfmt $fmt -max_target_seqs $n_seqs -culling_limit $cul -num_threads \$SLURM_NTASKS_PER_NODE -evalue $e -out ${p}-vs-nt-n$n_seqs.cul$cul.$e.blastp.out.xml | |
| − | + | echo -e \"\\\nDONE\\\n\" | |
| − | + | date" > run_blastn_${p}.slurm.sh | |
| + | sbatch run_blastn_${p}.slurm.sh | ||
| + | done | ||
| + | cd .. | ||
| + | done | ||
</pre> | </pre> | ||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
==Further Information== | ==Further Information== | ||
| − | * [ | + | * [https://blast.ncbi.nlm.nih.gov/Blast.cgi https://blast.ncbi.nlm.nih.gov/Blast.cgi] |
{| | {| | ||
Revision as of 13:24, 5 April 2017
Contents
Application Details
- Description: NCBI-Blast finds regions of similarity between biological sequences. The program compares nucleotide or protein sequences to sequence databases and calculates the statistical significance of the matches.
- Version: 2.4.0
- Modules: ncbi-blast/2.4.0
- Licence: Open-source (BLAST is a registered trademark of the National Library of Medicine)
Usage Examples
Command Set
ncbi-blast provides the following commands:
- blast_formatter
- blastdb_aliastool
- blastdbcheck
- blastdbcmd
- blastn
- blastp
- blastx
- convert2blastmask
- deltablast
- dustmasker
- legacy_blast.pl
- makeblastdb
- makembindex
- makeprofiledb
- psiblast
- rpsblast
- rpstblastn
- segmasker
- tblastn
- tblastx
- update_blastdb.pl
- windowmasker
SLURM Script
#!/bin/bash
#
# Split a protein FASTA file into chunks with partition.py and submit one
# SLURM blastp job per chunk.
#
# Workflow:
#   1. Run partition.py on $fasta with $seqs_per_file / $files_per_dir.
#      NOTE(review): this script assumes partition.py creates numbered
#      directories (0001, 0002, ...) in the current directory, each holding
#      chunk files whose names end in pNNNN - confirm against partition.py.
#   2. For every numbered directory (highest first) and every chunk file in it
#      (highest first), write run_blastn_<chunk>.slurm.sh and sbatch it from
#      inside that directory, so job-%j.out and the BLAST XML output are
#      written next to the chunk.
#
# Edit the settings below before use.
fasta=/home/user1/maker.all.proteins.fasta
seqs_per_file=300
files_per_dir=100
BLAST_DB=/home/user1/database/nr_metazoa
prefix=Tcancriformis_maker2
threads_per_job=28
e=1e-5
cul=10
n_seqs=50
fmt=5 #5=xml
basedir=$(pwd)
partition_script=/home/user1/ectools/partition.py
###########
echo -e "\nsplitting up files\n"
#cat $fasta | sed 's/ .*//g' > $genome.fasta
# Stop here if partitioning fails: otherwise chunks left over from an earlier
# run could be submitted by mistake.
if ! python "$partition_script" "$seqs_per_file" "$files_per_dir" "$fasta"; then
  echo "ERROR: partitioning of $fasta failed" >&2
  exit 1
fi

# Unmatched globs expand to nothing instead of the literal pattern.
shopt -s nullglob

# Collect the numbered chunk directories that really exist, instead of
# counting 'ls' output and guessing the names from the count.
dir_re='^[0-9]{4,}$'
chunk_dirs=()
for d in */; do
  d=${d%/}
  if [[ $d =~ $dir_re ]]; then
    chunk_dirs+=("$d")
  fi
done

if (( ${#chunk_dirs[@]} == 0 )); then
  echo "ERROR: no numbered chunk directories found in $basedir" >&2
  exit 1
fi

# Walk the directories from the highest number down to the lowest.
for (( di = ${#chunk_dirs[@]} - 1; di >= 0; di-- )); do
  current=${chunk_dirs[di]}
  echo -e "processing directory $current\n"
  # Use an absolute path and check the result, so a failed cd can never make
  # the script wander out of $basedir.
  if ! cd "$basedir/$current"; then
    echo "WARNING: cannot enter $basedir/$current, skipping" >&2
    continue
  fi

  # Chunk files: any name ending in 'p' followed by four digits.
  chunk_files=(*p[0-9][0-9][0-9][0-9])
  for (( fi_idx = ${#chunk_files[@]} - 1; fi_idx >= 0; fi_idx-- )); do
    p=${chunk_files[fi_idx]}
    job_script="run_blastn_${p}.slurm.sh"

    # Unquoted here-document: plain $var is expanded now (at generation time),
    # while \$... is written literally and evaluated later inside the job.
    cat > "$job_script" <<EOF
#!/bin/bash
#SBATCH -J b-$current-$p-$prefix
#SBATCH -N 1
#SBATCH --ntasks-per-node $threads_per_job
#SBATCH -o job-%j.out
#SBATCH -e job-%j.out
#SBATCH -p compute
#LOAD MODULE
module load ncbi-blast/2.4.0
#
date
cd $basedir/$current
echo -e "\nNumber of scaffolds to process:\t\$(cat $basedir/$current/${p} | grep ">" | wc -l)"
echo -e "\nTotal length of scaffolds:\t\$(cat $basedir/$current/${p} | grep ">" -v | perl -ne 'chomp; print "\$_"' | wc -m)\n"
blastp -query ${p} -db $BLAST_DB -outfmt $fmt -max_target_seqs $n_seqs -culling_limit $cul -num_threads \$SLURM_NTASKS_PER_NODE -evalue $e -out ${p}-vs-nt-n$n_seqs.cul$cul.$e.blastp.out.xml
echo -e "\nDONE\n"
date
EOF

    # Keep going with the remaining chunks if one submission is rejected.
    if ! sbatch "$job_script"; then
      echo "WARNING: sbatch failed for $basedir/$current/$job_script" >&2
    fi
  done

  cd "$basedir" || exit 1
done
Further Information
| |