#!/usr/bin/env bash
#
# Bash commands (Dec 3, 2016)
# Text form can be found at my github.
#
# NOTE(review): this looks like a collection of independent recipes rather
# than one pipeline meant to run top to bottom (several recipes reuse output
# names such as results.txt and rest.out, and some are destructive: rm, mv,
# rename). Confirm before executing the file as a whole.

# Makes bioperl index file of GOS CN assembly.
bp_index.pl -dir . -fmt fasta GOS_ass_CN_index \
  /usr/local/depot/projects/GOS/analysis/jgoll-CN_2012_92pct-annotation/metagene_seq.faa

# Loops through a text file with one accession per line and extracts each
# one using bp_fetch.pl. `read -r` keeps backslashes literal; the `|| [[ ... ]]`
# clause still processes a last line that has no trailing newline.
while read -r accession_number || [[ -n "$accession_number" ]]; do
  [[ -n "$accession_number" ]] || continue # ignore blank lines
  bp_fetch "net::${accession_number}"
done < accessions.list > results.txt

# Reads an ID file containing scaffold IDs of TRAP SBP, then looks each ID up
# in the list of *all* scaffold IDs with the annotation 'TRAP' and counts the
# number of occurrences each SBP scaffold has in that file.
# The ID is used as a grep regular expression, not as a fixed string.
# NOTE: writes to results.txt, the same name the bp_fetch recipe above uses.
while read -r accession_number || [[ -n "$accession_number" ]]; do
  [[ -n "$accession_number" ]] || continue # an empty pattern matches every line
  grep -c -- "${accession_number}" TRAP_CN_total_assembly
done < TRAP_SBP_scf_IDs.formatted.list > results.txt

# Replaces any run of one or more whitespace characters with a single tab
# (the g is for global, i.e. every run on the line).
sed -r 's/\s+/\t/g' rest.out > rest1.out

# Removes one or more whitespace characters only at the end of each line.
sed -r 's/\s+$//' 1-3 > 1-3.out

# Removes one or more whitespace characters only at the beginning of each line.
sed -r 's/^\s+//' rest > rest.out

# Splits each line on multiple single-character delimiters and prints the
# second field. The separator is a character class, so every single '>' or
# '.' is a delimiter (not the literal string "..."); a value that itself
# contains a '.' is therefore cut at that dot.
awk -F '[>.]' '{print $2}' file

# Takes a uclust output file, turns every whitespace run into a tab, selects
# the second column, removes the first ">" and the first "..." from that
# column, and writes the result to "out".
sed -r 's/\s+/\t/g' art \
  | awk -F '\t' '{print $2}' \
  | sed -r -e 's/>//' -e 's/\.\.\.//' > out

# Removes all rows with a "." in the 7th column (prints every other row).
# "$file" must be set to the input path before running this recipe.
awk '$7 != "."' "$file"

# Adds "Cluster_" to the beginning of each line in the file.
sed -r 's/^/Cluster_/' test > test.new

# Renames all files named *_05242016.fna to *.fna.
for f in *_05242016.fna; do
  [[ -e "$f" ]] || continue # glob matched nothing; skip the literal pattern
  mv -- "$f" "${f%_05242016.fna}.fna"
done

# Deletes all files listed (one path per line) in "remove_list.txt".
# IFS= and the quotes keep each line as exactly one file name, so names with
# spaces or glob characters cannot expand into additional paths.
while IFS= read -r entry || [[ -n "$entry" ]]; do
  [[ -n "$entry" ]] || continue # ignore blank lines
  rm -- "$entry"
done < remove_list.txt

# Counts the (non-hidden) entries in every subdirectory of the current
# directory. Counting glob matches avoids parsing `ls` output; nullglob is
# enabled only inside the command substitution's subshell.
for dir in */; do
  [[ -d "$dir" ]] || continue # no subdirectories: skip the literal '*/'
  count=$(
    shopt -s nullglob
    set -- "$dir"*
    printf '%d' "$#"
  )
  printf '%s has this many entries %s\n' "$dir" "$count"
done

## RENAME all files in directory
# from scB241_528N20.contigs.fna to scB241_528N20.fna
# NOTE(review): `rename FROM TO FILE...` looks like the util-linux syntax, not
# the Perl `rename 's/../../'` variant -- confirm which one is installed.
rename .contigs.fna '.fna' *

## RENAME all files in directory
# from scB241_528N20.fna to B241_528N20.fna
rename scB 'B' *

## For every file in the directory, prints the path with a trailing ".fna"
## removed, then the bare file name (that path without its leading
## directories).
for f in /nobackup1/shogle/pro_genomes/sags/simons/*; do
  path=${f%.fna}
  printf '%s\n' "$path"
  name=${path##*/}
  printf '%s\n' "$name"
done

# Splits a fasta file into files that each hold a single fasta entry.
# Every header line (starting with '>') opens a new file named after the
# header text plus ".fa"; the following lines are appended to that file.
# Lines are written with printf and quoted so that '*' (stop codons) and
# whitespace are copied verbatim instead of being glob-expanded or collapsed.
outfile=
while IFS= read -r line || [[ -n "$line" ]]; do
  if [[ "$line" == '>'* ]]; then
    header=${line#>}
    # '/' cannot appear in a file name, so it is replaced with '_'.
    outfile="${header//\//_}.fa"
    printf '%s\n' "$line" > "$outfile"
  elif [[ -n "$outfile" ]]; then
    printf '%s\n' "$line" >> "$outfile"
  else
    # Sequence data before any header has no file to go to.
    printf 'skipping line before first FASTA header: %s\n' "$line" >&2
  fi
done < tmp.fasta

## Finds lines that exist in file "all" that don't exist in file "have".
comm -13 <(sort have) <(sort all) > ids_4_phylosift.txt

## Shows differences between file "have" and "all" in
# side by side format and with a width of 72 characters.
diff -y -W 72 <(sort have) <(sort all)

## Searches the NCBI taxonomy names file (names.dmp) for a (partial) list of
## ids given one per line in names.txt. For each matching line all tabs are
## removed, the first two '|'-separated fields are kept, and the remaining
## '|' is turned into a tab.
# NOTE(review): gsed/gcut are presumably the GNU sed/cut builds installed
# with a "g" prefix (e.g. on macOS) -- confirm on the target machine.
while read -r entry || [[ -n "$entry" ]]; do
  # Skip blank lines: without a pattern grep would read from the loop's own
  # input (names.txt) and swallow the remaining ids.
  [[ -n "$entry" ]] || continue
  grep -- "$entry" names.dmp \
    | gsed 's/\t//g' \
    | gcut -d "|" -f1,2 \
    | gsed 's/|/\t/g'
done < names.txt > filtered_pro.txt