Post date: Aug 30, 2013 8:10:44 PM
head -n 60000000 parsed_clean_lane3_Undetermined_R1.cat.fastq > stygoparnus15mil.fastq
cp /data/local/lycaeides_gbs/Assemblies/gbs_de_novo/lycaeides_40mil_denovo.smng.txt ./
mv lycaeides_40mil_denovo.smng.txt stygoparnus_15mil_denovo.smng.txt
emacs stygoparnus_15mil_denovo.smng.txt
#Things I changed:
loadSeq file:
"/data/local/july13_ut/stygoparnus15mil.fastq"
setParam matchSize:70
setParam minMatchPercent:92
setParam matchSpacing:100
setParam minContigSeqs: 10
RealignContigs
saveProject file: "/data/local/july13_ut/stygoparnus_15mil_denovo_mmp92.ace"
format:Phrap
saveReport file: "/data/local/july13_ut/stygoparnus_15mil_denovo_mmp92.report.txt"
closeProject
smng stygoparnus_15mil_denovo.smng.txt
#Results of denovo assembly:
Number of contigs: 228102
36% assembled
contig N50 of 87 bases
#Made consensus sequences (I kept only contigs that were 80 to 96 bases in length, for consistency with what I did for the other three taxa):
cp /data/local/lycaeides_gbs/Scripts/pruneContigs.pl ./
perl pruneContigs.pl stygoparnus_15mil_denovo_mmp92.ace 80 96
grep Contig pruned_stygoparnus_15mil_denovo_mmp92.ace | wc
Number of contigs after pruning: 227414
mv pruned_stygoparnus_15mil_denovo_mmp92.ace pruned_stygoparnus_15mil_denovo_mmp92.fasta
#Then I tried assembling these pruned contig sequences against each other to identify similar, potentially repetitive contigs.
cp stygoparnus_15mil_denovo.smng.txt stygoparnus_15mil_qc.smng.txt
emacs stygoparnus_15mil_qc.smng.txt
#I changed:
loadSeq file:
"/data/local/july13_ut/pruned_stygoparnus_15mil_denovo_mmp92.fasta"
setParam minMatchPercent:84
RealignContigs
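saveProject file: "/data/local/july13_ut/stygoparnus_15mil_qc_mmp84.fasta"
#NOTE(review): this is the same path that writeUnassembledSeqs uses further down in this script, so the two outputs collide (presumably the later write replaces this one). The de novo run saved its Phrap project with an .ace extension; use a distinct name here if the project file is needed.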
format:Phrap
saveReport file: "/data/local/july13_ut/stygoparnus_15mil_qc_mmp84.report.txt"
writeUnassembledSeqs file: "/data/local/july13_ut/stygoparnus_15mil_qc_mmp84.fasta"
closeProject
smng stygoparnus_15mil_qc.smng.txt
226532 contigs did not assemble with any other contig (these are the good ones, i.e. not flagged as similar/potentially repetitive)