Post date: Oct 15, 2013 3:41:22 PM
head -n 60000000 parsed_clean_TXState14_NoIndex_L005_R1_001.fastq > Esosorum15mil.fastq
cp /data/local/july13_ut/stygoparnus_15mil_denovo.smng.txt ./
mv stygoparnus_15mil_denovo.smng.txt Esosorum_15mil_denovo.smng.txt
emacs Esosorum_15mil_denovo.smng.txt
#Things I changed:
loadSeq file:
"/data/local/august13_ut/Esosorum15mil.fastq"
setParam matchSize:70
setParam minMatchPercent:92
setParam matchSpacing:100
setParam minContigSeqs: 10
RealignContigs
saveProject file: "/data/local/august13_ut/Esosorum_15mil_denovo_mmp92.ace"
format:Phrap
saveReport file: "/data/local/august13_ut/Esosorum_15mil_denovo_mmp92.report.txt"
closeProject
smng Esosorum_15mil_denovo.smng.txt &
#Results of denovo assembly with 15 million reads:
Assembly Totals
Contigs: 54271
Contigs > 2K: 0
Assembled Sequences: 2216620
Unassembled Sequences: 12783380
Sequences not assembled due to complete trimming: 251720
All Sequences: 15000000
Contig N50: 87 bases
** Only 15% assembled **
Compare to Stygoparnus:
Assembly Totals
Contigs: 228102
Contigs > 2K: 0
Assembled Sequences: 5441544
Unassembled Sequences: 9558456
Sequences not assembled due to complete trimming: 398734
All Sequences: 15000000
Contig N50: 87 bases
** 36% assembled **
***Not many reads assembled. This could be because they are salamanders ("sallies") with huge genomes. So, I'm going to try another assembly with 40 million reads instead, and drop minContigSeqs to 8.
head -n 160000000 parsed_clean_TXState14_NoIndex_L005_R1_001.fastq > Esosorum40mil.fastq
cp Esosorum_15mil_denovo.smng.txt Esosorum_40mil_denovo.smng.txt
emacs Esosorum_40mil_denovo.smng.txt
#Things I changed:
loadSeq file:
"/data/local/august13_ut/Esosorum40mil.fastq"
setParam matchSize:70
setParam minMatchPercent:92
setParam matchSpacing:100
setParam minContigSeqs: 8
RealignContigs
saveProject file: "/data/local/august13_ut/Esosorum_40mil_denovo_mmp92.ace"
format:Phrap
saveReport file: "/data/local/august13_ut/Esosorum_40mil_denovo_mmp92.report.txt"
closeProject
smng Esosorum_40mil_denovo.smng.txt &
#Results of denovo assembly with 40 million reads and minContigSeqs: 8
Assembly Totals
Contigs: 583340
Contigs > 2K: 0
Assembled Sequences: 11157637
Unassembled Sequences: 28842363
Sequences not assembled due to complete trimming: 725164
All Sequences: 40000000
Contig N50: 87 bases
** 28% assembled this time. But is this too many contigs now? **