@article{ALeS20, author = {Mallik, Arnab and Ilie, Lucian}, title = {{ALeS}: Adaptive-length spaced-seed design}, journal = {Bioinformatics}, year = {2020}, optkey = {}, optvolume = {}, optnumber = {}, optpages = {}, month = {November}, doi = {10.1093/bioinformatics/btaa945}, url = {https://doi.org/10.1093/bioinformatics/btaa945}}@article{MiBF20, author = {Chu, Justin and Mohamadi, Hamid and Erhan, Emre and Tse, Jeffery and Chiu, Readman and Yeo, Sarah and Birol, Inanc}, title = {Mismatch-tolerant, alignment-free sequence classification using multiple spaced seeds and multiindex Bloom filters}, month = {July}, year = {2020}, journal = {Proceedings of the National Academy of Sciences}, volume = {117}, number = {29}, pages = {16961--16968}, doi = {10.1073/pnas.1903436117}, url = {https://doi.org/10.1073/pnas.1903436117}}@article{Sesame20, author = {Filion, Guillaume J. and Cortini, Ruggero and Zorita, Eduard}, title = {Calibrating Seed-Based Heuristics to Map Short Reads With Sesame}, journal = {Frontiers in Genetics}, year = {2020}, volume = {11}, pages = {572}, month = {June}, url = {https://doi.org/10.3389/fgene.2020.00572}, doi = {10.3389/fgene.2020.00572}}@article{MultiSpaM20, author = {Dencker, Thomas and Leimeister, Chris-Andre and Gerth, Michael and Bleidorn, Christoph and Snir, Sagi and Morgenstern, Burkhard}, title = {{Multi-SpaM}: a maximum-likelihood approach to phylogeny reconstruction using multiple spaced-word matches and quartet trees}, journal = {{NAR} Genomics and Bioinformatics}, year = {2020}, volume = {2}, number = {1}, month = {March}, doi = {10.1093/nargab/lqz013}, url = {https://doi.org/10.1093/nargab/lqz013}}@article{RohlingMorgenstern20, author = {R\"{o}hling, Sophie and Linne, Alexander and Schellhorn, Jendrik and Hosseini, Morteza and Dencker, Thomas and Morgenstern, Burkhard}, title = {The number of $k$-mer matches between two DNA sequences as a function of $k$ and applications to estimate phylogenetic distances}, journal = {PLOS ONE},
year = {2020}, month = {February}, doi = {10.1371/journal.pone.0228070}, url = {https://doi.org/10.1371/journal.pone.0228070}}@article{ELMERI20, author = {Salmela, Leena and Mukherjee, Kingshuk and Puglisi, Simon J. and Muggli, Martin D. and Boucher, Christina}, title = {Fast and accurate correction of optical mapping data via spaced seeds}, journal = {Bioinformatics}, year = {2020}, volume = {36}, number = {3}, pages = {682--689}, month = {February}, doi = {10.1093/bioinformatics/btz663}, url = {https://doi.org/10.1093/bioinformatics/btz663}}@article{SWeeP20, author = {De Pierri, Camilla Reginatto and Voyceik, Ricardo and Santos de Mattos, Let{\'\i}cia Graziela Costa and Kulik, Mariane Gon{\c{c}}alves and Camargo, Josu{\'e} Oliveira and Repula de Oliveira, Aryel Marlus and de Lima Nichio, Bruno Thiago and Marchaukoski, Jeroniza Nunes and da Silva Filho, Antonio Camilo and Guizelini, Dieval and Ortega, J. Miguel and Pedrosa, Fabio O. and Raittz, Roberto Tadeu}, title = {{SWeeP}: representing large biological sequences datasets in compact vectors}, journal = {Nature Scientific Reports}, year = {2020}, volume = {10}, number = {91}, month = {January}, doi = {10.1038/s41598-019-55627-4}, url = {https://doi.org/10.1038/s41598-019-55627-4}}@article{KRAKENTWO19, author = {Wood, Derrick E.
and Lu, Jennifer and Langmead, Ben}, title = {Improved metagenomic analysis with Kraken 2}, journal = {Genome Biology}, year = {2019}, volume = {20}, number = {257}, month = {November}, doi = {10.1186/s13059-019-1891-0}, url = {https://doi.org/10.1186/s13059-019-1891-0}}@article{ProtSpaM19, author = {Leimeister, Chris-Andre and Schellhorn, Jendrik and D{\"o}rrer, Svenja and Gerth, Michael and Bleidorn, Christoph and Morgenstern, Burkhard}, title = {{Prot-SpaM}: Fast alignment-free phylogeny reconstruction based on whole-proteome sequences}, journal = {GigaScience}, year = {2019}, volume = {8}, number = {3}, month = {March}, doi = {10.1093/gigascience/giy148}, url = {https://doi.org/10.1093/gigascience/giy148}}@article{ReadSpaM19, author = {Lau, Anna-Katharina and D{\"o}rrer, Svenja and Leimeister, Chris-Andr{\'e} and Bleidorn, Christoph and Morgenstern, Burkhard}, title = {{Read-SpaM}: assembly-free and alignment-free comparison of bacterial genomes with low sequencing coverage}, journal = {BMC Bioinformatics}, year = {2019}, volume = {20}, number = {638}, month = {December}, doi = {10.1186/s12859-019-3205-7}, url = {https://doi.org/10.1186/s12859-019-3205-7}}@inproceedings{GirottoCominPizziBBCC18, author = {Girotto, Samuele and Comin, Matteo and Pizzi, Cinzia}, title = {Efficient computation of spaced seed hashing with block indexing}, booktitle = {Proceedings from the 12th International BBCC conference}, year = {2018}, volume = {19 suppl 15}, number = {441}, series = {BMC Bioinformatics}, month = {November}, doi = {10.1186/s12859-018-2415-8}, url = {https://doi.org/10.1186/s12859-018-2415-8}}@inproceedings{MultiSpaM18, author = {Dencker, Thomas and Leimeister, Chris-Andre and Gerth, Michael and Bleidorn, Christoph and Snir, Sagi and Morgenstern, Burkhard}, title = {{Multi-SpaM}: a {M}aximum-{L}ikelihood approach to Phylogeny reconstruction Using Multiple Spaced-Word Matches and Quartet Trees}, booktitle = {Proceedings of the 16th RECOMB international conference on
Comparative Genomics, Magog-Orford (Canada)}, year = {2018}, volume = {11183}, series = {Lecture Notes in Computer Science}, pages = {227--241}, month = {October}, publisher = {Springer}, doi = {10.1007/978-3-030-00834-5_13}, url = {https://doi.org/10.1007/978-3-030-00834-5_13}}@article{GirottoCominPizziAMB18, author = {Girotto, Samuele and Comin, Matteo and Pizzi, Cinzia}, title = {{FSH}: fast spaced seed hashing exploiting adjacent hashes}, journal = {Algorithms for Molecular Biology}, year = {2018}, volume = {13}, number = {8}, month = {March}, doi = {10.1186/s13015-018-0125-4}, url = {https://almob.biomedcentral.com/articles/10.1186/s13015-018-0125-4}, note = {(earlier version in WABI 2017)}}@article{MartinCSSC18, author = {Martin, Donald E. K.}, title = {Minimal auxiliary Markov chains through sequential elimination of states}, journal = {Communications in Statistics - Simulation and Computation}, year = {2018}, volume = {48}, number = {4}, pages = {1040--1054}, month = {February}, doi = {10.1080/03610918.2017.1406505}, url = {https://doi.org/10.1080/03610918.2017.1406505}}@article{PhylOligo17, author = {Mallet, Ludovic and Bitard-Feildel, Tristan and Cerutti, Franck and Chiapello, H\'el\`ene}, title = {{PhylOligo}: a package to identify contaminant or untargeted organism sequences in genome assemblies}, journal = {Bioinformatics}, year = {2017}, volume = {33}, number = {20}, pages = {3283--3285}, month = {October}, doi = {10.1093/bioinformatics/btx396}, url = {https://doi.org/10.1093/bioinformatics/btx396}}@article{GirottoCominPizziTCS17, author = {Girotto, Samuele and Comin, Matteo and Pizzi, Cinzia}, title = {Metagenomic reads binning with spaced seeds}, journal = {Theoretical Computer Science}, year = {2017}, volume = {698}, pages = {88--99}, month = {October}, doi = {10.1016/j.tcs.2017.05.023}, url = {https://doi.org/10.1016/j.tcs.2017.05.023}}@inproceedings{GirottoCominPizziWABI17, author = {Girotto, Samuele and Comin, Matteo and Pizzi, Cinzia}, title = 
{Fast Spaced Seed Hashing}, booktitle = {Proceedings of the 17th International Workshop on Algorithms in Bioinformatics ({WABI}), Boston (USA)}, year = {2017}, volume = {88}, pages = {7:1--7:14}, month = {August}, series = {Leibniz International Proceedings in Informatics (LIPIcs)}, publisher = {Schloss Dagstuhl--Leibniz-Zentrum fuer Informatik}, doi = {10.4230/LIPIcs.WABI.2017.7}, url = {https://drops.dagstuhl.de/opus/volltexte/2017/7650/}, editor = {Russell Schwartz and Knut Reinert}}@inproceedings{GirottoCominPizziCIBCB17, author = {Girotto, Samuele and Comin, Matteo and Pizzi, Cinzia}, title = {Binning metagenomic reads with probabilistic sequence signatures based on spaced seeds}, booktitle = {Proceedings of the 12th IEEE Conference on Computational Intelligence in Bioinformatics and Computational Biology (CIBCB), Manchester (UK)}, year = {2017}, month = {August}, doi = {10.1109/CIBCB.2017.8058538}, url = {https://ieeexplore.ieee.org/abstract/document/8058538}}@article{LeimeisterSohrabiMorgensternBioinformatics17, author = {Leimeister, Chris-Andr{\'e} and Sohrabi-Jahromi, Salma and Morgenstern, Burkhard}, title = {Fast and accurate phylogeny reconstruction using filtered spaced-word matches}, journal = {Bioinformatics}, year = {2017}, volume = {33}, number = {7}, pages = {971--979}, month = {April}, url = {https://dx.doi.org/10.1093/bioinformatics/btw776}, doi = {10.1093/bioinformatics/btw776}}@article{WALT16, author = {Chen, Haifeng and Smith, Andrew D. 
and Chen, Ting}, title = {{WALT}: fast and accurate read mapping for bisulfite sequencing}, journal = {Bioinformatics}, year = {2016}, volume = {32}, number = {22}, pages = {3507--3509}, month = {July}, doi = {10.1093/bioinformatics/btw490}, url = {https://dx.doi.org/10.1093/bioinformatics/btw490}}@article{rasbhari16, author = {Hahn, Lars and Leimeister, Chris-Andr{\'e} and Ounit, Rachid and Lonardi, Stefano and Morgenstern, Burkhard}, title = {{rasbhari}: optimizing spaced seeds for database searching, read mapping and alignment-free sequence comparison}, journal = {{PLoS} Computational Biology}, year = {2016}, volume = {12}, number = {10}, pages = {e1005107}, month = {October}, doi = {10.1371/journal.pcbi.1005107}, url = {https://dx.doi.org/10.1371/journal.pcbi.1005107}}@article{OunitLonardiBioinformatics16, author = {Ounit, Rachid and Lonardi, Stefano}, title = {Higher classification sensitivity of short metagenomic reads with {CLARK-S}}, journal = {Bioinformatics}, year = {2016}, volume = {32}, number = {24}, pages = {3823--3825}, month = {August}, doi = {10.1093/bioinformatics/btw542}, url = {https://doi.org/10.1093/bioinformatics/btw542}}@inproceedings{FLAK16, title = {{FLAK}: Ultra-Fast {F}uzzy {W}hole {G}enome {A}lignment}, author = {Healy, John}, booktitle = {Proceedings of the 10th International Conference on Practical Applications of Computational Biology \& Bioinformatics (PACBB)}, pages = {123--131}, year = {2016}, month = {June}, series = {Advances in Intelligent Systems and Computing}, volume = {477}, publisher = {Springer}, doi = {10.1007/978-3-319-40126-3_13}, url = {https://link.springer.com/chapter/10.1007/978-3-319-40126-3_13}}@article{GraphMap16, author = {Sovi\'{c}, Ivan and \v{S}iki\'{c}, Mile and Wilm, Andreas and Fenlon, Shannon Nicole and Chen, Swaine and Nagarajan, Niranjan}, title = {Fast and sensitive mapping of nanopore sequencing reads with {GraphMap}}, journal = {Nature Communications}, year = {2016}, volume = {7}, number = {11307}, 
month = {April}, doi = {10.1038/ncomms11307}, url = {https://www.nature.com/articles/ncomms11307}}@article{WangXuLiuNatureScientificReports16, author = {Wang, Rong and Xu, Yong and Liu, Bin}, title = {Recombination spot identification Based on gapped k-mers}, journal = {Nature Scientific Reports}, year = {2016}, volume = {6}, number = {23934}, month = {March}, doi = {10.1038/srep23934}, url = {https://www.nature.com/articles/srep23934}, note = {RETRACTED: 20 March 2018}}@inproceedings{KLAST15, title = {{KLAST}: fast and sensitive software to compare large genomic databanks on cloud}, author = {Petrov, Ivaylo and Brillet, S{\'e}bastien and Drezen, Erwan and Quiniou, Sylvain and Antin, L. and Durand, Patrick and Lavenier, Dominique}, booktitle = {Proceedings of the World Congress in Computer Science, Computer Engineering, and Applied Computing (WORLDCOMP), Las Vegas (USA)}, year = {2015}, url = {https://worldcomp-proceedings.com/proc/p2015/BIC2743.pdf}, optvolume = {}, optnumber = {}, optseries = {}, pages = {85--90}, month = {July}, optaddress = {}, optorganization = {}, optpublisher = {}, optnote = {}, optannote = {}}@inproceedings{OunitLonardiWABI15, author = {Ounit, Rachid and Lonardi, Stefano}, title = {Higher Classification Accuracy of Short Metagenomic Reads by Discriminative Spaced k-mers}, booktitle = {Proceedings of the 15th International Workshop on Algorithms in Bioinformatics ({WABI}), Atlanta (USA)}, year = {2015}, volume = {9289}, pages = {286--295}, month = {August}, series = {Lecture Notes in Bioinformatics}, publisher = {Springer}, doi = {10.1007/978-3-662-48221-6_21}, url = {https://link.springer.com/chapter/10.1007/978-3-662-48221-6_21}}@article{BirolEtAlIHG15, author = {Birol, Inan\c{c} and Chu, Justin and Mohamadi, Hamid and Jackman, Shaun D. and Raghavan, Karthika and Vandervalk, Benjamin P.
and Raymond, Anthony and Warren, Ren{\'e} L.}, title = {Spaced Seed Data Structures for De Novo Assembly}, journal = {International Journal of Genomics}, year = {2015}, month = {March}, volume = {2015}, pages = {ID 196591}, doi = {10.1155/2015/196591}, pdf = {https://downloads.hindawi.com/journals/ijg/2015/196591.pdf}}@article{GheraibiaEtAlIJSSCI15, author = {Gheraibia, Youcef and Moussaoui, Abdelouahab and Djenouri, Youcef and Kabir, Sohag and Yin, Peng-Yeng and Mazouzi, Smaine}, title = {Penguin Search Optimisation Algorithm for Finding Optimal Spaced Seeds}, journal = {International Journal of Software Science and Computational Intelligence (IJSSCI)}, year = {2015}, volume = {7}, number = {2}, pages = {85--99}, month = {November}, doi = {10.4018/IJSSCI.2015040105}, url = {https://www.igi-global.com/article/penguin-search-optimisation-algorithm-for-finding-optimal-spaced-seeds/141243}}@inproceedings{DoTranThiNICS15, author = {Do, Phan-Thuan and Tran-Thi, Cam-Giang}, title = {An improvement of the overlap complexity in the spaced seed searching problem between genomic {DNA}s}, booktitle = {Proceedings of the 2nd {N}ational Foundation for Science and Technology Development Conference on {I}nformation and {C}omputer {S}cience (NICS), Ho Chi Minh City (Vietnam)}, year = {2015}, opteditor = {}, optvolume = {}, optnumber = {}, optseries = {}, pages = {271--276}, month = {September}, publisher = {IEEE Computer Society Press}, url = {https://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=7302205}, doi = {10.1109/NICS.2015.7302205}}@article{BrindaSykulskiKucherovBioinformatics15, author = {B{\v{r}}inda, Karel and Sykulski, Maciej and Kucherov, Gregory}, title = {Spaced seeds improve k-mer based metagenomic classification}, journal = {Bioinformatics}, year = {2015}, volume = {31}, number = {22}, pages = {3584--3592}, month = {July}, doi = {10.1093/bioinformatics/btv419}, url = {https://bioinformatics.oxfordjournals.org/content/31/22/3584}, eprint = 
{1502.06256}}@inproceedings{TranGiraudVarreHICOMB15, author = {Tran, Tuan Tu and Giraud, Mathieu and Varr{\'e}, Jean-St{\'e}phane}, title = {Perfect Hashing Structures for Parallel Similarity Searches}, booktitle = {Proceedings of the 14th IEEE International Workshop on High Performance Computational Biology (HICOMB), Hyderabad, India}, year = {2015}, opteditor = {}, optvolume = {}, optnumber = {}, optseries = {}, pages = {332--341}, month = {May}, pdf = {https://www.hicomb.org/HiCOMB2015/papers/HICOMB2015-01.pdf}, doi = {10.1109/IPDPSW.2015.105}}@article{EgidiManziniJBCB15, author = {Egidi, Lavinia and Manzini, Giovanni}, title = {Multiple seeds sensitivity using a single seed with threshold}, journal = {Journal of Bioinformatics and Computational Biology}, year = {2015}, month = {March}, volume = {13}, number = {4}, pages = {1550011}, url = {https://www.worldscientific.com/doi/abs/10.1142/S0219720015500110}, doi = {10.1142/S0219720015500110}}@article{MorgensternEtAlAMB15, author = {Morgenstern, Burkhard and Zhu, Bingyao and Horwege, Sebastian and Leimeister, Chris-Andr{\'e}}, title = {Estimating evolutionary distances between genomic sequences from spaced-word matches}, journal = {Algorithms for Molecular Biology}, year = {2015}, month = {February}, volume = {10}, number = {5}, doi = {10.1186/s13015-015-0032-x}, url = {https://almob.biomedcentral.com/articles/10.1186/s13015-015-0032-x}, pdf = {https://www.almob.org/content/pdf/s13015-015-0032-x.pdf}}@article{DIAMOND14, author = {Buchfink, Benjamin and Xie, Chao and Huson, Daniel H.}, title = {Fast and sensitive protein alignment using {DIAMOND}}, journal = {Nature Methods}, year = {2014}, volume = {12}, pages = {59--60}, month = {November}, url = {https://www.nature.com/nmeth/journal/v12/n1/full/nmeth.3176.html}, doi = {10.1038/nmeth.3176}}@article{GiaquintaEtAlTCS14, author = {Giaquinta, Emanuele and Fredriksson, Kimmo and Grabowski, Szymon and Tomescu, Alexandru I.
and Ukkonen, Esko}, title = {Motif matching using gapped patterns}, journal = {Theoretical Computer Science}, year = {2014}, volume = {548}, optnumber = {}, pages = {1--13}, month = {September}, url = {https://doi.org/10.1016/j.tcs.2014.06.032}, doi = {10.1016/j.tcs.2014.06.032}}@article{GhandiEtAlPLoSComputationalBiology14, author = {Ghandi, Mahmoud and Lee, Dongwon and Mohammad-Noori, Morteza and Beer, Michael A.}, title = {Enhanced Regulatory Sequence Prediction Using Gapped k-mer Features}, year = {2014}, month = {July}, journal = {{PLoS} Computational Biology}, volume = {10}, number = {7}, pages = {e1003711}, doi = {10.1371/journal.pcbi.1003711}, url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003711}}@article{GhandiEtAlJMB14, author = {Ghandi, Mahmoud and Mohammad-Noori, Morteza and Beer, Michael A.}, title = {Robust k-mer frequency estimation using gapped k-mers}, journal = {Journal of Mathematical Biology}, year = {2014}, month = {August}, volume = {69}, number = {2}, pages = {469--500}, doi = {10.1007/s00285-013-0705-3}, url = {https://link.springer.com/article/10.1007/s00285-013-0705-3}, pdf = {https://link.springer.com/content/pdf/10.1007/s00285-013-0705-3.pdf}}@article{LeimeisterEtAlBioinformatics14, author = {Leimeister, Chris-Andr{\'e} and Boden, Marcus and Horwege, Sebastian and Lindner, Sebastian and Morgenstern, Burkhard}, title = {Fast alignment-free sequence comparison using spaced-word frequencies}, journal = {Bioinformatics}, year = {2014}, month = {March-April}, volume = {30}, number = {14}, pages = {1991--1999}, doi = {10.1093/bioinformatics/btu177}, url = {https://bioinformatics.oxfordjournals.org/content/30/14/1991}, pdf = {https://bioinformatics.oxfordjournals.org/content/30/14/1991.full.pdf}}@article{HorwegeEtAlNAR14, author = {Horwege, Sebastian and Lindner, Sebastian and Boden, Marcus and Hatje, Klas and Kollmar, Martin and Leimeister, Chris-Andr{\'e} and Morgenstern, Burkhard}, title = {Spaced words and
kmacs: Fast alignment-free sequence comparison based on inexact word matches}, journal = {Nucleic Acids Research}, year = {2014}, month = {May}, volume = {42}, number = {W1}, pages = {W7--W11}, doi = {10.1093/nar/gku398}, url = {https://nar.oxfordjournals.org/content/42/W1/W7}, pdf = {https://nar.oxfordjournals.org/content/42/W1/W7.full.pdf}}@article{HealyDesmondTCBB14, author = {Healy, John and Chambers, Desmond}, title = {Approximate k-Mer Matching Using Fuzzy Hash Maps}, journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)}, year = {2014}, volume = {11}, number = {1}, pages = {258--264}, month = {March}, doi = {10.1109/TCBB.2014.2309609}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6758383}}@article{LiMaZhangTCBB14, author = {Li, Weiming and Ma, Bin and Zhang, Kaizhong}, title = {Optimizing spaced k-mer neighbors for efficient filtration in protein similarity search}, journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)}, year = {2014}, month = {February}, volume = {11}, number = {2}, pages = {398--406}, doi = {10.1109/tcbb.2014.2306831}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6744614}}@inproceedings{BrindaAFL14, author = {B{\v{r}}inda, Karel}, title = {Languages of lossless seeds}, booktitle = {Proceedings of the 14th International Conference on {A}utomata and {F}ormal {L}anguages (AFL), Szeged, Hungary}, year = {2014}, volume = {151}, series = {Electronic Proceedings in Theoretical Computer Science }, editor = {{\'E}sik, Zolt{\'a}n and F{\"u}l{\"o}p, Zolt{\'a}n}, pages = {139--150}, doi = {10.4204/EPTCS.151.9}, url = {https://rvg.web.cse.unsw.edu.au/eptcs/paper.cgi?AFL2014.9}, pdf = {https://rvg.web.cse.unsw.edu.au/eptcs/paper.cgi?151.9.pdf}}@inproceedings{GagieManziniValenzuelaICABD14, author = {Gagie, Travis and Manzini, Giovanni and Valenzuela, Daniel}, title = {Compressed Spaced Suffix Arrays}, booktitle = {Proceedings of the 2nd {I}nternational 
{C}onference on {A}lgorithms for {B}ig {D}ata ({ICABD}), Palermo (Italy)}, year = {2014}, volume = {1146}, series = {CEUR-WS}, pages = {37--45}, pdf = {https://ceur-ws.org/Vol-1146/paper7.pdf}}@article{ShresthaEtAllBIB2014, author = {Shrestha, Anish Man Singh and Frith, Martin C. and Horton, Paul}, title = {A bioinformatician's guide to the forefront of suffix array construction algorithms}, journal = {Briefings in Bioinformatics}, year = {2014}, month = {January}, volume = {15}, number = {2}, pages = {138--154}, doi = {10.1093/bib/bbt081}, url = {https://bib.oxfordjournals.org/content/15/2/138}, pdf = {https://bib.oxfordjournals.org/content/15/2/138.full.pdf}}@article{EgidiManziniFI14, author = {Egidi, Lavinia and Manzini, Giovanni}, title = {Spaced Seeds Design Using Perfect Rulers}, journal = {Fundamenta Informaticae}, year = {2014}, volume = {131}, number = {2}, pages = {187--203}, month = {March}, doi = {10.3233/FI-2014-1009}, url = {https://content.iospress.com/articles/fundamenta-informaticae/fi131-2-02}, note = {(earlier version in SPIRE 2011)}}@article{EgidiManziniTCS14, author = {Egidi, Lavinia and Manzini, Giovanni}, title = {Design and analysis of periodic multiple seeds}, journal = {Theoretical Computer Science}, year = {2014}, volume = {522}, pages = {62--76}, month = {February}, doi = {10.1016/j.tcs.2013.12.007}, url = {https://www.sciencedirect.com/science/article/pii/S0304397513009092}}@article{EgidiManziniJCSS13, author = {Egidi, Lavinia and Manzini, Giovanni}, title = {Better spaced seeds using quadratic residues}, journal = {{J}ournal of {C}omputer and {S}ystem {S}ciences}, year = {2013}, month = {November}, volume = {79}, number = {7}, pages = {1144--1155}, doi = {10.1016/j.jcss.2013.03.002}, url = {https://www.sciencedirect.com/science/article/pii/S0022000013000664}}@inproceedings{BodenEtAlGCB13, author = {Boden, Marcus and Sch\"{o}neich, Martin and Horwege, Sebastian and Lindner, Sebastian and Leimeister, Chris and Morgenstern, Burkhard},
title = {Alignment-free sequence comparison with spaced $k$-mers}, booktitle = {{P}roceedings of the {G}erman {C}onference on {B}ioinformatics ({GCB})}, year = {2013}, month = {September}, volume = {34}, pages = {24--34}, series = {OpenAccess Series in Informatics (OASIcs)}, doi = {10.4230/OASIcs.GCB.2013.24}, pdf = {https://drops.dagstuhl.de/opus/volltexte/2013/4233/pdf/p024-boden.pdf}}@inproceedings{OnoderaShibuyaMLDM13, author = {Onodera, Taku and Shibuya, Tetsuo}, title = {The gapped spectrum kernel for support vector machines}, booktitle = {{P}roceedings of the {I}nternational {C}onference on {M}achine {L}earning and {D}ata {M}ining in {P}attern {R}ecognition (MLDM)}, year = {2013}, month = {April}, volume = {7988}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, pages = {1--15}, doi = {10.1007/978-3-642-39712-7_1}, url = {https://link.springer.com/chapter/10.1007/978-3-642-39712-7_1}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-642-39712-7_1.pdf}}@article{BONDBMCBioinformatics13, author = {Ilie, Lucian and Mohamadi, Hamid and Brian Golding, Geoffrey and Smyth, William F.}, title = {{BOND}: {B}asic {O}ligo{N}ucleotide {D}esign}, journal = {{BMC} {B}ioinformatics}, year = {2013}, volume = {14}, number = {69}, month = {February}, doi = {10.1186/1471-2105-14-69}, url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-14-69}, pdf = {https://www.biomedcentral.com/content/pdf/1471-2105-14-69.pdf}}@inproceedings{HouZhangHarrisBCB12, author = {Hou, Minmei and Zhang, Louxin and Harris, Robert S.}, title = {Alignment seeding strategies using contiguous pyrimidine purine matches}, booktitle = {Proceedings of the ACM Conference on Bioinformatics, Computational Biology and Biomedicine (BCB), Orlando (USA)}, year = {2012}, month = {October}, pages = {384--391}, doi = {10.1145/2382936.2382985}, url = {https://doi.acm.org/10.1145/2382936.2382985}}@inproceedings{LiMaZhangBIBM12, author = {Li, Weiming and Ma, Bin and 
Zhang, Kaizhong}, title = {Efficient Filtration for Similarity Search with Spaced k-mer Neighbors}, booktitle = {Proceedings of the IEEE International Conference on Bioinformatics and Biomedicine (BIBM), Philadelphia (USA)}, year = {2012}, month = {October}, pages = {11--16}, publisher = {IEEE Computer Society Press}, doi = {10.1109/BIBM.2012.6392695}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6392695}}@article{MarshallHermsKaltenbachRahmannTCBB12, author = {Marschall, Tobias and Herms, Inke and Kaltenbach, Hans-Michael and Rahmann, Sven}, title = {Probabilistic Arithmetic Automata and their Applications}, journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)}, year = {2012}, month = {December}, volume = {9}, number = {6}, pages = {1737--1750}, doi = {10.1109/tcbb.2012.109}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=6257364}}@inproceedings{AcoSeedANTS12, author = {Do Duc, Dong and Dinh, Huy Q. and Dang, Thanh Hai and Laukens, Kris and Hoang, Xuan Huan}, title = {{A}co{S}ee{D}: An Ant Colony Optimization for Finding Optimal Spaced Seeds in Biological Sequence Search}, booktitle = {Proceedings of the 8th International Conference on Swarm Intelligence (ANTS), Brussels (Belgium)}, year = {2012}, month = {September}, volume = {7461}, pages = {204--211}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-642-32650-9_19}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-642-32650-9_19.pdf}, doi = {10.1007/978-3-642-32650-9_19}}@article{PTRStalker12, author = {Pellegrini, Marco and Renda, Maria Elena and Vecchio, Alessio}, title = {Ab initio detection of fuzzy amino acid tandem repeats in protein sequences}, journal = {BMC Bioinformatics}, year = {2012}, volume = {13}, number = {Suppl 3}, pages = {S8}, month = {March}, doi = {10.1186/1471-2105-13-S3-S8}, url =
{https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-13-S3-S8}}@article{IlieBMCResearchNotes12, author = {Ilie, Silvana}, title = {Efficient Computation of Spaced Seeds}, journal = {{BMC} {R}esearch {N}otes}, year = {2012}, month = {February}, volume = {5}, number = {123}, url = {https://bmcresnotes.biomedcentral.com/articles/10.1186/1756-0500-5-123}, pdf = {https://www.biomedcentral.com/content/pdf/1756-0500-5-123.pdf}, doi = {10.1186/1756-0500-5-123}}@inproceedings{EgidiManziniSPIRE11, author = {Egidi, Lavinia and Manzini, Giovanni}, title = {Spaced Seeds Design Using Perfect Rulers}, booktitle = {Proceedings of the 18th International Symposium on String Processing and Information Retrieval (SPIRE), Pisa (Italy)}, year = {2011}, month = {October}, volume = {7024}, pages = {32--43}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-642-24583-1_5}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-642-24583-1_5.pdf}, doi = {10.1007/978-3-642-24583-1_5}}@article{IlieEtAlBioinformatics11, author = {Ilie, Lucian and Ilie, Silvana and Mansouri Bigvand, Anahita}, title = {{SpEED}: fast computation of sensitive spaced seeds}, journal = {Bioinformatics}, year = {2011}, month = {September}, volume = {27}, number = {17}, pages = {2433--2434}, doi = {10.1093/bioinformatics/btr368}, url = {https://bioinformatics.oxfordjournals.org/content/27/17/2433}, pdf = {https://bioinformatics.oxfordjournals.org/content/27/17/2433.full.pdf}}@article{SEEDBioinformatics11, author = {Bao, Ergude and Jiang, Tao and Kaloshian, Isgouhi and Girke, Thomas}, title = {{SEED}: efficient clustering of next-generation sequences}, journal = {Bioinformatics}, year = {2011}, month = {August}, volume = {27}, number = {18}, pages = {2502--2509}, url = {https://bioinformatics.oxfordjournals.org/content/27/18/2502}, pdf = {https://bioinformatics.oxfordjournals.org/content/27/18/2502.full.pdf}, doi = 
{10.1093/bioinformatics/btr447}}@article{IlieEtAlBMCGenomics11, author = {Ilie, Lucian and Ilie, Silvana and Khoshraftar, Shima and Mansouri Bigvand, Anahita}, title = {Seeds for Effective Oligonucleotide Design}, journal = {{BMC} {G}enomics}, year = {2011}, month = {June}, volume = {12}, pages = {280}, url = {https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-12-280}, pdf = {https://www.biomedcentral.com/content/pdf/1471-2164-12-280.pdf}, doi = {10.1186/1471-2164-12-280}}@article{SHRiMP2, title = {{SHRiMP2}: Sensitive yet Practical Short Read Mapping}, author = {David, Matei and Dzamba, Misko and Lister, Dan and Ilie, Lucian and Brudno, Michael}, journal = {Bioinformatics}, year = {2011}, month = {April}, volume = {27}, number = {7}, pages = {1011--1012}, publisher = {Oxford University Press}, url = {https://bioinformatics.oxfordjournals.org/content/27/7/1011}, doi = {10.1093/bioinformatics/btr046}}@article{ChenSheZhuCSB11, author = {Chen, Ke and She, Kun and Zhu, QingXin}, title = {Overlap digraph: An effective model for finding good spaced seeds for biological sequence local alignment}, journal = {Chinese Science Bulletin}, year = {2011}, month = {April}, volume = {56}, number = {11}, pages = {1100--1107}, url = {https://link.springer.com/article/10.1007/s11434-010-4161-9}, pdf = {https://link.springer.com/content/pdf/10.1007/s11434-010-4161-9.pdf}, doi = {10.1007/s11434-010-4161-9}}@article{KielbasaWanSatoHortonFrithGENOMERESEARCH11, author = {Kie{\l}basa, Szymon M. 
and Wan, Raymond and Sato, Kengo and Horton, Paul and Frith, Martin C.}, title = {Adaptive seeds tame genomic sequence comparison}, journal = {Genome Research}, year = {2011}, month = {March}, volume = {21}, number = {3}, pages = {487--493}, url = {https://genome.cshlp.org/content/21/3/487}, pdf = {https://genome.cshlp.org/content/21/3/487.pdf}, doi = {10.1101/gr.113985.110}}@inproceedings{CrochemoreTischlerSPIRE10, author = {Crochemore, Maxime and Tischler, German}, title = {The Gapped Suffix Array: A New Index Structure for Fast Approximate Matching}, booktitle = {Proceedings of the 17th International Symposium on String Processing and Information Retrieval (SPIRE), Los Cabos (Mexico)}, year = {2010}, editor = {Chavez, Edgar and Lonardi, Stefano}, volume = {6393}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, pages = {359--364}, month = {October}, url = {https://link.springer.com/chapter/10.1007/978-3-642-16321-0_37}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-642-16321-0_37.pdf}, doi = {10.1007/978-3-642-16321-0_37}}@article{GiladiEtAlJCB10, author = {Giladi, Eldar and Healy, John and Myers, Gene and Hart, Chris and Kapranov, Phillip and Lipson, Doron and Roels, Steven and Thayer, Edward and Letovsky, Stan}, title = {Error tolerant indexing and alignment of short reads with covering template families}, journal = {Journal of Computational Biology}, year = {2010}, month = {October}, volume = {17}, number = {10}, pages = {1397--1411}, url = {https://online.liebertpub.com/doi/abs/10.1089/cmb.2010.0005}, pdf = {https://online.liebertpub.com/doi/pdf/10.1089/cmb.2010.0005}, doi = {10.1089/cmb.2010.0005}}@article{ZhouMihaiFloreaCIS10, author = {Zhou, Leming and Mihai, Ingrid and Florea, Liliana}, title = {Spaced Seeds for Cross-species {cDNA}-to-genome Sequence Alignment}, journal = {Communications in Information and Systems}, year = {2010}, volume = {10}, number = {2}, pages = {115--136}, url = 
{https://projecteuclid.org/euclid.cis/1268143267}, pdf = {https://www.ims.cuhk.edu.hk/~cis/2010.2/CIS_10_2_04.pdf}}@article{ChungParkAPBC10, author = {Chung, Won-Hyoung and Park, Seong-Bae}, title = {Hit integration for identifying optimal spaced seeds}, journal = {{BMC} {B}ioinformatics - Selected articles from the 8th Asia-Pacific Bioinformatics Conference (APBC), 18-21 january, Bangalore, India}, year = {2010}, month = {January}, volume = {11}, number = {Suppl 1}, pages = {S37}, url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-S1-S37}, pdf = {https://www.biomedcentral.com/content/pdf/1471-2105-11-S1-S37.pdf}, doi = {10.1186/1471-2105-11-S1-S37}}@article{BattagliaEtAlTCS09, author = {Battaglia, Giovanni and Cangelosi, Davide and Grossi, Roberto and Pisanti, Nadia}, title = {Masking patterns in sequences: A new class of motif discovery with don't cares}, journal = {Theoretical Computer Science}, year = {2009}, month = {October}, volume = {410}, number = {43}, pages = {4327--4340}, url = {https://www.sciencedirect.com/science/article/pii/S0304397509004757}, doi = {10.1016/j.tcs.2009.07.014}}@article{ChungParkBMCGenomics09, author = {Chung, Won-Hyoung and Park, Seong-Bae}, title = {An empirical study of choosing efficient discriminative seeds for oligonucleotide design}, journal = {{BMC} {G}enomics}, year = {2009}, month = {December}, volume = {10}, number = {Suppl 3}, pages = {S3}, url = {https://bmcgenomics.biomedcentral.com/articles/10.1186/1471-2164-10-S3-S3}, pdf = {https://www.biomedcentral.com/content/pdf/1471-2164-10-S3-S3.pdf}, doi = {10.1186/1471-2164-10-S3-S3}}@article{NguyenLavenierBMCBioinformatics09, author = {Nguyen, Van-Hoa and Lavenier, Dominique}, title = {{PLAST}: parallel local alignment search tool for database comparison}, journal = {{BMC} {B}ioinformatics}, year = {2009}, month = {October}, volume = {10}, pages = {329}, url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-10-329},
pdf = {https://www.biomedcentral.com/content/pdf/1471-2105-10-329.pdf}, doi = {10.1186/1471-2105-10-329}}@article{ChenZhuYangTangCSB09, author = {Chen, Ke and Zhu, QingXin and Yang, Fan and Tang, DongMing}, title = {An efficient way of finding good indel seeds for local homology search}, journal = {Chinese Science Bulletin}, year = {2009}, month = {November}, volume = {54}, number = {20}, pages = {3837--3842}, url = {https://link.springer.com/article/10.1007/s11434-009-0531-6}, pdf = {https://link.springer.com/content/pdf/10.1007/s11434-009-0531-6.pdf}, doi = {10.1007/s11434-009-0531-6}}@article{ChenSouaiaiaChenBioinformatics09, author = {Chen, Yangho and Souaiaia, Tate and Chen, Ting}, title = {{P}er{M}: efficient mapping of short sequencing reads with periodic full sensitive spaced seeds}, journal = {Bioinformatics}, year = {2009}, month = {October}, volume = {25}, number = {19}, pages = {2514--2521}, url = {https://bioinformatics.oxfordjournals.org/content/25/19/2514}, pdf = {https://bioinformatics.oxfordjournals.org/content/25/19/2514.full.pdf}, doi = {10.1093/bioinformatics/btp486}}@article{MaYaoIPL09, author = {Ma, Bin and Yao, Hongyi}, title = {Seed optimization for i.i.d. similarities is no easier than optimal {G}olomb ruler design}, journal = {Information Processing Letters}, year = {2009}, month = {September}, volume = {109}, number = {19}, pages = {1120--1124}, url = {https://www.sciencedirect.com/science/article/pii/S0020019009002270}, doi = {10.1016/j.ipl.2009.07.008}, note = {(earlier version in APBC 2008)}}@article{SHRiMP09, author = {Rumble, Stephen M. and Lacroute, Phil and Dalca, Adrian V. and Fiume, Marc and Sidow, Arend and Brudno, Michael}, journal = {PLoS Comp. 
Biol}, publisher = {Public Library of Science}, title = {{SHRiMP}: Accurate Mapping of Short Color-space Reads}, year = {2009}, volume = {5}, number = {5}, pages = {e1000386}, month = {May}, url = {https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1000386}, doi = {10.1371/journal.pcbi.1000386}}@article{SIM4CC09, author = {Zhou, Leming and Pertea, Mihaela and Delcher, Arthur L. and Florea, Liliana}, title = {Sim4cc: A cross-species spliced alignment program}, journal = {Nucleic Acids Research}, year = {2009}, volume = {37}, number = {11}, pages = {e80}, month = {May}, doi = {10.1093/nar/gkp319}}@inproceedings{LiMaZhangBICOB09, author = {Li, Weiming and Ma, Bin and Zhang, Kaizhong}, title = {Amino Acid Classification and Hash Seeds for Homology Search}, booktitle = {Proceedings of the 1st International Conference in Bioinformatics and Computational Biology, BICoB 2009, New Orleans LA (USA)}, pages = {44--51}, year = {2009}, month = {April}, volume = {5462}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-642-00727-9_6}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-642-00727-9_6.pdf}, doi = {10.1007/978-3-642-00727-9_6}}@article{IlieIlieBioinformatics09, author = {Ilie, Lucian and Ilie, Silvana}, title = {Fast computation of neighbor seeds}, journal = {Bioinformatics}, year = {2009}, month = {March}, volume = {25}, number = {6}, pages = {822--823}, url = {https://bioinformatics.oxfordjournals.org/content/25/6/822}, pdf = {https://bioinformatics.oxfordjournals.org/content/25/6/822.full.pdf}, doi = {10.1093/bioinformatics/btp054}}@article{MakBensonBioinformatics09, author = {Mak, Denise Y.F. 
and Benson, Gary}, title = {All hits all the time: parameter free calculation of seed sensitivity}, journal = {Bioinformatics}, year = {2009}, month = {February}, volume = {25}, number = {3}, pages = {302--308}, note = {(earlier version in APBC 2007)}, url = {https://bioinformatics.oxfordjournals.org/content/25/3/302}, pdf = {https://bioinformatics.oxfordjournals.org/content/25/3/302.full.pdf}, doi = {10.1093/bioinformatics/btn643}}@book{ChaoZhangBook08, author = {Chao, Kun-Mao and Zhang, Louxin}, title = {Sequence Comparison: Theory and Methods}, publisher = {Springer}, year = {2008}, isbn = {978-1-84800-319-4}, volume = {7}, series = {Computational Biology}, url = {https://www.springer.com/fr/book/9781848003194}, doi = {10.1007/978-1-84800-320-0}}@article{YangZhangJCB08, author = {Yang, Jialiang and Zhang, Louxin}, title = {Run Probabilities of Seed-Like Patterns and Identifying Good Transition Seeds}, journal = {Journal of Computational Biology}, year = {2008}, month = {December}, volume = {15}, number = {10}, pages = {1295--1313}, note = {(earlier version in APBC 2008)}, url = {https://online.liebertpub.com/doi/abs/10.1089/cmb.2007.0209}, pdf = {https://online.liebertpub.com/doi/pdf/10.1089/cmb.2007.0209}, doi = {10.1089/cmb.2007.0209}}@inproceedings{BensonMakSPIRE08, author = {Benson, Gary and Mak, Denise Y. F.}, title = {Exact Distribution of a Spaced Seed Statistic for {DNA} Homology Detection}, booktitle = {Proceedings of the 15th International Symposium on String Processing and Information Retrieval (SPIRE), Melbourne (Australia)}, pages = {282--293}, year = {2008}, month = {November}, editor = {Amir, A. and Turpin, A. 
and Moffat, A.}, volume = {5280}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-89097-3_27}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-89097-3_27.pdf}, doi = {10.1007/978-3-540-89097-3_27}}@article{LinZhangZhangMaLiBioinformatics08, author = {Lin, Hao and Zhang, Zefeng and Zhang, Michael Q. and Ma, Bin and Li, Ming}, title = {{ZOOM!} {Z}illions {O}f {O}ligos {M}apped}, journal = {Bioinformatics}, year = {2008}, month = {November}, volume = {24}, number = {21}, pages = {2431--2437}, url = {https://bioinformatics.oxfordjournals.org/content/24/21/2431}, pdf = {https://bioinformatics.oxfordjournals.org/content/24/21/2431.full.pdf}, doi = {10.1093/bioinformatics/btn416}}@inproceedings{HermsRahmannWABI08, author = {Herms, Inke and Rahmann, Sven}, title = {Computing Alignment Seed Sensitivity with Probabilistic Arithmetic Automata}, booktitle = {Proceedings of the 8th International Workshop on Algorithms in Bioinformatics ({WABI}), Karlsruhe (Germany)}, year = {2008}, month = {September}, volume = {5251}, pages = {318--329}, series = {Lecture Notes in Bioinformatics}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-87361-7_27}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-87361-7_27.pdf}, doi = {10.1007/978-3-540-87361-7_27}}@article{NicolasRivalsJCSS08, author = {Nicolas, Fran{\c{c}}ois and Rivals, {\'E}ric}, title = {Hardness of Optimal Spaced Seed Design}, journal = {Journal of Computer and System Sciences}, year = {2008}, month = {August}, volume = {74}, number = {5}, pages = {831--849}, note = {(earlier version in CPM 2005)}, url = {https://www.sciencedirect.com/science/article/pii/S0022000007001444}, pdf = {https://www.lirmm.fr/~rivals/PUBLI/FILES/NR-seeds-preprint-121007.pdf}, doi = {10.1016/j.jcss.2007.10.001}}@inproceedings{NguyenLavenierRIVF08, author = {Nguyen, Van-Hoa and Lavenier, Dominique}, title = 
{Speeding up Subset Seed Algorithm for Intensive Protein Sequence Comparison}, booktitle = {Proceedings of the 6th IEEE International Conference on research, innovation \& vision for the future}, year = {2008}, month = {July}, pages = {57--63}, url = {https://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=4586333}, doi = {10.1109/RIVF.2008.4586333}}@article{ZhangLinLiJBCB08, author = {Zhang, Zefeng and Lin, Hao and Li, Ming}, title = {{M}ango: multiple alignment with {N} gapped oligos}, journal = {Journal of Bioinformatics and Computational Biology}, year = {2008}, month = {June}, volume = {6}, number = {3}, pages = {521--541}, url = {https://www.worldscinet.com/jbcb/06/0603/S0219720008003527.html}, pdf = {https://www.worldscinet.com/jbcb/06/preserved-docs/0603/S0219720008003527.pdf}, doi = {10.1142/S0219720008003527}}@inproceedings{LavenierIPDPS08, author = {Lavenier, Dominique}, title = {Ordered Index Seed Algorithm for Intensive {DNA} Sequence Comparison}, booktitle = {IEEE International Symposium on Parallel and Distributed Processing (IPDPS)}, year = {2008}, month = {April}, pages = {1--8}, doi = {10.1109/IPDPS.2008.4536172}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4536172}, pdf = {https://hal.archives-ouvertes.fr/docs/00/32/26/96/PDF/lavenier_1569087275.pdf}}@incollection{BrownBA08, author = {Brown, Daniel G.}, title = {A survey of seeding for sequence alignment}, booktitle = {Bioinformatics Algorithms: Techniques and Applications}, editor = {M{\u{a}}ndoiu, I. and Zelikovsky, A.}, publisher = {Wiley-Interscience}, year = {2008}, month = {February}, pages = {126--152}, doi = {10.1002/9780470253441.ch6}}
@article{ZhouStantonFloreaBMCBioinformatics08, author = {Zhou, Leming and Stanton, Jonathan and Florea, Liliana}, title = {Universal seeds for c{DNA}-to-genome comparison}, journal = {{BMC} {B}ioinformatics}, year = {2008}, month = {January}, volume = {9}, pages = {36}, url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-36}, pdf = {https://www.biomedcentral.com/content/pdf/1471-2105-9-36.pdf}, doi = {10.1186/1471-2105-9-36}}@inproceedings{YangZhangAPBC08, author = {Yang, Jialiang and Zhang, Louxin}, title = {Run Probability of High-Order Seed Patterns and its Applications to Finding Good Transition Seeds}, booktitle = {Proceedings of the 6th Asia Pacific Bioinformatics Conference (APBC), 14-17 January 2008, Kyoto, Japan}, year = {2008}, month = {January}, pages = {123--132}, editor = {Brazma, A. and Miyano, S. and Akutsu, T.}, volume = {6}, series = {Advances in Bioinformatics and Computational Biology}, publisher = {Imperial College Press}, url = {https://www.worldscientific.com/doi/abs/10.1142/9781848161092_0015}, pdf = {https://www.comp.nus.edu.sg/~wongls/psZ/apbc2008/apbc056a.pdf}, doi = {10.1142/9781848161092_0015}}@inproceedings{MaYaoAPBC08, author = {Ma, Bin and Yao, Hongyi}, title = {Seed Optimization Is No Easier than Optimal {G}olomb Ruler Design}, booktitle = {Proceedings of the 6th Asia Pacific Bioinformatics Conference (APBC), 14-17 January 2008, Kyoto, Japan}, year = {2008}, month = {January}, pages = {133--144}, editor = {Brazma, A. and Miyano, S. 
and Akutsu, T.}, volume = {6}, series = {Advances in Bioinformatics and Computational Biology}, publisher = {Imperial College Press}, url = {https://www.worldscientific.com/doi/abs/10.1142/9781848161092_0016}, pdf = {https://www.comp.nus.edu.sg/~wongls/psZ/apbc2008/apbc019a.pdf}, doi = {10.1142/9781848161092_0016}}@phdthesis{HarrisLastZPHD07, author = {Harris, Robert S.}, title = {Improved pairwise alignment of genomic {DNA}}, school = {The Pennsylvania State University}, year = {2007}, type = {Ph.D. Thesis}, month = {December}, optnote = {}, optannote = {}}@inproceedings{ZhangLinLiCSB07, author = {Zhang, Zefeng and Lin, Hao and Li, Ming}, title = {{M}ango: A new approach to multiple sequence alignment}, booktitle = {Proceedings of the 6th International Conference on Computational Systems Bioinformatics (CSB), San Diego (USA)}, year = {2007}, month = {August}, volume = {6}, pages = {237--247}, pdf = {https://www.lifesciencessociety.org/CSB2007/toc/PDF/237.2007.pdf}, url = {https://www.lifesciencessociety.org/CSB2007/toc/237.2007.html}}@inproceedings{GaoLiLuFAW07, author = {Gao, Xin and Li, Shuai Cheng and Lu, Yinan}, title = {New Algorithms for the Spaced Seeds}, booktitle = {Frontiers of Algorithmic Workshop 2007 (FAW2007)}, year = {2007}, month = {August}, volume = {4613}, pages = {51--61}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-73814-5_5}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-73814-5_5.pdf}, doi = {10.1007/978-3-540-73814-5_5}}@inproceedings{IlieIlieBIOCOMP07, author = {Ilie, Lucian and Ilie, Silvana}, title = {Long spaced seeds for finding similarities between biological sequences}, booktitle = {Proceedings of the 2nd International Conference on Bioinformatics {\&} Computational Biology (BIOCOMP)}, year = {2007}, pages = {3--8}, pdf = {https://www.math.ryerson.ca/~silvana/IlieIlie_BIOCOMP07.pdf}}@inproceedings{DuchesneGiraudElMabroukCOCOON07, author 
= {Duchesne, Jean-Eudes and Giraud, Mathieu and El-Mabrouk, Nadia}, title = {Seed-Based Exclusion Method for Non-coding {RNA} Gene Search}, booktitle = {Proceedings of the 13th International Computing and Combinatorics Conference ({COCOON})}, year = {2007}, month = {July}, volume = {4598}, pages = {27--39}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-73545-8_6}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-73545-8_6.pdf}, doi = {10.1007/978-3-540-73545-8_6}}@article{IlieIlieBioinformatics07, author = {Ilie, Lucian and Ilie, Silvana}, title = {Multiple spaced seeds for homology search}, journal = {Bioinformatics}, year = {2007}, month = {September}, volume = {23}, number = {22}, pages = {2969--2977}, url = {https://bioinformatics.oxfordjournals.org/content/23/22/2969}, pdf = {https://bioinformatics.oxfordjournals.org/content/23/22/2969.full.pdf}, doi = {10.1093/bioinformatics/btm422}}@inproceedings{IlieIlieWABI07, author = {Ilie, Lucian and Ilie, Silvana}, title = {Fast computation of good multiple spaced seeds}, booktitle = {Proceedings of the 7th International Workshop on Algorithms in Bioinformatics ({WABI}), Philadelphia (USA)}, year = {2007}, month = {September}, volume = {4645}, pages = {346--358}, series = {Lecture Notes in Bioinformatics}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-74126-8_32}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-74126-8_32.pdf}, doi = {10.1007/978-3-540-74126-8_32}}@article{ZhangTCBB07, author = {Zhang, Louxin}, title = {Superiority of Spaced Seeds for Homology Search}, journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)}, year = {2007}, month = {July}, volume = {4}, number = {3}, pages = {496--505}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4288075}, doi = {10.1109/tcbb.2007.1013}}@article{MaLiJCSS07, author = {Ma, 
Bin and Li, Ming}, title = {On the complexity of spaced seeds}, journal = {Journal of Computer and System Sciences}, year = {2007}, month = {November}, volume = {73}, number = {7}, pages = {1024--1034}, url = {https://www.sciencedirect.com/science/article/pii/S0022000007000268}, doi = {10.1016/j.jcss.2007.03.008}}@article{FarachEtAlJCSS07, author = {Farach-Colton, Martin and Landau, Gad M. and Cenk Sahinalp, S{\"u}leyman and Tsur, Dekel}, title = {Optimal spaced seeds for faster approximate string matching}, journal = {Journal of Computer and System Sciences}, year = {2007}, month = {November}, volume = {73}, number = {7}, pages = {1035--1044}, url = {https://www.sciencedirect.com/science/article/pii/S0022000007000256}, doi = {10.1016/j.jcss.2007.03.007}}@article{FengTillierBioinformatics07, author = {Feng, Shengzhong and Tillier, Elisabeth R. M.}, title = {A fast and flexible approach to oligonucleotide probe design for genomes and gene families}, journal = {Bioinformatics}, year = {2007}, month = {May}, volume = {23}, number = {10}, pages = {1195--1202}, url = {https://bioinformatics.oxfordjournals.org/content/23/10/1195}, pdf = {https://bioinformatics.oxfordjournals.org/content/23/10/1195.full.pdf}, doi = {10.1093/bioinformatics/btm114}}@article{KongJCB07, author = {Kong, Yong}, title = {Generalized Correlation Functions and Their Applications in Selection of Optimal Multiple Spaced Seeds for Homology Search}, journal = {Journal of Computational Biology}, year = {2007}, volume = {14}, number = {2}, pages = {238--254}, month = {March}, url = {https://online.liebertpub.com/doi/abs/10.1089/cmb.2006.0008}, pdf = {https://online.liebertpub.com/doi/pdf/10.1089/cmb.2006.0008}, doi = {10.1089/cmb.2006.0008}}@article{ZhouFloreaJCB07, author = {Zhou, Leming and Florea, Liliana}, title = {Designing sensitive and specific spaced seeds for cross-species m{RNA}-to-genome alignment}, journal = {Journal of Computational Biology}, year = {2007}, month = {March}, volume = {14}, 
number = {2}, pages = {113--130}, url = {https://online.liebertpub.com/doi/abs/10.1089/cmb.2006.0130}, pdf = {https://online.liebertpub.com/doi/pdf/10.1089/cmb.2006.0130}, doi = {10.1089/cmb.2006.0130}}@inproceedings{MakBensonAPBC07, author = {Mak, Denise Y. F. and Benson, Gary}, title = {All hits all the time: parameter free calculation of seed sensitivity}, booktitle = {Proceedings of the 5th Asia Pacific Bioinformatics Conference (APBC)}, year = {2007}, month = {January}, pages = {327--340}, editor = {Sankoff, D. and Wang, L. and Chin, F.}, series = {Advances in Bioinformatics and Computational Biology}, volume = {5}, publisher = {Imperial College Press}, url = {https://www.worldscientific.com/doi/abs/10.1142/9781860947995_0035}, pdf = {https://tandem.bu.edu/papers/all_hits_all_the_time.pdf}, doi = {10.1142/9781860947995_0035}}@article{XuBrownLiMaJCB06, author = {Xu, Jinbo and Brown, Daniel G. and Li, Ming and Ma, Bin}, title = {Optimizing Multiple Spaced Seeds for Homology Search}, journal = {Journal of Computational Biology}, year = {2006}, month = {September}, volume = {13}, number = {7}, pages = {1355--1368}, url = {https://online.liebertpub.com/doi/abs/10.1089/cmb.2006.13.1355}, pdf = {https://online.liebertpub.com/doi/pdf/10.1089/cmb.2006.13.1355}, doi = {10.1089/cmb.2006.13.1355}, note = {(earlier version in CPM 2004)}}@article{MakGelfandBensonBioinformatics06, author = {Mak, Denise Y. F. 
and Gelfand, Yevgeniy and Benson, Gary}, title = {Indel seeds for homology search}, journal = {Bioinformatics}, year = {2006}, volume = {22}, number = {14}, pages = {e341--e349}, url = {https://bioinformatics.oxfordjournals.org/content/22/14/e341}, pdf = {https://bioinformatics.oxfordjournals.org/content/22/14/e341.full.pdf}, doi = {10.1093/bioinformatics/btl263}}@article{CsurosMaAlgorithmica07, author = {Cs{\H{u}}r{\"o}s, Mikl{\'o}s and Ma, Bin}, title = {Rapid homology search with neighbor seeds}, journal = {Algorithmica}, year = {2007}, month = {June}, volume = {48}, number = {2}, pages = {187--202}, url = {https://link.springer.com/article/10.1007/s00453-007-0062-y}, pdf = {https://link.springer.com/content/pdf/10.1007/s00453-007-0062-y.pdf}, pdf2 = {https://www.iro.umontreal.ca/~csuros/papers/neighbor-seeds.pdf}, doi = {10.1007/s00453-007-0062-y}, note = {(earlier version in COCOON 2005)}}@inproceedings{DarlingTreangenZhangWABI06, author = {Darling, Aaron E. and Treangen, Todd J. and Zhang, Louxin and Kuiken, Carla and Messeguer, Xavier and Perna, Nicole T.}, title = {Procrastination leads to efficient filtration for local multiple alignment}, booktitle = {Proceedings of the 6th International Workshop on Algorithms in Bioinformatics (WABI), Z{\"u}rich (Switzerland)}, pages = {126--137}, year = {2006}, volume = {4175}, series = {Lecture Notes in Bioinformatics}, month = {September}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/11851561_12}, pdf = {https://link.springer.com/content/pdf/10.1007/11851561_12.pdf}, doi = {10.1007/11851561_12}}@article{SunBuhlerBMCBioinformatics06, author = {Sun, Yanni and Buhler, Jeremy}, title = {Choosing the best heuristic for seeded alignment of {DNA} sequences}, journal = {{BMC} {B}ioinformatics}, year = {2006}, volume = {7}, pages = {133}, month = {March}, url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-7-133}, pdf = 
{https://www.biomedcentral.com/content/pdf/1471-2105-7-133.pdf}, doi = {10.1186/1471-2105-7-133}}@inproceedings{LiMaZhangSODA06, author = {Li, Ming and Ma, Bin and Zhang, Louxin}, title = {Superiority and complexity of the Spaced Seeds}, booktitle = {Proceedings of the 17th Symposium on Discrete Algorithms (SODA)}, year = {2006}, month = {January}, pages = {444--453}, publisher = {ACM Press}, pdf = {https://www.math.nus.edu.sg/~matzlx/papers/Soda06.pdf}, url = {https://doi.acm.org/10.1145/1109557.1109607}, doi = {10.1145/1109557.1109607}}@inproceedings{PeterlongoSPIRE05, author = {Peterlongo, Pierre and Pisanti, Nadia and Boyer, Frederic and Sagot, Marie-France}, title = {Lossless Filter for Finding Long Multiple Approximate Repetitions Using a New Data Structure, the Bi-factor Array}, booktitle = {Proceedings of the 12th International Conference on String Processing and Information Retrieval (SPIRE), Buenos Aires (Argentina)}, year = {2005}, month = {November}, volume = {3772}, pages = {179--190}, editor = {Consens, Mariano and Navarro, Gonzalo}, series = {Lecture Notes in Computer Science}, doi = {10.1007/11575832_20}, url = {https://link.springer.com/chapter/10.1007/11575832_20}, pdf = {https://link.springer.com/content/pdf/10.1007/11575832_20.pdf}}@article{ChoiZhangCOSMOS05, author = {Choi, Kwok Pui and Zhang, Louxin}, title = {Analysis of Spaced Seed Technique in Sequence Alignment}, journal = {COSMOS}, year = {2005}, month = {May}, volume = {1}, number = {1}, pages = {57--73}, url = {https://www.worldscinet.com/cosmos/01/0101/S0219607705000048.html}, pdf = {https://www.worldscinet.com/cosmos/01/preserved-docs/0101/S0219607705000048.pdf}, doi = {10.1142/S0219607705000048}}@article{FlannickBatzoglouNAR05, author = {Flannick, Jason and Batzoglou, Serafim}, title = {Using multiple alignments to improve seeded local alignment algorithms}, journal = {Nucleic Acids Research}, year = {2005}, month = {August}, volume = {33}, number = {14}, pages = {4563--4577}, url = 
{https://nar.oxfordjournals.org/content/33/14/4563.full}, doi = {10.1093/nar/gki767}}@inproceedings{PolKahveciBIBE05, author = {Pol, Abhijit and Kahveci, Tamer}, title = {Highly Scalable and Accurate Seeds for Subsequence Alignments}, booktitle = {Proceedings of the IEEE 5th Symposium on Bioinformatics and Bioengineering (BIBE), Minneapolis (USA)}, year = {2005}, month = {October}, pages = {27--31}, publisher = {IEEE Computer Society Press}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=1544445}, doi = {10.1109/BIBE.2005.37}}@article{FontaineBurkhardtKarkkainenIJFCS05, author = {Fontaine, Marc and Burkhardt, Stefan and K{\"a}rkk{\"a}inen, Juha}, title = {{BDD}-Based Analysis of Gapped $q$-Gram Filters}, journal = {International Journal of Foundations of Computer Science}, year = {2005}, month = {December}, volume = {16}, number = {6}, pages = {1121--1134}, postscript = {https://www.cs.helsinki.fi/juha.karkkainen/publications/ijfcs05-preliminary.ps.gz}, doi = {10.1142/S0129054105003698}, note = {(earlier version in PSC 2004)}}@inproceedings{CsurosMaCOCOON05, author = {Cs{\H{u}}r{\"o}s, Mikl{\'o}s and Ma, Bin}, title = {Rapid homology search with two-stage extension and daughter seeds}, booktitle = {Proceedings of the 11th International Computing and Combinatorics Conference ({COCOON})}, pages = {104--114}, year = {2005}, month = {August}, volume = {3595}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/11533719_13}, pdf = {https://link.springer.com/content/pdf/10.1007/11533719_13.pdf}, pdf2 = {https://www.iro.umontreal.ca/~csuros/papers/daughter-seeds.pdf}, doi = {10.1007/11533719_13}}@article{PreparataZhangChoiJCB05, author = {Preparata, Franco P. 
and Zhang, Louxin and Choi, Kwok Pui}, title = {Quick, Practical Selection of Effective Seeds for Homology Search}, journal = {Journal of Computational Biology}, year = {2005}, month = {November}, volume = {12}, number = {9}, pages = {1137--1152}, url = {https://online.liebertpub.com/doi/abs/10.1089/cmb.2005.12.1137}, pdf = {https://online.liebertpub.com/doi/pdf/10.1089/cmb.2005.12.1137}, doi = {10.1089/cmb.2005.12.1137}}@article{BuhlerKeichSunJCSS05, author = {Buhler, Jeremy and Keich, Uri and Sun, Yanni}, title = {Designing seeds for similarity search in genomic {DNA}}, journal = {Journal of Computer and System Sciences}, year = {2005}, volume = {70}, number = {3}, pages = {342--363}, pdf = {https://www.cs.cornell.edu/~keich/papers/Designing_seeds_for_similarity_search_in_genomic_DNA_JCSS05.pdf}, url = {https://www.sciencedirect.com/science/article/pii/S0022000004001515}, doi = {10.1016/j.jcss.2004.12.003}, note = {(earlier version in RECOMB 2003)}}@article{BrejovaBrownVinarJCSS05, author = {Brejov{\'a}, Bro{\v{n}}a and Brown, Daniel G. and Vina{\v{r}}, Tom{\'a}{\v{s}}}, title = {Vector seeds: An extension to spaced seeds}, journal = {Journal of Computer and System Sciences}, year = {2005}, volume = {70}, number = {3}, pages = {364--380}, url = {https://www.sciencedirect.com/science/article/pii/S0022000004001527}, doi = {10.1016/j.jcss.2004.12.008}, note = {(earlier version in WABI 2003)}}@article{SunBuhlerJCB05, author = {Sun, Yanni and Buhler, Jeremy}, title = {Designing Multiple Simultaneous Seeds for {DNA} Similarity Search}, journal = {Journal of Computational Biology}, volume = {12}, number = {6}, year = {2005}, pages = {847--861}, url = {https://online.liebertpub.com/doi/abs/10.1089/cmb.2005.12.847}, pdf = {https://online.liebertpub.com/doi/pdf/10.1089/cmb.2005.12.847}, doi = {10.1089/cmb.2005.12.847}, note = {(earlier version in RECOMB 2004)}}@inproceedings{FarachLandauICALP05, author = {Farach-Colton, Martin and Landau, Gad M. 
and Cenk Sahinalp, S{\"u}leyman and Tsur, Dekel}, title = {Optimal spaced seeds for faster approximate string matching}, booktitle = {Proceedings of the 32nd International Colloquium on Automata, Languages and Programming (ICALP'05), Lisboa (Portugal)}, pages = {1251--1262}, year = {2005}, volume = {3580}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/11523468_101}, pdf = {https://link.springer.com/content/pdf/10.1007/11523468_101.pdf}, doi = {10.1007/11523468_101}}@inproceedings{NicolasRivalsCPM05, author = {Nicolas, Fran{\c{c}}ois and Rivals, {\'E}ric}, title = {Hardness of Optimal Spaced Seed Design}, booktitle = {Proceedings of the 16th Annual Symposium on Combinatorial Pattern Matching (CPM), Jeju Island (Korea)}, pages = {144--155}, year = {2005}, editor = {Apostolico, A. and Crochemore, M. and Park, K.}, volume = {3537}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/11496656_13}, pdf = {https://link.springer.com/content/pdf/10.1007/11496656_13.pdf}, doi = {10.1007/11496656_13}}@article{BrownTCBB05, author = {Brown, Daniel G.}, title = {Optimizing Multiple Seeds for Protein Homology Search}, journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)}, pages = {29--38}, year = {2005}, issn = {1545-5963}, volume = {2}, number = {1}, month = {January}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=1416848}, doi = {10.1109/tcbb.2005.13}, note = {(earlier version in WABI 2004)}}@article{TPatternHunter05, author = {Kisman, Derek and Li, Ming and Ma, Bin and Li, Wang}, title = {t{P}atternHunter: gapped, fast and sensitive translated homology search}, journal = {Bioinformatics}, year = {2005}, volume = {21}, number = {4}, pages = {542--544}, month = {February}, url = {https://bioinformatics.oxfordjournals.org/content/21/4/542}, pdf = 
{https://bioinformatics.oxfordjournals.org/content/21/4/542.full.pdf}, doi = {10.1093/bioinformatics/bti035}}@article{BrownLiMaJBCB04, author = {Brown, Daniel G. and Li, Ming and Ma, Bin}, title = {A tutorial of recent developments in the seeding of local alignment}, journal = {Journal of Bioinformatics and Computational Biology}, year = {2004}, volume = {2}, number = {4}, pages = {819--842}, url = {https://www.worldscinet.com/jbcb/02/0204/S0219720004000983.html}, pdf = {https://www.worldscinet.com/jbcb/02/preserved-docs/0204/S0219720004000983.pdf}, pdf2 = {https://www.cs.ucdavis.edu/~gusfield/spring06readings/jbcbsurvey.pdf}, doi = {10.1142/S0219720004000983}}@inproceedings{FontaineBurkhardtKarkkainenPSC04, author = {Fontaine, Marc and Burkhardt, Stefan and K{\"a}rkk{\"a}inen, Juha}, title = {{BDD}-Based Analysis of Gapped $q$-Gram Filters}, booktitle = {Proceedings of the 9th Prague Stringology Conference (PSC)}, pages = {56--68}, year = {2004}, url = {https://www.stringology.org/event/2004/p5.html}, pdf = {https://www.stringology.org/papers/PSC2005.pdf}}@inproceedings{BrownHudekWABI04, author = {Brown, Daniel G. and Hudek, Alexander K.}, title = {New Algorithms for Multiple {DNA} Sequence Alignment}, booktitle = {Proceedings of the 4th International Workshop on Algorithms in Bioinformatics (WABI), Bergen (Norway)}, pages = {314--325}, year = {2004}, editor = {Jonassen, I. and Kim, J.}, volume = {3240}, series = {Lecture Notes in Bioinformatics}, month = {September}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-30219-3_27}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-30219-3_27.pdf}, doi = {10.1007/978-3-540-30219-3_27}}@inproceedings{BrownWABI04, author = {Brown, Daniel G.}, title = {Multiple vector seeds for protein alignment}, booktitle = {Proceedings of the 4th International Workshop on Algorithms in Bioinformatics (WABI), Bergen (Norway)}, pages = {170--181}, year = {2004}, editor = {Jonassen, I. 
and Kim, J.}, volume = {3240}, series = {Lecture Notes in Bioinformatics}, month = {September}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-30219-3_15}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-30219-3_15.pdf}, doi = {10.1007/978-3-540-30219-3_15}}@article{HuangYeChouEtAl2004, author = {Huang, Xiaoqiu and Ye, Liang and Chou, Hui-Hsien and Yang, I-Hsuan and Chao, Kun-Mao}, title = {Efficient Combination of Multiple Word Models for Improved Sequence Comparison}, journal = {Bioinformatics}, year = {2004}, volume = {20}, number = {16}, pages = {2529--2533}, url = {https://bioinformatics.oxfordjournals.org/content/20/16/2529}, pdf = {https://bioinformatics.oxfordjournals.org/content/20/16/2529.full.pdf}, doi = {10.1093/bioinformatics/bth279}}@article{KeichLiMaTrompDAM04, author = {Keich, Uri and Li, Ming and Ma, Bin and Tromp, John}, title = {On spaced seeds for similarity search}, journal = {Discrete Applied Mathematics}, year = {2004}, volume = {138}, number = {3}, pages = {253--263}, note = {(earlier version in 2002)}, doi = {10.1016/S0166-218X(03)00382-2}, url = {https://www.sciencedirect.com/science/article/pii/S0166218X03003822}}@inproceedings{CsurosCPM04, author = {Cs{\H{u}}r{\"o}s, Mikl{\'o}s}, title = {Performing Local Similarity Searches with Variable Length Seeds}, booktitle = {Proceedings of the 15th Annual Combinatorial Pattern Matching Symposium (CPM), Istanbul (Turkey)}, pages = {373--387}, year = {2004}, editor = {Sahinalp, S.C. and Muthukrishnan, S. and Dogrusoz, U.}, volume = {3109}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-27801-6_28}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-27801-6_28.pdf}, doi = {10.1007/978-3-540-27801-6_28}}@inproceedings{XuBrownLiMaCPM04, author = {Xu, Jinbo and Brown, Daniel G. 
and Li, Ming and Ma, Bin}, title = {Optimizing multiple spaced seeds for homology search}, booktitle = {Proceedings of the 15th Symposium on Combinatorial Pattern Matching (CPM), Istanbul (Turkey)}, pages = {47--58}, year = {2004}, editor = {Sahinalp, S.C. and Muthukrishnan, S. and Dogrusoz, U.}, volume = {3109}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, url = {https://link.springer.com/chapter/10.1007/978-3-540-27801-6_4}, pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-27801-6_4.pdf}, doi = {10.1007/978-3-540-27801-6_4}}@inproceedings{YangWangChenEtAlBIBE04, author = {Yang, I-Hsuan and Wang, Sheng-Ho and Chen, Yang-Ho and Huang, Pao-Hsian and Ye, Liang and Huang, Xiaoqiu and Chao, Kun-Mao}, title = {Efficient Methods for Generating Optimal Single and Multiple Spaced Seeds}, booktitle = {Proceedings of the IEEE 4th Symposium on Bioinformatics and Bioengineering (BIBE), Taichung (Taiwan)}, pages = {411--416}, year = {2004}, publisher = {IEEE Computer Society Press}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=1317372}, doi = {10.1109/BIBE.2004.1317372}}@article{BrejovaBrownVinarJBCB04, author = {Brejov{\'a}, Bro{\v{n}}a and Brown, Daniel G. 
and Vina{\v{r}}, Tom{\'a}{\v{s}}},
  title = {Optimal spaced seeds for homologous coding regions},
  journal = {Journal of Bioinformatics and Computational Biology},
  year = {2004},
  volume = {1},
  number = {4},
  pages = {595--610},
  month = {January},
  url = {https://www.worldscinet.com/jbcb/01/0104/S0219720004000326.html},
  pdf = {https://www.worldscinet.com/jbcb/01/preserved-docs/0104/S0219720004000326.pdf},
  doi = {10.1142/S0219720004000326}
}

@article{PatternHunter04,
  author = {Li, Ming and Ma, Bin and Kisman, Derek and Tromp, John},
  title = {{P}attern{H}unter {II}: Highly Sensitive and Fast Homology Search},
  journal = {Journal of Bioinformatics and Computational Biology},
  year = {2004},
  volume = {2},
  number = {3},
  pages = {417--439},
  note = {(earlier version in GIW 2003)},
  url = {https://www.worldscinet.com/jbcb/02/0203/S0219720004000661.html},
  doi = {10.1142/S0219720004000661}
}

@inproceedings{SunBuhlerRECOMB04,
  author = {Sun, Yanni and Buhler, Jeremy},
  title = {Designing multiple simultaneous seeds for {DNA} similarity search},
  booktitle = {Proceedings of the 8th Annual International Conference on Research in Computational Molecular Biology (RECOMB), San Diego (California)},
  pages = {76--84},
  year = {2004},
  month = {March},
  url = {https://doi.acm.org/10.1145/974614.974625},
  doi = {10.1145/974614.974625}
}

@article{ChoiZengZhangBioinformatics04,
  author = {Choi, Kwok Pui and Zeng, Fanfan and Zhang, Louxin},
  title = {Good Spaced Seeds For Homology Search},
  journal = {Bioinformatics},
  year = {2004},
  volume = {20},
  number = {7},
  pages = {1053--1059},
  url = {https://bioinformatics.oxfordjournals.org/content/20/7/1053},
  pdf = {https://bioinformatics.oxfordjournals.org/content/20/7/1053.full.pdf},
  doi = {10.1093/bioinformatics/bth037}
}

@article{ChoiZhangJCSS04,
  author = {Choi, Kwok Pui and Zhang, Louxin},
  title = {Sensitivity Analysis and Efficient Method for Identifying Optimal Spaced Seeds},
  journal = {Journal of Computer and System Sciences},
  year = {2004},
  volume = {68},
  number =
{1},
  pages = {22--40},
  url = {https://www.sciencedirect.com/science/article/pii/S0022000003001430},
  doi = {10.1016/j.jcss.2003.04.002}
}

@article{ChenSungGI03,
  author = {Chen, Wei and Sung, Wing-Kin},
  title = {On Half Gapped Seed},
  journal = {Genome Informatics},
  year = {2003},
  volume = {14},
  pages = {176--185},
  note = {(earlier version in GIW 2003)},
  url = {https://www.jstage.jst.go.jp/article/gi1990/14/0/14_0_176/_article},
  pdf = {https://www.jsbi.org/pdfs/journal1/GIW03/GIW03F018.pdf},
  doi = {10.11234/gi1990.14.176}
}

@inproceedings{BrejovaBrownVinarWABI03,
  author = {Brejov{\'a}, Bro{\v{n}}a and Brown, Daniel G. and Vina{\v{r}}, Tom{\'a}{\v{s}}},
  title = {Vector seeds: an extension to spaced seeds allows substantial improvements in sensitivity and specificity},
  booktitle = {WABI},
  pages = {39--54},
  year = {2003},
  volume = {2812},
  series = {Lecture Notes in Computer Science},
  month = {September},
  publisher = {Springer},
  url = {https://link.springer.com/chapter/10.1007/978-3-540-39763-2_4},
  pdf = {https://link.springer.com/content/pdf/10.1007/978-3-540-39763-2_4.pdf},
  doi = {10.1007/978-3-540-39763-2_4}
}

@inproceedings{BuhlerKeichSunRECOMB03,
  author = {Buhler, Jeremy and Keich, Uri and Sun, Yanni},
  title = {Designing seeds for similarity search in genomic {DNA}},
  booktitle = {Proceedings of the 7th Annual International Conference on Research in Computational Molecular Biology (RECOMB), Berlin (Germany)},
  pages = {67--75},
  year = {2003},
  month = {April},
  publisher = {ACM Press},
  pdf = {https://www.cs.cornell.edu/~keich/papers/spaced_seeds_RECOMB03.pdf},
  doi = {10.1145/640075.640083}
}

@article{BLASTZ03,
  author = {Schwartz, Scott and Kent, W. James and Smit, Arian and Zhang, Zheng and Baertsch, Robert and Hardison, Ross C.
and Haussler, David and Miller, Webb},
  title = {Human--Mouse Alignments with {BLASTZ}},
  journal = {Genome Research},
  year = {2003},
  volume = {13},
  number = {1},
  pages = {103--107},
  url = {https://www.genome.org/cgi/content/abstract/13/1/103},
  doi = {10.1101/gr.809403}
}

@inproceedings{BrejovaBrownVinarCPM03,
  author = {Brejov{\'a}, Bro{\v{n}}a and Brown, Daniel G. and Vina{\v{r}}, Tom{\'a}{\v{s}}},
  title = {Optimal Spaced Seeds for {H}idden {M}arkov {M}odels, with Application to Homologous Coding Regions},
  booktitle = {Proceedings of the 14th Symposium on Combinatorial Pattern Matching (CPM), Morelia (Mexico)},
  pages = {42--54},
  year = {2003},
  editor = {Baeza-Yates, R. and Chavez, E. and Crochemore, M.},
  volume = {2676},
  series = {Lecture Notes in Computer Science},
  month = {June},
  publisher = {Springer},
  url = {https://link.springer.com/chapter/10.1007/3-540-44888-8_4},
  pdf = {https://link.springer.com/content/pdf/10.1007/3-540-44888-8_4.pdf},
  doi = {10.1007/3-540-44888-8_4}
}

@article{BurkhardtKarkkainenFI03,
  author = {Burkhardt, Stefan and K{\"a}rkk{\"a}inen, Juha},
  title = {Better filtering with gapped $q$-grams},
  journal = {Fundamenta Informaticae},
  year = {2003},
  volume = {56},
  number = {1--2},
  pages = {51--70},
  note = {(earlier version in CPM 2001)},
  url = {https://content.iospress.com/articles/fundamenta-informaticae/fi56-1-2-04},
  postscript = {https://www.cs.helsinki.fi/u/tpkarkka/publications/fundamenta-revised02.ps.gz}
}

@inproceedings{BurkhardtKarkkainenCPM02,
  author = {Burkhardt, Stefan and K{\"a}rkk{\"a}inen, Juha},
  title = {{O}ne-gapped $q$-gram filters for {L}evenshtein {D}istance},
  booktitle = {Proceedings of the 13th Symposium on Combinatorial Pattern Matching (CPM)},
  pages = {225--234},
  year = {2002},
  volume = {2373},
  series = {Lecture Notes in Computer Science},
  publisher = {Springer},
  url = {https://link.springer.com/chapter/10.1007/3-540-45452-7_19},
  pdf = {https://link.springer.com/content/pdf/10.1007/3-540-45452-7_19.pdf},
  doi =
{10.1007/3-540-45452-7_19}
}

@inproceedings{BuhlerRECOMB02,
  author = {Buhler, Jeremy},
  title = {Provably sensitive indexing strategies for biosequence similarity search},
  booktitle = {RECOMB, Washington DC (USA)},
  pages = {90--99},
  year = {2002},
  month = {April},
  publisher = {ACM Press},
  url = {https://dl.acm.org/citation.cfm?doid=565196.565208},
  doi = {10.1145/565196.565208}
}

@article{PatternHunter02,
  author = {Ma, Bin and Tromp, John and Li, Ming},
  title = {{P}attern{H}unter: Faster and more sensitive homology search},
  journal = {Bioinformatics},
  year = {2002},
  volume = {18},
  number = {3},
  pages = {440--445},
  url = {https://bioinformatics.oxfordjournals.org/content/18/3/440},
  pdf = {https://bioinformatics.oxfordjournals.org/content/18/3/440.full.pdf},
  doi = {10.1093/bioinformatics/18.3.440}
}

@inproceedings{BurkhardtKarkkainenCPM01,
  author = {Burkhardt, Stefan and K{\"a}rkk{\"a}inen, Juha},
  title = {Better Filtering with Gapped $q$-Grams},
  booktitle = {Proceedings of the 12th Symposium on Combinatorial Pattern Matching (CPM)},
  pages = {73--85},
  volume = {2089},
  year = {2001},
  series = {Lecture Notes in Computer Science},
  publisher = {Springer},
  month = {July},
  url = {https://link.springer.com/chapter/10.1007/3-540-48194-X_6},
  pdf = {https://link.springer.com/content/pdf/10.1007/3-540-48194-X_6.pdf},
  doi = {10.1007/3-540-48194-X_6}
}

@inproceedings{BuhlerTompaRECOMB01,
  author = {Buhler, Jeremy and Tompa, Martin},
  title = {Finding motifs using random projections},
  booktitle = {Proceedings of the 5th Annual International Conference on Research in Computational Molecular Biology (RECOMB)},
  pages = {69--76},
  publisher = {ACM Press},
  year = {2001},
  url = {https://dl.acm.org/citation.cfm?doid=369133.369172},
  doi = {10.1145/369133.369172}
}

@article{LSHALLPAIRS01,
  author = {Buhler, Jeremy},
  title = {Efficient Large-Scale Sequence Comparison by Locality-Sensitive Hashing},
  journal = {Bioinformatics},
  year = {2001},
  volume = {17},
  number = {5},
  pages = {419--428},
  url =
{https://bioinformatics.oxfordjournals.org/content/17/5/419},
  pdf = {https://bioinformatics.oxfordjournals.org/content/17/5/419.full.pdf},
  doi = {10.1093/bioinformatics/17.5.419}
}

@article{WABA00,
  author = {Kent, William James and Zahler, Alan M.},
  title = {Conservation, Regulation, Synteny, and Introns in a Large-scale {C. briggsae}--{C. elegans} Genomic Alignment},
  journal = {Genome Research},
  year = {2000},
  volume = {10},
  number = {8},
  pages = {1115--1125},
  month = {August},
  url = {https://genome.cshlp.org/content/10/8/1115},
  pdf = {https://genome.cshlp.org/content/10/8/1115.pdf},
  doi = {10.1101/gr.10.8.1115}
}

@article{NicodemeSalvyFlajolet02TCS,
  author = {Nicod{\`e}me, Pierre and Salvy, Bruno and Flajolet, Philippe},
  title = {Motif statistics},
  journal = {Theoretical Computer Science},
  volume = {287},
  number = {2},
  year = {2002},
  issn = {0304-3975},
  pages = {593--617},
  url = {https://www.sciencedirect.com/science/article/pii/S030439750100264X},
  doi = {10.1016/S0304-3975(01)00264-X}
}

@inproceedings{FLASH93,
  author = {Califano, Andrea and Rigoutsos, Isidore},
  title = {Flash: A fast look-up algorithm for string homology},
  booktitle = {Proceedings of the 1st International Conference on Intelligent Systems for Molecular Biology (ISMB)},
  pages = {56--64},
  year = {1993},
  month = {July},
  url = {https://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=341106},
  pdf = {https://www.aaai.org/Papers/ISMB/1993/ISMB93-007.pdf},
  doi = {10.1109/CVPR.1993.341106}
}

@article{FrithNoeKucherovBIOINFORMATICS21,
  author = {Frith, Martin C.
and No{\'e}, Laurent and Kucherov, Gregory},
  title = {Minimally-overlapping words for sequence similarity search},
  journal = {Bioinformatics},
  year = {2020},
  optkey = {},
  optvolume = {},
  optnumber = {},
  optpages = {},
  month = {December},
  doi = {10.1093/bioinformatics/btaa1054},
  url = {https://doi.org/10.1093/bioinformatics/btaa1054},
  hal-url = {https://hal.archives-ouvertes.fr/hal-03087470/en},
  pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/33346833/}
}

@article{PetrucciNoePizziCominJCB20,
  author = {Petrucci, Enrico and No{\'e}, Laurent and Pizzi, Cinzia and Comin, Matteo},
  title = {Iterative Spaced Seed Hashing: Closing the Gap Between Spaced Seed Hashing and $k$-mer Hashing},
  journal = {Journal of Computational Biology},
  year = {2020},
  volume = {27},
  number = {2},
  optpages = {},
  month = {February},
  doi = {10.1089/cmb.2019.0298},
  url = {https://doi.org/10.1089/cmb.2019.0298},
  hal-url = {https://hal.archives-ouvertes.fr/hal-02910076/en},
  pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/31800307/}
}

@article{NoeAMB17,
  author = {No{\'e}, Laurent},
  title = {Best hits of 11110110111: model-free selection and parameter-free sensitivity calculation of spaced seeds},
  journal = {Algorithms for Molecular Biology},
  year = {2017},
  volume = {12},
  number = {1},
  month = {February},
  abstract = {Background: Spaced seeds, also named gapped q-grams, gapped k-mers, spaced q-grams, have been proven to be more sensitive than contiguous seeds (contiguous q-grams, contiguous k-mers) in nucleic and amino-acid sequences analysis. Initially proposed to detect sequence similarities and to anchor sequence alignments, spaced seeds have more recently been applied in several alignment-free related methods. Unfortunately, spaced seeds need to be initially designed. This task is known to be time-consuming due to the number of spaced seed candidates. Moreover, it can be altered by a set of arbitrary chosen parameters from the probabilistic alignment models used.
In this general context, Dominant seeds have been introduced by Mak and Benson (Bioinformatics 25:302–308, 2009) on the Bernoulli model, in order to reduce the number of spaced seed candidates that are further processed in a parameter-free calculation of the sensitivity. Results: We expand the scope of work of Mak and Benson on single and multiple seeds by considering the Hit Integration model of Chung and Park (BMC Bioinform 11:31, 2010), demonstrate that the same dominance definition can be applied, and that a parameter-free study can be performed without any significant additional cost. We also consider two new discrete models, namely the Heaviside and the Dirac models, where lossless seeds can be integrated. From a theoretical standpoint, we establish a generic framework on all the proposed models, by applying a counting semi-ring to quickly compute large polynomial coefficients needed by the dominance filter. From a practical standpoint, we confirm that dominant seeds reduce the set of, either single seeds to thoroughly analyse, or multiple seeds to store. Moreover, in https://bioinfo.cristal.univ-lille.fr/yass/iedera_dominance, we provide a full list of spaced seeds computed on the four aforementioned models, with one (continuous) parameter left free for each model, and with several (discrete) alignment lengths.},
  doi = {10.1186/s13015-017-0092-1},
  url = {https://almob.biomedcentral.com/articles/10.1186/s13015-017-0092-1},
  pdf = {https://almob.biomedcentral.com/track/pdf/10.1186/s13015-017-0092-1#.pdf},
  hal-url = {https://hal.archives-ouvertes.fr/hal-01467970/en},
  pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/28289437},
  pmc-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5310094}
}

@article{MartinNoeAISM17,
  author = {Martin, Donald E. K.
and No{\'e}, Laurent},
  title = {Faster exact distributions of pattern statistics through sequential elimination of states},
  journal = {Annals of the Institute of Statistical Mathematics},
  year = {2017},
  volume = {69},
  number = {1},
  pages = {231--248},
  month = {February},
  abstract = {When using an auxiliary Markov chain (AMC) to compute sampling distributions, the computational complexity is directly related to the number of Markov chain states. For certain complex pattern statistics, minimal deterministic finite automata (DFA) have been used to facilitate efficient computation by reducing the number of AMC states. For example, when statistics of overlapping pattern occurrences are counted differently than non-overlapping occurrences, a DFA consisting of prefixes of patterns extended to overlapping occurrences has been generated and then minimized to form an AMC. However, there are situations where forming such a DFA is computationally expensive, e.g., with computing the distribution of spaced seed coverage. In dealing with this problem, we develop a method to obtain a small set of states during the state generation process without forming a DFA, and show that a huge reduction in the size of the AMC can be attained.},
  doi = {10.1007/s10463-015-0540-y},
  url = {https://link.springer.com/article/10.1007/s10463-015-0540-y},
  pdf = {https://link.springer.com/content/pdf/10.1007/s10463-015-0540-y.pdf},
  hal-url = {https://hal.archives-ouvertes.fr/hal-01237045/en}
}

@article{NoeMartinJCB14,
  author = {No{\'e}, Laurent and Martin, Donald E.
K.},
  title = {A Coverage Criterion for Spaced Seeds and Its Applications to Support Vector Machine String Kernels and k-Mer Distances},
  journal = {Journal of Computational Biology},
  year = {2014},
  volume = {21},
  number = {12},
  pages = {947--963},
  month = {December},
  abstract = { Spaced seeds have been recently shown to not only detect more alignments, but also to give a more accurate measure of phylogenetic distances, and to provide a lower misclassification rate when used with Support Vector Machines (SVMs). We confirm by independent experiments these two results, and propose in this article to use a coverage criterion to measure the seed efficiency in both cases in order to design better seed patterns. We show first how this coverage criterion can be directly measured by a full automaton-based approach. We then illustrate how this criterion performs when compared with two other criteria frequently used, namely the single-hit and multiple-hit criteria, through correlation coefficients with the correct classification/the true distance. At the end, for alignment-free distances, we propose an extension by adopting the coverage criterion, show how it performs, and indicate how it can be efficiently computed.},
  url = {https://online.liebertpub.com/doi/abs/10.1089/cmb.2014.0173},
  pdf = {https://drive.google.com/file/d/1G47iWOflF2pN-tnaCbcgaHFq_h-ZbpZE/view},
  postscript = {https://drive.google.com/file/d/1K3dQo5aSop_kVT4dBLzwlNis7BB9E41M/view?usp=sharing},
  eprint = {1412.2587},
  hal-url = {https://hal.archives-ouvertes.fr/hal-01083204/en},
  pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/25393923},
  pmc-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4253314},
  doi = {10.1089/cmb.2014.0173}
}

@article{FrithNoeNAR14,
  author = {Frith, Martin C.
and No{\'e}, Laurent},
  title = {Improved search heuristics find 20 000 new alignments between human and mouse genomes},
  journal = {Nucleic Acids Research},
  volume = {42},
  number = {7},
  pages = {e59},
  abstract = {Sequence similarity search is a fundamental way of analyzing nucleotide sequences. Despite decades of research, this is not a solved problem because there exist many similarities that are not found by current methods. Search methods are typically based on a seed-and-extend approach, which has many variants (e.g. spaced seeds, transition seeds), and it remains unclear how to optimize this approach. This study designs and tests seeding methods for inter-mammal and inter-insect genome comparison. By considering substitution patterns of real genomes, we design sets of multiple complementary transition seeds, which have better performance (sensitivity per run time) than previous seeding strategies. Often the best seed patterns have more transition positions than those used previously. We also point out that recent computer memory sizes (e.g. 60 GB) make it feasible to use multiple (e.g. eight) seeds for whole mammal genomes. Interestingly, the most sensitive settings achieve diminishing returns for human-dog and melanogaster-pseudoobscura comparisons, but not for human-mouse, which suggests that we still miss many human-mouse alignments. Our optimized heuristics find ∼20 000 new human-mouse alignments that are missing from the standard UCSC alignments. We tabulate seed patterns and parameters that work well so they can be used in future research.},
  month = {February},
  year = {2014},
  pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/24493737},
  pmc-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3985675},
  hal-url = {https://hal.archives-ouvertes.fr/hal-00958207/en},
  url = {https://nar.oxfordjournals.org/content/42/7/e59},
  pdf = {https://nar.oxfordjournals.org/content/42/7/e59.full.pdf},
  doi = {10.1093/nar/gku104}
}

@article{StartekEtAl12,
  author = {Startek, Micha{\l} and Lasota, S{\l}awomir and Sykulski, Maciej and Bu{\l}ak, Adam and No{\'e}, Laurent and Kucherov, Gregory and Gambin, Anna},
  title = {Efficient alternatives to {PSI-BLAST}},
  journal = {Bulletin of the Polish Academy of Sciences: Technical Sciences},
  year = {2012},
  abstract = {In this paper we present two algorithms that may serve as efficient alternatives to the well-known PSI BLAST tool: SeedBLAST and CTX-PSI Blast. Both may benefit from the knowledge about amino acid composition specific to a given protein family: SeedBLAST uses a advisedly designed seed, while CTX-PSI BLAST extends PSI BLAST with the context-specific substitution model.
The seeding technique became central in the theory of sequence alignment. There are several efficient tools applying seeds to DNA homology search, but not to protein homology search. In this paper we fill this gap. We advocate the use of multiple subset seeds derived from a hierarchical tree of amino acid residues. Our method computes, by an evolutionary algorithm, seeds that are specifically designed for a given protein family. The seeds are represented by deterministic finite automata (DFAs) and built into the NCBI-BLAST software. This extended tool, named SeedBLAST, is compared to the original BLAST and PSI-BLAST on several protein families. Our results demonstrate a superiority of SeedBLAST in terms of efficiency, especially in the case of twilight zone hits.
The contextual substitution model has been proven to increase sensitivity of protein alignment. In this paper we perform a next step in the contextual alignment program. We announce a contextual version of the PSI-BLAST algorithm, an iterative version of the NCBI-BLAST tool. The experimental evaluation has been performed demonstrating a significantly higher sensitivity compared to the ordinary PSI-BLAST algorithm.},
  volume = {60},
  number = {3},
  pages = {495--505},
  month = {December},
  url = {http://journals.pan.pl/Content/83626?format_id=1},
  pdf = {http://www.mimuw.edu.pl/~sl/PAPERS/AltPSIBLAST.pdf},
  doi = {10.2478/v10175-012-0063-0},
  hal-url = {https://hal.archives-ouvertes.fr/hal-00749016/en}
}

@inproceedings{GambinEtAlBIOSTEC11,
  author = {Gambin, Anna and Lasota, S{\l}awomir and Startek, Micha{\l} and Sykulski, Maciej and No{\'e}, Laurent and Kucherov, Gregory},
  title = {Subset seed extension to {P}rotein {BLAST}},
  booktitle = {Proceedings of the International Conference on Bioinformatics Models, Methods and Algorithms (BIOINFORMATICS 2011), January 26-29 2011, Rome (Italy)},
  pages = {149--158},
  month = {January},
  year = {2011},
  abstract = {The seeding technique became central in the theory of sequence alignment and there are several efficient tools applying seeds to DNA homology search. Recently, a concept of subset seeds has been proposed for similarity search in protein sequences. We experimentally evaluate the applicability of subset seeds to protein homology search. We advocate the use of multiple subset seeds derived from a hierarchical tree of amino acid residues. Our method computes, by an evolutionary algorithm, seeds that are specifically designed for a given protein family. The representation of seeds by deterministic finite automata (DFAs) is developed and built into the NCBI-BLAST software. This extended tool, named SeedBLAST, is compared to the original NCBI-BLAST on the GPCR protein family.
Our results demonstrate a clear superiority of SeedBLAST in terms of efficiency, especially in the case of twilight zone hits. SeedBLAST is an open source software freely available https://bioputer.mimuw.edu.pl/papers/sblast. Supplementary material and user manual are also provided.},
  publisher = {{S}ci{T}e{P}ress Digital Library},
  hal-url = {https://hal.inria.fr/inria-00609791/en/},
  pdf = {https://drive.google.com/file/d/17Vgzp3wesS-RpTrCbyPEPyIgfLfvixSV/view},
  doi = {10.5220/0003147601490158},
  opteditor = {},
  optvolume = {},
  optnumber = {},
  optseries = {}
}

@article{NoeGirdeaKucherovABI10,
  author = {No{\'e}, Laurent and G{\^i}rdea, Marta and Kucherov, Gregory},
  title = {Designing efficient spaced seeds for {SOLiD} read mapping},
  journal = {Advances in Bioinformatics},
  year = {2010},
  month = {July},
  volume = {2010},
  pages = {ID 708501},
  doi = {10.1155/2010/708501},
  url = {https://www.hindawi.com/journals/abi/2010/708501/},
  pdf = {https://downloads.hindawi.com/journals/abi/2010/708501.pdf},
  hal-url = {https://hal.inria.fr/inria-00527029/en/},
  pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/20936175},
  pmc-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2945724},
  opteprint = {},
  abstract = {The advent of high-throughput sequencing technologies constituted a major advance in genomic studies, offering new prospects in a wide range of applications. We propose a rigorous and flexible algorithmic solution to mapping SOLiD color-space reads to a reference genome. The solution relies on an advanced method of seed design that uses a faithful probabilistic model of read matches and, on the other hand, a novel seeding principle especially adapted to read mapping. Our method can handle both lossy and lossless frameworks and is able to distinguish, at the level of seed design, between SNPs and reading errors.
We illustrate our approach by several seed designs and demonstrate their efficiency.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-pays = {RU},
  aeres = {ACL},
  selectif = {oui}
}

@inproceedings{NoeGirdeaKucherovRECOMB10,
  author = {No{\'e}, Laurent and G{\^i}rdea, Marta and Kucherov, Gregory},
  title = {Seed design framework for mapping {SOLiD} reads},
  booktitle = {Proceedings of the 14th Annual International Conference on Research in Computational Molecular Biology ({RECOMB}), April 25-28, 2010, Lisbon (Portugal)},
  pages = {384--396},
  year = {2010},
  month = {April},
  editor = {Berger, B.},
  volume = {6044},
  series = {Lecture Notes in Computer Science},
  doi = {10.1007/978-3-642-12683-3_25},
  url = {https://link.springer.com/chapter/10.1007/978-3-642-12683-3_25},
  pdf = {https://drive.google.com/file/d/1mH4wEgyyyqmOvcCdFxYWIlO6wbd-w68G/view},
  postscript = {https://drive.google.com/file/d/1-KBN0v4jAMTvAfcMvZTJRH1Kya3lLsPb/view},
  hal-url = {https://hal.inria.fr/inria-00484642/en/},
  optpubmed-url = {},
  eprint = {1006.2625},
  abstract = {The advent of high-throughput sequencing technologies constituted a major advance in genomic studies, offering new prospects in a wide range of applications. We propose a rigorous and flexible algorithmic solution to mapping SOLiD color-space reads to a reference genome. The solution relies on an advanced method of seed design that uses a faithful probabilistic model of read matches and, on the other hand, a novel seeding principle especially adapted to read mapping. Our method can handle both lossy and lossless frameworks and is able to distinguish, at the level of seed design, between SNPs and reading errors.
We illustrate our approach by several seed designs and demonstrate their efficiency.},
  publisher = {Springer},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-pays = {RU},
  aeres = {ACT},
  selectif = {oui},
  optnote = {(submitted: 176, accepted: 36, acceptance rate: 0.20)}
}

@article{RoytbergEtAlTCBB09,
  author = {Roytberg, Mikhail A. and Gambin, Anna and No{\'e}, Laurent and Lasota, S{\l}awomir and Furletova, Eugenia and Szczurek, Ewa and Kucherov, Gregory},
  title = {On subset seeds for protein alignment},
  journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)},
  year = {2009},
  month = {July},
  pages = {483--494},
  volume = {6},
  number = {3},
  doi = {10.1109/tcbb.2009.4},
  url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=4752807},
  pdf = {https://drive.google.com/file/d/19HH-jVjF0naIYWfcSCsSVPkOQw44RjBM/view},
  postscript = {https://drive.google.com/file/d/1bj-egMxUFBO9YkMh3r7hs1BESLxDcOSf/view},
  hal-url = {https://hal.inria.fr/inria-00354773/en/},
  pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/19644175},
  eprint = {0901.3198},
  abstract = {We apply the concept of subset seeds proposed in [1] to similarity search in protein sequences. The main question studied is the design of efficient seed alphabets to construct seeds with optimal sensitivity/selectivity trade-offs. We propose several different design methods and use them to construct several alphabets. We then perform a comparative analysis of seeds built over those alphabets and compare them with the standard BLASTP seeding method [2], [3], as well as with the family of vector seeds proposed in [4].
While the formalism of subset seeds is less expressive (but less costly to implement) than the cumulative principle used in BLASTP and vector seeds, our seeds show a similar or even better performance than BLASTP on Bernoulli models of proteins compatible with the common BLOSUM62 matrix. Finally, we perform a large-scale benchmarking of our seeds against several main databases of protein alignments. Here again, the results show a comparable or better performance of our seeds vs. BLASTP.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-pays = {RU,PL},
  aeres = {ACL},
  selectif = {oui}
}

@inproceedings{RoytbergEtAlALBIO08,
  author = {Roytberg, Mikhail A. and Gambin, Anna and No{\'e}, Laurent and Lasota, S{\l}awomir and Furletova, Eugenia and Szczurek, Ewa and Kucherov, Gregory},
  title = {Efficient seeding techniques for protein similarity search},
  booktitle = {Bioinformatics Research and Development, Proceedings of the 2nd International Conference BIRD 2008, Vienna (Austria), July 7-9, 2008},
  pages = {466--478},
  year = {2008},
  month = {July},
  editor = {Elloumi, M. and K{\"u}ng, J. and Linial, M. and Murphy, R. F. and Schneider, K. and Toma, C.},
  volume = {13},
  series = {Communications in Computer and Information Science},
  publisher = {Springer},
  doi = {10.1007/978-3-540-70600-7_36},
  url = {https://link.springer.com/chapter/10.1007/978-3-540-70600-7_36},
  hal-url = {https://hal.inria.fr/inria-00335564/en/},
  eprint = {0810.5434},
  pdf = {https://drive.google.com/file/d/1BnSTuWJu5GMVIGf80pkzo-8mV5lDoGUU/view},
  postscript = {https://drive.google.com/file/d/1DHdSuZhU0h5jZAxctDaXx34CHk7uU1lK/view},
  abstract = {We apply the concept of subset seeds proposed in [A unifying framework for seed sensitivity and its application to subset seeds] to similarity search in protein sequences. The main question studied is the design of efficient seed alphabets to construct seeds with optimal sensitivity/selectivity trade-offs.
We propose several different design methods and use them to construct several alphabets. We then perform an analysis of seeds built over those alphabet and compare them with the standard Blastp seeding method [2,3], as well as with the family of vector seeds proposed in [4]. While the formalism of subset seed is less expressive (but less costly to implement) than the accumulative principle used in Blastp and vector seeds, our seeds show a similar or even better performance than Blastp on Bernoulli models of proteins compatible with the common BLOSUM62 matrix.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  x-pays = {RU,PL},
  aeres = {ACT},
  selectif = {non},
  optnote = {(submitted: 61, accepted: 30, acceptance rate: 0.50)}
}

@inproceedings{KucherovNoeRoytbergCIAA07,
  author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.},
  title = {Subset Seed Automaton},
  booktitle = {Proceedings of the 12th International {C}onference on {I}mplementation and {A}pplication of {A}utomata ({CIAA}), July 16-18, 2007, Prague (Czech Republic)},
  pages = {180--191},
  year = {2007},
  month = {July},
  editor = {Holub, J. and Zdarek, J.},
  volume = {4783},
  series = {Lecture Notes in Computer Science},
  publisher = {Springer},
  doi = {10.1007/978-3-540-76336-9_18},
  url = {https://link.springer.com/chapter/10.1007/978-3-540-76336-9_18},
  pdf = {https://drive.google.com/file/d/1yOkz1OGhg3-o593dZRMLj_4jvylvs-Oq/view},
  postscript = {https://drive.google.com/file/d/1h7-7dazAtuTooodaE1fi69qDLeGgDR52/view},
  hal-url = {https://hal.inria.fr/inria-00170414/en/},
  eprint = {1408.6198},
  abstract = {We study the pattern matching automaton introduced in [Kucherov-Noe-Roytberg-JBCB-06] for the purpose of seed-based similarity search. We show that our definition provides a compact automaton, much smaller than the one obtained by applying the Aho-Corasick construction.
We study properties of this automaton and present an efficient implementation of the automaton construction. We also present some experimental results and show that this automaton can be successfully applied to more general situations.},
  inria = {Sequoia},
  labo = {dans},
  x-editorial-board = {yes},
  x-international-audience = {yes},
  x-proceedings = {yes},
  x-pays = {RU},
  aeres = {ACT},
  selectif = {oui},
  optnote = {(submitted: 79, accepted: 23, acceptance rate: 0.29)}
}

@inproceedings{PeterlongoEtAlPBC07,
  author = {Peterlongo, Pierre and No{\'e}, Laurent and Lavenier, Dominique and Georges, Gilles and Jacques, Julien and Kucherov, Gregory and Giraud, Mathieu},
  title = {Protein similarity search with subset seeds on a dedicated reconfigurable hardware},
  booktitle = {Proceedings of the 2nd Workshop on {P}arallel {B}io-{C}omputing (PBC), September 9-12, 2007 Gdansk (Poland)},
  pages = {1240--1248},
  year = {2008},
  month = {September},
  editor = {Wyrzykowski, R. and Dongarra, J. and Karczewski, K. and Wasniewski, J.},
  volume = {4967},
  series = {Lecture Notes in Computer Science},
  publisher = {Springer},
  doi = {10.1007/978-3-540-68111-3_131},
  pdf = {https://cristal.univ-lille.fr/~giraud/publis/peterlongo-pbc-07.pdf},
  url = {https://link.springer.com/chapter/10.1007/978-3-540-68111-3_131},
  hal-url = {https://hal.inria.fr/inria-00178325/en/},
  opteprint = {},
  abstract = {With a sharp increase of available DNA and protein sequence data, new precise and fast similarity search methods are needed for large-scale genome and proteome comparisons. Modern seed-based techniques of similarity search (spaced seeds, multiple seeds, subset seeds) provide a better sensitivity/specificity ratio. We present an implementation of such a seed-based technique on a parallel specialized hardware embedding reconfigurable architecture (FPGA), where the FPGA is tightly connected to large capacity Flash memories. This parallel system allows large databases to be fully indexed and rapidly accessed.
Compared totraditional approaches presented by the Blastp software, we obtain botha significant speed-up and better results. To the best of our knowledge,this is the first attempt to exploit efficient seed-based algorithms forparallelizing the sequence similarity search.}, inria = {Sequoia}, labo = {dans}, x-editorial-board = {yes}, x-international-audience = {yes}, x-proceedings = {yes}, aeres = {ACT}, selectif = {oui}}@article{KucherovNoeRoytbergJBCB06, author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.}, title = {A unifying framework for seed sensitivity and its application to subset seeds}, journal = {Journal of Bioinformatics and Computational Biology}, year = {2006}, month = {November}, volume = {4}, number = {2}, pages = {553--569}, doi = {10.1142/S0219720006001977}, url = {https://www.worldscinet.com/jbcb/04/0402/S0219720006001977.html}, hal-url = {https://hal.archives-ouvertes.fr/hal-00018114}, pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/16819802}, pmc-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2824148}, eprint = {cs/0601116}, postscript = {https://drive.google.com/file/d/1HR4wJFxi5zQr8oIBA0LPdYpiIWvuIn7_/view}, pdf = {https://drive.google.com/file/d/1eGZN8O3khBUbN0e8GTPTlKXxWJa6r8dO/view}, abstract = {We propose a general approach to compute the seed sensitivity,that can be applied to different definitions of seeds. It treats separatelythree components of the seed sensitivity problem -- a set of target alignments,an associated probability distribution, and a seed model -- that are specifiedby distinct finite automata. 
The approach is then applied to a new concept of{\em subset seeds} for which we propose an efficient automaton construction.Experimental results confirm that sensitive subset seeds can be efficientlydesigned using our approach, and can then be used in similarity search producingbetter results than ordinary spaced seeds.}, inria = {Sequoia}, labo = {dans}, x-editorial-board = {yes}, x-international-audience = {yes}, x-pays = {RU}, aeres = {ACL}, selectif = {oui}}@inproceedings{KucherovNoeRoytbergWABI05, author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.}, title = {A unifying framework for seed sensitivity and its application to subset seeds (Extended Abstract)}, booktitle = {Proceedings of the 5th International Workshop on Algorithms in Bioinformatics ({WABI}), October 3-6, 2005, Mallorca (Spain)}, pages = {251--263}, year = {2005}, month = {October}, editor = {Casadio, R and Myers, G}, volume = {3692}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, doi = {10.1007/11557067_21}, url = {https://link.springer.com/chapter/10.1007/11557067_21}, hal-url = {https://hal.inria.fr/inria-00001164/en/}, eprint = {cs/0603106}, pdf = {https://drive.google.com/file/d/19VNwrrvbvz89WhepLX4_amaJbq-R3ajt/view}, postscript = {https://drive.google.com/file/d/16tlAb3_xLhdDRiVNdi4-6STp-lsp0pGb/view}, abstract = {We propose a general approach to compute the seed sensitivity,that can be applied to different definitions of seeds. It treats separatelythree components of the seed sensitivity problem -- a set of target alignments,an associated probability distribution, and a seed model -- that are specifiedby distinct finite automata. 
The approach is then applied to a new concept of{\em subset seeds} for which we propose an efficient automaton construction.Experimental results confirm that sensitive subset seeds can be efficientlydesigned using our approach, and can then be used in similarity search producingbetter results than ordinary spaced seeds.}, inria = {ADAGE}, labo = {hors}, x-editorial-board = {yes}, x-international-audience = {yes}, x-proceedings = {yes}, x-pays = {RU}, aeres = {ACT}, selectif = {oui}, optnote = {(submitted: 94, accepted: 35, acceptance rate: 0.37)}}@article{NoeKucherovNAR05, author = {No{\'e}, Laurent and Kucherov, Gregory}, title = {{YASS}: enhancing the sensitivity of {DNA} similarity search}, journal = {Nucleic Acids Research}, year = {2005}, month = {April}, volume = {33 (web-server issue)}, number = {suppl\_2}, pages = {W540--W543}, doi = {10.1093/nar/gki478}, url = {https://nar.oxfordjournals.org/content/33/suppl_2/W540.full}, pdf = {https://nar.oxfordjournals.org/content/33/suppl_2/W540.full.pdf}, hal-url = {https://hal.inria.fr/inria-00448742/en/}, pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/15980530}, pmc-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1160238}, opteprint = {}, abstract = {YASS is a DNA local alignment tool based on an efficient andsensitive filtering algorithm. It applies transition-constrained seeds tospecify the most probable conserved motifs between homologous sequences,combined with a flexible hit criterion used to identify groups of seeds that arelikely to exhibit significant alignments. A web interface(https://www.loria.fr/projects/YASS/) is available to upload input sequences infasta format, query the program and visualize the results obtained in severalforms (dot-plot, tabular output and others). 
A standalone version is availablefor download from the web page.}, inria = {ADAGE}, labo = {hors}, x-editorial-board = {yes}, x-international-audience = {yes}, aeres = {ACL}, selectif = {oui}}@article{KucherovNoeRoytbergTCBB05, author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.}, title = {Multiseed lossless filtration}, journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics (TCBB)}, pages = {51--61}, year = {2005}, month = {January}, key = {ISSN:1545-5963}, volume = {2}, number = {1}, doi = {10.1109/tcbb.2005.12}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=1416851}, hal-url = {https://hal.inria.fr/inria-00354810/en/}, pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/17044164}, eprint = {0901.3215}, pdf = {https://drive.google.com/file/d/10hnNHl7SqGI_77MPH_xRatzZIjZ7-RNf/view}, postscript = {https://drive.google.com/file/d/1FHI3EOIkqmSZ4EVsVeV27SYLpk-IC0AR/view}, abstract = {We study a method of seed-based lossless filtration forapproximate string matching and related bioinformatics applications. The methodis based on a simultaneous use of several spaced seeds rather than a single seedas studied by Burkhardt and Karkkainen. We present algorithms to compute severalimportant parameters of seed families, study their combinatorial properties, anddescribe several techniques to construct efficient families. 
We also report alarge-scale application of the proposed technique to the problem ofoligonucleotide selection for an {EST} sequence database.}, inria = {ADAGE}, labo = {hors}, x-editorial-board = {yes}, x-international-audience = {yes}, aeres = {ACL}, selectif = {oui}}@phdthesis{NoePHD05, author = {No{\'e}, Laurent}, title = {Recherche de similarit{\'e}s dans les s{\'e}quences d'ADN: mod{\`e}les et algorithmes pour la conception de graines efficaces}, school = {Universit{\'e} Henri Poincar{\'e} - Nancy}, year = {2005}, month = {September}, pdf = {https://drive.google.com/file/d/1tCzDmuc5xlFKxtN1mEV6eXptlFdherWV/view}, postscript = {https://drive.google.com/file/d/1WWFX24WowR5AIj2FGuJBlgXPucr8hQXi/view}, hal-url = {https://tel.archives-ouvertes.fr/tel-00011482}, opteprint = {}, abstract = {Les m{\'e}thodes de recherche de similarit{\'e}s les plusfr{\'e}quemment utilis{\'e}es dans le cadre de la g{\'e}nomique sontheuristiques. Elles se basent sur un principe de filtrage du texte qui permetde localiser les r{\'e}gions potentiellement similaires.
Dans cette th{\`e}se, nous proposons de nouvelles d{\'e}finitions de filtrespour la recherche de similarit{\'e}s sur les s{\'e}quences g{\'e}nomiques etdes algorithmes associ{\'e}s pour mesurer leurs caract{\'e}ristiques. Pluspr{\'e}cis{\'e}ment, nous avons {\'e}tudi{\'e} le mod{\`e}le des {\em grainesespac{\'e}es}, et propos{\'e} un algorithme d'{\'e}valuation de l'efficacit{\'e}des graines sur des similarit{\'e}s d'une classe particuli{\`e}re(similarit{\'e}s dites {\em homog{\`e}nes}). Nous avons {\'e}galementd{\'e}velopp{\'e} un algorithme g{\'e}n{\'e}ral pour la mesure del'efficacit{\'e} des graines, ainsi qu'un nouveau mod{\`e}le de graineappel{\'e} {\em graine sous-ensemble}, extension du mod{\`e}le des {\em grainesespac{\'e}es}. Enfin nous donnons, dans le cadre du {\em filtrage sans perte},une extension {\`a} l'aide de graines multiples, que nous analysons etappliquons au probl{\`e}me de la conception d'oligonucl{\'e}otides.
Nous avons r{\'e}alis{\'e} et donnons acc{\`e}s {\`a} des outils pour laconception des filtres, ainsi que pour la recherche de similarit{\'e}s.}, aeres = {TH}, labo = {hors}, inria = {ADAGE}}@inproceedings{KucherovNoeRoytbergCPM04, author = {Kucherov, Gregory and No{\'e}, Laurent and Roytberg, Mikhail A.}, title = {Multi-seed lossless filtration (Extended Abstract)}, booktitle = {Proceedings of the 15th Annual Combinatorial Pattern Matching Symposium (CPM), July 5-7, 2004, Istanbul (Turkey)}, pages = {297--310}, year = {2004}, month = {July}, editor = {Sahinalp, S.C. and Muthukrishnan, S. and Dogrusoz, U.}, volume = {3109}, series = {Lecture Notes in Computer Science}, publisher = {Springer}, doi = {10.1007/978-3-540-27801-6_22}, url = {https://link.springer.com/chapter/10.1007/978-3-540-27801-6_22}, pdf = {https://drive.google.com/file/d/1gYQ3PyPcEA9i76tamyWPL86NfnUm-i4n/view}, postscript = {https://drive.google.com/file/d/17htF2bJUC0ejKyqWzNPI1N4L7GR_c3UP/view}, hal-url = {https://hal.inria.fr/inria-00001162/en/}, opteprint = {}, isnb = {3-540-22341-X}, abstract = {We study a method of seed-based lossless filtration forapproximate string matching and related bioinformatics applications. The methodis based on a simultaneous use of several spaced seeds rather than a singleseed as studied by Burkhardt and Karkkainen. We present algorithms to computeseveral important parameters of seed families, study their combinatorialproperties, and describe several techniques to construct efficient families. 
Wealso report a large-scale application of the proposed technique to the problemof oligonucleotide selection for an {EST} sequence database.}, inria = {ADAGE}, labo = {hors}, x-editorial-board = {yes}, x-international-audience = {yes}, x-proceedings = {yes}, aeres = {ACT}, selectif = {oui}, optnote = {(submitted: 79, accepted: 36, acceptance rate: 0.46)}}@inproceedings{KucherovNoePontyBIBE04, author = {Kucherov, Gregory and No{\'e}, Laurent and Ponty, Yann}, title = {Estimating seed sensitivity on homogeneous alignments}, booktitle = {Proceedings of the IEEE 4th Symposium on Bioinformatics and Bioengineering (BIBE), May 19-21, 2004, Taichung (Taiwan)}, pages = {387--394}, year = {2004}, month = {April}, opteditor = {}, optvolume = {}, optnumber = {}, optseries = {the IEEE 4th Symposium on Bioinformatics and Bioengineering - BIBE'2004}, optaddress = {}, optorganization = {}, publisher = {IEEE Computer Society Press}, doi = {10.1109/BIBE.2004.1317369}, url = {https://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=1317369}, pdf = {https://drive.google.com/file/d/1euiTIaEg2QU2hwp8BCPic9rwttUhO64W/view}, postscript = {https://drive.google.com/file/d/1BHJXCeukaNwSKbLNb4Umw7K5mFJTthbZ/view}, hal-url = {https://hal.inria.fr/inria-00001163/en/}, eprint = {cs/0603106}, abstract = {We address the problem of estimating the sensitivity ofseed-based similarity search algorithms. In contrast to approaches based onMarkov models [Faster and more sensitive homology search, Designing seeds forsimilarity search in genomic DNA, Optimal spaced seeds for Hidden Markov Models,with application to homologous coding regions, Vector seeds: an extension tospaced seeds allows substantial improvements in sensitivity and specificity,Sensitivity analysis and efficient method for identifying optimal spaced seeds],we study the estimation based on homogeneous alignments. 
We describe analgorithm for counting and random generation of those alignments and analgorithm for exact computation of the sensitivity for a broad class of seedstrategies. We provide experimental results demonstrating a bias introduced byignoring the homogeneousness condition.}, inria = {ADAGE}, labo = {hors}, x-editorial-board = {yes}, x-international-audience = {yes}, x-proceedings = {yes}, aeres = {ACT}, selectif = {oui}, optnote = {(submitted: 145, accepted: 71, acceptance rate: 0.49)}}@article{NoeKucherovBMCBioinformatics04, author = {No{\'e}, Laurent and Kucherov, Gregory}, title = {Improved hit criteria for {DNA} local alignment}, journal = {{BMC} {B}ioinformatics}, year = {2004}, volume = {5}, pages = {149}, month = {October}, doi = {10.1186/1471-2105-5-149}, url = {https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-5-149}, pdf = {https://www.biomedcentral.com/content/pdf/1471-2105-5-149.pdf}, hal-url = {https://hal.inria.fr/inria-00448743/en/}, pubmed-url = {https://pubmed.ncbi.nlm.nih.gov/15485572}, pmc-url = {https://www.ncbi.nlm.nih.gov/pmc/articles/PMC526756}, opteprint = {}, abstract = {Background: the hit criterion is a key component of heuristiclocal alignment algorithms. It specifies a class of patterns assumed to witnessa potential similarity, and this choice is decisive for the selectivity andsensitivity of the whole method.
Results : in this paper, we propose two ways to improve the hit criterion.First, we define the group criterion combining the advantages of thesingle-seed and double-seed approaches used in existing algorithms. Second, weintroduce transition-constrained seeds that extend spaced seeds by thepossibility of distinguishing transition and transversion mismatches. We provideanalytical data as well as experimental results, obtained with the YASSsoftware, supporting both improvements.
Conclusions : proposed algorithmic ideas allow to obtain a significant gain insensitivity of similarity search without increase in execution time. The methodhas been implemented in YASS software available athttps://www.loria.fr/projects/YASS/}, inria = {ADAGE}, labo = {hors}, x-editorial-board = {yes}, x-international-audience = {yes}, aeres = {ACL}, selectif = {oui}}@inproceedings{NoeKucherovJOBIM04, author = {No{\'e}, Laurent and Kucherov, Gregory}, title = {Improved hit criteria for {DNA} local alignment}, booktitle = {Proceedings of the 5th Open Days in Biology, Computer Science and Mathematics (JOBIM), June 28-30, 2004, Montr\'eal (Canada)}, year = {2004}, month = {June}, pdf = {https://drive.google.com/file/d/1iTyjznBaSTgL-rHjLjjT_3Ed5scUiQcW/view}, postscript = {https://drive.google.com/file/d/1fqec4X_P32_dBB0TVI01kZm4S_uJE_A3/view}, hal-url = {https://hal.inria.fr/inria-00099999/en/}, abstract = {The hit criterion is a key component of heuristic localalignment algorithms. It specifies a class of patterns assumed to witness apotential similarity, and this choice is decisive for the selectivity andsensitivity of the whole method. In this paper, we propose two ways to improvethe hit criterion. First, we define the group criterion combining the advantagesof the single-seed and double-seed approaches used in existing algorithms.Second, we introduce transition-constrained seeds that extend spaced seeds bythe possibility of distinguishing transition and transversion mismatches. Weprovide analytical data as well as experimental results, obtained with our YASSsoftware, supporting both improvements.}}@phdthesis{BrejovaPHD05, author = {Brejov{\'a}, Bro{\v{n}}a}, title = {Evidence Combination in Hidden Markov Models for Gene Prediction}, school = {University of Waterloo}, year = {2005}, url = {https://uwspace.uwaterloo.ca/handle/10012/1036}, pdf = {https://etd.uwaterloo.ca/etd/bbrejova2005.pdf}}