I am confused why this piece of code does not yield the expected
enumeration of k-mer substrings out of a short read. seqan::SequenceStream seqStream(argv[1]); if (!isGood(seqStream)) { std::cerr << "ERROR: Could not open the file.\n"; return 1; } // statistics uint64_t reads = 0; uint64_t total_kmers = 0; while (!atEnd(seqStream)) { // Buffers for the sequence ids and characters. seqan::CharString id; seqan::String<Dna5> shortRead; // Read next sequence from the file. In case of sequences with qualities, // the qualities are directly stored in the Dna5Q qualities. if (0 == readRecord(id, shortRead, seqStream)) { reads++; Iterator<Dna5String >::Type srBegin = begin(shortRead); Iterator<Dna5String >::Type srEnd = end(shortRead); Iterator<Dna5String >::Type it = srBegin; typedef seqan::String<Dna5> Kmer; size_t k = 31; // 31-mers for (size_t i = 0; i <= length(shortRead)-k; ++i,++it) { total_kmers++; Kmer forward; reserve(forward, k); copy(it,it+k-1,begin(forward)); Kmer backward(forward); reverse(backward); Kmer kmer = (forward < backward) ? forward : backward; bool bReverseComplement = (backward < forward); std::cout << forward << endl << backward << endl << bReverseComplement << endl; } } } |