Hi! So if i adapt the code i still get the weired results: test.sam: illumina_80bp_3kb.000000571 2128,2048 bug2.sam (70000 lines of test.sam): illumina_80bp_3kb.000000571 237,157 (code attached...) cheers
//============================================================================ // Name : SAMtest.cpp // Author : // Version : // Copyright : Your copyright notice // Description : Hello World in C++, Ansi-style //============================================================================ #include <iostream> #include <iostream> #include <sstream> #include <seqan/sequence.h> // CharString, ... #include <seqan/file.h> // to stream a CharString into cout #include <seqan/misc/misc_cmdparser.h> #include <seqan/store.h> #include <seqan/file.h> #include <seqan/graph_types.h> using namespace seqan; int main(int argc, const char* argv[]) { CommandLineParser cmdParser("parser"); addOption(cmdParser,CommandLineOption("s","sam", "SAM file(s) with aligned reads", OptionType::String)); if(!parse(cmdParser,argc,argv,std::cerr)){ std::cerr << "Error parsing command line. Check the mandatory parameters." << std::endl; help(cmdParser,std::cerr); return -1; } std::string SAMFileName; typedef FragmentStore<> TFragmentStore; TFragmentStore fragStore; typedef TFragmentStore::TAlignedReadStore ARStore; typedef TFragmentStore::TReadStore TReadStore; typedef Value<TReadStore>::Type TReadStoreElement; typedef TFragmentStore::TContigStore TContigStore; typedef Value<TContigStore>::Type TContig; typedef TFragmentStore::TContigSeq TContigSeq; typedef Gaps<TContigSeq, AnchorGaps<TContig::TGapAnchors> > TContigGaps; typedef TFragmentStore::TAlignedReadStore TAlignedReadStore; typedef Value<TAlignedReadStore>::Type TAlignedRead; typedef TAlignedRead::TPos TAlignedReadPos; typedef unsigned int TID; typedef unsigned int TValue; if(getOptionValueLong(cmdParser,"sam",SAMFileName)&&(length(SAMFileName)>0)){ std::cout << "Loading SAM file " << SAMFileName << std::endl; std::fstream strmReads(SAMFileName.c_str(), std::fstream::in | std::fstream::binary); if(!strmReads.is_open()){ std::cerr << "Couldn't open file " << SAMFileName.c_str() << std::endl; return -1; } else{ read(strmReads, fragStore, seqan::Sam()); std::cout << "Reads cached..." << std::endl; unsigned int nrAlignments = length(fragStore.alignedReadStore); //sortAlignedReads(fragStore.alignedReadStore, SortPairMatchId()); //sortAlignedReads(fragStore.alignedReadStore, SortId()); //std::fstream strmWrite("/Users/mat/test2.sam",std::fstream::out | std::fstream::binary); //write(strmWrite,fragStore,seqan::Sam()); TID lastPairMatchId = TReadStoreElement::INVALID_ID; TID id1,id2,cid1,cId2; //Iterate over aligned reads (sorted by pairMatchId) for (unsigned int n=0;n<nrAlignments;++n){ //if current alignment has a mate... cid1 = fragStore.alignedReadStore[n].contigId; TContigGaps contigGaps(fragStore.contigStore[cid1].seq, fragStore.contigStore[cid1].gaps); TAlignedRead const & alignedRead = fragStore.alignedReadStore[n]; // Translate end position from aligned read record to sequence space in reference. TAlignedReadPos endPos = positionGapToSeq(contigGaps, alignedRead.endPos); TAlignedReadPos beginPos = positionGapToSeq(contigGaps, alignedRead.beginPos); std::cout << "n:" << n << " id: " << fragStore.alignedReadStore[n].id << std::endl; id1 = fragStore.alignedReadStore[n].id; //TValue start1 = fragStore.alignedReadStore[id1].beginPos; //TValue end1 = fragStore.alignedReadStore[id1].endPos; //TValue l1 = length(fragStore.contigStore[cId1].seq); // TValue l2 = length(fragStore.contigStore[cId2].seq); std::cout << fragStore.readNameStore[fragStore.alignedReadStore[id1].readId]<<std::endl; std::cout << beginPos << "," << endPos << std::endl; if(fragStore.readNameStore[fragStore.alignedReadStore[id1].readId]=="illumina_80bp_3kb.000000571"){ //std::cout << fragStore.readNameStore[fragStore.alignedReadStore[id1].readId]<<std::endl; //std::cout << start1 << "," << end1 << std::endl; exit(0); } /*if(fragStore.alignedReadStore[n].pairMatchId!=TReadStoreElement::INVALID_ID){ //indexed by pairMatchID if(lastPairMatchId == fragStore.alignedReadStore[n].pairMatchId){ id2 = fragStore.alignedReadStore[n].id; cId2 = fragStore.alignedReadStore[n].contigId; //now check if contigs do not span... if(cId1==cId2){ //Now check coordinates TValue start1 = fragStore.alignedReadStore[id1].beginPos; TValue end1 = fragStore.alignedReadStore[id1].endPos; TValue start2 = fragStore.alignedReadStore[id2].beginPos; TValue end2 = fragStore.alignedReadStore[id2].endPos; //TValue l1 = length(fragStore.contigStore[cId1].seq); // TValue l2 = length(fragStore.contigStore[cId2].seq); if(fragStore.readNameStore[fragStore.alignedReadStore[id1].readId]=="illumina_80bp_3kb.000000571"){ std::cout << fragStore.readNameStore[fragStore.alignedReadStore[id1].readId]<<std::endl; std::cout << fragStore.readNameStore[fragStore.alignedReadStore[id2].readId]<<std::endl; std::cout << start1 << "," << end1 << " and " << start2 << "," << end2 << std::endl; } } lastPairMatchId = TReadStoreElement::INVALID_ID; } else { id1 = fragStore.alignedReadStore[n].id; cId1 = fragStore.alignedReadStore[n].contigId; lastPairMatchId = fragStore.alignedReadStore[n].pairMatchId; } }*/ } } } else { std::cout << "No SAM file specified...\n" << std::endl; } return 0; }