Hi,I am trying to sort a sam/bam file by read-id and am using the following construct:
sortAlignedReads(reader, SortReadId()); Unfortunately the compiler is complaining:In file included from /pasteur/solexa2/solexa_travail/PF2/programs/seqan/workspace/seqan/core/include/seqan/store.h:63: /pasteur/solexa2/solexa_travail/PF2/programs/seqan/workspace/seqan/core/include/seqan/store/store_align.h:369:13: error: member reference base type 'const char' is not a
structure or union return a1.readId < a2.readId; ~~ ^/usr/lib/gcc/x86_64-redhat-linux6E/4.4.4/../../../../include/c++/4.4.4/bits/stl_algo.h:2128:8: note: in instantiation of member function 'seqan::_LessAlignedRead<char,
const seqan::Tag<seqan::SortReadId_> >::operator()' requested here if (__comp(__val, *__first)) ^/usr/lib/gcc/x86_64-redhat-linux6E/4.4.4/../../../../include/c++/4.4.4/bits/stl_algo.h:3433:4: note: in instantiation of function template specialization 'std::__insertion_sort<char *, seqan::_LessAlignedRead<char, const seqan::Tag<seqan::SortReadId_> > >' requested here
std::__insertion_sort(__first, __last, __comp); ^/usr/lib/gcc/x86_64-redhat-linux6E/4.4.4/../../../../include/c++/4.4.4/bits/stl_algo.h:5468:2: note: in instantiation of function template specialization 'std::__inplace_stable_sort<char *, seqan::_LessAlignedRead<char, const seqan::Tag<seqan::SortReadId_> > >' requested here
std::__inplace_stable_sort(__first, __last, __comp); ^/pasteur/solexa2/solexa_travail/PF2/programs/seqan/workspace/seqan/core/include/seqan/store/store_align.h:433:2: note: in instantiation of function template specialization 'std::stable_sort<char *, seqan::_LessAlignedRead<char, const seqan::Tag<seqan::SortReadId_> > >' requested here
std::stable_sort( ^/pasteur/solexa2/solexa_travail/PF2/programs/seqan/workspace/seqan/sandbox/jagla/apps/sambamstat/sambamstat.h:286:5: note: in instantiation of function template specialization 'seqan::sortAlignedReads<seqan::Stream<Bgzf>, seqan::SortReadId_>' requested here
sortAlignedReads(reader, SortReadId()); ^/pasteur/solexa2/solexa_travail/PF2/programs/seqan/workspace/seqan/sandbox/jagla/apps/sambamstat/sambamstat.h:340:16: note: in instantiation of function template specialization 'doWork<seqan::Stream<Bgzf>, seqan::String<seqan::SimpleType<unsigned char, seqan::Dna5_>, seqan::Alloc<void> >, seqan::Owner<seqan::Tag<seqan::Default_> >, seqan::Tag<seqan::Bam_> >' requested here
return doWork(bamStream, seqs, options, Bam()); ^ 4 warnings and 15 errors generated.make[2]: *** [sandbox/jagla/apps/sambamstat/CMakeFiles/sambamstat.dir/sambamstat.cpp.o] Error 1 make[1]: *** [sandbox/jagla/apps/sambamstat/CMakeFiles/sambamstat.dir/all] Error 2
make: *** [all] Error 2 I also attach the the cpp and .h files.. Thanks for having a look at this ... Bernd
// ========================================================================== // sambamstat // ========================================================================== // Copyright (c) 2006-2012, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of Knut Reinert or the FU Berlin nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH // DAMAGE. // // ========================================================================== // Author: Your Name <your.email@example.net> // ========================================================================== #ifndef SANDBOX_JAGLA_APPS_SAMBAMSTAT_SAMBAMSTAT_H_ #define SANDBOX_JAGLA_APPS_SAMBAMSTAT_SAMBAMSTAT_H_ #include <seqan/basic.h> #include <seqan/sequence.h> #include <seqan/bam_io.h> #include <seqan/misc/misc_cmdparser.h> #include <seqan/file.h> // For printing SeqAn Strings. #include <seqan/misc/misc_cmdparser.h> using namespace seqan; // ============================================================================ // Forwards // ============================================================================ // ============================================================================ // Tags, Classes, Enums // ============================================================================ enum Format { FORMAT_AUTO, FORMAT_SAM, FORMAT_BAM }; enum SortOrder { SORTORDER_NA, SORTORDER_ID, SORTORDER_POS }; struct Options { bool showHelp; bool showVersion; unsigned int verbosity; CharString inFileName; CharString outFileName; bool statistics; Format inFormat; SortOrder sortOrder; Options() { // Set defaults. showHelp = false; showVersion = false; verbosity = 0; statistics = false; inFileName = ""; outFileName = ""; statistics = false; inFormat = FORMAT_AUTO; sortOrder = SORTORDER_NA; } }; // ============================================================================ // Metafunctions // ============================================================================ // ============================================================================ // Functions // ============================================================================ void setupCommandLineParser(CommandLineParser & parser, Options const & options) { addVersionLine(parser, "0.1"); addTitleLine(parser, "**********************"); addTitleLine(parser, "* sambamstat *"); addTitleLine(parser, "**********************"); addTitleLine(parser, ""); addTitleLine(parser, "(c) 2012 by Bernd Jagla <Bernd.Jagla@pasteur.fr>"); addUsageLine(parser, "[OPTIONS] -if inputFile -of outputFile"); addSection(parser, "Main Options"); addOption(parser, CommandLineOption("if", "input-file", "Input file.", OptionType::String | OptionType::Label, options.inFileName)); addOption(parser, CommandLineOption("S", "input-sam", "Input file is SAM (default: auto).", OptionType::Bool, options.inFormat == FORMAT_SAM)); addOption(parser, CommandLineOption("B", "input-bam", "Input file is BAM (default: auto).", OptionType::Bool, options.inFormat == FORMAT_BAM)); addOption(parser, CommandLineOption("sP", "sortOrderPos", "assume sorted by position", OptionType::Bool, options.sortOrder == SORTORDER_POS)); addOption(parser, CommandLineOption("sI", "sortOrderID", "assume sorted by ID", OptionType::Bool, options.sortOrder == SORTORDER_ID)); addOption(parser, CommandLineOption("of", "output-file", "Output file.", OptionType::String | OptionType::Label, options.outFileName)); addSection(parser, "optional parameters:"); addOption(parser, CommandLineOption("s", "stats", "Perform stats", OptionType::Bool, options.statistics)); addSection(parser, "General Options"); addOption(parser, CommandLineOption("vv", "very-verbose", "Very verbose output.", OptionType::Bool)); addOption(parser, CommandLineOption("v", "verbose", "verbose output.", OptionType::Bool)); requiredArguments(parser, 0); } int parseCommandLineAndCheck(Options & options, CommandLineParser & parser, int argc, char const ** argv) { bool stop = !parse(parser, argc, argv); if (stop) return 1; if (isSetLong(parser, "help")) { options.showHelp = true; return 0; } if (isSetLong(parser, "version")) { options.showVersion = true; return 0; } getOptionValueLong(parser, "input-file", options.inFileName); getOptionValueLong(parser, "output-file", options.outFileName); if (isSetLong(parser, "verbose")) options.verbosity = 1; if (isSetLong(parser, "very-verbose")) options.verbosity = 2; if (isSetLong(parser, "stats")) options.statistics = true; return 0; } struct Stats { __uint64 numRecords; __uint64 alignedRecords; String<unsigned> editDistanceHisto; String<unsigned> mismatchHisto; String<unsigned> insertHisto; String<unsigned> deletionHisto; String<double> avrgQuality; Stats() : numRecords(0), alignedRecords(0) {} }; /* * Analyze read that are sorted by ID */ template <typename TStreamOrReader, typename TSeqString, typename TSpec, typename TFormat> int analyze_idSorted(TStreamOrReader & reader, StringSet<TSeqString, TSpec> & seqs, Options const & options, TFormat const & tag) { StringSet<CharString> refNames; NameStoreCache<StringSet<CharString> > refNamesCache(refNames); BamIOContext<StringSet<CharString> > context(refNames, refNamesCache); String<__uint64> qualSum; // Read alignments. BamAlignmentRecord record; if (options.verbosity >= 2) std::cerr << "Reading alignments" << std::endl; Align<Dna5String> align; __int64 reads = 0; // while (!atEnd(reader)) { // Read alignment record. if (readRecord(record, context, reader, tag) != 0) { std::cerr << "Could not read alignment!" << std::endl; return 1; } if (options.verbosity >= 2) write2(std::cerr, record, context, Sam()); } return 0; } /* * Analyze reads that are sorted by position */ template <typename TStreamOrReader, typename TSeqString, typename TSpec, typename TFormat> int analyze_posSorted(TStreamOrReader & reader, StringSet<TSeqString, TSpec> & seqs, Options const & options, TFormat const & tag) { StringSet<CharString> refNames; NameStoreCache<StringSet<CharString> > refNamesCache(refNames); BamIOContext<StringSet<CharString> > context(refNames, refNamesCache); String<__uint64> qualSum; // Read alignments. BamAlignmentRecord record; if (options.verbosity >= 2) std::cerr << "Reading alignments" << std::endl; Align<Dna5String> align; __int64 reads = 0; // while (!atEnd(reader)) { // Read alignment record. if (readRecord(record, context, reader, tag) != 0) { std::cerr << "Could not read alignment!" << std::endl; return 1; } if (options.verbosity >= 2) write2(std::cerr, record, context, Sam()); } return 0; } /* * Here we deal with the sorting and organizing different order of events... */ template <typename TStreamOrReader, typename TSeqString, typename TSpec, typename TFormat> int doWork(TStreamOrReader & reader, StringSet<TSeqString, TSpec> & seqs, Options const & options, TFormat const & tag) { StringSet<CharString> refNames; NameStoreCache<StringSet<CharString> > refNamesCache(refNames); BamIOContext<StringSet<CharString> > context(refNames, refNamesCache); String<__uint64> qualSum; // Read header. BamHeader header; if (options.verbosity >= 2) std::cerr << "Reading header" << std::endl; if (readRecord(header, context, reader, tag) != 0) { std::cerr << "Could not read header!" << std::endl; return 1; } Stats stats; switch (options.sortOrder){ case SORTORDER_ID : analyze_idSorted(reader, seqs, options, tag); //TODO sort sortAlignedReads(reader, SortBeginPos()); analyze_posSorted(reader, seqs, options, tag); break; case SORTORDER_POS : analyze_posSorted(reader, seqs, options, tag); //TODO sort sortAlignedReads(reader, SortReadId()); analyze_idSorted(reader, seqs, options, tag); break; case SORTORDER_NA : //TODO sort sortAlignedReads(reader, SortReadId()); analyze_idSorted(reader, seqs, options, tag); //TODO sort sortAlignedReads(reader, SortBeginPos()); analyze_posSorted(reader, seqs, options, tag); break; } return 0; } int mainWithOptions(Options & options) { typedef Iterator<String<CharString> >::Type TIterator; std::cout << "Non-option Arguments:" << std::endl; std::cout << " input file: \"" << options.inFileName << "\"" << std::endl; std::cout << " output file: \"" << options.outFileName << "\"" << std::endl; std::cout << "Option Arguments:" << std::endl; StringSet<CharString> seqIds; StringSet<Dna5String> seqs; String<char, MMap<> > seqMMapString; if (options.inFormat == FORMAT_SAM) { if (options.verbosity >= 2) std::cerr << "Opening SAM file " << options.inFileName << std::endl; String<char, MMap<> > samMMapString; if (!open(samMMapString, toCString(options.inFileName), OPEN_RDONLY)) { std::cerr << "Could not open " << options.inFileName << std::endl; return 1; } RecordReader<String<char, MMap<> >, SinglePass<Mapped> > samReader(samMMapString); return doWork(samReader, seqs, options, Sam()); } else // options.inFormat == FORMAT_BAM { if (options.verbosity >= 2) std::cerr << "Opening BAM file " << options.inFileName << std::endl; Stream<Bgzf> bamStream; if (!open(bamStream, toCString(options.inFileName), "r")) { std::cerr << "Could not open " << options.inFileName << std::endl; return 1; } return doWork(bamStream, seqs, options, Bam()); } return 0; } #endif // #ifndef SANDBOX_JAGLA_APPS_SAMBAMSTAT_SAMBAMSTAT_H_
// ========================================================================== // sambamstat // ========================================================================== // Copyright (c) 2006-2012, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of Knut Reinert or the FU Berlin nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH // DAMAGE. // // ========================================================================== // Author: Your Name <your.email@example.net> // ========================================================================== #include <seqan/basic.h> #include <seqan/sequence.h> #include <seqan/misc/misc_cmdparser.h> #include "sambamstat.h" using namespace seqan; // Program entry point int main(int argc, char const ** argv) { // Setup command line parser. CommandLineParser parser; Options options; setupCommandLineParser(parser, options); // Then, parse the command line and handle the cases where help display // is requested or erroneous parameters were given. int ret = parseCommandLineAndCheck(options, parser, argc, argv); if (ret != 0) { std::cerr << "Invalid usage!" << std::endl; return ret; } if (options.showHelp || options.showVersion) return 0; if (options.inFormat == FORMAT_AUTO) { std::ifstream guessIn(toCString(options.inFileName), std::ios::binary | std::ios::in); if (!guessIn.good()) { std::cerr << "Could not open " << options.inFileName << std::endl; return 1; } CharString magic; resize(magic, 2); guessIn.read(&magic[0], 2); if (magic != "\x1f\x8b") // Is not gzip-compressed. options.inFormat = FORMAT_SAM; else options.inFormat = FORMAT_BAM; } // Finally, launch the program. ret = mainWithOptions(options); return ret; }