// File preamble: project headers, the version string, the debug-print macros,
// and the opening of the USAGE help-text macro.
//  - "rlink.h" and "tmerge.h" are included unconditionally; "GThreads.h" only
//    when NOTHREADS is not defined; "proc_mem.h" only when GMEMTRACE is defined
//    (the GMEMTRACE #define here is commented out).
//  - VERSION is "2.2.1"; USAGE embeds it by adjacent string-literal
//    concatenation ("StringTie v" VERSION " usage:...").
//  - DBGPRINT, DBGPRINT2 .. DBGPRINT5 pass their 1..5 arguments straight to
//    GMessage when DEBUGPRINT is defined, and expand to nothing otherwise
//    (the DEBUGPRINT #define here is commented out). GMessage is not declared
//    in this part of the file.
//  - The USAGE literal starts here and continues on the following lines.
// NOTE(review): as stored, everything below sits on one physical line that
// begins with "//", so a compiler would read the entire line as a comment;
// the original line breaks look lost - confirm against the repository copy.
// NOTE(review): several synopsis entries have empty argument slots (e.g.
// "[-G ]"); presumably the <...> placeholders were stripped - verify.
//#define GFF_DEBUG 1 //debugging guides loading #include "rlink.h" #include "tmerge.h" #ifndef NOTHREADS #include "GThreads.h" #endif //#define GMEMTRACE 1 #ifdef GMEMTRACE #include "proc_mem.h" #endif #define VERSION "2.2.1" //#define DEBUGPRINT 1 #ifdef DEBUGPRINT #define DBGPRINT(x) GMessage(x) #define DBGPRINT2(a,b) GMessage(a,b) #define DBGPRINT3(a,b,c) GMessage(a,b,c) #define DBGPRINT4(a,b,c,d) GMessage(a,b,c,d) #define DBGPRINT5(a,b,c,d,e) GMessage(a,b,c,d,e) #else #define DBGPRINT(x) #define DBGPRINT2(a,b) #define DBGPRINT3(a,b,c) #define DBGPRINT4(a,b,c,d) #define DBGPRINT5(a,b,c,d,e) #endif #define USAGE "StringTie v" VERSION " usage:\n\n\ stringtie [-G ] [-l ] [-o ] [-p ]\n\ [-v] [-a ] [-m ] [-j ] [-f ]\n\ [-c ] [-g ] [-u] [-L] [-e] [--viral] [-E ]\n\ [--ptf ] [-x ] [-A ] [-h] {-B|-b }\n\ [--mix] [--conservative] [--rf] [--fr]\n\ Assemble RNA-Seq alignments into potential transcripts.\n\ Options:\n\ --version : print just the version at stdout and exit\n\ --conservative : conservative transcript assembly, same as -t -c 1.5 -f 0.05\n\ --mix : both short and long read data alignments are provided\n\ (long read alignments must be the 2nd BAM/CRAM input file)\n\ --rf : assume stranded library fr-firststrand\n\ --fr : assume stranded library fr-secondstrand\n\ -G reference annotation to use for guiding the assembly process (GTF/GFF)\n\ --ptf : load point-features from a given 4 column feature file \n\ -o output path/file name for the assembled transcripts GTF (default: stdout)\n\ -l name prefix for output transcripts (default: STRG)\n\ -f minimum isoform fraction (default: 0.01)\n\ -L long reads processing; also enforces -s 1.5 -g 0 (default:false)\n\ -R if long reads are provided, just clean and collapse the reads but\n\ do not assemble\n\ -m minimum assembled transcript length (default: 200)\n\ -a minimum anchor length for junctions (default: 10)\n\ -j minimum junction coverage (default: 1)\n\ -t disable trimming of predicted transcripts based on 
coverage\n\ (default: coverage trimming is enabled)\n\ -c minimum reads per bp coverage to consider for multi-exon transcript\n\ (default: 1)\n\ -s minimum reads per bp coverage to consider for single-exon transcript\n\ (default: 4.75)\n\ -v verbose (log bundle processing details)\n\ -g maximum gap allowed between read mappings (default: 50)\n\ -M fraction of bundle allowed to be covered by multi-hit reads (default:1)\n\ -p number of threads (CPUs) to use (default: 1)\n\ -A gene abundance estimation output file\n\ -E define window around possibly erroneous splice sites from long reads to\n\ look out for correct splice sites (default: 25)\n\ -B enable output of Ballgown table files which will be created in the\n\ same directory as the output GTF (requires -G, -o recommended)\n\ -b enable output of Ballgown table files but these files will be \n\ created under the directory path given as \n\ -e only estimate the abundance of given reference transcripts (requires -G)\n\ --viral : only relevant for long reads from viral data where splice sites\n\ do not follow consensus (default:false)\n\ -x do not assemble any transcripts on the given reference sequence(s)\n\ -u no multi-mapping correction (default: correction enabled)\n\ -h print this usage message and exit\n\ --ref/--cram-ref reference genome FASTA file for CRAM input\n\ \n\ Transcript merge usage mode: \n\ stringtie --merge [Options] { gtf_list | strg1.gtf ...}\n\ With this option StringTie will assemble transcripts from multiple\n\ input files generating a unified non-redundant set of isoforms. 
In this mode\n\ the following options are available:\n\ -G reference annotation to include in the merging (GTF/GFF3)\n\ -o output file name for the merged transcripts GTF\n\ (default: stdout)\n\ -m minimum input transcript length to include in the merge\n\ (default: 50)\n\ -c minimum input transcript coverage to include in the merge\n\ (default: 0)\n\ -F minimum input transcript FPKM to include in the merge\n\ (default: 1.0)\n\ -T minimum input transcript TPM to include in the merge\n\ (default: 1.0)\n\ -f minimum isoform fraction (default: 0.01)\n\ -g gap between transcripts to merge together (default: 250)\n\ -i keep merged transcripts with retained introns; by default\n\ these are not kept unless there is strong evidence for them\n\ -l