summaryrefslogtreecommitdiff
path: root/vcffilter.c
diff options
context:
space:
mode:
Diffstat (limited to 'vcffilter.c')
-rw-r--r--vcffilter.c563
1 files changed, 563 insertions, 0 deletions
diff --git a/vcffilter.c b/vcffilter.c
new file mode 100644
index 0000000..481941c
--- /dev/null
+++ b/vcffilter.c
@@ -0,0 +1,563 @@
+/* vcffilter.c -- Apply fixed-threshold filters.
+
+ Copyright (C) 2013-2014 Genome Research Ltd.
+
+ Author: Petr Danecek <pd3@sanger.ac.uk>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE. */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <ctype.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <math.h>
+#include <htslib/vcf.h>
+#include <htslib/synced_bcf_reader.h>
+#include <htslib/vcfutils.h>
+#include "bcftools.h"
+#include "filter.h"
+#include "rbuf.h"
+
+// Logic of the filters: include or exclude sites which match the filters?
+#define FLT_INCLUDE 1
+#define FLT_EXCLUDE 2
+
+// FILTER columns annotation: replace or add to existing FILTERs; set FILTER to PASS at good sites?
+#define ANNOT_ADD 1
+#define ANNOT_RESET 2
+
+// Set genotypes of filtered samples
+#define SET_GTS_MISSING 1
+#define SET_GTS_REF 2
+
+typedef struct _args_t
+{
+ filter_t *filter;
+ char *filter_str;
+ int filter_logic; // include or exclude sites which match the filters? One of FLT_INCLUDE/FLT_EXCLUDE
+ const uint8_t *smpl_pass;
+ int set_gts;
+ char *soft_filter; // drop failed sites or annotate FILTER column?
+ int annot_mode; // add to existing FILTER annotation or replace? Otherwise reset FILTER to PASS or leave as it is?
+ int flt_fail, flt_pass; // BCF ids of fail and pass filters
+ int snp_gap, indel_gap, IndelGap_id, SnpGap_id;
+ int32_t ntmpi, *tmpi, ntmp_ac, *tmp_ac;
+ rbuf_t rbuf;
+ bcf1_t **rbuf_lines;
+
+ bcf_srs_t *files;
+ bcf_hdr_t *hdr;
+ htsFile *out_fh;
+ int output_type;
+
+ char **argv, *output_fname, *targets_list, *regions_list;
+ int argc;
+}
+args_t;
+
+static void init_data(args_t *args)
+{
+ args->out_fh = hts_open(args->output_fname,hts_bcf_wmode(args->output_type));
+ if ( args->out_fh == NULL ) error("Can't write to \"%s\": %s\n", args->output_fname, strerror(errno));
+
+ args->hdr = args->files->readers[0].header;
+ args->flt_pass = bcf_hdr_id2int(args->hdr,BCF_DT_ID,"PASS"); assert( !args->flt_pass ); // sanity check: required by BCF spec
+
+ // -i or -e: append FILTER line
+ if ( args->soft_filter && args->filter_logic )
+ {
+ kstring_t flt_name = {0,0,0};
+ if ( strcmp(args->soft_filter,"+") )
+ kputs(args->soft_filter, &flt_name);
+ else
+ {
+ // Make up a filter name
+ int i = 0, id = -1;
+ do
+ {
+ ksprintf(&flt_name,"Filter%d", ++i);
+ id = bcf_hdr_id2int(args->hdr,BCF_DT_ID,flt_name.s);
+ }
+ while ( bcf_hdr_idinfo_exists(args->hdr,BCF_HL_FLT,id) );
+ }
+ // escape quotes
+ kstring_t tmp = {0,0,0};
+ char *t = args->filter_str;
+ while ( *t )
+ {
+ if ( *t=='"' ) kputc('\\',&tmp);
+ kputc(*t,&tmp);
+ t++;
+ }
+ int ret = bcf_hdr_printf(args->hdr, "##FILTER=<ID=%s,Description=\"Set if %s: %s\">", flt_name.s,args->filter_logic & FLT_INCLUDE ? "not true" : "true", tmp.s);
+ if ( ret!=0 )
+ error("Failed to append header line: ##FILTER=<ID=%s,Description=\"Set if %s: %s\">\n", flt_name.s,args->filter_logic & FLT_INCLUDE ? "not true" : "true", tmp.s);
+ args->flt_fail = bcf_hdr_id2int(args->hdr,BCF_DT_ID,flt_name.s); assert( args->flt_fail>=0 );
+ free(flt_name.s);
+ free(tmp.s);
+ }
+
+ if ( args->snp_gap || args->indel_gap )
+ {
+ if ( !args->filter_logic && args->soft_filter && strcmp(args->soft_filter,"+") )
+ {
+ kstring_t tmp = {0,0,0};
+ if ( args->snp_gap ) kputs("\"SnpGap\"", &tmp);
+ if ( args->indel_gap )
+ {
+ if ( tmp.s ) kputs(" and ", &tmp);
+ kputs("\"IndelGap\"", &tmp);
+ }
+ fprintf(stderr,"Warning: using %s filter name instead of \"%s\"\n", tmp.s,args->soft_filter);
+ free(tmp.s);
+ }
+
+ rbuf_init(&args->rbuf, 64);
+ args->rbuf_lines = (bcf1_t**) calloc(args->rbuf.m, sizeof(bcf1_t*));
+ if ( args->snp_gap )
+ {
+ bcf_hdr_printf(args->hdr, "##FILTER=<ID=SnpGap,Description=\"SNP within %d bp of an indel\">", args->snp_gap);
+ args->SnpGap_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "SnpGap");
+ assert( args->SnpGap_id>=0 );
+ }
+ if ( args->indel_gap )
+ {
+ bcf_hdr_printf(args->hdr, "##FILTER=<ID=IndelGap,Description=\"Indel within %d bp of an indel\">", args->indel_gap);
+ args->IndelGap_id = bcf_hdr_id2int(args->hdr, BCF_DT_ID, "IndelGap");
+ assert( args->IndelGap_id>=0 );
+ }
+ }
+
+ bcf_hdr_append_version(args->hdr, args->argc, args->argv, "bcftools_filter");
+
+ if ( args->filter_str )
+ args->filter = filter_init(args->hdr, args->filter_str);
+}
+
+static void destroy_data(args_t *args)
+{
+ if ( args->rbuf_lines )
+ {
+ int i;
+ for (i=0; i<args->rbuf.m; i++)
+ if ( args->rbuf_lines[i] ) bcf_destroy1(args->rbuf_lines[i]);
+ free(args->rbuf_lines);
+ }
+ if ( args->filter )
+ filter_destroy(args->filter);
+ free(args->tmpi);
+ free(args->tmp_ac);
+}
+
+static void flush_buffer(args_t *args, int n)
+{
+ int i, j;
+ for (i=0; i<n; i++)
+ {
+ int k = rbuf_shift(&args->rbuf);
+ bcf1_t *rec = args->rbuf_lines[k];
+
+ int pass = 1;
+ if ( !args->soft_filter )
+ {
+ for (j=0; j<rec->d.n_flt; j++)
+ {
+ if ( args->indel_gap && rec->d.flt[j]==args->IndelGap_id ) { pass = 0; break; }
+ if ( args->snp_gap && rec->d.flt[j]==args->SnpGap_id ) { pass = 0; break; }
+ }
+ }
+ if ( pass ) bcf_write1(args->out_fh, args->hdr, rec);
+ }
+}
+
+#define SWAP(type_t, a, b) { type_t t = a; a = b; b = t; }
+static void buffered_filters(args_t *args, bcf1_t *line)
+{
+ /**
+ * The logic of SnpGap=3. The SNPs at positions 1 and 7 are filtered,
+ * positions 0 and 8 are not:
+ * 0123456789
+ * ref .G.GT..G..
+ * del .A.G-..A..
+ * Here the positions 1 and 6 are filtered, 0 and 7 are not:
+ * 0123-456789
+ * ref .G.G-..G..
+ * ins .A.GT..A..
+ *
+ * The logic of IndelGap=2. The second indel is filtered:
+ * 012345678901
+ * ref .GT.GT..GT..
+ * del .G-.G-..G-..
+ * And similarly here, the second is filtered:
+ * 01 23 456 78
+ * ref .A-.A-..A-..
+ * ins .AT.AT..AT..
+ */
+
+ // To avoid additional data structure, we abuse bcf1_t's var and var_type records.
+ const int SnpGap_set = VCF_OTHER<<1;
+ const int IndelGap_set = VCF_OTHER<<2;
+ const int IndelGap_flush = VCF_OTHER<<3;
+
+ int var_type = 0, i;
+ if ( line )
+ {
+ // Still on the same chromosome?
+ int ilast = rbuf_last(&args->rbuf);
+ if ( ilast>=0 && line->rid != args->rbuf_lines[ilast]->rid )
+ flush_buffer(args, args->rbuf.n); // new chromosome, flush everything
+
+ if ( args->rbuf.n >= args->rbuf.m ) rbuf_expand0(&args->rbuf,bcf1_t*,args->rbuf_lines);
+
+ // Insert the new record in the buffer. The line would be overwritten in
+ // the next bcf_sr_next_line call, therefore we need to swap it with an
+ // unused one
+ ilast = rbuf_append(&args->rbuf);
+ if ( !args->rbuf_lines[ilast] ) args->rbuf_lines[ilast] = bcf_init1();
+ SWAP(bcf1_t*, args->files->readers[0].buffer[0], args->rbuf_lines[ilast]);
+
+ var_type = bcf_get_variant_types(line);
+
+ // Find out the size of an indel. The indel boundaries are based on REF
+ // (POS+1,POS+rlen-1). This is not entirely correct: mpileup likes to
+ // output REF=CAGAGAGAGA, ALT=CAGAGAGAGAGA where REF=C,ALT=CGA could be
+ // used. This filter is therefore more strict and may remove some valid
+ // SNPs.
+ int len = 1;
+ if ( var_type & VCF_INDEL )
+ {
+ for (i=1; i<line->n_allele; i++)
+ if ( len < 1-line->d.var[i].n ) len = 1-line->d.var[i].n;
+ }
+
+ // Set the REF allele's length to max deletion length or to 1 if a SNP or an insertion.
+ line->d.var[0].n = len;
+ }
+
+ int k_flush = 1;
+ if ( args->indel_gap )
+ {
+ k_flush = 0;
+ // Find indels which are too close to each other
+ int last_to = -1;
+ for (i=-1; rbuf_next(&args->rbuf,&i); )
+ {
+ bcf1_t *rec = args->rbuf_lines[i];
+ int rec_from = rec->pos;
+ if ( last_to!=-1 && last_to < rec_from ) break;
+
+ k_flush++;
+ if ( !(rec->d.var_type & VCF_INDEL) ) continue;
+
+ rec->d.var_type |= IndelGap_set;
+ last_to = args->indel_gap + rec->pos + rec->d.var[0].n - 1;
+ }
+ if ( i==args->rbuf.f && line && last_to!=-1 ) k_flush = 0;
+ if ( k_flush || !line )
+ {
+ // Select the best indel from the cluster of k_flush indels
+ int k = 0, max_ac = -1, imax_ac = -1;
+ for (i=-1; rbuf_next(&args->rbuf,&i) && k<k_flush; )
+ {
+ k++;
+ bcf1_t *rec = args->rbuf_lines[i];
+ if ( !(rec->d.var_type & IndelGap_set) ) continue;
+ hts_expand(int, rec->n_allele, args->ntmpi, args->tmpi);
+ int ret = bcf_calc_ac(args->hdr, rec, args->tmpi, BCF_UN_ALL);
+ if ( imax_ac==-1 || (ret && max_ac < args->tmpi[1]) ) { max_ac = args->tmpi[1]; imax_ac = i; }
+ }
+
+ // Filter all but the best indel (with max AF or first if AF not available)
+ k = 0;
+ for (i=-1; rbuf_next(&args->rbuf,&i) && k<k_flush; )
+ {
+ k++;
+ bcf1_t *rec = args->rbuf_lines[i];
+ if ( !(rec->d.var_type & IndelGap_set) ) continue;
+ rec->d.var_type |= IndelGap_flush;
+ if ( i!=imax_ac ) bcf_add_filter(args->hdr, args->rbuf_lines[i], args->IndelGap_id);
+ }
+ }
+ }
+
+ if ( !line )
+ {
+ // Finished: flush everything
+ flush_buffer(args, args->rbuf.n);
+ return;
+ }
+
+ int j_flush = 1;
+ if ( args->snp_gap )
+ {
+ j_flush = 0;
+ int last_from = line->pos;
+ for (i=-1; rbuf_next(&args->rbuf,&i); )
+ {
+ bcf1_t *rec = args->rbuf_lines[i];
+ int rec_to = rec->pos + rec->d.var[0].n - 1; // last position affected by the variant
+ if ( rec_to + args->snp_gap < last_from )
+ j_flush++;
+ else if ( (var_type & VCF_INDEL) && (rec->d.var_type & VCF_SNP) && !(rec->d.var_type & SnpGap_set) )
+ {
+ // this SNP has not been SnpGap-filtered yet
+ rec->d.var_type |= SnpGap_set;
+ bcf_add_filter(args->hdr, rec, args->SnpGap_id);
+ }
+ else if ( (var_type & VCF_SNP) && (rec->d.var_type & VCF_INDEL) )
+ {
+ // the line which we are adding is a SNP and needs to be filtered
+ line->d.var_type |= SnpGap_set;
+ bcf_add_filter(args->hdr, line, args->SnpGap_id);
+ break;
+ }
+ }
+ }
+ flush_buffer(args, j_flush < k_flush ? j_flush : k_flush);
+}
+
+static void set_genotypes(args_t *args, bcf1_t *line, int pass_site)
+{
+ int i,j;
+ if ( !bcf_hdr_nsamples(args->hdr) ) return;
+ if ( args->smpl_pass )
+ {
+ int npass = 0;
+ for (i=0; i<bcf_hdr_nsamples(args->hdr); i++) npass += args->smpl_pass[i];
+
+ // return if all samples pass
+ if ( npass==bcf_hdr_nsamples(args->hdr) && (args->filter_logic & FLT_INCLUDE) ) return;
+ if ( npass==0 && (args->filter_logic & FLT_EXCLUDE) ) return;
+ }
+ else if ( pass_site ) return;
+
+ int an = 0, has_an = bcf_get_info_int32(args->hdr, line, "AN", &args->tmp_ac, &args->ntmp_ac);
+ if ( has_an==1 ) an = args->tmp_ac[0];
+ else has_an = 0;
+
+ int has_ac = bcf_get_info_int32(args->hdr, line, "AC", &args->tmp_ac, &args->ntmp_ac);
+ has_ac = has_ac==line->n_allele-1 ? 1 : 0;
+
+ int new_gt = 0, ngts = bcf_get_format_int32(args->hdr, line, "GT", &args->tmpi, &args->ntmpi);
+ ngts /= bcf_hdr_nsamples(args->hdr);
+ if ( args->set_gts==SET_GTS_MISSING ) new_gt = bcf_gt_missing;
+ else if ( args->set_gts==SET_GTS_REF ) new_gt = bcf_gt_unphased(0);
+ else error("todo: set_gts=%d\n", args->set_gts);
+ for (i=0; i<bcf_hdr_nsamples(args->hdr); i++)
+ {
+ if ( args->smpl_pass )
+ {
+ int pass = args->smpl_pass[i];
+ if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
+ if ( pass ) continue;
+ }
+ int32_t *gts = args->tmpi + ngts*i;
+ for (j=0; j<ngts; j++)
+ {
+ if ( gts[j]==bcf_int32_vector_end ) break;
+ if ( args->set_gts==SET_GTS_MISSING && !bcf_gt_is_missing(gts[j]) )
+ {
+ int ial = bcf_gt_allele(gts[j]);
+ if ( has_ac && ial>0 && ial<=line->n_allele ) args->tmp_ac[ ial-1 ]--;
+ an--;
+ }
+ else if ( args->set_gts==SET_GTS_REF )
+ {
+ int ial = bcf_gt_allele(gts[j]);
+ if ( bcf_gt_is_missing(gts[j]) ) an++;
+ else if ( has_ac && ial>0 && ial<=line->n_allele ) args->tmp_ac[ ial-1 ]--;
+ }
+ gts[j] = new_gt;
+ }
+ }
+ bcf_update_genotypes(args->hdr,line,args->tmpi,ngts*bcf_hdr_nsamples(args->hdr));
+ if ( has_an ) bcf_update_info_int32(args->hdr,line,"AN",&an,1);
+ if ( has_ac ) bcf_update_info_int32(args->hdr,line,"AC",args->tmp_ac,line->n_allele-1);
+}
+
+static void usage(args_t *args)
+{
+ fprintf(stderr, "\n");
+ fprintf(stderr, "About: Apply fixed-threshold filters.\n");
+ fprintf(stderr, "Usage: bcftools filter [options] <in.vcf.gz>\n");
+ fprintf(stderr, "\n");
+ fprintf(stderr, "Options:\n");
+ fprintf(stderr, " -e, --exclude <expr> exclude sites for which the expression is true (see man page for details)\n");
+ fprintf(stderr, " -g, --SnpGap <int> filter SNPs within <int> base pairs of an indel\n");
+ fprintf(stderr, " -G, --IndelGap <int> filter clusters of indels separated by <int> or fewer base pairs allowing only one to pass\n");
+ fprintf(stderr, " -i, --include <expr> include only sites for which the expression is true (see man page for details\n");
+ fprintf(stderr, " -m, --mode [+x] \"+\": do not replace but add to existing FILTER; \"x\": reset filters at sites which pass\n");
+ fprintf(stderr, " -o, --output <file> write output to a file [standard output]\n");
+ fprintf(stderr, " -O, --output-type <b|u|z|v> b: compressed BCF, u: uncompressed BCF, z: compressed VCF, v: uncompressed VCF [v]\n");
+ fprintf(stderr, " -r, --regions <region> restrict to comma-separated list of regions\n");
+ fprintf(stderr, " -R, --regions-file <file> restrict to regions listed in a file\n");
+ fprintf(stderr, " -s, --soft-filter <string> annotate FILTER column with <string> or unique filter name (\"Filter%%d\") made up by the program (\"+\")\n");
+ fprintf(stderr, " -S, --set-GTs <.|0> set genotypes of failed samples to missing (.) or ref (0)\n");
+ fprintf(stderr, " -t, --targets <region> similar to -r but streams rather than index-jumps\n");
+ fprintf(stderr, " -T, --targets-file <file> similar to -R but streams rather than index-jumps\n");
+ fprintf(stderr, "\n");
+ exit(1);
+}
+
+int main_vcffilter(int argc, char *argv[])
+{
+ int c;
+ args_t *args = (args_t*) calloc(1,sizeof(args_t));
+ args->argc = argc; args->argv = argv;
+ args->files = bcf_sr_init();
+ args->output_fname = "-";
+ args->output_type = FT_VCF;
+ int regions_is_file = 0, targets_is_file = 0;
+
+ static struct option loptions[] =
+ {
+ {"set-GTs",1,0,'S'},
+ {"mode",1,0,'m'},
+ {"soft-filter",1,0,'s'},
+ {"exclude",1,0,'e'},
+ {"include",1,0,'i'},
+ {"targets",1,0,'t'},
+ {"targets-file",1,0,'T'},
+ {"regions",1,0,'r'},
+ {"regions-file",1,0,'R'},
+ {"output",1,0,'o'},
+ {"output-type",1,0,'O'},
+ {"SnpGap",1,0,'g'},
+ {"IndelGap",1,0,'G'},
+ {0,0,0,0}
+ };
+ char *tmp;
+ while ((c = getopt_long(argc, argv, "e:i:t:T:r:R:h?s:m:o:O:g:G:S:",loptions,NULL)) >= 0) {
+ switch (c) {
+ case 'g':
+ args->snp_gap = strtol(optarg,&tmp,10);
+ if ( *tmp ) error("Could not parse argument: --SnpGap %s\n", optarg);
+ break;
+ case 'G':
+ args->indel_gap = strtol(optarg,&tmp,10);
+ if ( *tmp ) error("Could not parse argument: --IndelGap %s\n", optarg);
+ break;
+ case 'o': args->output_fname = optarg; break;
+ case 'O':
+ switch (optarg[0]) {
+ case 'b': args->output_type = FT_BCF_GZ; break;
+ case 'u': args->output_type = FT_BCF; break;
+ case 'z': args->output_type = FT_VCF_GZ; break;
+ case 'v': args->output_type = FT_VCF; break;
+ default: error("The output type \"%s\" not recognised\n", optarg);
+ }
+ break;
+ case 's': args->soft_filter = optarg; break;
+ case 'm':
+ if ( strchr(optarg,'x') ) args->annot_mode |= ANNOT_RESET;
+ if ( strchr(optarg,'+') ) args->annot_mode |= ANNOT_ADD;
+ break;
+ case 't': args->targets_list = optarg; break;
+ case 'T': args->targets_list = optarg; targets_is_file = 1; break;
+ case 'r': args->regions_list = optarg; break;
+ case 'R': args->regions_list = optarg; regions_is_file = 1; break;
+ case 'e': args->filter_str = optarg; args->filter_logic |= FLT_EXCLUDE; break;
+ case 'i': args->filter_str = optarg; args->filter_logic |= FLT_INCLUDE; break;
+ case 'S':
+ if ( !strcmp(".",optarg) ) args->set_gts = SET_GTS_MISSING;
+ else if ( !strcmp("0",optarg) ) args->set_gts = SET_GTS_REF;
+ else error("The argument to -S not recognised: %s\n", optarg);
+ break;
+ case 'h':
+ case '?': usage(args);
+ default: error("Unknown argument: %s\n", optarg);
+ }
+ }
+
+ if ( args->filter_logic == (FLT_EXCLUDE|FLT_INCLUDE) ) error("Only one of -i or -e can be given.\n");
+ char *fname = NULL;
+ if ( optind>=argc )
+ {
+ if ( !isatty(fileno((FILE *)stdin)) ) fname = "-"; // reading from stdin
+ else usage(args);
+ }
+ else fname = argv[optind];
+
+ // read in the regions from the command line
+ if ( args->regions_list )
+ {
+ args->files->require_index = 1;
+ if ( bcf_sr_set_regions(args->files, args->regions_list, regions_is_file)<0 )
+ error("Failed to read the regions: %s\n", args->regions_list);
+ }
+ else if ( optind+1 < argc )
+ {
+ int i;
+ kstring_t tmp = {0,0,0};
+ kputs(argv[optind+1],&tmp);
+ for (i=optind+2; i<argc; i++) { kputc(',',&tmp); kputs(argv[i],&tmp); }
+ args->files->require_index = 1;
+ if ( bcf_sr_set_regions(args->files, tmp.s, regions_is_file)<0 )
+ error("Failed to read the regions: %s\n", args->regions_list);
+ free(tmp.s);
+ }
+ if ( args->targets_list )
+ {
+ if ( bcf_sr_set_targets(args->files, args->targets_list,targets_is_file, 0)<0 )
+ error("Failed to read the targets: %s\n", args->targets_list);
+ }
+ if ( !bcf_sr_add_reader(args->files, fname) ) error("Failed to open %s: %s\n", fname,bcf_sr_strerror(args->files->errnum));
+
+ init_data(args);
+ bcf_hdr_write(args->out_fh, args->hdr);
+ while ( bcf_sr_next_line(args->files) )
+ {
+ bcf1_t *line = bcf_sr_get_line(args->files, 0);
+ int pass = 1;
+ if ( args->filter )
+ {
+ pass = filter_test(args->filter, line, &args->smpl_pass);
+ if ( args->filter_logic & FLT_EXCLUDE ) pass = pass ? 0 : 1;
+ }
+ if ( args->soft_filter || args->set_gts || pass )
+ {
+ if ( pass )
+ {
+ bcf_unpack(line,BCF_UN_FLT);
+ if ( args->annot_mode & ANNOT_RESET || !line->d.n_flt ) bcf_add_filter(args->hdr, line, args->flt_pass);
+ }
+ else if ( args->soft_filter )
+ {
+ if ( (args->annot_mode & ANNOT_ADD) ) bcf_add_filter(args->hdr, line, args->flt_fail);
+ else bcf_update_filter(args->hdr, line, &args->flt_fail, 1);
+ }
+ if ( args->set_gts ) set_genotypes(args, line, pass);
+ if ( !args->rbuf_lines )
+ bcf_write1(args->out_fh, args->hdr, line);
+ else
+ buffered_filters(args, line);
+ }
+ }
+ buffered_filters(args, NULL);
+
+ hts_close(args->out_fh);
+ destroy_data(args);
+ bcf_sr_destroy(args->files);
+ free(args);
+ return 0;
+}