summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Étienne Mollier <emollier@debian.org> 2023-08-12 22:44:42 +0200
committer: Étienne Mollier <emollier@debian.org> 2023-08-12 22:44:42 +0200
commit: 3847b64c450e81572b02d6ddf9bbc2880c3c4711 (patch)
tree: 93aae9566b9c7fa4dd6274e934ee2ebf52e8cea4
parent: e06a933efc0bf1f1470f00ac79bcb9bdb474ae48 (diff)
New upstream version 1.3.1+ds
-rw-r--r--.github/workflows/build-test.yml4
-rwxr-xr-xREADME.md3
-rw-r--r--doc/Programs.md8
-rw-r--r--src/Makefile.in119
-rw-r--r--src/TEST/Alignment_empty-sites38
-rw-r--r--src/TEST/un_aln_nuc_mixed-stop.fa8
-rw-r--r--src/TEST/un_aln_nuc_with-stop.fa8
-rw-r--r--src/aa2cdn.cpp44
-rw-r--r--src/aa2cdn.h4
-rw-r--r--src/clean_seq.cpp40
-rw-r--r--src/clean_seq.h8
-rw-r--r--src/comp_test.cpp10
-rw-r--r--src/generate_manpages.py2
-rw-r--r--src/main_aa2cdn.cpp37
-rw-r--r--src/main_bd_fit.cpp1
-rw-r--r--src/main_bd_sim.cpp16
-rw-r--r--src/main_boot.cpp1
-rw-r--r--src/main_bp.cpp1
-rw-r--r--src/main_cat.cpp9
-rw-r--r--src/main_clsq.cpp20
-rw-r--r--src/main_cltr.cpp1
-rw-r--r--src/main_colt.cpp1
-rw-r--r--src/main_comp.cpp1
-rw-r--r--src/main_consq.cpp1
-rw-r--r--src/main_contrates.cpp11
-rw-r--r--src/main_fqfilt.cpp1
-rw-r--r--src/main_log.cpp20
-rw-r--r--src/main_lssq.cpp1
-rw-r--r--src/main_lstr.cpp19
-rw-r--r--src/main_mono.cpp45
-rw-r--r--src/main_mrca.cpp31
-rw-r--r--src/main_mrca_cut.cpp19
-rw-r--r--src/main_mrca_name.cpp19
-rw-r--r--src/main_nj.cpp1
-rw-r--r--src/main_nni.cpp1
-rw-r--r--src/main_nw.cpp1
-rw-r--r--src/main_poly.cpp1
-rw-r--r--src/main_recode.cpp1
-rw-r--r--src/main_revcomp.cpp1
-rw-r--r--src/main_rls.cpp264
-rw-r--r--src/main_rlt.cpp140
-rw-r--r--src/main_rmk.cpp1
-rw-r--r--src/main_rms.cpp35
-rw-r--r--src/main_rmt.cpp45
-rw-r--r--src/main_rr.cpp45
-rw-r--r--src/main_s2fa.cpp1
-rw-r--r--src/main_s2nex.cpp1
-rw-r--r--src/main_s2phy.cpp1
-rw-r--r--src/main_seqgen.cpp1
-rw-r--r--src/main_ssort.cpp1
-rw-r--r--src/main_ssplit.cpp203
-rw-r--r--src/main_sstat.cpp1
-rw-r--r--src/main_strec.cpp21
-rw-r--r--src/main_sw.cpp1
-rw-r--r--src/main_t2new.cpp1
-rw-r--r--src/main_t2nex.cpp1
-rw-r--r--src/main_tcol.cpp1
-rw-r--r--src/main_tcomb.cpp33
-rw-r--r--src/main_tgen.cpp14
-rw-r--r--src/main_tlate.cpp1
-rw-r--r--src/main_trt.cpp37
-rw-r--r--src/main_tscale.cpp28
-rw-r--r--src/main_upgma.cpp1
-rw-r--r--src/main_vcf2fa.cpp1
-rw-r--r--src/man/pxaa2cdn.1.in9
-rw-r--r--src/man/pxbdfit.1.in4
-rw-r--r--src/man/pxbdsim.1.in4
-rw-r--r--src/man/pxboot.1.in4
-rw-r--r--src/man/pxbp.1.in4
-rw-r--r--src/man/pxcat.1.in6
-rw-r--r--src/man/pxclsq.1.in11
-rw-r--r--src/man/pxcltr.1.in4
-rw-r--r--src/man/pxcolt.1.in4
-rw-r--r--src/man/pxcomp.1.in4
-rw-r--r--src/man/pxconsq.1.in4
-rw-r--r--src/man/pxcontrates.1.in4
-rw-r--r--src/man/pxfqfilt.1.in4
-rw-r--r--src/man/pxlog.1.in4
-rw-r--r--src/man/pxlssq.1.in4
-rw-r--r--src/man/pxlstr.1.in4
-rw-r--r--src/man/pxmono.1.in4
-rw-r--r--src/man/pxmrca.1.in4
-rw-r--r--src/man/pxmrcacut.1.in4
-rw-r--r--src/man/pxmrcaname.1.in4
-rw-r--r--src/man/pxnw.1.in4
-rw-r--r--src/man/pxpoly.1.in4
-rw-r--r--src/man/pxrecode.1.in4
-rw-r--r--src/man/pxrevcomp.1.in4
-rw-r--r--src/man/pxrls.1.in9
-rw-r--r--src/man/pxrlt.1.in9
-rw-r--r--src/man/pxrmk.1.in4
-rw-r--r--src/man/pxrms.1.in4
-rw-r--r--src/man/pxrmt.1.in4
-rw-r--r--src/man/pxrr.1.in4
-rw-r--r--src/man/pxs2fa.1.in4
-rw-r--r--src/man/pxs2nex.1.in4
-rw-r--r--src/man/pxs2phy.1.in4
-rw-r--r--src/man/pxseqgen.1.in4
-rw-r--r--src/man/pxssort.1.in4
-rw-r--r--src/man/pxssplit.1.in38
-rw-r--r--src/man/pxsstat.1.in4
-rw-r--r--src/man/pxstrec.1.in4
-rw-r--r--src/man/pxsw.1.in4
-rw-r--r--src/man/pxt2new.1.in4
-rw-r--r--src/man/pxt2nex.1.in4
-rw-r--r--src/man/pxtcol.1.in4
-rw-r--r--src/man/pxtcomb.1.in4
-rw-r--r--src/man/pxtgen.1.in4
-rw-r--r--src/man/pxtlate.1.in4
-rw-r--r--src/man/pxtrt.1.in4
-rw-r--r--src/man/pxtscale.1.in4
-rw-r--r--src/man/pxvcf2fa.1.in4
-rw-r--r--src/recode.cpp20
-rw-r--r--src/relabel.cpp29
-rw-r--r--src/relabel.h6
-rw-r--r--src/run_tests.py59
-rw-r--r--src/seq_info.cpp8
-rw-r--r--src/seq_utils.cpp21
-rw-r--r--src/seq_utils.h1
-rw-r--r--src/sequence.cpp1
-rw-r--r--src/tree_utils.cpp5
-rw-r--r--src/utils.cpp32
-rw-r--r--src/utils.h11
123 files changed, 1367 insertions, 512 deletions
diff --git a/.github/workflows/build-test.yml b/.github/workflows/build-test.yml
index 117ca5d..3c32352 100644
--- a/.github/workflows/build-test.yml
+++ b/.github/workflows/build-test.yml
@@ -26,6 +26,10 @@ jobs:
- name: Install apt dependencies
if: runner.os == 'Linux'
run: |
+ # Ensure the script doesn't hang waiting for user input
+ export DEBIAN_FRONTEND=noninteractive
+ # Update system
+ sudo apt-get update
sudo apt-get install -y \
autotools-dev \
automake \
diff --git a/README.md b/README.md
index 6f248c5..1261969 100755
--- a/README.md
+++ b/README.md
@@ -61,8 +61,9 @@ pxrr | rerooting and unrooting trees
pxs2fa | convert an alignment to fasta format
pxs2nex | convert an alignment to nexus format
pxs2phy | convert an alignment to phylip format
-pxseqgen | Sequence simulation program
+pxseqgen | sequence simulation program
pxssort | sequence sorter
+pxssplit | split alignment into N individual sequence files
pxsstat | multinomial alignment test statistics
pxstrec | a state reconstructor
pxsw | smith waterman alignment
diff --git a/doc/Programs.md b/doc/Programs.md
index 383e224..54120b9 100644
--- a/doc/Programs.md
+++ b/doc/Programs.md
@@ -294,6 +294,13 @@ Sort sequences by id (`-b 1`; default), reverse id (`-b 2`), length (`-b 3`), or
pxssort -s alignment_file -b 3
```
+* **pxssplit**: alignment splitter
+
+From an alignment file containing N sequences, write N alignments, each containing a single sequence.
+```
+pxssplit -s alignment_file
+```
+
* **pxsstat**: multinomial alignment test statistics
This program calculates multinomial alignment test statistics that can be used for assessing model adequacy. Currently limited to the test statistic of Bollback (2002) MBE, but more are coming.
@@ -386,6 +393,7 @@ pxvcf2fa -s vcf_file
* **pxtrsq**: tree-seq; remove from tree and alignment taxa not found in both (JWB)
* **pxgbdb**: a basic genbank database creator
* **pxtdist**: tree distance calculator (JWB)
+* **pxuncat**: an alignment decomposer (opposite of pxcat) (JWB)
# Programs in planning
* **pxcoal**: gene trees in species trees; simulation and probabilities. (JWB)
diff --git a/src/Makefile.in b/src/Makefile.in
index 1fe0f83..a819145 100644
--- a/src/Makefile.in
+++ b/src/Makefile.in
@@ -6,13 +6,13 @@ CXX := @CXX@
PREFIX := @prefix@
MANDIR = $(PREFIX)/share/man
+CXXFLAGS := @CXXFLAGS@
CPPFLAGS := @CPPFLAGS@
LDFLAGS := @LDFLAGS@
# profiling:
#-g -pg
-## set the nlopt around
HNLOPT := @HNLOPT@
HARM := @HARM@
HOMP := @HOMP@
@@ -227,6 +227,10 @@ RELABEL_SEQ_SRC = relabel.cpp main_rls.cpp
RELABEL_SEQ_OBJ = relabel.o main_rls.o
RELABEL_SEQ_DEP = relabel.d main_rls.d
+SEQ_SPLIT_SRC = main_ssplit.cpp
+SEQ_SPLIT_OBJ = main_ssplit.o
+SEQ_SPLIT_DEP = main_ssplit.d
+
RESCALE_TREE_SRC = tscale.cpp main_tscale.cpp
RESCALE_TREE_OBJ = tscale.o main_tscale.o
RESCALE_TREE_DEP = tscale.d main_tscale.d
@@ -352,10 +356,10 @@ RATE_DEPS += \
#./state_reconstructor_simple.d \
main_%.o: ./main_%.cpp
- $(CXX) $(OPT_FLAGS) $(CPPFLAGS) $(LDFLAGS) -c -fmessage-length=0 -MMD -MP -MF "$(@:%.o=%.d)" -o "$@" "$<"
+ $(CXX) $(CXXFLAGS) $(OPT_FLAGS) $(CPPFLAGS) $(LDFLAGS) -c -fmessage-length=0 -MMD -MP -MF "$(@:%.o=%.d)" -o "$@" "$<"
%.o: ./%.cpp ./%.h
- $(CXX) $(OPT_FLAGS) $(CPPFLAGS) $(LDFLAGS) -c -fmessage-length=0 -MMD -MP -MF "$(@:%.o=%.d)" -o "$@" "$<"
+ $(CXX) $(CXXFLAGS) $(OPT_FLAGS) $(CPPFLAGS) $(LDFLAGS) -c -fmessage-length=0 -MMD -MP -MF "$(@:%.o=%.d)" -o "$@" "$<"
PROGS := pxaa2cdn pxbdsim pxboot pxbp pxcat pxclsq pxcltr pxcolt pxcomp pxconsq pxfqfilt pxlog pxlssq pxlstr pxmono pxmrca pxmrcacut pxmrcaname pxnw pxpoly pxrecode pxrevcomp pxrls pxrlt pxrmk pxrms pxrmt pxrr pxs2fa pxs2nex pxs2phy pxseqgen pxssort pxsstat pxsw pxt2new pxt2nex pxtcol pxtcomb pxtgen pxtlate pxtrt pxtscale pxvcf2fa $(NLOPT_PROGRAMS)
@@ -366,102 +370,107 @@ all: $(PROGS)
pxlssq: $(CPP_OBJS) $(SEQ_OBJS) $(LS_SQ_OBJ)
@echo 'bulding pxlssq'
- $(CXX) -o "pxlssq" $(OPT_FLAGS) $(LS_SQ_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxlssq" $(OPT_FLAGS) $(LS_SQ_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxconsq: $(CPP_OBJS) $(SEQ_OBJS) $(CONSEQ_OBJ)
@echo 'building pxconsq'
- $(CXX) -o "pxconsq" $(OPT_FLAGS) $(CONSEQ_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxconsq" $(OPT_FLAGS) $(CONSEQ_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxclsq: $(CPP_OBJS) $(SEQ_OBJS) $(CLSQ_OBJ)
@echo 'building pxclsq'
- $(CXX) -o "pxclsq" $(OPT_FLAGS) $(CLSQ_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxclsq" $(OPT_FLAGS) $(CLSQ_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxrms: $(CPP_OBJS) $(SEQ_OBJS) $(RMS_OBJ)
@echo 'building pxrms'
- $(CXX) -o "pxrms" $(OPT_FLAGS) $(RMS_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxrms" $(OPT_FLAGS) $(RMS_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxaa2cdn: $(CPP_OBJS) $(SEQ_OBJS) $(AA2C_OBJ)
@echo 'building pxaa2cdn'
- $(CXX) -o "pxaa2cdn" $(OPT_FLAGS) $(AA2C_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxaa2cdn" $(OPT_FLAGS) $(AA2C_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxs2fa: $(CPP_OBJS) $(SEQ_OBJS) $(2FA_OBJ)
@echo 'building pxs2fa'
- $(CXX) -o "pxs2fa" $(OPT_FLAGS) $(2FA_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxs2fa" $(OPT_FLAGS) $(2FA_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxs2phy: $(CPP_OBJS) $(SEQ_OBJS) $(2PHY_OBJ)
@echo 'building pxs2phy'
- $(CXX) -o "pxs2phy" $(OPT_FLAGS) $(2PHY_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxs2phy" $(OPT_FLAGS) $(2PHY_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxs2nex: $(CPP_OBJS) $(SEQ_OBJS) $(2NEX_OBJ)
@echo 'building pxs2nex'
- $(CXX) -o "pxs2nex" $(OPT_FLAGS) $(2NEX_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxs2nex" $(OPT_FLAGS) $(2NEX_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxfqfilt: $(CPP_OBJS) $(SEQ_OBJS) $(FQFILT_OBJ)
@echo 'building pxfqfilt'
- $(CXX) -o "pxfqfilt" $(OPT_FLAGS) $(FQFILT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxfqfilt" $(OPT_FLAGS) $(FQFILT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxrevcomp: $(CPP_OBJS) $(SEQ_OBJS) $(REVCOMP_OBJ)
@echo 'building pxrevcomp'
- $(CXX) -o "pxrevcomp" $(OPT_FLAGS) $(REVCOMP_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxrevcomp" $(OPT_FLAGS) $(REVCOMP_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxnw: $(CPP_OBJS) $(SEQ_OBJS) $(NW_OBJ)
@echo 'building pxnw'
- $(CXX) -o "pxnw" $(OPT_FLAGS) $(NW_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxnw" $(OPT_FLAGS) $(NW_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxsw: $(CPP_OBJS) $(SEQ_OBJS) $(SW_OBJ)
@echo 'building pxsw'
- $(CXX) -o "pxsw" $(OPT_FLAGS) $(SW_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxsw" $(OPT_FLAGS) $(SW_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxboot: $(CPP_OBJS) $(SEQ_OBJS) $(SEQSAMP_OBJ)
@echo 'building pxboot'
- $(CXX) -o "pxboot" $(OPT_FLAGS) $(SEQSAMP_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxboot" $(OPT_FLAGS) $(SEQSAMP_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxtlate: $(CPP_OBJS) $(SEQ_OBJS) $(TLATE_OBJ)
@echo 'building pxtlate'
- $(CXX) -o "pxtlate" $(OPT_FLAGS) $(TLATE_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxtlate" $(OPT_FLAGS) $(TLATE_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxcat: $(CPP_OBJS) $(SEQ_OBJS) $(CONCAT_OBJ)
@echo 'building pxcat'
- $(CXX) -o "pxcat" $(OPT_FLAGS) $(CONCAT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxcat" $(OPT_FLAGS) $(CONCAT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxsstat: $(CPP_OBJS) $(SEQ_OBJS) $(SSTAT_OBJ)
@echo 'building pxsstat'
- $(CXX) -o "pxsstat" $(OPT_FLAGS) $(SSTAT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxsstat" $(OPT_FLAGS) $(SSTAT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxrecode: $(CPP_OBJS) $(SEQ_OBJS) $(SEQRECODE_OBJ)
@echo 'building pxrecode'
- $(CXX) -o "pxrecode" $(OPT_FLAGS) $(SEQRECODE_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxrecode" $(OPT_FLAGS) $(SEQRECODE_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxssort: $(CPP_OBJS) $(SEQ_OBJS) $(SSORT_OBJ)
@echo 'building pxssort'
- $(CXX) -o "pxssort" $(OPT_FLAGS) $(SSORT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxssort" $(OPT_FLAGS) $(SSORT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxcomp: $(CPP_OBJS) $(SEQ_OBJS) $(COMPTEST_OBJ)
@echo 'building pxcomp'
- $(CXX) -o "pxcomp" $(OPT_FLAGS) $(COMPTEST_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxcomp" $(OPT_FLAGS) $(COMPTEST_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
pxvcf2fa: $(CPP_OBJS) $(SEQ_OBJS) $(VCF2FA_OBJ)
@echo 'building pxvcf2fa'
- $(CXX) -o "pxvcf2fa" $(OPT_FLAGS) $(VCF2FA_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxvcf2fa" $(OPT_FLAGS) $(VCF2FA_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
+ cat man/$@.1.in > man/$@.1
+
+pxssplit: $(CPP_OBJS) $(SEQ_OBJS) $(SEQ_SPLIT_OBJ)
+ @echo 'building pxssplit'
+ $(CXX) $(CXXFLAGS) -o "pxssplit" $(OPT_FLAGS) $(SEQ_SPLIT_OBJ) $(CPP_OBJS) $(SEQ_OBJS)
cat man/$@.1.in > man/$@.1
@@ -469,117 +478,117 @@ pxvcf2fa: $(CPP_OBJS) $(SEQ_OBJS) $(VCF2FA_OBJ)
pxbdsim: $(CPP_OBJS) $(TREE_OBJS) $(BD_SIM_OBJ)
@echo 'building pxbdsim'
- $(CXX) -o "pxbdsim" $(OPT_FLAGS) $(BD_SIM_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxbdsim" $(OPT_FLAGS) $(BD_SIM_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxbdfit: $(CPP_OBJS) $(TREE_OBJS) $(BD_FIT_OBJ)
@echo 'building pxbdfit'
- $(CXX) -o "pxbdfit" $(OPT_FLAGS) $(BD_FIT_OBJ) $(CPP_OBJS) $(TREE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS)
+ $(CXX) $(CXXFLAGS) -o "pxbdfit" $(OPT_FLAGS) $(BD_FIT_OBJ) $(CPP_OBJS) $(TREE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS)
cat man/$@.1.in > man/$@.1
pxnni: $(CPP_OBJS) $(TREE_OBJS) $(NNI_OBJ)
@echo 'building pxnni'
- $(CXX) -o "pxnni" $(OPT_FLAGS) $(NNI_OBJ) $(CPP_OBJS) $(TREE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS)
+ $(CXX) $(CXXFLAGS) -o "pxnni" $(OPT_FLAGS) $(NNI_OBJ) $(CPP_OBJS) $(TREE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS)
cat man/$@.1.in > man/$@.1
pxlstr: $(CPP_OBJS) $(TREE_OBJS) $(LS_TR_OBJ)
@echo 'building pxlstr'
- $(CXX) -o "pxlstr" $(OPT_FLAGS) $(LS_TR_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxlstr" $(OPT_FLAGS) $(LS_TR_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxmrca: $(CPP_OBJS) $(TREE_OBJS) $(MRCA_OBJ)
@echo 'building pxmrca'
- $(CXX) -o "pxmrca" $(OPT_FLAGS) $(MRCA_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxmrca" $(OPT_FLAGS) $(MRCA_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxmrcacut: $(CPP_OBJS) $(TREE_OBJS) $(MRCACUT_OBJ)
@echo 'building pxmrcacut'
- $(CXX) -o "pxmrcacut" $(OPT_FLAGS) $(MRCACUT_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxmrcacut" $(OPT_FLAGS) $(MRCACUT_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxmrcaname: $(CPP_OBJS) $(TREE_OBJS) $(MRCANAME_OBJ)
@echo 'building pxmrcaname'
- $(CXX) -o "pxmrcaname" $(OPT_FLAGS) $(MRCANAME_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxmrcaname" $(OPT_FLAGS) $(MRCANAME_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxcltr: $(CPP_OBJS) $(TREE_OBJS) $(CLTR_OBJ)
@echo 'building pxcltr'
- $(CXX) -o "pxcltr" $(OPT_FLAGS) $(CLTR_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxcltr" $(OPT_FLAGS) $(CLTR_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxt2new: $(CPP_OBJS) $(TREE_OBJS) $(T2NEW_OBJ)
@echo 'building pxt2new'
- $(CXX) -o "pxt2new" $(OPT_FLAGS) $(T2NEW_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxt2new" $(OPT_FLAGS) $(T2NEW_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxt2nex: $(CPP_OBJS) $(TREE_OBJS) $(T2NEX_OBJ)
@echo 'building pxt2nex'
- $(CXX) -o "pxt2nex" $(OPT_FLAGS) $(T2NEX_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxt2nex" $(OPT_FLAGS) $(T2NEX_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxrr: $(CPP_OBJS) $(TREE_OBJS) $(RR_OBJ)
@echo 'building pxrr'
- $(CXX) -o "pxrr" $(OPT_FLAGS) $(RR_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxrr" $(OPT_FLAGS) $(RR_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxmono: $(CPP_OBJS) $(TREE_OBJS) $(MONO_OBJ)
@echo 'building pxmono'
- $(CXX) -o "pxmono" $(OPT_FLAGS) $(MONO_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxmono" $(OPT_FLAGS) $(MONO_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxrmt: $(CPP_OBJS) $(TREE_OBJS) $(RMT_OBJ)
@echo 'building pxrmt'
- $(CXX) -o "pxrmt" $(OPT_FLAGS) $(RMT_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxrmt" $(OPT_FLAGS) $(RMT_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxtrt: $(CPP_OBJS) $(TREE_OBJS) $(TRT_OBJ)
@echo 'building pxtrt'
- $(CXX) -o "pxtrt" $(OPT_FLAGS) $(TRT_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxtrt" $(OPT_FLAGS) $(TRT_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxrmk: $(CPP_OBJS) $(TREE_OBJS) $(RMK_OBJ)
@echo 'building pxrmk'
- $(CXX) -o "pxrmk" $(OPT_FLAGS) $(RMK_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxrmk" $(OPT_FLAGS) $(RMK_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxlog: $(CPP_OBJS) $(TREE_OBJS) $(LOGMANIP_OBJ)
@echo 'building pxlog'
- $(CXX) -o "pxlog" $(OPT_FLAGS) $(LOGMANIP_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxlog" $(OPT_FLAGS) $(LOGMANIP_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxtscale: $(CPP_OBJS) $(TREE_OBJS) $(RESCALE_TREE_OBJ)
@echo 'building pxtscale'
- $(CXX) -o "pxtscale" $(OPT_FLAGS) $(RESCALE_TREE_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxtscale" $(OPT_FLAGS) $(RESCALE_TREE_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxtcol: $(CPP_OBJS) $(TREE_OBJS) $(TCOL_OBJ)
@echo 'building pxtcol'
- $(CXX) -o "pxtcol" $(OPT_FLAGS) $(TCOL_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxtcol" $(OPT_FLAGS) $(TCOL_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxtcomb: $(CPP_OBJS) $(TREE_OBJS) $(TCOMB_OBJ)
@echo 'building pxtcomb'
- $(CXX) -o "pxtcomb" $(OPT_FLAGS) $(TCOMB_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxtcomb" $(OPT_FLAGS) $(TCOMB_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxtgen: $(CPP_OBJS) $(TREE_OBJS) $(TGEN_OBJ)
@echo 'building pxtgen'
- $(CXX) -o "pxtgen" $(OPT_FLAGS) $(TGEN_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxtgen" $(OPT_FLAGS) $(TGEN_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxpoly: $(CPP_OBJS) $(TREE_OBJS) $(POLY_OBJ)
@echo 'building pxpoly'
- $(CXX) -o "pxpoly" $(OPT_FLAGS) $(POLY_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxpoly" $(OPT_FLAGS) $(POLY_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxcolt: $(CPP_OBJS) $(TREE_OBJS) $(COLLAPSE_TREE_OBJ)
@echo 'building pxcolt'
- $(CXX) -o "pxcolt" $(OPT_FLAGS) $(COLLAPSE_TREE_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxcolt" $(OPT_FLAGS) $(COLLAPSE_TREE_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxbp: $(CPP_OBJS) $(TREE_OBJS) $(BP_OBJ)
@echo 'building pxbp'
- $(CXX) -o "pxbp" $(OPT_FLAGS) $(BP_OBJ) $(CPP_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxbp" $(OPT_FLAGS) $(BP_OBJ) $(CPP_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
@@ -587,22 +596,22 @@ pxbp: $(CPP_OBJS) $(TREE_OBJS) $(BP_OBJ)
pxrls: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RELABEL_SEQ_OBJ)
@echo 'building pxrls'
- $(CXX) -o "pxrls" $(OPT_FLAGS) $(RELABEL_SEQ_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxrls" $(OPT_FLAGS) $(RELABEL_SEQ_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxrlt: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RELABEL_TREE_OBJ)
@echo 'building pxrlt'
- $(CXX) -o "pxrlt" $(OPT_FLAGS) $(RELABEL_TREE_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxrlt" $(OPT_FLAGS) $(RELABEL_TREE_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxupgma: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(UPGMA_OBJ)
@echo 'building pxupgma'
- $(CXX) -o "pxupgma" $(OPT_FLAGS) $(UPGMA_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxupgma" $(OPT_FLAGS) $(UPGMA_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
pxnj: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(NJ_OBJ)
@echo 'building pxnj'
- $(CXX) -o "pxnj" $(OPT_FLAGS) $(NJ_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS)
+ $(CXX) $(CXXFLAGS) -o "pxnj" $(OPT_FLAGS) $(NJ_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS)
cat man/$@.1.in > man/$@.1
@@ -610,17 +619,17 @@ pxnj: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(NJ_OBJ)
pxseqgen: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RATE_OBJS) $(SEQGEN_OBJ) $(FORT_OBJS)
@echo 'building pxseqgen'
- $(CXX) -o "pxseqgen" $(OPT_FLAGS) $(SEQGEN_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RATE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS)
+ $(CXX) $(CXXFLAGS) -o "pxseqgen" $(OPT_FLAGS) $(SEQGEN_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RATE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS)
cat man/$@.1.in > man/$@.1
pxcontrates: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(CONTRATES_OBJ) $(RATE_OBJS)
@echo 'building pxcontrates'
- $(CXX) -o "pxcontrates" $(OPT_FLAGS) $(CONTRATES_OBJ) $(RATE_OBJS) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS)
+ $(CXX) $(CXXFLAGS) -o "pxcontrates" $(OPT_FLAGS) $(CONTRATES_OBJ) $(RATE_OBJS) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS)
cat man/$@.1.in > man/$@.1
pxstrec: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RATE_OBJS) $(STREC_OBJ) #$(FORT_OBJS)
@echo 'building pxstrec'
- $(CXX) -o "pxstrec" $(OPT_FLAGS) $(STREC_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RATE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS) #$(FORT_OBJS) -lgfortran
+ $(CXX) $(CXXFLAGS) -o "pxstrec" $(OPT_FLAGS) $(STREC_OBJ) $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RATE_OBJS) $(CPPFLAGS) $(LDFLAGS) $(CPP_LIBS) #$(FORT_OBJS) -lgfortran
cat man/$@.1.in > man/$@.1
@@ -628,11 +637,11 @@ pxstrec: $(CPP_OBJS) $(SEQ_OBJS) $(TREE_OBJS) $(RATE_OBJS) $(STREC_OBJ) #$(FORT_
#removing until fortran update
#pxsm0: $(CPP_OBJS) $(RATE_OBJS) $(SM0_OBJ) $(FORT_OBJS)
# @echo 'building pxsm0'
-# $(CXX) -o "pxsm0" $(OPT_FLAGS) $(SM0_OBJ) $(CPP_OBJS) $(RATE_OBJS) $(FORT_OBJS) $(CPP_LIBS) -lgfortran
+# $(CXX) $(CXXFLAGS) -o "pxsm0" $(OPT_FLAGS) $(SM0_OBJ) $(CPP_OBJS) $(RATE_OBJS) $(FORT_OBJS) $(CPP_LIBS) -lgfortran
#pxsm2a: $(CPP_OBJS) $(RATE_OBJS) $(SM2A_OBJ) $(FORT_OBJS)
# @echo 'building pxsm2a'
-# $(CXX) -o "pxsm2a" $(OPT_FLAGS) $(SM2A_OBJ) $(CPP_OBJS) $(RATE_OBJS) $(FORT_OBJS) $(CPP_LIBS) -lgfortran
+# $(CXX) $(CXXFLAGS) -o "pxsm2a" $(OPT_FLAGS) $(SM2A_OBJ) $(CPP_OBJS) $(RATE_OBJS) $(FORT_OBJS) $(CPP_LIBS) -lgfortran
diff --git a/src/TEST/Alignment_empty-sites b/src/TEST/Alignment_empty-sites
new file mode 100644
index 0000000..f01361e
--- /dev/null
+++ b/src/TEST/Alignment_empty-sites
@@ -0,0 +1,38 @@
+>Drosophila_setosifrons
+-------------T-A-TAATAATATTCTTATATAAAATATAATTATTTTTATATT-T--GG-TCCNN--TTTC--GTACTAAAATATAACAA--TTT--TT---T
+>Drosophila_adunca
+-------------T---TAATAATATTCTTATTAAAAATATAATTATTTAAATATT-T--GG-TCCCC--TTTC--GTACTAAAATATTTTAAA-TTT--TT---T
+>Drosophila_oahuensis
+-------------T---TAATGAAATTCTTGCTAAAAATATGATTATTTTAATATT-T--GG-TCCNN--TTTC--GTACTAAAATATTATAA--TTT--TT---T
+>Drosophila_picticornis
+-------------T---TAATAATATTCTTATTTAAAATATAATTATTTTAACATT-T--GG-TCCCC--TTTC--GTACTAAAATGTTATAAG-TTT--TT---T
+>Drosophila_primaeva
+-------------T-A-TAATAATATTCTTACCAAAAATATAATTATTTTAATATT-T--GG-TCCNN--TTTC--GTACTAAAATATTATAA--TTT--TT---T
+>Drosophila_nigribasis
+-------------T---TAATAATATTCTTATTAAAAATATAATTATTTTAGTATT-T--GG-TCCNN--TTTC--GTACTAAAATACATTAA--TTT--TT---T
+>Drosophila_obscuripes
+----AA-------T---TAATAATATTCTTACTAAAAATATAATTATTTTAGTATT-T--GG-TCCNN--TTTC--GTACTAAAATACTATAA--TTT--TT---T
+>Drosophila_silvestris
+----NN-------T---TAATAATATTCTTATTTAAAATATAATTATTTTAATATT-T--GG-TCCNN--TTTC--GTACTAAAATATTATAA--TTT--TT---T
+>Drosophila_heteroneura
+-------------T---TAATAATATTCTTATTTAAAATATAATTATTTTAATATT-T--GG-TCCNN--TTTC--GTACTAAAATATTATAA--TTT--TT---T
+>Drosophila_differens
+-------------T---TAATAATATTCTTATTAAAAATATAATTATCTTAATATT-T--GG-TCCNN--TTTC--GTACTAAAATATTATAA--TTT--TT---T
+>Drosophila_planitibia
+-------------T---TAATAATATTCTTATTAAAAATATAATTATTTTAATATT-T--GG-TCCNN--TTTC--GTACTAAAATATTATAA--TTT--TT---T
+>Drosophila_hemipeza
+-------------T---TAATAATATTCTTAATAAAAATATAATTATTTTAATATT-T--GG-TCCCC--TTTC--GTACTAAAATATTATAA--TTT--TT---T
+>Drosophila_ingens
+-------------T---TAATAATATTCTTATTAAAAATATAATTATTTTAATATT-T--GG-TCCNN--TTTC--GTACTAAAATATTATAA--TTT--TT---T
+>Drosophila_neopicta
+-------------T---TAATAATATTCTTATTAAAAATATAATTATTTTCATATTAT--GG-TCCNN--TTTC--GTACTAAAATATAATAA--TTT--TT---T
+>Drosophila_substenoptera
+-------------T---TAATAATATTCTTATTAAAAATACAATTATTTTCATATT-T--GG-TCCNN--TTTC--GTACTAAAATATAATAA--TTT--TT---T
+>Drosophila_neoperkinsi
+-------------T---TAATAAGATTCTTACTAAAAATATAATTATTTTAGTATT-T--GG-TCCNN--TTTC--GTACTAAAATACTATAA--TTT--TT---T
+>Drosophila_melanocephala
+-------------T---TAATAATATTCTTACTAAAAATATAATTATTTTAGTATT-T--GG-TCCNN--TTTC--GTACTAAAATACTATAA--TTT--TT---T
+>Drosophila_hanaulae
+-------------T-A-TAATAATATTCTTACTAAAAATATAATTATTTTAGTATT-T--GG-TCCCC--TTTC--GTACTAAAATACTATAAG-TTT--TT---T
+>Drosophila_cyrtoloma
+-------------T---TAATAATATTCTTACTAAAAATATAATTATTTTAGTATT-T--GG-TCCNN--TTTC--GTACTAAAATACTATAA--TTT--TT---T
diff --git a/src/TEST/un_aln_nuc_mixed-stop.fa b/src/TEST/un_aln_nuc_mixed-stop.fa
new file mode 100644
index 0000000..97c61a8
--- /dev/null
+++ b/src/TEST/un_aln_nuc_mixed-stop.fa
@@ -0,0 +1,8 @@
+>Sequence5
+ATGATGATGATGATGtaa
+>Sequence1
+ATGAAAAAG
+>Sequence2
+ATGtag
+>Sequence3
+ATGATGATGATGATGTAA
diff --git a/src/TEST/un_aln_nuc_with-stop.fa b/src/TEST/un_aln_nuc_with-stop.fa
new file mode 100644
index 0000000..01f8b12
--- /dev/null
+++ b/src/TEST/un_aln_nuc_with-stop.fa
@@ -0,0 +1,8 @@
+>Sequence5
+ATGATGATGATGATGTAG
+>Sequence1
+ATGAAAAAGTAA
+>Sequence2
+ATGTAA
+>Sequence3
+ATGATGATGATGATGTGA
diff --git a/src/aa2cdn.cpp b/src/aa2cdn.cpp
index a1664e7..331d51e 100644
--- a/src/aa2cdn.cpp
+++ b/src/aa2cdn.cpp
@@ -16,15 +16,23 @@ struct SequenceIDListCompare {
AAtoCDN::AAtoCDN (std::vector<Sequence> nuc_seqs, std::vector<Sequence> aa_seqs,
- const bool& remove_last):remove_last_(remove_last), nuc_seqs_(std::move(nuc_seqs)),
- aa_seqs_(std::move(aa_seqs)) {
+ const bool& remove_last, const bool& remove_stop):remove_last_(remove_last),
+ remove_stop_(remove_stop), nuc_seqs_(std::move(nuc_seqs)), aa_seqs_(std::move(aa_seqs)) {
// set up names
nuc_names_ = collect_names(nuc_seqs_);
aa_names_ = collect_names(aa_seqs_);
sort(aa_seqs_.begin(), aa_seqs_.end(), SequenceIDListCompare);
sort(nuc_seqs_.begin(), nuc_seqs_.end(), SequenceIDListCompare);
+
+ // check (and purge) taxa that are only present in one file type
check_names();
+
+ //std::cout << "remove_stop_ = " << remove_stop_ << std::endl;
+
+ if (remove_stop_) {
+ process_stop_codons();
+ }
generate_codon_alignment();
}
@@ -73,6 +81,21 @@ void AAtoCDN::check_names () {
}
+// if a nucleotide sequence is stop-terminated, strip its final 3 characters (the stop codon)
+void AAtoCDN::process_stop_codons () {
+ // looking for TAG, TGA, TAA
+ std::string nucseq;
+ for (unsigned int i = 0; i < nuc_seqs_.size(); i++) {
+ nucseq = nuc_seqs_[i].get_sequence();
+ if (is_stop_terminated(nucseq)) {
+ remove_last_N(nucseq, 3);
+ nuc_seqs_[i].set_sequence(nucseq);
+ }
+ }
+
+
+}
+
// at this point, alignments should be of the same size and order
void AAtoCDN::generate_codon_alignment () {
std::string aaseq;
@@ -89,17 +112,24 @@ void AAtoCDN::generate_codon_alignment () {
// check that seq lengths correspond
unsigned int ncodons = static_cast<unsigned int>(nucseq.length()) / 3u;
unsigned int naachars = aalen - static_cast<unsigned int>(std::count(aaseq.begin(), aaseq.end(), '-'));
+
+ // discount the final codon (last 3 nucleotides) from the count; assumes every sequence ends with a final (stop) codon
+ if (remove_last_) {
+ ncodons--;
+ }
+
if (ncodons != naachars) {
std::cerr << "Error: for taxon '" << aa_seqs_[i].get_id()
<< "' nucleotide alignment involves " << ncodons
- << " codons, but protein alignment involves " << naachars
+ << " codons";
+ if (remove_last_) {
+ std::cerr << " (after removing final codon)";
+ }
+ std::cerr << ", but protein alignment involves " << naachars
<< " amino acids. Skipping." << std::endl;
continue;
}
-
- if (remove_last_) {
- aalen--;
- }
+
unsigned int nuccntr = 0;
for (unsigned int j = 0; j < aalen; j++) {
if (aaseq[j] == '-') {
diff --git a/src/aa2cdn.h b/src/aa2cdn.h
index 962c1b5..9d505cb 100644
--- a/src/aa2cdn.h
+++ b/src/aa2cdn.h
@@ -9,6 +9,7 @@
class AAtoCDN {
private:
bool remove_last_;
+ bool remove_stop_;
std::vector<Sequence> nuc_seqs_;
std::vector<Sequence> aa_seqs_;
std::vector<Sequence> codon_seqs_;
@@ -16,12 +17,13 @@ private:
std::vector<std::string> aa_names_;
void check_names ();
+ void process_stop_codons ();
void generate_codon_alignment ();
public:
AAtoCDN ();
AAtoCDN (std::vector<Sequence> nuc_seqs, std::vector<Sequence> aa_seqs,
- const bool& remove_last);
+ const bool& remove_last, const bool& remove_stop);
void write_codon_alignment (std::ostream* poos);
std::vector<Sequence> get_codon_alignment () const;
};
diff --git a/src/clean_seq.cpp b/src/clean_seq.cpp
index c394625..b64fd2f 100644
--- a/src/clean_seq.cpp
+++ b/src/clean_seq.cpp
@@ -11,11 +11,23 @@
SequenceCleaner::SequenceCleaner (std::istream* pios, double& prop_required,
- const bool& by_taxon, const bool& by_codon, const bool& count_only,
- const bool& verbose):num_taxa_(0u), num_char_(0u), num_retained_(0u),
- missing_allowed_(1.0 - prop_required), by_taxon_(by_taxon),
- by_codon_(by_codon), count_only_(count_only), verbose_(verbose) {
+ const bool& remove_empty, const int& min_chars, const bool& by_taxon,
+ const bool& by_codon, const bool& count_only, const bool& verbose):num_taxa_(0u),
+ num_char_(0u), num_retained_(0u), min_chars_per_site_(min_chars),
+ missing_allowed_(1.0 - prop_required), by_taxon_(by_taxon), by_codon_(by_codon),
+ count_only_(count_only), verbose_(verbose), remove_empty_(remove_empty),
+ min_chars_(false) {
read_in_sequences(pios);
+ if (min_chars_per_site_ != 0) {
+ min_chars_ = true;
+ if (min_chars_per_site_ > num_taxa_) {
+ std::cerr << "Error: minimum characters required (" << min_chars_per_site_
+ << ") exceeds number of taxa (" << num_taxa_ << "). Exiting."
+ << std::endl;
+ exit(0);
+ }
+ max_missing_ = num_taxa_ - min_chars_per_site_;
+ }
count_missing();
if (!count_only_) {
generate_cleaned_sequences();
@@ -245,12 +257,22 @@ void SequenceCleaner::count_missing () {
// get proportions
for (unsigned int i = 0; i < num_char_; i++) {
- missing_per_site_proportion_[i] = static_cast<double>(missing_per_site_counts_[i])
+ if (min_chars_) {
+ if (static_cast<unsigned int>(missing_per_site_counts_[i]) <= max_missing_) {
+ retained_sites_.push_back(i);
+ }
+ } else if (remove_empty_) { // equivalent to min_chars_per_site_ == 1
+ if (missing_per_site_counts_[i] != static_cast<int>(num_taxa_)) {
+ retained_sites_.push_back(i);
+ }
+ } else {
+ missing_per_site_proportion_[i] = static_cast<double>(missing_per_site_counts_[i])
/ static_cast<double>(num_taxa_);
- //std::cout << i << ". missing = " << missing_per_site_counts_[i] << "("
- // << missing_per_site_proportion_[i] << ")" << std::endl;
- if (missing_per_site_proportion_[i] <= missing_allowed_) {
- retained_sites_.push_back(i);
+ //std::cout << i << ". missing = " << missing_per_site_counts_[i] << "("
+ // << missing_per_site_proportion_[i] << ")" << std::endl;
+ if (missing_per_site_proportion_[i] <= missing_allowed_) {
+ retained_sites_.push_back(i);
+ }
}
}
diff --git a/src/clean_seq.h b/src/clean_seq.h
index 08991b3..675b955 100644
--- a/src/clean_seq.h
+++ b/src/clean_seq.h
@@ -12,12 +12,16 @@ private:
unsigned int num_taxa_;
unsigned int num_char_;
unsigned int num_retained_;
+ unsigned int min_chars_per_site_;
+ unsigned int max_missing_;
double missing_allowed_;
bool by_taxon_;
bool by_codon_;
bool count_only_;
bool verbose_;
+ bool remove_empty_;
+ bool min_chars_;
std::string badChars_;
std::string alpha_name_;
@@ -39,8 +43,8 @@ private:
void set_bad_chars ();
public:
- SequenceCleaner (std::istream* pios, double& prop_required,
- const bool& by_taxon, const bool& by_codon, const bool& count_only,
+ SequenceCleaner (std::istream* pios, double& prop_required, const bool& remove_empty,
+ const int& min_chars, const bool& by_taxon, const bool& by_codon, const bool& count_only,
const bool& verbose);
std::vector<Sequence> get_cleaned_seqs () const; // not used, but available
void write_seqs (std::ostream* poos);
diff --git a/src/comp_test.cpp b/src/comp_test.cpp
index 1e60d6f..e82d67c 100644
--- a/src/comp_test.cpp
+++ b/src/comp_test.cpp
@@ -121,8 +121,10 @@ void CompTest::get_longest_taxon_label () {
void CompTest::return_freq_table () {
const int colWidth = 12;
// need to take into account longest_tax_label_
- get_longest_taxon_label();
- std::string pad = std::string(longest_tax_label_, ' ');
+ get_longest_taxon_label(); // label might be shorter than row label 'Total'
+ unsigned int longest_label = longest_tax_label_;
+ longest_label = std::max(longest_label, 5U);
+ std::string pad = std::string(longest_label, ' ');
// header
(*poos_) << "Observed character counts:" << std::endl;
(*poos_) << pad << " ";
@@ -131,7 +133,7 @@ void CompTest::return_freq_table () {
}
(*poos_) << std::right << std::setw(colWidth) << "Nchar" << std::endl;
for (size_t i = 0; i < static_cast<size_t>(num_taxa_); i++) {
- unsigned int diff = longest_tax_label_ - static_cast<unsigned int>(taxon_labels_[i].size());
+ unsigned int diff = longest_label - static_cast<unsigned int>(taxon_labels_[i].size());
(*poos_) << taxon_labels_[i];
if (diff > 0) {
pad = std::string(diff, ' ');
@@ -143,7 +145,7 @@ void CompTest::return_freq_table () {
}
(*poos_) << std::right << std::setw(colWidth) << row_totals_[i] << std::endl;
}
- unsigned int diff = longest_tax_label_ - 5;
+ unsigned int diff = longest_label - 5U;
pad = std::string(diff, ' ');
(*poos_) << "Total" << pad << " ";
for (unsigned int col_total : col_totals_) {
diff --git a/src/generate_manpages.py b/src/generate_manpages.py
index 26845f5..8ca395d 100644
--- a/src/generate_manpages.py
+++ b/src/generate_manpages.py
@@ -58,7 +58,7 @@ if __name__ == "__main__":
n = 0
print ("=================")
- for i in os.listdir("."):
+ for i in sorted(os.listdir(".")):
if i[:2] == "px":
make_manpage(i)
print ("=================")
diff --git a/src/main_aa2cdn.cpp b/src/main_aa2cdn.cpp
index 1f2fe92..ba18452 100644
--- a/src/main_aa2cdn.cpp
+++ b/src/main_aa2cdn.cpp
@@ -16,6 +16,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Generate a codon alignment from aligned amino acids and unaligned nucleotides." << std::endl;
std::cout << "Taxa found in only 1 input file will be removed." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus inputs." << std::endl;
@@ -25,7 +26,8 @@ void print_help () {
std::cout << "Options:" << std::endl;
std::cout << " -a, --aaseqf=FILE input sequence file, STDIN otherwise" << std::endl;
std::cout << " -n, --nucseqf=FILE input sequence file, STDIN otherwise" << std::endl;
- std::cout << " -r, --rmlastcdn remove last codon (default: false)" << std::endl;
+ std::cout << " -r, --rmlastcdn remove last codon from *all* nuc sequences (default: false)" << std::endl;
+ std::cout << " -s, --stopremove remove stop codon from nuc sequences if present (default: false)" << std::endl;
std::cout << " -o, --outf=FILE output fasta file, STOUT otherwise" << std::endl;
std::cout << " -h, --help display this help and exit" << std::endl;
std::cout << " -V, --version display version and exit" << std::endl;
@@ -49,6 +51,7 @@ static struct option const long_options[] =
{"nucseqf", required_argument, nullptr, 'n'},
{"outf", required_argument, nullptr, 'o'},
{"rmlastcdn", no_argument, nullptr, 'r'},
+ {"stopremove", no_argument, nullptr, 's'},
{"help", no_argument, nullptr, 'h'},
{"version", no_argument, nullptr, 'V'},
{"citation", no_argument, nullptr, 'C'},
@@ -63,13 +66,15 @@ int main(int argc, char * argv[]) {
bool outfileset = false;
bool nucfileset = false;
bool rm_last = false;
+ bool rm_stop = false;
char * aaseqf = nullptr;
char * nucseqf = nullptr;
char * outf = nullptr;
+
while (true) {
int oi = -1;
- int c = getopt_long(argc, argv, "a:o:n:rhVC", long_options, &oi);
+ int c = getopt_long(argc, argv, "a:o:n:rshVC", long_options, &oi);
if (c == -1) {
break;
}
@@ -91,6 +96,9 @@ int main(int argc, char * argv[]) {
case 'r':
rm_last = true;
break;
+ case 's':
+ rm_stop = true;
+ break;
case 'h':
print_help();
exit(0);
@@ -113,15 +121,6 @@ int main(int argc, char * argv[]) {
check_inout_streams_identical(nucseqf, outf);
}
- if (!fileset) {
- std::cerr << "Error: you must specify an input amino acid sequence file. Exiting." << std::endl;
- exit(0);
- }
- if (!nucfileset) {
- std::cerr << "Error: you must specify an input nucleotide sequence file. Exiting." << std::endl;
- exit(0);
- }
-
std::ostream * poos = nullptr;
std::ofstream * ofstr = nullptr;
std::ifstream * fstr = nullptr;
@@ -156,6 +155,15 @@ int main(int argc, char * argv[]) {
poos = &std::cout;
}
+ if (!fileset) {
+ std::cerr << "Error: you must specify an input amino acid sequence file. Exiting." << std::endl;
+ exit(0);
+ }
+ if (!nucfileset) {
+ std::cerr << "Error: you must specify an input nucleotide sequence file. Exiting." << std::endl;
+ exit(0);
+ }
+
// use general purpose reader
std::vector<Sequence> nuc_seqs;
std::vector<Sequence> aa_seqs;
@@ -191,7 +199,12 @@ int main(int argc, char * argv[]) {
exit(0);
}
- AAtoCDN A2C(nuc_seqs, aa_seqs, rm_last);
+ if (rm_last && rm_stop) {
+ std::cerr << "Error: you may set -r or -s, but not both. Exiting." << std::endl;
+ exit(0);
+ }
+
+ AAtoCDN A2C(nuc_seqs, aa_seqs, rm_last, rm_stop);
A2C.write_codon_alignment(poos);
if (fileset) {
diff --git a/src/main_bd_fit.cpp b/src/main_bd_fit.cpp
index 85e48b4..689cce3 100644
--- a/src/main_bd_fit.cpp
+++ b/src/main_bd_fit.cpp
@@ -21,6 +21,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Fit a lineage diversification model to a tree." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_bd_sim.cpp b/src/main_bd_sim.cpp
index 7fc182c..2d0108c 100644
--- a/src/main_bd_sim.cpp
+++ b/src/main_bd_sim.cpp
@@ -20,6 +20,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Birth-death tree simulator." << std::endl;
std::cout << std::endl;
std::cout << "Usage: pxbdsim [OPTIONS]..." << std::endl;
@@ -83,6 +84,7 @@ int main(int argc, char * argv[]) {
bool showd = false;
bool verbose = false;
long int seed = -1;
+ bool argspresent = false;
while (true) {
int oi = -1;
@@ -94,10 +96,12 @@ int main(int argc, char * argv[]) {
case 'e':
ext = string_to_int(optarg, "-e");
extantset = true;
+ argspresent = true;
break;
case 't':
time = string_to_double(optarg, "-t");
timeset = true;
+ argspresent = true;
break;
case 'b':
birth = string_to_double(optarg, "-b");
@@ -105,6 +109,7 @@ int main(int argc, char * argv[]) {
std::cerr << "Error: birth rate must be > 0. Exiting." << std::endl;
exit(0);
}
+ argspresent = true;
break;
case 'd':
death = string_to_double(optarg, "-d");
@@ -112,22 +117,28 @@ int main(int argc, char * argv[]) {
std::cerr << "Error: death rate must be >= 0. Exiting." << std::endl;
exit(0);
}
+ argspresent = true;
break;
case 'n':
nreps = string_to_int(optarg, "-n");
+ argspresent = true;
break;
case 'v':
verbose = true;
+ argspresent = true;
break;
case 'o':
outfileset = true;
outf = strdup(optarg);
+ argspresent = true;
break;
case 'x':
seed = string_to_long_int(optarg, "-x");
+ argspresent = true;
break;
case 's':
showd = true;
+ argspresent = true;
break;
case 'h':
print_help();
@@ -144,6 +155,11 @@ int main(int argc, char * argv[]) {
}
}
+ if (!argspresent) {
+ print_help();
+ exit(1);
+ }
+
if (!extantset && !timeset) {
std::cerr << "Error: you have to set -e or -t. Exiting." << std::endl;
exit(0);
diff --git a/src/main_boot.cpp b/src/main_boot.cpp
index 732600e..e42f7fc 100644
--- a/src/main_boot.cpp
+++ b/src/main_boot.cpp
@@ -22,6 +22,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Sequence alignment bootstrap or jackknife resampling." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_bp.cpp b/src/main_bp.cpp
index c2153a4..8c2d990 100644
--- a/src/main_bp.cpp
+++ b/src/main_bp.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "This will print out bipartitions found in treefile." << std::endl;
std::cout << "Trees are assumed rooted unless the -e argument is provided." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
diff --git a/src/main_cat.cpp b/src/main_cat.cpp
index e113759..d2af148 100644
--- a/src/main_cat.cpp
+++ b/src/main_cat.cpp
@@ -4,6 +4,7 @@
#include <vector>
#include <cstring>
#include <getopt.h>
+#include <unistd.h>
#include "sequence.h"
#include "seq_reader.h"
@@ -17,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Sequence file concatenation." << std::endl;
std::cout << "Can use wildcards e.g.:" << std::endl;
std::cout << " pxcat -s *.phy -o my_cat_file.fa" << std::endl;
@@ -33,7 +35,7 @@ void print_help () {
std::cout << std::endl;
std::cout << "Options:" << std::endl;
std::cout << " -s, --seqf=FILE list of input sequence files (space delimited)" << std::endl;
- std::cout << " -f, --flistFILE file listing input files (one per line)" << std::endl;
+ std::cout << " -f, --flist=FILE file listing input files (one per line)" << std::endl;
std::cout << " -p, --partf=FILE output partition file, none otherwise" << std::endl;
std::cout << " -u, --uppercase export characters in uppercase" << std::endl;
std::cout << " -o, --outf=FILE output sequence file, STOUT otherwise" << std::endl;
@@ -139,8 +141,9 @@ int main(int argc, char * argv[]) {
}
if (!fileset) {
- std::cerr << "Error: must specify 1 or more files to concatenate. Exiting." << std::endl;
- exit(0);
+ sleep(1);
+ print_help();
+ exit(1);
}
if (!listf.empty()) {
std::string line;
diff --git a/src/main_clsq.cpp b/src/main_clsq.cpp
index 3991c7f..adae166 100644
--- a/src/main_clsq.cpp
+++ b/src/main_clsq.cpp
@@ -13,11 +13,13 @@
// TODO: throw out stop_codons: "TAG", "TAA", "TGA"
+// TODO: read in partition file, edit and write out
void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Clean alignments by removing positions/taxa with too much ambiguous data." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << "Results are written in fasta format." << std::endl;
@@ -27,6 +29,9 @@ void print_help () {
std::cout << "Options:" << std::endl;
std::cout << " -s, --seqf=FILE input sequence file, STDIN otherwise" << std::endl;
std::cout << " -p, --prop=DOUBLE proportion required to be present, default=0.5" << std::endl;
+ std::cout << " -e, --empty remove columns that are completely empty (- or ?)" << std::endl;
+ std::cout << " -m, --min=INT the minimum number of good characters required per site" << std::endl;
+ std::cout << " - a min of 1 is equivalent to -e above" << std::endl;
std::cout << " -t, --taxa consider missing data per taxon (default: per site)" << std::endl;
std::cout << " -c, --codon examine sequences by codon rather than site" << std::endl;
std::cout << " - requires all sequences be in frame and of correct length" << std::endl;
@@ -56,6 +61,8 @@ static struct option const long_options[] =
{"seqf", required_argument, nullptr, 's'},
{"outf", required_argument, nullptr, 'o'},
{"prop", required_argument, nullptr, 'p'},
+ {"empty", no_argument, nullptr, 'e'},
+ {"min", required_argument, nullptr, 'm'},
{"taxa", required_argument, nullptr, 't'},
{"codon", required_argument, nullptr, 'c'},
{"info", required_argument, nullptr, 'i'},
@@ -79,10 +86,12 @@ int main(int argc, char * argv[]) {
bool by_taxon = false;
bool by_codon = false;
bool count_only = false;
+ bool remove_empty = false;
+ int min_chars = 0;
while (true) {
int oi = -1;
- int c = getopt_long(argc, argv, "s:o:p:atcivhVC", long_options, &oi);
+ int c = getopt_long(argc, argv, "s:o:p:em:atcivhVC", long_options, &oi);
if (c == -1) {
break;
}
@@ -104,6 +113,12 @@ int main(int argc, char * argv[]) {
exit(0);
}
break;
+ case 'e':
+ remove_empty = true;
+ break;
+ case 'm':
+ min_chars = string_to_int(optarg, "-m");
+ break;
case 't':
by_taxon = true;
break;
@@ -157,7 +172,8 @@ int main(int argc, char * argv[]) {
}
}
- SequenceCleaner SC(pios, prop_required, by_taxon, by_codon, count_only, verbose);
+ SequenceCleaner SC(pios, prop_required, remove_empty, min_chars, by_taxon,
+ by_codon, count_only, verbose);
// write sequences. currently only fasta format.
if (!count_only) {
diff --git a/src/main_cltr.cpp b/src/main_cltr.cpp
index 5dc533e..ab4a2fc 100644
--- a/src/main_cltr.cpp
+++ b/src/main_cltr.cpp
@@ -20,6 +20,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "General tree cleaner." << std::endl;
std::cout << "Removes annotations (node labels), 'knuckles' (2-degree nodes), and" << std::endl;
std::cout << "root edges to generate a 'vanilla' newick representation." << std::endl;
diff --git a/src/main_colt.cpp b/src/main_colt.cpp
index 55a6aee..7856349 100644
--- a/src/main_colt.cpp
+++ b/src/main_colt.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Collapse edges with support below some threshold." << std::endl;
std::cout << "If annotated Nexus, may require passing in the support identifier (-s)." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
diff --git a/src/main_comp.cpp b/src/main_comp.cpp
index af76bd4..9d4080c 100644
--- a/src/main_comp.cpp
+++ b/src/main_comp.cpp
@@ -16,6 +16,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Sequence compositional homogeneity test." << std::endl;
std::cout << "Chi-square test for equivalent character state counts across lineages." << std::endl;
std::cout << "This will take fasta, phylip, and nexus formats from a file or STDIN." << std::endl;
diff --git a/src/main_consq.cpp b/src/main_consq.cpp
index f463004..36b9086 100644
--- a/src/main_consq.cpp
+++ b/src/main_consq.cpp
@@ -17,6 +17,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Compute a consensus sequence from an alignment." << std::endl;
std::cout << "This will take fasta, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << "Results are written in fasta format." << std::endl;
diff --git a/src/main_contrates.cpp b/src/main_contrates.cpp
index 00ee2d9..a63ee51 100644
--- a/src/main_contrates.cpp
+++ b/src/main_contrates.cpp
@@ -23,6 +23,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Continuous character rate estimation with Brownian and OU." << std::endl;
std::cout << "This will take fasta, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << std::endl;
@@ -128,6 +129,16 @@ int main(int argc, char * argv[]) {
poos = tfstr;
} else {
poos = &std::cin;
+ if (!check_for_input_to_stream()) {
+ // if both inputs missing: print help
+ if (!cfileset) {
+ print_help();
+ exit(1);
+ } else {
+ std::cerr << "Error: missing required tree input. Exiting." << std::endl;
+ exit(1);
+ }
+ }
}
if (cfileset) {
diff --git a/src/main_fqfilt.cpp b/src/main_fqfilt.cpp
index 6034c36..15cef52 100644
--- a/src/main_fqfilt.cpp
+++ b/src/main_fqfilt.cpp
@@ -15,6 +15,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Filter fastq files by mean quality." << std::endl;
std::cout << "Data can be read from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_log.cpp b/src/main_log.cpp
index d870619..656b304 100644
--- a/src/main_log.cpp
+++ b/src/main_log.cpp
@@ -16,6 +16,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "MCMC log file manipulator." << std::endl;
std::cout << "Can combine and resample parameters or trees across files." << std::endl;
std::cout << "Log files need not contain the same number of samples." << std::endl;
@@ -100,6 +101,7 @@ int main(int argc, char * argv[]) {
std::string incolids;
std::string logtype;
char * outf = nullptr;
+ bool argspresent = false;
while (true) {
int oi = -1;
@@ -130,6 +132,7 @@ int main(int argc, char * argv[]) {
}
}
logtype = "parameter";
+ argspresent = true;
break;
case 't':
tfileset = true;
@@ -152,10 +155,12 @@ int main(int argc, char * argv[]) {
}
}
logtype = "tree";
+ argspresent = true;
break;
case 'o':
outfileset = true;
outf = strdup(optarg);
+ argspresent = true;
break;
case 'b':
burnin = string_to_int(optarg, "-b");
@@ -163,6 +168,7 @@ int main(int argc, char * argv[]) {
std::cerr << "Error: burnin must be a positive integer. Exiting." << std::endl;
exit(0);
}
+ argspresent = true;
break;
case 'n':
nthin = string_to_int(optarg, "-n");
@@ -170,36 +176,45 @@ int main(int argc, char * argv[]) {
std::cerr << "Error: nthin must be a >= 1. Exiting." << std::endl;
exit(0);
}
+ argspresent = true;
break;
case 'r':
nrandom = string_to_int(optarg, "-r");
+ argspresent = true;
break;
case 'i':
count = true;
+ argspresent = true;
break;
case 's':
summarize = true;
+ argspresent = true;
break;
case 'c':
get_columns = true;
+ argspresent = true;
break;
case 'd':
delete_columns = true;
incolids = strdup(optarg);
parse_comma_list(incolids, col_indices);
sort(col_indices.begin(), col_indices.end());
+ argspresent = true;
break;
case 'k':
keep_columns = true;
incolids = strdup(optarg);
parse_comma_list(incolids, col_indices);
sort(col_indices.begin(), col_indices.end());
+ argspresent = true;
break;
case 'x':
seed = string_to_long_int(optarg, "-x");
+ argspresent = true;
break;
case 'v':
verbose = true;
+ argspresent = true;
break;
case 'h':
print_help();
@@ -216,6 +231,11 @@ int main(int argc, char * argv[]) {
}
}
+ if (!argspresent) {
+ print_help();
+ exit(1);
+ }
+
std::ostream * poos = nullptr;
std::ofstream * ofstr = nullptr;
diff --git a/src/main_lssq.cpp b/src/main_lssq.cpp
index bccc962..31e6041 100644
--- a/src/main_lssq.cpp
+++ b/src/main_lssq.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Print sequence file summary." << std::endl;
std::cout << "By default returns all properties. Alternatively choose 1 property." << std::endl;
std::cout << "This will take fasta, phylip, and nexus formats from a file or STDIN." << std::endl;
diff --git a/src/main_lstr.cpp b/src/main_lstr.cpp
index 70021a0..54138ab 100644
--- a/src/main_lstr.cpp
+++ b/src/main_lstr.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Print tree summary." << std::endl;
std::cout << "By default returns all properties. Alternatively choose 1 property." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
@@ -159,15 +160,6 @@ int main(int argc, char * argv[]) {
}
}
- if (fileset && outfileset) {
- check_inout_streams_identical(treef, outf);
- }
-
- if (propcount > 1) {
- std::cerr << "Error: specify 1 property only (or leave blank to show all properties). Exiting." << std::endl;
- exit(0);
- }
-
std::istream * pios = nullptr;
std::ostream * poos = nullptr;
std::ifstream * fstr = nullptr;
@@ -190,6 +182,15 @@ int main(int argc, char * argv[]) {
exit(1);
}
}
+
+ if (fileset && outfileset) {
+ check_inout_streams_identical(treef, outf);
+ }
+
+ if (propcount > 1) {
+ std::cerr << "Error: specify 1 property only (or leave blank to show all properties). Exiting." << std::endl;
+ exit(0);
+ }
std::string retstring;
int ft = test_tree_filetype_stream(*pios, retstring);
diff --git a/src/main_mono.cpp b/src/main_mono.cpp
index f6ff22b..7fd112d 100644
--- a/src/main_mono.cpp
+++ b/src/main_mono.cpp
@@ -20,6 +20,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Monophyly checker." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << std::endl;
@@ -125,6 +126,28 @@ int main(int argc, char * argv[]) {
}
}
+ std::istream * pios = nullptr;
+ std::ostream * poos = nullptr;
+ std::ifstream * fstr = nullptr;
+ std::ofstream * ofstr = nullptr;
+
+ if (fileset) {
+ fstr = new std::ifstream(treef);
+ pios = fstr;
+ } else {
+ pios = &std::cin;
+ if (!check_for_input_to_stream()) {
+ print_help();
+ exit(1);
+ }
+ }
+ if (outfileset) {
+ ofstr = new std::ofstream(outf);
+ poos = ofstr;
+ } else {
+ poos = &std::cout;
+ }
+
if (fileset && outfileset) {
check_inout_streams_identical(treef, outf);
}
@@ -152,28 +175,6 @@ int main(int argc, char * argv[]) {
std::cerr << "Error: you need to provide a set of tip names (-n). Exiting." << std::endl;
exit(0);
}
-
- std::istream * pios = nullptr;
- std::ostream * poos = nullptr;
- std::ifstream * fstr = nullptr;
- std::ofstream * ofstr = nullptr;
-
- if (fileset) {
- fstr = new std::ifstream(treef);
- pios = fstr;
- } else {
- pios = &std::cin;
- if (!check_for_input_to_stream()) {
- print_help();
- exit(1);
- }
- }
- if (outfileset) {
- ofstr = new std::ofstream(outf);
- poos = ofstr;
- } else {
- poos = &std::cout;
- }
// read trees
std::string retstring;
diff --git a/src/main_mrca.cpp b/src/main_mrca.cpp
index fd27964..0708051 100644
--- a/src/main_mrca.cpp
+++ b/src/main_mrca.cpp
@@ -20,6 +20,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Get the number of descendant tips of internal nodes specified by mrca statements." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN," << std::endl;
std::cout << "and an MRCA file with format:" << std::endl;
@@ -103,26 +104,11 @@ int main(int argc, char * argv[]) {
}
}
- if (fileset && outfileset) {
- check_inout_streams_identical(treef, outf);
- }
-
- if (!mrcaset) {
- std::cout << "Must supply mrca file" << std::endl;
- exit(0);
- }
-
std::istream * pios = nullptr;
std::ostream * poos = nullptr;
std::ifstream * fstr = nullptr;
std::ofstream * ofstr = nullptr;
- if (outfileset) {
- ofstr = new std::ofstream(outf, std::ios::app);
- poos = ofstr;
- } else {
- poos = &std::cout;
- }
if (fileset) {
fstr = new std::ifstream(treef);
pios = fstr;
@@ -133,6 +119,21 @@ int main(int argc, char * argv[]) {
exit(1);
}
}
+ if (outfileset) {
+ ofstr = new std::ofstream(outf, std::ios::app);
+ poos = ofstr;
+ } else {
+ poos = &std::cout;
+ }
+
+ if (fileset && outfileset) {
+ check_inout_streams_identical(treef, outf);
+ }
+
+ if (!mrcaset) {
+ std::cout << "Must supply mrca file" << std::endl;
+ exit(0);
+ }
std::ifstream inmrca(mrcaf);
std::string mrcaline;
diff --git a/src/main_mrca_cut.cpp b/src/main_mrca_cut.cpp
index 42b1395..dfa9193 100644
--- a/src/main_mrca_cut.cpp
+++ b/src/main_mrca_cut.cpp
@@ -20,6 +20,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Extract subclade(s) from tree(s)." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN," << std::endl;
std::cout << "and an MRCA file with format:" << std::endl;
@@ -106,15 +107,6 @@ int main(int argc, char * argv[]) {
}
}
- if (fileset && outfileset) {
- check_inout_streams_identical(treef, outf);
- }
-
- if (!mrcaset) {
- std::cerr << "Error: must supply mrca file. Exiting." << std::endl;
- exit(0);
- }
-
std::istream * pios = nullptr;
std::ostream * poos = nullptr;
std::ifstream * fstr = nullptr;
@@ -137,6 +129,15 @@ int main(int argc, char * argv[]) {
}
}
+ if (fileset && outfileset) {
+ check_inout_streams_identical(treef, outf);
+ }
+
+ if (!mrcaset) {
+ std::cerr << "Error: must supply mrca file. Exiting." << std::endl;
+ exit(0);
+ }
+
/*
collect clade names
expecting (new) format:
diff --git a/src/main_mrca_name.cpp b/src/main_mrca_name.cpp
index c198827..12fa1c8 100644
--- a/src/main_mrca_name.cpp
+++ b/src/main_mrca_name.cpp
@@ -25,6 +25,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Label internal nodes with clade names." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN," << std::endl;
std::cout << "and an MRCA file with format:" << std::endl;
@@ -114,15 +115,6 @@ int main(int argc, char * argv[]) {
}
}
- if (fileset && outfileset) {
- check_inout_streams_identical(treef, outf);
- }
-
- if (!mrcaset) {
- std::cerr << "Because no mrca file was provided, all the internal nodes will be labelled"
- << std::endl;
- }
-
std::istream * pios = nullptr;
std::ostream * poos = nullptr;
std::ifstream * fstr = nullptr;
@@ -145,6 +137,15 @@ int main(int argc, char * argv[]) {
}
}
+ if (fileset && outfileset) {
+ check_inout_streams_identical(treef, outf);
+ }
+
+ if (!mrcaset) {
+ std::cerr << "Because no mrca file was provided, all the internal nodes will be labelled"
+ << std::endl;
+ }
+
/*
collect clade names
expecting (new) format:
diff --git a/src/main_nj.cpp b/src/main_nj.cpp
index d2c975c..0d1f164 100644
--- a/src/main_nj.cpp
+++ b/src/main_nj.cpp
@@ -25,6 +25,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Basic neighbour-joining tree maker." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus inputs from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_nni.cpp b/src/main_nni.cpp
index c0d83fe..0522317 100644
--- a/src/main_nni.cpp
+++ b/src/main_nni.cpp
@@ -19,6 +19,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Generate a Nearest Neighbor Interchange (NNI) tree." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << "Output is written in newick format." << std::endl;
diff --git a/src/main_nw.cpp b/src/main_nw.cpp
index a4fff9c..94b4c7b 100644
--- a/src/main_nw.cpp
+++ b/src/main_nw.cpp
@@ -24,6 +24,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Conduct Needleman-Wunsch analysis for all the seqs in a file." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << "Output is a list of the scores and distances (and the alignments if asked)." << std::endl;
diff --git a/src/main_poly.cpp b/src/main_poly.cpp
index a2e7377..5ca9261 100644
--- a/src/main_poly.cpp
+++ b/src/main_poly.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Randomly sample polytomies to generate a binary tree." << std::endl;
std::cout << "Currently only works with rooted trees (checked)" << std::endl;
std::cout << "Output is written in newick format." << std::endl;
diff --git a/src/main_recode.cpp b/src/main_recode.cpp
index 428cbc7..69ade9a 100644
--- a/src/main_recode.cpp
+++ b/src/main_recode.cpp
@@ -22,6 +22,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Nucleotide sequence recoding." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_revcomp.cpp b/src/main_revcomp.cpp
index c5b6c34..249703e 100644
--- a/src/main_revcomp.cpp
+++ b/src/main_revcomp.cpp
@@ -22,6 +22,7 @@ std::string get_version_line ();
bool reverse_it_or_not (std::vector<Sequence>& seqs, Sequence comp_seq);
void print_help () {
+ std::cout << std::endl;
std::cout << "Reverse complement sequences." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << "Results are written in fasta format." << std::endl;
diff --git a/src/main_rls.cpp b/src/main_rls.cpp
index 2b9a05e..3f2790b 100644
--- a/src/main_rls.cpp
+++ b/src/main_rls.cpp
@@ -18,22 +18,26 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Taxon relabelling for alignments." << std::endl;
std::cout << "This will take fasta, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << "Two ordered lists of taxa, -c (current) and -n (new) must be provided." << std::endl;
+ std::cout << "Alternatively, a regex pattern (-p) and replacement (-r) text can be provided." << std::endl;
std::cout << "Results are written in fasta format." << std::endl;
std::cout << std::endl;
std::cout << "Usage: pxrls [OPTIONS]... FILES" << std::endl;
std::cout << std::endl;
std::cout << "Options:" << std::endl;
- std::cout << " -s, --seqf=FILE input seq file, STDIN otherwise" << std::endl;
- std::cout << " -c, --cnames=FILE file containing current taxon labels (one per line)" << std::endl;
- std::cout << " -n, --nnames=FILE file containing new taxon labels (one per line)" << std::endl;
- std::cout << " -v, --verbose make the output more verbose" << std::endl;
- std::cout << " -o, --outf=FILE output file, STOUT otherwise" << std::endl;
- std::cout << " -h, --help display this help and exit" << std::endl;
- std::cout << " -V, --version display version and exit" << std::endl;
- std::cout << " -C, --citation display phyx citation and exit" << std::endl;
+ std::cout << " -s, --seqf=FILE input seq file, STDIN otherwise" << std::endl;
+ std::cout << " -c, --cnames=FILE file containing current taxon labels (one per line)" << std::endl;
+ std::cout << " -n, --nnames=FILE file containing new taxon labels (one per line)" << std::endl;
+ std::cout << " -p, --pattern=STRING regex pattern to replace" << std::endl;
+ std::cout << " -r, --replace=STRING replacement pattern" << std::endl;
+ std::cout << " -v, --verbose make the output more verbose" << std::endl;
+ std::cout << " -o, --outf=FILE output file, STOUT otherwise" << std::endl;
+ std::cout << " -h, --help display this help and exit" << std::endl;
+ std::cout << " -V, --version display version and exit" << std::endl;
+ std::cout << " -C, --citation display phyx citation and exit" << std::endl;
std::cout << std::endl;
std::cout << "Report bugs to: <https://github.com/FePhyFoFum/phyx/issues>" << std::endl;
std::cout << "phyx home page: <https://github.com/FePhyFoFum/phyx>" << std::endl;
@@ -52,6 +56,8 @@ static struct option const long_options[] =
{"seqf", required_argument, nullptr, 's'},
{"cnames", required_argument, nullptr, 'c'},
{"nnames", required_argument, nullptr, 'n'},
+ {"pattern", required_argument, nullptr, 'p'},
+ {"replace", required_argument, nullptr, 'r'},
{"outf", required_argument, nullptr, 'o'},
{"verbose", no_argument, nullptr, 'v'},
{"help", no_argument, nullptr, 'h'},
@@ -69,6 +75,9 @@ int main(int argc, char * argv[]) {
bool cfileset = false;
bool nfileset = false;
bool verbose = false;
+ bool regex = false;
+ std::string regex_pattern;
+ std::string replacement_text;
char * outf = nullptr;
char * seqf = nullptr;
std::string cnamef;
@@ -76,7 +85,7 @@ int main(int argc, char * argv[]) {
while (true) {
int oi = -1;
- int c = getopt_long(argc, argv, "s:c:n:o:vhVC", long_options, &oi);
+ int c = getopt_long(argc, argv, "s:c:n:p:r:o:vhVC", long_options, &oi);
if (c == -1) {
break;
}
@@ -96,6 +105,14 @@ int main(int argc, char * argv[]) {
nnamef = strdup(optarg);
check_file_exists(nnamef);
break;
+            case 'p':
+                regex = true;
+                regex_pattern = optarg; // std::string copies the text; a strdup'ed buffer would never be freed
+                break;
+            case 'r':
+                regex = true;
+                replacement_text = optarg; // same as -p: copy straight from optarg, nothing to release
+                break;
case 'o':
outfileset = true;
outf = strdup(optarg);
@@ -127,11 +144,6 @@ int main(int argc, char * argv[]) {
std::ifstream * fstr = nullptr;
std::ofstream * ofstr = nullptr;
- if (!nfileset || !cfileset) {
- std::cerr << "Error: must supply both name files (-c for current, -n for new). Exiting." << std::endl;
- exit(0);
- }
-
if (sfileset) {
fstr = new std::ifstream(seqf);
pios = fstr;
@@ -149,93 +161,169 @@ int main(int argc, char * argv[]) {
poos = &std::cout;
}
- Relabel rl (cnamef, nnamef, verbose);
-
- std::set<std::string> orig = rl.get_names_to_replace();
-
- Sequence seq;
- std::string retstring;
- bool success = false;
- int num_taxa, num_char; // not used, but required by some reader functions
-
- int ft = test_seq_filetype_stream(*pios, retstring);
-
- // extra stuff to deal with possible interleaved nexus
- if (ft == 0) {
- bool interleave = false;
- get_nexus_dimensions(*pios, num_taxa, num_char, interleave);
- retstring = ""; // need to do this to let seqreader know we are mid-file
- if (!interleave) {
- while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
- std::string terp = seq.get_id();
- success = rl.relabel_sequence(seq);
- if (success) {
- orig.erase(terp);
+ if (!regex) {
+ if (!nfileset || !cfileset) {
+ std::cerr << "Error: must supply both name files (-c for current, -n for new). Exiting." << std::endl;
+ exit(0);
+ }
+
+ Relabel rl (cnamef, nnamef, verbose);
+
+ std::set<std::string> orig = rl.get_names_to_replace();
+
+ Sequence seq;
+ std::string retstring;
+ bool success = false;
+ int num_taxa, num_char; // not used, but required by some reader functions
+
+ int ft = test_seq_filetype_stream(*pios, retstring);
+
+ // extra stuff to deal with possible interleaved nexus
+ if (ft == 0) {
+ bool interleave = false;
+ get_nexus_dimensions(*pios, num_taxa, num_char, interleave);
+ retstring = ""; // need to do this to let seqreader know we are mid-file
+ if (!interleave) {
+ while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
+ std::string terp = seq.get_id();
+ success = rl.relabel_sequence(seq);
+ if (success) {
+ orig.erase(terp);
+ }
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
+ }
+ } else {
+ std::vector<Sequence> seqs = read_interleaved_nexus(*pios, num_taxa, num_char);
+ for (const auto & sq : seqs) {
+ seq = sq;
+ std::string terp = seq.get_id();
+ success = rl.relabel_sequence(seq);
+ if (success) {
+ orig.erase(terp);
+ }
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
}
- (*poos) << ">" << seq.get_id() << std::endl;
- (*poos) << seq.get_sequence() << std::endl;
}
} else {
- std::vector<Sequence> seqs = read_interleaved_nexus(*pios, num_taxa, num_char);
- for (const auto & sq : seqs) {
- seq = sq;
- std::string terp = seq.get_id();
- success = rl.relabel_sequence(seq);
- if (success) {
- orig.erase(terp);
+ bool complicated_phylip = false;
+ // check if we are dealing with a complicated phylip format
+ if (ft == 1) {
+ get_phylip_dimensions(retstring, num_taxa, num_char);
+ complicated_phylip = is_complicated_phylip(*pios, num_char);
+ }
+ if (complicated_phylip) {
+ std::vector<Sequence> seqs = read_phylip(*pios, num_taxa, num_char);
+ for (const auto & sq : seqs) {
+ seq = sq;
+ std::string terp = seq.get_id();
+ success = rl.relabel_sequence(seq);
+ if (success) {
+ orig.erase(terp);
+ }
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
+ }
+ } else {
+ // fasta, fastq, or simple phylip
+ while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
+ std::string terp = seq.get_id();
+ success = rl.relabel_sequence(seq);
+ if (success) {
+ orig.erase(terp);
+ }
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
+ }
+ // fasta has a trailing one
+ if (ft == 2) {
+ std::string terp = seq.get_id();
+ success = rl.relabel_sequence(seq);
+ if (success) {
+ orig.erase(terp);
+ }
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
+ }
+ }
+ }
+
+ if (!orig.empty()) {
+ if (verbose) {
+ std::cerr << "The following names to match were not found in the alignment:" << std::endl;
+ for (const auto & elem : orig) {
+ std::cerr << elem << std::endl;
}
- (*poos) << ">" << seq.get_id() << std::endl;
- (*poos) << seq.get_sequence() << std::endl;
}
}
} else {
- bool complicated_phylip = false;
- // check if we are dealing with a complicated phylip format
- if (ft == 1) {
- get_phylip_dimensions(retstring, num_taxa, num_char);
- complicated_phylip = is_complicated_phylip(*pios, num_char);
+ // regex
+ if (replacement_text.empty() || regex_pattern.empty()) {
+ std::cerr << "Error: must supply both pattern to match and replacement text. Exiting." << std::endl;
+ exit(0);
}
- if (complicated_phylip) {
- std::vector<Sequence> seqs = read_phylip(*pios, num_taxa, num_char);
- for (const auto & sq : seqs) {
- seq = sq;
- std::string terp = seq.get_id();
- success = rl.relabel_sequence(seq);
- if (success) {
- orig.erase(terp);
+
+ Relabel rl (regex_pattern, replacement_text);
+
+ Sequence seq;
+ std::string retstring;
+ int num_taxa, num_char; // not used, but required by some reader functions
+
+ int ft = test_seq_filetype_stream(*pios, retstring);
+
+ // extra stuff to deal with possible interleaved nexus
+ if (ft == 0) {
+ bool interleave = false;
+ get_nexus_dimensions(*pios, num_taxa, num_char, interleave);
+ retstring = ""; // need to do this to let seqreader know we are mid-file
+ if (!interleave) {
+ while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
+ rl.regex_relabel_sequence(seq);
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
+ }
+ } else {
+ std::vector<Sequence> seqs = read_interleaved_nexus(*pios, num_taxa, num_char);
+ for (const auto & sq : seqs) {
+ seq = sq;
+ std::string terp = seq.get_id();
+ rl.regex_relabel_sequence(seq);
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
}
- (*poos) << ">" << seq.get_id() << std::endl;
- (*poos) << seq.get_sequence() << std::endl;
}
} else {
- // fasta, fastq, or simple phylip
- while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
- std::string terp = seq.get_id();
- success = rl.relabel_sequence(seq);
- if (success) {
- orig.erase(terp);
- }
- (*poos) << ">" << seq.get_id() << std::endl;
- (*poos) << seq.get_sequence() << std::endl;
+ bool complicated_phylip = false;
+ // check if we are dealing with a complicated phylip format
+ if (ft == 1) {
+ get_phylip_dimensions(retstring, num_taxa, num_char);
+ complicated_phylip = is_complicated_phylip(*pios, num_char);
}
- // fasta has a trailing one
- if (ft == 2) {
- std::string terp = seq.get_id();
- success = rl.relabel_sequence(seq);
- if (success) {
- orig.erase(terp);
+ if (complicated_phylip) {
+ std::vector<Sequence> seqs = read_phylip(*pios, num_taxa, num_char);
+ for (const auto & sq : seqs) {
+ seq = sq;
+ std::string terp = seq.get_id();
+ rl.regex_relabel_sequence(seq);
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
+ }
+ } else {
+ // fasta, fastq, or simple phylip
+ while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
+ std::string terp = seq.get_id();
+ rl.regex_relabel_sequence(seq);
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
+ }
+ // fasta has a trailing one
+ if (ft == 2) {
+ std::string terp = seq.get_id();
+ rl.regex_relabel_sequence(seq);
+ (*poos) << ">" << seq.get_id() << std::endl;
+ (*poos) << seq.get_sequence() << std::endl;
}
- (*poos) << ">" << seq.get_id() << std::endl;
- (*poos) << seq.get_sequence() << std::endl;
- }
- }
- }
-
- if (!orig.empty()) {
- if (verbose) {
- std::cerr << "The following names to match were not found in the alignment:" << std::endl;
- for (const auto & elem : orig) {
- std::cerr << elem << std::endl;
}
}
}
diff --git a/src/main_rlt.cpp b/src/main_rlt.cpp
index cdc3497..6b001d6 100644
--- a/src/main_rlt.cpp
+++ b/src/main_rlt.cpp
@@ -18,22 +18,26 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Taxon relabelling for trees." << std::endl;
std::cout << "Two ordered lists of taxa, -c (current) and -n (new) must be provided." << std::endl;
+ std::cout << "Alternatively, a regex pattern (-p) and replacement (-r) text can be provided." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << "Output is written in newick format." << std::endl;
std::cout << std::endl;
std::cout << "Usage: pxrlt [OPTIONS]..." << std::endl;
std::cout << std::endl;
std::cout << "Options:" << std::endl;
- std::cout << " -t, --treef=FILE input tree file, STDIN otherwise" << std::endl;
- std::cout << " -c, --cnames=FILE file containing current taxon labels (one per line)" << std::endl;
- std::cout << " -n, --nnames=FILE file containing new taxon labels (one per line)" << std::endl;
- std::cout << " -v, --verbose make the output more verbose" << std::endl;
- std::cout << " -o, --outf=FILE output file, STOUT otherwise" << std::endl;
- std::cout << " -h, --help display this help and exit" << std::endl;
- std::cout << " -V, --version display version and exit" << std::endl;
- std::cout << " -C, --citation display phyx citation and exit" << std::endl;
+ std::cout << " -t, --treef=FILE input tree file, STDIN otherwise" << std::endl;
+ std::cout << " -c, --cnames=FILE file containing current taxon labels (one per line)" << std::endl;
+ std::cout << " -n, --nnames=FILE file containing new taxon labels (one per line)" << std::endl;
+ std::cout << " -p, --pattern=STRING regex pattern to replace" << std::endl;
+ std::cout << " -r, --replace=STRING replacement pattern" << std::endl;
+ std::cout << " -v, --verbose make the output more verbose" << std::endl;
+ std::cout << " -o, --outf=FILE output file, STOUT otherwise" << std::endl;
+ std::cout << " -h, --help display this help and exit" << std::endl;
+ std::cout << " -V, --version display version and exit" << std::endl;
+ std::cout << " -C, --citation display phyx citation and exit" << std::endl;
std::cout << std::endl;
std::cout << "Report bugs to: <https://github.com/FePhyFoFum/phyx/issues>" << std::endl;
std::cout << "phyx home page: <https://github.com/FePhyFoFum/phyx>" << std::endl;
@@ -52,6 +56,8 @@ static struct option const long_options[] =
{"treef", required_argument, nullptr, 't'},
{"cnames", required_argument, nullptr, 'c'},
{"nnames", required_argument, nullptr, 'n'},
+ {"pattern", required_argument, nullptr, 'p'},
+ {"replace", required_argument, nullptr, 'r'},
{"outf", required_argument, nullptr, 'o'},
{"verbose", no_argument, nullptr, 'v'},
{"help", no_argument, nullptr, 'h'},
@@ -69,6 +75,9 @@ int main(int argc, char * argv[]) {
bool cfileset = false;
bool nfileset = false;
bool verbose = false;
+ bool regex = false;
+ std::string regex_pattern;
+ std::string replacement_text;
char * outf = nullptr;
char * treef = nullptr;
std::string cnamef;
@@ -76,7 +85,7 @@ int main(int argc, char * argv[]) {
while (true) {
int oi = -1;
- int c = getopt_long(argc, argv, "t:c:n:o:vhVC", long_options, &oi);
+ int c = getopt_long(argc, argv, "t:c:n:p:r:o:vhVC", long_options, &oi);
if (c == -1) {
break;
}
@@ -96,6 +105,14 @@ int main(int argc, char * argv[]) {
nnamef = strdup(optarg);
check_file_exists(nnamef);
break;
+            case 'p':
+                regex = true;
+                regex_pattern = optarg; // std::string copies the text; a strdup'ed buffer would never be freed
+                break;
+            case 'r':
+                regex = true;
+                replacement_text = optarg; // same as -p: copy straight from optarg, nothing to release
+                break;
case 'o':
outfileset = true;
outf = strdup(optarg);
@@ -127,11 +144,6 @@ int main(int argc, char * argv[]) {
std::ifstream * fstr = nullptr;
std::ofstream * ofstr = nullptr;
- if (!nfileset || !cfileset) {
- std::cerr << "Error: must supply both name files (-c for current, -n for new). Exiting." << std::endl;
- exit(0);
- }
-
if (tfileset) {
fstr = new std::ifstream(treef);
pios = fstr;
@@ -149,35 +161,81 @@ int main(int argc, char * argv[]) {
poos = &std::cout;
}
- Relabel rl (cnamef, nnamef, verbose);
-
- std::string retstring;
- int ft = test_tree_filetype_stream(*pios, retstring);
- if (ft != 0 && ft != 1) {
- std::cerr << "Error: this really only works with nexus or newick. Exiting." << std::endl;
- exit(0);
- }
- bool going = true;
- if (ft == 1) {
- while (going) {
- Tree * tree = read_next_tree_from_stream_newick(*pios, retstring, &going);
- if (going) {
- rl.relabel_tree(tree);
- (*poos) << getNewickString(tree) << std::endl;
- delete tree;
+ if (!regex) {
+ if (!nfileset || !cfileset) {
+ std::cerr << "Error: must supply both name files (-c for current, -n for new). Exiting." << std::endl;
+ exit(0);
+ }
+
+ Relabel rl (cnamef, nnamef, verbose);
+
+ std::string retstring;
+ int ft = test_tree_filetype_stream(*pios, retstring);
+ if (ft != 0 && ft != 1) {
+ std::cerr << "Error: this really only works with nexus or newick. Exiting." << std::endl;
+ exit(0);
+ }
+ bool going = true;
+ if (ft == 1) {
+ while (going) {
+ Tree * tree = read_next_tree_from_stream_newick(*pios, retstring, &going);
+ if (going) {
+ rl.relabel_tree(tree);
+ (*poos) << getNewickString(tree) << std::endl;
+ delete tree;
+ }
+ }
+ } else if (ft == 0) { // Nexus. need to worry about possible translation tables
+ std::map<std::string, std::string> translation_table;
+ bool ttexists;
+ ttexists = get_nexus_translation_table(*pios, &translation_table, &retstring);
+ while (going) {
+ Tree * tree = read_next_tree_from_stream_nexus(*pios, retstring, ttexists,
+ &translation_table, &going);
+ if (tree != nullptr) {
+ rl.relabel_tree(tree);
+ (*poos) << getNewickString(tree) << std::endl;
+ delete tree;
+ }
}
}
- } else if (ft == 0) { // Nexus. need to worry about possible translation tables
- std::map<std::string, std::string> translation_table;
- bool ttexists;
- ttexists = get_nexus_translation_table(*pios, &translation_table, &retstring);
- while (going) {
- Tree * tree = read_next_tree_from_stream_nexus(*pios, retstring, ttexists,
- &translation_table, &going);
- if (tree != nullptr) {
- rl.relabel_tree(tree);
- (*poos) << getNewickString(tree) << std::endl;
- delete tree;
+ } else {
+ // regex
+ if (replacement_text.empty() || regex_pattern.empty()) {
+ std::cerr << "Error: must supply both pattern to match and replacement text. Exiting." << std::endl;
+ exit(0);
+ }
+
+ Relabel rl (regex_pattern, replacement_text);
+
+ std::string retstring;
+ int ft = test_tree_filetype_stream(*pios, retstring);
+ if (ft != 0 && ft != 1) {
+ std::cerr << "Error: this really only works with nexus or newick. Exiting." << std::endl;
+ exit(0);
+ }
+ bool going = true;
+ if (ft == 1) {
+ while (going) {
+ Tree * tree = read_next_tree_from_stream_newick(*pios, retstring, &going);
+ if (going) {
+ rl.regex_relabel_tree(tree);
+ (*poos) << getNewickString(tree) << std::endl;
+ delete tree;
+ }
+ }
+ } else if (ft == 0) { // Nexus. need to worry about possible translation tables
+ std::map<std::string, std::string> translation_table;
+ bool ttexists;
+ ttexists = get_nexus_translation_table(*pios, &translation_table, &retstring);
+ while (going) {
+ Tree * tree = read_next_tree_from_stream_nexus(*pios, retstring, ttexists,
+ &translation_table, &going);
+ if (tree != nullptr) {
+ rl.regex_relabel_tree(tree);
+ (*poos) << getNewickString(tree) << std::endl;
+ delete tree;
+ }
}
}
}
diff --git a/src/main_rmk.cpp b/src/main_rmk.cpp
index c0529db..3765a7d 100644
--- a/src/main_rmk.cpp
+++ b/src/main_rmk.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Remove two-degree internal nodes ('knuckles') from a tree." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << "Output is written in newick format." << std::endl;
diff --git a/src/main_rms.cpp b/src/main_rms.cpp
index 4f35301..79a32a7 100644
--- a/src/main_rms.cpp
+++ b/src/main_rms.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Remove sequences by label." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << "Results are written in fasta format." << std::endl;
@@ -139,6 +140,23 @@ int main(int argc, char * argv[]) {
std::ifstream * fstr = nullptr;
std::ofstream * ofstr = nullptr;
+ if (fileset) {
+ fstr = new std::ifstream(seqf);
+ pios = fstr;
+ } else {
+ pios = &std::cin;
+ if (!check_for_input_to_stream()) {
+ print_help();
+ exit(1);
+ }
+ }
+ if (outfileset) {
+ ofstr = new std::ofstream(outf);
+ poos = ofstr;
+ } else {
+ poos = &std::cout;
+ }
+
if (namesset) {
std::vector<std::string> tokens2;
std::string del2(",");
@@ -167,23 +185,6 @@ int main(int argc, char * argv[]) {
exit(0);
}
- if (fileset) {
- fstr = new std::ifstream(seqf);
- pios = fstr;
- } else {
- pios = &std::cin;
- if (!check_for_input_to_stream()) {
- print_help();
- exit(1);
- }
- }
- if (outfileset) {
- ofstr = new std::ofstream(outf);
- poos = ofstr;
- } else {
- poos = &std::cout;
- }
-
Sequence seq;
std::string retstring;
std::string seq_name;
diff --git a/src/main_rmt.cpp b/src/main_rmt.cpp
index b00cf74..1b3419b 100644
--- a/src/main_rmt.cpp
+++ b/src/main_rmt.cpp
@@ -19,6 +19,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Remove tree tips by label." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << "Output is written in newick format." << std::endl;
@@ -137,6 +138,28 @@ int main(int argc, char * argv[]) {
check_inout_streams_identical(treef, outf);
}
+ std::istream * pios = nullptr;
+ std::ostream * poos = nullptr;
+ std::ifstream * fstr = nullptr;
+ std::ofstream * ofstr = nullptr;
+
+ if (fileset) {
+ fstr = new std::ifstream(treef);
+ pios = fstr;
+ } else {
+ pios = &std::cin;
+ if (!check_for_input_to_stream()) {
+ print_help();
+ exit(1);
+ }
+ }
+ if (outfileset) {
+ ofstr = new std::ofstream(outf);
+ poos = ofstr;
+ } else {
+ poos = &std::cout;
+ }
+
if (namesset) {
std::vector<std::string> tokens2;
std::string del2(",");
@@ -161,28 +184,6 @@ int main(int argc, char * argv[]) {
std::cerr << "Exiting." << std::endl;
exit(0);
}
-
- std::istream * pios = nullptr;
- std::ostream * poos = nullptr;
- std::ifstream * fstr = nullptr;
- std::ofstream * ofstr = nullptr;
-
- if (fileset) {
- fstr = new std::ifstream(treef);
- pios = fstr;
- } else {
- pios = &std::cin;
- if (!check_for_input_to_stream()) {
- print_help();
- exit(1);
- }
- }
- if (outfileset) {
- ofstr = new std::ofstream(outf);
- poos = ofstr;
- } else {
- poos = &std::cout;
- }
//read trees
std::string retstring;
diff --git a/src/main_rr.cpp b/src/main_rr.cpp
index 28a00eb..e891253 100644
--- a/src/main_rr.cpp
+++ b/src/main_rr.cpp
@@ -20,6 +20,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Reroot (or unroot) a tree file and produce a newick." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << "Output is written in newick format." << std::endl;
@@ -136,6 +137,28 @@ int main(int argc, char * argv[]) {
check_inout_streams_identical(treef, outf);
}
+ std::istream * pios = nullptr;
+ std::ostream * poos = nullptr;
+ std::ifstream * fstr = nullptr;
+ std::ofstream * ofstr = nullptr;
+
+ if (fileset) {
+ fstr = new std::ifstream(treef);
+ pios = fstr;
+ } else {
+ pios = &std::cin;
+ if (!check_for_input_to_stream()) {
+ print_help();
+ exit(1);
+ }
+ }
+ if (outfileset) {
+ ofstr = new std::ofstream(outf);
+ poos = ofstr;
+ } else {
+ poos = &std::cout;
+ }
+
if (outgroupsset) {
std::vector<std::string> tokens2;
tokenize(outgroupsc, tokens2, ",");
@@ -165,28 +188,6 @@ int main(int argc, char * argv[]) {
std::cerr << "Error: you need to set the outgroup (-g). Exiting." << std::endl;
exit(0);
}
-
- std::istream * pios = nullptr;
- std::ostream * poos = nullptr;
- std::ifstream * fstr = nullptr;
- std::ofstream * ofstr = nullptr;
-
- if (fileset) {
- fstr = new std::ifstream(treef);
- pios = fstr;
- } else {
- pios = &std::cin;
- if (!check_for_input_to_stream()) {
- print_help();
- exit(1);
- }
- }
- if (outfileset) {
- ofstr = new std::ofstream(outf);
- poos = ofstr;
- } else {
- poos = &std::cout;
- }
//read trees
std::string retstring;
diff --git a/src/main_s2fa.cpp b/src/main_s2fa.cpp
index 12e542c..7c809be 100644
--- a/src/main_s2fa.cpp
+++ b/src/main_s2fa.cpp
@@ -16,6 +16,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Convert seqfiles from nexus, phylip, fastq to fasta." << std::endl;
std::cout << "Data can be read from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_s2nex.cpp b/src/main_s2nex.cpp
index e253e77..92ff2d0 100644
--- a/src/main_s2nex.cpp
+++ b/src/main_s2nex.cpp
@@ -17,6 +17,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Convert seqfiles from nexus, phylip, or fastq to nexus." << std::endl;
std::cout << "Can read from STDIN or file." << std::endl;
std::cout << std::endl;
diff --git a/src/main_s2phy.cpp b/src/main_s2phy.cpp
index 45e2924..ee4016a 100644
--- a/src/main_s2phy.cpp
+++ b/src/main_s2phy.cpp
@@ -17,6 +17,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Convert seqfiles from nexus, phylip, or fastq to phylip." << std::endl;
std::cout << "Can read from STDIN or file." << std::endl;
std::cout << std::endl;
diff --git a/src/main_seqgen.cpp b/src/main_seqgen.cpp
index 12612ec..392c775 100644
--- a/src/main_seqgen.cpp
+++ b/src/main_seqgen.cpp
@@ -17,6 +17,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Basic sequence simulator under the GTR model." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_ssort.cpp b/src/main_ssort.cpp
index 093a60f..8ec1528 100644
--- a/src/main_ssort.cpp
+++ b/src/main_ssort.cpp
@@ -19,6 +19,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Sort sequences by id or length." << std::endl;
std::cout << "This will take fasta, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_ssplit.cpp b/src/main_ssplit.cpp
new file mode 100644
index 0000000..76c1be7
--- /dev/null
+++ b/src/main_ssplit.cpp
@@ -0,0 +1,203 @@
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <string>
+#include <set>
+#include <cstring>
+#include <getopt.h>
+
+#include "utils.h"
+#include "sequence.h"
+#include "seq_reader.h"
+#include "relabel.h"
+#include "log.h"
+#include "citations.h"
+
+
+void print_help ();
+std::string get_version_line ();
+
+// Write seq to a fasta file named '{sequence id}.fa' (spaces in the name become
+// '_'); the name is added to outnames only if the file could be opened.
+void write_sequence_to_file (Sequence& seq, std::vector<std::string>& outnames) {
+    std::string fname = seq.get_id() + ".fa";
+    for (char & ch : fname) { if (ch == ' ') { ch = '_'; } } // no spaces in file names!!!
+    std::ofstream outf(fname);
+    if (!outf) { std::cerr << "Error: could not open '" << fname << "' for writing." << std::endl; return; }
+    outnames.push_back(fname);
+    outf << ">" << seq.get_id() << std::endl;
+    outf << seq.get_sequence() << std::endl;
+}
+
+void print_help () { // usage text for pxssplit, written to STDOUT as one chained expression
+    std::cout << std::endl
+        << "Split a multi-sequence alignment into separate files by taxon." << std::endl
+        << "This will take fasta, phylip, and nexus formats from a file or STDIN." << std::endl
+        << "Results are written in fasta format named '{sequence id}.fa'." << std::endl
+        << "Note: existing files will be overwritten." << std::endl
+        << std::endl
+        << "Usage: pxssplit [OPTIONS]... FILES" << std::endl
+        << std::endl
+        << "Options:" << std::endl
+        << " -s, --seqf=FILE input seq file, STDIN otherwise" << std::endl
+        << " -v, --verbose make the output more verbose" << std::endl
+        << " -h, --help display this help and exit" << std::endl
+        << " -V, --version display version and exit" << std::endl
+        << " -C, --citation display phyx citation and exit" << std::endl
+        << std::endl
+        << "Report bugs to: <https://github.com/FePhyFoFum/phyx/issues>" << std::endl
+        << "phyx home page: <https://github.com/FePhyFoFum/phyx>" << std::endl;
+}
+
+std::string get_version_line () { // banner printed for -V; no trailing newline
+    // adjacent string literals are joined by the compiler into one string
+    return std::string("pxssplit 1.3\n"
+        "Copyright (C) 2021 FePhyFoFum\n"
+        "License GPLv3\n"
+        "Written by Joseph W. Brown");
+}
+
+static struct option const long_options[] = // long-option table handed to getopt_long in main
+{
+ {"seqf", required_argument, nullptr, 's'}, // -s FILE: input seq file
+ {"verbose", no_argument, nullptr, 'v'}, // -v: more verbose output
+ {"help", no_argument, nullptr, 'h'}, // -h: print the help text
+ {"version", no_argument, nullptr, 'V'}, // -V: print the version line
+ {"citation", no_argument, nullptr, 'C'}, // -C: print the phyx citation
+ {nullptr, 0, nullptr, 0} // all-zero entry ends the table
+};
+
+// pxssplit entry point.
+//
+// Parses the command line, detects the alignment format of the input (the
+// file given with -s, STDIN otherwise) and passes every sequence to
+// write_sequence_to_file, so that each one ends up in its own fasta file.
+// The number of recorded files is reported on STDERR; with -v the file names
+// are listed on STDOUT as well.
+int main(int argc, char * argv[]) {
+
+    log_call(argc, argv);
+
+    bool sfileset = false;
+    bool verbose = false;
+    // points at optarg (i.e. into argv), so no copy has to be made or freed
+    char * seqf = nullptr;
+
+    // keep track of the files produced
+    std::vector<std::string> outnames;
+
+    while (true) {
+        int oi = -1;
+        int c = getopt_long(argc, argv, "s:vhVC", long_options, &oi);
+        if (c == -1) {
+            break;
+        }
+        switch(c) {
+            case 's':
+                sfileset = true;
+                seqf = optarg;
+                check_file_exists(seqf);
+                break;
+            case 'v':
+                verbose = true;
+                break;
+            // the informational options print their text and stop
+            case 'h':
+                print_help();
+                exit(0);
+            case 'V':
+                std::cout << get_version_line() << std::endl;
+                exit(0);
+            case 'C':
+                std::cout << get_phyx_citation() << std::endl;
+                exit(0);
+            default:
+                print_error(*argv);
+                exit(0);
+        }
+    }
+
+    // Input comes from the -s file or from STDIN. There is no output stream
+    // to set up: write_sequence_to_file opens a separate file per sequence.
+    // fstr lives on the stack, so the file is closed when main returns.
+    std::ifstream fstr;
+    std::istream * pios = &std::cin;
+
+    if (sfileset) {
+        fstr.open(seqf);
+        pios = &fstr;
+    } else {
+        // no input detected on STDIN: show the help text and give up
+        if (!check_for_input_to_stream()) {
+            print_help();
+            exit(1);
+        }
+    }
+
+    // seq is reused for every record handed over by the reader functions
+    Sequence seq;
+    std::string retstring;
+    int num_taxa, num_char; // not used, but required by some reader functions
+
+    // file type code; the branches below treat 0 as nexus, 1 as phylip and
+    // 2 as fasta
+    int ft = test_seq_filetype_stream(*pios, retstring);
+
+    // extra stuff to deal with possible interleaved nexus
+    if (ft == 0) { // nexus
+        bool interleave = false;
+        get_nexus_dimensions(*pios, num_taxa, num_char, interleave);
+        retstring = ""; // need to do this to let seqreader know we are mid-file
+        if (!interleave) {
+            // split nexus input like every other format; printing the
+            // sequences to STDOUT here would leave outnames empty and create
+            // no '{sequence id}.fa' files
+            while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
+                write_sequence_to_file(seq, outnames);
+            }
+        } else {
+            // read_interleaved_nexus returns all of the sequences in one go
+            std::vector<Sequence> seqs = read_interleaved_nexus(*pios, num_taxa, num_char);
+            for (const auto & sq : seqs) {
+                // copy, since write_sequence_to_file takes a non-const reference
+                seq = sq;
+                write_sequence_to_file(seq, outnames);
+            }
+        }
+    } else {
+        bool complicated_phylip = false;
+        // check if we are dealing with a complicated phylip format
+        if (ft == 1) {
+            get_phylip_dimensions(retstring, num_taxa, num_char);
+            complicated_phylip = is_complicated_phylip(*pios, num_char);
+        }
+        if (complicated_phylip) {
+            // read_phylip also returns every sequence at once
+            std::vector<Sequence> seqs = read_phylip(*pios, num_taxa, num_char);
+            for (const auto & sq : seqs) {
+                // copy, since write_sequence_to_file takes a non-const reference
+                seq = sq;
+                write_sequence_to_file(seq, outnames);
+            }
+        } else {
+            // fasta, fastq, or simple phylip
+            while (read_next_seq_from_stream(*pios, ft, retstring, seq)) {
+                write_sequence_to_file(seq, outnames);
+            }
+            // fasta has a trailing one
+            if (ft == 2) {
+                write_sequence_to_file(seq, outnames);
+            }
+        }
+    }
+
+    // the count goes to STDERR, the optional list of file names to STDOUT
+    std::cerr << "Wrote " << outnames.size() << " files." << std::endl;
+    if (verbose) {
+        for (const auto & fname : outnames) {
+            std::cout << fname << std::endl;
+        }
+    }
+
+    return EXIT_SUCCESS;
+}
diff --git a/src/main_sstat.cpp b/src/main_sstat.cpp
index 0685cd1..83ba8be 100644
--- a/src/main_sstat.cpp
+++ b/src/main_sstat.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Calculates multinomial alignment test statistics." << std::endl;
std::cout << "Currently only calculates the test statistic from Bollback (2002) MBE." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
diff --git a/src/main_strec.cpp b/src/main_strec.cpp
index 6f7700d..e14e6f6 100644
--- a/src/main_strec.cpp
+++ b/src/main_strec.cpp
@@ -26,6 +26,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "This will conduct state reconstruction analyses." << std::endl;
std::cout << std::endl;
std::cout << "Usage: pxstrec [OPTIONS]... FILES" << std::endl;
@@ -79,7 +80,7 @@ static struct option const long_options[] =
{nullptr, 0, nullptr, 0}
};
-bool checkdata(Tree * intree, std::vector<Sequence> runseqs);
+//bool checkdata(Tree * intree, std::vector<Sequence> runseqs);
bool checkdata(Tree * intree, std::vector<Sequence> runseqs) {
//std::vector<std::string> ret; // not used
std::set<std::string> seqnames;
@@ -133,6 +134,7 @@ int main(int argc, char * argv[]) {
std::string periodstring;
std::vector<std::string> ptokens;
std::vector<double> period_times;
+ bool argspresent = false;
while (true) {
int oi = -1;
@@ -145,51 +147,63 @@ int main(int argc, char * argv[]) {
//datafileset = true;
dataf = strdup(optarg);
check_file_exists(dataf);
+ argspresent = true;
break;
case 'z':
dataz = true;
datawide = true;
+ argspresent = true;
break;
case 'w':
datawide = true;
+ argspresent = true;
break;
case 't':
//treefileset = true;
treef = strdup(optarg);
check_file_exists(treef);
+ argspresent = true;
break;
case 'c':
conffileset = true;
conff = strdup(optarg);
check_file_exists(conff);
+ argspresent = true;
break;
case 'o':
outancfileset = true;
outanc = strdup(optarg);
+ argspresent = true;
break;
case 'n':
outstochnumfileset = true;
outnum = strdup(optarg);
+ argspresent = true;
break;
case 'm':
outstochtimefileset = true;
outtime = strdup(optarg);
+ argspresent = true;
break;
case 'a':
outstochnumanyfileset = true;
outnumany = strdup(optarg);
+ argspresent = true;
break;
case 'p':
periodsset = true;
periodstring = (strdup(optarg));
parse_comma_list(periodstring, period_times);
+ argspresent = true;
break;
case 'l':
logfileset = true;
logf = strdup(optarg);
+ argspresent = true;
break;
case 's':
silent = true;
+ argspresent = true;
break;
case 'h':
print_help();
@@ -206,6 +220,11 @@ int main(int argc, char * argv[]) {
}
}
+ if (!argspresent) {
+ print_help();
+ exit(1);
+ }
+
std::ofstream * logout = nullptr;
std::ostream * loos = nullptr;
diff --git a/src/main_sw.cpp b/src/main_sw.cpp
index 51dd38a..36b38f7 100644
--- a/src/main_sw.cpp
+++ b/src/main_sw.cpp
@@ -23,6 +23,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Conduct Smith-Waterman analysis for all the seqs in a file." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << "Output is a list of the scores and distances (and the alignments if asked)." << std::endl;
diff --git a/src/main_t2new.cpp b/src/main_t2new.cpp
index 1fbf685..062ddb3 100644
--- a/src/main_t2new.cpp
+++ b/src/main_t2new.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "This will convert a tree file to newick." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_t2nex.cpp b/src/main_t2nex.cpp
index 7532ae1..a2d5857 100644
--- a/src/main_t2nex.cpp
+++ b/src/main_t2nex.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "This will convert a tree file to vanilla Nexus format." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << std::endl;
diff --git a/src/main_tcol.cpp b/src/main_tcol.cpp
index 0bd1dd6..9c007f3 100644
--- a/src/main_tcol.cpp
+++ b/src/main_tcol.cpp
@@ -18,6 +18,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Add information to a tree so that you can color the edges." << std::endl;
std::cout << "This will take nexus and newick inputs from a file or STDIN." << std::endl;
std::cout << "Results are written in nexus format so that it can be read by figtree." << std::endl;
diff --git a/src/main_tcomb.cpp b/src/main_tcomb.cpp
index 98bcb90..3b7f98e 100644
--- a/src/main_tcomb.cpp
+++ b/src/main_tcomb.cpp
@@ -26,6 +26,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Combine a set of trees from one file into a tree from another." << std::endl;
std::cout << "Pass in 2 trees with `t` and `a`." << std::endl;
std::cout << std::endl;
@@ -123,12 +124,22 @@ int main(int argc, char * argv[]) {
std::ifstream * fstr = nullptr;
std::ifstream * afstr = nullptr;
std::ofstream * ofstr = nullptr;
-
- if (outfileset) {
- ofstr = new std::ofstream(outf);
- poos = ofstr;
+
+ if (tfileset) {
+ fstr = new std::ifstream(treef);
+ pios = fstr;
} else {
- poos = &std::cout;
+ pios = &std::cin;
+ if (!check_for_input_to_stream()) {
+ // if both inputs missing: print help
+ if (!addfileset) {
+ print_help();
+ exit(1);
+ } else {
+ std::cerr << "Error: you need to set an tfile (-t). Exiting." << std::endl;
+ exit(1);
+ }
+ }
}
if (addfileset) {
@@ -139,15 +150,11 @@ int main(int argc, char * argv[]) {
exit(0);
}
- if (tfileset) {
- fstr = new std::ifstream(treef);
- pios = fstr;
+ if (outfileset) {
+ ofstr = new std::ofstream(outf);
+ poos = ofstr;
} else {
- pios = &std::cin;
- if (!check_for_input_to_stream()) {
- std::cerr << "Error: you need to set an tfile (-t). Exiting." << std::endl;
- exit(1);
- }
+ poos = &std::cout;
}
std::string retstring;
diff --git a/src/main_tgen.cpp b/src/main_tgen.cpp
index 7e90364..180d8e3 100644
--- a/src/main_tgen.cpp
+++ b/src/main_tgen.cpp
@@ -16,6 +16,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Generate all tree topologies for n (<= 10) taxa." << std::endl;
std::cout << "Random tree samples are a-coming." << std::endl;
std::cout << std::endl;
@@ -69,6 +70,7 @@ int main(int argc, char * argv[]) {
bool outfileset = false;
std::string lprefix = "t";
char * outf = nullptr;
+ bool argspresent = false;
// limit on number of terminals supported (exhaustive)
unsigned int sim_limit_exh = 10;
@@ -86,25 +88,30 @@ int main(int argc, char * argv[]) {
switch(c) {
case 'n':
nt = string_to_int(optarg, "-n");
- if (nt < 0) {
+ if (nt <= 0) {
std::cerr << "Error: ntax must be a positive integer. Exiting." << std::endl;
exit(0);
} else {
num_taxa = static_cast<unsigned int>(nt);
}
+ argspresent = true;
break;
case 'r':
rooted = true;
+ argspresent = true;
break;
case 'c':
count = true;
+ argspresent = true;
break;
case 'l':
lprefix = strdup(optarg);
+ argspresent = true;
break;
case 'o':
outfileset = true;
outf = strdup(optarg);
+ argspresent = true;
break;
// case 'x':
// seed = string_to_int(optarg, "-x");
@@ -124,6 +131,11 @@ int main(int argc, char * argv[]) {
}
}
+ if (!argspresent) {
+ print_help();
+ exit(1);
+ }
+
std::string rootstat = (rooted) ? "rooted" : "unrooted";
if (num_taxa == 0) {
diff --git a/src/main_tlate.cpp b/src/main_tlate.cpp
index 7d569b7..f7f14fa 100644
--- a/src/main_tlate.cpp
+++ b/src/main_tlate.cpp
@@ -16,6 +16,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Translate DNA alignment to amino acids." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
std::cout << "NOTE: assumes sequences are in frame." << std::endl;
diff --git a/src/main_trt.cpp b/src/main_trt.cpp
index 76d876a..bd94454 100644
--- a/src/main_trt.cpp
+++ b/src/main_trt.cpp
@@ -19,6 +19,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "This will trace a big tree given a taxon list and and produce newick." << std::endl;
std::cout << "Data can be read from a file or STDIN." << std::endl;
std::cout << std::endl;
@@ -123,6 +124,27 @@ int main(int argc, char * argv[]) {
}
}
+ std::istream * pios = nullptr;
+ std::ostream * poos = nullptr;
+ std::ifstream * fstr = nullptr;
+ std::ofstream * ofstr = nullptr;
+
+ if (fileset) {
+ fstr = new std::ifstream(treef);
+ pios = fstr;
+ } else {
+ pios = &std::cin;
+ if (!check_for_input_to_stream()) {
+ if (!namesset && !namefileset && !outfileset) {
+ print_help();
+ exit(1);
+ } else {std::cout << "Error: missing required tree input. Exiting."
+ << std::endl;
+ exit(1);
+ }
+ }
+ }
+
if (fileset && outfileset) {
check_inout_streams_identical(treef, outf);
}
@@ -151,21 +173,6 @@ int main(int argc, char * argv[]) {
exit(0);
}
- std::istream * pios = nullptr;
- std::ostream * poos = nullptr;
- std::ifstream * fstr = nullptr;
- std::ofstream * ofstr = nullptr;
-
- if (fileset) {
- fstr = new std::ifstream(treef);
- pios = fstr;
- } else {
- pios = &std::cin;
- if (!check_for_input_to_stream()) {
- print_help();
- exit(1);
- }
- }
if (outfileset) {
ofstr = new std::ofstream(outf);
poos = ofstr;
diff --git a/src/main_tscale.cpp b/src/main_tscale.cpp
index 5d683be..a2791bb 100644
--- a/src/main_tscale.cpp
+++ b/src/main_tscale.cpp
@@ -17,6 +17,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Tree rescaling by providing either scaling factor or root height." << std::endl;
std::cout << "This will take a newick- or nexus-formatted tree from a file or STDIN." << std::endl;
std::cout << "Output is written in newick format." << std::endl;
@@ -117,6 +118,23 @@ int main(int argc, char * argv[]) {
std::ifstream * fstr = nullptr;
std::ofstream * ofstr = nullptr;
+ if (tfileset) {
+ fstr = new std::ifstream(treef);
+ pios = fstr;
+ } else {
+ pios = &std::cin;
+ if (!check_for_input_to_stream()) {
+ if (!heightset && !scaleset) {
+ print_help();
+ exit(1);
+ } else {
+ std::cerr << "Error: missing tree input. Exiting."
+ << std::endl;
+ exit(1);
+ }
+ }
+ }
+
if (heightset && scaleset) {
std::cerr << "Error: supply only rootheight (-r) or scale (-s), not both. Exiting."
<< std::endl;
@@ -128,16 +146,6 @@ int main(int argc, char * argv[]) {
exit(0);
}
- if (tfileset) {
- fstr = new std::ifstream(treef);
- pios = fstr;
- } else {
- pios = &std::cin;
- if (!check_for_input_to_stream()) {
- print_help();
- exit(1);
- }
- }
if (outfileset) {
ofstr = new std::ofstream(outf);
poos = ofstr;
diff --git a/src/main_upgma.cpp b/src/main_upgma.cpp
index 20b57d3..60be3a2 100644
--- a/src/main_upgma.cpp
+++ b/src/main_upgma.cpp
@@ -16,6 +16,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Bare bones UPGMA tree generator" << std::endl;
std::cout << "Currently only uses uncorrected p-distances." << std::endl;
std::cout << "This will take fasta, fastq, phylip, and nexus formats from a file or STDIN." << std::endl;
diff --git a/src/main_vcf2fa.cpp b/src/main_vcf2fa.cpp
index 82c0f2e..4bf7fc0 100644
--- a/src/main_vcf2fa.cpp
+++ b/src/main_vcf2fa.cpp
@@ -16,6 +16,7 @@ void print_help ();
std::string get_version_line ();
void print_help () {
+ std::cout << std::endl;
std::cout << "Convert vcf file to fasta." << std::endl;
std::cout << "Currently only handles haploid data; phased data will come soon." << std::endl;
std::cout << "Data can be read from a file or STDIN." << std::endl;
diff --git a/src/man/pxaa2cdn.1.in b/src/man/pxaa2cdn.1.in
index 375a7a0..a34ccab 100644
--- a/src/man/pxaa2cdn.1.in
+++ b/src/man/pxaa2cdn.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXAA2CDN "1" "July 2021" "pxaa2cdn 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXAA2CDN "1" "July 2023" "pxaa2cdn 1.3" "User Commands"
.SH NAME
pxaa2cdn \- manual page for pxaa2cdn 1.3
.SH SYNOPSIS
@@ -18,7 +18,10 @@ input sequence file, STDIN otherwise
input sequence file, STDIN otherwise
.TP
\fB\-r\fR, \fB\-\-rmlastcdn\fR
-remove last codon (default: false)
+remove last codon from all nuc sequences (default: false)
+.TP
+\fB\-s\fR, \fB\-\-stopremove\fR
+remove stop codon from nuc sequences if present (default: false)
.TP
\fB\-o\fR, \fB\-\-outf\fR=\fI\,FILE\/\fR
output fasta file, STOUT otherwise
diff --git a/src/man/pxbdfit.1.in b/src/man/pxbdfit.1.in
index 27ad8a4..64161d1 100644
--- a/src/man/pxbdfit.1.in
+++ b/src/man/pxbdfit.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXBDFIT "1" "July 2021" "pxbdfit 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXBDFIT "1" "July 2023" "pxbdfit 1.3" "User Commands"
.SH NAME
pxbdfit \- manual page for pxbdfit 1.3
.SH SYNOPSIS
diff --git a/src/man/pxbdsim.1.in b/src/man/pxbdsim.1.in
index de3a924..7a0997e 100644
--- a/src/man/pxbdsim.1.in
+++ b/src/man/pxbdsim.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXBDSIM "1" "July 2021" "pxbdsim 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXBDSIM "1" "July 2023" "pxbdsim 1.3" "User Commands"
.SH NAME
pxbdsim \- manual page for pxbdsim 1.3
.SH SYNOPSIS
diff --git a/src/man/pxboot.1.in b/src/man/pxboot.1.in
index 95045c1..e1a6a61 100644
--- a/src/man/pxboot.1.in
+++ b/src/man/pxboot.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXBOOT "1" "July 2021" "pxboot 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXBOOT "1" "July 2023" "pxboot 1.3" "User Commands"
.SH NAME
pxboot \- manual page for pxboot 1.3
.SH SYNOPSIS
diff --git a/src/man/pxbp.1.in b/src/man/pxbp.1.in
index 2555e6d..8ddc649 100644
--- a/src/man/pxbp.1.in
+++ b/src/man/pxbp.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXBP "1" "July 2021" "pxbp 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXBP "1" "July 2023" "pxbp 1.3" "User Commands"
.SH NAME
pxbp \- manual page for pxbp 1.3
.SH SYNOPSIS
diff --git a/src/man/pxcat.1.in b/src/man/pxcat.1.in
index 12d3319..b0a8aea 100644
--- a/src/man/pxcat.1.in
+++ b/src/man/pxcat.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXCAT "1" "July 2021" "pxcat 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXCAT "1" "July 2023" "pxcat 1.3" "User Commands"
.SH NAME
pxcat \- manual page for pxcat 1.3
.SH SYNOPSIS
@@ -28,7 +28,7 @@ Individual files may be of different formats.
\fB\-s\fR, \fB\-\-seqf\fR=\fI\,FILE\/\fR
list of input sequence files (space delimited)
.TP
-\fB\-f\fR, \fB\-\-flistFILE\fR
+\fB\-f\fR, \fB\-\-flist\fR=\fI\,FILE\/\fR
file listing input files (one per line)
.TP
\fB\-p\fR, \fB\-\-partf\fR=\fI\,FILE\/\fR
diff --git a/src/man/pxclsq.1.in b/src/man/pxclsq.1.in
index 8526f75..f599eae 100644
--- a/src/man/pxclsq.1.in
+++ b/src/man/pxclsq.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXCLSQ "1" "July 2021" "pxclsq 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXCLSQ "1" "July 2023" "pxclsq 1.3" "User Commands"
.SH NAME
pxclsq \- manual page for pxclsq 1.3
.SH SYNOPSIS
@@ -17,6 +17,13 @@ input sequence file, STDIN otherwise
\fB\-p\fR, \fB\-\-prop\fR=\fI\,DOUBLE\/\fR
proportion required to be present, default=0.5
.TP
+\fB\-e\fR, \fB\-\-empty\fR
+remove columns that are completely empty (\- or ?)
+.TP
+\fB\-m\fR, \fB\-\-min\fR=\fI\,INT\/\fR
+the minimum number of good characters required per site
+\- a min of 1 is equivalent to \fB\-e\fR above
+.TP
\fB\-t\fR, \fB\-\-taxa\fR
consider missing data per taxon (default: per site)
.TP
diff --git a/src/man/pxcltr.1.in b/src/man/pxcltr.1.in
index 15ccda0..ee9587d 100644
--- a/src/man/pxcltr.1.in
+++ b/src/man/pxcltr.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXCLTR "1" "July 2021" "pxcltr 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXCLTR "1" "July 2023" "pxcltr 1.3" "User Commands"
.SH NAME
pxcltr \- manual page for pxcltr 1.3
.SH SYNOPSIS
diff --git a/src/man/pxcolt.1.in b/src/man/pxcolt.1.in
index 10f3e9b..9bbfb5a 100644
--- a/src/man/pxcolt.1.in
+++ b/src/man/pxcolt.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXCOLT "1" "July 2021" "pxcolt 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXCOLT "1" "July 2023" "pxcolt 1.3" "User Commands"
.SH NAME
pxcolt \- manual page for pxcolt 1.3
.SH SYNOPSIS
diff --git a/src/man/pxcomp.1.in b/src/man/pxcomp.1.in
index 852308e..0da2464 100644
--- a/src/man/pxcomp.1.in
+++ b/src/man/pxcomp.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXCOMP "1" "July 2021" "pxcomp 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXCOMP "1" "July 2023" "pxcomp 1.3" "User Commands"
.SH NAME
pxcomp \- manual page for pxcomp 1.3
.SH SYNOPSIS
diff --git a/src/man/pxconsq.1.in b/src/man/pxconsq.1.in
index 23a9c0c..eeea6ae 100644
--- a/src/man/pxconsq.1.in
+++ b/src/man/pxconsq.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXCONSQ "1" "July 2021" "pxconsq 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXCONSQ "1" "July 2023" "pxconsq 1.3" "User Commands"
.SH NAME
pxconsq \- manual page for pxconsq 1.3
.SH SYNOPSIS
diff --git a/src/man/pxcontrates.1.in b/src/man/pxcontrates.1.in
index 9b49dbf..280f496 100644
--- a/src/man/pxcontrates.1.in
+++ b/src/man/pxcontrates.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXCONTRATES "1" "July 2021" "pxcontrates 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXCONTRATES "1" "July 2023" "pxcontrates 1.3" "User Commands"
.SH NAME
pxcontrates \- manual page for pxcontrates 1.3
.SH SYNOPSIS
diff --git a/src/man/pxfqfilt.1.in b/src/man/pxfqfilt.1.in
index 251b3b1..82877f7 100644
--- a/src/man/pxfqfilt.1.in
+++ b/src/man/pxfqfilt.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXFQFILT "1" "July 2021" "pxfqfilt 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXFQFILT "1" "July 2023" "pxfqfilt 1.3" "User Commands"
.SH NAME
pxfqfilt \- manual page for pxfqfilt 1.3
.SH SYNOPSIS
diff --git a/src/man/pxlog.1.in b/src/man/pxlog.1.in
index f9a4330..57e3a14 100644
--- a/src/man/pxlog.1.in
+++ b/src/man/pxlog.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXLOG "1" "July 2021" "pxlog 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXLOG "1" "July 2023" "pxlog 1.3" "User Commands"
.SH NAME
pxlog \- manual page for pxlog 1.3
.SH SYNOPSIS
diff --git a/src/man/pxlssq.1.in b/src/man/pxlssq.1.in
index cbd4aab..fd450bd 100644
--- a/src/man/pxlssq.1.in
+++ b/src/man/pxlssq.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXLSSQ "1" "July 2021" "pxlssq 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXLSSQ "1" "July 2023" "pxlssq 1.3" "User Commands"
.SH NAME
pxlssq \- manual page for pxlssq 1.3
.SH SYNOPSIS
diff --git a/src/man/pxlstr.1.in b/src/man/pxlstr.1.in
index 7dd82e1..3daf1a5 100644
--- a/src/man/pxlstr.1.in
+++ b/src/man/pxlstr.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXLSTR "1" "July 2021" "pxlstr 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXLSTR "1" "July 2023" "pxlstr 1.3" "User Commands"
.SH NAME
pxlstr \- manual page for pxlstr 1.3
.SH SYNOPSIS
diff --git a/src/man/pxmono.1.in b/src/man/pxmono.1.in
index 9fbae5a..b8ecf7f 100644
--- a/src/man/pxmono.1.in
+++ b/src/man/pxmono.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXMONO "1" "July 2021" "pxmono 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXMONO "1" "July 2023" "pxmono 1.3" "User Commands"
.SH NAME
pxmono \- manual page for pxmono 1.3
.SH SYNOPSIS
diff --git a/src/man/pxmrca.1.in b/src/man/pxmrca.1.in
index e0ef9d1..e8dd30c 100644
--- a/src/man/pxmrca.1.in
+++ b/src/man/pxmrca.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXMRCA "1" "July 2021" "pxmrca 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXMRCA "1" "July 2023" "pxmrca 1.3" "User Commands"
.SH NAME
pxmrca \- manual page for pxmrca 1.3
.SH SYNOPSIS
diff --git a/src/man/pxmrcacut.1.in b/src/man/pxmrcacut.1.in
index c3472d5..5afa217 100644
--- a/src/man/pxmrcacut.1.in
+++ b/src/man/pxmrcacut.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXMRCACUT "1" "July 2021" "pxmrcacut 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXMRCACUT "1" "July 2023" "pxmrcacut 1.3" "User Commands"
.SH NAME
pxmrcacut \- manual page for pxmrcacut 1.3
.SH SYNOPSIS
diff --git a/src/man/pxmrcaname.1.in b/src/man/pxmrcaname.1.in
index 48190e1..812ff3a 100644
--- a/src/man/pxmrcaname.1.in
+++ b/src/man/pxmrcaname.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXMRCANAME "1" "July 2021" "pxmrcaname 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXMRCANAME "1" "July 2023" "pxmrcaname 1.3" "User Commands"
.SH NAME
pxmrcaname \- manual page for pxmrcaname 1.3
.SH SYNOPSIS
diff --git a/src/man/pxnw.1.in b/src/man/pxnw.1.in
index 6c45c14..42a575c 100644
--- a/src/man/pxnw.1.in
+++ b/src/man/pxnw.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXNW "1" "July 2021" "pxnw 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXNW "1" "July 2023" "pxnw 1.3" "User Commands"
.SH NAME
pxnw \- manual page for pxnw 1.3
.SH SYNOPSIS
diff --git a/src/man/pxpoly.1.in b/src/man/pxpoly.1.in
index 386b23c..e722865 100644
--- a/src/man/pxpoly.1.in
+++ b/src/man/pxpoly.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXPOLY "1" "July 2021" "pxpoly 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXPOLY "1" "July 2023" "pxpoly 1.3" "User Commands"
.SH NAME
pxpoly \- manual page for pxpoly 1.3
.SH SYNOPSIS
diff --git a/src/man/pxrecode.1.in b/src/man/pxrecode.1.in
index 3f48717..8b04d98 100644
--- a/src/man/pxrecode.1.in
+++ b/src/man/pxrecode.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXRECODE "1" "July 2021" "pxrecode 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXRECODE "1" "July 2023" "pxrecode 1.3" "User Commands"
.SH NAME
pxrecode \- manual page for pxrecode 1.3
.SH SYNOPSIS
diff --git a/src/man/pxrevcomp.1.in b/src/man/pxrevcomp.1.in
index 90a3bf5..ebe9ce1 100644
--- a/src/man/pxrevcomp.1.in
+++ b/src/man/pxrevcomp.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXREVCOMP "1" "July 2021" "pxrevcomp 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXREVCOMP "1" "July 2023" "pxrevcomp 1.3" "User Commands"
.SH NAME
pxrevcomp \- manual page for pxrevcomp 1.3
.SH SYNOPSIS
diff --git a/src/man/pxrls.1.in b/src/man/pxrls.1.in
index 6aaf01e..5c4b2cd 100644
--- a/src/man/pxrls.1.in
+++ b/src/man/pxrls.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXRLS "1" "July 2021" "pxrls 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXRLS "1" "July 2023" "pxrls 1.3" "User Commands"
.SH NAME
pxrls \- manual page for pxrls 1.3
.SH SYNOPSIS
@@ -9,6 +9,7 @@ pxrls \- manual page for pxrls 1.3
Taxon relabelling for alignments.
This will take fasta, phylip, and nexus formats from a file or STDIN.
Two ordered lists of taxa, \fB\-c\fR (current) and \fB\-n\fR (new) must be provided.
+Alternatively, a regex pattern (\fB\-p\fR) and replacement (\fB\-r\fR) text can be provided.
Results are written in fasta format.
.SH OPTIONS
.TP
@@ -20,6 +21,10 @@ file containing current taxon labels (one per line)
.TP
\fB\-n\fR, \fB\-\-nnames\fR=\fI\,FILE\/\fR
file containing new taxon labels (one per line)
+.HP
+\fB\-p\fR, \fB\-\-pattern\fR=\fI\,STRING\/\fR regex pattern to replace
+.HP
+\fB\-r\fR, \fB\-\-replace\fR=\fI\,STRING\/\fR replacement pattern
.TP
\fB\-v\fR, \fB\-\-verbose\fR
make the output more verbose
diff --git a/src/man/pxrlt.1.in b/src/man/pxrlt.1.in
index 6aee51f..27672a4 100644
--- a/src/man/pxrlt.1.in
+++ b/src/man/pxrlt.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXRLT "1" "July 2021" "pxrlt 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXRLT "1" "July 2023" "pxrlt 1.3" "User Commands"
.SH NAME
pxrlt \- manual page for pxrlt 1.3
.SH SYNOPSIS
@@ -8,6 +8,7 @@ pxrlt \- manual page for pxrlt 1.3
.SH DESCRIPTION
Taxon relabelling for trees.
Two ordered lists of taxa, \fB\-c\fR (current) and \fB\-n\fR (new) must be provided.
+Alternatively, a regex pattern (\fB\-p\fR) and replacement (\fB\-r\fR) text can be provided.
This will take a newick\- or nexus\-formatted tree from a file or STDIN.
Output is written in newick format.
.SH OPTIONS
@@ -20,6 +21,10 @@ file containing current taxon labels (one per line)
.TP
\fB\-n\fR, \fB\-\-nnames\fR=\fI\,FILE\/\fR
file containing new taxon labels (one per line)
+.HP
+\fB\-p\fR, \fB\-\-pattern\fR=\fI\,STRING\/\fR regex pattern to replace
+.HP
+\fB\-r\fR, \fB\-\-replace\fR=\fI\,STRING\/\fR replacement pattern
.TP
\fB\-v\fR, \fB\-\-verbose\fR
make the output more verbose
diff --git a/src/man/pxrmk.1.in b/src/man/pxrmk.1.in
index 2eb35e4..7601b86 100644
--- a/src/man/pxrmk.1.in
+++ b/src/man/pxrmk.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXRMK "1" "July 2021" "pxrmk 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXRMK "1" "July 2023" "pxrmk 1.3" "User Commands"
.SH NAME
pxrmk \- manual page for pxrmk 1.3
.SH SYNOPSIS
diff --git a/src/man/pxrms.1.in b/src/man/pxrms.1.in
index ae62b7a..f2ce7d8 100644
--- a/src/man/pxrms.1.in
+++ b/src/man/pxrms.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXRMS "1" "July 2021" "pxrms 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXRMS "1" "July 2023" "pxrms 1.3" "User Commands"
.SH NAME
pxrms \- manual page for pxrms 1.3
.SH SYNOPSIS
diff --git a/src/man/pxrmt.1.in b/src/man/pxrmt.1.in
index 223a314..8dbdb50 100644
--- a/src/man/pxrmt.1.in
+++ b/src/man/pxrmt.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXRMT "1" "July 2021" "pxrmt 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXRMT "1" "July 2023" "pxrmt 1.3" "User Commands"
.SH NAME
pxrmt \- manual page for pxrmt 1.3
.SH SYNOPSIS
diff --git a/src/man/pxrr.1.in b/src/man/pxrr.1.in
index 1c8102b..2cc6728 100644
--- a/src/man/pxrr.1.in
+++ b/src/man/pxrr.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXRR "1" "July 2021" "pxrr 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXRR "1" "July 2023" "pxrr 1.3" "User Commands"
.SH NAME
pxrr \- manual page for pxrr 1.3
.SH SYNOPSIS
diff --git a/src/man/pxs2fa.1.in b/src/man/pxs2fa.1.in
index 8d8fe55..dbb2a5b 100644
--- a/src/man/pxs2fa.1.in
+++ b/src/man/pxs2fa.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXS2FA "1" "July 2021" "pxs2fa 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXS2FA "1" "July 2023" "pxs2fa 1.3" "User Commands"
.SH NAME
pxs2fa \- manual page for pxs2fa 1.3
.SH SYNOPSIS
diff --git a/src/man/pxs2nex.1.in b/src/man/pxs2nex.1.in
index e75ef6b..f4cecb9 100644
--- a/src/man/pxs2nex.1.in
+++ b/src/man/pxs2nex.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXS2NEX "1" "July 2021" "pxs2nex 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXS2NEX "1" "July 2023" "pxs2nex 1.3" "User Commands"
.SH NAME
pxs2nex \- manual page for pxs2nex 1.3
.SH SYNOPSIS
diff --git a/src/man/pxs2phy.1.in b/src/man/pxs2phy.1.in
index 205bc31..e779eee 100644
--- a/src/man/pxs2phy.1.in
+++ b/src/man/pxs2phy.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXS2PHY "1" "July 2021" "pxs2phy 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXS2PHY "1" "July 2023" "pxs2phy 1.3" "User Commands"
.SH NAME
pxs2phy \- manual page for pxs2phy 1.3
.SH SYNOPSIS
diff --git a/src/man/pxseqgen.1.in b/src/man/pxseqgen.1.in
index d24a68b..d11574b 100644
--- a/src/man/pxseqgen.1.in
+++ b/src/man/pxseqgen.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXSEQGEN "1" "July 2021" "pxseqgen 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXSEQGEN "1" "July 2023" "pxseqgen 1.3" "User Commands"
.SH NAME
pxseqgen \- manual page for pxseqgen 1.3
.SH SYNOPSIS
diff --git a/src/man/pxssort.1.in b/src/man/pxssort.1.in
index cb09b02..96ee4b7 100644
--- a/src/man/pxssort.1.in
+++ b/src/man/pxssort.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXSSORT "1" "July 2021" "pxssort 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXSSORT "1" "July 2023" "pxssort 1.3" "User Commands"
.SH NAME
pxssort \- manual page for pxssort 1.3
.SH SYNOPSIS
diff --git a/src/man/pxssplit.1.in b/src/man/pxssplit.1.in
new file mode 100644
index 0000000..1ac0f00
--- /dev/null
+++ b/src/man/pxssplit.1.in
@@ -0,0 +1,38 @@
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
+.TH PXSSPLIT "1" "August 2021" "pxssplit 1.3" "User Commands"
+.SH NAME
+pxssplit \- manual page for pxssplit 1.3
+.SH SYNOPSIS
+.B pxssplit
+[\fI\,OPTIONS\/\fR]... \fI\,FILES\/\fR
+.SH DESCRIPTION
+Split a multi\-sequence alignment into separate files by taxon.
+This will take fasta, phylip, and nexus formats from a file or STDIN.
+Results are written in fasta format named '{sequence id}.fa'.
+Note: existing files will be overwritten.
+.SH OPTIONS
+.TP
+\fB\-s\fR, \fB\-\-seqf\fR=\fI\,FILE\/\fR
+input seq file, STDIN otherwise
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+make the output more verbose
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+display this help and exit
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+display version and exit
+.TP
+\fB\-C\fR, \fB\-\-citation\fR
+display phyx citation and exit
+.SH CITATION
+Brown, Joseph W., Joseph F. Walker, and Stephen A. Smith. 2017. Phyx: phylogenetic tools for unix. Bioinformatics 33(12): 1886-1888. https://doi.org/10.1093/bioinformatics/btx063.
+.SH AUTHOR
+Written by Joseph W. Brown
+.SH "REPORTING BUGS"
+Report bugs to: <https://github.com/FePhyFoFum/phyx/issues>
+phyx home page: <https://github.com/FePhyFoFum/phyx>
+.SH COPYRIGHT
+Copyright \(co 2021 FePhyFoFum
+License GPLv3
diff --git a/src/man/pxsstat.1.in b/src/man/pxsstat.1.in
index 57e0cbc..9d67b90 100644
--- a/src/man/pxsstat.1.in
+++ b/src/man/pxsstat.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXSSTAT "1" "July 2021" "pxsstat 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXSSTAT "1" "July 2023" "pxsstat 1.3" "User Commands"
.SH NAME
pxsstat \- manual page for pxsstat 1.3
.SH SYNOPSIS
diff --git a/src/man/pxstrec.1.in b/src/man/pxstrec.1.in
index 14cb25f..580c70c 100644
--- a/src/man/pxstrec.1.in
+++ b/src/man/pxstrec.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXSTREC "1" "July 2021" "pxstrec 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXSTREC "1" "July 2023" "pxstrec 1.3" "User Commands"
.SH NAME
pxstrec \- manual page for pxstrec 1.3
.SH SYNOPSIS
diff --git a/src/man/pxsw.1.in b/src/man/pxsw.1.in
index 89dbe38..5570dcf 100644
--- a/src/man/pxsw.1.in
+++ b/src/man/pxsw.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXSW "1" "July 2021" "pxsw 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXSW "1" "July 2023" "pxsw 1.3" "User Commands"
.SH NAME
pxsw \- manual page for pxsw 1.3
.SH SYNOPSIS
diff --git a/src/man/pxt2new.1.in b/src/man/pxt2new.1.in
index 28bfed6..02561e5 100644
--- a/src/man/pxt2new.1.in
+++ b/src/man/pxt2new.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXT2NEW "1" "July 2021" "pxt2new 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXT2NEW "1" "July 2023" "pxt2new 1.3" "User Commands"
.SH NAME
pxt2new \- manual page for pxt2new 1.3
.SH SYNOPSIS
diff --git a/src/man/pxt2nex.1.in b/src/man/pxt2nex.1.in
index 4bdbbcd..21fba8c 100644
--- a/src/man/pxt2nex.1.in
+++ b/src/man/pxt2nex.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXT2NEX "1" "July 2021" "pxt2nex 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXT2NEX "1" "July 2023" "pxt2nex 1.3" "User Commands"
.SH NAME
pxt2nex \- manual page for pxt2nex 1.3
.SH SYNOPSIS
diff --git a/src/man/pxtcol.1.in b/src/man/pxtcol.1.in
index 4dc5d2e..ab70710 100644
--- a/src/man/pxtcol.1.in
+++ b/src/man/pxtcol.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXTCOL "1" "July 2021" "pxtcol 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXTCOL "1" "July 2023" "pxtcol 1.3" "User Commands"
.SH NAME
pxtcol \- manual page for pxtcol 1.3
.SH SYNOPSIS
diff --git a/src/man/pxtcomb.1.in b/src/man/pxtcomb.1.in
index acc79ea..32bcc2b 100644
--- a/src/man/pxtcomb.1.in
+++ b/src/man/pxtcomb.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXTCOMB "1" "July 2021" "pxtcomb 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXTCOMB "1" "July 2023" "pxtcomb 1.3" "User Commands"
.SH NAME
pxtcomb \- manual page for pxtcomb 1.3
.SH SYNOPSIS
diff --git a/src/man/pxtgen.1.in b/src/man/pxtgen.1.in
index e094098..7d395f5 100644
--- a/src/man/pxtgen.1.in
+++ b/src/man/pxtgen.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXTGEN "1" "July 2021" "pxtgen 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXTGEN "1" "July 2023" "pxtgen 1.3" "User Commands"
.SH NAME
pxtgen \- manual page for pxtgen 1.3
.SH SYNOPSIS
diff --git a/src/man/pxtlate.1.in b/src/man/pxtlate.1.in
index 9ba677a..c10a8fe 100644
--- a/src/man/pxtlate.1.in
+++ b/src/man/pxtlate.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXTLATE "1" "July 2021" "pxtlate 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXTLATE "1" "July 2023" "pxtlate 1.3" "User Commands"
.SH NAME
pxtlate \- manual page for pxtlate 1.3
.SH SYNOPSIS
diff --git a/src/man/pxtrt.1.in b/src/man/pxtrt.1.in
index 0cfe77e..bb359a3 100644
--- a/src/man/pxtrt.1.in
+++ b/src/man/pxtrt.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXTRT "1" "July 2021" "pxtrt 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXTRT "1" "July 2023" "pxtrt 1.3" "User Commands"
.SH NAME
pxtrt \- manual page for pxtrt 1.3
.SH SYNOPSIS
diff --git a/src/man/pxtscale.1.in b/src/man/pxtscale.1.in
index a9c0c2d..7920847 100644
--- a/src/man/pxtscale.1.in
+++ b/src/man/pxtscale.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXTSCALE "1" "July 2021" "pxtscale 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXTSCALE "1" "July 2023" "pxtscale 1.3" "User Commands"
.SH NAME
pxtscale \- manual page for pxtscale 1.3
.SH SYNOPSIS
diff --git a/src/man/pxvcf2fa.1.in b/src/man/pxvcf2fa.1.in
index adc639b..96abeef 100644
--- a/src/man/pxvcf2fa.1.in
+++ b/src/man/pxvcf2fa.1.in
@@ -1,5 +1,5 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
-.TH PXVCF2FA "1" "July 2021" "pxvcf2fa 1.3" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.49.1.
+.TH PXVCF2FA "1" "July 2023" "pxvcf2fa 1.3" "User Commands"
.SH NAME
pxvcf2fa \- manual page for pxvcf2fa 1.3
.SH SYNOPSIS
diff --git a/src/recode.cpp b/src/recode.cpp
index 4eec83e..194feee 100644
--- a/src/recode.cpp
+++ b/src/recode.cpp
@@ -132,33 +132,33 @@ std::string SequenceRecoder::get_recoded_seq (const std::string& origseq) {
void SequenceRecoder::recode_seq (std::string& s) {
if (R_) {
- s = std::regex_replace (s, r_, "R");
+ s = std::regex_replace(s, r_, "R");
}
if (Y_) {
- s = std::regex_replace (s, y_, "Y");
+ s = std::regex_replace(s, y_, "Y");
}
if (S_) {
- s = std::regex_replace (s, s_, "S");
+ s = std::regex_replace(s, s_, "S");
}
if (W_) {
- s = std::regex_replace (s, w_, "W");
+ s = std::regex_replace(s, w_, "W");
}
if (M_) {
- s = std::regex_replace (s, m_, "M");
+ s = std::regex_replace(s, m_, "M");
}
if (K_) {
- s = std::regex_replace (s, k_, "K");
+ s = std::regex_replace(s, k_, "K");
}
if (B_) {
- s = std::regex_replace (s, b_, "B");
+ s = std::regex_replace(s, b_, "B");
}
if (D_) {
- s = std::regex_replace (s, d_, "D");
+ s = std::regex_replace(s, d_, "D");
}
if (H_) {
- s = std::regex_replace (s, h_, "H");
+ s = std::regex_replace(s, h_, "H");
}
if (V_) {
- s = std::regex_replace (s, v_, "V");
+ s = std::regex_replace(s, v_, "V");
}
}
diff --git a/src/relabel.cpp b/src/relabel.cpp
index 6d52798..ef15270 100644
--- a/src/relabel.cpp
+++ b/src/relabel.cpp
@@ -1,6 +1,7 @@
#include <iostream>
#include <fstream>
#include <set>
+#include <regex>
#include "relabel.h"
#include "tree.h"
@@ -10,11 +11,28 @@
Relabel::Relabel (const std::string& cnamesf, const std::string& nnamesf,
const bool& verbose):num_taxa_(0) {
- store_name_lists (cnamesf, nnamesf);
+ store_name_lists(cnamesf, nnamesf);
verbose_ = verbose;
}
+// regex relabelling constructor: no name files are read, so num_taxa_ starts at 0
+Relabel::Relabel (std::string& regex_pattern, std::string& regex_replace):num_taxa_(0) {
+    regex_pattern_ = regex_pattern; // compiled into a std::regex; a malformed pattern throws std::regex_error
+    regex_replace_ = regex_replace; // replacement text later passed to std::regex_replace
+    verbose_ = false;
+}
+
+void Relabel::regex_relabel_tree (Tree * tr) {
+ for (unsigned int i = 0; i < tr->getExternalNodeCount(); i++) {
+ std::string str = tr->getExternalNode(i)->getName();
+ std::string res = std::regex_replace(str, regex_pattern_, regex_replace_);
+ if (str != res) {
+ tr->getExternalNode(i)->setName(res);
+ }
+ }
+}
+
void Relabel::store_name_lists (const std::string& cnamesf,
const std::string& nnamesf) {
std::vector<std::string> terp;
@@ -139,6 +157,15 @@ bool Relabel::relabel_sequence (Sequence& seq) {
}
+void Relabel::regex_relabel_sequence (Sequence& seq) {
+ std::string str = seq.get_id();
+ std::string res = std::regex_replace(str, regex_pattern_, regex_replace_);
+ if (str != res) {
+ seq.set_id(res);
+ }
+}
+
+
std::set<std::string> Relabel::get_names_to_replace () const {
std::set<std::string> orig(old_names_.begin(), old_names_.end());
return orig;
diff --git a/src/relabel.h b/src/relabel.h
index de30878..be0ae49 100644
--- a/src/relabel.h
+++ b/src/relabel.h
@@ -4,6 +4,7 @@
#include <map>
#include <vector>
#include <set>
+#include <regex>
class Tree; // forward declaration
class Sequence; // forward declaration
@@ -11,6 +12,8 @@ class Sequence; // forward declaration
class Relabel {
private:
int num_taxa_;
+ std::regex regex_pattern_;
+ std::string regex_replace_;
std::vector<std::string> old_names_;
std::vector<std::string> new_names_;
std::map<std::string, std::string> name_map_;
@@ -20,8 +23,11 @@ private:
public:
Relabel (const std::string& cnamesf, const std::string& nnamesf, const bool& verbose);
+ Relabel (std::string& regex_pattern, std::string& regex_replace);
void relabel_tree (Tree * tr);
+ void regex_relabel_tree (Tree * tr);
bool relabel_sequence (Sequence& seq);
+ void regex_relabel_sequence (Sequence& seq);
std::set<std::string> get_names_to_replace () const;
};
diff --git a/src/run_tests.py b/src/run_tests.py
index c7c0f4b..205b533 100644
--- a/src/run_tests.py
+++ b/src/run_tests.py
@@ -46,8 +46,24 @@ def test_program(name):
t = None
print("TESTING", name)
if name == "pxaa2cdn":
+ res = True
+ print("aa2cdn ", end = '')
cm = "./pxaa2cdn -a TEST/AA.fa -n TEST/un_aln_nuc.fa"
t = '>Sequence1\nATG---AAA---AAG\n>Sequence2\nATG------------\n>Sequence3\nATGATGATGATGATG\n>Sequence5\nATGATGATGATGATG\n'
+ if not check_individual_results(cm, t):
+ res = False
+ print("aa2cdn (remove last codon, all) ", end = '')
+ cm = "./pxaa2cdn -r -a TEST/AA.fa -n TEST/un_aln_nuc_with-stop.fa"
+ t = '>Sequence1\nATG---AAA---AAG\n>Sequence2\nATG------------\n>Sequence3\nATGATGATGATGATG\n>Sequence5\nATGATGATGATGATG\n'
+ if not check_individual_results(cm, t):
+ res = False
+ print("aa2cdn (remove stop codon, mixed) ", end = '')
+ cm = "./pxaa2cdn -s -a TEST/AA.fa -n TEST/un_aln_nuc_mixed-stop.fa"
+ t = '>Sequence1\nATG---AAA---AAG\n>Sequence2\nATG------------\n>Sequence3\nATGATGATGATGATG\n>Sequence5\nATGATGATGATGATG\n'
+ if not check_individual_results(cm, t):
+ res = False
+ print_success(res)
+ return res
elif name == "pxbdfit":
res = True
print("yule ", end = '')
@@ -103,11 +119,21 @@ def test_program(name):
t = '>TaxonA\nAAATTTCCCTGTCCCTTTAAQVFTHNEVNCRWHIEKGILTHEPLDHFDAHNHHILANNVETPVIYRPLWWRTRKAMDMWFLKSSMHWDFQENFDTMAYSMHAPQQQSKHSAHYRQQVNNLRRFIQPQMGHGRKMNNKQGCRMHYFIAYASLQFNSLYHAEMLKQLWTGGLDFSGEKSMPNWAKMSGESEGWSHWVLMLMRNYYRHGKLHGCSHRKEILESEKLRRIGIMRWWKQYAFKVRDISGYRFDRGYFGCKTLLNDAGCKVNCNWPAAATTTCCCTGTCCCTTTAA\n>TaxonB\nGCTCGAGGGGCCCCAAGACCQVFYHNEVNCRWINEKGILTHEPLDHFDAHNQHILANNVETPVIYRPLWWRTRKAMDRWFYKTSVHWDFQERFDTHGYSMVAPQQQSCNRAEYRQVVNNLRRFIQPQDGHGRKMNVKQGCRMHYFIARASLEFNSGYHAEYLKQLWLGGLDFSGEKSMLNWAKMSGESEGWSHWVYMLMRNYYRHGKLHGCSHRKEALESEKLRCIGIMRWWKQYAFKVRDISGYRFDRCYLICKTLLYDDGCKANCNWPGCTCGAGGGGCCCCAAGACC\n>TaxonC\nACGCTCCCCCTTAAAAATGAQVFYATEVNFRDIGSEGFLTCEFLDHFDAHEEKLLHNYVEKPWIEEPLWWRTRKRDDMYFSYHWDFLDFQENFDMHAYSMVMPQQHSYRFGEYRQVVNNLYYFNAGPMGHRQKMNNKQGCRMHYFIAYAQAQFNSGYHAEYLKMLSTGGLDLSGESSDPNWAKMSGESEGWSHWVAMLMRNYYRHGKSHGCKMFKHAYESEKLRCIGISSSGDQYCFTVRDSSGYRFDACVLECKTSLAACGHNANCGWPACGCTCCCCCTTAAAAATGA\n>TaxonD\nTCCTTGTTCAACTCCGGTGGHWPYHNEWNQRGWGVFRYLTHEFMCHQHRLFYHICHNYDHGPTTERFKAITTWTAQDRWKSGHSVHGDFQENFCTYGMKMRFQNQHSCNRHIGRRLQNNLRFNYCYCQIKRYKGNKCQFVQLKVDIHDKSLAANLIAASMYLRHMWFGPLTPYGFAVPPNWMMVSGPSFGKSWHMQIQIRNAYVTWKYRGKGYSICQSPQNKLMWPNRYWMIEIYTFIVTDIRTALFIQISLSEYAFSCDFIPRQDMNWQTCCTTGTTCAACTCCGGTGG\n>TaxonE\nTTACTATTCCCCCCCGCCGGAWWYHNTLNCRGWGEFRYLTHEFCCHWGNAFYHICHNYDNGPTTERFLTIQTWTARDRWCCKHSVHWNFQENFSTYGLKQRFQNQHSCNRHIVRQLWNNLRFFICICQIKRGKGNKCQFCILHYFIHLPSLACRLSYAFGYLRHNWFGTLTPYGFASPPNWYMRSEPSFGNSWNMAIQIANAFRTWKYRECGYSIKQSCDSTLMWPHRYWMIAQYTFQVTDSRTYDFITSSLVVYAFLCDAQARADCIDQTTACTATTCCCCCCCGCCGG\n'
elif name == "pxclsq":
res = True
- print("delete sites ", end = '')
+ print("delete proportion ", end = '')
cm = "./pxclsq -s TEST/Alignment -p 0.6"
t = '>Sequence1\nAAATTTCCCTTTCCCTTTAAAAAATTTCCCTTTCCCTTTAAA\n>Sequence2\nGGGGGGGGGGCCCCCCCCCCAGGGGGGGGGGCCCCCCCCCCA\n>Sequence3\nCCCCCCCCCCCCAAAAAAAAACCCCCCCCCCCCAAAAAAAAA\n>Sequence9\nAAATTTCCCTTTCCCTTTAAAAAATTTCCCTTTCCCTTTAAA\n>Sequence10\nGGGGGGGGGGCCCCCCCCCCAGGGGGGGGGGCCCCCCCCCCA\n>Sequence11\nCCCCCCCCCCCCAAAAAAAAACCCCCCCCCCCCAAAAAAAAA\n>Sequence8\nTTTTTTTTCCCCCCCGGGGGATTTTTTTTCCCCCCCGGGGGA\n'
if not check_individual_results(cm, t):
res = False
+ print("remove empty ", end = '')
+ cm = "./pxclsq -s TEST/Alignment_empty-sites -e"
+ t = '>Drosophila_setosifrons\n--TATAATAATATTCTTATATAAAATATAATTATTTTTATATT-TGGTCCNNTTTCGTACTAAAATATAACAA-TTTTTT\n>Drosophila_adunca\n--T-TAATAATATTCTTATTAAAAATATAATTATTTAAATATT-TGGTCCCCTTTCGTACTAAAATATTTTAAATTTTTT\n>Drosophila_oahuensis\n--T-TAATGAAATTCTTGCTAAAAATATGATTATTTTAATATT-TGGTCCNNTTTCGTACTAAAATATTATAA-TTTTTT\n>Drosophila_picticornis\n--T-TAATAATATTCTTATTTAAAATATAATTATTTTAACATT-TGGTCCCCTTTCGTACTAAAATGTTATAAGTTTTTT\n>Drosophila_primaeva\n--TATAATAATATTCTTACCAAAAATATAATTATTTTAATATT-TGGTCCNNTTTCGTACTAAAATATTATAA-TTTTTT\n>Drosophila_nigribasis\n--T-TAATAATATTCTTATTAAAAATATAATTATTTTAGTATT-TGGTCCNNTTTCGTACTAAAATACATTAA-TTTTTT\n>Drosophila_obscuripes\nAAT-TAATAATATTCTTACTAAAAATATAATTATTTTAGTATT-TGGTCCNNTTTCGTACTAAAATACTATAA-TTTTTT\n>Drosophila_silvestris\nNNT-TAATAATATTCTTATTTAAAATATAATTATTTTAATATT-TGGTCCNNTTTCGTACTAAAATATTATAA-TTTTTT\n>Drosophila_heteroneura\n--T-TAATAATATTCTTATTTAAAATATAATTATTTTAATATT-TGGTCCNNTTTCGTACTAAAATATTATAA-TTTTTT\n>Drosophila_differens\n--T-TAATAATATTCTTATTAAAAATATAATTATCTTAATATT-TGGTCCNNTTTCGTACTAAAATATTATAA-TTTTTT\n>Drosophila_planitibia\n--T-TAATAATATTCTTATTAAAAATATAATTATTTTAATATT-TGGTCCNNTTTCGTACTAAAATATTATAA-TTTTTT\n>Drosophila_hemipeza\n--T-TAATAATATTCTTAATAAAAATATAATTATTTTAATATT-TGGTCCCCTTTCGTACTAAAATATTATAA-TTTTTT\n>Drosophila_ingens\n--T-TAATAATATTCTTATTAAAAATATAATTATTTTAATATT-TGGTCCNNTTTCGTACTAAAATATTATAA-TTTTTT\n>Drosophila_neopicta\n--T-TAATAATATTCTTATTAAAAATATAATTATTTTCATATTATGGTCCNNTTTCGTACTAAAATATAATAA-TTTTTT\n>Drosophila_substenoptera\n--T-TAATAATATTCTTATTAAAAATACAATTATTTTCATATT-TGGTCCNNTTTCGTACTAAAATATAATAA-TTTTTT\n>Drosophila_neoperkinsi\n--T-TAATAAGATTCTTACTAAAAATATAATTATTTTAGTATT-TGGTCCNNTTTCGTACTAAAATACTATAA-TTTTTT\n>Drosophila_melanocephala\n--T-TAATAATATTCTTACTAAAAATATAATTATTTTAGTATT-TGGTCCNNTTTCGTACTAAAATACTATAA-TTTTTT\n>Drosophila_hanaulae\n--TATAATAATATTCTTACTAAAAATATAATTATTTTAGTATT-TGGTCCCCTTTCGTACTAAAATACTATAAGTTTTTT\n>Drosophila_cyrtoloma\n--T-TAATAATATTCTTACTAAAAATATAATTATTTTAGTATT-TGGTCCNNTTTCGTACTAAAATACTA
TAA-TTTTTT\n'
+ if not check_individual_results(cm, t):
+ res = False
+ print("min 4 chars per site ", end = '')
+ cm = "./pxclsq -s TEST/Alignment_empty-sites -m 4"
+ t = '>Drosophila_setosifrons\nTTAATAATATTCTTATATAAAATATAATTATTTTTATATTTGGTCCNNTTTCGTACTAAAATATAACAATTTTTT\n>Drosophila_adunca\nTTAATAATATTCTTATTAAAAATATAATTATTTAAATATTTGGTCCCCTTTCGTACTAAAATATTTTAATTTTTT\n>Drosophila_oahuensis\nTTAATGAAATTCTTGCTAAAAATATGATTATTTTAATATTTGGTCCNNTTTCGTACTAAAATATTATAATTTTTT\n>Drosophila_picticornis\nTTAATAATATTCTTATTTAAAATATAATTATTTTAACATTTGGTCCCCTTTCGTACTAAAATGTTATAATTTTTT\n>Drosophila_primaeva\nTTAATAATATTCTTACCAAAAATATAATTATTTTAATATTTGGTCCNNTTTCGTACTAAAATATTATAATTTTTT\n>Drosophila_nigribasis\nTTAATAATATTCTTATTAAAAATATAATTATTTTAGTATTTGGTCCNNTTTCGTACTAAAATACATTAATTTTTT\n>Drosophila_obscuripes\nTTAATAATATTCTTACTAAAAATATAATTATTTTAGTATTTGGTCCNNTTTCGTACTAAAATACTATAATTTTTT\n>Drosophila_silvestris\nTTAATAATATTCTTATTTAAAATATAATTATTTTAATATTTGGTCCNNTTTCGTACTAAAATATTATAATTTTTT\n>Drosophila_heteroneura\nTTAATAATATTCTTATTTAAAATATAATTATTTTAATATTTGGTCCNNTTTCGTACTAAAATATTATAATTTTTT\n>Drosophila_differens\nTTAATAATATTCTTATTAAAAATATAATTATCTTAATATTTGGTCCNNTTTCGTACTAAAATATTATAATTTTTT\n>Drosophila_planitibia\nTTAATAATATTCTTATTAAAAATATAATTATTTTAATATTTGGTCCNNTTTCGTACTAAAATATTATAATTTTTT\n>Drosophila_hemipeza\nTTAATAATATTCTTAATAAAAATATAATTATTTTAATATTTGGTCCCCTTTCGTACTAAAATATTATAATTTTTT\n>Drosophila_ingens\nTTAATAATATTCTTATTAAAAATATAATTATTTTAATATTTGGTCCNNTTTCGTACTAAAATATTATAATTTTTT\n>Drosophila_neopicta\nTTAATAATATTCTTATTAAAAATATAATTATTTTCATATTTGGTCCNNTTTCGTACTAAAATATAATAATTTTTT\n>Drosophila_substenoptera\nTTAATAATATTCTTATTAAAAATACAATTATTTTCATATTTGGTCCNNTTTCGTACTAAAATATAATAATTTTTT\n>Drosophila_neoperkinsi\nTTAATAAGATTCTTACTAAAAATATAATTATTTTAGTATTTGGTCCNNTTTCGTACTAAAATACTATAATTTTTT\n>Drosophila_melanocephala\nTTAATAATATTCTTACTAAAAATATAATTATTTTAGTATTTGGTCCNNTTTCGTACTAAAATACTATAATTTTTT\n>Drosophila_hanaulae\nTTAATAATATTCTTACTAAAAATATAATTATTTTAGTATTTGGTCCCCTTTCGTACTAAAATACTATAATTTTTT\n>Drosophila_cyrtoloma\nTTAATAATATTCTTACTAAAAATATAATTATTTTAGTATTTGGTCCNNTTTCGTACTAAAATACTATAATTTTTT\n'
+ if not check_individual_results(cm, t):
+ res = False
print("info by taxon ", end = '')
cm = "./pxclsq -s TEST/Alignment -i -t"
t = 'Length of sequences: 82 characters\nTaxon Missing Prop.\n------------------------------------\nSequence1 0 0\nSequence2 0 0\nSequence3 0 0\nSequence4 42 0.512195\nSequence9 40 0.487805\nSequence10 40 0.487805\nSequence11 40 0.487805\nSequence8 40 0.487805\n'
@@ -209,8 +235,7 @@ def test_program(name):
res = False
print("indidivual stats ", end = '')
cm = "./pxlssq -s TEST/test.fa -i"
- # same here
- t = 'A C G T - ? Nchar\nTaxonA 0.25 0.3 0.05 0.4 0 0 20\nTaxonB 0.2 0.4 0.35 0.05 0 0 20\nTaxonC 0.35 0.35 0.1 0.2 0 0 20\nTaxonD 0.1 0.3 0.25 0.35 0 0 20\nTaxonE 0.1 0.5 0.15 0.25 0 0 20\n'
+ t = 'taxon A C G T - ? Nchar\nTaxonA 0.25 0.3 0.05 0.4 0 0 20\nTaxonB 0.2 0.4 0.35 0.05 0 0 20\nTaxonC 0.35 0.35 0.1 0.2 0 0 20\nTaxonD 0.1 0.3 0.25 0.35 0 0 20\nTaxonE 0.1 0.5 0.15 0.25 0 0 20\n'
if not check_individual_results(cm, t):
res = False
print("binary data ", end = '')
@@ -267,6 +292,11 @@ def test_program(name):
t = 'tree #: 0\nrooted: true\nbinary: true\nnterminal: 29\nninternal: 28\nbranch lengths: true\nrttipvar: 8.9928e-07\ntreelength: 0.13572\nultrametric: false\nrootheight: NA\n'
if not check_individual_results(cm, t):
res = False
+ print("terminal labels ", end = '')
+ cm = "./pxlstr -t TEST/binary_test.tre -i"
+ t = 'TaxonX\nTaxonC\nTaxonA1\nTaxonA2\nTaxonA3\nTaxonB\nTaxonW\nTaxonD\nTaxonE\nTaxonX\nTaxonW\nTaxonD\nTaxonE\n'
+ if not check_individual_results(cm, t):
+ res = False
print_success(res)
return res
elif name == "pxmono":
@@ -313,11 +343,31 @@ def test_program(name):
cm = "./pxrevcomp -s TEST/test.fa"
t = '>TaxonA\nTTAAAGGGACAGGGAAATTT\n>TaxonB\nGGTCTTGGGGCCCCTCGAGC\n>TaxonC\nTCATTTTTAAGGGGGAGCGT\n>TaxonD\nCCACCGGAGTTGAACAAGGA\n>TaxonE\nCCGGCGGGGGGGAATAGTAA\n'
elif name == "pxrls":
+ res = True
+ print("name files ", end = '')
cm = "./pxrls -s TEST/test.fa -c TEST/oldnames.txt -n TEST/newnames.txt"
t = '>Cinclus_mexicanus\nAAATTTCCCTGTCCCTTTAA\n>Anodorhynchus_hyacinthinus\nGCTCGAGGGGCCCCAAGACC\n>Stellula_calliope\nACGCTCCCCCTTAAAAATGA\n>Anser_anser\nTCCTTGTTCAACTCCGGTGG\n>Gallus_gallus\nTTACTATTCCCCCCCGCCGG\n'
+ if not check_individual_results(cm, t):
+ res = False
+ print("regex ", end = '')
+ cm = "./pxrls -s TEST/test.fa -p Taxon -r species_"
+ t = '>species_A\nAAATTTCCCTGTCCCTTTAA\n>species_B\nGCTCGAGGGGCCCCAAGACC\n>species_C\nACGCTCCCCCTTAAAAATGA\n>species_D\nTCCTTGTTCAACTCCGGTGG\n>species_E\nTTACTATTCCCCCCCGCCGG\n'
+ if not check_individual_results(cm, t):
+ res = False
+ return res
elif name == "pxrlt":
+ res = True
+ print("name files ", end = '')
cm = "./pxrlt -t TEST/kingdoms.tre -c TEST/kingdoms.oldnames.txt -n TEST/kingdoms.newnames.txt"
t = '(Eubacteria:4200,(Archaea:3806.0102499999998145,((((SAR:1626.0427319999998872,Plant:1626.0276670000000649):72.3801839999999999,Excavata:1698.4229159999999865):74.7312199999999933,Amoebozoa:1773.1541360000001077):7.9375349999999996,(Animal:1295.717544000000089,Fungi:1295.7033610000000863):485.3800600000000145):2024.9200000000000728):394);\n'
+ if not check_individual_results(cm, t):
+ res = False
+ print("regex ", end = '')
+ cm = "./pxrlt -t TEST/test.tre -p Taxon -r species_"
+ t = '(((species_A:0.1,species_B:0.03):0.125,species_C:0.25):0.4,(species_D:0.23,species_E:0.16):0.3);\n'
+ if not check_individual_results(cm, t):
+ res = False
+ return res
elif name == "pxrmk":
cm = "./pxrmk -t TEST/knuckle_tree.tre"
t = '(((((s4:0.1190332191,s5:0.1190332191)C:0.02544171746,s6:0.1444749366)D:0.7647640019,s7:0.9092389385)E:0.76874935,(s3:1.2572336289999999,(s1:0.3603553431,s2:0.3603553431)A:1.1968782862)B:0.5207546592)F:0.06349158892,((s8:0.3106889265,s9:0.3106889265)G:0.1031508249,s10:0.4138397514)H:1.3276401259999999);\n'
@@ -556,7 +606,7 @@ def test_program(name):
# this are skipped in the testing because they are preliminary, tests will
# be added as they become more permanent.
-notest = ["pxnni"]
+notest = ["pxnni", "pxssplit"]
# (for now) ignore tests that fail due to random numbers
ignore = ["pxbdsim", "pxboot", "pxpoly"]
@@ -581,6 +631,7 @@ if __name__ == "__main__":
if i in notest:
print(bcolors.WARNING + "skipping " + i + bcolors.ENDC)
skipped += 1
+ print("=================")
continue
t = test_program(i)
if t == True:
diff --git a/src/seq_info.cpp b/src/seq_info.cpp
index 76806a8..6f6979e 100644
--- a/src/seq_info.cpp
+++ b/src/seq_info.cpp
@@ -156,8 +156,8 @@ void SeqInfo::read_in_alignment () {
if (file_ntax != 0) {
if (file_ntax != static_cast<int>(seqs_.size())) {
std::cerr << "Error: number of taxa declared in the file ("
- << ") does not match the number read (" << seqs_.size()
- << "). Exiting." << std::endl;
+ << file_ntax << ") does not match the number read ("
+ << seqs_.size() << "). Exiting." << std::endl;
exit(1);
}
}
@@ -186,8 +186,10 @@ void SeqInfo::return_freq_table () {
if (output_indiv_) {
// need to take into account longest_tax_label_
longest_tax_label_ = get_longest_label(taxon_labels_);
- std::string pad = std::string(longest_tax_label_, ' ');
// header
+ (*poos_) << "taxon";
+ std::string pad = std::string(longest_tax_label_ - 5, ' ');
+
(*poos_) << pad << " ";
for (char seq_char : seq_chars_) {
(*poos_) << std::right << std::setw(colWidth) << seq_char << " ";
diff --git a/src/seq_utils.cpp b/src/seq_utils.cpp
index 4dbdc08..218f51a 100644
--- a/src/seq_utils.cpp
+++ b/src/seq_utils.cpp
@@ -427,10 +427,10 @@ bool is_prot_char (char& residue) {
}
-// ignore ambiguity codes
+// ignore ambiguity codes. include N for seqs that use N instead of ? or -
int count_dna_chars (const std::string& str) {
int ndna = 0;
- std::string dnaChars = "ACGT";
+ std::string dnaChars = "ACGTN";
for (char dnaChar : dnaChars) {
ndna += std::count(str.begin(), str.end(), dnaChar);
}
@@ -471,6 +471,23 @@ bool is_codon_alignment (const std::vector<Sequence>& seqs) {
}
+// looking for TAG, TGA, TAA, at least until alien DNA is decoded
+bool is_stop_terminated (std::string& seq) {
+ bool stop = false;
+
+ // convert to uppercase for weirdo Nat files :)
+ std::string tester = string_to_upper(seq);
+
+ if (ends_with(tester, "TAG")) {
+ stop = true;
+ } else if (ends_with(tester, "TGA")) {
+ stop = true;
+ } else if (ends_with(tester, "TAA")) {
+ stop = true;
+ }
+ return stop;
+}
+
// these are not currently used
/*
void populate_codon_list (std::vector<std::string> * codon_list) {
diff --git a/src/seq_utils.h b/src/seq_utils.h
index b35c182..f8e9648 100644
--- a/src/seq_utils.h
+++ b/src/seq_utils.h
@@ -30,6 +30,7 @@ bool is_prot_char (char& residue);
int count_dna_chars (const std::string& str);
bool is_aligned (const std::vector<Sequence>& seqs);
bool is_codon_alignment (const std::vector<Sequence>& seqs);
+bool is_stop_terminated (std::string& seq);
// not used
/*
diff --git a/src/sequence.cpp b/src/sequence.cpp
index 62929f3..acf4e7b 100644
--- a/src/sequence.cpp
+++ b/src/sequence.cpp
@@ -69,6 +69,7 @@ void Sequence::set_alpha (seqAlpha s) {
// figure out the sequence type.
// not perfect: for _very_ short AA seqs it is possible all chars are valid nuc chars
+// it was also incorrect when the only characters were A, C, G, T, and N; that has been fixed
void Sequence::infer_alpha () {
std::string str = seq_;
diff --git a/src/tree_utils.cpp b/src/tree_utils.cpp
index 9e778c4..b49ced6 100644
--- a/src/tree_utils.cpp
+++ b/src/tree_utils.cpp
@@ -688,14 +688,15 @@ std::vector<std::string> get_names_in_tree_regex (Tree * tr,
}
-// returns a sorted vector of all terminal labels
+// returns a vector of all terminal labels, in external-node index order
+// (no longer sorted, since we may want the ordering in some other aspect)
std::vector<std::string> get_tip_labels (Tree * tr) {
unsigned int nc = tr->getExternalNodeCount();
std::vector<std::string> labels(static_cast<size_t>(nc), "");
for (unsigned int i = 0; i < nc; i++) {
labels[static_cast<size_t>(i)] = tr->getExternalNode(i)->getName();
}
- sort(labels.begin(), labels.end());
+ //sort(labels.begin(), labels.end());
return labels;
}
diff --git a/src/utils.cpp b/src/utils.cpp
index 9099983..49457eb 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -13,6 +13,7 @@
#include <chrono>
#include <poll.h>
#include <unistd.h>
+#include <regex>
#include "utils.h"
#include "superdouble.h"
@@ -815,6 +816,24 @@ bool check_for_input_to_stream () {
}
+// string checks: does str end with suffix / start with prefix?
+bool ends_with (const std::string& str, const std::string& suffix) {
+ return str.size() >= suffix.size() && 0 == str.compare(str.size()-suffix.size(), suffix.size(), suffix);
+}
+
+
+bool starts_with (const std::string& str, const std::string& prefix) {
+ return str.size() >= prefix.size() && 0 == str.compare(0, prefix.size(), prefix);
+}
+
+
+void remove_last_N (std::string &str, const long unsigned int& n) {
+ if (str.length() < n) {
+ return;
+ }
+ str.erase(str.length() - n);
+}
+
// not using right now
// return elements in a *not* found in b
std::vector<std::string> get_complement (std::vector<std::string>& a, std::vector<std::string>& b) {
@@ -871,3 +890,16 @@ std::vector<std::string> regex_search_labels (const std::vector<std::string>& na
}
return results;
}
+
+
+// regex-substitute every label; one result per input name, same order. not currently used
+std::vector<std::string> regex_replace_labels (const std::vector<std::string>& names,
+        const std::string& pattern, const std::string& replacetext) {
+    std::vector<std::string> results;
+    results.reserve(names.size());
+    const std::regex regexp(pattern); // a malformed pattern throws std::regex_error
+    for (const auto & name : names) {
+        results.push_back(std::regex_replace(name, regexp, replacetext));
+    }
+    return results;
+}
diff --git a/src/utils.h b/src/utils.h
index 917ad25..f681bb8 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -7,7 +7,6 @@
#include <iostream>
#include <sstream>
#include <algorithm>
-#include <regex>
#include "superdouble.h"
@@ -173,15 +172,19 @@ template<typename T> bool essentially_equal (T a, T b) {
return equal;
}
-
bool all_equal (std::vector<double> vals);
bool all_equal (std::vector<int> vals);
bool all_equal (std::vector<unsigned int> vals);
-
// a basic poll checker for stream inputs
bool check_for_input_to_stream ();
+// string checks. this is kinda ridiculous, amirite?!?
+bool ends_with (const std::string& str, const std::string& suffix);
+bool starts_with (const std::string& str, const std::string& prefix);
+
+void remove_last_N(std::string &str, const long unsigned int& n);
+
std::vector<std::string> get_complement (std::vector<std::string>& a, std::vector<std::string>& b);
std::string peek_line (std::istream& pios);
@@ -189,5 +192,7 @@ std::vector<std::string> peek_lines (std::istream& pios, const int& n);
std::vector<std::string> regex_search_labels (const std::vector<std::string>& names,
const std::string& pattern);
+std::vector<std::string> regex_replace_labels (const std::vector<std::string>& names,
+ const std::string& pattern, const std::string& replacetext);
#endif /* PX_UTILS_H */