17 files changed, 2374 insertions, 173 deletions
diff --git a/passes/techmap/Makefile.inc b/passes/techmap/Makefile.inc
index 4faa0ab0..cf9e198a 100644
--- a/passes/techmap/Makefile.inc
+++ b/passes/techmap/Makefile.inc
@@ -36,6 +36,7 @@ OBJS += passes/techmap/attrmvcp.o
 OBJS += passes/techmap/attrmap.o
 OBJS += passes/techmap/zinit.o
 OBJS += passes/techmap/dff2dffs.o
+OBJS += passes/techmap/flowmap.o
 endif
 
 GENFILES += passes/techmap/techmap.inc
diff --git a/passes/techmap/abc.cc b/passes/techmap/abc.cc
index adc9614a..75daaa69 100644
--- a/passes/techmap/abc.cc
+++ b/passes/techmap/abc.cc
@@ -29,17 +29,17 @@
 // Kahn, Arthur B. (1962), "Topological sorting of large networks", Communications of the ACM 5 (11): 558-562, doi:10.1145/368996.369025
 // http://en.wikipedia.org/wiki/Topological_sorting
 
-#define ABC_COMMAND_LIB "strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put"
-#define ABC_COMMAND_CTR "strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put; buffer; upsize {D}; dnsize {D}; stime -p"
-#define ABC_COMMAND_LUT "strash; ifraig; scorr; dc2; dretime; strash; dch -f; if; mfs2"
-#define ABC_COMMAND_SOP "strash; ifraig; scorr; dc2; dretime; strash; dch -f; cover {I} {P}"
-#define ABC_COMMAND_DFL "strash; ifraig; scorr; dc2; dretime; strash; &get -n; &dch -f; &nf {D}; &put"
-
-#define ABC_FAST_COMMAND_LIB "strash; dretime; map {D}"
-#define ABC_FAST_COMMAND_CTR "strash; dretime; map {D}; buffer; upsize {D}; dnsize {D}; stime -p"
-#define ABC_FAST_COMMAND_LUT "strash; dretime; if"
-#define ABC_FAST_COMMAND_SOP "strash; dretime; cover -I {I} -P {P}"
-#define ABC_FAST_COMMAND_DFL "strash; dretime; map"
+#define ABC_COMMAND_LIB "strash; ifraig; scorr; dc2; dretime; retime {D}; strash; &get -n; &dch -f; &nf {D}; &put"
+#define ABC_COMMAND_CTR "strash; ifraig; scorr; dc2; dretime; retime {D}; strash; &get -n; &dch -f; &nf {D}; &put; buffer; upsize {D}; dnsize {D}; stime -p"
+#define ABC_COMMAND_LUT "strash; ifraig; scorr; dc2; dretime; retime {D}; strash; dch -f; if; mfs2"
+#define ABC_COMMAND_SOP "strash; ifraig; scorr; dc2; dretime; retime {D}; strash; dch -f; cover {I} {P}"
+#define ABC_COMMAND_DFL "strash; ifraig; scorr; dc2; dretime; retime {D}; strash; &get -n; &dch -f; &nf {D}; &put"
+
+#define ABC_FAST_COMMAND_LIB "strash; dretime; retime {D}; map {D}"
+#define ABC_FAST_COMMAND_CTR "strash; dretime; retime {D}; map {D}; buffer; upsize {D}; dnsize {D}; stime -p"
+#define ABC_FAST_COMMAND_LUT "strash; dretime; retime {D}; if"
+#define ABC_FAST_COMMAND_SOP "strash; dretime; retime {D}; cover -I {I} -P {P}"
+#define ABC_FAST_COMMAND_DFL "strash; dretime; retime {D}; map"
 
 #include "kernel/register.h"
 #include "kernel/sigtools.h"
@@ -49,6 +49,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <cctype>
 #include <cerrno>
 #include <sstream>
 #include <climits>
@@ -327,8 +328,43 @@ void extract_cell(RTLIL::Cell *cell, bool keepff)
 	}
 }
 
-std::string remap_name(RTLIL::IdString abc_name)
+std::string remap_name(RTLIL::IdString abc_name, RTLIL::Wire **orig_wire = nullptr)
 {
+	std::string abc_sname = abc_name.substr(1);
+	bool isnew = false;
+	if (abc_sname.substr(0, 4) == "new_")
+	{
+		abc_sname.erase(0, 4);
+		isnew = true;
+	}
+	if (abc_sname.substr(0, 5) == "ys__n")
+	{
+		abc_sname.erase(0, 5);
+		if (std::isdigit(abc_sname.at(0)))
+		{
+			int sid = std::stoi(abc_sname);
+			size_t postfix_start = abc_sname.find_first_not_of("0123456789");
+			std::string postfix = postfix_start != std::string::npos ? abc_sname.substr(postfix_start) : "";
+
+			if (sid < GetSize(signal_list))
+			{
+				auto sig = signal_list.at(sid);
+				if (sig.bit.wire != nullptr)
+				{
+					std::stringstream sstr;
+					sstr << "$abc$" << map_autoidx << "$" << sig.bit.wire->name.substr(1);
+					if (sig.bit.wire->width != 1)
+						sstr << "[" << sig.bit.offset << "]";
+					if (isnew)
+						sstr << "_new";
+					sstr << postfix;
+					if (orig_wire != nullptr)
+						*orig_wire = sig.bit.wire;
+					return sstr.str();
+				}
+			}
+		}
+	}
 	std::stringstream sstr;
 	sstr << "$abc$" << map_autoidx << "$" << abc_name.substr(1);
 	return sstr.str();
@@ -353,12 +389,12 @@ void dump_loop_graph(FILE *f, int &nr, std::map<int, std::set<int>> &edges, std:
 	}
 
 	for (auto n : nodes)
-		fprintf(f, "  n%d [label=\"%s\\nid=%d, count=%d\"%s];\n", n, log_signal(signal_list[n].bit),
+		fprintf(f, "  ys__n%d [label=\"%s\\nid=%d, count=%d\"%s];\n", n, log_signal(signal_list[n].bit),
 				n, in_counts[n], workpool.count(n) ? ", shape=box" : "");
 
 	for (auto &e : edges)
 	for (auto n : e.second)
-		fprintf(f, "  n%d -> n%d;\n", e.first, n);
+		fprintf(f, "  ys__n%d -> ys__n%d;\n", e.first, n);
 
 	fprintf(f, "}\n");
 }
@@ -624,7 +660,7 @@ struct abc_output_filter
 void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::string script_file, std::string exe_file,
 		std::string liberty_file, std::string constr_file, bool cleanup, vector<int> lut_costs, bool dff_mode, std::string clk_str,
 		bool keepff, std::string delay_target, std::string sop_inputs, std::string sop_products, std::string lutin_shared, bool fast_mode,
-		const std::vector<RTLIL::Cell*> &cells, bool show_tempdir, bool sop_mode)
+		const std::vector<RTLIL::Cell*> &cells, bool show_tempdir, bool sop_mode, bool abc_dress)
 {
 	module = current_module;
 	map_autoidx = autoidx++;
@@ -713,10 +749,6 @@ void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::strin
 	else
 		abc_script += fast_mode ? ABC_FAST_COMMAND_DFL : ABC_COMMAND_DFL;
 
-	if (script_file.empty() && !delay_target.empty())
-		for (size_t pos = abc_script.find("dretime;"); pos != std::string::npos; pos = abc_script.find("dretime;", pos+1))
-			abc_script = abc_script.substr(0, pos) + "dretime; retime -o {D};" + abc_script.substr(pos+8);
-
 	for (size_t pos = abc_script.find("{D}"); pos != std::string::npos; pos = abc_script.find("{D}", pos))
 		abc_script = abc_script.substr(0, pos) + delay_target + abc_script.substr(pos+3);
 
@@ -728,7 +760,8 @@ void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::strin
 
 	for (size_t pos = abc_script.find("{S}"); pos != std::string::npos; pos = abc_script.find("{S}", pos))
 		abc_script = abc_script.substr(0, pos) + lutin_shared + abc_script.substr(pos+3);
-
+	if (abc_dress)
+		abc_script += "; dress";
 	abc_script += stringf("; write_blif %s/output.blif", tempdir_name.c_str());
 	abc_script = add_echos_to_abc_cmd(abc_script);
 
@@ -784,7 +817,7 @@ void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::strin
 	for (auto &si : signal_list) {
 		if (!si.is_port || si.type != G(NONE))
 			continue;
-		fprintf(f, " n%d", si.id);
+		fprintf(f, " ys__n%d", si.id);
 		pi_map[count_input++] = log_signal(si.bit);
 	}
 	if (count_input == 0)
@@ -796,17 +829,17 @@ void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::strin
 	for (auto &si : signal_list) {
 		if (!si.is_port || si.type == G(NONE))
 			continue;
-		fprintf(f, " n%d", si.id);
+		fprintf(f, " ys__n%d", si.id);
 		po_map[count_output++] = log_signal(si.bit);
 	}
 	fprintf(f, "\n");
 
 	for (auto &si : signal_list)
-		fprintf(f, "# n%-5d %s\n", si.id, log_signal(si.bit));
+		fprintf(f, "# ys__n%-5d %s\n", si.id, log_signal(si.bit));
 
 	for (auto &si : signal_list) {
 		if (si.bit.wire == NULL) {
-			fprintf(f, ".names n%d\n", si.id);
+			fprintf(f, ".names ys__n%d\n", si.id);
 			if (si.bit == RTLIL::State::S1)
 				fprintf(f, "1\n");
 		}
@@ -815,68 +848,68 @@ void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::strin
 	int count_gates = 0;
 	for (auto &si : signal_list) {
 		if (si.type == G(BUF)) {
-			fprintf(f, ".names n%d n%d\n", si.in1, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d\n", si.in1, si.id);
 			fprintf(f, "1 1\n");
 		} else if (si.type == G(NOT)) {
-			fprintf(f, ".names n%d n%d\n", si.in1, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d\n", si.in1, si.id);
 			fprintf(f, "0 1\n");
 		} else if (si.type == G(AND)) {
-			fprintf(f, ".names n%d n%d n%d\n", si.in1, si.in2, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.id);
 			fprintf(f, "11 1\n");
 		} else if (si.type == G(NAND)) {
-			fprintf(f, ".names n%d n%d n%d\n", si.in1, si.in2, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.id);
 			fprintf(f, "0- 1\n");
 			fprintf(f, "-0 1\n");
 		} else if (si.type == G(OR)) {
-			fprintf(f, ".names n%d n%d n%d\n", si.in1, si.in2, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.id);
 			fprintf(f, "-1 1\n");
 			fprintf(f, "1- 1\n");
 		} else if (si.type == G(NOR)) {
-			fprintf(f, ".names n%d n%d n%d\n", si.in1, si.in2, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.id);
 			fprintf(f, "00 1\n");
 		} else if (si.type == G(XOR)) {
-			fprintf(f, ".names n%d n%d n%d\n", si.in1, si.in2, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.id);
 			fprintf(f, "01 1\n");
 			fprintf(f, "10 1\n");
 		} else if (si.type == G(XNOR)) {
-			fprintf(f, ".names n%d n%d n%d\n", si.in1, si.in2, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.id);
 			fprintf(f, "00 1\n");
 			fprintf(f, "11 1\n");
 		} else if (si.type == G(ANDNOT)) {
-			fprintf(f, ".names n%d n%d n%d\n", si.in1, si.in2, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.id);
 			fprintf(f, "10 1\n");
 		} else if (si.type == G(ORNOT)) {
-			fprintf(f, ".names n%d n%d n%d\n", si.in1, si.in2, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.id);
 			fprintf(f, "1- 1\n");
 			fprintf(f, "-0 1\n");
 		} else if (si.type == G(MUX)) {
-			fprintf(f, ".names n%d n%d n%d n%d\n", si.in1, si.in2, si.in3, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.in3, si.id);
 			fprintf(f, "1-0 1\n");
 			fprintf(f, "-11 1\n");
 		} else if (si.type == G(AOI3)) {
-			fprintf(f, ".names n%d n%d n%d n%d\n", si.in1, si.in2, si.in3, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.in3, si.id);
 			fprintf(f, "-00 1\n");
 			fprintf(f, "0-0 1\n");
 		} else if (si.type == G(OAI3)) {
-			fprintf(f, ".names n%d n%d n%d n%d\n", si.in1, si.in2, si.in3, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.in3, si.id);
 			fprintf(f, "00- 1\n");
 			fprintf(f, "--0 1\n");
 		} else if (si.type == G(AOI4)) {
-			fprintf(f, ".names n%d n%d n%d n%d n%d\n", si.in1, si.in2, si.in3, si.in4, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.in3, si.in4, si.id);
 			fprintf(f, "-0-0 1\n");
 			fprintf(f, "-00- 1\n");
 			fprintf(f, "0--0 1\n");
 			fprintf(f, "0-0- 1\n");
 		} else if (si.type == G(OAI4)) {
-			fprintf(f, ".names n%d n%d n%d n%d n%d\n", si.in1, si.in2, si.in3, si.in4, si.id);
+			fprintf(f, ".names ys__n%d ys__n%d ys__n%d ys__n%d ys__n%d\n", si.in1, si.in2, si.in3, si.in4, si.id);
 			fprintf(f, "00-- 1\n");
 			fprintf(f, "--00 1\n");
 		} else if (si.type == G(FF)) {
 			if (si.init == State::S0 || si.init == State::S1) {
-				fprintf(f, ".latch n%d n%d %d\n", si.in1, si.id, si.init == State::S1 ? 1 : 0);
+				fprintf(f, ".latch ys__n%d ys__n%d %d\n", si.in1, si.id, si.init == State::S1 ? 1 : 0);
 				recover_init = true;
 			} else
-				fprintf(f, ".latch n%d n%d 2\n", si.in1, si.id);
+				fprintf(f, ".latch ys__n%d ys__n%d 2\n", si.in1, si.id);
 		} else if (si.type != G(NONE))
 			log_abort();
 		if (si.type != G(NONE))
@@ -889,7 +922,6 @@ void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::strin
 	log("Extracted %d gates and %d wires to a netlist network with %d inputs and %d outputs.\n",
 			count_gates, GetSize(signal_list), count_input, count_output);
 	log_push();
-
 	if (count_output > 0)
 	{
 		log_header(design, "Executing ABC.\n");
@@ -988,7 +1020,10 @@ void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::strin
 			log_error("ABC output file does not contain a module `netlist'.\n");
 		for (auto &it : mapped_mod->wires_) {
 			RTLIL::Wire *w = it.second;
-			RTLIL::Wire *wire = module->addWire(remap_name(w->name));
+			RTLIL::Wire *orig_wire = nullptr;
+			RTLIL::Wire *wire = module->addWire(remap_name(w->name, &orig_wire));
+			if (orig_wire != nullptr && orig_wire->attributes.count("\\src"))
+				wire->attributes["\\src"] = orig_wire->attributes["\\src"];
 			if (markgroups) wire->attributes["\\abcgroup"] = map_autoidx;
 			design->select(module, wire);
 		}
@@ -1213,7 +1248,7 @@ void abc_module(RTLIL::Design *design, RTLIL::Module *current_module, std::strin
 		for (auto &si : signal_list)
 			if (si.is_port) {
 				char buffer[100];
-				snprintf(buffer, 100, "\\n%d", si.id);
+				snprintf(buffer, 100, "\\ys__n%d", si.id);
 				RTLIL::SigSig conn;
 				if (si.type != G(NONE)) {
 					conn.first = si.bit;
@@ -1407,6 +1442,11 @@ struct AbcPass : public Pass {
 		log("        this attribute is a unique integer for each ABC process started. This\n");
 		log("        is useful for debugging the partitioning of clock domains.\n");
 		log("\n");
+		log("    -dress\n");
+		log("        run the 'dress' command after all other ABC commands. This aims to\n");
+		log("        preserve naming by an equivalence check between the original and post-ABC\n");
+		log("        netlists (experimental).\n");
+		log("\n");
 		log("When neither -liberty nor -lut is used, the Yosys standard cell library is\n");
 		log("loaded into ABC before the ABC script is executed.\n");
 		log("\n");
@@ -1437,6 +1477,7 @@ struct AbcPass : public Pass {
 		std::string delay_target, sop_inputs, sop_products, lutin_shared = "-S 1";
 		bool fast_mode = false, dff_mode = false, keepff = false, cleanup = true;
 		bool show_tempdir = false, sop_mode = false;
+		bool abc_dress = false;
 		vector<int> lut_costs;
 		markgroups = false;
 
@@ -1551,6 +1592,10 @@ struct AbcPass : public Pass {
 				map_mux16 = true;
 				continue;
 			}
+			if (arg == "-dress") {
+				abc_dress = true;
+				continue;
+			}
 			if (arg == "-g" && argidx+1 < args.size()) {
 				for (auto g : split_tokens(args[++argidx], ",")) {
 					vector<string> gate_list;
@@ -1691,7 +1736,7 @@ struct AbcPass : public Pass {
 								signal_init[initsig[i]] = State::S0;
 								break;
 							case State::S1:
-								signal_init[initsig[i]] = State::S0;
+								signal_init[initsig[i]] = State::S1;
 								break;
 							default:
 								break;
@@ -1700,7 +1745,7 @@ struct AbcPass : public Pass {
 
 			if (!dff_mode || !clk_str.empty()) {
 				abc_module(design, mod, script_file, exe_file, liberty_file, constr_file, cleanup, lut_costs, dff_mode, clk_str, keepff,
-						delay_target, sop_inputs, sop_products, lutin_shared, fast_mode, mod->selected_cells(), show_tempdir, sop_mode);
+						delay_target, sop_inputs, sop_products, lutin_shared, fast_mode, mod->selected_cells(), show_tempdir, sop_mode, abc_dress);
 				continue;
 			}
 
@@ -1845,7 +1890,7 @@ struct AbcPass : public Pass {
 				en_polarity = std::get<2>(it.first);
 				en_sig = assign_map(std::get<3>(it.first));
 				abc_module(design, mod, script_file, exe_file, liberty_file, constr_file, cleanup, lut_costs, !clk_sig.empty(), "$",
-						keepff, delay_target, sop_inputs, sop_products, lutin_shared, fast_mode, it.second, show_tempdir, sop_mode);
+						keepff, delay_target, sop_inputs, sop_products, lutin_shared, fast_mode, it.second, show_tempdir, sop_mode, abc_dress);
 				assign_map.set(mod);
 			}
 		}
diff --git a/passes/techmap/attrmap.cc b/passes/techmap/attrmap.cc
index 0b5576b0..aa48e112 100644
--- a/passes/techmap/attrmap.cc
+++ b/passes/techmap/attrmap.cc
@@ -111,9 +111,10 @@ struct AttrmapMap : AttrmapAction {
 };
 
 struct AttrmapRemove : AttrmapAction {
+	bool has_value;
 	string name, value;
 	bool apply(IdString &id, Const &val) YS_OVERRIDE {
-		return !(match_name(name, id) && match_value(value, val));
+		return !(match_name(name, id) && (!has_value || match_value(value, val)));
 	}
 };
 
@@ -235,6 +236,7 @@ struct AttrmapPass : public Pass {
 				}
 				auto action = new AttrmapRemove;
 				action->name = arg1;
+				action->has_value = (p != string::npos);
 				action->value = val1;
 				actions.push_back(std::unique_ptr<AttrmapAction>(action));
 				continue;
diff --git a/passes/techmap/deminout.cc b/passes/techmap/deminout.cc
index 9f0c7bf6..47d0ff41 100644
--- a/passes/techmap/deminout.cc
+++ b/passes/techmap/deminout.cc
@@ -83,9 +83,9 @@ struct DeminoutPass : public Pass {
 						for (auto bit : sigmap(conn.second))
 							bits_used.insert(bit);
 
-					if (conn.first == "\\Y" && cell->type.in("$mux", "$pmux", "$_MUX_", "$_TBUF_"))
+					if (conn.first == "\\Y" && cell->type.in("$mux", "$pmux", "$_MUX_", "$_TBUF_", "$tribuf"))
 					{
-						bool tribuf = (cell->type == "$_TBUF_");
+						bool tribuf = (cell->type == "$_TBUF_" || cell->type == "$tribuf");
 
 						if (!tribuf) {
 							for (auto &c : cell->connections()) {
@@ -113,7 +113,8 @@ struct DeminoutPass : public Pass {
 						{
 							if (bits_numports[bit] > 1 || bits_inout.count(bit))
 								new_input = true, new_output = true;
-
+							if (bit == State::S0 || bit == State::S1)
+								new_output = true;
 							if (bits_written.count(bit)) {
 								new_output = true;
 								if (bits_tribuf.count(bit))
diff --git a/passes/techmap/dff2dffe.cc b/passes/techmap/dff2dffe.cc
index 3291f5a4..7e104096 100644
--- a/passes/techmap/dff2dffe.cc
+++ b/passes/techmap/dff2dffe.cc
@@ -267,6 +267,10 @@ struct Dff2dffePass : public Pass {
 		log("        operate in the opposite direction: replace $dffe cells with combinations\n");
 		log("        of $dff and $mux cells. the options below are ignore in unmap mode.\n");
 		log("\n");
+		log("    -unmap-mince N\n");
+		log("        Same as -unmap but only unmap $dffe where the clock enable port\n");
+		log("        signal is used by less $dffe than the specified number\n");
+		log("\n");
 		log("    -direct <internal_gate_type> <external_gate_type>\n");
 		log("        map directly to external gate type. <internal_gate_type> can\n");
 		log("        be any internal gate-level FF cell (except $_DFFE_??_). the\n");
@@ -289,6 +293,7 @@ struct Dff2dffePass : public Pass {
 		log_header(design, "Executing DFF2DFFE pass (transform $dff to $dffe where applicable).\n");
 
 		bool unmap_mode = false;
+		int min_ce_use = -1;
 		dict<IdString, IdString> direct_dict;
 
 		size_t argidx;
@@ -297,6 +302,11 @@ struct Dff2dffePass : public Pass {
 				unmap_mode = true;
 				continue;
 			}
+			if (args[argidx] == "-unmap-mince" && argidx + 1 < args.size()) {
+				unmap_mode = true;
+				min_ce_use = std::stoi(args[++argidx]);
+				continue;
+			}
 			if (args[argidx] == "-direct" && argidx + 2 < args.size()) {
 				string direct_from = RTLIL::escape_id(args[++argidx]);
 				string direct_to = RTLIL::escape_id(args[++argidx]);
@@ -343,8 +353,21 @@ struct Dff2dffePass : public Pass {
 			if (!mod->has_processes_warn())
 			{
 				if (unmap_mode) {
+					SigMap sigmap(mod);
 					for (auto cell : mod->selected_cells()) {
 						if (cell->type == "$dffe") {
+							if (min_ce_use >= 0) {
+								int ce_use = 0;
+								for (auto cell_other : mod->selected_cells()) {
+									if (cell_other->type != cell->type)
+										continue;
+									if (sigmap(cell->getPort("\\EN")) == sigmap(cell_other->getPort("\\EN")))
+										ce_use++;
+								}
+								if (ce_use >= min_ce_use)
+									continue;
+							}
+
 							RTLIL::SigSpec tmp = mod->addWire(NEW_ID, GetSize(cell->getPort("\\D")));
 							mod->addDff(NEW_ID, cell->getPort("\\CLK"), tmp, cell->getPort("\\Q"), cell->getParam("\\CLK_POLARITY").as_bool());
 							if (cell->getParam("\\EN_POLARITY").as_bool())
@@ -355,6 +378,18 @@ struct Dff2dffePass : public Pass {
 							continue;
 						}
 						if (cell->type.substr(0, 7) == "$_DFFE_") {
+							if (min_ce_use >= 0) {
+								int ce_use = 0;
+								for (auto cell_other : mod->selected_cells()) {
+									if (cell_other->type != cell->type)
+										continue;
+									if (sigmap(cell->getPort("\\E")) == sigmap(cell_other->getPort("\\E")))
+										ce_use++;
+								}
+								if (ce_use >= min_ce_use)
+									continue;
+							}
+
 							bool clk_pol = cell->type.substr(7, 1) == "P";
 							bool en_pol = cell->type.substr(8, 1) == "P";
 							RTLIL::SigSpec tmp = mod->addWire(NEW_ID);
diff --git a/passes/techmap/dffinit.cc b/passes/techmap/dffinit.cc
index a8eecc97..0ad33dc0 100644
--- a/passes/techmap/dffinit.cc
+++ b/passes/techmap/dffinit.cc
@@ -43,18 +43,37 @@ struct DffinitPass : public Pass {
 		log("        initial value of 1 or 0. (multi-bit values are not supported in this\n");
 		log("        mode.)\n");
 		log("\n");
+		log("    -strinit <string for high> <string for low> \n");
+		log("        use string values in the command line to represent a single-bit\n");
+		log("        initial value of 1 or 0. (multi-bit values are not supported in this\n");
+		log("        mode.)\n");
+		log("\n");
+		log("    -noreinit\n");
+		log("        fail if the FF cell has already a defined initial value set in other\n");
+		log("        passes and the initial value of the net it drives is not equal to\n");
+		log("        the already defined initial value.\n");
+		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
 	{
 		log_header(design, "Executing DFFINIT pass (set INIT param on FF cells).\n");
 
 		dict<IdString, dict<IdString, IdString>> ff_types;
-		bool highlow_mode = false;
+		bool highlow_mode = false, noreinit = false;
+		std::string high_string, low_string;
 
 		size_t argidx;
 		for (argidx = 1; argidx < args.size(); argidx++) {
 			if (args[argidx] == "-highlow") {
 				highlow_mode = true;
+				high_string = "high";
+				low_string = "low";
+				continue;
+			}
+			if (args[argidx] == "-strinit" && argidx+2 < args.size()) {
+				highlow_mode = true;
+				high_string = args[++argidx];
+				low_string = args[++argidx];
 				continue;
 			}
 			if (args[argidx] == "-ff" && argidx+3 < args.size()) {
@@ -64,6 +83,10 @@ struct DffinitPass : public Pass {
 				ff_types[cell_name][output_port] = init_param;
 				continue;
 			}
+			if (args[argidx] == "-noreinit") {
+				noreinit = true;
+				continue;
+			}
 			break;
 		}
 		extra_args(args, argidx, design);
@@ -79,7 +102,8 @@ struct DffinitPass : public Pass {
 				if (wire->attributes.count("\\init")) {
 					Const value = wire->attributes.at("\\init");
 					for (int i = 0; i < min(GetSize(value), GetSize(wire)); i++)
-						init_bits[sigmap(SigBit(wire, i))] = value[i];
+						if (value[i] != State::Sx)
+							init_bits[sigmap(SigBit(wire, i))] = value[i];
 				}
 				if (wire->port_output)
 					for (auto bit : sigmap(wire))
@@ -112,6 +136,10 @@ struct DffinitPass : public Pass {
 							continue;
 						while (GetSize(value.bits) <= i)
 							value.bits.push_back(State::S0);
+						if (noreinit && value.bits[i] != State::Sx && value.bits[i] != init_bits.at(sig[i]))
+							log_error("Trying to assign a different init value for %s.%s.%s which technically "
+									"have a conflicted init value.\n",
+									log_id(module), log_id(cell), log_id(it.second));
 						value.bits[i] = init_bits.at(sig[i]);
 						cleanup_bits.insert(sig[i]);
 					}
@@ -121,9 +149,9 @@ struct DffinitPass : public Pass {
 							log_error("Multi-bit init value for %s.%s.%s is incompatible with -highlow mode.\n",
 									log_id(module), log_id(cell), log_id(it.second));
 						if (value[0] == State::S1)
-							value = Const("high");
+							value = Const(high_string);
 						else
-							value = Const("low");
+							value = Const(low_string);
 					}
 
 					log("Setting %s.%s.%s (port=%s, net=%s) to %s.\n", log_id(module), log_id(cell), log_id(it.second),
diff --git a/passes/techmap/dfflibmap.cc b/passes/techmap/dfflibmap.cc
index 416ed2bd..b5c0498d 100644
--- a/passes/techmap/dfflibmap.cc
+++ b/passes/techmap/dfflibmap.cc
@@ -100,6 +100,18 @@ static bool parse_pin(LibertyAst *cell, LibertyAst *attr, std::string &pin_name,
 	for (auto child : cell->children)
 		if (child->id == "pin" && child->args.size() == 1 && child->args[0] == pin_name)
 			return true;
+
+	/* If we end up here, the pin specified in the attribute does not exist, which is an error,
+	   or, the attribute contains an expression which we do not yet support.
+       For now, we'll simply produce a warning to let the user know something is up.
+	*/
+	if (pin_name.find_first_of("^*|&") == std::string::npos) {
+		log_warning("Malformed liberty file - cannot find pin '%s' in cell '%s' - skipping.\n", pin_name.c_str(), cell->args[0].c_str());
+	}
+	else {
+		log_warning("Found unsupported expression '%s' in pin attribute of cell '%s' - skipping.\n", pin_name.c_str(), cell->args[0].c_str());
+	}
+
 	return false;
 }
 
@@ -648,11 +660,11 @@ struct DfflibmapPass : public Pass {
 		map_adff_to_dff("$_DFF_PP0_", "$_DFF_P_");
 		map_adff_to_dff("$_DFF_PP1_", "$_DFF_P_");
 
- 		log("  final dff cell mappings:\n");
- 		logmap_all();
+		log("  final dff cell mappings:\n");
+		logmap_all();
 
 		for (auto &it : design->modules_)
-			if (design->selected(it.second) && !it.second->get_bool_attribute("\\blackbox"))
+			if (design->selected(it.second) && !it.second->get_blackbox_attribute())
 				dfflibmap(design, it.second, prepare_mode);
 
 		cell_mappings.clear();
diff --git a/passes/techmap/flowmap.cc b/passes/techmap/flowmap.cc
new file mode 100644
index 00000000..96d0df5f
--- /dev/null
+++ b/passes/techmap/flowmap.cc
@@ -0,0 +1,1613 @@
+/*
+ *  yosys -- Yosys Open SYnthesis Suite
+ *
+ *  Copyright (C) 2018  whitequark <whitequark@whitequark.org>
+ *
+ *  Permission to use, copy, modify, and/or distribute this software for any
+ *  purpose with or without fee is hereby granted, provided that the above
+ *  copyright notice and this permission notice appear in all copies.
+ *
+ *  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ *  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ *  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ *  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ *  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ *  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ *
+ */
+
+// [[CITE]] FlowMap algorithm
+// Jason Cong; Yuzheng Ding, "An Optimal Technology Mapping Algorithm for Delay Optimization in Lookup-Table Based FPGA Designs,"
+// Computer-Aided Design of Integrated Circuits and Systems, IEEE Transactions on, Vol. 13, pp. 1-12, Jan. 1994.
+// doi: 10.1109/43.273754
+
+// [[CITE]] FlowMap-r algorithm
+// Jason Cong; Yuzheng Ding, "On Area/Depth Tradeoff in LUT-Based FPGA Technology Mapping,"
+// Very Large Scale Integration Systems, IEEE Transactions on, Vol. 2, June 1994.
+// doi: 10.1109/92.28574
+
+// Required reading material:
+//
+// Min-cut max-flow theorem:
+//   https://www.coursera.org/lecture/algorithms-part2/maxflow-mincut-theorem-beb9G
+// FlowMap paper:
+//   http://cadlab.cs.ucla.edu/~cong/papers/iccad92.pdf   (short version)
+//   https://limsk.ece.gatech.edu/book/papers/flowmap.pdf (long version)
+// FlowMap-r paper:
+//   http://cadlab.cs.ucla.edu/~cong/papers/dac93.pdf     (short version)
+//   https://sci-hub.tw/10.1109/92.285741                 (long version)
+
+// Notes on correspondence between paper and implementation:
+//
+// 1. In the FlowMap paper, the nodes are logic elements (analogous to Yosys cells) and edges are wires. However, in our implementation,
+// we use an inverted approach: the nodes are Yosys wire bits, and the edges are derived from (but aren't represented by) Yosys cells.
+// This may seem counterintuitive. Three observations may help understanding this. First, for a cell with a 1-bit Y output that is
+// the sole driver of its output net (which is the typical case), these representations are equivalent, because there is an exact
+// correspondence between cells and output wires. Second, in the paper, primary inputs (analogous to Yosys cell or module ports) are
+// nodes, and in Yosys, inputs are wires; our approach allows a direct mapping from both primary inputs and 1-output logic elements to
+// flow graph nodes. Third, Yosys cells may have multiple outputs or multi-bit outputs, and by using Yosys wire bits as flow graph nodes,
+// such cells are supported without any additional effort; any Yosys cell with n output wire bits ends up being split into n flow graph
+// nodes.
+//
+// 2. The FlowMap paper introduces three networks: Nt, Nt', and Nt''. The network Nt is directly represented by a subgraph of RTLIL graph,
+// which is parsed into an equivalent but easier to traverse representation in FlowmapWorker. The network Nt' is built explicitly
+// from a subgraph of Nt, and uses a similar representation in FlowGraph. The network Nt'' is implicit in FlowGraph, which is possible
+// because of the following observation: each Nt' node corresponds to an Nt'' edge of capacity 1, and each Nt' edge corresponds to
+// an Nt'' edge of capacity ∞. Therefore, we only need to explicitly record flow for Nt' edges and through Nt' nodes.
+//
+// 3. The FlowMap paper ambiguously states: "Moreover, we can find such a cut (X′′, X̅′′) by performing a depth first search starting at
+// the source s, and including in X′′ all the nodes which are reachable from s." This actually refers to a specific kind of search,
+// min-cut computation. Min-cut computation involves computing the set of nodes reachable from s by an undirected path with no full
+// (i.e. zero capacity) forward edges or empty (i.e. no flow) backward edges. In addition, the depth first search is required to compute
+// a max-volume max-flow min-cut specifically, because a max-flow min-cut is not, in general, unique.
+
+// Notes on implementation:
+//
+// 1. To compute depth optimal packing, an intermediate representation is used, where each cell with n output bits is split into n graph
+// nodes. Each such graph node is represented directly with the wire bit (RTLIL::SigBit instance) that corresponds to the output bit
+// it is created from. Fan-in and fan-out are represented explicitly by edge lists derived from the RTLIL graph. This IR never changes
+// after it has been computed.
+//
+// In terms of data, this IR is comprised of `inputs`, `outputs`, `nodes`, `edges_fw` and `edges_bw` fields.
+//
+// We call this IR "gate IR".
+//
+// 2. To compute area optimal packing, another intermediate representation is used, which consists of some K-feasible cone for every node
+// that exists in the gate IR. Immediately after depth optimal packing with FlowMap, each such cone occupies the lowest possible depth,
+// but this is not true in general, and transformations of this IR may change the cones, although each transformation has to keep each
+// cone K-feasible. In this IR, LUT fan-in and fan-out are represented explicitly by edge lists; if a K-feasible cone chosen for node A
+// includes nodes B and C, there are edges between all predecessors of A, B and C in the gate IR and node A in this IR. Moreover, in
+// this IR, cones may be *realized* or *derealized*. Only realized cones will end up mapped to actual LUTs in the output of this pass.
+//
+// Intuitively, this IR contains (some, ideally but not necessarily optimal) LUT representation for each input cell. By starting at outputs
+// and traversing the graph of this IR backwards, each K-feasible cone is converted to an actual LUT at the end of the pass. This is
+// the same as iterating through each realized LUT.
+//
+// The following are the invariants of this IR:
+//   a) Each gate IR node corresponds to a K-feasible cut.
+//   b) Each realized LUT is reachable through backward edges from some output.
+//   c) The LUT fan-in is exactly the fan-in of its constituent gates minus the fan-out of its constituent gates.
+// The invariants are kept even for derealized LUTs, since the whole point of this IR is ease of packing, unpacking, and repacking LUTs.
+//
+// In terms of data, this IR is comprised of `lut_nodes` (the set of all realized LUTs), `lut_gates` (the map from a LUT to its
+// constituent gates), `lut_edges_fw` and `lut_edges_bw` fields. The `inputs` and `outputs` fields are shared with the gate IR.
+//
+// We call this IR "LUT IR".
+
+#include "kernel/yosys.h"
+#include "kernel/sigtools.h"
+#include "kernel/modtools.h"
+#include "kernel/consteval.h"
+
+USING_YOSYS_NAMESPACE
+PRIVATE_NAMESPACE_BEGIN
+
+struct GraphStyle
+{
+	string label;
+	string color, fillcolor;
+
+	GraphStyle(string label = "", string color = "black", string fillcolor = "") :
+		label(label), color(color), fillcolor(fillcolor) {}
+};
+
+static string dot_escape(string value)
+{
+	std::string escaped;
+	for (char c : value) {
+		if (c == '\n')
+		{
+			escaped += "\\n";
+			continue;
+		}
+		if (c == '\\' || c == '"')
+			escaped += "\\";
+		escaped += c;
+	}
+	return escaped;
+}
+
+static void dump_dot_graph(string filename,
+                           pool<RTLIL::SigBit> nodes, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges,
+                           pool<RTLIL::SigBit> inputs, pool<RTLIL::SigBit> outputs,
+                           std::function<GraphStyle(RTLIL::SigBit)> node_style =
+                                   [](RTLIL::SigBit) { return GraphStyle{}; },
+                           std::function<GraphStyle(RTLIL::SigBit, RTLIL::SigBit)> edge_style =
+                                   [](RTLIL::SigBit, RTLIL::SigBit) { return GraphStyle{}; },
+                           string name = "")
+{
+	FILE *f = fopen(filename.c_str(), "w");
+	fprintf(f, "digraph \"%s\" {\n", name.c_str());
+	fprintf(f, "  rankdir=\"TB\";\n");
+
+	dict<RTLIL::SigBit, int> ids;
+	for (auto node : nodes)
+	{
+		ids[node] = ids.size();
+
+		string shape = "ellipse";
+		if (inputs[node])
+			shape = "box";
+		if (outputs[node])
+			shape = "octagon";
+		auto prop = node_style(node);
+		string style = "";
+		if (!prop.fillcolor.empty())
+			style = "filled";
+		fprintf(f, "  n%d [ shape=%s, fontname=\"Monospace\", label=\"%s\", color=\"%s\", fillcolor=\"%s\", style=\"%s\" ];\n",
+		        ids[node], shape.c_str(), dot_escape(prop.label.c_str()).c_str(), prop.color.c_str(), prop.fillcolor.c_str(), style.c_str());
+	}
+
+	fprintf(f, "  { rank=\"source\"; ");
+	for (auto input : inputs)
+		if (nodes[input])
+			fprintf(f, "n%d; ", ids[input]);
+	fprintf(f, "}\n");
+
+	fprintf(f, "  { rank=\"sink\"; ");
+	for (auto output : outputs)
+		if (nodes[output])
+			fprintf(f, "n%d; ", ids[output]);
+	fprintf(f, "}\n");
+
+	for (auto edge : edges)
+	{
+		auto source = edge.first;
+		for (auto sink : edge.second) {
+			if (nodes[source] && nodes[sink])
+			{
+				auto prop = edge_style(source, sink);
+				fprintf(f, "  n%d -> n%d [ label=\"%s\", color=\"%s\", fillcolor=\"%s\" ];\n",
+				        ids[source], ids[sink], dot_escape(prop.label.c_str()).c_str(), prop.color.c_str(), prop.fillcolor.c_str());
+			}
+		}
+	}
+
+	fprintf(f, "}\n");
+	fclose(f);
+}
+
+struct FlowGraph
+{
+	const RTLIL::SigBit source;
+	RTLIL::SigBit sink;
+	pool<RTLIL::SigBit> nodes = {source};
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges_fw, edges_bw;
+
+	const int MAX_NODE_FLOW = 1;
+	dict<RTLIL::SigBit, int> node_flow;
+	dict<pair<RTLIL::SigBit, RTLIL::SigBit>, int> edge_flow;
+
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> collapsed;
+
+	void dump_dot_graph(string filename)
+	{
+		auto node_style = [&](RTLIL::SigBit node) {
+			string label = (node == source) ? "(source)" : log_signal(node);
+			for (auto collapsed_node : collapsed[node])
+				label += stringf(" %s", log_signal(collapsed_node));
+			int flow = node_flow[node];
+			if (node != source && node != sink)
+				label += stringf("\n%d/%d", flow, MAX_NODE_FLOW);
+			else
+				label += stringf("\n%d/∞", flow);
+			return GraphStyle{label, flow < MAX_NODE_FLOW ? "green" : "black"};
+		};
+		auto edge_style = [&](RTLIL::SigBit source, RTLIL::SigBit sink) {
+			int flow = edge_flow[{source, sink}];
+			return GraphStyle{stringf("%d/∞", flow), flow > 0 ? "blue" : "black"};
+		};
+		::dump_dot_graph(filename, nodes, edges_fw, {source}, {sink}, node_style, edge_style);
+	}
+
+	// Here, we are working on the Nt'' network, but our representation is the Nt' network.
+	// The difference between these is that where in Nt' we have a subgraph:
+	//
+	//   v1 -> v2 -> v3
+	//
+	// in Nt'' we have a corresponding subgraph:
+	//
+	//   v'1b -∞-> v'2t -f-> v'2b -∞-> v'3t
+	//
+	// To address this, we split each node v into two nodes, v't and v'b. This representation is virtual,
+	// in the sense that nodes v't and v'b are overlaid on top of the original node v, and only exist
+	// in paths and worklists.
+
+	struct NodePrime
+	{
+		RTLIL::SigBit node;
+		bool is_bottom;
+
+		NodePrime(RTLIL::SigBit node, bool is_bottom) :
+			node(node), is_bottom(is_bottom) {}
+
+		bool operator==(const NodePrime &other) const
+		{
+			return node == other.node && is_bottom == other.is_bottom;
+		}
+		bool operator!=(const NodePrime &other) const
+		{
+			return !(*this == other);
+		}
+		unsigned int hash() const
+		{
+			return hash_ops<pair<RTLIL::SigBit, int>>::hash({node, is_bottom});
+		}
+
+		static NodePrime top(RTLIL::SigBit node)
+		{
+			return NodePrime(node, /*is_bottom=*/false);
+		}
+
+		static NodePrime bottom(RTLIL::SigBit node)
+		{
+			return NodePrime(node, /*is_bottom=*/true);
+		}
+
+		NodePrime as_top() const
+		{
+			log_assert(is_bottom);
+			return top(node);
+		}
+
+		NodePrime as_bottom() const
+		{
+			log_assert(!is_bottom);
+			return bottom(node);
+		}
+	};
+
+	bool find_augmenting_path(bool commit)
+	{
+		NodePrime source_prime = {source, true};
+		NodePrime sink_prime = {sink, false};
+		vector<NodePrime> path = {source_prime};
+		pool<NodePrime> visited = {};
+		bool found;
+		do {
+			found = false;
+
+			auto node_prime = path.back();
+			visited.insert(node_prime);
+
+			if (!node_prime.is_bottom) // vt
+			{
+				if (!visited[node_prime.as_bottom()] && node_flow[node_prime.node] < MAX_NODE_FLOW)
+				{
+					path.push_back(node_prime.as_bottom());
+					found = true;
+				}
+				else
+				{
+					for (auto node_pred : edges_bw[node_prime.node])
+					{
+						if (!visited[NodePrime::bottom(node_pred)] && edge_flow[{node_pred, node_prime.node}] > 0)
+						{
+							path.push_back(NodePrime::bottom(node_pred));
+							found = true;
+							break;
+						}
+					}
+				}
+			}
+			else // vb
+			{
+				if (!visited[node_prime.as_top()] && node_flow[node_prime.node] > 0)
+				{
+					path.push_back(node_prime.as_top());
+					found = true;
+				}
+				else
+				{
+					for (auto node_succ : edges_fw[node_prime.node])
+					{
+						if (!visited[NodePrime::top(node_succ)] /* && edge_flow[...] < ∞ */)
+						{
+							path.push_back(NodePrime::top(node_succ));
+							found = true;
+							break;
+						}
+					}
+				}
+			}
+
+			if (!found && path.size() > 1)
+			{
+				path.pop_back();
+				found = true;
+			}
+		} while(path.back() != sink_prime && found);
+
+		if (commit && path.back() == sink_prime)
+		{
+			auto prev_prime = path.front();
+			for (auto node_prime : path)
+			{
+				if (node_prime == source_prime)
+					continue;
+
+				log_assert(prev_prime.is_bottom ^ node_prime.is_bottom);
+				if (prev_prime.node == node_prime.node)
+				{
+					auto node = node_prime.node;
+					if (!prev_prime.is_bottom && node_prime.is_bottom)
+					{
+						log_assert(node_flow[node] == 0);
+						node_flow[node]++;
+					}
+					else
+					{
+						log_assert(node_flow[node] != 0);
+						node_flow[node]--;
+					}
+				}
+				else
+				{
+					if (prev_prime.is_bottom && !node_prime.is_bottom)
+					{
+						log_assert(true /* edge_flow[...] < ∞ */);
+						edge_flow[{prev_prime.node, node_prime.node}]++;
+					}
+					else
+					{
+						log_assert((edge_flow[{node_prime.node, prev_prime.node}] > 0));
+						edge_flow[{node_prime.node, prev_prime.node}]--;
+					}
+				}
+				prev_prime = node_prime;
+			}
+
+			node_flow[source]++;
+			node_flow[sink]++;
+		}
+		return path.back() == sink_prime;
+	}
+
+	int maximum_flow(int order)
+	{
+		int flow = 0;
+		while (flow < order && find_augmenting_path(/*commit=*/true))
+			flow++;
+		return flow + find_augmenting_path(/*commit=*/false);
+	}
+
+	pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> edge_cut()
+	{
+		pool<RTLIL::SigBit> x, xi;
+
+		NodePrime source_prime = {source, true};
+		pool<NodePrime> visited;
+		vector<NodePrime> worklist = {source_prime};
+		while (!worklist.empty())
+		{
+			auto node_prime = worklist.back();
+			worklist.pop_back();
+			if (visited[node_prime])
+				continue;
+			visited.insert(node_prime);
+
+			if (!node_prime.is_bottom)
+				x.insert(node_prime.node);
+
+			// Mincut is constructed by traversing a graph in an undirected way along forward edges that aren't full, or backward edges
+			// that aren't empty.
+			if (!node_prime.is_bottom) // top
+			{
+				if (node_flow[node_prime.node] < MAX_NODE_FLOW)
+					worklist.push_back(node_prime.as_bottom());
+				for (auto node_pred : edges_bw[node_prime.node])
+					if (edge_flow[{node_pred, node_prime.node}] > 0)
+						worklist.push_back(NodePrime::bottom(node_pred));
+			}
+			else // bottom
+			{
+				if (node_flow[node_prime.node] > 0)
+					worklist.push_back(node_prime.as_top());
+				for (auto node_succ : edges_fw[node_prime.node])
+					if (true /* edge_flow[...] < ∞ */)
+						worklist.push_back(NodePrime::top(node_succ));
+			}
+		}
+
+		for (auto node : nodes)
+			if (!x[node])
+				xi.insert(node);
+
+		for (auto collapsed_node : collapsed[sink])
+			xi.insert(collapsed_node);
+
+		log_assert(!x[sink] && xi[sink]);
+		return {x, xi};
+	}
+};
+
+struct FlowmapWorker
+{
+	int order;
+	int r_alpha, r_beta, r_gamma;
+	bool debug, debug_relax;
+
+	RTLIL::Module *module;
+	SigMap sigmap;
+	ModIndex index;
+
+	dict<RTLIL::SigBit, ModIndex::PortInfo> node_origins;
+
+	// Gate IR
+	pool<RTLIL::SigBit> nodes, inputs, outputs;
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> edges_fw, edges_bw;
+	dict<RTLIL::SigBit, int> labels;
+
+	// LUT IR
+	pool<RTLIL::SigBit> lut_nodes;
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_gates;
+	dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_edges_fw, lut_edges_bw;
+	dict<RTLIL::SigBit, int> lut_depths, lut_altitudes, lut_slacks;
+
+	int gate_count = 0, lut_count = 0, packed_count = 0;
+	int gate_area = 0, lut_area = 0;
+
+	enum class GraphMode {
+		Label,
+		Cut,
+		Slack,
+	};
+
+	void dump_dot_graph(string filename, GraphMode mode,
+	                    pool<RTLIL::SigBit> subgraph_nodes = {}, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> subgraph_edges = {},
+	                    dict<RTLIL::SigBit, pool<RTLIL::SigBit>> collapsed = {},
+	                    pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> cut = {})
+	{
+		if (subgraph_nodes.empty())
+			subgraph_nodes = nodes;
+		if (subgraph_edges.empty())
+			subgraph_edges = edges_fw;
+
+		auto node_style = [&](RTLIL::SigBit node) {
+			string label = log_signal(node);
+			for (auto collapsed_node : collapsed[node])
+				if (collapsed_node != node)
+					label += stringf(" %s", log_signal(collapsed_node));
+			switch (mode)
+			{
+				case GraphMode::Label:
+					if (labels[node] == -1)
+					{
+						label += "\nl=?";
+						return GraphStyle{label};
+					}
+					else
+					{
+						label += stringf("\nl=%d", labels[node]);
+						string fillcolor = stringf("/set311/%d", 1 + labels[node] % 11);
+						return GraphStyle{label, "", fillcolor};
+					}
+
+				case GraphMode::Cut:
+					if (cut.first[node])
+						return GraphStyle{label, "blue"};
+					if (cut.second[node])
+						return GraphStyle{label, "red"};
+					return GraphStyle{label};
+
+				case GraphMode::Slack:
+					label += stringf("\nd=%d a=%d\ns=%d", lut_depths[node], lut_altitudes[node], lut_slacks[node]);
+					return GraphStyle{label, lut_slacks[node] == 0 ? "red" : "black"};
+			}
+			return GraphStyle{label};
+		};
+		auto edge_style = [&](RTLIL::SigBit, RTLIL::SigBit) {
+			return GraphStyle{};
+		};
+		::dump_dot_graph(filename, subgraph_nodes, subgraph_edges, inputs, outputs, node_style, edge_style, module->name.str());
+	}
+
+	void dump_dot_lut_graph(string filename, GraphMode mode)
+	{
+		pool<RTLIL::SigBit> lut_and_input_nodes;
+		lut_and_input_nodes.insert(lut_nodes.begin(), lut_nodes.end());
+		lut_and_input_nodes.insert(inputs.begin(), inputs.end());
+		dump_dot_graph(filename, mode, lut_and_input_nodes, lut_edges_fw, lut_gates);
+	}
+
+	pool<RTLIL::SigBit> find_subgraph(RTLIL::SigBit sink)
+	{
+		pool<RTLIL::SigBit> subgraph;
+		pool<RTLIL::SigBit> worklist = {sink};
+		while (!worklist.empty())
+		{
+			auto node = worklist.pop();
+			subgraph.insert(node);
+			for (auto source : edges_bw[node])
+			{
+				if (!subgraph[source])
+					worklist.insert(source);
+			}
+		}
+		return subgraph;
+	}
+
+	FlowGraph build_flow_graph(RTLIL::SigBit sink, int p)
+	{
+		FlowGraph flow_graph;
+		flow_graph.sink = sink;
+
+		pool<RTLIL::SigBit> worklist = {sink}, visited;
+		while (!worklist.empty())
+		{
+			auto node = worklist.pop();
+			visited.insert(node);
+
+			auto collapsed_node = labels[node] == p ? sink : node;
+			if (node != collapsed_node)
+				flow_graph.collapsed[collapsed_node].insert(node);
+			flow_graph.nodes.insert(collapsed_node);
+
+			for (auto node_pred : edges_bw[node])
+			{
+				auto collapsed_node_pred = labels[node_pred] == p ? sink : node_pred;
+				if (node_pred != collapsed_node_pred)
+					flow_graph.collapsed[collapsed_node_pred].insert(node_pred);
+				if (collapsed_node != collapsed_node_pred)
+				{
+					flow_graph.edges_bw[collapsed_node].insert(collapsed_node_pred);
+					flow_graph.edges_fw[collapsed_node_pred].insert(collapsed_node);
+				}
+				if (inputs[node_pred])
+				{
+					flow_graph.edges_bw[collapsed_node_pred].insert(flow_graph.source);
+					flow_graph.edges_fw[flow_graph.source].insert(collapsed_node_pred);
+				}
+
+				if (!visited[node_pred])
+					worklist.insert(node_pred);
+			}
+		}
+		return flow_graph;
+	}
+
+	void discover_nodes(pool<IdString> cell_types)
+	{
+		for (auto cell : module->selected_cells())
+		{
+			if (!cell_types[cell->type])
+				continue;
+
+			if (!cell->known())
+				log_error("Cell %s (%s.%s) is unknown.\n", cell->type.c_str(), log_id(module), log_id(cell));
+
+			pool<RTLIL::SigBit> fanout;
+			for (auto conn : cell->connections())
+			{
+				if (!cell->output(conn.first)) continue;
+				int offset = -1;
+				for (auto bit : conn.second)
+				{
+					offset++;
+					if (!bit.wire) continue;
+					auto mapped_bit = sigmap(bit);
+					if (nodes[mapped_bit])
+						log_error("Multiple drivers found for wire %s.\n", log_signal(mapped_bit));
+					nodes.insert(mapped_bit);
+					node_origins[mapped_bit] = ModIndex::PortInfo(cell, conn.first, offset);
+					fanout.insert(mapped_bit);
+				}
+			}
+
+			int fanin = 0;
+			for (auto conn : cell->connections())
+			{
+				if (!cell->input(conn.first)) continue;
+				for (auto bit : sigmap(conn.second))
+				{
+					if (!bit.wire) continue;
+					for (auto fanout_bit : fanout)
+					{
+						edges_fw[bit].insert(fanout_bit);
+						edges_bw[fanout_bit].insert(bit);
+					}
+					fanin++;
+				}
+			}
+
+			if (fanin > order)
+				log_error("Cell %s (%s.%s) with fan-in %d cannot be mapped to a %d-LUT.\n",
+				          cell->type.c_str(), log_id(module), log_id(cell), fanin, order);
+
+			gate_count++;
+			gate_area += 1 << fanin;
+		}
+
+		for (auto edge : edges_fw)
+		{
+			if (!nodes[edge.first])
+			{
+				inputs.insert(edge.first);
+				nodes.insert(edge.first);
+			}
+		}
+
+		for (auto node : nodes)
+		{
+			auto node_info = index.query(node);
+			if (node_info->is_output && !inputs[node])
+				outputs.insert(node);
+			for (auto port : node_info->ports)
+				if (!cell_types[port.cell->type] && !inputs[node])
+					outputs.insert(node);
+		}
+
+		if (debug)
+		{
+			dump_dot_graph("flowmap-initial.dot", GraphMode::Label);
+			log("Dumped initial graph to `flowmap-initial.dot`.\n");
+		}
+	}
+
+	void label_nodes()
+	{
+		for (auto node : nodes)
+			labels[node] = -1;
+		for (auto input : inputs)
+		{
+			if (input.wire->attributes.count("\\$flowmap_level"))
+				labels[input] = input.wire->attributes["\\$flowmap_level"].as_int();
+			else
+				labels[input] = 0;
+		}
+
+		pool<RTLIL::SigBit> worklist = nodes;
+		int debug_num = 0;
+		while (!worklist.empty())
+		{
+			auto sink = worklist.pop();
+			if (labels[sink] != -1)
+				continue;
+
+			bool inputs_have_labels = true;
+			for (auto sink_input : edges_bw[sink])
+			{
+				if (labels[sink_input] == -1)
+				{
+					inputs_have_labels = false;
+					break;
+				}
+			}
+			if (!inputs_have_labels)
+				continue;
+
+			if (debug)
+			{
+				debug_num++;
+				log("Examining subgraph %d rooted in %s.\n", debug_num, log_signal(sink));
+			}
+
+			pool<RTLIL::SigBit> subgraph = find_subgraph(sink);
+
+			int p = 1;
+			for (auto subgraph_node : subgraph)
+				p = max(p, labels[subgraph_node]);
+
+			FlowGraph flow_graph = build_flow_graph(sink, p);
+			int flow = flow_graph.maximum_flow(order);
+			pool<RTLIL::SigBit> x, xi;
+			if (flow <= order)
+			{
+				labels[sink] = p;
+				auto cut = flow_graph.edge_cut();
+				x = cut.first;
+				xi = cut.second;
+			}
+			else
+			{
+				labels[sink] = p + 1;
+				x = subgraph;
+				x.erase(sink);
+				xi.insert(sink);
+			}
+			lut_gates[sink] = xi;
+
+			pool<RTLIL::SigBit> k;
+			for (auto xi_node : xi)
+			{
+				for (auto xi_node_pred : edges_bw[xi_node])
+					if (x[xi_node_pred])
+						k.insert(xi_node_pred);
+			}
+			log_assert((int)k.size() <= order);
+			lut_edges_bw[sink] = k;
+			for (auto k_node : k)
+				lut_edges_fw[k_node].insert(sink);
+
+			if (debug)
+			{
+				log("  Maximum flow: %d. Assigned label %d.\n", flow, labels[sink]);
+				dump_dot_graph(stringf("flowmap-%d-sub.dot", debug_num), GraphMode::Cut, subgraph, {}, {}, {x, xi});
+				log("  Dumped subgraph to `flowmap-%d-sub.dot`.\n", debug_num);
+				flow_graph.dump_dot_graph(stringf("flowmap-%d-flow.dot", debug_num));
+				log("  Dumped flow graph to `flowmap-%d-flow.dot`.\n", debug_num);
+				log("    LUT inputs:");
+				for (auto k_node : k)
+					log(" %s", log_signal(k_node));
+				log(".\n");
+				log("    LUT packed gates:");
+				for (auto xi_node : xi)
+					log(" %s", log_signal(xi_node));
+				log(".\n");
+			}
+
+			for (auto sink_succ : edges_fw[sink])
+				worklist.insert(sink_succ);
+		}
+
+		if (debug)
+		{
+			dump_dot_graph("flowmap-labeled.dot", GraphMode::Label);
+			log("Dumped labeled graph to `flowmap-labeled.dot`.\n");
+		}
+	}
+
+	int map_luts()
+	{
+		pool<RTLIL::SigBit> worklist = outputs;
+		while (!worklist.empty())
+		{
+			auto lut_node = worklist.pop();
+			lut_nodes.insert(lut_node);
+			for (auto input_node : lut_edges_bw[lut_node])
+				if (!lut_nodes[input_node] && !inputs[input_node])
+					worklist.insert(input_node);
+		}
+
+		int depth = 0;
+		for (auto label : labels)
+			depth = max(depth, label.second);
+		log("Mapped to %d LUTs with maximum depth %d.\n", GetSize(lut_nodes), depth);
+
+		if (debug)
+		{
+			dump_dot_lut_graph("flowmap-mapped.dot", GraphMode::Label);
+			log("Dumped mapped graph to `flowmap-mapped.dot`.\n");
+		}
+
+		return depth;
+	}
+
+	void realize_derealize_lut(RTLIL::SigBit lut, pool<RTLIL::SigBit> *changed = nullptr)
+	{
+		pool<RTLIL::SigBit> worklist = {lut};
+		while (!worklist.empty())
+		{
+			auto lut = worklist.pop();
+			if (inputs[lut])
+				continue;
+
+			bool realized_successors = false;
+			for (auto lut_succ : lut_edges_fw[lut])
+				if (lut_nodes[lut_succ])
+					realized_successors = true;
+
+			if (realized_successors && !lut_nodes[lut])
+				lut_nodes.insert(lut);
+			else if (!realized_successors && lut_nodes[lut])
+				lut_nodes.erase(lut);
+			else
+				continue;
+
+			for (auto lut_pred : lut_edges_bw[lut])
+				worklist.insert(lut_pred);
+
+			if (changed)
+				changed->insert(lut);
+		}
+	}
+
+	void add_lut_edge(RTLIL::SigBit pred, RTLIL::SigBit succ, pool<RTLIL::SigBit> *changed = nullptr)
+	{
+		log_assert(!lut_edges_fw[pred][succ] && !lut_edges_bw[succ][pred]);
+		log_assert((int)lut_edges_bw[succ].size() < order);
+
+		lut_edges_fw[pred].insert(succ);
+		lut_edges_bw[succ].insert(pred);
+		realize_derealize_lut(pred, changed);
+
+		if (changed)
+		{
+			changed->insert(pred);
+			changed->insert(succ);
+		}
+	}
+
+	void remove_lut_edge(RTLIL::SigBit pred, RTLIL::SigBit succ, pool<RTLIL::SigBit> *changed = nullptr)
+	{
+		log_assert(lut_edges_fw[pred][succ] && lut_edges_bw[succ][pred]);
+
+		lut_edges_fw[pred].erase(succ);
+		lut_edges_bw[succ].erase(pred);
+		realize_derealize_lut(pred, changed);
+
+		if (changed)
+		{
+			if (lut_nodes[pred])
+				changed->insert(pred);
+			changed->insert(succ);
+		}
+	}
+
+	pair<pool<RTLIL::SigBit>, pool<RTLIL::SigBit>> cut_lut_at_gate(RTLIL::SigBit lut, RTLIL::SigBit lut_gate)
+	{
+		pool<RTLIL::SigBit> gate_inputs = lut_edges_bw[lut];
+		pool<RTLIL::SigBit> other_inputs;
+		pool<RTLIL::SigBit> worklist = {lut};
+		while (!worklist.empty())
+		{
+			auto node = worklist.pop();
+			for (auto node_pred : edges_bw[node])
+			{
+				if (node_pred == lut_gate)
+					continue;
+				if (lut_gates[lut][node_pred])
+					worklist.insert(node_pred);
+				else
+				{
+					gate_inputs.erase(node_pred);
+					other_inputs.insert(node_pred);
+				}
+			}
+		}
+		return {gate_inputs, other_inputs};
+	}
+
+	void compute_lut_distances(dict<RTLIL::SigBit, int> &lut_distances, bool forward,
+	                          pool<RTLIL::SigBit> initial = {}, pool<RTLIL::SigBit> *changed = nullptr)
+	{
+		pool<RTLIL::SigBit> terminals = forward ? inputs : outputs;
+		auto &lut_edges_next = forward ? lut_edges_fw : lut_edges_bw;
+		auto &lut_edges_prev = forward ? lut_edges_bw : lut_edges_fw;
+
+		if (initial.empty())
+			initial = terminals;
+		for (auto node : initial)
+			lut_distances.erase(node);
+
+		pool<RTLIL::SigBit> worklist = initial;
+		while (!worklist.empty())
+		{
+			auto lut = worklist.pop();
+			int lut_distance = 0;
+			if (forward && inputs[lut])
+				lut_distance = labels[lut]; // to support (* $flowmap_level=n *)
+			for (auto lut_prev : lut_edges_prev[lut])
+				if ((lut_nodes[lut_prev] || inputs[lut_prev]) && lut_distances.count(lut_prev))
+					lut_distance = max(lut_distance, lut_distances[lut_prev] + 1);
+			if (!lut_distances.count(lut) || lut_distances[lut] != lut_distance)
+			{
+				lut_distances[lut] = lut_distance;
+				if (changed != nullptr && !inputs[lut])
+					changed->insert(lut);
+				for (auto lut_next : lut_edges_next[lut])
+					if (lut_nodes[lut_next] || inputs[lut_next])
+						worklist.insert(lut_next);
+			}
+		}
+	}
+
+	void check_lut_distances(const dict<RTLIL::SigBit, int> &lut_distances, bool forward)
+	{
+		dict<RTLIL::SigBit, int> gold_lut_distances;
+		compute_lut_distances(gold_lut_distances, forward);
+		for (auto lut_distance : lut_distances)
+			if (lut_nodes[lut_distance.first])
+				log_assert(lut_distance.second == gold_lut_distances[lut_distance.first]);
+	}
+
+	// LUT depth is the length of the longest path from any input in LUT fan-in to LUT.
+	// LUT altitude (for lack of a better term) is the length of the longest path from LUT to any output in LUT fan-out.
+	void update_lut_depths_altitudes(pool<RTLIL::SigBit> worklist = {}, pool<RTLIL::SigBit> *changed = nullptr)
+	{
+		compute_lut_distances(lut_depths, /*forward=*/true, worklist, changed);
+		compute_lut_distances(lut_altitudes, /*forward=*/false, worklist, changed);
+		if (debug_relax && !worklist.empty()) {
+			check_lut_distances(lut_depths, /*forward=*/true);
+			check_lut_distances(lut_altitudes, /*forward=*/false);
+		}
+	}
+
+	// LUT critical output set is the set of outputs whose depth will increase (equivalently, slack will decrease) if the depth of
+	// the LUT increases. (This is referred to as RPOv for LUTv in the paper.)
+	void compute_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs,
+	                                  pool<RTLIL::SigBit> worklist = {})
+	{
+		if (worklist.empty())
+			worklist = lut_nodes;
+
+		while (!worklist.empty())
+		{
+			bool updated_some = false;
+			for (auto lut : worklist)
+			{
+				if (outputs[lut])
+					lut_critical_outputs[lut] = {lut};
+				else
+				{
+					bool all_succ_computed = true;
+					lut_critical_outputs[lut] = {};
+					for (auto lut_succ : lut_edges_fw[lut])
+					{
+						if (lut_nodes[lut_succ] && lut_depths[lut_succ] == lut_depths[lut] + 1)
+						{
+							if (lut_critical_outputs.count(lut_succ))
+								lut_critical_outputs[lut].insert(lut_critical_outputs[lut_succ].begin(), lut_critical_outputs[lut_succ].end());
+							else
+							{
+								all_succ_computed = false;
+								break;
+							}
+						}
+					}
+					if (!all_succ_computed)
+					{
+						lut_critical_outputs.erase(lut);
+						continue;
+					}
+				}
+				worklist.erase(lut);
+				updated_some = true;
+			}
+			log_assert(updated_some);
+		}
+	}
+
+	// Invalidating LUT critical output sets is tricky, because increasing the depth of a LUT may take other, adjacent LUTs off the critical
+	// path to the output. Conservatively, if we increase depth of some LUT, every LUT in its input cone needs to have its critical output
+	// set invalidated, too.
+	pool<RTLIL::SigBit> invalidate_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs,
+	                                                    pool<RTLIL::SigBit> worklist)
+	{
+		pool<RTLIL::SigBit> changed;
+		while (!worklist.empty())
+		{
+			auto lut = worklist.pop();
+			changed.insert(lut);
+			lut_critical_outputs.erase(lut);
+			for (auto lut_pred : lut_edges_bw[lut])
+			{
+				if (lut_nodes[lut_pred] && !changed[lut_pred])
+				{
+					changed.insert(lut_pred);
+					worklist.insert(lut_pred);
+				}
+			}
+		}
+		return changed;
+	}
+
+	void check_lut_critical_outputs(const dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs)
+	{
+		dict<RTLIL::SigBit, pool<RTLIL::SigBit>> gold_lut_critical_outputs;
+		compute_lut_critical_outputs(gold_lut_critical_outputs);
+		for (auto lut_critical_output : lut_critical_outputs)
+			if (lut_nodes[lut_critical_output.first])
+				log_assert(lut_critical_output.second == gold_lut_critical_outputs[lut_critical_output.first]);
+	}
+
+	void update_lut_critical_outputs(dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs,
+	                                 pool<RTLIL::SigBit> worklist = {})
+	{
+		if (!worklist.empty())
+		{
+			pool<RTLIL::SigBit> invalidated = invalidate_lut_critical_outputs(lut_critical_outputs, worklist);
+			compute_lut_critical_outputs(lut_critical_outputs, invalidated);
+			check_lut_critical_outputs(lut_critical_outputs);
+		}
+		else
+			compute_lut_critical_outputs(lut_critical_outputs);
+	}
+
+	void update_breaking_node_potentials(dict<RTLIL::SigBit, dict<RTLIL::SigBit, int>> &potentials,
+	                                     const dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs)
+	{
+		for (auto lut : lut_nodes)
+		{
+			if (potentials.count(lut))
+				continue;
+			if (lut_gates[lut].size() == 1 || lut_slacks[lut] == 0)
+				continue;
+
+			if (debug_relax)
+				log("  Computing potentials for LUT %s.\n", log_signal(lut));
+
+			for (auto lut_gate : lut_gates[lut])
+			{
+				if (lut == lut_gate)
+					continue;
+
+				if (debug_relax)
+					log("    Considering breaking node %s.\n", log_signal(lut_gate));
+
+				int r_ex, r_im, r_slk;
+
+				auto cut_inputs = cut_lut_at_gate(lut, lut_gate);
+				pool<RTLIL::SigBit> gate_inputs = cut_inputs.first, other_inputs = cut_inputs.second;
+				if (gate_inputs.empty() && (int)other_inputs.size() == order)
+				{
+					if (debug_relax)
+						log("      Breaking would result in a (k+1)-LUT.\n");
+					continue;
+				}
+
+				pool<RTLIL::SigBit> elim_fanin_luts;
+				for (auto gate_input : gate_inputs)
+				{
+					if (lut_edges_fw[gate_input].size() == 1)
+					{
+						log_assert(lut_edges_fw[gate_input][lut]);
+						elim_fanin_luts.insert(gate_input);
+					}
+				}
+				if (debug_relax)
+				{
+					if (!lut_nodes[lut_gate])
+						log("      Breaking requires a new LUT.\n");
+					if (!gate_inputs.empty())
+					{
+						log("      Breaking eliminates LUT inputs");
+						for (auto gate_input : gate_inputs)
+							log(" %s", log_signal(gate_input));
+						log(".\n");
+					}
+					if (!elim_fanin_luts.empty())
+					{
+						log("      Breaking eliminates fan-in LUTs");
+						for (auto elim_fanin_lut : elim_fanin_luts)
+							log(" %s", log_signal(elim_fanin_lut));
+						log(".\n");
+					}
+				}
+				r_ex = (lut_nodes[lut_gate] ? 0 : -1) + elim_fanin_luts.size();
+
+				pool<pair<RTLIL::SigBit, RTLIL::SigBit>> maybe_mergeable_luts;
+
+				// Try to merge LUTv with one of its successors.
+				RTLIL::SigBit last_lut_succ;
+				int fanout = 0;
+				for (auto lut_succ : lut_edges_fw[lut])
+				{
+					if (lut_nodes[lut_succ])
+					{
+						fanout++;
+						last_lut_succ = lut_succ;
+					}
+				}
+				if (fanout == 1)
+					maybe_mergeable_luts.insert({lut, last_lut_succ});
+
+				// Try to merge LUTv with one of its predecessors.
+				for (auto lut_pred : other_inputs)
+				{
+					int fanout = 0;
+					for (auto lut_pred_succ : lut_edges_fw[lut_pred])
+						if (lut_nodes[lut_pred_succ] || lut_pred_succ == lut_gate)
+							fanout++;
+					if (fanout == 1)
+						maybe_mergeable_luts.insert({lut_pred, lut});
+				}
+
+				// Try to merge LUTw with one of its predecessors.
+				for (auto lut_gate_pred : lut_edges_bw[lut_gate])
+				{
+					int fanout = 0;
+					for (auto lut_gate_pred_succ : lut_edges_fw[lut_gate_pred])
+						if (lut_nodes[lut_gate_pred_succ] || lut_gate_pred_succ == lut_gate)
+							fanout++;
+					if (fanout == 1)
+						maybe_mergeable_luts.insert({lut_gate_pred, lut_gate});
+				}
+
+				r_im = 0;
+				for (auto maybe_mergeable_pair : maybe_mergeable_luts)
+				{
+					log_assert(lut_edges_fw[maybe_mergeable_pair.first][maybe_mergeable_pair.second]);
+					pool<RTLIL::SigBit> unique_inputs;
+					for (auto fst_lut_pred : lut_edges_bw[maybe_mergeable_pair.first])
+						if (lut_nodes[fst_lut_pred])
+							unique_inputs.insert(fst_lut_pred);
+					for (auto snd_lut_pred : lut_edges_bw[maybe_mergeable_pair.second])
+						if (lut_nodes[snd_lut_pred])
+							unique_inputs.insert(snd_lut_pred);
+					unique_inputs.erase(maybe_mergeable_pair.first);
+					if ((int)unique_inputs.size() <= order)
+					{
+						if (debug_relax)
+							log("      Breaking may allow merging %s and %s.\n",
+							    log_signal(maybe_mergeable_pair.first), log_signal(maybe_mergeable_pair.second));
+						r_im++;
+					}
+				}
+
+				int lut_gate_depth;
+				if (lut_nodes[lut_gate])
+					lut_gate_depth = lut_depths[lut_gate];
+				else
+				{
+					lut_gate_depth = 0;
+					for (auto lut_gate_pred : lut_edges_bw[lut_gate])
+						lut_gate_depth = max(lut_gate_depth, lut_depths[lut_gate_pred] + 1);
+				}
+				if (lut_depths[lut] >= lut_gate_depth + 1)
+					r_slk = 0;
+				else
+				{
+					int depth_delta = lut_gate_depth + 1 - lut_depths[lut];
+					if (depth_delta > lut_slacks[lut])
+					{
+						if (debug_relax)
+							log("      Breaking would increase depth by %d, which is more than available slack.\n", depth_delta);
+						continue;
+					}
+
+					if (debug_relax)
+					{
+						log("      Breaking increases depth of LUT by %d.\n", depth_delta);
+						if (lut_critical_outputs.at(lut).size())
+						{
+							log("      Breaking decreases slack of outputs");
+							for (auto lut_critical_output : lut_critical_outputs.at(lut))
+							{
+								log(" %s", log_signal(lut_critical_output));
+								log_assert(lut_slacks[lut_critical_output] > 0);
+							}
+							log(".\n");
+						}
+					}
+					r_slk = lut_critical_outputs.at(lut).size() * depth_delta;
+				}
+
+				int p = 100 * (r_alpha * r_ex + r_beta * r_im + r_gamma) / (r_slk + 1);
+				if (debug_relax)
+					log("    Potential for breaking node %s: %d (Rex=%d, Rim=%d, Rslk=%d).\n",
+					    log_signal(lut_gate), p, r_ex, r_im, r_slk);
+				potentials[lut][lut_gate] = p;
+			}
+		}
+	}
+
+	bool relax_depth_for_bound(bool first, int depth_bound, dict<RTLIL::SigBit, pool<RTLIL::SigBit>> &lut_critical_outputs)
+	{
+		int initial_count = GetSize(lut_nodes);
+
+		for (auto node : lut_nodes)
+		{
+			lut_slacks[node] = depth_bound - (lut_depths[node] + lut_altitudes[node]);
+			log_assert(lut_slacks[node] >= 0);
+		}
+		if (debug)
+		{
+			dump_dot_lut_graph(stringf("flowmap-relax-%d-initial.dot", depth_bound), GraphMode::Slack);
+			log("  Dumped initial slack graph to `flowmap-relax-%d-initial.dot`.\n", depth_bound);
+		}
+
+		dict<RTLIL::SigBit, dict<RTLIL::SigBit, int>> potentials;
+		for (int break_num = 1; ; break_num++)
+		{
+			update_breaking_node_potentials(potentials, lut_critical_outputs);
+
+			if (potentials.empty())
+			{
+				log("  Relaxed to %d (+%d) LUTs.\n", GetSize(lut_nodes), GetSize(lut_nodes) - initial_count);
+				if (!first && break_num == 1)
+				{
+					log("  Design fully relaxed.\n");
+					return true;
+				}
+				else
+				{
+					log("  Slack exhausted.\n");
+					break;
+				}
+			}
+
+			RTLIL::SigBit breaking_lut, breaking_gate;
+			int best_potential = INT_MIN;
+			for (auto lut_gate_potentials : potentials)
+			{
+				for (auto gate_potential : lut_gate_potentials.second)
+				{
+					if (gate_potential.second > best_potential)
+					{
+						breaking_lut = lut_gate_potentials.first;
+						breaking_gate = gate_potential.first;
+						best_potential = gate_potential.second;
+					}
+				}
+			}
+			log("  Breaking LUT %s to %s LUT %s (potential %d).\n",
+			    log_signal(breaking_lut), lut_nodes[breaking_gate] ? "reuse" : "extract", log_signal(breaking_gate), best_potential);
+
+			if (debug_relax)
+				log("    Removing breaking gate %s from LUT.\n", log_signal(breaking_gate));
+			lut_gates[breaking_lut].erase(breaking_gate);
+
+			auto cut_inputs = cut_lut_at_gate(breaking_lut, breaking_gate);
+			pool<RTLIL::SigBit> gate_inputs = cut_inputs.first, other_inputs = cut_inputs.second;
+
+			pool<RTLIL::SigBit> worklist = lut_gates[breaking_lut];
+			pool<RTLIL::SigBit> elim_gates = gate_inputs;
+			while (!worklist.empty())
+			{
+				auto lut_gate = worklist.pop();
+				bool all_gate_preds_elim = true;
+				for (auto lut_gate_pred : edges_bw[lut_gate])
+					if (!elim_gates[lut_gate_pred])
+						all_gate_preds_elim = false;
+				if (all_gate_preds_elim)
+				{
+					if (debug_relax)
+						log("    Removing gate %s from LUT.\n", log_signal(lut_gate));
+					lut_gates[breaking_lut].erase(lut_gate);
+					for (auto lut_gate_succ : edges_fw[lut_gate])
+						worklist.insert(lut_gate_succ);
+				}
+			}
+			log_assert(!lut_gates[breaking_lut].empty());
+
+			pool<RTLIL::SigBit> directly_affected_nodes = {breaking_lut};
+			for (auto gate_input : gate_inputs)
+			{
+				if (debug_relax)
+					log("    Removing LUT edge %s -> %s.\n", log_signal(gate_input), log_signal(breaking_lut));
+				remove_lut_edge(gate_input, breaking_lut, &directly_affected_nodes);
+			}
+			if (debug_relax)
+				log("    Adding LUT edge %s -> %s.\n", log_signal(breaking_gate), log_signal(breaking_lut));
+			add_lut_edge(breaking_gate, breaking_lut, &directly_affected_nodes);
+
+			if (debug_relax)
+				log("  Updating slack and potentials.\n");
+
+			pool<RTLIL::SigBit> indirectly_affected_nodes = {};
+			update_lut_depths_altitudes(directly_affected_nodes, &indirectly_affected_nodes);
+			update_lut_critical_outputs(lut_critical_outputs, indirectly_affected_nodes);
+			for (auto node : indirectly_affected_nodes)
+			{
+				lut_slacks[node] = depth_bound - (lut_depths[node] + lut_altitudes[node]);
+				log_assert(lut_slacks[node] >= 0);
+				if (debug_relax)
+					log("    LUT %s now has depth %d and slack %d.\n", log_signal(node), lut_depths[node], lut_slacks[node]);
+			}
+
+			worklist = indirectly_affected_nodes;
+			pool<RTLIL::SigBit> visited;
+			while (!worklist.empty())
+			{
+				auto node = worklist.pop();
+				visited.insert(node);
+				potentials.erase(node);
+				// We are invalidating the entire output cone of the gate IR node, not just of the LUT IR node. This is done to also invalidate
+				// all LUTs that could contain one of the indirectly affected nodes as a *part* of them, as they may not be in the output cone
+				// of any of the LUT IR nodes, e.g. if we have a LUT IR node A and node B as predecessors of node C, where node B includes all
+				// gates from node A.
+				for (auto node_succ : edges_fw[node])
+					if (!visited[node_succ])
+						worklist.insert(node_succ);
+			}
+
+			if (debug)
+			{
+				dump_dot_lut_graph(stringf("flowmap-relax-%d-break-%d.dot", depth_bound, break_num), GraphMode::Slack);
+				log("  Dumped slack graph after break %d to `flowmap-relax-%d-break-%d.dot`.\n",  break_num, depth_bound, break_num);
+			}
+		}
+
+		return false;
+	}
+
+	void optimize_area(int depth, int optarea)
+	{
+		dict<RTLIL::SigBit, pool<RTLIL::SigBit>> lut_critical_outputs;
+		update_lut_depths_altitudes();
+		update_lut_critical_outputs(lut_critical_outputs);
+
+		for (int depth_bound = depth; depth_bound <= depth + optarea; depth_bound++)
+		{
+			log("Relaxing with depth bound %d.\n", depth_bound);
+			bool fully_relaxed = relax_depth_for_bound(depth_bound == depth, depth_bound, lut_critical_outputs);
+
+			if (fully_relaxed)
+				break;
+		}
+	}
+
+	void pack_cells(int minlut)
+	{
+		ConstEval ce(module);
+		for (auto input_node : inputs)
+			ce.stop(input_node);
+
+		pool<RTLIL::SigBit> mapped_nodes;
+		for (auto node : lut_nodes)
+		{
+			if (node_origins.count(node))
+			{
+				auto origin = node_origins[node];
+				if (origin.cell->getPort(origin.port).size() == 1)
+					log("Packing %s.%s.%s (%s).\n",
+					    log_id(module), log_id(origin.cell), origin.port.c_str(), log_signal(node));
+				else
+					log("Packing %s.%s.%s [%d] (%s).\n",
+					    log_id(module), log_id(origin.cell), origin.port.c_str(), origin.offset, log_signal(node));
+			}
+			else
+			{
+				log("Packing %s.%s.\n", log_id(module), log_signal(node));
+			}
+
+			for (auto gate_node : lut_gates[node])
+			{
+				log_assert(node_origins.count(gate_node));
+
+				if (gate_node == node)
+					continue;
+
+				auto gate_origin = node_origins[gate_node];
+				if (gate_origin.cell->getPort(gate_origin.port).size() == 1)
+					log("  Packing %s.%s.%s (%s).\n",
+					    log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), log_signal(gate_node));
+				else
+					log("  Packing %s.%s.%s [%d] (%s).\n",
+					    log_id(module), log_id(gate_origin.cell), gate_origin.port.c_str(), gate_origin.offset, log_signal(gate_node));
+			}
+
+			vector<RTLIL::SigBit> input_nodes(lut_edges_bw[node].begin(), lut_edges_bw[node].end());
+			RTLIL::Const lut_table(State::Sx, max(1 << input_nodes.size(), 1 << minlut));
+			unsigned const mask = 1 << input_nodes.size();
+			for (unsigned i = 0; i < mask; i++)
+			{
+				ce.push();
+				for (size_t n = 0; n < input_nodes.size(); n++)
+					ce.set(input_nodes[n], ((i >> n) & 1) ? State::S1 : State::S0);
+
+				RTLIL::SigSpec value = node, undef;
+				if (!ce.eval(value, undef))
+				{
+					string env;
+					for (auto input_node : input_nodes)
+						env += stringf("  %s = %s\n", log_signal(input_node), log_signal(ce.values_map(input_node)));
+					log_error("Cannot evaluate %s because %s is not defined.\nEvaluation environment:\n%s",
+					          log_signal(node), log_signal(undef), env.c_str());
+				}
+
+				lut_table[i] = value.as_bool() ? State::S1 : State::S0;
+				ce.pop();
+			}
+
+			RTLIL::SigSpec lut_a, lut_y = node;
+			for (auto input_node : input_nodes)
+				lut_a.append_bit(input_node);
+			lut_a.append(RTLIL::Const(State::Sx, minlut - input_nodes.size()));
+
+			RTLIL::Cell *lut = module->addLut(NEW_ID, lut_a, lut_y, lut_table);
+			mapped_nodes.insert(node);
+			for (auto gate_node : lut_gates[node])
+			{
+				auto gate_origin = node_origins[gate_node];
+				lut->add_strpool_attribute("\\src", gate_origin.cell->get_strpool_attribute("\\src"));
+				packed_count++;
+			}
+			lut_count++;
+			lut_area += lut_table.size();
+
+			if ((int)input_nodes.size() >= minlut)
+				log("  Packed into a %d-LUT %s.%s.\n", GetSize(input_nodes), log_id(module), log_id(lut));
+			else
+				log("  Packed into a %d-LUT %s.%s (implemented as %d-LUT).\n", GetSize(input_nodes), log_id(module), log_id(lut), minlut);
+		}
+
+		for (auto node : mapped_nodes)
+		{
+			auto origin = node_origins[node];
+			RTLIL::SigSpec driver = origin.cell->getPort(origin.port);
+			driver[origin.offset] = module->addWire(NEW_ID);
+			origin.cell->setPort(origin.port, driver);
+		}
+	}
+
+	FlowmapWorker(int order, int minlut, pool<IdString> cell_types, int r_alpha, int r_beta, int r_gamma,
+	              bool relax, int optarea, bool debug, bool debug_relax,
+	              RTLIL::Module *module) :
+		order(order), r_alpha(r_alpha), r_beta(r_beta), r_gamma(r_gamma), debug(debug), debug_relax(debug_relax),
+		module(module), sigmap(module), index(module)
+	{
+		log("Labeling cells.\n");
+		discover_nodes(cell_types);
+		label_nodes();
+		int depth = map_luts();
+
+		if (relax)
+		{
+			log("\n");
+			log("Optimizing area.\n");
+			optimize_area(depth, optarea);
+		}
+
+		log("\n");
+		log("Packing cells.\n");
+		pack_cells(minlut);
+	}
+};
+
+static void split(std::vector<std::string> &tokens, const std::string &text, char sep)
+{
+	size_t start = 0, end = 0;
+	while ((end = text.find(sep, start)) != std::string::npos) {
+		tokens.push_back(text.substr(start, end - start));
+		start = end + 1;
+	}
+	tokens.push_back(text.substr(start));
+}
+
+struct FlowmapPass : public Pass {
+	FlowmapPass() : Pass("flowmap", "pack LUTs with FlowMap") { }
+	void help() YS_OVERRIDE
+	{
+		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
+		log("\n");
+		log("    flowmap [options] [selection]\n");
+		log("\n");
+		log("This pass uses the FlowMap technology mapping algorithm to pack logic gates\n");
+		log("into k-LUTs with optimal depth. It allows mapping any circuit elements that can\n");
+		log("be evaluated with the `eval` pass, including cells with multiple output ports\n");
+		log("and multi-bit input and output ports.\n");
+		log("\n");
+		log("    -maxlut k\n");
+		log("        perform technology mapping for a k-LUT architecture. if not specified,\n");
+		log("        defaults to 3.\n");
+		log("\n");
+		log("    -minlut n\n");
+		log("        only produce n-input or larger LUTs. if not specified, defaults to 1.\n");
+		log("\n");
+		log("    -cells <cell>[,<cell>,...]\n");
+		log("        map specified cells. if not specified, maps $_NOT_, $_AND_, $_OR_,\n");
+		log("        $_XOR_ and $_MUX_, which are the outputs of the `simplemap` pass.\n");
+		log("\n");
+		log("    -relax\n");
+		log("        perform depth relaxation and area minimization.\n");
+		log("\n");
+		log("    -r-alpha n, -r-beta n, -r-gamma n\n");
+		log("        parameters of depth relaxation heuristic potential function.\n");
+		log("        if not specified, alpha=8, beta=2, gamma=1.\n");
+		log("\n");
+		log("    -optarea n\n");
+		log("        optimize for area by trading off at most n logic levels for fewer LUTs.\n");
+		log("        n may be zero, to optimize for area without increasing depth.\n");
+		log("        implies -relax.\n");
+		log("\n");
+		log("    -debug\n");
+		log("        dump intermediate graphs.\n");
+		log("\n");
+		log("    -debug-relax\n");
+		log("        explain decisions performed during depth relaxation.\n");
+		log("\n");
+	}
+	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
+	{
+		int order = 3;
+		int minlut = 1;
+		vector<string> cells;
+		bool relax = false;
+		int r_alpha = 8, r_beta = 2, r_gamma = 1;
+		int optarea = 0;
+		bool debug = false, debug_relax = false;
+
+		size_t argidx;
+		for (argidx = 1; argidx < args.size(); argidx++)
+		{
+			if (args[argidx] == "-maxlut" && argidx + 1 < args.size())
+			{
+				order = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-minlut" && argidx + 1 < args.size())
+			{
+				minlut = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-cells" && argidx + 1 < args.size())
+			{
+				split(cells, args[++argidx], ',');
+				continue;
+			}
+			if (args[argidx] == "-relax")
+			{
+				relax = true;
+				continue;
+			}
+			if (args[argidx] == "-r-alpha" && argidx + 1 < args.size())
+			{
+				r_alpha = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-r-beta" && argidx + 1 < args.size())
+			{
+				r_beta = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-r-gamma" && argidx + 1 < args.size())
+			{
+				r_gamma = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-optarea" && argidx + 1 < args.size())
+			{
+				relax = true;
+				optarea = atoi(args[++argidx].c_str());
+				continue;
+			}
+			if (args[argidx] == "-debug")
+			{
+				debug = true;
+				continue;
+			}
+			if (args[argidx] == "-debug-relax")
+			{
+				debug = debug_relax = true;
+				continue;
+			}
+			break;
+		}
+		extra_args(args, argidx, design);
+
+		pool<IdString> cell_types;
+		if (!cells.empty())
+		{
+			for (auto &cell : cells)
+				cell_types.insert(cell);
+		}
+		else
+		{
+			cell_types = {"$_NOT_", "$_AND_", "$_OR_", "$_XOR_", "$_MUX_"};
+		}
+
+		const char *algo_r = relax ? "-r" : "";
+		log_header(design, "Executing FLOWMAP pass (pack LUTs with FlowMap%s).\n", algo_r);
+
+		int gate_count = 0, lut_count = 0, packed_count = 0;
+		int gate_area = 0, lut_area = 0;
+		for (auto module : design->selected_modules())
+		{
+			FlowmapWorker worker(order, minlut, cell_types, r_alpha, r_beta, r_gamma, relax, optarea, debug, debug_relax, module);
+			gate_count += worker.gate_count;
+			lut_count += worker.lut_count;
+			packed_count += worker.packed_count;
+			gate_area += worker.gate_area;
+			lut_area += worker.lut_area;
+		}
+
+		log("\n");
+		log("Packed %d cells (%d of them duplicated) into %d LUTs.\n", packed_count, packed_count - gate_count, lut_count);
+		log("Solution takes %.1f%% of original gate area.\n", lut_area * 100.0 / gate_area);
+	}
+} FlowmapPass;
+
+PRIVATE_NAMESPACE_END
diff --git a/passes/techmap/libparse.cc b/passes/techmap/libparse.cc
index d3b1ff02..349ccc11 100644
--- a/passes/techmap/libparse.cc
+++ b/passes/techmap/libparse.cc
@@ -24,6 +24,7 @@
 #include <istream>
 #include <fstream>
 #include <iostream>
+#include <sstream>
 
 #ifndef FILTERLIB
 #include "kernel/log.h"
@@ -86,12 +87,14 @@ int LibertyParser::lexer(std::string &str)
 {
 	int c;
 
+	// eat whitespace
 	do {
 		c = f.get();
 	} while (c == ' ' || c == '\t' || c == '\r');
 
+	// search for identifiers, numbers, plus or minus.
 	if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_' || c == '-' || c == '+' || c == '.') {
-		str = c;
+		str = static_cast<char>(c);
 		while (1) {
 			c = f.get();
 			if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || ('0' <= c && c <= '9') || c == '_' || c == '-' || c == '+' || c == '.')
@@ -111,6 +114,8 @@ int LibertyParser::lexer(std::string &str)
 		}
 	}
 
+	// if it wasn't an identifer, number of array range,
+	// maybe it's a string?
 	if (c == '"') {
 		str = "";
 		while (1) {
@@ -125,9 +130,10 @@ int LibertyParser::lexer(std::string &str)
 		return 'v';
 	}
 
+	// if it wasn't a string, perhaps it's a comment or a forward slash?
 	if (c == '/') {
 		c = f.get();
-		if (c == '*') {
+		if (c == '*') {         // start of '/*' block comment
 			int last_c = 0;
 			while (c > 0 && (last_c != '*' || c != '/')) {
 				last_c = c;
@@ -136,7 +142,7 @@ int LibertyParser::lexer(std::string &str)
 					line++;
 			}
 			return lexer(str);
-		} else if (c == '/') {
+		} else if (c == '/') {  // start of '//' line comment
 			while (c > 0 && c != '\n')
 				c = f.get();
 			line++;
@@ -144,24 +150,31 @@ int LibertyParser::lexer(std::string &str)
 		}
 		f.unget();
 		// fprintf(stderr, "LEX: char >>/<<\n");
-		return '/';
+		return '/';             // a single '/' charater.
 	}
 
+	// check for a backslash
 	if (c == '\\') {
-		c = f.get();
+		c = f.get();		
 		if (c == '\r')
 			c = f.get();
-		if (c == '\n')
+		if (c == '\n') {
+			line++;
 			return lexer(str);
+		}
 		f.unget();
 		return '\\';
 	}
 
+	// check for a new line
 	if (c == '\n') {
 		line++;
-		return ';';
+		return 'n';
 	}
 
+	// anything else, such as ';' will get passed
+	// through as literal items.
+
 	// if (c >= 32 && c < 255)
 	// 	fprintf(stderr, "LEX: char >>%c<<\n", c);
 	// else
@@ -175,14 +188,39 @@ LibertyAst *LibertyParser::parse()
 
 	int tok = lexer(str);
 
-	while (tok == ';')
+	// there are liberty files in the wild that
+	// have superfluous ';' at the end of
+	// a  { ... }. We simply ignore a ';' here.
+	// and get to the next statement.
+
+	while ((tok == 'n') || (tok == ';'))
 		tok = lexer(str);
 
 	if (tok == '}' || tok < 0)
 		return NULL;
 
-	if (tok != 'v')
-		error();
+	if (tok != 'v') {
+		std::string eReport;
+		switch(tok)
+		{
+		case 'n':
+			error("Unexpected newline.");
+			break;
+		case '[':
+		case ']':
+		case '}':
+		case '{':
+		case '\"':
+		case ':':
+			eReport = "Unexpected '";
+			eReport += static_cast<char>(tok);
+			eReport += "'.";
+			error(eReport);
+			break;
+		default:
+			error();
+		}
+	}
 
 	LibertyAst *ast = new LibertyAst;
 	ast->id = str;
@@ -191,7 +229,9 @@ LibertyAst *LibertyParser::parse()
 	{
 		tok = lexer(str);
 
-		if (tok == ';')
+		// allow both ';' and new lines to 
+		// terminate a statement.
+		if ((tok == ';') || (tok == 'n'))
 			break;
 
 		if (tok == ':' && ast->value.empty()) {
@@ -207,7 +247,12 @@ LibertyAst *LibertyParser::parse()
 				ast->value += str;
 				tok = lexer(str);
 			}
-			if (tok == ';')
+			
+			// In a liberty file, all key : value pairs should end in ';'
+			// However, there are some liberty files in the wild that
+			// just have a newline. We'll be kind and accept a newline
+			// instead of the ';' too..
+			if ((tok == ';') || (tok == 'n'))
 				break;
 			else
 				error();
@@ -222,8 +267,70 @@ LibertyAst *LibertyParser::parse()
 					continue;
 				if (tok == ')')
 					break;
-				if (tok != 'v')
-					error();
+				
+				// FIXME: the AST needs to be extended to store
+				//        these vector ranges.
+				if (tok == '[')
+				{
+					// parse vector range [A] or [A:B]
+					std::string arg;
+					tok = lexer(arg);
+					if (tok != 'v')
+					{
+						// expected a vector array index
+						error("Expected a number.");
+					}
+					else
+					{
+						// fixme: check for number A
+					}
+					tok = lexer(arg);
+					// optionally check for : in case of [A:B]
+					// if it isn't we just expect ']'
+					// as we have [A]
+					if (tok == ':')
+					{
+						tok = lexer(arg);
+						if (tok != 'v')
+						{
+							// expected a vector array index
+							error("Expected a number.");
+						}
+						else
+						{
+							// fixme: check for number B
+							tok = lexer(arg);                            
+						}
+					}
+					// expect a closing bracket of array range
+					if (tok != ']')
+					{
+						error("Expected ']' on array range.");
+					}
+					continue;           
+				}
+				if (tok != 'v') {
+					std::string eReport;
+					switch(tok)
+					{
+					case 'n':
+						error("Unexpected newline.");
+						break;
+					case '[':
+					case ']':
+					case '}':
+					case '{':
+					case '\"':
+					case ':':
+						eReport = "Unexpected '";
+						eReport += static_cast<char>(tok);
+						eReport += "'.";
+						error(eReport);
+						break;
+					default:
+						error();
+					}
+				}
 				ast->args.push_back(arg);
 			}
 			continue;
@@ -249,14 +356,31 @@ LibertyAst *LibertyParser::parse()
 
 void LibertyParser::error()
 {
-	log_error("Syntax error in line %d.\n", line);
+	log_error("Syntax error in liberty file on line %d.\n", line);
+}
+
+void LibertyParser::error(const std::string &str)
+{
+	std::stringstream ss;
+	ss << "Syntax error in liberty file on line " << line << ".\n";
+	ss << "  " << str << "\n";
+	log_error("%s", ss.str().c_str());
 }
 
 #else
 
 void LibertyParser::error()
 {
-	fprintf(stderr, "Syntax error in line %d.\n", line);
+	fprintf(stderr, "Syntax error in liberty file on line %d.\n", line);
+	exit(1);
+}
+
+void LibertyParser::error(const std::string &str)
+{
+	std::stringstream ss;
+	ss << "Syntax error in liberty file on line " << line << ".\n";
+	ss << "  " << str << "\n";
+	printf("%s", ss.str().c_str());
 	exit(1);
 }
 
@@ -264,21 +388,21 @@ void LibertyParser::error()
 
 #define CHECK_NV(result, check)                                      \
    do {                                                              \
-     auto _R = (result);                                             \
-     if (!(_R check)) {                                              \
-       fprintf(stderr, "Error from '%s' (%ld %s) in %s:%d.\n",       \
-               #result, (long int)_R, #check, __FILE__, __LINE__);   \
-       abort();                                                      \
-     }                                                               \
+	 auto _R = (result);                                             \
+	 if (!(_R check)) {                                              \
+	   fprintf(stderr, "Error from '%s' (%ld %s) in %s:%d.\n",       \
+			   #result, (long int)_R, #check, __FILE__, __LINE__);   \
+	   abort();                                                      \
+	 }                                                               \
    } while(0)
 
 #define CHECK_COND(result)                                           \
    do {                                                              \
-     if (!(result)) {                                                \
-       fprintf(stderr, "Error from '%s' in %s:%d.\n",                \
-               #result, __FILE__, __LINE__);                         \
-       abort();                                                      \
-     }                                                               \
+	 if (!(result)) {                                                \
+	   fprintf(stderr, "Error from '%s' in %s:%d.\n",                \
+			   #result, __FILE__, __LINE__);                         \
+	   abort();                                                      \
+	 }                                                               \
    } while(0)
 
 /**** END: http://svn.clifford.at/tools/trunk/examples/check.h ****/
diff --git a/passes/techmap/libparse.h b/passes/techmap/libparse.h
index cf632557..c9ebd06c 100644
--- a/passes/techmap/libparse.h
+++ b/passes/techmap/libparse.h
@@ -46,9 +46,17 @@ namespace Yosys
 		LibertyAst *ast;
 		LibertyParser(std::istream &f) : f(f), line(1), ast(parse()) {}
 		~LibertyParser() { if (ast) delete ast; }
+        
+        /* lexer return values:
+           'v': identifier, string, array range [...] -> str holds the token string
+           'n': newline
+           anything else is a single character.
+        */
 		int lexer(std::string &str);
-		LibertyAst *parse();
+		
+        LibertyAst *parse();
 		void error();
+        void error(const std::string &str);
 	};
 }
 
diff --git a/passes/techmap/lut2mux.cc b/passes/techmap/lut2mux.cc
index d32bbff1..a4ed7955 100644
--- a/passes/techmap/lut2mux.cc
+++ b/passes/techmap/lut2mux.cc
@@ -32,7 +32,7 @@ int lut2mux(Cell *cell)
 
 	if (GetSize(sig_a) == 1)
 	{
-		cell->module->addMuxGate(NEW_ID, lut[0], lut[1], sig_a, sig_y);
+		cell->module->addMuxGate(NEW_ID, lut.extract(0)[0], lut.extract(1)[0], sig_a, sig_y);
 	}
 	else
 	{
diff --git a/passes/techmap/muxcover.cc b/passes/techmap/muxcover.cc
index 12da9ed0..c84cfc39 100644
--- a/passes/techmap/muxcover.cc
+++ b/passes/techmap/muxcover.cc
@@ -23,6 +23,7 @@
 USING_YOSYS_NAMESPACE
 PRIVATE_NAMESPACE_BEGIN
 
+#define COST_DMUX   90
 #define COST_MUX2  100
 #define COST_MUX4  220
 #define COST_MUX8  460
@@ -57,6 +58,13 @@ struct MuxcoverWorker
 	bool use_mux8;
 	bool use_mux16;
 	bool nodecode;
+	bool nopartial;
+
+	int cost_dmux;
+	int cost_mux2;
+	int cost_mux4;
+	int cost_mux8;
+	int cost_mux16;
 
 	MuxcoverWorker(Module *module) : module(module), sigmap(module)
 	{
@@ -64,9 +72,32 @@ struct MuxcoverWorker
 		use_mux8 = false;
 		use_mux16 = false;
 		nodecode = false;
+		nopartial = false;
+		cost_dmux = COST_DMUX;
+		cost_mux2 = COST_MUX2;
+		cost_mux4 = COST_MUX4;
+		cost_mux8 = COST_MUX8;
+		cost_mux16 = COST_MUX16;
 		decode_mux_counter = 0;
 	}
 
+	bool xcmp(std::initializer_list<SigBit> list)
+	{
+		auto cursor = list.begin(), end = list.end();
+		log_assert(cursor != end);
+		SigBit tmp = *(cursor++);
+		while (cursor != end) {
+			SigBit bit = *(cursor++);
+			if (bit == State::Sx)
+				continue;
+			if (tmp == State::Sx)
+				tmp = bit;
+			if (bit != tmp)
+				return false;
+		}
+		return true;
+	}
+
 	void treeify()
 	{
 		pool<SigBit> roots;
@@ -124,13 +155,22 @@ struct MuxcoverWorker
 		log("    Finished treeification: Found %d trees.\n", GetSize(tree_list));
 	}
 
-	bool follow_muxtree(SigBit &ret_bit, tree_t &tree, SigBit bit, const char *path)
+	bool follow_muxtree(SigBit &ret_bit, tree_t &tree, SigBit bit, const char *path, bool first_layer = true)
 	{
 		if (*path) {
-			if (tree.muxes.count(bit) == 0)
-				return false;
+			if (tree.muxes.count(bit) == 0) {
+				if (first_layer || nopartial)
+					return false;
+				while (path[0] && path[1])
+					path++;
+				if (path[0] == 'S')
+					ret_bit = State::Sx;
+				else
+					ret_bit = bit;
+				return true;
+			}
 			char port_name[3] = {'\\', *path, 0};
-			return follow_muxtree(ret_bit, tree, sigmap(tree.muxes.at(bit)->getPort(port_name)), path+1);
+			return follow_muxtree(ret_bit, tree, sigmap(tree.muxes.at(bit)->getPort(port_name)), path+1, false);
 		} else {
 			ret_bit = bit;
 			return true;
@@ -139,7 +179,7 @@ struct MuxcoverWorker
 
 	int prepare_decode_mux(SigBit &A, SigBit B, SigBit sel, SigBit bit)
 	{
-		if (A == B)
+		if (A == B || sel == State::Sx)
 			return 0;
 
 		tuple<SigBit, SigBit, SigBit> key(A, B, sel);
@@ -157,7 +197,10 @@ struct MuxcoverWorker
 		if (std::get<2>(entry))
 			return 0;
 
-		return COST_MUX2 / GetSize(std::get<1>(entry));
+		if (A == State::Sx || B == State::Sx)
+			return 0;
+
+		return cost_dmux / GetSize(std::get<1>(entry));
 	}
 
 	void implement_decode_mux(SigBit ctrl_bit)
@@ -174,9 +217,32 @@ struct MuxcoverWorker
 		implement_decode_mux(std::get<0>(key));
 		implement_decode_mux(std::get<1>(key));
 
-		module->addMuxGate(NEW_ID, std::get<0>(key), std::get<1>(key), std::get<2>(key), ctrl_bit);
+		if (std::get<0>(key) == State::Sx) {
+			module->addBufGate(NEW_ID, std::get<1>(key), ctrl_bit);
+		} else if (std::get<1>(key) == State::Sx) {
+			module->addBufGate(NEW_ID, std::get<0>(key), ctrl_bit);
+		} else {
+			module->addMuxGate(NEW_ID, std::get<0>(key), std::get<1>(key), std::get<2>(key), ctrl_bit);
+			decode_mux_counter++;
+		}
 		std::get<2>(entry) = true;
-		decode_mux_counter++;
+	}
+
+	void find_best_covers(tree_t &tree, const vector<SigBit> &bits)
+	{
+		for (auto bit : bits)
+			find_best_cover(tree, bit);
+	}
+
+	int sum_best_covers(tree_t &tree, const vector<SigBit> &bits)
+	{
+		int sum = 0;
+		for (auto bit : pool<SigBit>(bits.begin(), bits.end())) {
+			int cost = tree.newmuxes.at(bit).cost;
+			log_debug("        Best cost for %s: %d\n", log_signal(bit), cost);
+			sum += cost;
+		}
+		return sum;
 	}
 
 	int find_best_cover(tree_t &tree, SigBit bit)
@@ -209,9 +275,13 @@ struct MuxcoverWorker
 			mux.inputs.push_back(B);
 			mux.selects.push_back(S1);
 
-			mux.cost += COST_MUX2;
-			mux.cost += find_best_cover(tree, A);
-			mux.cost += find_best_cover(tree, B);
+			find_best_covers(tree, mux.inputs);
+			log_debug("        Decode cost for mux2 at %s: %d\n", log_signal(bit), mux.cost);
+
+			mux.cost += cost_mux2;
+			mux.cost += sum_best_covers(tree, mux.inputs);
+
+			log_debug("      Cost of mux2 at %s: %d\n", log_signal(bit), mux.cost);
 
 			best_mux = mux;
 		}
@@ -229,7 +299,7 @@ struct MuxcoverWorker
 			ok = ok && follow_muxtree(S2, tree, bit, "BS");
 
 			if (nodecode)
-				ok = ok && S1 == S2;
+				ok = ok && xcmp({S1, S2});
 
 			ok = ok && follow_muxtree(T1, tree, bit, "S");
 
@@ -247,13 +317,15 @@ struct MuxcoverWorker
 				mux.selects.push_back(S1);
 				mux.selects.push_back(T1);
 
-				mux.cost += COST_MUX4;
-				mux.cost += find_best_cover(tree, A);
-				mux.cost += find_best_cover(tree, B);
-				mux.cost += find_best_cover(tree, C);
-				mux.cost += find_best_cover(tree, D);
+				find_best_covers(tree, mux.inputs);
+				log_debug("        Decode cost for mux4 at %s: %d\n", log_signal(bit), mux.cost);
+
+				mux.cost += cost_mux4;
+				mux.cost += sum_best_covers(tree, mux.inputs);
+
+				log_debug("      Cost of mux4 at %s: %d\n", log_signal(bit), mux.cost);
 
-				if (best_mux.cost > mux.cost)
+				if (best_mux.cost >= mux.cost)
 					best_mux = mux;
 			}
 		}
@@ -277,13 +349,13 @@ struct MuxcoverWorker
 			ok = ok && follow_muxtree(S4, tree, bit, "BBS");
 
 			if (nodecode)
-				ok = ok && S1 == S2 && S2 == S3 && S3 == S4;
+				ok = ok && xcmp({S1, S2, S3, S4});
 
 			ok = ok && follow_muxtree(T1, tree, bit, "AS");
 			ok = ok && follow_muxtree(T2, tree, bit, "BS");
 
 			if (nodecode)
-				ok = ok && T1 == T2;
+				ok = ok && xcmp({T1, T2});
 
 			ok = ok && follow_muxtree(U1, tree, bit, "S");
 
@@ -310,17 +382,15 @@ struct MuxcoverWorker
 				mux.selects.push_back(T1);
 				mux.selects.push_back(U1);
 
-				mux.cost += COST_MUX8;
-				mux.cost += find_best_cover(tree, A);
-				mux.cost += find_best_cover(tree, B);
-				mux.cost += find_best_cover(tree, C);
-				mux.cost += find_best_cover(tree, D);
-				mux.cost += find_best_cover(tree, E);
-				mux.cost += find_best_cover(tree, F);
-				mux.cost += find_best_cover(tree, G);
-				mux.cost += find_best_cover(tree, H);
-
-				if (best_mux.cost > mux.cost)
+				find_best_covers(tree, mux.inputs);
+				log_debug("        Decode cost for mux8 at %s: %d\n", log_signal(bit), mux.cost);
+
+				mux.cost += cost_mux8;
+				mux.cost += sum_best_covers(tree, mux.inputs);
+
+				log_debug("      Cost of mux8 at %s: %d\n", log_signal(bit), mux.cost);
+
+				if (best_mux.cost >= mux.cost)
 					best_mux = mux;
 			}
 		}
@@ -356,7 +426,7 @@ struct MuxcoverWorker
 			ok = ok && follow_muxtree(S8, tree, bit, "BBBS");
 
 			if (nodecode)
-				ok = ok && S1 == S2 && S2 == S3 && S3 == S4 && S4 == S5 && S5 == S6 && S6 == S7 && S7 == S8;
+				ok = ok && xcmp({S1, S2, S3, S4, S5, S6, S7, S8});
 
 			ok = ok && follow_muxtree(T1, tree, bit, "AAS");
 			ok = ok && follow_muxtree(T2, tree, bit, "ABS");
@@ -364,13 +434,13 @@ struct MuxcoverWorker
 			ok = ok && follow_muxtree(T4, tree, bit, "BBS");
 
 			if (nodecode)
-				ok = ok && T1 == T2 && T2 == T3 && T3 == T4;
+				ok = ok && xcmp({T1, T2, T3, T4});
 
 			ok = ok && follow_muxtree(U1, tree, bit, "AS");
 			ok = ok && follow_muxtree(U2, tree, bit, "BS");
 
 			if (nodecode)
-				ok = ok && U1 == U2;
+				ok = ok && xcmp({U1, U2});
 
 			ok = ok && follow_muxtree(V1, tree, bit, "S");
 
@@ -414,25 +484,15 @@ struct MuxcoverWorker
 				mux.selects.push_back(U1);
 				mux.selects.push_back(V1);
 
-				mux.cost += COST_MUX16;
-				mux.cost += find_best_cover(tree, A);
-				mux.cost += find_best_cover(tree, B);
-				mux.cost += find_best_cover(tree, C);
-				mux.cost += find_best_cover(tree, D);
-				mux.cost += find_best_cover(tree, E);
-				mux.cost += find_best_cover(tree, F);
-				mux.cost += find_best_cover(tree, G);
-				mux.cost += find_best_cover(tree, H);
-				mux.cost += find_best_cover(tree, I);
-				mux.cost += find_best_cover(tree, J);
-				mux.cost += find_best_cover(tree, K);
-				mux.cost += find_best_cover(tree, L);
-				mux.cost += find_best_cover(tree, M);
-				mux.cost += find_best_cover(tree, N);
-				mux.cost += find_best_cover(tree, O);
-				mux.cost += find_best_cover(tree, P);
-
-				if (best_mux.cost > mux.cost)
+				find_best_covers(tree, mux.inputs);
+				log_debug("        Decode cost for mux16 at %s: %d\n", log_signal(bit), mux.cost);
+
+				mux.cost += cost_mux16;
+				mux.cost += sum_best_covers(tree, mux.inputs);
+
+				log_debug("      Cost of mux16 at %s: %d\n", log_signal(bit), mux.cost);
+
+				if (best_mux.cost >= mux.cost)
 					best_mux = mux;
 			}
 		}
@@ -528,6 +588,7 @@ struct MuxcoverWorker
 	void treecover(tree_t &tree)
 	{
 		int count_muxes_by_type[4] = {0, 0, 0, 0};
+		log_debug("    Searching for best cover for tree at %s.\n", log_signal(tree.root));
 		find_best_cover(tree, tree.root);
 		implement_best_cover(tree, tree.root, count_muxes_by_type);
 		log("    Replaced tree at %s: %d MUX2, %d MUX4, %d MUX8, %d MUX16\n", log_signal(tree.root),
@@ -544,12 +605,13 @@ struct MuxcoverWorker
 
 		log("  Covering trees:\n");
 
-		// pre-fill cache of decoder muxes
-		if (!nodecode)
+		if (!nodecode) {
+			log_debug("    Populating cache of decoder muxes.\n");
 			for (auto &tree : tree_list) {
 				find_best_cover(tree, tree.root);
 				tree.newmuxes.clear();
 			}
+		}
 
 		for (auto &tree : tree_list)
 			treecover(tree);
@@ -569,15 +631,25 @@ struct MuxcoverPass : public Pass {
 		log("\n");
 		log("Cover trees of $_MUX_ cells with $_MUX{4,8,16}_ cells\n");
 		log("\n");
-		log("    -mux4, -mux8, -mux16\n");
-		log("        Use the specified types of MUXes. If none of those options are used,\n");
-		log("        the effect is the same as if all of them where used.\n");
+		log("    -mux4[=cost], -mux8[=cost], -mux16[=cost]\n");
+		log("        Use the specified types of MUXes (with optional integer costs). If none\n");
+		log("        of these options are given, the effect is the same as if all of them are.\n");
+		log("        Default costs: $_MUX_ = %d, $_MUX4_ = %d,\n", COST_MUX2, COST_MUX4);
+		log("                       $_MUX8_ = %d, $_MUX16_ = %d\n", COST_MUX8, COST_MUX16);
+		log("\n");
+		log("    -dmux=cost\n");
+		log("        Use the specified cost for $_MUX_ cells used in decoders.\n");
+		log("        Default cost: %d\n", COST_DMUX);
 		log("\n");
 		log("    -nodecode\n");
 		log("        Do not insert decoder logic. This reduces the number of possible\n");
 		log("        substitutions, but guarantees that the resulting circuit is not\n");
 		log("        less efficient than the original circuit.\n");
 		log("\n");
+		log("    -nopartial\n");
+		log("        Do not consider mappings that use $_MUX<N>_ to select from less\n");
+		log("        than <N> different signals.\n");
+		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
 	{
@@ -587,26 +659,52 @@ struct MuxcoverPass : public Pass {
 		bool use_mux8 = false;
 		bool use_mux16 = false;
 		bool nodecode = false;
+		bool nopartial = false;
+		int cost_dmux = COST_DMUX;
+		int cost_mux4 = COST_MUX4;
+		int cost_mux8 = COST_MUX8;
+		int cost_mux16 = COST_MUX16;
 
 		size_t argidx;
 		for (argidx = 1; argidx < args.size(); argidx++)
 		{
-			if (args[argidx] == "-mux4") {
+			const auto &arg = args[argidx];
+			if (arg.size() >= 5 && arg.substr(0,5) == "-mux4") {
 				use_mux4 = true;
+				if (arg.size() > 5) {
+					if (arg[5] != '=') break;
+					cost_mux4 = atoi(arg.substr(6).c_str());
+				}
 				continue;
 			}
-			if (args[argidx] == "-mux8") {
+			if (arg.size() >= 5 && arg.substr(0,5) == "-mux8") {
 				use_mux8 = true;
+				if (arg.size() > 5) {
+					if (arg[5] != '=') break;
+					cost_mux8 = atoi(arg.substr(6).c_str());
+				}
 				continue;
 			}
-			if (args[argidx] == "-mux16") {
+			if (arg.size() >= 6 && arg.substr(0,6) == "-mux16") {
 				use_mux16 = true;
+				if (arg.size() > 6) {
+					if (arg[6] != '=') break;
+					cost_mux16 = atoi(arg.substr(7).c_str());
+				}
+				continue;
+			}
+			if (arg.size() >= 6 && arg.substr(0,6) == "-dmux=") {
+				cost_dmux = atoi(arg.substr(6).c_str());
 				continue;
 			}
-			if (args[argidx] == "-nodecode") {
+			if (arg == "-nodecode") {
 				nodecode = true;
 				continue;
 			}
+			if (arg == "-nopartial") {
+				nopartial = true;
+				continue;
+			}
 			break;
 		}
 		extra_args(args, argidx, design);
@@ -623,7 +721,12 @@ struct MuxcoverPass : public Pass {
 			worker.use_mux4 = use_mux4;
 			worker.use_mux8 = use_mux8;
 			worker.use_mux16 = use_mux16;
+			worker.cost_dmux = cost_dmux;
+			worker.cost_mux4 = cost_mux4;
+			worker.cost_mux8 = cost_mux8;
+			worker.cost_mux16 = cost_mux16;
 			worker.nodecode = nodecode;
+			worker.nopartial = nopartial;
 			worker.run();
 		}
 	}
diff --git a/passes/techmap/pmuxtree.cc b/passes/techmap/pmuxtree.cc
index b7a22dc3..6a923f48 100644
--- a/passes/techmap/pmuxtree.cc
+++ b/passes/techmap/pmuxtree.cc
@@ -71,9 +71,9 @@ struct PmuxtreePass : public Pass {
 	{
 		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
 		log("\n");
-		log("    pmuxtree [options] [selection]\n");
+		log("    pmuxtree [selection]\n");
 		log("\n");
-		log("This pass transforms $pmux cells to a trees of $mux cells.\n");
+		log("This pass transforms $pmux cells to trees of $mux cells.\n");
 		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
diff --git a/passes/techmap/shregmap.cc b/passes/techmap/shregmap.cc
index f20863ba..004ab1eb 100644
--- a/passes/techmap/shregmap.cc
+++ b/passes/techmap/shregmap.cc
@@ -26,7 +26,9 @@ PRIVATE_NAMESPACE_BEGIN
 struct ShregmapTech
 {
 	virtual ~ShregmapTech() { }
-	virtual bool analyze(vector<int> &taps) = 0;
+	virtual void init(const Module * /*module*/, const SigMap &/*sigmap*/) {}
+	virtual void non_chain_user(const SigBit &/*bit*/, const Cell* /*cell*/, IdString /*port*/) {}
+	virtual bool analyze(vector<int> &taps, const vector<SigBit> &qbits) = 0;
 	virtual bool fixup(Cell *cell, dict<int, SigBit> &taps) = 0;
 };
 
@@ -54,7 +56,7 @@ struct ShregmapOptions
 
 struct ShregmapTechGreenpak4 : ShregmapTech
 {
-	bool analyze(vector<int> &taps)
+	bool analyze(vector<int> &taps, const vector<SigBit> &/*qbits*/)
 	{
 		if (GetSize(taps) > 2 && taps[0] == 0 && taps[2] < 17) {
 			taps.clear();
@@ -91,6 +93,155 @@ struct ShregmapTechGreenpak4 : ShregmapTech
 	}
 };
 
+struct ShregmapTechXilinx7 : ShregmapTech
+{
+	dict<SigBit, std::tuple<Cell*,int,int>> sigbit_to_shiftx_offset;
+	const ShregmapOptions &opts;
+
+	ShregmapTechXilinx7(const ShregmapOptions &opts) : opts(opts) {}
+
+	virtual void init(const Module* module, const SigMap &sigmap) override
+	{
+		for (const auto &i : module->cells_) {
+			auto cell = i.second;
+			if (cell->type == "$shiftx") {
+				if (cell->getParam("\\Y_WIDTH") != 1) continue;
+				int j = 0;
+				for (auto bit : sigmap(cell->getPort("\\A")))
+					sigbit_to_shiftx_offset[bit] = std::make_tuple(cell, j++, 0);
+				log_assert(j == cell->getParam("\\A_WIDTH").as_int());
+			}
+			else if (cell->type == "$mux") {
+				int j = 0;
+				for (auto bit : sigmap(cell->getPort("\\A")))
+					sigbit_to_shiftx_offset[bit] = std::make_tuple(cell, 0, j++);
+				j = 0;
+				for (auto bit : sigmap(cell->getPort("\\B")))
+					sigbit_to_shiftx_offset[bit] = std::make_tuple(cell, 1, j++);
+			}
+		}
+	}
+
+	virtual void non_chain_user(const SigBit &bit, const Cell *cell, IdString port) override
+	{
+		auto it = sigbit_to_shiftx_offset.find(bit);
+		if (it == sigbit_to_shiftx_offset.end())
+			return;
+		if (cell) {
+			if (cell->type == "$shiftx" && port == "\\A")
+				return;
+			if (cell->type == "$mux" && (port == "\\A" || port == "\\B"))
+				return;
+		}
+		sigbit_to_shiftx_offset.erase(it);
+	}
+
+	virtual bool analyze(vector<int> &taps, const vector<SigBit> &qbits) override
+	{
+		if (GetSize(taps) == 1)
+			return taps[0] >= opts.minlen-1 && sigbit_to_shiftx_offset.count(qbits[0]);
+
+		if (taps.back() < opts.minlen-1)
+			return false;
+
+		Cell *shiftx = nullptr;
+		int group = 0;
+		for (int i = 0; i < GetSize(taps); ++i) {
+			auto it = sigbit_to_shiftx_offset.find(qbits[i]);
+			if (it == sigbit_to_shiftx_offset.end())
+				return false;
+
+			// Check taps are sequential
+			if (i != taps[i])
+				return false;
+			// Check taps are not connected to a shift register,
+			// or sequential to the same shift register
+			if (i == 0) {
+				int offset;
+				std::tie(shiftx,offset,group) = it->second;
+				if (offset != i)
+					return false;
+			}
+			else {
+				Cell *shiftx_ = std::get<0>(it->second);
+				if (shiftx_ != shiftx)
+					return false;
+				int offset = std::get<1>(it->second);
+				if (offset != i)
+					return false;
+				int group_ = std::get<2>(it->second);
+				if (group_ != group)
+					return false;
+			}
+		}
+		log_assert(shiftx);
+
+		// Only map if $shiftx exclusively covers the shift register
+		if (shiftx->type == "$shiftx") {
+			if (GetSize(taps) > shiftx->getParam("\\A_WIDTH").as_int())
+				return false;
+			// Due to padding the most significant bits of A may be 1'bx,
+			//   and if so, discount them
+			if (GetSize(taps) < shiftx->getParam("\\A_WIDTH").as_int()) {
+				const SigSpec A = shiftx->getPort("\\A");
+				const int A_width = shiftx->getParam("\\A_WIDTH").as_int();
+				for (int i = GetSize(taps); i < A_width; ++i)
+					if (A[i] != RTLIL::Sx) return false;
+			}
+			else if (GetSize(taps) != shiftx->getParam("\\A_WIDTH").as_int())
+				return false;
+		}
+		else if (shiftx->type == "$mux") {
+			if (GetSize(taps) != 2)
+				return false;
+		}
+		else log_abort();
+
+		return true;
+	}
+
+	virtual bool fixup(Cell *cell, dict<int, SigBit> &taps) override
+	{
+		const auto &tap = *taps.begin();
+		auto bit = tap.second;
+
+		auto it = sigbit_to_shiftx_offset.find(bit);
+		log_assert(it != sigbit_to_shiftx_offset.end());
+
+		auto newcell = cell->module->addCell(NEW_ID, "$__XILINX_SHREG_");
+		newcell->set_src_attribute(cell->get_src_attribute());
+		newcell->setParam("\\DEPTH", cell->getParam("\\DEPTH"));
+		newcell->setParam("\\INIT", cell->getParam("\\INIT"));
+		newcell->setParam("\\CLKPOL", cell->getParam("\\CLKPOL"));
+		newcell->setParam("\\ENPOL", cell->getParam("\\ENPOL"));
+
+		newcell->setPort("\\C", cell->getPort("\\C"));
+		newcell->setPort("\\D", cell->getPort("\\D"));
+		if (cell->hasPort("\\E"))
+			newcell->setPort("\\E", cell->getPort("\\E"));
+
+		Cell* shiftx = std::get<0>(it->second);
+		RTLIL::SigSpec l_wire, q_wire;
+		if (shiftx->type == "$shiftx") {
+			l_wire = shiftx->getPort("\\B");
+			q_wire = shiftx->getPort("\\Y");
+			shiftx->setPort("\\Y", cell->module->addWire(NEW_ID));
+		}
+		else if (shiftx->type == "$mux") {
+			l_wire = shiftx->getPort("\\S");
+			q_wire = shiftx->getPort("\\Y");
+			shiftx->setPort("\\Y", cell->module->addWire(NEW_ID));
+		}
+		else log_abort();
+
+		newcell->setPort("\\Q", q_wire);
+		newcell->setPort("\\L", l_wire);
+
+		return false;
+	}
+};
+
+
 struct ShregmapWorker
 {
 	Module *module;
@@ -113,8 +264,10 @@ struct ShregmapWorker
 		for (auto wire : module->wires())
 		{
 			if (wire->port_output || wire->get_bool_attribute("\\keep")) {
-				for (auto bit : sigmap(wire))
+				for (auto bit : sigmap(wire)) {
 					sigbit_with_non_chain_users.insert(bit);
+					if (opts.tech) opts.tech->non_chain_user(bit, nullptr, {});
+				}
 			}
 
 			if (wire->attributes.count("\\init")) {
@@ -140,10 +293,22 @@ struct ShregmapWorker
 
 				if (opts.init || sigbit_init.count(q_bit) == 0)
 				{
-					if (sigbit_chain_next.count(d_bit)) {
+					auto r = sigbit_chain_next.insert(std::make_pair(d_bit, cell));
+					if (!r.second) {
+						// Insertion not successful means that d_bit is already
+						// connected to another register, thus mark it as a
+						// non chain user ...
 						sigbit_with_non_chain_users.insert(d_bit);
-					} else
-						sigbit_chain_next[d_bit] = cell;
+						// ... and clone d_bit into another wire, and use that
+						// wire as a different key in the d_bit-to-cell dictionary
+						// so that it can be identified as another chain
+						// (omitting this common flop)
+						// Link: https://github.com/YosysHQ/yosys/pull/1085
+						Wire *wire = module->addWire(NEW_ID);
+						module->connect(wire, d_bit);
+						sigmap.add(wire, d_bit);
+						sigbit_chain_next.insert(std::make_pair(wire, cell));
+					}
 
 					sigbit_chain_prev[q_bit] = cell;
 					continue;
@@ -152,8 +317,10 @@ struct ShregmapWorker
 
 			for (auto conn : cell->connections())
 				if (cell->input(conn.first))
-					for (auto bit : sigmap(conn.second))
+					for (auto bit : sigmap(conn.second)) {
 						sigbit_with_non_chain_users.insert(bit);
+						if (opts.tech) opts.tech->non_chain_user(bit, cell, conn.first);
+					}
 		}
 	}
 
@@ -258,7 +425,7 @@ struct ShregmapWorker
 					if (taps.empty() || taps.back() < depth-1)
 						taps.push_back(depth-1);
 
-					if (opts.tech->analyze(taps))
+					if (opts.tech->analyze(taps, qbits))
 						break;
 
 					taps.pop_back();
@@ -377,6 +544,9 @@ struct ShregmapWorker
 	ShregmapWorker(Module *module, const ShregmapOptions &opts) :
 			module(module), sigmap(module), opts(opts), dff_count(0), shreg_count(0)
 	{
+		if (opts.tech)
+			opts.tech->init(module, sigmap);
+
 		make_sigbit_chain_next_prev();
 		find_chain_start_cells();
 
@@ -447,6 +617,11 @@ struct ShregmapPass : public Pass {
 		log("\n");
 		log("    -tech greenpak4\n");
 		log("        map to greenpak4 shift registers.\n");
+		log("        this option also implies -clkpol pos -zinit\n");
+		log("\n");
+		log("    -tech xilinx\n");
+		log("        map to xilinx dynamic-length shift registers.\n");
+		log("        this option also implies -params -init\n");
 		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
@@ -501,6 +676,12 @@ struct ShregmapPass : public Pass {
 					clkpol = "pos";
 					opts.zinit = true;
 					opts.tech = new ShregmapTechGreenpak4;
+				}
+				else if (tech == "xilinx") {
+					opts.init = true;
+					opts.params = true;
+					enpol = "any_or_none";
+					opts.tech = new ShregmapTechXilinx7(opts);
 				} else {
 					argidx--;
 					break;
diff --git a/passes/techmap/simplemap.cc b/passes/techmap/simplemap.cc
index 660b6060..f3da80c6 100644
--- a/passes/techmap/simplemap.cc
+++ b/passes/techmap/simplemap.cc
@@ -599,7 +599,7 @@ struct SimplemapPass : public Pass {
 		simplemap_get_mappers(mappers);
 
 		for (auto mod : design->modules()) {
-			if (!design->selected(mod))
+			if (!design->selected(mod) || mod->get_blackbox_attribute())
 				continue;
 			std::vector<RTLIL::Cell*> cells = mod->cells();
 			for (auto cell : cells) {
diff --git a/passes/techmap/techmap.cc b/passes/techmap/techmap.cc
index d0e5e223..ab0bd3b5 100644
--- a/passes/techmap/techmap.cc
+++ b/passes/techmap/techmap.cc
@@ -72,6 +72,8 @@ struct TechmapWorker
 	pool<IdString> flatten_done_list;
 	pool<Cell*> flatten_keep_list;
 
+	pool<string> log_msg_cache;
+
 	struct TechmapWireData {
 		RTLIL::Wire *wire;
 		RTLIL::SigSpec value;
@@ -84,6 +86,7 @@ struct TechmapWorker
 	bool flatten_mode;
 	bool recursive_mode;
 	bool autoproc_mode;
+	bool ignore_wb;
 
 	TechmapWorker()
 	{
@@ -92,6 +95,7 @@ struct TechmapWorker
 		flatten_mode = false;
 		recursive_mode = false;
 		autoproc_mode = false;
+		ignore_wb = false;
 	}
 
 	std::string constmap_tpl_name(SigMap &sigmap, RTLIL::Module *tpl, RTLIL::Cell *cell, bool verbose)
@@ -383,11 +387,12 @@ struct TechmapWorker
 	{
 		std::string mapmsg_prefix = in_recursion ? "Recursively mapping" : "Mapping";
 
-		if (!design->selected(module))
+		if (!design->selected(module) || module->get_blackbox_attribute(ignore_wb))
 			return false;
 
 		bool log_continue = false;
 		bool did_something = false;
+		LogMakeDebugHdl mkdebug;
 
 		SigMap sigmap(module);
 
@@ -472,7 +477,7 @@ struct TechmapWorker
 				RTLIL::Module *tpl = map->modules_[tpl_name];
 				std::map<RTLIL::IdString, RTLIL::Const> parameters(cell->parameters.begin(), cell->parameters.end());
 
-				if (tpl->get_bool_attribute("\\blackbox"))
+				if (tpl->get_blackbox_attribute(ignore_wb))
 					continue;
 
 				if (!flatten_mode)
@@ -545,6 +550,7 @@ struct TechmapWorker
 								if (extmapper_name == "wrap") {
 									std::string cmd_string = tpl->attributes.at("\\techmap_wrap").decode_string();
 									log("Running \"%s\" on wrapper %s.\n", cmd_string.c_str(), log_id(extmapper_module));
+									mkdebug.on();
 									Pass::call_on_module(extmapper_design, extmapper_module, cmd_string);
 									log_continue = true;
 								}
@@ -558,11 +564,21 @@ struct TechmapWorker
 								goto use_wrapper_tpl;
 							}
 
-							log("%s %s.%s (%s) to %s.\n", mapmsg_prefix.c_str(), log_id(module), log_id(cell), log_id(cell->type), log_id(extmapper_module));
+							auto msg = stringf("Using extmapper %s for cells of type %s.", log_id(extmapper_module), log_id(cell->type));
+							if (!log_msg_cache.count(msg)) {
+								log_msg_cache.insert(msg);
+								log("%s\n", msg.c_str());
+							}
+							log_debug("%s %s.%s (%s) to %s.\n", mapmsg_prefix.c_str(), log_id(module), log_id(cell), log_id(cell->type), log_id(extmapper_module));
 						}
 						else
 						{
-							log("%s %s.%s (%s) with %s.\n", mapmsg_prefix.c_str(), log_id(module), log_id(cell), log_id(cell->type), extmapper_name.c_str());
+							auto msg = stringf("Using extmapper %s for cells of type %s.", extmapper_name.c_str(), log_id(cell->type));
+							if (!log_msg_cache.count(msg)) {
+								log_msg_cache.insert(msg);
+								log("%s\n", msg.c_str());
+							}
+							log_debug("%s %s.%s (%s) with %s.\n", mapmsg_prefix.c_str(), log_id(module), log_id(cell), log_id(cell->type), extmapper_name.c_str());
 
 							if (extmapper_name == "simplemap") {
 								if (simplemap_mappers.count(cell->type) == 0)
@@ -660,6 +676,7 @@ struct TechmapWorker
 						tpl = techmap_cache[key];
 					} else {
 						if (parameters.size() != 0) {
+							mkdebug.on();
 							derived_name = tpl->derive(map, dict<RTLIL::IdString, RTLIL::Const>(parameters.begin(), parameters.end()));
 							tpl = map->module(derived_name);
 							log_continue = true;
@@ -829,6 +846,7 @@ struct TechmapWorker
 						if (log_continue) {
 							log_header(design, "Continuing TECHMAP pass.\n");
 							log_continue = false;
+							mkdebug.off();
 						}
 						while (techmap_module(map, tpl, map, handled_cells, celltypeMap, true)) { }
 					}
@@ -840,6 +858,7 @@ struct TechmapWorker
 				if (log_continue) {
 					log_header(design, "Continuing TECHMAP pass.\n");
 					log_continue = false;
+					mkdebug.off();
 				}
 
 				if (extern_mode && !in_recursion)
@@ -859,13 +878,18 @@ struct TechmapWorker
 						module_queue.insert(m);
 					}
 
-					log("%s %s.%s to imported %s.\n", mapmsg_prefix.c_str(), log_id(module), log_id(cell), log_id(m_name));
+					log_debug("%s %s.%s to imported %s.\n", mapmsg_prefix.c_str(), log_id(module), log_id(cell), log_id(m_name));
 					cell->type = m_name;
 					cell->parameters.clear();
 				}
 				else
 				{
-					log("%s %s.%s using %s.\n", mapmsg_prefix.c_str(), log_id(module), log_id(cell), log_id(tpl));
+					auto msg = stringf("Using template %s for cells of type %s.", log_id(tpl), log_id(cell->type));
+					if (!log_msg_cache.count(msg)) {
+						log_msg_cache.insert(msg);
+						log("%s\n", msg.c_str());
+					}
+					log_debug("%s %s.%s (%s) using %s.\n", mapmsg_prefix.c_str(), log_id(module), log_id(cell), log_id(cell->type), log_id(tpl));
 					techmap_module_worker(design, module, cell, tpl);
 					cell = NULL;
 				}
@@ -883,6 +907,7 @@ struct TechmapWorker
 		if (log_continue) {
 			log_header(design, "Continuing TECHMAP pass.\n");
 			log_continue = false;
+			mkdebug.off();
 		}
 
 		return did_something;
@@ -925,6 +950,9 @@ struct TechmapPass : public Pass {
 		log("    -autoproc\n");
 		log("        Automatically call \"proc\" on implementations that contain processes.\n");
 		log("\n");
+		log("    -wb\n");
+		log("        Ignore the 'whitebox' attribute on cell implementations.\n");
+		log("\n");
 		log("    -assert\n");
 		log("        this option will cause techmap to exit with an error if it can't map\n");
 		log("        a selected cell. only cell types that end on an underscore are accepted\n");
@@ -1031,7 +1059,7 @@ struct TechmapPass : public Pass {
 		simplemap_get_mappers(worker.simplemap_mappers);
 
 		std::vector<std::string> map_files;
-		std::string verilog_frontend = "verilog -nooverwrite";
+		std::string verilog_frontend = "verilog -nooverwrite -noblackbox";
 		int max_iter = -1;
 
 		size_t argidx;
@@ -1068,6 +1096,10 @@ struct TechmapPass : public Pass {
 				worker.autoproc_mode = true;
 				continue;
 			}
+			if (args[argidx] == "-wb") {
+				worker.ignore_wb = true;
+				continue;
+			}
 			break;
 		}
 		extra_args(args, argidx, design);
@@ -1076,7 +1108,7 @@ struct TechmapPass : public Pass {
 		if (map_files.empty()) {
 			std::istringstream f(stdcells_code);
 			Frontend::frontend_call(map, &f, "<techmap.v>", verilog_frontend);
-		} else
+		} else {
 			for (auto &fn : map_files)
 				if (fn.substr(0, 1) == "%") {
 					if (!saved_designs.count(fn.substr(1))) {
@@ -1095,6 +1127,9 @@ struct TechmapPass : public Pass {
 						log_cmd_error("Can't open map file `%s'\n", fn.c_str());
 					Frontend::frontend_call(map, &f, fn, (fn.size() > 3 && fn.substr(fn.size()-3) == ".il") ? "ilang" : verilog_frontend);
 				}
+		}
+
+		log_header(design, "Continuing TECHMAP pass.\n");
 
 		std::map<RTLIL::IdString, std::set<RTLIL::IdString, RTLIL::sort_by_id_str>> celltypeMap;
 		for (auto &it : map->modules_) {
@@ -1145,7 +1180,7 @@ struct FlattenPass : public Pass {
 	{
 		//   |---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|---v---|
 		log("\n");
-		log("    flatten [selection]\n");
+		log("    flatten [options] [selection]\n");
 		log("\n");
 		log("This pass flattens the design by replacing cells by their implementation. This\n");
 		log("pass is very similar to the 'techmap' pass. The only difference is that this\n");
@@ -1154,17 +1189,29 @@ struct FlattenPass : public Pass {
 		log("Cells and/or modules with the 'keep_hierarchy' attribute set will not be\n");
 		log("flattened by this command.\n");
 		log("\n");
+		log("    -wb\n");
+		log("        Ignore the 'whitebox' attribute on cell implementations.\n");
+		log("\n");
 	}
 	void execute(std::vector<std::string> args, RTLIL::Design *design) YS_OVERRIDE
 	{
 		log_header(design, "Executing FLATTEN pass (flatten design).\n");
 		log_push();
 
-		extra_args(args, 1, design);
-
 		TechmapWorker worker;
 		worker.flatten_mode = true;
 
+		size_t argidx;
+		for (argidx = 1; argidx < args.size(); argidx++) {
+			if (args[argidx] == "-wb") {
+				worker.ignore_wb = true;
+				continue;
+			}
+			break;
+		}
+		extra_args(args, argidx, design);
+
+
 		std::map<RTLIL::IdString, std::set<RTLIL::IdString, RTLIL::sort_by_id_str>> celltypeMap;
 		for (auto module : design->modules())
 			celltypeMap[module->name].insert(module->name);
@@ -1190,6 +1237,7 @@ struct FlattenPass : public Pass {
 			}
 		}
 
+		log_suppressed();
 		log("No more expansions possible.\n");
 
 		if (top_mod != NULL)
@@ -1209,7 +1257,7 @@ struct FlattenPass : public Pass {
 
 			dict<RTLIL::IdString, RTLIL::Module*> new_modules;
 			for (auto mod : vector<Module*>(design->modules()))
-				if (used_modules[mod->name] || mod->get_bool_attribute("\\blackbox")) {
+				if (used_modules[mod->name] || mod->get_blackbox_attribute(worker.ignore_wb)) {
 					new_modules[mod->name] = mod;
 				} else {
 					log("Deleting now unused module %s.\n", log_id(mod));
diff --git a/passes/techmap/zinit.cc b/passes/techmap/zinit.cc
index b46147fb..2aefc091 100644
--- a/passes/techmap/zinit.cc
+++ b/passes/techmap/zinit.cc
@@ -46,7 +46,7 @@ struct ZinitPass : public Pass {
 		size_t argidx;
 		for (argidx = 1; argidx < args.size(); argidx++)
 		{
-			if (args[argidx] == "-singleton") {
+			if (args[argidx] == "-all") {
 				all_mode = true;
 				continue;
 			}