From 846cee5066814f448d05f70d5caccea92d4e3755 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 13 Mar 2011 05:46:29 -0700
Subject: [PATCH] Split out parsing and expression trees from regexp.y

Start of splitting regexp.y into logical components instead of the mess
it is today.  Split out the expr-tree and parsing components from regexp.y
into expr-tree.{cc,h} and parse.y, and since regexp.y no longer does parsing,
rename it to hfa.cc.

Some code cleanups snuck their way into this patch, and since I am too
lazy to redo it, I have left them in.

Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-By: Steve Beattie <sbeattie@ubuntu.com>
---
 parser/libapparmor_re/Makefile             |   14 +-
 parser/libapparmor_re/apparmor_re.h        |    2 +
 parser/libapparmor_re/expr-tree.cc         |  576 ++++++++
 parser/libapparmor_re/expr-tree.h          |  627 +++++++++
 parser/libapparmor_re/{regexp.y => hfa.cc} | 1386 +-------------------
 parser/libapparmor_re/parse.h              |   27 +
 parser/libapparmor_re/parse.y              |  266 ++++
 parser/libapparmor_re/regexp.h             |   10 -
 8 files changed, 1538 insertions(+), 1370 deletions(-)
 create mode 100644 parser/libapparmor_re/expr-tree.cc
 create mode 100644 parser/libapparmor_re/expr-tree.h
 rename parser/libapparmor_re/{regexp.y => hfa.cc} (60%)
 create mode 100644 parser/libapparmor_re/parse.h
 create mode 100644 parser/libapparmor_re/parse.y
 delete mode 100644 parser/libapparmor_re/regexp.h

diff --git a/parser/libapparmor_re/Makefile b/parser/libapparmor_re/Makefile
index 3409f9a5b..7006744de 100644
--- a/parser/libapparmor_re/Makefile
+++ b/parser/libapparmor_re/Makefile
@@ -12,14 +12,20 @@ BISON := bison
 
 all : ${TARGET}
 
-libapparmor_re.a: regexp.o
+libapparmor_re.a: parse.o expr-tree.o hfa.o
 	ar ${ARFLAGS} $@ $^
 
-regexp.o : regexp.cc apparmor_re.h
+expr-tree.o: expr-tree.cc expr-tree.h apparmor_re.h
 	$(LINK.cc) $< -c -o $@
 
-regexp.cc : regexp.y flex-tables.h ../immunix.h
+hfa.o: hfa.cc apparmor_re.h
+	$(LINK.cc) $< -c -o $@
+
+parse.o : parse.cc apparmor_re.h expr-tree.h
+	$(LINK.cc) $< -c -o $@
+
+parse.cc : parse.y flex-tables.h ../immunix.h
 	${BISON} -o $@ $<
 
 clean:
-	rm -f regexp.o regexp.cc regexp.so regexp.a regexp ${TARGET}
+	rm -f *.o parse.cc ${TARGET}
diff --git a/parser/libapparmor_re/apparmor_re.h b/parser/libapparmor_re/apparmor_re.h
index 2cbd0d6df..fed69be16 100644
--- a/parser/libapparmor_re/apparmor_re.h
+++ b/parser/libapparmor_re/apparmor_re.h
@@ -10,6 +10,8 @@
 #ifndef APPARMOR_RE_H
 #define APPARMOR_RE_H
 
+#include <stdint.h>
+
 typedef enum dfaflags {
   DFA_CONTROL_EQUIV =		1 << 0,
   DFA_CONTROL_TREE_NORMAL =	1 << 1,
diff --git a/parser/libapparmor_re/expr-tree.cc b/parser/libapparmor_re/expr-tree.cc
new file mode 100644
index 000000000..2d5ca7738
--- /dev/null
+++ b/parser/libapparmor_re/expr-tree.cc
@@ -0,0 +1,576 @@
+/*
+ * (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
+ * Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
+ * Copyright 2009-2010 Canonical Ltd.
+ *
+ * The libapparmor library is licensed under the terms of the GNU
+ * Lesser General Public License, version 2.1. Please see the file
+ * COPYING.LGPL.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Functions to create/manipulate an expression tree for regular expressions
+ * that have been parsed.
+ *
+ * The expression tree can be used directly after the parse creates it, or
+ * it can be factored so that the set of important nodes is smaller.
+ * Having a reduced set of important nodes generally results in a dfa that
+ * is closer to minimum (fewer redundant states are created).  It also
+ * results in fewer important nodes in the state set during subset
+ * construction resulting in less memory used to create a dfa.
+ *
+ * Generally it is worth doing expression tree simplification before dfa
+ * construction, if the regular expression tree contains any alternations.
+ * Even if the regular expression doesn't, simplification should be fast
+ * enough that it can be used with minimal overhead.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "expr-tree.h"
+#include "apparmor_re.h"
+
+
+/* Use a single static EpsNode as it carries no node specific information */
+EpsNode epsnode;
+
+
+ostream& operator<<(ostream& os, uchar c)
+{
+	const char *search = "\a\033\f\n\r\t|*+[](). ",
+		*replace = "aefnrt|*+[](). ", *s;
+
+	if ((s = strchr(search, c)) && *s != '\0')
+		os << '\\' << replace[s - search];
+	else if (c < 32 || c >= 127)
+		os << '\\' << '0' << char('0' + (c >> 6))
+		   << char('0' + ((c >> 3) & 7)) << char('0' + (c & 7));
+	else
+		os << (char)c;
+	return os;
+}
+
+/**
+ * Text-dump a state (for debugging).
+ */
+ostream& operator<<(ostream& os, const NodeSet& state)
+{
+	os << '{';
+	if (!state.empty()) {
+		NodeSet::iterator i = state.begin();
+		for(;;) {
+			os << (*i)->label;
+			if (++i == state.end())
+				break;
+			os << ',';
+		}
+	}
+	os << '}';
+	return os;
+}
+
+ostream& operator<<(ostream& os, Node& node)
+{
+	node.dump(os);
+	return os;
+}
+
+/**
+ * hash_NodeSet - generate a hash for the Nodes in the set
+ */
+unsigned long hash_NodeSet(NodeSet *ns)
+{
+        unsigned long hash = 5381;
+
+	for (NodeSet::iterator i = ns->begin(); i != ns->end(); i++) {
+		hash = ((hash << 5) + hash) + (unsigned long) *i;
+	}
+
+        return hash;
+}
+
+
+/**
+ * label_nodes - label the node positions for pretty-printing debug output
+ *
+ * TODO: separate - node labels should be separate and optional, if not
+ * present pretty printing should use Node address
+ */
+void label_nodes(Node *root)
+{
+	int nodes = 1;
+	for (depth_first_traversal i(root); i; i++)
+		i->label = nodes++;
+}
+
+/**
+ * Text-dump the syntax tree (for debugging).
+ */
+void Node::dump_syntax_tree(ostream& os)
+{
+	for (depth_first_traversal i(this); i; i++) {
+		os << i->label << '\t';
+		if ((*i)->child[0] == 0)
+			os << **i << '\t' << (*i)->followpos << endl;
+		else {
+			if ((*i)->child[1] == 0)
+				os << (*i)->child[0]->label << **i;
+			else
+				os << (*i)->child[0]->label << **i
+				   << (*i)->child[1]->label;
+			os << '\t' << (*i)->firstpos
+			   << (*i)->lastpos << endl;
+		}
+	}
+	os << endl;
+}
+
+/*
+ * Normalize the regex parse tree for factoring and cancelations. Normalization
+ * reorganizes internal (alt and cat) nodes into a fixed "normalized" form that
+ * simplifies factoring code, in that it produces a canonicalized form for
+ * the direction being normalized so that the factoring code does not have
+ * to consider as many cases.
+ *
+ * left normalization (dir == 0) uses these rules
+ * (E | a) -> (a | E)
+ * (a | b) | c -> a | (b | c)
+ * (ab)c -> a(bc)
+ *
+ * right normalization (dir == 1) uses the same rules but reversed
+ * (a | E) -> (E | a)
+ * a | (b | c) -> (a | b) | c
+ * a(bc) -> (ab)c
+ *
+ * Note: This is written iteratively for a given node (the top node stays
+ *       fixed and the children are rotated) instead of recursively.
+ *       For a given node under examination rotate over nodes from
+ *       dir to !dir, until no dir direction node meets the criteria.
+ *       Then recurse to the children (which will have a different node type)
+ *       to make sure they are normalized.
+ *       Normalization of a child node is guaranteed to not affect the
+ *       normalization of the parent.
+ *
+ *       For cat nodes the depth first traverse order is guaranteed to be
+ *       maintained.  This is not necessary for altnodes.
+ *
+ * Eg. For left normalization
+ *
+ *              |1               |1
+ *             / \              / \
+ *            |2  T     ->     a   |2
+ *           / \                  / \
+ *          |3  c                b   |3
+ *         / \                      / \
+ *        a   b                    c   T
+ *
+ */
+static void rotate_node(Node *t, int dir) {
+	// (a | b) | c -> a | (b | c)
+	// (ab)c -> a(bc)
+	Node *left = t->child[dir];
+	t->child[dir] = left->child[dir];
+	left->child[dir] = left->child[!dir];
+	left->child[!dir] = t->child[!dir];
+	t->child[!dir] = left;
+}
+
+void normalize_tree(Node *t, int dir)
+{
+	if (dynamic_cast<LeafNode *>(t))
+		return;
+
+	for (;;) {
+		if ((&epsnode == t->child[dir]) &&
+		    (&epsnode != t->child[!dir]) &&
+		     dynamic_cast<TwoChildNode *>(t)) {
+			// (E | a) -> (a | E)
+			// Ea -> aE
+			Node *c = t->child[dir];
+			t->child[dir] = t->child[!dir];
+			t->child[!dir] = c;
+			// Don't break here as 'a' may be a tree that
+			// can be pulled up.
+		} else if ((dynamic_cast<AltNode *>(t) &&
+			    dynamic_cast<AltNode *>(t->child[dir])) ||
+			   (dynamic_cast<CatNode *>(t) &&
+			    dynamic_cast<CatNode *>(t->child[dir]))) {
+			// (a | b) | c -> a | (b | c)
+			// (ab)c -> a(bc)
+			rotate_node(t, dir);
+		} else if (dynamic_cast<AltNode *>(t) &&
+			   dynamic_cast<CharSetNode *>(t->child[dir]) &&
+			   dynamic_cast<CharNode *>(t->child[!dir])) {
+			// [a] | b  ->  b | [a]
+			Node *c = t->child[dir];
+			t->child[dir] = t->child[!dir];
+			t->child[!dir] = c;
+		} else {
+			break;
+		}
+	}
+	if (t->child[dir])
+		normalize_tree(t->child[dir], dir);
+	if (t->child[!dir])
+		normalize_tree(t->child[!dir], dir);
+}
+
+//charset conversion is disabled for now,
+//it hinders tree optimization in some cases, so it needs to be either
+//done post optimization, or have extra factoring rules added
+#if 0
+static Node *merge_charset(Node *a, Node *b)
+{
+	if (dynamic_cast<CharNode *>(a) &&
+	    dynamic_cast<CharNode *>(b)) {
+		Chars chars;
+		chars.insert(dynamic_cast<CharNode *>(a)->c);
+		chars.insert(dynamic_cast<CharNode *>(b)->c);
+		CharSetNode *n = new CharSetNode(chars);
+		return n;
+	} else if (dynamic_cast<CharNode *>(a) &&
+		   dynamic_cast<CharSetNode *>(b)) {
+		Chars *chars = &dynamic_cast<CharSetNode *>(b)->chars;
+		chars->insert(dynamic_cast<CharNode *>(a)->c);
+		return b;
+	} else if (dynamic_cast<CharSetNode *>(a) &&
+		   dynamic_cast<CharSetNode *>(b)) {
+		Chars *from = &dynamic_cast<CharSetNode *>(a)->chars;
+		Chars *to = &dynamic_cast<CharSetNode *>(b)->chars;
+		for (Chars::iterator i = from->begin(); i != from->end(); i++)
+			to->insert(*i);
+		return b;
+	}
+
+	//return ???;
+}
+
+static Node *alt_to_charsets(Node *t, int dir)
+{
+/*
+	Node *first = NULL;
+	Node *p = t;
+	Node *i = t;
+	for (;dynamic_cast<AltNode *>(i);) {
+		if (dynamic_cast<CharNode *>(i->child[dir]) ||
+		    dynamic_cast<CharNodeSet *>(i->child[dir])) {
+			if (!first) {
+				first = i;
+				p = i;
+				i = i->child[!dir];
+			} else {
+				first->child[dir] = merge_charset(first->child[dir],
+						      i->child[dir]);
+				p->child[!dir] = i->child[!dir];
+				Node *tmp = i;
+				i = tmp->child[!dir];
+				tmp->child[!dir] = NULL;
+				tmp->release();
+			}
+		} else {
+			p = i;
+			i = i->child[!dir];
+		}
+	}
+	// last altnode of chain check other dir as well
+	if (first && (dynamic_cast<charNode *>(i) ||
+		      dynamic_cast<charNodeSet *>(i))) {
+		
+	}
+*/
+
+/*
+		if (dynamic_cast<CharNode *>(t->child[dir]) ||
+		    dynamic_cast<CharSetNode *>(t->child[dir]))
+		    char_test = true;
+			    (char_test &&
+			     (dynamic_cast<CharNode *>(i->child[dir]) ||
+			      dynamic_cast<CharSetNode *>(i->child[dir])))) {
+*/
+	return t;
+}
+#endif
+
+static Node *basic_alt_factor(Node *t, int dir)
+{
+	if (!dynamic_cast<AltNode *>(t))
+		return t;
+
+	if (t->child[dir]->eq(t->child[!dir])) {
+		// (a | a) -> a
+		Node *tmp = t->child[dir];
+		t->child[dir] = NULL;
+		t->release();
+		return tmp;
+	}
+
+	// (ab) | (ac) -> a(b|c)
+	if (dynamic_cast<CatNode *>(t->child[dir]) &&
+	    dynamic_cast<CatNode *>(t->child[!dir]) &&
+	    t->child[dir]->child[dir]->eq(t->child[!dir]->child[dir])) {
+		// (ab) | (ac) -> a(b|c)
+		Node *left = t->child[dir];
+		Node *right = t->child[!dir];
+		t->child[dir] = left->child[!dir];
+		t->child[!dir] = right->child[!dir];
+		right->child[!dir] = NULL;
+		right->release();
+		left->child[!dir] = t;
+		return left;
+	}
+
+	// a | (ab) -> a (E | b) -> a (b | E)
+	if (dynamic_cast<CatNode *>(t->child[!dir]) &&
+	    t->child[dir]->eq(t->child[!dir]->child[dir])) {
+		Node *c = t->child[!dir];
+		t->child[dir]->release();
+		t->child[dir] = c->child[!dir];
+		t->child[!dir] = &epsnode;
+		c->child[!dir] = t;
+		return c;
+	}
+
+	// ab | (a) -> a (b | E)
+	if (dynamic_cast<CatNode *>(t->child[dir]) &&
+	    t->child[dir]->child[dir]->eq(t->child[!dir])) {
+		Node *c = t->child[dir];
+		t->child[!dir]->release();
+		t->child[dir] = c->child[!dir];
+		t->child[!dir] = &epsnode;
+		c->child[!dir] = t;
+		return c;
+	}
+
+	return t;
+}
+
+static Node *basic_simplify(Node *t, int dir)
+{
+	if (dynamic_cast<CatNode *>(t) &&
+	    &epsnode == t->child[!dir]) {
+		// aE -> a
+		Node *tmp = t->child[dir];
+		t->child[dir] = NULL;
+		t->release();
+		return tmp;
+	}
+
+	return basic_alt_factor(t, dir);
+}
+
+/*
+ * assumes a normalized tree.  reductions shown for left normalization
+ * aE -> a
+ * (a | a) -> a
+ ** factoring patterns
+ * a | (a | b) -> (a | b)
+ * a | (ab) -> a (E | b) -> a (b | E)
+ * (ab) | (ac) -> a(b|c)
+ *
+ * returns t - if no simplifications were made
+ *         a new root node - if simplifications were made
+ */
+Node *simplify_tree_base(Node *t, int dir, bool &mod)
+{
+	if (dynamic_cast<ImportantNode *>(t))
+		return t;
+
+	for (int i=0; i < 2; i++) {
+		if (t->child[i]) {
+			Node *c = simplify_tree_base(t->child[i], dir, mod);
+			if (c != t->child[i]) {
+				t->child[i] = c;
+				mod = true;
+			}
+		}
+	}
+
+	// only iterate on loop if modification made
+	for (;; mod = true) {
+
+		Node *tmp = basic_simplify(t, dir);
+		if (tmp != t) {
+			t = tmp;
+			continue;
+		}
+
+
+		/* all tests after this must meet 2 alt node condition */
+		if (!dynamic_cast<AltNode *>(t) ||
+		    !dynamic_cast<AltNode *>(t->child[!dir]))
+			break;
+
+		// a | (a | b) -> (a | b)
+		// a | (b | (c | a)) -> (b | (c | a))
+		Node *p = t;
+		Node *i = t->child[!dir];
+		for (;dynamic_cast<AltNode *>(i); p = i, i = i->child[!dir]) {
+			if (t->child[dir]->eq(i->child[dir])) {
+				Node *tmp = t->child[!dir];
+				t->child[!dir] = NULL;
+				t->release();
+				t = tmp;
+				continue;
+			}
+		}
+		// last altnode of chain check other dir as well
+		if (t->child[dir]->eq(p->child[!dir])) {
+			Node *tmp = t->child[!dir];
+			t->child[!dir] = NULL;
+			t->release();
+			t = tmp;
+			continue;
+		}
+
+		//exact match didn't work, try factoring front
+		//a | (ac | (ad | () -> (a (E | c)) | (...)
+		//ab | (ac | (...)) -> (a (b | c)) | (...)
+		//ab | (a | (...)) -> (a (b | E)) | (...)
+		Node *pp;
+		int count = 0;
+		Node *subject = t->child[dir];
+		Node *a = subject;
+		if (dynamic_cast<CatNode *>(subject))
+		    a = subject->child[dir];
+
+		for (pp = p = t, i = t->child[!dir];
+		     dynamic_cast<AltNode *>(i); ) {
+			if ((dynamic_cast<CatNode *>(i->child[dir]) &&
+			     a->eq(i->child[dir]->child[dir])) ||
+			    (a->eq(i->child[dir]))) {
+				// extract matching alt node
+				p->child[!dir] = i->child[!dir];
+				i->child[!dir] = subject;
+				subject = basic_simplify(i, dir);
+				if (dynamic_cast<CatNode *>(subject))
+					a = subject->child[dir];
+				else
+					a = subject;
+
+				i = p->child[!dir];
+				count++;
+			} else {
+				pp = p; p = i; i = i->child[!dir];
+			}
+		}
+
+		// last altnode in chain check other dir as well
+		if ((dynamic_cast<CatNode *>(i) &&
+		     a->eq(i->child[dir])) ||
+		    (a->eq(i))) {
+			count++;
+			if (t == p) {
+				t->child[dir] = subject;
+				t = basic_simplify(t, dir);
+			} else {
+				t->child[dir] = p->child[dir];
+				p->child[dir] = subject;
+				pp->child[!dir] = basic_simplify(p, dir);
+			}
+		} else {
+			t->child[dir] = i;
+			p->child[!dir] = subject;
+		}
+
+		if (count == 0)
+			break;
+	}
+	return t;
+}
+
+int debug_tree(Node *t)
+{
+	int nodes = 1;
+
+	if (!dynamic_cast<ImportantNode *>(t)) {
+		if (t->child[0])
+			nodes += debug_tree(t->child[0]);
+		if (t->child[1])
+			nodes += debug_tree(t->child[1]);
+	}
+	return nodes;
+}
+
+static void count_tree_nodes(Node *t, struct node_counts *counts)
+{
+	if (dynamic_cast<AltNode *>(t)) {
+		counts->alt++;
+		count_tree_nodes(t->child[0], counts);
+		count_tree_nodes(t->child[1], counts);
+	} else if (dynamic_cast<CatNode *>(t)) {
+		counts->cat++;
+		count_tree_nodes(t->child[0], counts);
+		count_tree_nodes(t->child[1], counts);
+	} else if (dynamic_cast<PlusNode *>(t)) {
+		counts->plus++;
+		count_tree_nodes(t->child[0], counts);
+	} else if (dynamic_cast<StarNode *>(t)) {
+		counts->star++;
+		count_tree_nodes(t->child[0], counts);
+	} else if (dynamic_cast<CharNode *>(t)) {
+		counts->charnode++;
+	} else if (dynamic_cast<AnyCharNode *>(t)) {
+		counts->any++;
+	} else if (dynamic_cast<CharSetNode *>(t)) {
+		counts->charset++;
+	} else if (dynamic_cast<NotCharSetNode *>(t)) {
+		counts->notcharset++;
+	}
+}
+
+#include "stdio.h"
+#include "stdint.h"
+#include "apparmor_re.h"
+
+Node *simplify_tree(Node *t, dfaflags_t flags)
+{
+	bool update;
+
+	if (flags & DFA_DUMP_TREE_STATS) {
+		struct node_counts counts = { 0, 0, 0, 0, 0, 0, 0, 0 };
+		count_tree_nodes(t, &counts);
+		fprintf(stderr, "expr tree: c %d, [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n", counts.charnode, counts.charset, counts.notcharset, counts.alt, counts.plus, counts.star, counts.any, counts.cat);
+	}
+	do {
+		update = false;
+		//default to right normalize first as this reduces the number
+		//of trailing nodes which might follow an internal *
+		//or **, which is where state explosion can happen
+		//eg. in one test this makes the difference between
+		//    the dfa having about 7 thousands states,
+		//    and it having about  1.25 million states
+		int dir = 1;
+		if (flags & DFA_CONTROL_TREE_LEFT)
+			dir = 0;
+		for (int count = 0; count < 2; count++) {
+			bool modified;
+			do {
+			    modified = false;
+			    if (flags & DFA_CONTROL_TREE_NORMAL)
+				normalize_tree(t, dir);
+			    t = simplify_tree_base(t, dir, modified);
+			    if (modified)
+				update = true;
+			} while (modified);
+			if (flags & DFA_CONTROL_TREE_LEFT)
+				dir++;
+			else
+				dir--;
+		}
+	} while(update);
+	if (flags & DFA_DUMP_TREE_STATS) {
+		struct node_counts counts = { 0, 0, 0, 0, 0, 0, 0, 0 };
+		count_tree_nodes(t, &counts);
+		fprintf(stderr, "simplified expr tree: c %d, [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n", counts.charnode, counts.charset, counts.notcharset, counts.alt, counts.plus, counts.star, counts.any, counts.cat);
+	}
+	return t;
+}
+
diff --git a/parser/libapparmor_re/expr-tree.h b/parser/libapparmor_re/expr-tree.h
new file mode 100644
index 000000000..6a3ec3113
--- /dev/null
+++ b/parser/libapparmor_re/expr-tree.h
@@ -0,0 +1,627 @@
+/*
+ * (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
+ * Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
+ * Copyright 2009-2010 Canonical Ltd.
+ *
+ * The libapparmor library is licensed under the terms of the GNU
+ * Lesser General Public License, version 2.1. Please see the file
+ * COPYING.LGPL.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Functions to create/manipulate an expression tree for regular expressions
+ * that have been parsed.
+ *
+ * The expression tree can be used directly after the parse creates it, or
+ * it can be factored so that the set of important nodes is smaller.
+ * Having a reduced set of important nodes generally results in a dfa that
+ * is closer to minimum (fewer redundant states are created).  It also
+ * results in fewer important nodes in the state set during subset
+ * construction resulting in less memory used to create a dfa.
+ *
+ * Generally it is worth doing expression tree simplification before dfa
+ * construction, if the regular expression tree contains any alternations.
+ * Even if the regular expression doesn't, simplification should be fast
+ * enough that it can be used with minimal overhead.
+ */
+#ifndef __LIBAA_RE_EXPR_H
+#define __LIBAA_RE_EXPR_H
+
+#include <map>
+#include <set>
+#include <stack>
+#include <ostream>
+
+#include "apparmor_re.h"
+
+using namespace std;
+
+typedef unsigned char uchar;
+typedef set<uchar> Chars;
+
+ostream& operator<<(ostream& os, uchar c);
+
+/* Compute the union of two sets. */
+template<class T>
+set<T> operator+(const set<T>& a, const set<T>& b)
+{
+	set<T> c(a);
+	c.insert(b.begin(), b.end());
+	return c;
+}
+
+/**
+ * When creating DFAs from regex trees, a DFA state is constructed from
+ * a set of important nodes in the syntax tree. This includes AcceptNodes,
+ * which indicate that when a match ends in a particular state, the
+ * regular expressions that the AcceptNode belongs to match.
+ */
+class Node;
+class ImportantNode;
+typedef set <ImportantNode *> NodeSet;
+
+/**
+ * Text-dump a state (for debugging).
+ */
+ostream& operator<<(ostream& os, const NodeSet& state);
+
+/**
+ * Out-edges from a state to another: we store the follow-set of Nodes
+ * for each input character that is not a default match in
+ * cases (i.e., following a CharNode or CharSetNode), and default
+ * matches in otherwise as well as in all matching explicit cases
+ * (i.e., following an AnyCharNode or NotCharSetNode). This avoids
+ * enumerating all the explicit transitions for default matches.
+ */
+typedef struct NodeCases {
+	typedef map<uchar, NodeSet *>::iterator iterator;
+	iterator begin() { return cases.begin(); }
+	iterator end() { return cases.end(); }
+
+	NodeCases() : otherwise(0) { }
+	map<uchar, NodeSet *> cases;
+	NodeSet *otherwise;
+} NodeCases;
+
+
+ostream& operator<<(ostream& os, Node& node);
+
+/* An abstract node in the syntax tree. */
+class Node {
+public:
+	Node() :
+	    nullable(false), label(0) { child[0] = child[1] = 0; }
+	Node(Node *left) :
+	    nullable(false), label(0) { child[0] = left; child[1] = 0; }
+	Node(Node *left, Node *right) :
+	    nullable(false), label(0) { child[0] = left; child[1] = right; }
+	virtual ~Node()
+	{
+		if (child[0])
+			child[0]->release();
+		if (child[1])
+			child[1]->release();
+	}
+
+	/**
+	 * See the "Dragon Book" for an explanation of nullable, firstpos,
+	 * lastpos, and followpos.
+	 */
+	virtual void compute_nullable() { }
+	virtual void compute_firstpos() = 0;
+	virtual void compute_lastpos() = 0;
+	virtual void compute_followpos() { }
+	virtual int eq(Node *other) = 0;
+	virtual ostream& dump(ostream& os) = 0;
+	void dump_syntax_tree(ostream& os);
+
+	bool nullable;
+	NodeSet firstpos, lastpos, followpos;
+	/* child 0 is left, child 1 is right */
+	Node *child[2];
+
+	unsigned int label;	/* debug label; 0 until a label is assigned */
+	/**
+	 * We indirectly release Nodes through a virtual function because
+	 * accept and Eps Nodes are shared, and must be treated specially.
+	 * We could use full reference counting here but the indirect release
+	 * is sufficient and has less overhead
+	 */
+	virtual void release(void) { delete this; }
+};
+
+
+class InnerNode : public Node {
+public:
+	InnerNode() : Node() { };
+	InnerNode(Node *left) : Node(left) {};
+	InnerNode(Node *left, Node *right) : Node(left, right) { };
+};
+
+class OneChildNode : public InnerNode {
+public:
+	OneChildNode(Node *left) : InnerNode(left) { };
+};
+
+class TwoChildNode : public InnerNode {
+public:
+	TwoChildNode(Node *left, Node *right) :  InnerNode(left, right) { };
+};
+
+class LeafNode : public Node {
+public:
+	LeafNode() : Node() { };
+};
+
+/* Match the empty string (//). */
+class EpsNode : public LeafNode {
+public:
+	EpsNode() : LeafNode()
+	{
+		nullable = true;
+		label = 0;
+	}
+	void release(void)
+	{
+		/* don't delete Eps nodes because there is a single static
+		 * instance shared by all trees.  Look for epsnode in the code
+		 */
+	}
+
+	void compute_firstpos() { }
+	void compute_lastpos() { }
+	int eq(Node *other)
+	{
+		if (dynamic_cast<EpsNode *>(other))
+			return 1;
+		return 0;
+	}
+	ostream& dump(ostream& os)
+	{
+		return os << "[]";
+	}
+};
+
+/**
+ * Leaf nodes in the syntax tree are important to us: they describe the
+ * characters that the regular expression matches. We also consider
+ * AcceptNodes important: they indicate when a regular expression matches.
+ */
+class ImportantNode : public LeafNode {
+public:
+	ImportantNode() : LeafNode() { }
+	void compute_firstpos()
+	{
+		firstpos.insert(this);
+	}
+	void compute_lastpos() {
+		lastpos.insert(this);
+	}
+	virtual void follow(NodeCases& cases) = 0;
+};
+
+/* common base class for all the different classes that contain
+ * character information.
+ */
+class CNode : public ImportantNode {
+public:
+	CNode() : ImportantNode() { }
+};
+
+/* Match one specific character (/c/). */
+class CharNode : public CNode {
+public:
+	CharNode(uchar c) : c(c) { }
+	void follow(NodeCases& cases)
+	{
+		NodeSet **x = &cases.cases[c];
+		if (!*x) {
+			if (cases.otherwise)
+				*x = new NodeSet(*cases.otherwise);
+			else
+				*x = new NodeSet;
+		}
+		(*x)->insert(followpos.begin(), followpos.end());
+	}
+	int eq(Node *other)
+	{
+		CharNode *o = dynamic_cast<CharNode *>(other);
+		if (o) {
+			return c == o->c;
+		}
+		return 0;
+	}
+	ostream& dump(ostream& os)
+	{
+		return os << c;
+	}
+
+	uchar c;
+};
+
+/* Match a set of characters (/[abc]/). */
+class CharSetNode : public CNode {
+public:
+	CharSetNode(Chars& chars) : chars(chars) { }
+	void follow(NodeCases& cases)
+	{
+		for (Chars::iterator i = chars.begin(); i != chars.end(); i++) {
+			NodeSet **x = &cases.cases[*i];
+			if (!*x) {
+				if (cases.otherwise)
+					*x = new NodeSet(*cases.otherwise);
+				else
+					*x = new NodeSet;
+			}
+			(*x)->insert(followpos.begin(), followpos.end());
+		}
+	}
+	int eq(Node *other)
+	{
+		CharSetNode *o = dynamic_cast<CharSetNode *>(other);
+		if (!o || chars.size() != o->chars.size())
+			return 0;
+
+		for (Chars::iterator i = chars.begin(), j = o->chars.begin();
+		     i != chars.end() && j != o->chars.end();
+		     i++, j++) {
+			if (*i != *j)
+				return 0;
+		}
+		return 1;
+	}
+	ostream& dump(ostream& os)
+	{
+		os << '[';
+		for (Chars::iterator i = chars.begin(); i != chars.end(); i++)
+			os << *i;
+		return os << ']';
+	}
+
+	Chars chars;
+};
+
+/* Match any character except those in a set (/[^abc]/). */
+class NotCharSetNode : public CNode {
+public:
+	NotCharSetNode(Chars& chars) : chars(chars) { }
+	void follow(NodeCases& cases)
+	{
+		if (!cases.otherwise)
+			cases.otherwise = new NodeSet;
+		for (Chars::iterator j = chars.begin(); j != chars.end(); j++) {
+			NodeSet **x = &cases.cases[*j];
+			if (!*x)
+				*x = new NodeSet(*cases.otherwise);
+		}
+		/* Note: Add to the nonmatching characters after copying away
+		 * the old otherwise state for the matching characters.
+		 */
+		cases.otherwise->insert(followpos.begin(), followpos.end());
+		for (NodeCases::iterator i = cases.begin(); i != cases.end();
+		     i++) {
+			if (chars.find(i->first) == chars.end())
+				i->second->insert(followpos.begin(),
+						  followpos.end());
+		}
+	}
+	int eq(Node *other)
+	{
+		NotCharSetNode *o = dynamic_cast<NotCharSetNode *>(other);
+		if (!o || chars.size() != o->chars.size())
+			return 0;
+
+		for (Chars::iterator i = chars.begin(), j = o->chars.begin();
+		     i != chars.end() && j != o->chars.end();
+		     i++, j++) {
+			if (*i != *j)
+				return 0;
+		}
+		return 1;
+	}
+	ostream& dump(ostream& os)
+	{
+		os << "[^";
+		for (Chars::iterator i = chars.begin(); i != chars.end(); i++)
+			os << *i;
+		return os << ']';
+	}
+
+	Chars chars;
+};
+
+/* Match any character (/./). */
+class AnyCharNode : public CNode {
+public:
+	AnyCharNode() { }
+	void follow(NodeCases& cases)
+	{
+		if (!cases.otherwise)
+			cases.otherwise = new NodeSet;
+		cases.otherwise->insert(followpos.begin(), followpos.end());
+		for (NodeCases::iterator i = cases.begin(); i != cases.end();
+		     i++)
+			i->second->insert(followpos.begin(), followpos.end());
+	}
+	int eq(Node *other)
+	{
+		if (dynamic_cast<AnyCharNode *>(other))
+			return 1;
+		return 0;
+	}
+	ostream& dump(ostream& os) {
+		return os << ".";
+	}
+};
+
+/**
+ * Indicate that a regular expression matches. An AcceptNode itself
+ * doesn't match anything, so it will never generate any transitions.
+ */
+class AcceptNode : public ImportantNode {
+public:
+	AcceptNode() {}
+	void release(void)
+	{
+		/* don't delete AcceptNode via release as they are shared, and
+		 * will be deleted when the table they are stored in is deleted
+		 */
+	}
+
+	void follow(NodeCases& cases __attribute__((unused)))
+	{
+		/* Nothing to follow. */
+	}
+
+	/* requires accept nodes to be common by pointer */
+	int eq(Node *other)
+	{
+		if (dynamic_cast<AcceptNode *>(other))
+			return (this == other);
+		return 0;
+	}
+};
+
+/* Match a node zero or more times. (This is a unary operator.) */
+class StarNode : public OneChildNode {
+public:
+	StarNode(Node *left) : OneChildNode(left)
+	{
+		nullable = true;
+	}
+	void compute_firstpos()
+	{
+		firstpos = child[0]->firstpos;
+	}
+	void compute_lastpos()
+	{
+		lastpos = child[0]->lastpos;
+	}
+	void compute_followpos()
+	{
+		NodeSet from = child[0]->lastpos, to = child[0]->firstpos;
+		for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
+			(*i)->followpos.insert(to.begin(), to.end());
+		}
+	}
+	int eq(Node *other) {
+		if (dynamic_cast<StarNode *>(other))
+			return child[0]->eq(other->child[0]);
+		return 0;
+	}
+	ostream& dump(ostream& os)
+	{
+		os << '(';
+		child[0]->dump(os);
+		return os << ")*";
+	}
+};
+
+/* Match a node one or more times. (This is a unary operator.) */
+class PlusNode : public OneChildNode {
+public:
+	PlusNode(Node *left) : OneChildNode(left) { }
+	void compute_nullable()
+	{
+		nullable = child[0]->nullable;
+	}
+	void compute_firstpos()
+	{
+		firstpos = child[0]->firstpos;
+	}
+	void compute_lastpos()
+	{
+		lastpos = child[0]->lastpos;
+	}
+	void compute_followpos()
+	{
+		NodeSet from = child[0]->lastpos, to = child[0]->firstpos;
+		for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
+			(*i)->followpos.insert(to.begin(), to.end());
+		}
+	}
+	int eq(Node *other)
+	{
+		if (dynamic_cast<PlusNode *>(other))
+			return child[0]->eq(other->child[0]);
+		return 0;
+	}
+	ostream& dump(ostream& os)
+	{
+		os << '(';
+		child[0]->dump(os);
+		return os << ")+";
+	}
+};
+
+/* Match a pair of consecutive nodes. */
+class CatNode : public TwoChildNode {
+public:
+	CatNode(Node *left, Node *right) : TwoChildNode(left, right) { }
+	void compute_nullable()
+	{
+		nullable = child[0]->nullable && child[1]->nullable;
+	}
+	void compute_firstpos()
+	{
+		if (child[0]->nullable)
+			firstpos = child[0]->firstpos + child[1]->firstpos;
+		else
+			firstpos = child[0]->firstpos;
+	}
+	void compute_lastpos()
+	{
+		if (child[1]->nullable)
+			lastpos = child[0]->lastpos + child[1]->lastpos;
+		else
+			lastpos = child[1]->lastpos;
+	}
+	void compute_followpos()
+	{
+		NodeSet from = child[0]->lastpos, to = child[1]->firstpos;
+		for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
+			(*i)->followpos.insert(to.begin(), to.end());
+		}
+	}
+	int eq(Node *other) {
+		if (dynamic_cast<CatNode *>(other)) {
+			if (!child[0]->eq(other->child[0]))
+				return 0;
+			return child[1]->eq(other->child[1]);
+		}
+		return 0;
+	}
+	ostream& dump(ostream& os)
+	{
+		child[0]->dump(os);
+		child[1]->dump(os);
+		return os;
+	}
+};
+
+/* Match one of two alternative nodes. */
+class AltNode : public TwoChildNode {
+public:
+	AltNode(Node *left, Node *right) : TwoChildNode(left, right) { }
+	void compute_nullable()
+	{
+		nullable = child[0]->nullable || child[1]->nullable;
+	}
+	void compute_lastpos()
+	{
+		lastpos = child[0]->lastpos + child[1]->lastpos;
+	}
+	void compute_firstpos()
+	{
+		firstpos = child[0]->firstpos + child[1]->firstpos;
+	}
+	int eq(Node *other) {
+		if (dynamic_cast<AltNode *>(other)) {
+			if (!child[0]->eq(other->child[0]))
+				return 0;
+			return child[1]->eq(other->child[1]);
+		}
+		return 0;
+	}
+	ostream& dump(ostream& os)
+	{
+		os << '(';
+		child[0]->dump(os);
+		os << '|';
+		child[1]->dump(os);
+		os << ')';
+		return os;
+	}
+};
+
+
+/* Traverse the syntax tree depth-first in an iterator-like manner. */
+class depth_first_traversal {
+	stack<Node *> pos;
+	void push_left(Node *node)
+	{
+		pos.push(node);
+
+		while (dynamic_cast<InnerNode *>(node)) {
+			pos.push(node->child[0]);
+			node = node->child[0];
+		}
+	}
+
+public:
+	depth_first_traversal(Node *node)
+	{
+		push_left(node);
+	}
+	Node *operator*()
+	{
+		return pos.top();
+	}
+	Node* operator->()
+	{
+		return pos.top();
+	}
+	operator bool()
+	{
+		return !pos.empty();
+	}
+	void operator++(int)
+	{
+		Node *last = pos.top();
+		pos.pop();
+
+		if (!pos.empty()) {
+			/* no need to dynamic cast, as we just popped a node so
+			 * the top node must be an inner node */
+			InnerNode *node = (InnerNode *)(pos.top());
+			if (node->child[1] && node->child[1] != last) {
+				push_left(node->child[1]);
+			}
+		}
+	}
+};
+
+struct node_counts {
+	int charnode;
+	int charset;
+	int notcharset;
+	int alt;
+	int plus;
+	int star;
+	int any;
+	int cat;
+};
+
+extern EpsNode epsnode;
+
+int debug_tree(Node *t);
+Node *simplify_tree(Node *t, dfaflags_t flags);
+void label_nodes(Node *root);
+unsigned long hash_NodeSet(NodeSet *ns);
+
+
+/* Comparison operator for sets of <NodeSet *>.
+ * Compare set hashes, and if the sets have the same hash
+ * compare the sets of <Node *> by pointer value; the pointer comparison
+ * allows us to distinguish Sets of <Node *> we have already seen from
+ * new ones when constructing the DFA.
+ */
+struct deref_less_than {
+	bool operator()(pair <unsigned long, NodeSet *> const & lhs,
+			pair <unsigned long, NodeSet *> const & rhs) const
+		{
+			if (lhs.first == rhs.first)
+				return *(lhs.second) < *(rhs.second);
+			else
+				return lhs.first < rhs.first;
+		}
+};
+
+#endif /* __LIBAA_RE_EXPR_H */
diff --git a/parser/libapparmor_re/regexp.y b/parser/libapparmor_re/hfa.cc
similarity index 60%
rename from parser/libapparmor_re/regexp.y
rename to parser/libapparmor_re/hfa.cc
index aac11572f..c78bfce0d 100644
--- a/parser/libapparmor_re/regexp.y
+++ b/parser/libapparmor_re/hfa.cc
@@ -1,1110 +1,36 @@
 /*
- * regexp.y -- Regular Expression Matcher Generator
  * (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
+ * Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
+ * Copyright 2009-2010 Canonical Ltd.
  *
- * Implementation based on the Lexical Analysis chapter of:
+ * The libapparmor library is licensed under the terms of the GNU
+ * Lesser General Public License, version 2.1. Please see the file
+ * COPYING.LGPL.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Base of implementation based on the Lexical Analysis chapter of:
  *   Alfred V. Aho, Ravi Sethi, Jeffrey D. Ullman:
  *   Compilers: Principles, Techniques, and Tools (The "Dragon Book"),
  *   Addison-Wesley, 1986.
- *
- *  This program is free software; you can redistribute it and/or modify
- *  it under the terms of the GNU General Public License version 2 as
- *  published by the Free Software Foundation.
- *
- *  See http://www.gnu.org for more details.
  */
 
-%{
-    /* #define DEBUG_TREE */
+#include <list>
+#include <vector>
+#include <stack>
+#include <set>
+#include <map>
+#include <ostream>
+#include <iostream>
+#include <fstream>
 
-    #include <list>
-    #include <vector>
-    #include <stack>
-    #include <set>
-    #include <map>
-    #include <ostream>
-    #include <iostream>
-    #include <fstream>
-
-    using namespace std;
-
-    typedef unsigned char uchar;
-    typedef set<uchar> Chars;
-
-    ostream& operator<<(ostream& os, uchar c);
-
-    /* Compute the union of two sets. */
-    template<class T>
-    set<T> operator+(const set<T>& a, const set<T>& b)
-    {
-	set<T> c(a);
-	c.insert(b.begin(), b.end());
-	return c;
-    }
-
-    /**
-     * When creating DFAs from regex trees, a DFA state is constructed from
-     * a set of important nodes in the syntax tree. This includes AcceptNodes,
-     * which indicate that when a match ends in a particular state, the
-     * regular expressions that the AcceptNode belongs to match.
-     */
-    class ImportantNode;
-    typedef set <ImportantNode *> NodeSet;
-
-    /**
-     * Out-edges from a state to another: we store the follow-set of Nodes
-     * for each input character that is not a default match in
-     * cases (i.e., following a CharNode or CharSetNode), and default
-     * matches in otherwise as well as in all matching explicit cases
-     * (i.e., following an AnyCharNode or NotCharSetNode). This avoids
-     * enumerating all the explicit tranitions for default matches.
-     */
-    typedef struct NodeCases {
-	typedef map<uchar, NodeSet *>::iterator iterator;
-	iterator begin() { return cases.begin(); }
-	iterator end() { return cases.end(); }
-
-	NodeCases() : otherwise(0) { }
-	map<uchar, NodeSet *> cases;
-	NodeSet *otherwise;
-    } NodeCases;
-
-
-    /* An abstract node in the syntax tree. */
-    class Node {
-    public:
-	Node() :
-	    nullable(false) { child[0] = child[1] = 0; }
-	Node(Node *left) :
-	    nullable(false) { child[0] = left; child[1] = 0; }
-	Node(Node *left, Node *right) :
-	    nullable(false) { child[0] = left; child[1] = right; }
-	virtual ~Node()
-	{
-	    if (child[0])
-		    child[0]->release();
-	    if (child[1])
-		    child[1]->release();
-	}
-
-	/**
-	 * See the "Dragon Book" for an explanation of nullable, firstpos,
-	 * lastpos, and followpos.
-	 */
-	virtual void compute_nullable() { }
-	virtual void compute_firstpos() = 0;
-	virtual void compute_lastpos() = 0;
-	virtual void compute_followpos() { }
-	virtual int eq(Node *other) = 0;
-	virtual ostream& dump(ostream& os) = 0;
-
-	bool nullable;
-	NodeSet firstpos, lastpos, followpos;
-	/* child 0 is left, child 1 is right */
-	Node *child[2];
-
-	unsigned int label;	/* unique number for debug etc */
-	/**
-	 * We indirectly release Nodes through a virtual function because
-	 * accept and Eps Nodes are shared, and must be treated specially.
-	 * We could use full reference counting here but the indirect release
-	 * is sufficient and has less overhead
-	 */
-	virtual void release(void) {
-	    delete this;
-	}
-    };
-
-    class InnerNode : public Node {
-    public:
-        InnerNode() : Node() { };
-        InnerNode(Node *left) : Node(left) {};
-        InnerNode(Node *left, Node *right) : Node(left, right) { };
-    };
-
-    class OneChildNode : public InnerNode {
-    public:
-        OneChildNode(Node *left) : InnerNode(left) { };
-    };
-
-    class TwoChildNode : public InnerNode {
-    public:
-        TwoChildNode(Node *left, Node *right) :  InnerNode(left, right) { };
-    };
-
-    class LeafNode : public Node {
-    public:
-        LeafNode() : Node() { };
-
-    };
-
-    /* Match nothing (//). */
-    class EpsNode : public LeafNode {
-    public:
-    EpsNode() : LeafNode()
-	{
-	    nullable = true;
-	    label = 0;
-	}
-	void release(void)
-	{
-	  /* don't delete Eps nodes because there is a single static instance
-	   * shared by all trees.  Look for epsnode in the code
-	   */
-	}
-
-	void compute_firstpos()
-	{
-	}
-	void compute_lastpos()
-	{
-	}
-	int eq(Node *other) {
-		if (dynamic_cast<EpsNode *>(other))
-			return 1;
-		return 0;
-	}
-	ostream& dump(ostream& os)
-	{
-	    return os << "[]";
-	}
-    };
-
-    /**
-     * Leaf nodes in the syntax tree are important to us: they describe the
-     * characters that the regular expression matches. We also consider
-     * AcceptNodes import: they indicate when a regular expression matches.
-     */
-    class ImportantNode : public LeafNode {
-    public:
-        ImportantNode() : LeafNode() { }
-	void compute_firstpos()
-	{
-	    firstpos.insert(this);
-	}
-	void compute_lastpos() {
-	    lastpos.insert(this);
-	}
-	virtual void follow(NodeCases& cases) = 0;
-    };
-
-    /* common base class for all the different classes that contain
-     * character information.
-     */
-    class CNode : public ImportantNode {
-    public:
-        CNode() : ImportantNode() { }
-
-    };
-
-    /* Match one specific character (/c/). */
-    class CharNode : public CNode {
-    public:
-	CharNode(uchar c) : c(c) { }
-	void follow(NodeCases& cases)
-	{
-	    NodeSet **x = &cases.cases[c];
-	    if (!*x) {
-		if (cases.otherwise)
-		    *x = new NodeSet(*cases.otherwise);
-		else
-		    *x = new NodeSet;
-	    }
-	    (*x)->insert(followpos.begin(), followpos.end());
-	}
-	int eq(Node *other) {
-		CharNode *o = dynamic_cast<CharNode *>(other);
-		if (o) {
-			return c == o->c;
-		}
-		return 0;
-	}
-	ostream& dump(ostream& os)
-	{
-	    return os << c;
-	}
-
-	uchar c;
-    };
-
-    /* Match a set of characters (/[abc]/). */
-    class CharSetNode : public CNode {
-    public:
-	CharSetNode(Chars& chars) : chars(chars) { }
-	void follow(NodeCases& cases)
-	{
-	    for (Chars::iterator i = chars.begin(); i != chars.end(); i++) {
-		NodeSet **x = &cases.cases[*i];
-		if (!*x) {
-		    if (cases.otherwise)
-			*x = new NodeSet(*cases.otherwise);
-		    else
-			*x = new NodeSet;
-		}
-		(*x)->insert(followpos.begin(), followpos.end());
-	    }
-	}
-	int eq(Node *other) {
-		CharSetNode *o = dynamic_cast<CharSetNode *>(other);
-		if (!o || chars.size() != o->chars.size())
-			return 0;
-
-		for (Chars::iterator i = chars.begin(), j = o->chars.begin();
-		     i != chars.end() && j != o->chars.end();
-		     i++, j++) {
-			if (*i != *j)
-				return 0;
-		}
-		return 1;
-	}
-	ostream& dump(ostream& os)
-	{
-	    os << '[';
-	    for (Chars::iterator i = chars.begin(); i != chars.end(); i++)
-		os << *i;
-	    return os << ']';
-	}
-
-	Chars chars;
-    };
-
-    /* Match all except one character (/[^abc]/). */
-    class NotCharSetNode : public CNode {
-    public:
-	NotCharSetNode(Chars& chars) : chars(chars) { }
-	void follow(NodeCases& cases)
-	{
-	    if (!cases.otherwise)
-		cases.otherwise = new NodeSet;
-	    for (Chars::iterator j = chars.begin(); j != chars.end(); j++) {
-		NodeSet **x = &cases.cases[*j];
-		if (!*x)
-		    *x = new NodeSet(*cases.otherwise);
-	    }
-	    /**
-	     * Note: Add to the nonmatching characters after copying away the
-	     * old otherwise state for the matching characters.
-	     */
-	    cases.otherwise->insert(followpos.begin(), followpos.end());
-	    for (NodeCases::iterator i = cases.begin(); i != cases.end(); i++) {
-		if (chars.find(i->first) == chars.end())
-		    i->second->insert(followpos.begin(), followpos.end());
-	    }
-	}
-	int eq(Node *other) {
-		NotCharSetNode *o = dynamic_cast<NotCharSetNode *>(other);
-		if (!o || chars.size() != o->chars.size())
-			return 0;
-
-		for (Chars::iterator i = chars.begin(), j = o->chars.begin();
-		     i != chars.end() && j != o->chars.end();
-		     i++, j++) {
-			if (*i != *j)
-				return 0;
-		}
-		return 1;
-	}
-	ostream& dump(ostream& os)
-	{
-	    os << "[^";
-	    for (Chars::iterator i = chars.begin(); i != chars.end(); i++)
-		os << *i;
-	    return os << ']';
-	}
-
-	Chars chars;
-    };
-
-    /* Match any character (/./). */
-    class AnyCharNode : public CNode {
-    public:
-	AnyCharNode() { }
-	void follow(NodeCases& cases)
-	{
-	    if (!cases.otherwise)
-		cases.otherwise = new NodeSet;
-	    cases.otherwise->insert(followpos.begin(), followpos.end());
-	    for (NodeCases::iterator i = cases.begin(); i != cases.end(); i++)
-		i->second->insert(followpos.begin(), followpos.end());
-	}
-	int eq(Node *other) {
-		if (dynamic_cast<AnyCharNode *>(other))
-			return 1;
-		return 0;
-	}
-	ostream& dump(ostream& os) {
-	    return os << ".";
-	}
-    };
-
-    /**
-     * Indicate that a regular expression matches. An AcceptNode itself
-     * doesn't match anything, so it will never generate any transitions.
-     */
-    class AcceptNode : public ImportantNode {
-    public:
-	AcceptNode() {}
-	void release(void)
-	{
-	  /* don't delete AcceptNode via release as they are shared,
-	   * and will be deleted when the table the are stored in is deleted
-	   */
-	}
-
-	void follow(NodeCases& cases __attribute__((unused)))
-	{
-	    /* Nothing to follow. */
-	}
-	/* requires accept nodes to be common by pointer */
-	int eq(Node *other) {
-		if (dynamic_cast<AcceptNode *>(other))
-			return (this == other);
-		return 0;
-	}
-    };
-
-    /* Match a node zero or more times. (This is a unary operator.) */
-    class StarNode : public OneChildNode {
-    public:
-	StarNode(Node *left) :
-	    OneChildNode(left)
-	{
-	    nullable = true;
-	}
-	void compute_firstpos()
-	{
-	    firstpos = child[0]->firstpos;
-	}
-	void compute_lastpos()
-	{
-	    lastpos = child[0]->lastpos;
-	}
-	void compute_followpos()
-	{
-	    NodeSet from = child[0]->lastpos, to = child[0]->firstpos;
-	    for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
-		(*i)->followpos.insert(to.begin(), to.end());
-	    }
-	}
-	int eq(Node *other) {
-		if (dynamic_cast<StarNode *>(other))
-			return child[0]->eq(other->child[0]);
-		return 0;
-	}
-	ostream& dump(ostream& os)
-	{
-	    os << '(';
-	    child[0]->dump(os);
-	    return os << ")*";
-	}
-    };
-
-    /* Match a node one or more times. (This is a unary operator.) */
-    class PlusNode : public OneChildNode {
-    public:
-	PlusNode(Node *left) :
-	    OneChildNode(left) { }
-	void compute_nullable()
-	{
-	    nullable = child[0]->nullable;
-	}
-	void compute_firstpos()
-	{
-	    firstpos = child[0]->firstpos;
-	}
-	void compute_lastpos()
-	{
-	    lastpos = child[0]->lastpos;
-	}
-	void compute_followpos()
-	{
-	    NodeSet from = child[0]->lastpos, to = child[0]->firstpos;
-	    for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
-		(*i)->followpos.insert(to.begin(), to.end());
-	    }
-	}
-	int eq(Node *other) {
-		if (dynamic_cast<PlusNode *>(other))
-			return child[0]->eq(other->child[0]);
-		return 0;
-	}
-	ostream& dump(ostream& os)
-	{
-	    os << '(';
-	    child[0]->dump(os);
-	    return os << ")+";
-	}
-    };
-
-    /* Match a pair of consecutive nodes. */
-    class CatNode : public TwoChildNode {
-    public:
-	CatNode(Node *left, Node *right) :
-	    TwoChildNode(left, right) { }
-	void compute_nullable()
-	{
-	    nullable = child[0]->nullable && child[1]->nullable;
-	}
-	void compute_firstpos()
-	{
-	    if (child[0]->nullable)
-		firstpos = child[0]->firstpos + child[1]->firstpos;
-	    else
-		firstpos = child[0]->firstpos;
-	}
-	void compute_lastpos()
-	{
-	    if (child[1]->nullable)
-		lastpos = child[0]->lastpos + child[1]->lastpos;
-	    else
-		lastpos = child[1]->lastpos;
-	}
-	void compute_followpos()
-	{
-	    NodeSet from = child[0]->lastpos, to = child[1]->firstpos;
-	    for(NodeSet::iterator i = from.begin(); i != from.end(); i++) {
-		(*i)->followpos.insert(to.begin(), to.end());
-	    }
-	}
-	int eq(Node *other) {
-		if (dynamic_cast<CatNode *>(other)) {
-			if (!child[0]->eq(other->child[0]))
-				return 0;
-			return child[1]->eq(other->child[1]);
-		}
-		return 0;
-	}
-	ostream& dump(ostream& os)
-	{
-	    child[0]->dump(os);
-	    child[1]->dump(os);
-	    return os;
-	    //return os << ' ';
-	}
-    };
-
-    /* Match one of two alternative nodes. */
-    class AltNode : public TwoChildNode {
-    public:
-	AltNode(Node *left, Node *right) :
-	    TwoChildNode(left, right) { }
-	void compute_nullable()
-	{
-	    nullable = child[0]->nullable || child[1]->nullable;
-	}
-	void compute_lastpos()
-	{
-	    lastpos = child[0]->lastpos + child[1]->lastpos;
-	}
-	void compute_firstpos()
-	{
-	    firstpos = child[0]->firstpos + child[1]->firstpos;
-	}
-	int eq(Node *other) {
-		if (dynamic_cast<AltNode *>(other)) {
-			if (!child[0]->eq(other->child[0]))
-				return 0;
-			return child[1]->eq(other->child[1]);
-		}
-		return 0;
-	}
-	ostream& dump(ostream& os)
-	{
-	    os << '(';
-	    child[0]->dump(os);
-	    os << '|';
-	    child[1]->dump(os);
-	    os << ')';
-	    return os;
-	}
-    };
-
-/* Use a single static EpsNode as it carries no node specific information */
-static EpsNode epsnode;
-
-/*
- * Normalize the regex parse tree for factoring and cancelations. Normalization
- * reorganizes internal (alt and cat) nodes into a fixed "normalized" form that
- * simplifies factoring code, in that it produces a canonicalized form for
- * the direction being normalized so that the factoring code does not have
- * to consider as many cases.
- *
- * left normalization (dir == 0) uses these rules
- * (E | a) -> (a | E)
- * (a | b) | c -> a | (b | c)
- * (ab)c -> a(bc)
- *
- * right normalization (dir == 1) uses the same rules but reversed
- * (a | E) -> (E | a)
- * a | (b | c) -> (a | b) | c
- * a(bc) -> (ab)c
- *
- * Note: This is written iteratively for a given node (the top node stays
- *       fixed and the children are rotated) instead of recursively.
- *       For a given node under examination rotate over nodes from
- *       dir to !dir.   Until no dir direction node meets the criterial.
- *       Then recurse to the children (which will have a different node type)
- *       to make sure they are normalized.
- *       Normalization of a child node is guarenteed to not affect the
- *       normalization of the parent.
- *
- *       For cat nodes the depth first traverse order is guarenteed to be
- *       maintained.  This is not necessary for altnodes.
- *
- * Eg. For left normalization
- *
- *              |1               |1
- *             / \              / \
- *            |2  T     ->     a   |2
- *           / \                  / \
- *          |3  c                b   |3
- *         / \                      / \
- *        a   b                    c   T
- *
- */
-static void rotate_node(Node *t, int dir) {
-	// (a | b) | c -> a | (b | c)
-	// (ab)c -> a(bc)
-	Node *left = t->child[dir];
-	t->child[dir] = left->child[dir];
-	left->child[dir] = left->child[!dir];
-	left->child[!dir] = t->child[!dir];
-	t->child[!dir] = left;
-}
-
-void normalize_tree(Node *t, int dir)
-{
-	if (dynamic_cast<LeafNode *>(t))
-		return;
-
-	for (;;) {
-		if ((&epsnode == t->child[dir]) &&
-		    (&epsnode != t->child[!dir]) &&
-		     dynamic_cast<TwoChildNode *>(t)) {
-			// (E | a) -> (a | E)
-			// Ea -> aE
-			Node *c = t->child[dir];
-			t->child[dir] = t->child[!dir];
-			t->child[!dir] = c;
-			// Don't break here as 'a' may be a tree that
-			// can be pulled up.
-		} else if ((dynamic_cast<AltNode *>(t) &&
-			    dynamic_cast<AltNode *>(t->child[dir])) ||
-			   (dynamic_cast<CatNode *>(t) &&
-			    dynamic_cast<CatNode *>(t->child[dir]))) {
-			// (a | b) | c -> a | (b | c)
-			// (ab)c -> a(bc)
-			rotate_node(t, dir);
-		} else if (dynamic_cast<AltNode *>(t) &&
-			   dynamic_cast<CharSetNode *>(t->child[dir]) &&
-			   dynamic_cast<CharNode *>(t->child[!dir])) {
-			// [a] | b  ->  b | [a]
-			Node *c = t->child[dir];
-			t->child[dir] = t->child[!dir];
-			t->child[!dir] = c;
-		} else {
-			break;
-		}
-	}
-	if (t->child[dir])
-		normalize_tree(t->child[dir], dir);
-	if (t->child[!dir])
-		normalize_tree(t->child[!dir], dir);
-}
-
-//charset conversion is disabled for now,
-//it hinders tree optimization in some cases, so it need to be either
-//done post optimization, or have extra factoring rules added
-#if 0
-static Node *merge_charset(Node *a, Node *b)
-{
-	if (dynamic_cast<CharNode *>(a) &&
-	    dynamic_cast<CharNode *>(b)) {
-		Chars chars;
-		chars.insert(dynamic_cast<CharNode *>(a)->c);
-		chars.insert(dynamic_cast<CharNode *>(b)->c);
-		CharSetNode *n = new CharSetNode(chars);
-		return n;
-	} else if (dynamic_cast<CharNode *>(a) &&
-		   dynamic_cast<CharSetNode *>(b)) {
-		Chars *chars = &dynamic_cast<CharSetNode *>(b)->chars;
-		chars->insert(dynamic_cast<CharNode *>(a)->c);
-		return b;
-	} else if (dynamic_cast<CharSetNode *>(a) &&
-		   dynamic_cast<CharSetNode *>(b)) {
-		Chars *from = &dynamic_cast<CharSetNode *>(a)->chars;
-		Chars *to = &dynamic_cast<CharSetNode *>(b)->chars;
-		for (Chars::iterator i = from->begin(); i != from->end(); i++)
-			to->insert(*i);
-		return b;
-	}
-
-	//return ???;
-}
-
-static Node *alt_to_charsets(Node *t, int dir)
-{
-/*
-	Node *first = NULL;
-	Node *p = t;
-	Node *i = t;
-	for (;dynamic_cast<AltNode *>(i);) {
-		if (dynamic_cast<CharNode *>(i->child[dir]) ||
-		    dynamic_cast<CharNodeSet *>(i->child[dir])) {
-			if (!first) {
-				first = i;
-				p = i;
-				i = i->child[!dir];
-			} else {
-				first->child[dir] = merge_charset(first->child[dir],
-						      i->child[dir]);
-				p->child[!dir] = i->child[!dir];
-				Node *tmp = i;
-				i = tmp->child[!dir];
-				tmp->child[!dir] = NULL;
-				tmp->release();
-			}
-		} else {
-			p = i;
-			i = i->child[!dir];
-		}
-	}
-	// last altnode of chain check other dir as well
-	if (first && (dynamic_cast<charNode *>(i) ||
-		      dynamic_cast<charNodeSet *>(i))) {
-		
-	}
-*/
-
-/*
-		if (dynamic_cast<CharNode *>(t->child[dir]) ||
-		    dynamic_cast<CharSetNode *>(t->child[dir]))
-		    char_test = true;
-			    (char_test &&
-			     (dynamic_cast<CharNode *>(i->child[dir]) ||
-			      dynamic_cast<CharSetNode *>(i->child[dir])))) {
-*/
-	return t;
-}
-#endif
-
-static Node *basic_alt_factor(Node *t, int dir)
-{
-	if (!dynamic_cast<AltNode *>(t))
-		return t;
-
-	if (t->child[dir]->eq(t->child[!dir])) {
-		// (a | a) -> a
-		Node *tmp = t->child[dir];
-		t->child[dir] = NULL;
-		t->release();
-		return tmp;
-	}
-
-	// (ab) | (ac) -> a(b|c)
-	if (dynamic_cast<CatNode *>(t->child[dir]) &&
-	    dynamic_cast<CatNode *>(t->child[!dir]) &&
-	    t->child[dir]->child[dir]->eq(t->child[!dir]->child[dir])) {
-		// (ab) | (ac) -> a(b|c)
-		Node *left = t->child[dir];
-		Node *right = t->child[!dir];
-		t->child[dir] = left->child[!dir];
-		t->child[!dir] = right->child[!dir];
-		right->child[!dir] = NULL;
-		right->release();
-		left->child[!dir] = t;
-		return left;
-	}
-
-	// a | (ab) -> a (E | b) -> a (b | E)
-	if (dynamic_cast<CatNode *>(t->child[!dir]) &&
-	    t->child[dir]->eq(t->child[!dir]->child[dir])) {
-		Node *c = t->child[!dir];
-		t->child[dir]->release();
-		t->child[dir] = c->child[!dir];
-		t->child[!dir] = &epsnode;
-		c->child[!dir] = t;
-		return c;
-	}
-
-	// ab | (a) -> a (b | E)
-	if (dynamic_cast<CatNode *>(t->child[dir]) &&
-	    t->child[dir]->child[dir]->eq(t->child[!dir])) {
-		Node *c = t->child[dir];
-		t->child[!dir]->release();
-		t->child[dir] = c->child[!dir];
-		t->child[!dir] = &epsnode;
-		c->child[!dir] = t;
-		return c;
-	}
-
-	return t;
-}
-
-static Node *basic_simplify(Node *t, int dir)
-{
-	if (dynamic_cast<CatNode *>(t) &&
-	    &epsnode == t->child[!dir]) {
-		// aE -> a
-		Node *tmp = t->child[dir];
-		t->child[dir] = NULL;
-		t->release();
-		return tmp;
-	}
-
-	return basic_alt_factor(t, dir);
-}
-
-/*
- * assumes a normalized tree.  reductions shown for left normalization
- * aE -> a
- * (a | a) -> a
- ** factoring patterns
- * a | (a | b) -> (a | b)
- * a | (ab) -> a (E | b) -> a (b | E)
- * (ab) | (ac) -> a(b|c)
- *
- * returns t - if no simplifications were made
- *         a new root node - if simplifications were made
- */
-Node *simplify_tree_base(Node *t, int dir, bool &mod)
-{
-	if (dynamic_cast<ImportantNode *>(t))
-		return t;
-
-	for (int i=0; i < 2; i++) {
-		if (t->child[i]) {
-			Node *c = simplify_tree_base(t->child[i], dir, mod);
-			if (c != t->child[i]) {
-				t->child[i] = c;
-				mod = true;
-			}
-		}
-	}
-
-	// only iterate on loop if modification made
-	for (;; mod = true) {
-
-		Node *tmp = basic_simplify(t, dir);
-		if (tmp != t) {
-			t = tmp;
-			continue;
-		}
-
-
-		/* all tests after this must meet 2 alt node condition */
-		if (!dynamic_cast<AltNode *>(t) ||
-		    !dynamic_cast<AltNode *>(t->child[!dir]))
-			break;
-
-		// a | (a | b) -> (a | b)
-		// a | (b | (c | a)) -> (b | (c | a))
-		Node *p = t;
-		Node *i = t->child[!dir];
-		for (;dynamic_cast<AltNode *>(i); p = i, i = i->child[!dir]) {
-			if (t->child[dir]->eq(i->child[dir])) {
-				Node *tmp = t->child[!dir];
-				t->child[!dir] = NULL;
-				t->release();
-				t = tmp;
-				continue;
-			}
-		}
-		// last altnode of chain check other dir as well
-		if (t->child[dir]->eq(p->child[!dir])) {
-			Node *tmp = t->child[!dir];
-			t->child[!dir] = NULL;
-			t->release();
-			t = tmp;
-			continue;
-		}
-
-		//exact match didn't work, try factoring front
-		//a | (ac | (ad | () -> (a (E | c)) | (...)
-		//ab | (ac | (...)) -> (a (b | c)) | (...)
-		//ab | (a | (...)) -> (a (b | E)) | (...)
-		Node *pp;
-		int count = 0;
-		Node *subject = t->child[dir];
-		Node *a = subject;
-		if (dynamic_cast<CatNode *>(subject))
-		    a = subject->child[dir];
-
-		for (pp = p = t, i = t->child[!dir];
-		     dynamic_cast<AltNode *>(i); ) {
-			if ((dynamic_cast<CatNode *>(i->child[dir]) &&
-			     a->eq(i->child[dir]->child[dir])) ||
-			    (a->eq(i->child[dir]))) {
-				// extract matching alt node
-				p->child[!dir] = i->child[!dir];
-				i->child[!dir] = subject;
-				subject = basic_simplify(i, dir);
-				if (dynamic_cast<CatNode *>(subject))
-					a = subject->child[dir];
-				else
-					a = subject;
-
-				i = p->child[!dir];
-				count++;
-			} else {
-				pp = p; p = i; i = i->child[!dir];
-			}
-		}
-
-		// last altnode in chain check other dir as well
-		if ((dynamic_cast<CatNode *>(i) &&
-		     a->eq(i->child[dir])) ||
-		    (a->eq(i))) {
-			count++;
-			if (t == p) {
-				t->child[dir] = subject;
-				t = basic_simplify(t, dir);
-			} else {
-				t->child[dir] = p->child[dir];
-				p->child[dir] = subject;
-				pp->child[!dir] = basic_simplify(p, dir);
-			}
-		} else {
-			t->child[dir] = i;
-			p->child[!dir] = subject;
-		}
-
-		if (count == 0)
-			break;
-	}
-	return t;
-}
-
-int debug_tree(Node *t)
-{
-	int nodes = 1;
-
-	if (!dynamic_cast<ImportantNode *>(t)) {
-		if (t->child[0])
-			nodes += debug_tree(t->child[0]);
-		if (t->child[1])
-			nodes += debug_tree(t->child[1]);
-	}
-	return nodes;
-}
-
-struct node_counts {
-	int charnode;
-	int charset;
-	int notcharset;
-	int alt;
-	int plus;
-	int star;
-	int any;
-	int cat;
-};
-
-
-static void count_tree_nodes(Node *t, struct node_counts *counts)
-{
-	if (dynamic_cast<AltNode *>(t)) {
-		counts->alt++;
-		count_tree_nodes(t->child[0], counts);
-		count_tree_nodes(t->child[1], counts);
-	} else if (dynamic_cast<CatNode *>(t)) {
-		counts->cat++;
-		count_tree_nodes(t->child[0], counts);
-		count_tree_nodes(t->child[1], counts);
-	} else if (dynamic_cast<PlusNode *>(t)) {
-		counts->plus++;
-		count_tree_nodes(t->child[0], counts);
-	} else if (dynamic_cast<StarNode *>(t)) {
-		counts->star++;
-		count_tree_nodes(t->child[0], counts);
-	} else if (dynamic_cast<CharNode *>(t)) {
-		counts->charnode++;
-	} else if (dynamic_cast<AnyCharNode *>(t)) {
-		counts->any++;
-	} else if (dynamic_cast<CharSetNode *>(t)) {
-		counts->charset++;
-	} else if (dynamic_cast<NotCharSetNode *>(t)) {
-		counts->notcharset++;
-	}
-}
-
-#include "stdio.h"
-#include "stdint.h"
-#include "apparmor_re.h"
-
-Node *simplify_tree(Node *t, dfaflags_t flags)
-{
-	bool update;
-
-	if (flags & DFA_DUMP_TREE_STATS) {
-		struct node_counts counts = { 0, 0, 0, 0, 0, 0, 0, 0 };
-		count_tree_nodes(t, &counts);
-		fprintf(stderr, "expr tree: c %d, [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n", counts.charnode, counts.charset, counts.notcharset, counts.alt, counts.plus, counts.star, counts.any, counts.cat);
-	}
-	do {
-		update = false;
-		//default to right normalize first as this reduces the number
-		//of trailing nodes which might follow an internal *
-		//or **, which is where state explosion can happen
-		//eg. in one test this makes the difference between
-		//    the dfa having about 7 thousands states,
-		//    and it having about  1.25 million states
-		int dir = 1;
-		if (flags & DFA_CONTROL_TREE_LEFT)
-			dir = 0;
-		for (int count = 0; count < 2; count++) {
-			bool modified;
-			do {
-			    modified = false;
-			    if (flags & DFA_CONTROL_TREE_NORMAL)
-				normalize_tree(t, dir);
-			    t = simplify_tree_base(t, dir, modified);
-			    if (modified)
-				update = true;
-			} while (modified);
-			if (flags & DFA_CONTROL_TREE_LEFT)
-				dir++;
-			else
-				dir--;
-		}
-	} while(update);
-	if (flags & DFA_DUMP_TREE_STATS) {
-		struct node_counts counts = { 0, 0, 0, 0, 0, 0, 0, 0 };
-		count_tree_nodes(t, &counts);
-		fprintf(stderr, "simplified expr tree: c %d, [] %d, [^] %d, | %d, + %d, * %d, . %d, cat %d\n", counts.charnode, counts.charset, counts.notcharset, counts.alt, counts.plus, counts.star, counts.any, counts.cat);
-	}
-	return t;
-}
-
-
-%}
-
-%union {
-    char c;
-    Node *node;
-    Chars *cset;
-}
-
-%{
-    void regexp_error(Node **, const char *, const char *);
-#   define YYLEX_PARAM &text
-    int regexp_lex(YYSTYPE *, const char **);
-
-    static inline Chars*
-    insert_char(Chars* cset, uchar a)
-    {
-	cset->insert(a);
-	return cset;
-    }
-
-    static inline Chars*
-    insert_char_range(Chars* cset, uchar a, uchar b)
-    {
-	if (a > b)
-	    swap(a, b);
-	for (uchar i = a; i <= b; i++)
-	    cset->insert(i);
-	return cset;
-    }
-%}
-
-%pure-parser
-/* %error-verbose */
-%parse-param {Node **root}
-%parse-param {const char *text}
-%name-prefix = "regexp_"
-
-%token <c> CHAR
-%type <c> regex_char cset_char1 cset_char cset_charN
-%type <cset> charset cset_chars
-%type <node> regexp expr terms0 terms qterm term
-
-/**
- * Note: destroy all nodes upon failure, but *not* the start symbol once
- * parsing succeeds!
- */
-%destructor { $$->release(); } expr terms0 terms qterm term
-
-%%
-
-/* FIXME: Does not parse "[--]", "[---]", "[^^-x]". I don't actually know
-          which precise grammer Perl regexps use, and rediscovering that
-	  is proving to be painful. */
-
-regexp	    : /* empty */	{ *root = $$ = &epsnode; }
-	    | expr		{ *root = $$ = $1; }
-	    ;
-
-expr	    : terms
-	    | expr '|' terms0	{ $$ = new AltNode($1, $3); }
-	    | '|' terms0	{ $$ = new AltNode(&epsnode, $2); }
-	    ;
-
-terms0	    : /* empty */	{ $$ = &epsnode; }
-	    | terms
-	    ;
-
-terms	    : qterm
-	    | terms qterm	{ $$ = new CatNode($1, $2); }
-	    ;
-
-qterm	    : term
-	    | term '*'		{ $$ = new StarNode($1); }
-	    | term '+'		{ $$ = new PlusNode($1); }
-	    ;
-
-term	    : '.'		{ $$ = new AnyCharNode; }
-	    | regex_char	{ $$ = new CharNode($1); }
-	    | '[' charset ']'	{ $$ = new CharSetNode(*$2);
-				  delete $2; }
-	    | '[' '^' charset ']'
-				{ $$ = new NotCharSetNode(*$3);
-				  delete $3; }
-	    | '[' '^' '^' cset_chars ']'
-				{ $4->insert('^');
-				  $$ = new NotCharSetNode(*$4);
-				  delete $4; }
-	    | '(' regexp ')'	{ $$ = $2; }
-	    ;
-
-regex_char  : CHAR
-	    | '^'		{ $$ = '^'; }
-	    | '-'		{ $$ = '-'; }
-	    | ']'		{ $$ = ']'; }
-	    ;
-
-charset	    : cset_char1 cset_chars
-				{ $$ = insert_char($2, $1); }
-	    | cset_char1 '-' cset_charN cset_chars
-				{ $$ = insert_char_range($4, $1, $3); }
-	    ;
-
-cset_chars  : /* nothing */	{ $$ = new Chars; }
-	    | cset_chars cset_charN
-				{ $$ = insert_char($1, $2); }
-	    | cset_chars cset_charN '-' cset_charN
-				{ $$ = insert_char_range($1, $2, $4); }
-	    ;
-
-cset_char1  : cset_char
-	    | ']'		{ $$ = ']'; }
-	    | '-'		{ $$ = '-'; }
-	    ;
-
-cset_charN  : cset_char
-	    | '^'		{ $$ = '^'; }
-	    ;
-
-cset_char   : CHAR
-	    | '['		{ $$ = '['; }
-	    | '*'		{ $$ = '*'; }
-	    | '+'		{ $$ = '+'; }
-	    | '.'		{ $$ = '.'; }
-	    | '|'		{ $$ = '|'; }
-	    | '('		{ $$ = '('; }
-	    | ')'		{ $$ = ')'; }
-	    ;
-
-%%
 
 #include <string.h>
 #include <getopt.h>
@@ -1114,264 +40,12 @@ cset_char   : CHAR
 #include <iostream>
 #include <fstream>
 
+#include "expr-tree.h"
+#include "parse.h"
 #include "../immunix.h"
 
-/* Traverse the syntax tree depth-first in an iterator-like manner. */
-class depth_first_traversal {
-    stack<Node *> pos;
-    void push_left(Node *node)
-    {
-	pos.push(node);
 
-        while (dynamic_cast<InnerNode *>(node)) {
-            pos.push(node->child[0]);
-            node = node->child[0];
-        }
-    }
 
-public:
-    depth_first_traversal(Node *node) {
-	push_left(node);
-    }
-    Node *operator*()
-    {
-        return pos.top();
-    }
-    Node* operator->()
-    {
-	return pos.top();
-    }
-    operator bool()
-    {
-        return !pos.empty();
-    }
-    void operator++(int)
-    {
-        Node *last = pos.top();
-        pos.pop();
-
-        if (!pos.empty()) {
-            /* no need to dynamic cast, as we just popped a node so the top node
-             * must be an inner node */
-            InnerNode *node = (InnerNode *)(pos.top());
-
-            if (node->child[1] && node->child[1] != last) {
-                push_left(node->child[1]);
-	    }
-	}
-    }
-};
-
-ostream& operator<<(ostream& os, Node& node)
-{
-    node.dump(os);
-    return os;
-}
-
-ostream& operator<<(ostream& os, uchar c)
-{
-    const char *search = "\a\033\f\n\r\t|*+[](). ",
-	       *replace = "aefnrt|*+[](). ", *s;
-
-    if ((s = strchr(search, c)) && *s != '\0')
-	os << '\\' << replace[s - search];
-    else if (c < 32 || c >= 127)
-	os << '\\' << '0' << char('0' + (c >> 6))
-	   << char('0' + ((c >> 3) & 7)) << char('0' + (c & 7));
-    else
-	os << (char)c;
-    return os;
-}
-
-int
-octdigit(char c)
-{
-    if (c >= '0' && c <= '7')
-	return c - '0';
-    return -1;
-}
-
-int
-hexdigit(char c)
-{
-    if (c >= '0' && c <= '9')
-	return c - '0';
-    else if (c >= 'A' && c <= 'F')
-	return 10 + c - 'A';
-    else if (c >= 'a' && c <= 'f')
-	return 10 + c - 'A';
-    else
-	return -1;
-}
-
-int
-regexp_lex(YYSTYPE *val, const char **pos)
-{
-    int c;
-
-    val->c = **pos;
-    switch(*(*pos)++) {
-	case '\0':
-	    (*pos)--;
-	    return 0;
-
-	case '*': case '+': case '.': case '|': case '^': case '-':
-	case '[': case ']': case '(' : case ')':
-	    return *(*pos - 1);
-
-	case '\\':
-	    val->c = **pos;
-	    switch(*(*pos)++) {
-		case '\0':
-		    (*pos)--;
-		    /* fall through */
-		case '\\':
-		    val->c = '\\';
-		    break;
-
-		case '0':
-		    val->c = 0;
-		    if ((c = octdigit(**pos)) >= 0) {
-			val->c = c;
-			(*pos)++;
-		    }
-		    if ((c = octdigit(**pos)) >= 0) {
-			val->c = (val->c << 3) + c;
-			(*pos)++;
-		    }
-		    if ((c = octdigit(**pos)) >= 0) {
-			val->c = (val->c << 3) + c;
-			(*pos)++;
-		    }
-		    break;
-
-		case 'x':
-		    val->c = 0;
-		    if ((c = hexdigit(**pos)) >= 0) {
-			val->c = c;
-			(*pos)++;
-		    }
-		    if ((c = hexdigit(**pos)) >= 0) {
-			val->c = (val->c << 4) + c;
-			(*pos)++;
-		    }
-		    break;
-
-		case 'a':
-		    val->c = '\a';
-		    break;
-
-		case 'e':
-		    val->c = 033  /* ESC */;
-		    break;
-
-		case 'f':
-		    val->c = '\f';
-		    break;
-
-		case 'n':
-		    val->c = '\n';
-		    break;
-
-		case 'r':
-		    val->c = '\r';
-		    break;
-
-		case 't':
-		    val->c = '\t';
-		    break;
-	    }
-    }
-    return CHAR;
-}
-
-void
-regexp_error(Node ** __attribute__((unused)),
-	     const char *text __attribute__((unused)),
-	     const char *error __attribute__((unused)))
-{
-    /* We don't want the library to print error messages. */
-}
-
-/**
- * Assign a consecutive number to each node. This is only needed for
- * pretty-printing the debug output.
- *
- * The epsnode is labeled 0.  Start labeling at 1
- */
-void label_nodes(Node *root)
-{
-    int nodes = 1;
-    for (depth_first_traversal i(root); i; i++)
-       i->label = nodes++;
-}
-
-/**
- * Text-dump a state (for debugging).
- */
-ostream& operator<<(ostream& os, const NodeSet& state)
-{
-    os << '{';
-    if (!state.empty()) {
-	NodeSet::iterator i = state.begin();
-	for(;;) {
-	   os << (*i)->label;
-	    if (++i == state.end())
-		break;
-	    os << ',';
-	}
-    }
-    os << '}';
-    return os;
-}
-
-/**
- * Text-dump the syntax tree (for debugging).
- */
-void dump_syntax_tree(ostream& os, Node *node) {
-    for (depth_first_traversal i(node); i; i++) {
-	os << i->label << '\t';
-	if ((*i)->child[0] == 0)
-	    os << **i << '\t' << (*i)->followpos << endl;
-	else {
-	    if ((*i)->child[1] == 0)
-		os << (*i)->child[0]->label << **i;
-	    else
-		os << (*i)->child[0]->label << **i
-		   << (*i)->child[1]->label;
-	    os << '\t' << (*i)->firstpos
-		       << (*i)->lastpos << endl;
-	}
-    }
-    os << endl;
-}
-
-/* Comparison operator for sets of <NodeSet *>.
- * Compare set hashes, and if the sets have the same hash
- * do compare pointer comparison on set of <Node *>, the pointer comparison
- * allows us to determine which Sets of <Node *> we have seen already from
- * new ones when constructing the DFA.
- */
-struct deref_less_than {
-  bool operator()(pair <unsigned long, NodeSet *> const & lhs, pair <unsigned long, NodeSet *> const & rhs) const
-  {
-	  if (lhs.first == rhs.first)
-		  return *(lhs.second) < *(rhs.second);
-	  else
-		  return lhs.first < rhs.first;
-  }
-};
-
-unsigned long hash_NodeSet(const NodeSet *ns)
-{
-        unsigned long hash = 5381;
-
-	for (NodeSet::iterator i = ns->begin(); i != ns->end(); i++) {
-	  hash = ((hash << 5) + hash) + (unsigned long) *i;
-	}
-
-        return hash;
-}
 
 class State;
 /**
@@ -2536,7 +1210,7 @@ SecondIterator<Iter> second_iterator(Iter iter)
  */
 
 #include "flex-tables.h"
-#include "regexp.h"
+#define YYTH_REGEX_MAGIC 0x1B5E783D	/* flex file format, but without state compression and with negative match results in the YYTD_ID_DEF table */
 
 static inline size_t pad64(size_t i)
 {
@@ -2638,7 +1312,7 @@ void TransitionTable::flex_table(ostream& os, const char *name)
     /* Write the actual flex parser table. */
 
     size_t hsize = pad64(sizeof(th) + sizeof(th_version) + strlen(name) + 1);
-    th.th_magic = htonl(YYTH_REGEXP_MAGIC);
+    th.th_magic = htonl(YYTH_REGEX_MAGIC);
     th.th_hsize = htonl(hsize);
     th.th_ssize = htonl(hsize +
 	    flex_table_size(accept.begin(), accept.end()) +
@@ -2871,7 +1545,7 @@ extern "C" int aare_add_rule_vec(aare_ruleset_t *rules, int deny,
 
     assert(perms != 0);
 
-    if (regexp_parse(&tree, rulev[0]))
+    if (regex_parse(&tree, rulev[0]))
 	return 0;
     for (int i = 1; i < count; i++) {
 	    Node *subtree = NULL;
@@ -2879,7 +1553,7 @@ extern "C" int aare_add_rule_vec(aare_ruleset_t *rules, int deny,
 	    if (!node)
 		return 0;
 	    tree = new CatNode(tree, node);
-	    if (regexp_parse(&subtree, rulev[i]))
+	    if (regex_parse(&subtree, rulev[i]))
 		return 0;
 	    tree = new CatNode(tree, subtree);
     }
diff --git a/parser/libapparmor_re/parse.h b/parser/libapparmor_re/parse.h
new file mode 100644
index 000000000..42ad8435b
--- /dev/null
+++ b/parser/libapparmor_re/parse.h
@@ -0,0 +1,27 @@
+/*
+ * (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
+ * Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
+ * Copyright 2009-2010 Canonical Ltd.
+ *
+ * The libapparmor library is licensed under the terms of the GNU
+ * Lesser General Public License, version 2.1. Please see the file
+ * COPYING.LGPL.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Parsing of regular expression into expression trees as implemented in
+ * expr-tree
+ */
+#ifndef __LIBAA_RE_PARSE_H
+#define __LIBAA_RE_PARSE_H
+
+int regex_parse(Node **tree, const char *rule);
+
+#endif /* __LIBAA_RE_PARSE_H */
diff --git a/parser/libapparmor_re/parse.y b/parser/libapparmor_re/parse.y
new file mode 100644
index 000000000..3f9ef30f2
--- /dev/null
+++ b/parser/libapparmor_re/parse.y
@@ -0,0 +1,266 @@
+/*
+ * (C) 2006, 2007 Andreas Gruenbacher <agruen@suse.de>
+ * Copyright (c) 2003-2008 Novell, Inc. (All rights reserved)
+ * Copyright 2009-2010 Canonical Ltd.
+ *
+ * The libapparmor library is licensed under the terms of the GNU
+ * Lesser General Public License, version 2.1. Please see the file
+ * COPYING.LGPL.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Parsing of regular expression into expression trees as implemented in
+ * expr-tree
+ */
+
+%{
+/* #define DEBUG_TREE */
+ #include "expr-tree.h"
+
+%}
+
+%union {
+    char c;
+    Node *node;
+    Chars *cset;
+}
+
+%{
+    void regex_error(Node **, const char *, const char *);
+#   define YYLEX_PARAM &text
+    int regex_lex(YYSTYPE *, const char **);
+
+    /* Add the single character a to cset and hand the same set back. */
+    static inline Chars* insert_char(Chars* cset, uchar a)
+    {
+	cset->insert(a);
+	return cset;
+    }
+
+    /* Add [a, b] (bounds in either order) to cset and return it.  The counter is
+     * an int: a uchar one (presumably 8 bit) never exceeds b == 255 and looped forever. */
+    static inline Chars* insert_char_range(Chars* cset, uchar a, uchar b)
+    {
+	if (a > b) swap(a, b);
+	for (int i = a; i <= b; i++)
+	    cset->insert((uchar) i);
+	return cset;
+    }
+%}
+
+%pure-parser
+/* %error-verbose */
+%parse-param {Node **root}
+%parse-param {const char *text}
+%name-prefix = "regex_"
+
+%token <c> CHAR
+%type <c> regex_char cset_char1 cset_char cset_charN
+%type <cset> charset cset_chars
+%type <node> regex expr terms0 terms qterm term
+
+/**
+ * Note: destroy all nodes upon failure, but *not* the start symbol once
+ * parsing succeeds!
+ */
+%destructor { $$->release(); } expr terms0 terms qterm term
+
+%%
+
+/* FIXME: Does not parse "[--]", "[---]", "[^^-x]". I don't actually know
+          which precise grammar Perl regexes use, and rediscovering that
+	  is proving to be painful. */
+/* Start symbol: the finished tree (or &epsnode for an empty regex) is stored in *root. */
+regex	    : /* empty */	{ *root = $$ = &epsnode; }
+	    | expr		{ *root = $$ = $1; }
+	    ;
+/* Alternation; a leading '|' puts &epsnode on the left-hand side. */
+expr	    : terms
+	    | expr '|' terms0	{ $$ = new AltNode($1, $3); }
+	    | '|' terms0	{ $$ = new AltNode(&epsnode, $2); }
+	    ;
+/* Right-hand side of '|': may be empty, in which case it is &epsnode. */
+terms0	    : /* empty */	{ $$ = &epsnode; }
+	    | terms
+	    ;
+/* One or more quantified terms, concatenated as left-nested CatNodes. */
+terms	    : qterm
+	    | terms qterm	{ $$ = new CatNode($1, $2); }
+	    ;
+/* A term with an optional '*' or '+' suffix. */
+qterm	    : term
+	    | term '*'		{ $$ = new StarNode($1); }
+	    | term '+'		{ $$ = new PlusNode($1); }
+	    ;
+/* Atoms: '.', a literal, [set], [^set] (with "[^^" handled separately) and ( regex ). */
+term	    : '.'		{ $$ = new AnyCharNode; }
+	    | regex_char	{ $$ = new CharNode($1); }
+	    | '[' charset ']'	{ $$ = new CharSetNode(*$2);
+				  delete $2; }
+	    | '[' '^' charset ']'
+				{ $$ = new NotCharSetNode(*$3);
+				  delete $3; }
+	    | '[' '^' '^' cset_chars ']'
+				{ $4->insert('^');
+				  $$ = new NotCharSetNode(*$4);
+				  delete $4; }
+	    | '(' regex ')'	{ $$ = $2; }
+	    ;
+/* Outside brackets '^', '-' and ']' are accepted as plain characters. */
+regex_char  : CHAR
+	    | '^'		{ $$ = '^'; }
+	    | '-'		{ $$ = '-'; }
+	    | ']'		{ $$ = ']'; }
+	    ;
+/* Bracket body: a first char (optionally opening a range) followed by the rest. */
+charset	    : cset_char1 cset_chars
+				{ $$ = insert_char($2, $1); }
+	    | cset_char1 '-' cset_charN cset_chars
+				{ $$ = insert_char_range($4, $1, $3); }
+	    ;
+/* Remaining bracket chars, singly or as "x-y" ranges, collected in one new Chars. */
+cset_chars  : /* nothing */	{ $$ = new Chars; }
+	    | cset_chars cset_charN
+				{ $$ = insert_char($1, $2); }
+	    | cset_chars cset_charN '-' cset_charN
+				{ $$ = insert_char_range($1, $2, $4); }
+	    ;
+/* First char of a set: may be ']' or '-', but not '^'. */
+cset_char1  : cset_char
+	    | ']'		{ $$ = ']'; }
+	    | '-'		{ $$ = '-'; }
+	    ;
+/* Any later char of a set: may be '^', but not ']' or '-'. */
+cset_charN  : cset_char
+	    | '^'		{ $$ = '^'; }
+	    ;
+/* CHAR plus the operator characters that are taken literally inside brackets. */
+cset_char   : CHAR
+	    | '['		{ $$ = '['; }
+	    | '*'		{ $$ = '*'; }
+	    | '+'		{ $$ = '+'; }
+	    | '.'		{ $$ = '.'; }
+	    | '|'		{ $$ = '|'; }
+	    | '('		{ $$ = '('; }
+	    | ')'		{ $$ = ')'; }
+	    ;
+
+%%
+
+
+/* Map an octal digit character to its value 0-7; anything else gives -1. */
+int octdigit(char c)
+{
+    const bool is_octal = (c >= '0' && c <= '7');
+
+    return is_octal ? c - '0' : -1;
+}
+
+/* Value 0-15 of hex digit c (upper or lower case), or -1 if c is not one. */
+int hexdigit(char c)
+{
+    if (c >= '0' && c <= '9')
+	return c - '0';
+    if (c >= 'A' && c <= 'F')
+	return 10 + c - 'A';
+    /* Lower case is offset from 'a'; subtracting 'A' here gave 42..47. */
+    if (c >= 'a' && c <= 'f')
+	return 10 + c - 'a';
+    return -1;
+}
+/* Lexer: scans one token at *pos, advancing *pos. Returns 0 at end of input, else an operator char or CHAR (value in val->c). */
+int
+regex_lex(YYSTYPE *val, const char **pos)
+{
+    int c;
+    /* Unless overridden below, the token value is the character itself. */
+    val->c = **pos;
+    switch(*(*pos)++) {
+	case '\0':
+	    (*pos)--;	/* stay on the terminator so further calls return 0 again */
+	    return 0;
+	/* Operator characters are returned as their own token code. */
+	case '*': case '+': case '.': case '|': case '^': case '-':
+	case '[': case ']': case '(' : case ')':
+	    return *(*pos - 1);
+	/* Backslash escape: decode what follows into val->c, reported as CHAR. */
+	case '\\':
+	    val->c = **pos;
+	    switch(*(*pos)++) {
+		case '\0':
+		    (*pos)--;	/* trailing backslash: taken as a literal one */
+		    /* fall through */
+		case '\\':
+		    val->c = '\\';
+		    break;
+		/* \0 followed by up to three octal digits; with none the value is 0. */
+		case '0':
+		    val->c = 0;
+		    if ((c = octdigit(**pos)) >= 0) {
+			val->c = c;
+			(*pos)++;
+		    }
+		    if ((c = octdigit(**pos)) >= 0) {
+			val->c = (val->c << 3) + c;
+			(*pos)++;
+		    }
+		    if ((c = octdigit(**pos)) >= 0) {
+			val->c = (val->c << 3) + c;
+			(*pos)++;
+		    }
+		    break;
+		/* \x followed by up to two hex digits; with none the value is 0. */
+		case 'x':
+		    val->c = 0;
+		    if ((c = hexdigit(**pos)) >= 0) {
+			val->c = c;
+			(*pos)++;
+		    }
+		    if ((c = hexdigit(**pos)) >= 0) {
+			val->c = (val->c << 4) + c;
+			(*pos)++;
+		    }
+		    break;
+		/* Named control escapes; any other escaped char keeps its own value. */
+		case 'a':
+		    val->c = '\a';
+		    break;
+
+		case 'e':
+		    val->c = 033  /* ESC */;
+		    break;
+
+		case 'f':
+		    val->c = '\f';
+		    break;
+
+		case 'n':
+		    val->c = '\n';
+		    break;
+
+		case 'r':
+		    val->c = '\r';
+		    break;
+
+		case 't':
+		    val->c = '\t';
+		    break;
+	    }
+    }
+    return CHAR;
+}
+/* Error hook for the regex_ parser; all three arguments are ignored. */
+void
+regex_error(Node ** __attribute__((unused)),
+	    const char *text __attribute__((unused)),
+	    const char *error __attribute__((unused)))
+{
+    /* We don't want the library to print error messages. */
+}
diff --git a/parser/libapparmor_re/regexp.h b/parser/libapparmor_re/regexp.h
deleted file mode 100644
index 728efbe92..000000000
--- a/parser/libapparmor_re/regexp.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef __REGEXP_H
-#define __REGEXP_H
-
-/**
- * Flex file format, but without state compression and with negative
- *  match results in the YYTD_ID_DEF table instead.
- */
-#define YYTH_REGEXP_MAGIC 0x1B5E783D
-
-#endif  /* __REGEXP_H */