2
0
mirror of https://gitlab.com/apparmor/apparmor synced 2025-09-01 06:45:38 +00:00

Dfa minimization and unreachable state removal

Add basic Hopcroft based dfa minimization.  It currently does a simple
straight state comparison that can be quadratic in time to split partitions.
This is offset, however, by using hashing to set up the initial partitions so
that the number of states within a partition is relatively small.

The hashing of states for initial partition setup is linear in time.  This
means the closer the initial partition set is to the final set, the closer
the algorithm is to completing in a linear time.  The hashing works as
follows:  For each state we know the number of transitions that are not
the default transition.  For each of these we hash the set of letters
it can transition on using a simple djb2 hash algorithm.  This creates
a unique hash based on the number of transitions and the input it can
transition on.  If a state does not have the same hash we know it cannot be
the same as another because it either has a different number of transitions
or transitions on a different set.

To further distinguish states, the number of transitions of each transition's
target state is added into the hash.  This serves to further distinguish
states, as a transition to a state with a different number of transitions
cannot possibly be reduced to an equivalent state.

A further distinction of states is made for accepting states in that
we know each state with a unique set of accept permissions must be in
its own partition to ensure the unique accept permissions are in the
final dfa.

The unreachable state removal is a basic walk of the dfa from the start
state marking all states that are reached.  It then sweeps any state not
reached away.  This does not do dead state removal where a non accepting
state gets into a loop that will never result in an accepting state.
This commit is contained in:
John Johansen
2010-01-20 03:32:34 -08:00
parent d4d9dda5cb
commit 91dd7527d9
3 changed files with 340 additions and 7 deletions

View File

@@ -182,6 +182,8 @@ static void display_dump(char *command)
"dfa-stats Dump dfa creation stats\n"
"dfa-states Dump dfa state diagram\n"
"dfa-graph Dump dfa dot (graphviz) graph\n"
"dfa-minimize Dump dfa minimization\n"
"dfa-unreachable Dump dfa unreachable states\n"
"trans-progress Dump progress of transition table\n"
"trans-stats Dump stats on transition table\n"
"trans-table Dump transition table\n"
@@ -205,6 +207,9 @@ static void display_optimize(char *command)
"no-expr-simplify don't do expr tree simplification\n"
"expr-left-simplify do left simplification first\n"
"expr-right-simplify do right simplification first\n"
"no-minimize don't do state minimization\n"
"no-hash-part don't hash partitions at start of minimization\n"
"no-remove-unreachable don't do unreachable state removal\n"
,command);
}
@@ -323,15 +328,20 @@ static int process_args(int argc, char *argv[])
} else if (strcmp(optarg, "expr-stats") == 0) {
dfaflags |= DFA_DUMP_TREE_STATS;
} else if (strcmp(optarg, "dfa-progress") == 0) {
dfaflags |= DFA_DUMP_PROGRESS;
dfaflags |= DFA_DUMP_PROGRESS | DFA_DUMP_STATS;
} else if (strcmp(optarg, "dfa-stats") == 0) {
dfaflags |= DFA_DUMP_STATS;
} else if (strcmp(optarg, "dfa-states") == 0) {
dfaflags |= DFA_DUMP_STATES;
} else if (strcmp(optarg, "dfa-graph") == 0) {
dfaflags |= DFA_DUMP_GRAPH;
} else if (strcmp(optarg, "dfa-minimize") == 0) {
dfaflags |= DFA_DUMP_MINIMIZE;
} else if (strcmp(optarg, "dfa-unreachable") == 0) {
dfaflags |= DFA_DUMP_UNREACHABLE;
} else if (strcmp(optarg, "trans-progress") == 0) {
dfaflags |= DFA_DUMP_TRANS_PROGRESS;
dfaflags |= DFA_DUMP_TRANS_PROGRESS |
DFA_DUMP_TRANS_STATS;
} else if (strcmp(optarg, "trans-stats") == 0) {
dfaflags |= DFA_DUMP_TRANS_STATS;
} else if (strcmp(optarg, "trans-table") == 0) {
@@ -350,7 +360,9 @@ static int process_args(int argc, char *argv[])
skip_cache = 1;
if (strcmp(optarg, "0") == 0) {
dfaflags |= DFA_CONTROL_NO_TREE_NORMAL |
DFA_CONTROL_NO_TREE_SIMPLE;
DFA_CONTROL_NO_TREE_SIMPLE |
DFA_CONTROL_NO_MINIMIZE |
DFA_CONTROL_NO_UNREACHABLE;
} else if (strcmp(optarg, "equiv") == 0) {
dfaflags |= DFA_CONTROL_EQUIV;
} else if (strcmp(optarg, "no-equiv") == 0) {
@@ -367,6 +379,18 @@ static int process_args(int argc, char *argv[])
dfaflags |= DFA_CONTROL_TREE_LEFT;
} else if (strcmp(optarg, "expr-right-simplify") == 0) {
dfaflags &= ~DFA_CONTROL_TREE_LEFT;
} else if (strcmp(optarg, "minimize") == 0) {
dfaflags &= ~DFA_CONTROL_NO_MINIMIZE;
} else if (strcmp(optarg, "no-minimize") == 0) {
dfaflags |= DFA_CONTROL_NO_MINIMIZE;
} else if (strcmp(optarg, "hash-part") == 0) {
dfaflags &= ~DFA_CONTROL_NO_HASH_PART;
} else if (strcmp(optarg, "no-hash-part") == 0) {
dfaflags |= DFA_CONTROL_NO_HASH_PART;
} else if (strcmp(optarg, "remove-unreachable") == 0) {
dfaflags &= ~DFA_CONTROL_NO_UNREACHABLE;
} else if (strcmp(optarg, "no-remove-unreachable") == 0) {
dfaflags |= DFA_CONTROL_NO_UNREACHABLE;
} else {
PERROR("%s: Invalid --Optimize option %s\n",
progname, optarg);