2
0
mirror of https://gitlab.com/apparmor/apparmor synced 2025-08-22 18:17:09 +00:00

Dfa minimization and unreachable state removal

Add basic Hopcroft based dfa minimization.  It currently does a simple
straight state comparison that can be quadratic in time to split partitions.
This is offset however by using hashing to set up the initial partitions so
that the number of states within a partition is relatively few.

The hashing of states for initial partition setup is linear in time.  This
means the closer the initial partition set is to the final set, the closer
the algorithm is to completing in linear time.  The hashing works as
follows:  For each state we know the number of transitions that are not
the default transition.  For each of these we hash the set of letters
it can transition on using a simple djb2 hash algorithm.  This creates
a unique hash based on the number of transitions and the input it can
transition on.  If a state does not have the same hash we know it can not
be the same as another because it either has a different number of transitions
or transitions on a different set.

To further distinguish states, the number of transitions of each transition's
target state is added into the hash.  This serves to further distinguish
states as a transition to a state with a different number of transitions
can not possibly be reduced to an equivalent state.

A further distinction of states is made for accepting states in that
we know each state with a unique set of accept permissions must be in
its own partition to ensure the unique accept permissions are in the
final dfa.

The unreachable state removal is a basic walk of the dfa from the start
state marking all states that are reached.  It then sweeps any state not
reached away.  This does not do dead state removal where a non accepting
state gets into a loop that will never result in an accepting state.
This commit is contained in:
John Johansen 2010-01-20 03:32:34 -08:00
parent d4d9dda5cb
commit 91dd7527d9
3 changed files with 340 additions and 7 deletions

View File

@ -16,6 +16,9 @@ typedef enum dfaflags {
DFA_CONTROL_NO_TREE_NORMAL = 1 << 1,
DFA_CONTROL_NO_TREE_SIMPLE = 1 << 2,
DFA_CONTROL_TREE_LEFT = 1 << 3,
DFA_CONTROL_NO_MINIMIZE = 1 << 4,
DFA_CONTROL_NO_HASH_PART = 1 << 5,
DFA_CONTROL_NO_UNREACHABLE = 1 << 6,
DFA_DUMP_TREE_STATS = 1 << 8,
DFA_DUMP_TREE = 1 << 9,
@ -29,6 +32,8 @@ typedef enum dfaflags {
DFA_DUMP_TRANS_TABLE = 1 << 17,
DFA_DUMP_EQUIV = 1 << 18,
DFA_DUMP_EQUIV_STATS = 1 << 19,
DFA_DUMP_MINIMIZE = 1 << 20,
DFA_DUMP_UNREACHABLE = 1 << 22,
} dfaflags_t;
#ifdef __cplusplus

View File

@ -1279,6 +1279,11 @@ class DFA {
public:
DFA(Node *root, dfaflags_t flags);
virtual ~DFA();
void remove_unreachable(dfaflags_t flags);
bool same_mappings(map <State *, States *> &partition_map, State *s1,
State *s2);
size_t hash_trans(State *s);
void minimize(dfaflags_t flags);
void dump(ostream& os);
void dump_dot_graph(ostream& os);
map<uchar, uchar> equivalence_classes(dfaflags_t flags);
@ -1364,8 +1369,15 @@ DFA::DFA(Node *root, dfaflags_t flags) : root(root)
here.cases.insert(*j);
}
}
if (flags & (DFA_DUMP_STATS | DFA_DUMP_PROGRESS))
if (flags & (DFA_DUMP_STATS))
fprintf(stderr, "\033[2KCreated dfa: states %ld\tmatching %d\tnonmatching %d\n", states.size(), match_count, nomatch_count);
if (!(flags & DFA_CONTROL_NO_MINIMIZE))
minimize(flags);
if (!(flags & DFA_CONTROL_NO_UNREACHABLE))
remove_unreachable(flags);
}
DFA::~DFA()
@ -1416,6 +1428,297 @@ State *DFA::verify_perms(void)
return NULL;
}
/* Remove states that can not be reached from the start state.
 *
 * Walks the transitions in trans starting at start and collects every
 * state that is reached; nonmatching is always kept.  Any member of states
 * that was not reached is removed from states and trans and then freed.
 *
 * flags: DFA_DUMP_UNREACHABLE writes each removed state to cerr,
 *        DFA_DUMP_STATS writes a one line summary when states were removed.
 *
 * Only unreachable states are handled here.  States that are reachable but
 * can never lead to an accepting state are left in place.
 */
void DFA::remove_unreachable(dfaflags_t flags)
{
	set <State *> reachable;
	list <State *> work_queue;

	/* find the set of reachable states.  A state is marked when it is
	 * queued so that it is never queued (and rescanned) twice.
	 */
	reachable.insert(nonmatching);
	reachable.insert(start);
	work_queue.push_back(start);
	while (!work_queue.empty()) {
		State *from = work_queue.front();
		work_queue.pop_front();

		/* a state without a transition entry has nothing to follow */
		Trans::iterator i = trans.find(from);
		if (i == trans.end())
			continue;

		if (i->second.otherwise &&
		    reachable.insert(i->second.otherwise).second)
			work_queue.push_back(i->second.otherwise);

		for (Cases::iterator j = i->second.begin();
		     j != i->second.end(); j++) {
			if (reachable.insert(j->second).second)
				work_queue.push_back(j->second);
		}
	}

	/* walk the set of states and remove any that aren't reachable */
	if (reachable.size() < states.size()) {
		int count = 0;
		States::iterator i;
		States::iterator next;
		for (i = states.begin(); i != states.end(); i = next) {
			/* grab the successor first, erasing invalidates i */
			next = i;
			next++;

			State *state = *i;
			if (reachable.find(state) != reachable.end())
				continue;

			/* from here on only use state, never *i */
			states.erase(i);
			Trans::iterator t = trans.find(state);
			if (t != trans.end())
				trans.erase(t);

			if (flags & DFA_DUMP_UNREACHABLE) {
				uint32_t audit, accept = accept_perms(state, &audit, NULL);
				cerr << "unreachable: "<< *state;
				if (state == start)
					cerr << " <==";
				if (accept) {
					cerr << " (0x" << hex << accept
					     << " " << audit << dec << ')';
				}
				cerr << endl;
			}

			/* only the states that were actually removed are
			 * freed and counted
			 */
			delete state;
			count++;
		}

		if (count && (flags & DFA_DUMP_STATS))
			cerr << "DFA: states " << states.size() << " removed "
			     << count << " unreachable states\n";
	}
}
/* Report whether s1 and s2 transition into the same partitions.
 *
 * Two states match when neither has an entry in trans, or when both do and
 *  - either both lack a default transition or both defaults land in the
 *    same partition of partition_map, and
 *  - they have the same number of cases, and every input of s1 is also an
 *    input of s2 whose target is in the same partition.
 *
 * Every transition target is looked up in partition_map without a check,
 * so each target must already have an entry there.
 */
bool DFA::same_mappings(map <State *, States *> &partition_map, State *s1,
			State *s2)
{
	Trans::iterator lhs = trans.find(s1);
	Trans::iterator rhs = trans.find(s2);

	/* states without transitions only match each other */
	bool lhs_missing = (lhs == trans.end());
	bool rhs_missing = (rhs == trans.end());
	if (lhs_missing || rhs_missing)
		return lhs_missing && rhs_missing;

	/* default transitions */
	bool lhs_default = (lhs->second.otherwise != NULL);
	bool rhs_default = (rhs->second.otherwise != NULL);
	if (lhs_default != rhs_default)
		return false;
	if (lhs_default) {
		States *lhs_part = partition_map.find(lhs->second.otherwise)->second;
		States *rhs_part = partition_map.find(rhs->second.otherwise)->second;
		if (lhs_part != rhs_part)
			return false;
	}

	/* per input transitions */
	if (lhs->second.cases.size() != rhs->second.cases.size())
		return false;

	Cases::iterator c1 = lhs->second.begin();
	while (c1 != lhs->second.end()) {
		Cases::iterator c2 = rhs->second.cases.find(c1->first);
		if (c2 == rhs->second.end())
			return false;

		States *lhs_part = partition_map.find(c1->second)->second;
		States *rhs_part = partition_map.find(c2->second)->second;
		if (lhs_part != rhs_part)
			return false;

		c1++;
	}

	return true;
}
/* Do simple djb2 hashing against a State's transition cases.
 *
 * This provides a rough initial guess at state equivalence: states with a
 * different number of transitions, or with transitions on different
 * inputs, hash differently and are never placed in the same initial
 * partition.
 *
 * For every case the input character and the number of cases of the
 * target state are mixed in.  The identity of the target state is not
 * hashed, as different destinations could end up being equivalent.  A
 * default transition that does not go to nonmatching is mixed in the same
 * way, using 5381 in place of an input character.
 *
 * Returns 0 when s has no entry in trans.  A target state without an entry
 * in trans contributes a transition count of 0.
 */
size_t DFA::hash_trans(State *s)
{
	unsigned long hash = 5381;

	Trans::iterator i = trans.find(s);
	if (i == trans.end())
		return 0;

	for (Cases::iterator j = i->second.begin(); j != i->second.end(); j++){
		hash = ((hash << 5) + hash) + j->first;

		/* don't dereference end() if the target has no entry */
		size_t target_cases = 0;
		Trans::iterator k = trans.find(j->second);
		if (k != trans.end())
			target_cases = k->second.cases.size();
		hash = ((hash << 5) + hash) + target_cases;
	}

	if (i->second.otherwise && i->second.otherwise != nonmatching) {
		hash = ((hash << 5) + hash) + 5381;

		size_t target_cases = 0;
		Trans::iterator k = trans.find(i->second.otherwise);
		if (k != trans.end())
			target_cases = k->second.cases.size();
		hash = ((hash << 5) + hash) + target_cases;
	}

	return hash;
}
/* Minimize the number of dfa states.
 *
 * 1. Every state is placed in an initial partition keyed on its accept
 *    and audit permissions and, unless DFA_CONTROL_NO_HASH_PART is set, on
 *    hash_trans() of the state.
 * 2. Partitions are split until a full pass makes no new split: any state
 *    whose transitions do not map to the same partitions as the first
 *    state of its partition (see same_mappings()) is moved to a new
 *    partition.
 * 3. If any partition holds more than one state, the first state of each
 *    partition becomes its representative: transitions, start and
 *    nonmatching are redirected to representatives and every other state
 *    is removed from states and trans and freed.
 *
 * flags: DFA_CONTROL_NO_HASH_PART skips the transition hash in step 1,
 *        DFA_DUMP_PROGRESS writes progress lines to cerr,
 *        DFA_DUMP_STATS writes the final partition counts to cerr.
 */
void DFA::minimize(dfaflags_t flags)
{
	map <pair <uint64_t, size_t>, States *> perm_map;
	list <States *> partitions;
	map <State *, States *> partition_map;

	/* Set up the initial partitions - 1 non accepting, and a
	 * partition for each unique combination of permissions
	 *
	 * Save off accept value for State so we don't have to recompute
	 * this should be fixed by updating State to store them but this
	 * will work for now
	 */
	int accept_count = 0;
	for (States::iterator i = states.begin(); i != states.end(); i++) {
		uint32_t accept1, accept2;
		accept1 = accept_perms(*i, &accept2, NULL);
		uint64_t combined = ((uint64_t)accept2)<<32 | (uint64_t)accept1;
		size_t size = 0;
		if (!(flags & DFA_CONTROL_NO_HASH_PART))
			size = hash_trans(*i);
		pair <uint64_t, size_t> group = make_pair(combined, size);
		map <pair <uint64_t, size_t>, States *>::iterator p = perm_map.find(group);
		if (p == perm_map.end()) {
			States *part = new States();
			part->insert(*i);
			perm_map.insert(make_pair(group, part));
			partitions.push_back(part);
			partition_map.insert(make_pair(*i, part));
			if (combined)
				accept_count++;
		} else {
			partition_map.insert(make_pair(*i, p->second));
			p->second->insert(*i);
		}

		if ((flags & DFA_DUMP_PROGRESS) &&
		    (partitions.size() % 1000 == 0))
			cerr << "\033[2KMinimize dfa: partitions " << partitions.size() << "\tinit " << partitions.size() << "\t(accept " << accept_count << ")\r";
	}

	size_t init_count = partitions.size();
	if (flags & DFA_DUMP_PROGRESS)
		cerr << "\033[2KMinimize dfa: partitions " << partitions.size() << "\tinit " << init_count << "\t(accept " << accept_count << ")\r";

	/* Now do repartitioning until each partition contains the set of
	 * states that are the same.  This will happen when the partition
	 * splitting stabilizes.  With a worst case of 1 state per partition
	 * ie. already minimized.
	 */
	States *new_part;
	int new_part_count;
	do {
		new_part_count = 0;
		/* partitions split off during this pass are appended to the
		 * list and so are visited later in the same pass
		 */
		for (list <States *>::iterator p = partitions.begin();
		     p != partitions.end(); p++) {
			new_part = NULL;
			State *rep = *((*p)->begin());

			/* collect every state that disagrees with rep; the
			 * partition itself is only modified after this scan
			 */
			for (States::iterator s = ++(*p)->begin();
			     s != (*p)->end(); s++) {
				if (same_mappings(partition_map, rep, *s))
					continue;
				if (!new_part) {
					new_part = new States;
				}
				new_part->insert(*s);
			}

			if (new_part) {
				for (States::iterator m = new_part->begin();
				     m != new_part->end(); m++) {
					(*p)->erase(*m);
					partition_map.erase(*m);
					partition_map.insert(make_pair(*m, new_part));
				}
				partitions.push_back(new_part);
				new_part_count++;
			}
		}

		if ((flags & DFA_DUMP_PROGRESS) &&
		    (partitions.size() % 1000 == 0))
			cerr << "\033[2KMinimize dfa: partitions " << partitions.size() << "\tinit " << init_count << "\t(accept " << accept_count << ")\r";
	} while(new_part_count);

	if (flags & DFA_DUMP_STATS)
		cerr << "\033[2KMinimize dfa: partitions " << partitions.size() << "\tinit " << init_count << "\t(accept " << accept_count << ")\n";

	/* one state per partition, nothing can be merged */
	if (partitions.size() == states.size()) {
		goto out;
	}

	/* Remap the dfa so it uses the representative states
	 * Use the first state of a partition as the representative state
	 * At this point all states within a partition have transitions
	 * to states within the same partitions
	 */
	for (list <States *>::iterator p = partitions.begin();
	     p != partitions.end(); p++) {
		/* representative state for this partition */
		State *rep = *((*p)->begin());

		/* update representative state's transitions */
		Trans::iterator i = trans.find(rep);
		if (i != trans.end()) {
			if (i->second.otherwise) {
				States *partition = partition_map.find(i->second.otherwise)->second;
				i->second.otherwise = *partition->begin();
			}
			for (Cases::iterator c = i->second.begin();
			     c != i->second.end(); c++) {
				States *partition = partition_map.find(c->second)->second;
				c->second = *partition->begin();
			}
		}
	}

	/* make sure nonmatching and start state are up to date with the
	 * mappings */
	{
		States *partition = partition_map.find(nonmatching)->second;
		if (*partition->begin() != nonmatching) {
			nonmatching = *partition->begin();
		}

		partition = partition_map.find(start)->second;
		if (*partition->begin() != start) {
			start = *partition->begin();
		}
	}

	/* Now that the states have been remapped, remove all states
	 * that are not the representative states for their partition
	 */
	for (list <States *>::iterator p = partitions.begin();
	     p != partitions.end(); p++) {
		for (States::iterator i = ++(*p)->begin(); i != (*p)->end(); i++) {
			State *merged = *i;
			Trans::iterator j = trans.find(merged);
			if (j != trans.end())
				trans.erase(j);
			states.erase(merged);
			/* nothing refers to the merged state any more; free
			 * it here since it is no longer in states.  The
			 * partition still holds the stale pointer but it is
			 * never dereferenced again.
			 */
			delete merged;
		}
	}

out:
	/* Cleanup */
	while (!partitions.empty()) {
		States *p = partitions.front();
		partitions.pop_front();
		delete(p);
	}
}
/**
* text-dump the DFA (for debugging).
*/
@ -2085,6 +2388,7 @@ uint32_t accept_perms(State *state, uint32_t *audit_ctl, int *error)
uint32_t perms = 0, exact_match_perms = 0, audit = 0, exact_audit = 0,
quiet = 0, deny = 0;
if (error)
*error = 0;
for (State::iterator i = state->begin(); i != state->end(); i++) {
MatchFlag *match;
@ -2093,7 +2397,7 @@ uint32_t accept_perms(State *state, uint32_t *audit_ctl, int *error)
if (dynamic_cast<ExactMatchFlag *>(match)) {
/* exact match only ever happens with x */
if (!is_merged_x_consistent(exact_match_perms,
match->flag))
match->flag) && error)
*error = 1;;
exact_match_perms |= match->flag;
exact_audit |= match->audit;
@ -2101,7 +2405,7 @@ uint32_t accept_perms(State *state, uint32_t *audit_ctl, int *error)
deny |= match->flag;
quiet |= match->audit;
} else {
if (!is_merged_x_consistent(perms, match->flag))
if (!is_merged_x_consistent(perms, match->flag) && error)
*error = 1;
perms |= match->flag;
audit |= match->audit;

View File

@ -182,6 +182,8 @@ static void display_dump(char *command)
"dfa-stats Dump dfa creation stats\n"
"dfa-states Dump dfa state diagram\n"
"dfa-graph Dump dfa dot (graphviz) graph\n"
"dfa-minimize Dump dfa minimization\n"
"dfa-unreachable Dump dfa unreachable states\n"
"trans-progress Dump progress of transition table\n"
"trans-stats Dump stats on transition table\n"
"trans-table Dump transition table\n"
@ -205,6 +207,9 @@ static void display_optimize(char *command)
"no-expr-simplify don't do expr tree simplification\n"
"expr-left-simplify do left simplification first\n"
"expr-right-simplify do right simplification first\n"
"no-minimize don't do state minimization\n"
"no-hash-part don't hash partitions at start of minimization\n"
"no-remove-unreachable don't do unreachable state removal\n"
,command);
}
@ -323,15 +328,20 @@ static int process_args(int argc, char *argv[])
} else if (strcmp(optarg, "expr-stats") == 0) {
dfaflags |= DFA_DUMP_TREE_STATS;
} else if (strcmp(optarg, "dfa-progress") == 0) {
dfaflags |= DFA_DUMP_PROGRESS;
dfaflags |= DFA_DUMP_PROGRESS | DFA_DUMP_STATS;
} else if (strcmp(optarg, "dfa-stats") == 0) {
dfaflags |= DFA_DUMP_STATS;
} else if (strcmp(optarg, "dfa-states") == 0) {
dfaflags |= DFA_DUMP_STATES;
} else if (strcmp(optarg, "dfa-graph") == 0) {
dfaflags |= DFA_DUMP_GRAPH;
} else if (strcmp(optarg, "dfa- minimize") == 0) {
dfaflags |= DFA_DUMP_MINIMIZE;
} else if (strcmp(optarg, "dfa-unreachable") == 0) {
dfaflags |= DFA_DUMP_UNREACHABLE;
} else if (strcmp(optarg, "trans-progress") == 0) {
dfaflags |= DFA_DUMP_TRANS_PROGRESS;
dfaflags |= DFA_DUMP_TRANS_PROGRESS |
DFA_DUMP_TRANS_STATS;
} else if (strcmp(optarg, "trans-stats") == 0) {
dfaflags |= DFA_DUMP_TRANS_STATS;
} else if (strcmp(optarg, "trans-table") == 0) {
@ -350,7 +360,9 @@ static int process_args(int argc, char *argv[])
skip_cache = 1;
if (strcmp(optarg, "0") == 0) {
dfaflags |= DFA_CONTROL_NO_TREE_NORMAL |
DFA_CONTROL_NO_TREE_SIMPLE;
DFA_CONTROL_NO_TREE_SIMPLE |
DFA_CONTROL_NO_MINIMIZE |
DFA_CONTROL_NO_UNREACHABLE;
} else if (strcmp(optarg, "equiv") == 0) {
dfaflags |= DFA_CONTROL_EQUIV;
} else if (strcmp(optarg, "no-equiv") == 0) {
@ -367,6 +379,18 @@ static int process_args(int argc, char *argv[])
dfaflags |= DFA_CONTROL_TREE_LEFT;
} else if (strcmp(optarg, "expr-right-simplify") == 0) {
dfaflags &= ~DFA_CONTROL_TREE_LEFT;
} else if (strcmp(optarg, "minimize") == 0) {
dfaflags &= ~DFA_CONTROL_NO_MINIMIZE;
} else if (strcmp(optarg, "no-minimize") == 0) {
dfaflags |= DFA_CONTROL_NO_MINIMIZE;
} else if (strcmp(optarg, "hash-part") == 0) {
dfaflags &= ~DFA_CONTROL_NO_HASH_PART;
} else if (strcmp(optarg, "no-hash-part") == 0) {
dfaflags |= DFA_CONTROL_NO_HASH_PART;
} else if (strcmp(optarg, "remove-unreachable") == 0) {
dfaflags &= ~DFA_CONTROL_NO_UNREACHABLE;
} else if (strcmp(optarg, "no-remove-unreachable") == 0) {
dfaflags |= DFA_CONTROL_NO_UNREACHABLE;
} else {
PERROR("%s: Invalid --Optimize option %s\n",
progname, optarg);