From f3ba454d8c0a2d45a803734d69124faed96228dd Mon Sep 17 00:00:00 2001 From: John Johansen Date: Tue, 27 Feb 2007 02:29:16 +0000 Subject: [PATCH] Add dfa support to the parser --- parser/Makefile | 18 +- parser/immunix.h | 121 +- parser/libapparmor_re/Makefile | 25 + parser/libapparmor_re/apparmor_re.h | 30 + parser/libapparmor_re/flex-tables.h | 40 + parser/libapparmor_re/regexp.h | 10 + parser/libapparmor_re/regexp.y | 1641 +++++++++++++++++++++++++++ parser/parser.h | 20 +- parser/parser_interface.c | 149 ++- parser/parser_main.c | 145 +-- parser/parser_misc.c | 36 +- parser/parser_policy.c | 12 +- parser/parser_regex.c | 121 +- parser/parser_variable.c | 2 +- parser/parser_yacc.y | 12 +- 15 files changed, 2123 insertions(+), 259 deletions(-) create mode 100644 parser/libapparmor_re/Makefile create mode 100644 parser/libapparmor_re/apparmor_re.h create mode 100644 parser/libapparmor_re/flex-tables.h create mode 100644 parser/libapparmor_re/regexp.h create mode 100644 parser/libapparmor_re/regexp.y diff --git a/parser/Makefile b/parser/Makefile index fce175f30..290326a9d 100644 --- a/parser/Makefile +++ b/parser/Makefile @@ -77,6 +77,9 @@ OBJECTS = parser_lex.o parser_yacc.o parser_main.o parser_interface.o \ parser_include.o parser_merge.o parser_symtab.o parser_misc.o \ parser_regex.o parser_variable.o parser_policy.o +AAREDIR= libapparmor_re +AAREOBJECTS = ${AAREDIR}/libapparmor_re.a + PCREDIR= pcre PCREOBJECTS = ${PCREDIR}/pcre.o @@ -110,8 +113,11 @@ all: $(LEX_C_FILES) $(YACC_C_FILES) $(TOOLS) $(Q)make -C po all $(Q)make -s tests -apparmor_parser: $(OBJECTS) $(PCREOBJECTS) - $(CC) $(EXTRA_CFLAGS) -o $@ $(OBJECTS) $(PCREOBJECTS) $(LIBS) ${LEXLIB} +apparmor_parser: $(OBJECTS) $(PCREOBJECTS) $(AAREOBJECTS) + rm -f ./libstdc++.a + ln -s `g++ -print-file-name=libstdc++.a` + g++ $(EXTRA_CFLAGS) -o $@ $(OBJECTS) $(PCREOBJECTS) $(LIBS) \ + ${LEXLIB} $(AAREOBJECTS) -static-libgcc -L. 
parser_yacc.c parser_yacc.h: parser_yacc.y parser.h $(YACC) $(YFLAGS) -o parser_yacc.c parser_yacc.y @@ -174,6 +180,10 @@ tests: ${TESTS} $(Q)make -s -C tst tests # always need to rebuild. +.SILENT: $(AAREOBJECTS) +$(AAREOBJECTS): + make -C $(AAREDIR) + .SILENT: $(PCREOBJECTS) $(PCREOBJECTS): make -C $(PCREDIR) "CFLAGS=$(CFLAGS)" @@ -196,7 +206,7 @@ install-suse: ln -sf /etc/init.d/boot.apparmor $(DESTDIR)/sbin/rcapparmor ln -sf rcapparmor $(DESTDIR)/sbin/rcsubdomain ln -sf /etc/init.d/aaeventd $(DESTDIR)/sbin/rcaaeventd - + .PHONY: install-slackware install-slackware: install -m 755 -d $(APPARMOR_BIN_PREFIX)/install @@ -229,7 +239,9 @@ clean: rm -f $(YACC_C_FILES) rm -f parser_version.h rm -f $(NAME)*.tar.gz $(NAME)*.tgz + rm -rf libstdc++.a make -s -C $(PCREDIR) clean + make -s -C $(AAREDIR) clean make -s -C po clean .SILENT: dist_clean diff --git a/parser/immunix.h b/parser/immunix.h index fae82975b..2628b67c4 100644 --- a/parser/immunix.h +++ b/parser/immunix.h @@ -21,64 +21,70 @@ #define _IMMUNIX_H /* start of system offsets */ -#define POS_KERN_COD_FILE_MIN 0 -#define POS_KERN_COD_MAY_EXEC POS_KERN_COD_FILE_MIN -#define POS_KERN_COD_MAY_WRITE (POS_KERN_COD_MAY_EXEC + 1) -#define POS_KERN_COD_MAY_READ (POS_KERN_COD_MAY_WRITE + 1) +#define POS_AA_FILE_MIN 0 +#define POS_AA_MAY_EXEC POS_AA_FILE_MIN +#define POS_AA_MAY_WRITE (POS_AA_MAY_EXEC + 1) +#define POS_AA_MAY_READ (POS_AA_MAY_WRITE + 1) /* not used by Subdomain */ -#define POS_KERN_COD_MAY_APPEND (POS_KERN_COD_MAY_READ + 1) +#define POS_AA_MAY_APPEND (POS_AA_MAY_READ + 1) /* end of system offsets */ -#define POS_KERN_COD_MAY_LINK (POS_KERN_COD_MAY_APPEND + 1) -#define POS_KERN_COD_EXEC_INHERIT (POS_KERN_COD_MAY_LINK + 1) -#define POS_KERN_COD_EXEC_UNCONSTRAINED (POS_KERN_COD_EXEC_INHERIT + 1) -#define POS_KERN_COD_EXEC_PROFILE (POS_KERN_COD_EXEC_UNCONSTRAINED + 1) -#define POS_KERN_COD_EXEC_MMAP (POS_KERN_COD_EXEC_PROFILE + 1) -#define POS_KERN_COD_EXEC_UNSAFE (POS_KERN_COD_EXEC_MMAP + 1) -#define 
POS_KERN_COD_FILE_MAX POS_KERN_COD_EXEC_UNSAFE +#define POS_AA_MAY_LINK (POS_AA_MAY_APPEND + 1) +#define POS_AA_EXEC_INHERIT (POS_AA_MAY_LINK + 1) +#define POS_AA_EXEC_UNCONSTRAINED (POS_AA_EXEC_INHERIT + 1) +#define POS_AA_EXEC_PROFILE (POS_AA_EXEC_UNCONSTRAINED + 1) +#define POS_AA_EXEC_MMAP (POS_AA_EXEC_PROFILE + 1) +#define POS_AA_EXEC_UNSAFE (POS_AA_EXEC_MMAP + 1) +#define POS_AA_FILE_MAX POS_AA_EXEC_UNSAFE -#define POS_KERN_COD_NET_MIN (POS_KERN_COD_FILE_MAX + 1) -#define POS_KERN_COD_TCP_CONNECT POS_KERN_COD_NET_MIN -#define POS_KERN_COD_TCP_ACCEPT (POS_KERN_COD_TCP_CONNECT + 1) -#define POS_KERN_COD_TCP_CONNECTED (POS_KERN_COD_TCP_ACCEPT + 1) -#define POS_KERN_COD_TCP_ACCEPTED (POS_KERN_COD_TCP_CONNECTED + 1) -#define POS_KERN_COD_UDP_SEND (POS_KERN_COD_TCP_ACCEPTED + 1) -#define POS_KERN_COD_UDP_RECEIVE (POS_KERN_COD_UDP_SEND + 1) -#define POS_KERN_COD_NET_MAX POS_KERN_COD_UDP_RECEIVE +#define POS_AA_NET_MIN (POS_AA_FILE_MAX + 1) +#define POS_AA_TCP_CONNECT POS_AA_NET_MIN +#define POS_AA_TCP_ACCEPT (POS_AA_TCP_CONNECT + 1) +#define POS_AA_TCP_CONNECTED (POS_AA_TCP_ACCEPT + 1) +#define POS_AA_TCP_ACCEPTED (POS_AA_TCP_CONNECTED + 1) +#define POS_AA_UDP_SEND (POS_AA_TCP_ACCEPTED + 1) +#define POS_AA_UDP_RECEIVE (POS_AA_UDP_SEND + 1) +#define POS_AA_NET_MAX POS_AA_UDP_RECEIVE /* logging only */ -#define POS_KERN_COD_LOGTCP_SEND (POS_KERN_COD_NET_MAX + 1) -#define POS_KERN_COD_LOGTCP_RECEIVE (POS_KERN_COD_LOGTCP_SEND + 1) +#define POS_AA_LOGTCP_SEND (POS_AA_NET_MAX + 1) +#define POS_AA_LOGTCP_RECEIVE (POS_AA_LOGTCP_SEND + 1) /* Absolute MAX/MIN */ -#define POS_KERN_COD_MIN (POS_KERN_COD_FILE_MIN -#define POS_KERN_COD_MAX (POS_KERN_COD_NET_MAX +#define POS_AA_MIN (POS_AA_FILE_MIN) +#define POS_AA_MAX (POS_AA_NET_MAX) + +/* Invalid perm permission */ +#define POS_AA_INVALID_POS 31 /* Modeled after MAY_READ, MAY_WRITE, MAY_EXEC def'ns */ -#define KERN_COD_MAY_EXEC (0x01 << POS_KERN_COD_MAY_EXEC) -#define KERN_COD_MAY_WRITE (0x01 << POS_KERN_COD_MAY_WRITE) -#define 
KERN_COD_MAY_READ (0x01 << POS_KERN_COD_MAY_READ) -#define KERN_COD_MAY_LINK (0x01 << POS_KERN_COD_MAY_LINK) -#define KERN_COD_EXEC_INHERIT (0x01 << POS_KERN_COD_EXEC_INHERIT) -#define KERN_COD_EXEC_UNCONSTRAINED (0x01 << POS_KERN_COD_EXEC_UNCONSTRAINED) -#define KERN_COD_EXEC_PROFILE (0x01 << POS_KERN_COD_EXEC_PROFILE) -#define KERN_COD_EXEC_MMAP (0x01 << POS_KERN_COD_EXEC_MMAP) -#define KERN_COD_EXEC_UNSAFE (0x01 << POS_KERN_COD_EXEC_UNSAFE) -#define KERN_EXEC_MODIFIERS(X) (X & (KERN_COD_EXEC_INHERIT | \ - KERN_COD_EXEC_UNCONSTRAINED | \ - KERN_COD_EXEC_PROFILE)) +#define AA_MAY_EXEC (0x01 << POS_AA_MAY_EXEC) +#define AA_MAY_WRITE (0x01 << POS_AA_MAY_WRITE) +#define AA_MAY_READ (0x01 << POS_AA_MAY_READ) +#define AA_MAY_LINK (0x01 << POS_AA_MAY_LINK) +#define AA_EXEC_INHERIT (0x01 << POS_AA_EXEC_INHERIT) +#define AA_EXEC_UNCONSTRAINED (0x01 << POS_AA_EXEC_UNCONSTRAINED) +#define AA_EXEC_PROFILE (0x01 << POS_AA_EXEC_PROFILE) +#define AA_EXEC_MMAP (0x01 << POS_AA_EXEC_MMAP) +#define AA_EXEC_UNSAFE (0x01 << POS_AA_EXEC_UNSAFE) +#define AA_EXEC_MODIFIERS (AA_EXEC_INHERIT | \ + AA_EXEC_UNCONSTRAINED | \ + AA_EXEC_PROFILE) +#define KERN_EXEC_MODIFIERS(X) (X & AA_EXEC_MODIFIERS) + /* Network subdomain extensions. 
*/ -#define KERN_COD_TCP_CONNECT (0x01 << POS_KERN_COD_TCP_CONNECT) -#define KERN_COD_TCP_ACCEPT (0x01 << POS_KERN_COD_TCP_ACCEPT) -#define KERN_COD_TCP_CONNECTED (0x01 << POS_KERN_COD_TCP_CONNECTED) -#define KERN_COD_TCP_ACCEPTED (0x01 << POS_KERN_COD_TCP_ACCEPTED) -#define KERN_COD_UDP_SEND (0x01 << POS_KERN_COD_UDP_SEND) -#define KERN_COD_UDP_RECEIVE (0x01 << POS_KERN_COD_UDP_RECEIVE) +#define AA_TCP_CONNECT (0x01 << POS_AA_TCP_CONNECT) +#define AA_TCP_ACCEPT (0x01 << POS_AA_TCP_ACCEPT) +#define AA_TCP_CONNECTED (0x01 << POS_AA_TCP_CONNECTED) +#define AA_TCP_ACCEPTED (0x01 << POS_AA_TCP_ACCEPTED) +#define AA_UDP_SEND (0x01 << POS_AA_UDP_SEND) +#define AA_UDP_RECEIVE (0x01 << POS_AA_UDP_RECEIVE) -#define KERN_COD_LOGTCP_SEND (0x01 << POS_KERN_COD_LOGTCP_SEND) -#define KERN_COD_LOGTCP_RECEIVE (0x01 << POS_KERN_COD_LOGTCP_RECEIVE) +#define AA_LOGTCP_SEND (0x01 << POS_AA_LOGTCP_SEND) +#define AA_LOGTCP_RECEIVE (0x01 << POS_AA_LOGTCP_RECEIVE) +#define AA_INVALID_PERM (0x01 << POS_AA_INVALID_POS) -#define KERN_COD_HAT_SIZE 975 /* Maximum size of a subdomain +#define AA_HAT_SIZE 975 /* Maximum size of a subdomain * ident (hat) */ enum pattern_t { @@ -88,14 +94,21 @@ enum pattern_t { ePatternInvalid, }; -#define HAS_MAY_READ(mode) ((mode) & KERN_COD_MAY_READ) -#define HAS_MAY_WRITE(mode) ((mode) & KERN_COD_MAY_WRITE) -#define HAS_MAY_LINK(mode) ((mode) & KERN_COD_MAY_LINK) -#define HAS_MAY_EXEC(mode) ((mode) & KERN_COD_MAY_EXEC) -#define HAS_EXEC_INHERIT(mode) ((mode) & KERN_COD_EXEC_INHERIT) -#define HAS_EXEC_PROFILE(mode) ((mode) & KERN_COD_EXEC_PROFILE) -#define HAS_EXEC_UNCONSTRAINED(mode) ((mode) & KERN_COD_EXEC_UNCONSTRAINED) -#define HAS_EXEC_MMAP(mode) ((mode) & KERN_COD_EXEC_MMAP) -#define HAS_EXEC_UNSAFE(mode) ((mode) & KERN_COD_EXEC_UNSAFE) +#define HAS_MAY_READ(mode) ((mode) & AA_MAY_READ) +#define HAS_MAY_WRITE(mode) ((mode) & AA_MAY_WRITE) +#define HAS_MAY_LINK(mode) ((mode) & AA_MAY_LINK) +#define HAS_MAY_EXEC(mode) ((mode) & AA_MAY_EXEC) +#define 
HAS_EXEC_INHERIT(mode) ((mode) & AA_EXEC_INHERIT) +#define HAS_EXEC_PROFILE(mode) ((mode) & AA_EXEC_PROFILE) +#define HAS_EXEC_UNCONSTRAINED(mode) ((mode) & AA_EXEC_UNCONSTRAINED) +#define HAS_EXEC_MMAP(mode) ((mode) & AA_EXEC_MMAP) +#define HAS_EXEC_UNSAFE(mode) ((mode) & AA_EXEC_UNSAFE) +#define AA_NOXMODS_PERM_MASK (AA_MAY_EXEC | AA_MAY_WRITE | \ + AA_MAY_READ | AA_MAY_LINK | \ + AA_EXEC_MMAP) +#define AA_VALID_PERM_MASK ((1 << (POS_AA_MAX + 1)) - 1) + +#define SINGLE_BIT_SET(X) (!((X) & ((X) - 1))) +#define AA_EXEC_SINGLE_MODIFIER_SET(X) SINGLE_BIT_SET(((X) & AA_EXEC_MODIFIERS)) #endif /* ! _IMMUNIX_H */ diff --git a/parser/libapparmor_re/Makefile b/parser/libapparmor_re/Makefile new file mode 100644 index 000000000..a95956b55 --- /dev/null +++ b/parser/libapparmor_re/Makefile @@ -0,0 +1,25 @@ +# Profiling: +#EXTRA_CFLAGS = -pg + +TARGET=libapparmor_re.a + +CFLAGS = -c -g -Wall -O2 ${EXTRA_CFLAGS} +CXXFLAGS := ${CFLAGS} + +ARFLAGS=-rcs + +BISON := bison + +all : ${TARGET} + +libapparmor_re.a: regexp.o + ar ${ARFLAGS} $@ $^ + +regexp.o : regexp.cc + $(LINK.cc) $^ -o $@ + +regexp.cc : regexp.y flex-tables.h + ${BISON} -o $@ $< + +clean: + rm -f regexp{,.o,.cc,.so,.a} ${TARGET} diff --git a/parser/libapparmor_re/apparmor_re.h b/parser/libapparmor_re/apparmor_re.h new file mode 100644 index 000000000..ec640ef92 --- /dev/null +++ b/parser/libapparmor_re/apparmor_re.h @@ -0,0 +1,30 @@ +/* $Id: apparmor.h 6203 2006-02-02 22:03:41Z steve $ + + Copyright (c) 2003, 2004, 2005, 2006 Novell, Inc. (All rights reserved) + + The libapparmor library is licensed under the terms of the GNU + Lesser General Public License, version 2.1. Please see the file + COPYING.LGPL. 
+*/ + +#ifndef APPARMOR_RE_H +#define APPARMOR_RE_H + +#ifdef __cplusplus +extern "C" { +#endif + +struct aare_ruleset; + +typedef struct aare_ruleset aare_ruleset_t; + +aare_ruleset_t *aare_new_ruleset(int reverse); +void aare_delete_ruleset(aare_ruleset_t *rules); +int aare_add_rule(aare_ruleset_t *rules, char *rule, uint32_t perms); +void *aare_create_dfa(aare_ruleset_t *rules, int equiv_classes, size_t *size); + +#ifdef __cplusplus +} +#endif + +#endif /* APPARMOR_RE_H */ diff --git a/parser/libapparmor_re/flex-tables.h b/parser/libapparmor_re/flex-tables.h new file mode 100644 index 000000000..2b755b7f6 --- /dev/null +++ b/parser/libapparmor_re/flex-tables.h @@ -0,0 +1,40 @@ +#ifndef __FLEX_TABLES_H +#define __FLEX_TABLES_H + +#include +#include + +#define YYTH_MAGIC 0xF13C57B1 + +struct table_set_header { + uint32_t th_magic; /* TH_MAGIC */ + uint32_t th_hsize; + uint32_t th_ssize; + uint16_t th_flags; + char th_version[]; +/* char th_name[]; + char th_pad64[];*/ +} __attribute__ ((packed)); + +#define YYTD_ID_ACCEPT 1 /* 1 */ +#define YYTD_ID_BASE 2 /* 2 */ +#define YYTD_ID_CHK 3 /* 3 */ +#define YYTD_ID_DEF 4 /* 4 */ +#define YYTD_ID_EC 5 /* 5 */ +#define YYTD_ID_NXT 6 /* 8 */ +#define YYTD_ID_META 7 /* 6 */ + +#define YYTD_DATA8 1 +#define YYTD_DATA16 2 +#define YYTD_DATA32 4 + +struct table_header { + uint16_t td_id; + uint16_t td_flags; + uint32_t td_hilen; + uint32_t td_lolen; + char td_data[]; +/* char td_pad64[];*/ +} __attribute__ ((packed)); + +#endif diff --git a/parser/libapparmor_re/regexp.h b/parser/libapparmor_re/regexp.h new file mode 100644 index 000000000..728efbe92 --- /dev/null +++ b/parser/libapparmor_re/regexp.h @@ -0,0 +1,10 @@ +#ifndef __REGEXP_H +#define __REGEXP_H + +/** + * Flex file format, but without state compression and with negative + * match results in the YYTD_ID_DEF table instead. 
+ */ +#define YYTH_REGEXP_MAGIC 0x1B5E783D + +#endif /* __REGEXP_H */ diff --git a/parser/libapparmor_re/regexp.y b/parser/libapparmor_re/regexp.y new file mode 100644 index 000000000..33ae968a4 --- /dev/null +++ b/parser/libapparmor_re/regexp.y @@ -0,0 +1,1641 @@ +/* + * regexp.y -- Regular Expression Matcher Generator + * (C) 2006 Andreas Gruenbacher + * + * Implementation based on the Lexical Analysis chapter of: + * Alfred V. Aho, Ravi Sethi, Jeffrey D. Ullman: + * Compilers: Principles, Techniques, and Tools (The "Dragon Book"), + * Addison-Wesley, 1986. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * See http://www.gnu.org for more details. + */ + +%{ + #include + #include + #include + #include + #include + + using namespace std; + + typedef unsigned char uchar; + typedef set Chars; + + ostream& operator<<(ostream& os, uchar c); + + /* Compute the union of two sets. */ + template + set operator+(const set& a, const set& b) + { + set c(a); + c.insert(b.begin(), b.end()); + return c; + } + + /** + * A DFA state is a set of important nodes in the syntax tree. This + * includes AcceptNodes, which indicate that when a match ends in a + * particular state, the regular expressions that the AcceptNode + * belongs to match. + */ + class ImportantNode; + typedef set State; + + /** + * Out-edges from a state to another: we store the follow-state + * for each input character that is not a default match in + * cases (i.e., following a CharNode or CharSetNode), and default + * matches in otherwise as well as in all matching explicit cases + * (i.e., following an AnyCharNode or NotCharSetNode). This avoids + * enumerating all the explicit tranitions for default matches. 
+ */ + typedef struct Cases { + typedef map::iterator iterator; + iterator begin() { return cases.begin(); } + iterator end() { return cases.end(); } + + Cases() : otherwise(0) { } + map cases; + State *otherwise; + } Cases; + + /* An abstract node in the syntax tree. */ + class Node { + public: + Node() : + nullable(false), left(0), right(0) { } + Node(Node *left) : + nullable(false), left(left), right(0) { } + Node(Node *left, Node *right) : + nullable(false), left(left), right(right) { } + virtual ~Node() + { + delete left; + delete right; + } + + /** + * See the "Dragon Book" for an explanation of nullable, firstpos, + * lastpos, and followpos. + */ + virtual void compute_nullable() { } + virtual void compute_firstpos() = 0; + virtual void compute_lastpos() = 0; + virtual void compute_followpos() { } + + virtual ostream& dump(ostream& os) = 0; + + bool nullable; + State firstpos, lastpos, followpos; + Node *left, *right; + }; + + /* Match nothing (//). */ + class EpsNode : public Node { + public: + EpsNode() + { + nullable = true; + } + void compute_firstpos() + { + } + void compute_lastpos() + { + } + ostream& dump(ostream& os) + { + return os << "[]"; + } + }; + + /** + * Leaf nodes in the syntax tree are important to us: they describe the + * characters that the regular expression matches. We also consider + * AcceptNodes import: they indicate when a regular expression matches. + */ + class ImportantNode : public Node { + public: + ImportantNode() { } + void compute_firstpos() + { + firstpos.insert(this); + } + void compute_lastpos() { + lastpos.insert(this); + } + virtual void follow(Cases& cases) = 0; + }; + + /* Match one specific character (/c/). 
*/ + class CharNode : public ImportantNode { + public: + CharNode(uchar c) : c(c) { } + void follow(Cases& cases) + { + State **x = &cases.cases[c]; + if (!*x) { + if (cases.otherwise) + *x = new State(*cases.otherwise); + else + *x = new State; + } + (*x)->insert(followpos.begin(), followpos.end()); + } + ostream& dump(ostream& os) + { + return os << c; + } + + uchar c; + }; + + /* Match a set of characters (/[abc]/). */ + class CharSetNode : public ImportantNode { + public: + CharSetNode(Chars& chars) : chars(chars) { } + void follow(Cases& cases) + { + for (Chars::iterator i = chars.begin(); i != chars.end(); i++) { + State **x = &cases.cases[*i]; + if (!*x) { + if (cases.otherwise) + *x = new State(*cases.otherwise); + else + *x = new State; + } + (*x)->insert(followpos.begin(), followpos.end()); + } + } + ostream& dump(ostream& os) + { + os << '['; + for (Chars::iterator i = chars.begin(); i != chars.end(); i++) + os << *i; + return os << ']'; + } + + Chars chars; + }; + + /* Match all except one character (/[^abc]/). */ + class NotCharSetNode : public ImportantNode { + public: + NotCharSetNode(Chars& chars) : chars(chars) { } + void follow(Cases& cases) + { + if (!cases.otherwise) + cases.otherwise = new State; + for (Chars::iterator j = chars.begin(); j != chars.end(); j++) { + State **x = &cases.cases[*j]; + if (!*x) + *x = new State(*cases.otherwise); + } + /** + * Note: Add to the nonmatching characters after copying away the + * old otherwise state for the matching characters. + */ + cases.otherwise->insert(followpos.begin(), followpos.end()); + for (Cases::iterator i = cases.begin(); i != cases.end(); i++) { + if (chars.find(i->first) == chars.end()) + i->second->insert(followpos.begin(), followpos.end()); + } + } + ostream& dump(ostream& os) + { + os << "[^"; + for (Chars::iterator i = chars.begin(); i != chars.end(); i++) + os << *i; + return os << ']'; + } + + Chars chars; + }; + + /* Match any character (/./). 
*/ + class AnyCharNode : public ImportantNode { + public: + AnyCharNode() { } + void follow(Cases& cases) + { + if (!cases.otherwise) + cases.otherwise = new State; + cases.otherwise->insert(followpos.begin(), followpos.end()); + for (Cases::iterator i = cases.begin(); i != cases.end(); i++) + i->second->insert(followpos.begin(), followpos.end()); + } + ostream& dump(ostream& os) { + return os << "."; + } + }; + + /** + * Indicate that a regular expression matches. An AcceptNode itself + * doesn't match anything, so it will never generate any transitions. + */ + class AcceptNode : public ImportantNode { + public: + AcceptNode(uint32_t perms, int is_rerule) + : perms(perms), is_rerule(is_rerule) {} + void follow(Cases& cases) + { + /* Nothing to follow. */ + } + ostream& dump(ostream& os) { + return os << '<' << perms << ", " << is_rerule << '>'; + } + + uint32_t perms; + int is_rerule; + }; + + /* Match a pair of consecutive nodes. */ + class CatNode : public Node { + public: + CatNode(Node *left, Node *right) : + Node(left, right) { } + void compute_nullable() + { + nullable = left->nullable && right->nullable; + } + void compute_firstpos() + { + if (left->nullable) + firstpos = left->firstpos + right->firstpos; + else + firstpos = left->firstpos; + } + void compute_lastpos() + { + if (right->nullable) + lastpos = left->lastpos + right->lastpos; + else + lastpos = right->lastpos; + } + void compute_followpos() + { + State from = left->lastpos, to = right->firstpos; + for(State::iterator i = from.begin(); i != from.end(); i++) { + (*i)->followpos.insert(to.begin(), to.end()); + } + } + ostream& dump(ostream& os) + { + return os; + //return os << ' '; + } + }; + + /* Match a node zero or more times. (This is a unary operator.) 
*/ + class StarNode : public Node { + public: + StarNode(Node *left) : + Node(left) + { + nullable = true; + } + void compute_firstpos() + { + firstpos = left->firstpos; + } + void compute_lastpos() + { + lastpos = left->lastpos; + } + void compute_followpos() + { + State from = left->lastpos, to = left->firstpos; + for(State::iterator i = from.begin(); i != from.end(); i++) { + (*i)->followpos.insert(to.begin(), to.end()); + } + } + ostream& dump(ostream& os) + { + return os << '*'; + } + }; + + /* Match a node one or more times. (This is a unary operator.) */ + class PlusNode : public Node { + public: + PlusNode(Node *left) : + Node(left) { } + void compute_nullable() + { + nullable = left->nullable; + } + void compute_firstpos() + { + firstpos = left->firstpos; + } + void compute_lastpos() + { + lastpos = left->lastpos; + } + void compute_followpos() + { + State from = left->lastpos, to = left->firstpos; + for(State::iterator i = from.begin(); i != from.end(); i++) { + (*i)->followpos.insert(to.begin(), to.end()); + } + } + ostream& dump(ostream& os) + { + return os << '+'; + } + }; + + /* Match one of two alternative nodes. 
*/ + class AltNode : public Node { + public: + AltNode(Node *left, Node *right) : + Node(left, right) { } + void compute_nullable() + { + nullable = left->nullable || right->nullable; + } + void compute_lastpos() + { + lastpos = left->lastpos + right->lastpos; + } + void compute_firstpos() + { + firstpos = left->firstpos + right->firstpos; + } + ostream& dump(ostream& os) + { + return os << '|'; + } + }; +%} + +%union { + char c; + Node *node; + Chars *cset; +} + +%{ + void regexp_error(Node **, const char *, int *, const char *); +# define YYLEX_PARAM &text + int regexp_lex(YYSTYPE *, const char **); + + static inline Chars* + insert_char(Chars* cset, uchar a) + { + cset->insert(a); + return cset; + } + + static inline Chars* + insert_char_range(Chars* cset, uchar a, uchar b) + { + if (a > b) + swap(a, b); + for (uchar i = a; i <= b; i++) + cset->insert(i); + return cset; + } +%} + +%pure-parser +/* %error-verbose */ +%parse-param {Node **root} +%parse-param {const char *text} +%parse-param {int *is_rerule} +%name-prefix = "regexp_" + +%token CHAR +%type regex_char cset_char1 cset_char cset_charN +%type charset cset_chars +%type regexp expr terms0 terms qterm term + +/** + * Note: destroy all nodes upon failure, but *not* the start symbol once + * parsing succeeds! + */ +%destructor { delete $$; } expr terms0 terms qterm term + +%% + +/* FIXME: Does not parse "[--]", "[---]", "[^^-x]". I don't actually know + which precise grammer Perl regexps use, and rediscovering that + is proving to be painful. 
*/ + +regexp : /* empty */ { *root = $$ = new EpsNode; } + | expr { *root = $$ = $1; } + ; + +expr : terms + | expr '|' terms0 { $$ = new AltNode($1, $3); } + | '|' terms0 { $$ = new AltNode(new EpsNode, $2); } + ; + +terms0 : /* empty */ { $$ = new EpsNode; } + | terms + ; + +terms : qterm + | terms qterm { $$ = new CatNode($1, $2); } + ; + +qterm : term + | term '*' { $$ = new StarNode($1); *is_rerule = 1; } + | term '+' { $$ = new PlusNode($1); *is_rerule = 1; } + ; + +term : '.' { $$ = new AnyCharNode; *is_rerule = 1; } + | regex_char { $$ = new CharNode($1); } + | '[' charset ']' { $$ = new CharSetNode(*$2); + delete $2; *is_rerule = 1; } + | '[' '^' charset ']' + { $$ = new NotCharSetNode(*$3); + delete $3; *is_rerule = 1; } + | '[' '^' '^' cset_chars ']' + { $4->insert('^'); + $$ = new NotCharSetNode(*$4); + delete $4; *is_rerule = 1; } + | '(' regexp ')' { $$ = $2; } + ; + +regex_char : CHAR + | '^' { $$ = '^'; } + | '-' { $$ = '-'; } + | ']' { $$ = ']'; } + ; + +charset : cset_char1 cset_chars + { $$ = insert_char($2, $1); } + | cset_char1 '-' cset_charN cset_chars + { $$ = insert_char_range($4, $1, $3); } + ; + +cset_chars : /* nothing */ { $$ = new Chars; } + | cset_chars cset_charN + { $$ = insert_char($1, $2); } + | cset_chars cset_charN '-' cset_charN + { $$ = insert_char_range($1, $2, $4); } + ; + +cset_char1 : cset_char + | ']' { $$ = ']'; } + | '-' { $$ = '-'; } + ; + +cset_charN : cset_char + | '^' { $$ = '^'; } + ; + +cset_char : CHAR + | '[' { $$ = '['; } + | '*' { $$ = '*'; } + | '+' { $$ = '+'; } + | '.' { $$ = '.'; } + | '|' { $$ = '|'; } + | '(' { $$ = '('; } + | ')' { $$ = ')'; } + ; + +%% + +#include +#include +#include +#include + +#include +#include + +#include "../immunix.h" + +#define NOT_RE_RULE 0 + +/* Traverse the syntax tree depth-first in an iterator-like manner. 
*/ +class depth_first_traversal { + vector stack; + vector visited; +public: + depth_first_traversal(Node *node) { + stack.push_back(node); + while (node->left) { + visited.push_back(false); + stack.push_back(node->left); + node = node->left; + } + } + Node *operator*() + { + return stack.back(); + } + Node* operator->() + { + return stack.back(); + } + operator bool() + { + return !stack.empty(); + } + void operator++(int) + { + stack.pop_back(); + if (!stack.empty()) { + if (!visited.back() && stack.back()->right) { + visited.pop_back(); + visited.push_back(true); + stack.push_back(stack.back()->right); + while (stack.back()->left) { + visited.push_back(false); + stack.push_back(stack.back()->left); + } + } else + visited.pop_back(); + } + } +}; + +ostream& operator<<(ostream& os, Node& node) +{ + node.dump(os); + return os; +} + +ostream& operator<<(ostream& os, uchar c) +{ + const char *search = "\a\033\f\n\r\t|*+[](). ", + *replace = "aefnrt|*+[](). ", *s; + + if ((s = strchr(search, c)) && *s != '\0') + os << '\\' << replace[s - search]; + else if (c < 32 || c >= 127) + os << '\\' << '0' << char('0' + (c >> 6)) + << char('0' + ((c >> 3) & 7)) << char('0' + (c & 7)); + else + os << (char)c; + return os; +} + +int +octdigit(char c) +{ + if (c >= '0' && c <= '7') + return c - '0'; + return -1; +} + +int +hexdigit(char c) +{ + if (c >= '0' && c <= '9') + return c - '0'; + else if (c >= 'A' && c <= 'F') + return 10 + c - 'A'; + else if (c >= 'a' && c <= 'f') + return 10 + c - 'a'; /* lower-case digits are offset from 'a', not 'A' */ + else + return -1; +} + +int +regexp_lex(YYSTYPE *val, const char **pos) +{ + int c; + + val->c = **pos; + switch(*(*pos)++) { + case '\0': + (*pos)--; + return 0; + + case '*': case '+': case '.': case '|': case '^': case '-': + case '[': case ']': case '(' : case ')': + return *(*pos - 1); + + case '\\': + val->c = **pos; + switch(*(*pos)++) { + case '\0': + (*pos)--; + val->c = '\\'; + break; + + case '0': + val->c = 0; + if ((c = octdigit(**pos)) >= 0) { + val->c = c; + (*pos)++; + 
} + if ((c = octdigit(**pos)) >= 0) { + val->c = (val->c << 3) + c; + (*pos)++; + } + if ((c = octdigit(**pos)) >= 0) { + val->c = (val->c << 3) + c; + (*pos)++; + } + break; + + case 'x': + val->c = 0; + if ((c = hexdigit(**pos)) >= 0) { + val->c = c; + (*pos)++; + } + if ((c = hexdigit(**pos)) >= 0) { + val->c = (val->c << 4) + c; + (*pos)++; + } + break; + + case 'a': + val->c = '\a'; + break; + + case 'e': + val->c = 033 /* ESC */; + break; + + case 'f': + val->c = '\f'; + break; + + case 'n': + val->c = '\n'; + break; + + case 'r': + val->c = '\r'; + break; + + case 't': + val->c = '\t'; + break; + } + } + return CHAR; +} + +void +regexp_error(Node **, const char *text, int *is_rerule, const char *error) +{ + /* We don't want the library to print error messages. */ +} + +/** + * Assign a consecutive number to each node. This is only needed for + * pretty-printing the debug output. + */ +map node_label; +void label_nodes(Node *root) +{ + int nodes = 0; + for (depth_first_traversal i(root); i; i++) + node_label.insert(make_pair(*i, nodes++)); +} + +/** + * Text-dump a state (for debugging). + */ +ostream& operator<<(ostream& os, const State& state) +{ + os << '{'; + if (!state.empty()) { + State::iterator i = state.begin(); + for(;;) { + os << node_label[*i]; + if (++i == state.end()) + break; + os << ','; + } + } + os << '}'; + return os; +} + +/** + * Text-dump the syntax tree (for debugging). + */ +void dump_syntax_tree(ostream& os, Node *node) { + for (depth_first_traversal i(node); i; i++) { + os << node_label[*i] << '\t'; + if ((*i)->left == 0) + os << **i << '\t' << (*i)->followpos << endl; + else { + if ((*i)->right == 0) + os << node_label[(*i)->left] << **i; + else + os << node_label[(*i)->left] << **i + << node_label[(*i)->right]; + os << '\t' << (*i)->firstpos + << (*i)->lastpos << endl; + } + } + os << endl; +} + +/* Comparison operator for sets of . 
*/ +template +class deref_less_than { +public: + deref_less_than() { } + bool operator()(T a, T b) + { + return *a < *b; + } +}; + +/** + * States in the DFA. The pointer comparison allows us to tell sets we + * have seen already from new ones when constructing the DFA. + */ +typedef set > States; +/* Transitions in the DFA. */ +typedef map Trans; + +class DFA { +public: + DFA(Node *root); + virtual ~DFA(); + void dump(ostream& os); + void dump_dot_graph(ostream& os); + map equivalence_classes(); + void apply_equivalence_classes(map& eq); + State *verify_perms(void); + Node *root; + State *nonmatching, *start; + States states; + Trans trans; +}; + +/** + * Construct a DFA from a syntax tree. + */ +DFA::DFA(Node *root) : root(root) +{ + for (depth_first_traversal i(root); i; i++) { + (*i)->compute_nullable(); + (*i)->compute_firstpos(); + (*i)->compute_lastpos(); + } + for (depth_first_traversal i(root); i; i++) { + (*i)->compute_followpos(); + } + + nonmatching = new State; + states.insert(nonmatching); + + start = new State(root->firstpos); + states.insert(start); + + list work_queue; + work_queue.push_back(start); + while (!work_queue.empty()) { + State *from = work_queue.front(); + work_queue.pop_front(); + Cases cases; + for (State::iterator i = from->begin(); i != from->end(); i++) + (*i)->follow(cases); + if (cases.otherwise) { + pair x = states.insert(cases.otherwise); + if (x.second) + work_queue.push_back(cases.otherwise); + else { + delete cases.otherwise; + cases.otherwise = *x.first; + } + } + for (Cases::iterator j = cases.begin(); j != cases.end(); j++) { + pair x = states.insert(j->second); + if (x.second) + work_queue.push_back(*x.first); + else { + delete j->second; + j->second = *x.first; + } + } + Cases& here = trans.insert(make_pair(from, Cases())).first->second; + here.otherwise = cases.otherwise; + for (Cases::iterator j = cases.begin(); j != cases.end(); j++) { + /** + * Do not insert transitions that the default transition already + * 
covers. + */ + if (j->second != cases.otherwise) + here.cases.insert(*j); + } + } + /** + * Set the result of the nonmatching state to an invalid perm, but only + * after contructing the DFA: otherwise, the empty states that + * NotCharSetNode::follow() create would not compare equal with the + * nonmatching state -- but we rely on merging states that compare + * equal. + */ + nonmatching->insert(new AcceptNode(AA_INVALID_PERM, NOT_RE_RULE)); +} + +DFA::~DFA() +{ + for (States::iterator i = states.begin(); i != states.end(); i++) + delete *i; +} + +/** + * Result when this state matches. + */ +uint32_t accept_perms(State *state) +{ + uint32_t perms = 0; + int is_exactXmatch = 0; + + for (State::iterator i = state->begin(); i != state->end(); i++) { + if (AcceptNode *accept = dynamic_cast(*i)) { + if (is_exactXmatch) { + /* exact match X perms override an re match X perm. Only + * accumulate regular permissions + */ + if (accept->is_rerule) + perms |= AA_NOXMODS_PERM_MASK & accept->perms; + else + /* N exact matches must have same X perm so accumulate + * to catch any error */ + perms |= accept->perms; + } else { + if (accept->is_rerule || + !(AA_EXEC_MODIFIERS & accept->perms)) { + perms |= accept->perms; + } else { + /* exact match with an exec modifier override accumulated + * X permissions */ + is_exactXmatch = 1; + perms = (AA_NOXMODS_PERM_MASK & perms) | accept->perms; + } + } + } + } + return perms; +} + +/** + * verify that there are no conflicting X permissions on the dfa + * return NULL - perms verified okay + * State of 1st encountered with bad X perms + */ +State *DFA::verify_perms(void) +{ + for (States::iterator i = states.begin(); i != states.end(); i++) { + uint32_t accept = accept_perms(*i); + if (*i == start || accept) { + if ((accept & AA_EXEC_MODIFIERS) && + !AA_EXEC_SINGLE_MODIFIER_SET(accept)) + return *i; + } + } + return NULL; +} + +/** + * text-dump the DFA (for debugging). 
+ */ +void DFA::dump(ostream& os) +{ /* Lists the start state and every state with non-zero accept perms, + * then one line per default and per-character transition. */ + for (States::iterator i = states.begin(); i != states.end(); i++) { + uint32_t accept = accept_perms(*i); + if (*i == start || accept) { + os << **i; + if (*i == start) + os << " <=="; + if (accept) { + os << " (" << accept << ')'; + } + os << endl; + } + } + os << endl; + + for (Trans::iterator i = trans.begin(); i != trans.end(); i++) { + if (i->second.otherwise) + os << *(i->first) << " -> " << *i->second.otherwise << endl; + for (Cases::iterator j = i->second.begin(); j != i->second.end(); j++) { + os << *(i->first) << " -> " << *(j->second) << ": " + << j->first << endl; + } + } + os << endl; +} + +/** + * Create a dot (graphviz) graph from the DFA (for debugging). + * + * The nonmatching state and all edges into it are left out. Characters + * whose explicit edge leads to the nonmatching state are collected and, + * when the default edge of that state is drawn, shown on it as a [^...] + * label. + */ +void DFA::dump_dot_graph(ostream& os) +{ + os << "digraph \"dfa\" {" << endl; + + for (States::iterator i = states.begin(); i != states.end(); i++) { + if (*i == nonmatching) + continue; + + os << "\t\"" << **i << "\" [" << endl; + if (*i == start) { + os << "\t\tstyle=bold" << endl; + } + uint32_t perms = accept_perms(*i); + if (perms) { + os << "\t\tlabel=\"" << **i << "\\n(" + << perms << ")\"" << endl; + } + os << "\t]" << endl; + } + for (Trans::iterator i = trans.begin(); i != trans.end(); i++) { + Cases& cases = i->second; + Chars excluded; + + for (Cases::iterator j = cases.begin(); j != cases.end(); j++) { + if (j->second == nonmatching) + excluded.insert(j->first); + else { + os << "\t\"" << *i->first << "\" -> \""; + os << *j->second << "\" [" << endl; + os << "\t\tlabel=\"" << (char)j->first << "\"" << endl; + os << "\t]" << endl; + } + } + if (i->second.otherwise && i->second.otherwise != nonmatching) { + os << "\t\"" << *i->first << "\" -> \"" << *i->second.otherwise + << "\" [" << endl; + if (!excluded.empty()) { + os << "\t\tlabel=\"[^"; /* the loop variable below shadows the Trans iterator i */ + for (Chars::iterator i = excluded.begin(); + i != excluded.end(); + i++) { + os << *i; + } + os << "]\"" << endl; + } + os << "\t]" << endl; + } + } + os << '}' << endl; +} + +/** + * Compute character
equivalence classes in the DFA to save space in the + * transition table. + */ +map DFA::equivalence_classes() +{ + map classes; + uchar next_class = 1; /* class ids are handed out starting from 1. NOTE(review): next_class is a uchar and is incremented below without a range check -- confirm it cannot wrap */ + + for (Trans::iterator i = trans.begin(); i != trans.end(); i++) { + Cases& cases = i->second; + + /* Group edges to the same next state together */ + map node_sets; + for (Cases::iterator j = cases.begin(); j != cases.end(); j++) + node_sets[j->second].insert(j->first); + + for (map::iterator j = node_sets.begin(); + j != node_sets.end(); + j++) { + /* Group edges to the same next state together by class */ + map node_classes; + bool class_used = false; + for (Chars::iterator k = j->second.begin(); + k != j->second.end(); + k++) { + pair::iterator, bool> x = + classes.insert(make_pair(*k, next_class)); + if (x.second) /* *k had no class yet and has just been put into next_class */ + class_used = true; + pair::iterator, bool> y = + node_classes.insert(make_pair(x.first->second, Chars())); + y.first->second.insert(*k); + } + if (class_used) { /* next_class was handed out above, continue with a fresh id */ + next_class++; + class_used = false; + } + for (map::iterator k = node_classes.begin(); + k != node_classes.end(); + k++) { + /** + * If any other characters are in the same class, move + * the characters in this class into their own new class + */ + map::iterator l; + for (l = classes.begin(); l != classes.end(); l++) { + if (l->second == k->first && + k->second.find(l->first) == k->second.end()) { + class_used = true; + break; + } + } + if (class_used) { + for (Chars::iterator l = k->second.begin(); + l != k->second.end(); + l++) { + classes[*l] = next_class; + } + next_class++; + class_used = false; + } + } + } + } + return classes; +} + +/** + * Text-dump the equivalence classes (for debugging).
+ */ +void dump_equivalence_classes(ostream& os, map& eq) +{ + map rev; /* reverse mapping: class -> characters in that class */ + + for (map::iterator i = eq.begin(); i != eq.end(); i++) { + Chars& chars = rev.insert(make_pair(i->second, + Chars())).first->second; + chars.insert(i->first); + } + os << "(eq):" << endl; + for (map::iterator i = rev.begin(); i != rev.end(); i++) { + os << (int)i->first << ':'; + Chars& chars = i->second; + for (Chars::iterator j = chars.begin(); j != chars.end(); j++) { + os << ' ' << *j; + } + os << endl; + } +} + +/** + * Replace characters with classes (which are also represented as + * characters) in the DFA transition table. + */ +void DFA::apply_equivalence_classes(map& eq) +{ + /** + * Note: We only transform the transition table; the nodes continue to + * contain the original characters. + */ + for (Trans::iterator i = trans.begin(); i != trans.end(); i++) { + map tmp; + tmp.swap(i->second.cases); /* cases is now empty and is refilled below, keyed by class */ + for (Cases::iterator j = tmp.begin(); j != tmp.end(); j++) + i->second.cases.insert(make_pair(eq[j->first], j->second)); + } +} + +/** + * Flip the children of all cat nodes. This causes strings to be matched + * back to front, i.e. in reverse. + */ +void flip_tree(Node *node) +{ + for (depth_first_traversal i(node); i; i++) { + if (CatNode *cat = dynamic_cast(*i)) { + swap(cat->left, cat->right); + } + } +} + /** + * Table form of a DFA: an accept value per state, a (default, base) pair + * per state and a shared (next, check) array; num maps each state to its + * number. flex_table() writes the tables out in a flex-style binary format. + */ +class TransitionTable { + typedef vector > DefaultBase; + typedef vector > NextCheck; +public: + TransitionTable(DFA& dfa, map& eq); + void dump(ostream& os); + void flex_table(ostream& os, const char *name); + bool fits_in(size_t base, Cases& cases); + void insert_state(State *state, DFA& dfa); + +private: + vector accept; + DefaultBase default_base; + NextCheck next_check; + map num; + map& eq; + uchar max_eq; + uint32_t min_base; +}; + +/** + * Construct the transition table.
+ */ +TransitionTable::TransitionTable(DFA& dfa, map& eq) + : eq(eq), min_base(0) +{ + /* Insert the dummy nonmatching transition by hand */ + next_check.push_back(make_pair(dfa.nonmatching, dfa.nonmatching)); + + if (eq.empty()) + max_eq = 255; /* no equivalence classes given: use the largest value a case key can take */ + else { + max_eq = 0; + for(map::iterator i = eq.begin(); i != eq.end(); i++) { + if (i->second > max_eq) + max_eq = i->second; + } + } + + /** + * Insert all the DFA states into the transition table. The nonmatching + * and start states come first, followed by all other states. + */ + insert_state(dfa.nonmatching, dfa); + insert_state(dfa.start, dfa); + for (States::iterator i = dfa.states.begin(); i != dfa.states.end(); i++) { + if (*i != dfa.nonmatching && *i != dfa.start) + insert_state(*i, dfa); + } + /* Number the states in the same order in which they were inserted above. */ + num.insert(make_pair(dfa.nonmatching, num.size())); + num.insert(make_pair(dfa.start, num.size())); + for (States::iterator i = dfa.states.begin(); i != dfa.states.end(); i++) { + if (*i != dfa.nonmatching && *i != dfa.start) + num.insert(make_pair(*i, num.size())); + } + + accept.resize(dfa.states.size()); + for (States::iterator i = dfa.states.begin(); i != dfa.states.end(); i++) + /* mask off AA_INVALID_PERM, it is not needed by match engine */ + accept[num[*i]] = accept_perms(*i) & ~AA_INVALID_PERM; +} + +/** + * Does <cases> fit into position <base> of the transition table? + * + * Returns false as soon as the slot base + c of some case character c is + * already owned by a state; slots beyond the current end of next_check + * count as free. + */ +bool TransitionTable::fits_in(size_t base, Cases& cases) +{ + for (Cases::iterator i = cases.begin(); i != cases.end(); i++) { + size_t c = base + i->first; + if (c >= next_check.size()) + continue; + if (next_check[c].second) + return false; + } + return true; +} + +/** + * Insert of into the transition table.
+ */ +void TransitionTable::insert_state(State *from, DFA& dfa) +{ /* Appends the (default state, base) pair of from to default_base; if from + * has explicit transitions they are placed in next_check at the first + * base where none of them hits an occupied slot. */ + State *default_state = dfa.nonmatching; + size_t base = 0; + + Trans::iterator i = dfa.trans.find(from); + if (i != dfa.trans.end()) { + Cases& cases = i->second; + if (cases.otherwise) + default_state = cases.otherwise; + if (cases.cases.empty()) + goto insert_state; + + size_t c = cases.begin()->first; + if (c < min_base) + base = min_base - c; + /* Try inserting until we succeed. */ + while (!fits_in(base, cases)) + base++; + + if (next_check.size() <= base + max_eq) + next_check.resize(base + max_eq + 1); + for (Cases::iterator j = cases.begin(); j != cases.end(); j++) + next_check[base + j->first] = make_pair(j->second, from); + /* advance min_base to the first slot that is still free */ + while (min_base < next_check.size()) { + if (!next_check[min_base].second) + break; + min_base++; + } + } +insert_state: + default_base.push_back(make_pair(default_state, base)); +} + +/** + * Text-dump the transition table (for debugging). + */ +void TransitionTable::dump(ostream& os) +{ + map st; + for (map::iterator i = num.begin(); + i != num.end(); + i++) { + st.insert(make_pair(i->second, i->first)); + } + + os << "(accept, default, base):" << endl; + for (size_t i = 0; i < default_base.size(); i++) { + os << "(" << accept[i] << ", " + << num[default_base[i].first] << ", " + << default_base[i].second << ")"; + if (st[i]) + os << " " << *st[i]; + if (default_base[i].first) + os << " -> " << *default_base[i].first; + os << endl; + } + + os << "(next, check):" << endl; + for (size_t i = 0; i < next_check.size(); i++) { + if (!next_check[i].second) + continue; + + os << i << ": "; + if (next_check[i].second) { + os << "(" << num[next_check[i].first] << ", " + << num[next_check[i].second] << ")" << " " + << *next_check[i].second << " -> " + << *next_check[i].first << ": "; + + size_t offs = i - default_base[num[next_check[i].second]].second; + if (eq.size()) + os << offs; + else + os << (uchar)offs; + } + os << endl; + } +} + +#if 0 +template +class FirstIterator {
+public: + FirstIterator(Iter pos) : pos(pos) { } + typename Iter::value_type::first_type operator*() { return pos->first; } + bool operator!=(FirstIterator& i) { return pos != i.pos; } + void operator++() { ++pos; } + ssize_t operator-(FirstIterator i) { return pos - i.pos; } +private: + Iter pos; +}; + +template +FirstIterator first_iterator(Iter iter) +{ + return FirstIterator(iter); +} + +template +class SecondIterator { +public: + SecondIterator(Iter pos) : pos(pos) { } + typename Iter::value_type::second_type operator*() { return pos->second; } + bool operator!=(SecondIterator& i) { return pos != i.pos; } + void operator++() { ++pos; } + ssize_t operator-(SecondIterator i) { return pos - i.pos; } +private: + Iter pos; +}; + +template +SecondIterator second_iterator(Iter iter) +{ + return SecondIterator(iter); +} +#endif + +/** + * Create a flex-style binary dump of the DFA tables. The table format + * was partly reverse engineered from the flex sources and from + * examining the tables that flex creates with its --tables-file option. + * (Only the -Cf and -Ce formats are currently supported.) + */ + +#include "flex-tables.h" +#include "regexp.h" + +static inline size_t pad64(size_t i) +{ /* round i up to the next multiple of 8 */ + return (i + (size_t)7) & ~(size_t)7; +} + +string fill64(size_t i) +{ /* string of NUL bytes that pads a length of i up to a multiple of 8 */ + const char zeroes[8] = { }; + string fill(zeroes, (i & 7) ?
8 - (i & 7) : 0); + return fill; +} + +template +size_t flex_table_size(Iter pos, Iter end) +{ /* bytes taken by one table as written by write_flex_table(): header plus elements, padded to 8 */ + return pad64(sizeof(struct table_header) + sizeof(*pos) * (end - pos)); +} + +template +void write_flex_table(ostream& os, int id, Iter pos, Iter end) +{ /* Writes a table_header followed by the elements, most significant byte + * first, and pads the result to a multiple of 8 bytes. */ + struct table_header td = { }; + size_t size = end - pos; + + td.td_id = htons(id); + td.td_flags = htons(sizeof(*pos)); + td.td_lolen = htonl(size); + os.write((char *)&td, sizeof(td)); + + for (; pos != end; ++pos) { + switch(sizeof(*pos)) { + case 4: + os.put((char)(*pos >> 24)); + os.put((char)(*pos >> 16)); /* fall through */ + case 2: + os.put((char)(*pos >> 8)); /* fall through */ + case 1: + os.put((char)*pos); + } + } + + os << fill64(sizeof(td) + sizeof(*pos) * size); +} + +void TransitionTable::flex_table(ostream& os, const char *name) +{ + const char th_version[] = "notflex"; + struct table_set_header th = { }; + + /** + * Change the following two data types to adjust the maximum flex + * table size. + */ + typedef uint16_t state_t; + typedef uint32_t trans_t; + + if (default_base.size() >= (state_t)-1) { + cerr << "Too many states (" << default_base.size() << ") for " + "type state_t" << endl; + exit(1); + } + if (next_check.size() >= (trans_t)-1) { + cerr << "Too many transitions (" << next_check.size() << ") for " + "type trans_t" << endl; + exit(1); + } + + /** + * Create copies of the data structures so that we can dump the tables + * using the generic write_flex_table() routine.
+ */ + vector equiv_vec; + if (eq.size()) { + equiv_vec.resize(256); + for (map::iterator i = eq.begin(); i != eq.end(); i++) { + equiv_vec[i->first] = i->second; + } + } + + vector default_vec; + vector base_vec; + for (DefaultBase::iterator i = default_base.begin(); + i != default_base.end(); + i++) { + default_vec.push_back(num[i->first]); + base_vec.push_back(i->second); + } + + vector next_vec; + vector check_vec; + for (NextCheck::iterator i = next_check.begin(); + i != next_check.end(); + i++) { + next_vec.push_back(num[i->first]); + check_vec.push_back(num[i->second]); + } + + /* Write the actual flex parser table. */ + + size_t hsize = pad64(sizeof(th) + sizeof(th_version) + strlen(name) + 1); + th.th_magic = htonl(YYTH_REGEXP_MAGIC); + th.th_hsize = htonl(hsize); + th.th_ssize = htonl(hsize + + flex_table_size(accept.begin(), accept.end()) + + (eq.size() ? + flex_table_size(equiv_vec.begin(), equiv_vec.end()) : 0) + + flex_table_size(base_vec.begin(), base_vec.end()) + + flex_table_size(default_vec.begin(), default_vec.end()) + + flex_table_size(next_vec.begin(), next_vec.end()) + + flex_table_size(check_vec.begin(), check_vec.end())); + os.write((char *)&th, sizeof(th)); + os << th_version << (char)0 << name << (char)0; /* both strings are written with a terminating NUL, matching the hsize computed above */ + os << fill64(sizeof(th) + sizeof(th_version) + strlen(name) + 1); + + + write_flex_table(os, YYTD_ID_ACCEPT, accept.begin(), accept.end()); + if (eq.size()) + write_flex_table(os, YYTD_ID_EC, equiv_vec.begin(), equiv_vec.end()); + write_flex_table(os, YYTD_ID_BASE, base_vec.begin(), base_vec.end()); + write_flex_table(os, YYTD_ID_DEF, default_vec.begin(), default_vec.end()); + write_flex_table(os, YYTD_ID_NXT, next_vec.begin(), next_vec.end()); + write_flex_table(os, YYTD_ID_CHK, check_vec.begin(), check_vec.end()); +} + +typedef set AcceptNodes; +map dominance(DFA& dfa) +{ /* For every AcceptNode, computes the intersection of the AcceptNode sets of all states that contain it. */ + map is_dominated; + + for (States::iterator i = dfa.states.begin(); i != dfa.states.end(); i++) { + AcceptNodes set1; + for (State::iterator j = (*i)->begin(); j
!= (*i)->end(); j++) { + if (AcceptNode *accept = dynamic_cast(*j)) + set1.insert(accept); + } + for (AcceptNodes::iterator j = set1.begin(); j != set1.end(); j++) { + pair::iterator, bool> x = + is_dominated.insert(make_pair(*j, set1)); + if (!x.second) { + AcceptNodes &set2(x.first->second), set3; + for (AcceptNodes::iterator l = set2.begin(); + l != set2.end(); + l++) { + if (set1.find(*l) != set1.end()) + set3.insert(*l); + } + set3.swap(set2); + } + } + } + return is_dominated; +} + +void dump_regexp_rec(ostream& os, Node *tree) +{ + if (tree->left) + dump_regexp_rec(os, tree->left); + os << *tree; + if (tree->right) + dump_regexp_rec(os, tree->right); +} + +void dump_regexp(ostream& os, Node *tree) +{ + dump_regexp_rec(os, tree); + os << endl; +} + +/** + * "Librarize" + */ + +#include +#include +#if 0 +#include + +__BEGIN_DECLS +#endif + +/** + * Build a DFA from a list of rules and return its flex-style table in a + * malloc()ed buffer whose length is stored in *size. Returns NULL with + * errno set to EINVAL when a value or regexp is rejected, and NULL when + * malloc fails. The rule list is encoded as + * "value\0regexp\0value\0regexp\0\0" + */ +extern "C" char * +regexp_flex_table(const char *name, const char *regexps, + int equivalence_classes, int reverse, size_t *size) +{ + Node *root = new EpsNode(); + char *buffer; + + while (*regexps) { + char *endptr; + uint32_t accept; + int is_rerule; + + Node *tree; + + accept = strtoul(regexps, &endptr, 0); + if (*endptr != '\0' || accept == 0 || accept >= (1UL << 31)) { + delete root; + errno = EINVAL; + return NULL; + } + regexps = endptr + 1; + is_rerule = NOT_RE_RULE; + if (regexp_parse(&tree, endptr, &is_rerule)) { /* NOTE(review): endptr still points at the NUL that ends the value, while regexps was just advanced to the text after it -- confirm which of the two is meant to be parsed */ + delete root; + errno = EINVAL; + return NULL; + } + if (reverse) + flip_tree(tree); + + tree = new CatNode(tree, new AcceptNode(accept, is_rerule)); + root = new AltNode(root, tree); + regexps = strchr(regexps, 0); /* NOTE(review): this leaves regexps on the terminating NUL, so the loop condition is false on its next test -- confirm whether the pointer should be advanced past it */ + } + + DFA dfa(root); + map eq; + if (equivalence_classes) { + eq = dfa.equivalence_classes(); + dfa.apply_equivalence_classes(eq); + } + TransitionTable transition_table(dfa, eq); + ostringstream stream; + transition_table.flex_table(stream, name); + delete root; + + streambuf *buf = stream.rdbuf(); + *size = buf->in_avail(); + buffer =
(char *)malloc(*size); + if (!buffer) + return NULL; + buf->sgetn(buffer, *size); + return buffer; +} + +#include +#include "apparmor_re.h" + +struct aare_ruleset { + int reverse; + Node *root; +}; + +extern "C" { + +aare_ruleset_t *aare_new_ruleset(int reverse) +{ /* Allocates a ruleset whose tree starts out as a single EpsNode; returns NULL if malloc fails. */ + aare_ruleset_t *container = (aare_ruleset_t *) malloc(sizeof(aare_ruleset_t)); + if (!container) + return NULL; + + container->root = new EpsNode(); + container->reverse = reverse; + + return container; +} + +void aare_delete_ruleset(aare_ruleset_t *rules) +{ /* a NULL ruleset is accepted and ignored */ + if (rules) { + delete(rules->root); + free(rules); + } +} + +int aare_add_rule(aare_ruleset_t *rules, char *rule, + uint32_t perms) +{ /* Parses rule and adds it, with perms attached, as a new alternative of + * the ruleset tree. Returns 1 on success, 0 if the rule does not parse. */ + Node *tree; + int is_rerule = NOT_RE_RULE; + + if (regexp_parse(&tree, rule, &is_rerule)) { + return 0; + } + + if (rules->reverse) + flip_tree(tree); + AcceptNode *accept = new AcceptNode(perms, is_rerule); + tree = new CatNode(tree, accept); + rules->root = new AltNode(rules->root, tree); + + return 1; +} + +/* create a dfa from the ruleset + * returns: malloc()ed buffer containing the dfa tables, @size set to the + * size of the tables + * else NULL on failure (@size is set to 0 only when verify_perms() + * reports a state with conflicting perms) + */ +void *aare_create_dfa(aare_ruleset_t *rules, int equiv_classes, size_t *size) +{ + char *buffer = NULL; + + label_nodes(rules->root); + DFA dfa(rules->root); + map eq; + if (equiv_classes) { + eq = dfa.equivalence_classes(); + dfa.apply_equivalence_classes(eq); + } + + if (dfa.verify_perms()) { + *size = 0; + return NULL; + } + + stringstream stream; + TransitionTable transition_table(dfa, eq); + transition_table.flex_table(stream, ""); + + stringbuf *buf = stream.rdbuf(); + + buf->pubseekpos(0); + *size = buf->in_avail(); + + buffer = (char *)malloc(*size); + if (!buffer) + return NULL; + buf->sgetn(buffer, *size); + return buffer; +} + +} /* extern C */ diff --git a/parser/parser.h b/parser/parser.h index eecf8512e..606131e55 100644 --- a/parser/parser.h +++ b/parser/parser.h @@ -19,6 +19,7 @@ #include #include "pcre/internal.h" #include "immunix.h" +#include
"libapparmor_re/apparmor_re.h" typedef enum pattern_t pattern_t; @@ -37,8 +38,8 @@ struct cod_entry { char * name ; struct codomain *codomain ; /* Special codomain defined * just for this executable */ - int mode ; /* mode is 'or' of KERN_COD_* bits */ - int deny ; /* TRUE or FALSE */ + int mode ; /* mode is 'or' of AA_* bits */ + int deny ; /* TRUE or FALSE */ pattern_t pattern_type; struct cod_pattern pat; @@ -68,6 +69,11 @@ struct codomain { struct cod_net_entry * net_entries; void *hat_table; //struct codomain *next; + + aare_ruleset_t *dfarules; + int dfarule_count; + void *dfa; + size_t dfa_size; } ; struct cod_global_entry { @@ -116,6 +122,10 @@ struct var_string { #define OPTION_REPLACE 3 #define OPTION_STDOUT 4 +#define AARE_NONE 0 +#define AARE_PCRE 1 +#define AARE_DFA 2 + #ifdef DEBUG #define PDEBUG(fmt, args...) printf("parser: " fmt, ## args) #else @@ -139,6 +149,11 @@ struct var_string { #define __unused __attribute__ ((unused)) #endif +#define list_for_each(LIST, ENTRY) \ + for ((ENTRY) = (LIST); (ENTRY); (ENTRY) = (ENTRY)->next) +#define list_last_entry(LIST, ENTRY) \ + for ((ENTRY) = (LIST); (ENTRY) && (ENTRY)->next; (ENTRY) = (ENTRY)->next) + /* Some external definitions to make b0rken programs happy */ extern char *progname; extern char *subdomainbase; @@ -146,6 +161,7 @@ extern char *profilename; /* from parser_main */ extern int force_complain; +extern int regex_type; extern void pwarn(char *fmt, ...) 
__attribute__((__format__(__printf__, 1, 2))); extern int yyparse(void); diff --git a/parser/parser_interface.c b/parser/parser_interface.c index 13d68f406..5a0ecba95 100644 --- a/parser/parser_interface.c +++ b/parser/parser_interface.c @@ -26,6 +26,7 @@ #define _(s) gettext(s) #include "parser.h" +#include "libapparmor_re/apparmor_re.h" #include #include @@ -55,6 +56,7 @@ #define SD_STR_LEN (sizeof(u16)) #define SUBDOMAIN_INTERFACE_VERSION 2 +#define SUBDOMAIN_INTERFACE_DFA_VERSION 3 int sd_serialize_codomain(int option, struct codomain *cod); @@ -334,6 +336,27 @@ inline int sd_write_blob(sd_serialize *p, void *b, int buf_size, char *name) return 1; } +#define align64(X) (((size_t) (X) + (size_t) 7) & ~((size_t) 7)) +inline int sd_write_aligned_blob(sd_serialize *p, void *b, int buf_size, + char *name) +{ + size_t pad; + u32 tmp; + if (!sd_write_name(p, name)) + return 0; + pad = align64((p->pos + 5) - p->buffer) - ((p->pos + 5) - p->buffer); + if (!sd_prepare_write(p, SD_BLOB, 4 + buf_size + pad)) + return 0; + tmp = cpu_to_le32(buf_size + pad); + memcpy(p->pos, &tmp, sizeof(tmp)); + sd_inc(p, sizeof(tmp)); + memset(p->pos, 0, pad); + sd_inc(p, pad); + memcpy(p->pos, b, buf_size); + sd_inc(p, buf_size); + return 1; +} + inline int sd_write_string(sd_serialize *p, char *b, char *name) { u16 tmp; @@ -470,12 +493,20 @@ int sd_serialize_file_entry(sd_serialize *p, struct cod_entry *file_entry) return 1; } +int sd_serialize_dfa(sd_serialize *p, void *dfa, size_t size) +{ + if (dfa && !sd_write_aligned_blob(p, dfa, size, "aadfa")) + return 0; + + return 1; +} + int count_file_ents(struct cod_entry *list) { - struct cod_entry *file_entry; + struct cod_entry *entry; int count = 0; - for (file_entry = list; file_entry; file_entry = file_entry->next) { - if (file_entry->pattern_type == ePatternBasic) { + list_for_each(list, entry) { + if (entry->pattern_type == ePatternBasic) { count++; } } @@ -484,10 +515,10 @@ int count_file_ents(struct cod_entry *list) int 
count_tailglob_ents(struct cod_entry *list) { - struct cod_entry *file_entry; + struct cod_entry *entry; int count = 0; - for (file_entry = list; file_entry; file_entry = file_entry->next) { - if (file_entry->pattern_type == ePatternTailGlob) { + list_for_each(list, entry) { + if (entry->pattern_type == ePatternTailGlob) { count++; } } @@ -496,10 +527,10 @@ int count_tailglob_ents(struct cod_entry *list) int count_pcre_ents(struct cod_entry *list) { - struct cod_entry *file_entry; + struct cod_entry *entry; int count = 0; - for (file_entry = list; file_entry; file_entry = file_entry->next) { - if (file_entry->pattern_type == ePatternRegex) { + list_for_each(list, entry) { + if (entry->pattern_type == ePatternRegex) { count++; } } @@ -508,7 +539,7 @@ int count_pcre_ents(struct cod_entry *list) int sd_serialize_profile(sd_serialize *p, struct codomain *profile) { - struct cod_entry *file_entry; + struct cod_entry *entry; struct cod_net_entry *net_entry; if (!sd_write_struct(p, "profile")) @@ -529,55 +560,58 @@ int sd_serialize_profile(sd_serialize *p, struct codomain *profile) if (!sd_write32(p, profile->capabilities)) return 0; - /* pcre globbing entries */ - if (count_pcre_ents(profile->entries)) { - if (!sd_write_list(p, "pgent")) + /* either have a single dfa or lists of different entry types */ + if (regex_type == AARE_DFA) { + if (!sd_serialize_dfa(p, profile->dfa, profile->dfa_size)) return 0; - for (file_entry = profile->entries; file_entry; - file_entry = file_entry->next) { - if (file_entry->pattern_type == ePatternRegex) { - if (!sd_serialize_file_entry(p, file_entry)) - return 0; + } else { + /* pcre globbing entries */ + if (count_pcre_ents(profile->entries)) { + if (!sd_write_list(p, "pgent")) + return 0; + list_for_each(profile->entries, entry) { + if (entry->pattern_type == ePatternRegex) { + if (!sd_serialize_file_entry(p, entry)) + return 0; + } } + if (!sd_write_listend(p)) + return 0; + } + + /* simple globbing entries */ + if 
(count_tailglob_ents(profile->entries)) { + if (!sd_write_list(p, "sgent")) + return 0; + list_for_each(profile->entries, entry) { + if (entry->pattern_type == ePatternTailGlob) { + if (!sd_serialize_file_entry(p, entry)) + return 0; + } + } + if (!sd_write_listend(p)) + return 0; + } + + /* basic file entries */ + if (count_file_ents(profile->entries)) { + if (!sd_write_list(p, "fent")) + return 0; + list_for_each(profile->entries, entry) { + if (entry->pattern_type == ePatternBasic) { + if (!sd_serialize_file_entry(p, entry)) + return 0; + } + } + if (!sd_write_listend(p)) + return 0; } - if (!sd_write_listend(p)) - return 0; } - /* simple globbing entries */ - if (count_tailglob_ents(profile->entries)) { - if (!sd_write_list(p, "sgent")) - return 0; - for (file_entry = profile->entries; file_entry; - file_entry = file_entry->next) { - if (file_entry->pattern_type == ePatternTailGlob) { - if (!sd_serialize_file_entry(p, file_entry)) - return 0; - } - } - if (!sd_write_listend(p)) - return 0; - } - - /* basic file entries */ - if (count_file_ents(profile->entries)) { - if (!sd_write_list(p, "fent")) - return 0; - for (file_entry = profile->entries; file_entry; - file_entry = file_entry->next) { - if (file_entry->pattern_type == ePatternBasic) { - if (!sd_serialize_file_entry(p, file_entry)) - return 0; - } - } - if (!sd_write_listend(p)) - return 0; - } - - if (profile->net_entries) { + if (profile->net_entries && (regex_type != AARE_DFA)) { if (!sd_write_list(p, "net")) return 0; - for (net_entry = profile->net_entries; net_entry; net_entry = net_entry->next) { + list_for_each(profile->net_entries, net_entry) { if (!sd_serialize_net_entry(p, net_entry)) return 0; } @@ -603,9 +637,18 @@ int sd_serialize_profile(sd_serialize *p, struct codomain *profile) int sd_serialize_top_profile(sd_serialize *p, struct codomain *profile) { + int version; + + if (regex_type == AARE_DFA) + version = SUBDOMAIN_INTERFACE_DFA_VERSION; + else + version = SUBDOMAIN_INTERFACE_VERSION; 
+ + if (!sd_write_name(p, "version")) return 0; - if (!sd_write32(p, SUBDOMAIN_INTERFACE_VERSION)) + + if (!sd_write32(p, version)) return 0; return sd_serialize_profile(p, profile); } diff --git a/parser/parser_main.c b/parser/parser_main.c index 50d54494d..b05605bfe 100644 --- a/parser/parser_main.c +++ b/parser/parser_main.c @@ -48,6 +48,7 @@ #define MATCH_STRING "/sys/kernel/security/" MODULE_NAME "/matching" #define MOUNTED_FS "/proc/mounts" #define PCRE "pattern=pcre" +#define AADFA "pattern=aadfa" #define UNPRIVILEGED_OPS (debug || preprocess_only || option == OPTION_STDOUT || names_only || \ dump_vars || dump_expanded_vars) @@ -65,6 +66,8 @@ int conf_quiet = 0; char *subdomainbase = NULL; char *profilename; char *match_string = NULL; +int regex_type = AARE_NONE; + extern int current_lineno; struct option long_options[] = { @@ -264,99 +267,52 @@ static inline char *try_subdomainfs_mountpoint(const char *mntpnt, return retval; } -void find_subdomainfs_mountpoint(void) +int find_subdomainfs_mountpoint(void) { FILE *mntfile; struct mntent *mntpt; - if (!(mntfile = setmntent(MOUNTED_FS, "r"))) { - /* Ugh, what's the right default if you can't open /proc/mounts? */ - PERROR(_("Warning: unable to open %s, attempting to use %s\n" - "as the subdomainfs location. 
Use --subdomainfs to override.\n"), - MOUNTED_FS, DEFAULT_APPARMORFS); - subdomainbase = DEFAULT_APPARMORFS; - return; - } - - while ((mntpt = getmntent(mntfile))) { - char *proposed = NULL; - if (strcmp(mntpt->mnt_type, "securityfs") == 0) { - proposed = try_subdomainfs_mountpoint(mntpt->mnt_dir, "/" MODULE_NAME); - if (proposed != NULL) { - subdomainbase = proposed; - break; + if ((mntfile = setmntent(MOUNTED_FS, "r"))) { + while ((mntpt = getmntent(mntfile))) { + char *proposed = NULL; + if (strcmp(mntpt->mnt_type, "securityfs") == 0) { + proposed = try_subdomainfs_mountpoint(mntpt->mnt_dir, "/" MODULE_NAME); + if (proposed != NULL) { + subdomainbase = proposed; + break; + } + proposed = try_subdomainfs_mountpoint(mntpt->mnt_dir, "/" OLD_MODULE_NAME); + if (proposed != NULL) { + subdomainbase = proposed; + break; + } } - proposed = try_subdomainfs_mountpoint(mntpt->mnt_dir, "/" OLD_MODULE_NAME); - if (proposed != NULL) { - subdomainbase = proposed; - break; - } - } - if (strcmp(mntpt->mnt_type, "subdomainfs") == 0) { - proposed = try_subdomainfs_mountpoint(mntpt->mnt_dir, ""); - if (proposed != NULL) { - subdomainbase = proposed; - break; + if (strcmp(mntpt->mnt_type, "subdomainfs") == 0) { + proposed = try_subdomainfs_mountpoint(mntpt->mnt_dir, ""); + if (proposed != NULL) { + subdomainbase = proposed; + break; + } } } + endmntent(mntfile); } if (!subdomainbase) { - PERROR(_("Warning: unable to find a suitable fs in %s, is it mounted?\n" - "Attempting to use %s as the subdomainfs location.\n" - "Use --subdomainfs to override.\n"), - MOUNTED_FS, DEFAULT_APPARMORFS); - subdomainbase = DEFAULT_APPARMORFS; - } - endmntent(mntfile); -} - -int is_module_loaded(void) -{ - char *query_failed = NULL; - int module_loaded = 0; - int mlen = strlen(MODULE_NAME); - int oldmlen = strlen(OLD_MODULE_NAME); - FILE *fp; - - fp = fopen(PROC_MODULES, "r"); - if (fp) { - while (!feof(fp)) { - const int buflen = 256; - char buf[buflen]; - - if (fgets(buf, buflen, fp)) { - buf[buflen 
- 1] = 0; - - if (strncmp(buf, MODULE_NAME, mlen) == 0 && - buf[mlen] == ' ') { - module_loaded = 1; - } - if (strncmp(buf, OLD_MODULE_NAME, oldmlen) == 0 && - buf[oldmlen] == ' ') { - module_loaded = 1; - } - } + struct stat buf; + if (stat(DEFAULT_APPARMORFS, &buf) == -1) { + PERROR(_("Warning: unable to find a suitable fs in %s, is it " + "mounted?\nUse --subdomainfs to override.\n"), + MOUNTED_FS); + } else { + subdomainbase = DEFAULT_APPARMORFS; } - (void)fclose(fp); - } else { - query_failed = "unable to open " PROC_MODULES; } - if (query_failed) { - PERROR(_("%s: Unable to query modules - '%s'\n" - "Either modules are disabled or your kernel is" - " too old.\n"), progname, query_failed); - return 1; - } else if (!module_loaded) { - PERROR(_("%s: Unable to find " MODULE_NAME "!\n" - "Ensure that it has been loaded.\n"), progname); - return 1; - } - - return 0; + return (subdomainbase == NULL); } + int have_enough_privilege(void) { uid_t uid, euid; @@ -387,7 +343,7 @@ static void get_match_string(void) { /* has process_args() already assigned a match string? */ if (match_string) - return; + goto out; FILE *ms = fopen(MATCH_STRING, "r"); if (!ms) @@ -404,22 +360,28 @@ static void get_match_string(void) { } out: - fclose(ms); + if (match_string) { + if (strstr(match_string, PCRE)) + regex_type = AARE_PCRE; + + if (strstr(match_string, AADFA)) + regex_type = AARE_DFA; + } + + if (ms) + fclose(ms); return; } /* return 1 --> PCRE should work fine return 0 --> no PCRE support */ -static int pcre_support(void) { - - get_match_string(); - +static int regex_support(void) { /* no match string, predates (or postdates?) 
the split matching module design */ if (!match_string) return 1; - if (strstr(match_string, PCRE)) + if (regex_type != AARE_NONE) return 1; return 0; @@ -437,6 +399,9 @@ int process_profile(int option, char *profilename) if (retval != 0) goto out; + /* Get the match string to determine type of regex support needed */ + get_match_string(); + retval = post_process_policy(); if (retval != 0) { PERROR(_("%s: Errors found in file. Aborting.\n"), progname); @@ -467,10 +432,7 @@ int process_profile(int option, char *profilename) goto out; } - if (!subdomainbase && !preprocess_only && !(option == OPTION_STDOUT)) - find_subdomainfs_mountpoint(); - - if (!pcre_support()) { + if (!regex_support()) { die_if_any_regex(); } @@ -502,8 +464,9 @@ int main(int argc, char *argv[]) return retval; } - /* Check to make sure modules are enabled */ - if (!(UNPRIVILEGED_OPS) && ((retval = is_module_loaded()))) { + /* Check to make sure there is an interface to load policy */ + if (!(UNPRIVILEGED_OPS) && (subdomainbase == NULL) && + (retval = find_subdomainfs_mountpoint())) { return retval; } diff --git a/parser/parser_misc.c b/parser/parser_misc.c index e7adee6d7..99781cfc2 100644 --- a/parser/parser_misc.c +++ b/parser/parser_misc.c @@ -296,17 +296,17 @@ reeval: switch (this) { case COD_READ_CHAR: PDEBUG("Parsing mode: found READ\n"); - mode |= KERN_COD_MAY_READ; + mode |= AA_MAY_READ; break; case COD_WRITE_CHAR: PDEBUG("Parsing mode: found WRITE\n"); - mode |= KERN_COD_MAY_WRITE; + mode |= AA_MAY_WRITE; break; case COD_LINK_CHAR: PDEBUG("Parsing mode: found LINK\n"); - mode |= KERN_COD_MAY_LINK; + mode |= AA_MAY_LINK; break; case COD_INHERIT_CHAR: @@ -319,13 +319,13 @@ reeval: if (next != tolower(next)) warn_uppercase(); mode |= - (KERN_COD_EXEC_INHERIT | KERN_COD_MAY_EXEC); + (AA_EXEC_INHERIT | AA_MAY_EXEC); p++; /* skip 'x' */ } break; case COD_UNSAFE_UNCONSTRAINED_CHAR: - mode |= KERN_COD_EXEC_UNSAFE; + mode |= AA_EXEC_UNSAFE; pwarn(_("Unconstrained exec qualifier (%c%c) allows some 
dangerous environment variables " "to be passed to the unconfined process; 'man 5 apparmor.d' for details.\n"), COD_UNSAFE_UNCONSTRAINED_CHAR, COD_EXEC_CHAR); @@ -342,14 +342,14 @@ reeval: if (next != tolower(next)) warn_uppercase(); mode |= - (KERN_COD_EXEC_UNCONSTRAINED | - KERN_COD_MAY_EXEC); + (AA_EXEC_UNCONSTRAINED | + AA_MAY_EXEC); p++; /* skip 'x' */ } break; case COD_UNSAFE_PROFILE_CHAR: - mode |= KERN_COD_EXEC_UNSAFE; + mode |= AA_EXEC_UNSAFE; /* fall through */ case COD_PROFILE_CHAR: PDEBUG("Parsing mode: found PROFILE\n"); @@ -363,14 +363,14 @@ reeval: if (next != tolower(next)) warn_uppercase(); mode |= - (KERN_COD_EXEC_PROFILE | KERN_COD_MAY_EXEC); + (AA_EXEC_PROFILE | AA_MAY_EXEC); p++; /* skip 'x' */ } break; case COD_MMAP_CHAR: PDEBUG("Parsing mode: found MMAP\n"); - mode |= KERN_COD_EXEC_MMAP; + mode |= AA_EXEC_MMAP; break; case COD_EXEC_CHAR: @@ -557,7 +557,7 @@ void debug_cod_entries(struct cod_entry *list) printf("--- Entries ---\n"); - for (item = list; item; item = item->next) { + list_for_each(list, item) { if (!item) printf("Item is NULL!\n"); @@ -603,7 +603,7 @@ void debug_cod_net_entries(struct cod_net_entry *list) printf("--- NetwerkEntries --- \n"); - for (item = list; item; item = item->next) { + list_for_each(list, item) { if (!item) printf("Item is NULL"); @@ -623,17 +623,17 @@ void debug_cod_net_entries(struct cod_net_entry *list) printf("Destination netmask: %lx\n", dmask); fflush(stdout); printf("Mode:\t"); - if (item->mode & KERN_COD_TCP_ACCEPT) + if (item->mode & AA_TCP_ACCEPT) printf("TA"); - if (item->mode & KERN_COD_TCP_CONNECT) + if (item->mode & AA_TCP_CONNECT) printf("TC"); - if (item->mode & KERN_COD_TCP_ACCEPTED) + if (item->mode & AA_TCP_ACCEPTED) printf("Ta"); - if (item->mode & KERN_COD_TCP_CONNECTED) + if (item->mode & AA_TCP_CONNECTED) printf("Tc"); - if (item->mode & KERN_COD_UDP_SEND) + if (item->mode & AA_UDP_SEND) printf("US"); - if (item->mode & KERN_COD_UDP_RECEIVE) + if (item->mode & AA_UDP_RECEIVE) 
printf("UR"); if (item->iface != NULL) printf("\nInterface: %s\n", item->iface); diff --git a/parser/parser_policy.c b/parser/parser_policy.c index 3f98014d9..850ae0801 100644 --- a/parser/parser_policy.c +++ b/parser/parser_policy.c @@ -159,7 +159,7 @@ static int any_regex_entries(struct cod_entry *entry_list) { struct cod_entry *entry; - for (entry = entry_list; entry; entry = entry->next) { + list_for_each(entry_list, entry) { if (entry->pattern_type == ePatternRegex) return TRUE; } @@ -374,8 +374,7 @@ struct codomain *merge_policy(struct codomain *a, struct codomain *b) } if (a->entries) { - for (last = a->entries; last->next; last = last->next) - /* do nothing */ ; + list_last_entry(a->entries, last); last->next = b->entries; } else { a->entries = b->entries; @@ -383,8 +382,7 @@ struct codomain *merge_policy(struct codomain *a, struct codomain *b) b->entries = NULL; if (a->net_entries) { - for (lastnet = a->net_entries; lastnet->next; lastnet = lastnet->next) - /* do nothing */ ; + list_last_entry(a->net_entries, lastnet); lastnet->next = b->net_entries; } else { a->net_entries = b->net_entries; @@ -451,5 +449,9 @@ void free_policy(struct codomain *cod) free_hat_table(cod->hat_table); free_cod_entries(cod->entries); free_net_entries(cod->net_entries); + if (cod->dfarules) + aare_delete_ruleset(cod->dfarules); + if (cod->dfa) + free(cod->dfa); free(cod); } diff --git a/parser/parser_regex.c b/parser/parser_regex.c index e23e18fc9..4c8d31a74 100644 --- a/parser/parser_regex.c +++ b/parser/parser_regex.c @@ -27,6 +27,7 @@ /* #define DEBUG */ #include "parser.h" +#include "libapparmor_re/apparmor_re.h" enum error_type { e_no_error, @@ -113,10 +114,11 @@ static void filter_slashes(char *path) } } -static int process_regex_entry(struct cod_entry *entry) +static pattern_t convert_aaregex_to_pcre(const char *aare, int anchor, + char *pcre, size_t pcre_size) { #define STORE(_src, _dest, _len) \ - if ((const char*)_dest + _len > tbufend){ \ + if ((const char*)_dest + 
_len > (pcre + pcre_size)){ \ error = e_buffer_overflow; \ } else { \ memcpy(_dest, _src, _len); \ @@ -128,9 +130,6 @@ static int process_regex_entry(struct cod_entry *entry) /* flag to indicate input error */ enum error_type error; - char tbuf[PATH_MAX + 3]; /* +3 for ^, $ and \0 */ - const char *tbufend = &tbuf[PATH_MAX]; - const char *sptr; char *dptr; pattern_t ptype; @@ -142,14 +141,12 @@ static int process_regex_entry(struct cod_entry *entry) error = e_no_error; ptype = ePatternBasic; /* assume no regex */ - if (!entry) /* shouldn't happen */ - return TRUE; + sptr = aare; + dptr = pcre; - sptr = entry->name; - dptr = tbuf; - - /* anchor beginning of regular expression */ - *dptr++ = '^'; + if (anchor) + /* anchor beginning of regular expression */ + *dptr++ = '^'; while (error == e_no_error && *sptr) { switch (*sptr) { @@ -342,12 +339,12 @@ static int process_regex_entry(struct cod_entry *entry) } /* anchor end and terminate pattern string */ - if (error == e_no_error) { - char buf[2] = { '$', 0 }; - - STORE(buf, dptr, 2); + if ((error == e_no_error) && anchor) { + STORE("$" , dptr, 1); + } + if (error == e_no_error) { + STORE("", dptr, 1); } - /* check error again, as above STORE may have set it */ if (error != e_no_error) { if (error == e_buffer_overflow) { @@ -356,12 +353,31 @@ static int process_regex_entry(struct cod_entry *entry) } PERROR(_("%s: Unable to parse input line '%s'\n"), - progname, entry->name); + progname, aare); ret = FALSE; goto out; } +out: + if (ret == FALSE) + ptype = ePatternInvalid; + return ptype; +} + +static int process_pcre_entry(struct cod_entry *entry) +{ + char tbuf[PATH_MAX + 3]; /* +3 for ^, $ and \0 */ + int ret = TRUE; + pattern_t ptype; + + if (!entry) /* shouldn't happen */ + return TRUE; + + ptype = convert_aaregex_to_pcre(entry->name, 1, tbuf, PATH_MAX + 3); + if (ptype == ePatternInvalid) + return FALSE; + entry->pattern_type = ptype; /* @@ -422,33 +438,83 @@ static int process_regex_entry(struct cod_entry *entry) 
filter_escapes(entry->name); } /* ptype == ePatternRegex */ -out: return ret; } -int post_process_entries(struct cod_entry *entry_list) +static int process_dfa_entry(aare_ruleset_t *dfarules, struct cod_entry *entry) +{ + char tbuf[PATH_MAX + 3]; /* +3 for ^, $ and \0 */ + int ret = TRUE; + pattern_t ptype; + + if (!entry) /* shouldn't happen */ + return TRUE; + + ptype = convert_aaregex_to_pcre(entry->name, 0, tbuf, PATH_MAX + 3); + if (ptype == ePatternInvalid) + return FALSE; + + entry->pattern_type = ptype; + + /* ix implies m but the apparmor module does not add m bit to + * dfa states like it does for pcre + */ + if (entry->mode & AA_EXEC_INHERIT) + entry->mode |= AA_EXEC_MMAP; + if (!aare_add_rule(dfarules, tbuf, entry->mode)) + ret = FALSE; + + return ret; +} + +int post_process_entries(struct codomain *cod) { int ret = TRUE, rc; struct cod_entry *entry; + int count = 0; - for (entry = entry_list; entry; entry = entry->next) { + list_for_each(cod->entries, entry) { filter_slashes(entry->name); - rc = process_regex_entry(entry); + if (regex_type == AARE_DFA) + rc = process_dfa_entry(cod->dfarules, entry); + else + rc = process_pcre_entry(entry); if (!rc) ret = FALSE; + count++; } + cod->dfarule_count = count; return ret; } int process_regex(struct codomain *cod) { - int error = 0; + int error = -1; - if (!post_process_entries(cod->entries)) { - error = -1; + if (regex_type == AARE_DFA) { + cod->dfarules = aare_new_ruleset(0); + if (!cod->dfarules) + goto out; } + if (!post_process_entries(cod)) + goto out; + if (regex_type == AARE_DFA && cod->dfarule_count > 0) { + cod->dfa = aare_create_dfa(cod->dfarules, 0, &cod->dfa_size); + if (!cod->dfa) + goto out; +/* + if (cod->dfa_size == 0) { + PERROR(_("profile %s: has merged rules (%s) with " + "multiple x modifiers\n"), + cod->name, (char *) cod->dfa); + free(cod->dfa); + cod->dfa = NULL; + goto out; + } +*/ + } /* * Post process subdomain(s): * @@ -464,8 +530,11 @@ int process_regex(struct codomain *cod) * } */ 
if (process_hat_regex(cod) != 0) - error = -1; + goto out; + error = 0; + +out: return error; } diff --git a/parser/parser_variable.c b/parser/parser_variable.c index 6880a7ba5..a01520ab8 100644 --- a/parser/parser_variable.c +++ b/parser/parser_variable.c @@ -184,7 +184,7 @@ static int process_variables_in_entries(struct cod_entry *entry_list) int ret = TRUE, rc; struct cod_entry *entry; - for (entry = entry_list; entry; entry = entry->next) { + list_for_each(entry_list, entry) { rc = expand_entry_variables(entry); if (!rc) ret = FALSE; diff --git a/parser/parser_yacc.y b/parser/parser_yacc.y index 5e7000e4d..08d0eee2c 100644 --- a/parser/parser_yacc.y +++ b/parser/parser_yacc.y @@ -599,12 +599,12 @@ netrule: action addresses interface TOK_END_OF_RULE $$ = entry; }; -action: TOK_TCP_CONN { $$ = KERN_COD_TCP_CONNECT; } - | TOK_TCP_ACPT { $$ = KERN_COD_TCP_ACCEPT; } - | TOK_TCP_CONN_ESTB { $$ = KERN_COD_TCP_CONNECTED; } - | TOK_TCP_ACPT_ESTB { $$ = KERN_COD_TCP_ACCEPTED; } - | TOK_UDP_SEND { $$ = KERN_COD_UDP_SEND; } - | TOK_UDP_RECV { $$ = KERN_COD_UDP_RECEIVE; } +action: TOK_TCP_CONN { $$ = AA_TCP_CONNECT; } + | TOK_TCP_ACPT { $$ = AA_TCP_ACCEPT; } + | TOK_TCP_CONN_ESTB { $$ = AA_TCP_CONNECTED; } + | TOK_TCP_ACPT_ESTB { $$ = AA_TCP_ACCEPTED; } + | TOK_UDP_SEND { $$ = AA_UDP_SEND; } + | TOK_UDP_RECV { $$ = AA_UDP_RECEIVE; } ;