Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

RuleMatcher.h

Go to the documentation of this file.
00001 // $Id: RuleMatcher.h,v 1.5 2004/10/05 05:44:59 vern Exp $
00002 
00003 #ifndef sigs_h
00004 #define sigs_h
00005 
00006 #include <limits.h>
00007 
00008 #include "BroString.h"
00009 #include "List.h"
00010 #include "RE.h"
00011 #include "Net.h"
00012 #include "Sessions.h"
00013 #include "IntSet.h"
00014 #include "util.h"
00015 #include "Rule.h"
00016 #include "RuleAction.h"
00017 #include "RuleCondition.h"
00018 
00019 //#define MATCHER_PRINT_STATS
00020 //#define MATCHER_PRINT_DEBUG
00021 
00022 extern int rule_bench;
00023 
00024 // Parser interface:
00025 
00026 extern void rules_error(const char* msg);
00027 extern void rules_error(const char* msg, const char* addl);
00028 extern void rules_error(Rule* id, const char* msg);
00029 extern int rules_lex(void);
00030 extern int rules_parse(void);
00031 extern "C" int rules_wrap(void);        // declared with C linkage
00032 extern FILE* rules_in;
00033 extern int rules_line_number;
00034 extern const char* current_rule_file;
00035 
00036 class RuleMatcher;      // forward declaration; the class is declared further down
00037 extern RuleMatcher* rule_matcher;
00038 
00039 
00040 // RuleHdrTest and associated things:
00041 
00042 // Given a header expression like "ip[offset:len] & mask = val", we parse
00043 // it into a Range and a MaskedValue.
00044 struct Range {
00045         uint32 offset;  // the "offset" part of "ip[offset:len]"
00046         uint32 len;     // the "len" part of "ip[offset:len]"
00047 };
00048 
00049 struct MaskedValue {
00050         uint32 val;     // the "val" part of "... & mask = val"
00051         uint32 mask;    // the "mask" part of "... & mask = val"
00052 };
00053 
00054 declare(PList, MaskedValue);
00055 typedef PList(MaskedValue) maskedvalue_list;    // type of RuleHdrTest::vals
00056 
00057 typedef PList(char) string_list;        // type of RuleHdrTest::PatternSet::patterns
00058 
00059 declare(PList, BroString);
00060 typedef PList(BroString) bstr_list;     // type of RuleEndpointState::matched_text
00061 
00062 // Get values from Bro's script-level variables.
00063 extern void id_to_maskedvallist(const char* id, maskedvalue_list* append_to);
00064 extern char* id_to_str(const char* id);
00065 extern uint32 id_to_uint(const char* id);
00066 
00067 class RuleHdrTest {     // one header test; instances are linked into a tree via sibling/child
00068 public:
00069         enum Comp { LE, GE, LT, GT, EQ, NE };
00070         enum Prot { NOPROT, IP, ICMP, TCP, UDP };
00071 
00072         RuleHdrTest(Prot arg_prot, uint32 arg_offset, uint32 arg_size,
00073                         Comp arg_comp, maskedvalue_list* arg_vals);
00074         ~RuleHdrTest();
00075 
00076         void PrintDebug();
00077 
00078 private:
00079         // The copy constructor does not copy those attributes which are
00080         // set by RuleMatcher::BuildRulesTree() (see below).
00081         RuleHdrTest(RuleHdrTest& h);
00082                 // The argument should be const, but the lists don't have a const version.
00083 
00084         // Likewise, operator== checks only for the same test semantics.
00085         bool operator==(const RuleHdrTest& h);
00086 
00087         Prot prot;
00088         Comp comp;
00089         maskedvalue_list* vals;
00090         uint32 offset;
00091         uint32 size;
00092 
00093         uint32 id;      // For debugging, each HdrTest gets a unique ID
00094         static uint32 idcounter;
00095 
00096         // The following are all set by RuleMatcher::BuildRulesTree().
00097         friend class RuleMatcher;
00098 
00099         struct PatternSet {
00100                 PatternSet() {}
00101 
00102                 // If we're above the 'RE_level' (see RuleMatcher), this
00103                 // expr contains all patterns on this node. If we're on
00104                 // 'RE_level', it additionally contains all patterns
00105                 // of any of its children.
00106                 Specific_RE_Matcher* re;
00107 
00108                 // All the patterns and their rule indices.
00109                 string_list patterns;
00110                 int_list ids;   // (only needed for debugging)
00111         };
00112 
00113         declare(PList, PatternSet);
00114         typedef PList(PatternSet) pattern_set_list;
00115         pattern_set_list psets[Rule::TYPES];    // one list of pattern sets per Rule::TYPES entry
00116 
00117         // List of rules belonging to this node.
00118         Rule* pattern_rules;    // rules w/ at least one pattern of any type
00119         Rule* pure_rules;       // rules containing no patterns at all
00120 
00121         IntSet* ruleset;        // set of all rules belonging to this node
00122                                 // (for fast membership test)
00123 
00124         RuleHdrTest* sibling;   // linkage within HdrTest tree
00125         RuleHdrTest* child;
00126 
00127         int level;      // level within the tree
00128 };
00129 
00130 declare(PList, RuleHdrTest);
00131 typedef PList(RuleHdrTest) rule_hdr_test_list;  // type of RuleEndpointState::hdr_tests
00132 
00133 // RuleEndpointState keeps the per-stream matching state of one
00134 // connection endpoint.
00135 class RuleEndpointState {
00136 public:
00137         ~RuleEndpointState();
00138 
00139         Connection* Conn()      { return conn; }
00140         bool IsOrig()           { return is_orig; }
00141 
00142         // For flipping roles.
00143         void FlipIsOrig()       { is_orig = ! is_orig; }
00144 
00145         // Returns the size of the first non-empty chunk of
00146         //   data fed into the RULE_PAYLOAD matcher.
00147         // Returns 0 iff only empty chunks have been fed.
00148         // Returns -1 if no chunk has been fed yet at all.
00149         int PayloadSize()       { return payload_size; }
00150 
00151 private:
00152         friend class RuleMatcher;
00153 
00154         // Constructor is private; use RuleMatcher::InitEndpoint()
00155         // for creating an instance.
00156         RuleEndpointState(Connection* arg_conn, bool arg_is_orig,
00157                                 RuleEndpointState* arg_opposite)
00158                 {
00159                 payload_size = -1;      // no chunk fed yet (see PayloadSize())
00160                 conn = arg_conn;
00161                 opposite = arg_opposite;
00162                 if ( opposite )
00163                         opposite->opposite = this;      // link the two endpoints to each other
00164                 is_orig = arg_is_orig;
00165                 }
00166 
00167         struct Matcher {
00168                 RE_Match_State* state;
00169                 Rule::PatternType type;
00170         };
00171 
00172         declare(PList, Matcher);
00173         typedef PList(Matcher) matcher_list;
00174 
00175         bool is_orig;
00176         Connection* conn;
00177         RuleEndpointState* opposite;
00178 
00179         matcher_list matchers;
00180         rule_hdr_test_list hdr_tests;
00181 
00182         // The following tracks the rules for which all patterns have matched,
00183         // and in a parallel list the (first instance of the) corresponding
00184         // matched text.
00185         rule_list matched_by_patterns;
00186         bstr_list matched_text;
00187 
00188         int payload_size;
00189 
00190         int_list matched_rules;         // Rules for which all conditions have matched
00191 };
00192 
00193 
00194 // RuleMatcher is the main class which builds up the data structures
00195 // and performs the actual matching.
00196 
00197 class RuleMatcher {
00198 public:
00199         // Argument is the tree level on which we build combined regexps
00200         // (level 0 is the root).
00201         RuleMatcher(int RE_level = 4);
00202         ~RuleMatcher();
00203 
00204         // Parse the given files and build up the data structures.
00205         bool ReadFiles(const name_list& files);
00206 
00207         // Initialize the matching state for an endpoint of a connection
00208         // based on the given packet (which should be the first packet
00209         // encountered for this endpoint).
00210         RuleEndpointState* InitEndpoint(Connection* conn, const IP_Hdr* ip,
00211                 int caplen, RuleEndpointState* opposite);
00212 
00213         // Finish matching for this stream.
00214         void FinishEndpoint(RuleEndpointState* state);
00215 
00216         // Perform the actual pattern matching on the given data.
00217         // bol/eol should be set to false for type Rule::PAYLOAD; they're
00218         // deduced automatically.
00219         void Match(RuleEndpointState* state, Rule::PatternType type,
00220                         const u_char* data, int data_len, bool bol, bool eol);
00221 
00222         // Reset the state of the pattern matcher for this endpoint.
00223         void ClearEndpointState(RuleEndpointState* state);
00224 
00225         void PrintDebug();
00226 
00227         // Interface to the parser.
00228         void AddRule(Rule* rule);
00229         void SetParseError()            { parse_error = true; }
00230 
00231         // Interface for getting some statistics.
00232         struct Stats {
00233                 unsigned int matchers;  // # distinct RE matchers
00234 
00235                 // # DFA states across all matchers
00236                 unsigned int dfa_states;
00237                 unsigned int computed;  // # computed DFA state transitions
00238                 unsigned int mem;       // # bytes used by DFA states
00239 
00240                 // # cache hits (sampled, multiply by MOVE_TO_FRONT_SAMPLE_SIZE)
00241                 unsigned int hits;
00242                 unsigned int misses;    // # cache misses
00243 
00244                 // Average # NFA states per DFA state.
00245                 unsigned int avg_nfa_states;
00246         };
00247 
00248         Val* BuildRuleStateValue(const Rule* rule,
00249                                         const RuleEndpointState* state) const;
00250 
00251         void GetStats(Stats* stats, RuleHdrTest* hdr_test = 0);
00252         void DumpStats(BroFile* f);
00253 
00254 private:
00255         // Delete node and all children.
00256         void Delete(RuleHdrTest* node);
00257 
00258         // Build tree containing all added rules.
00259         void BuildRulesTree();
00260 
00261         // Insert one rule into the current tree.
00262         void InsertRuleIntoTree(Rule* r, int testnr, RuleHdrTest* dest,
00263                                 int level);
00264 
00265         // Traverse tree building the combined regular expressions.
00266         void BuildRegEx(RuleHdrTest* hdr_test, string_list* exprs, int_list* ids);
00267 
00268         // Build groups of regular expressions.
00269         void BuildPatternSets(RuleHdrTest::pattern_set_list* dst,
00270                                 const string_list& exprs, const int_list& ids);
00271 
00272         // Check whether an arbitrary rule is satisfied right now.
00273         // eos signals end of stream.
00274         void ExecRule(Rule* rule, RuleEndpointState* state, bool eos);
00275 
00276         // Evaluate all rules which do not depend on any matched patterns.
00277         void ExecPureRules(RuleEndpointState* state, bool eos);
00278 
00279         // Eval a rule under the assumption that all its patterns
00280         // have already matched.  s holds the text the rule matched,
00281         // or nil if N/A.
00282         bool ExecRulePurely(Rule* r, BroString* s,
00283                 RuleEndpointState* state, bool eos);
00284 
00285         // Execute the actions associated with a rule.
00286         void ExecRuleActions(Rule* r, RuleEndpointState* state,
00287                                 const u_char* data, int len, bool eos);
00288 
00289         // Evaluate all rule conditions except patterns and "header".
00290         bool EvalRuleConditions(Rule* r, RuleEndpointState* state,
00291                                 const u_char* data, int len, bool eos);
00292 
00293         void PrintTreeDebug(RuleHdrTest* node);
00294 
00295         void DumpStateStats(BroFile* f, RuleHdrTest* hdr_test);
00296 
00297         int RE_level;           // see the constructor argument of the same name
00298         bool parse_error;       // set to true by SetParseError()
00299         RuleHdrTest* root;
00300         rule_list rules;
00301         rule_dict rules_by_id;
00302 };
00303 
00304 #endif

Generated on Wed Sep 14 02:56:25 2005 for bro_docs by doxygen 1.3.5