From 50daf1d71631e5aa99bc9bf07f1ada410b4c69d9 Mon Sep 17 00:00:00 2001
From: DustInDark <nextsasasa@gmail.com>
Date: Sun, 5 Dec 2021 15:05:09 +0900
Subject: [PATCH] Feature/improve rule file read time#254 (#260)

* fixed cached aggregation parser regex #254

* fixed cached condition parser regex #254

* fixed cached condition parser regex re_pipe #254
---
 src/detections/rule/aggregation_parser.rs | 44 +++++++++++------------
 src/detections/rule/condition_parser.rs   | 33 ++++++++---------
 2 files changed, 37 insertions(+), 40 deletions(-)
diff --git a/src/detections/rule/aggregation_parser.rs b/src/detections/rule/aggregation_parser.rs
index 92fd56ff..9324daf7 100644
--- a/src/detections/rule/aggregation_parser.rs
+++ b/src/detections/rule/aggregation_parser.rs
@@ -1,5 +1,23 @@
+use lazy_static::lazy_static;
 use regex::Regex;
 
+lazy_static! {
+    // Regular expressions used for lexical analysis; they are tried in the order listed.
+    // Modeled on the tokendefs of SigmaConditionTokenizer in tools/sigma/parser/condition.py in the Sigma GitHub repository.
+    pub static ref AGGREGATION_REGEXMAP: Vec<Regex> = vec![
+        Regex::new(r"^count\( *\w* *\)").unwrap(), // count expression: `count(...)` with an optional word inside
+        Regex::new(r"^ ").unwrap(), // a single leading space
+        Regex::new(r"^by").unwrap(), // the literal `by`
+        Regex::new(r"^==").unwrap(),
+        Regex::new(r"^<=").unwrap(), // listed before `<` / `>` so the two-character forms match first
+        Regex::new(r"^>=").unwrap(),
+        Regex::new(r"^<").unwrap(),
+        Regex::new(r"^>").unwrap(),
+        Regex::new(r"^\w+").unwrap(), // any other run of word characters
+    ];
+    pub static ref RE_PIPE: Regex = Regex::new(r"\|.*").unwrap(); // the first `|` and the rest of that line
+}
+
 #[derive(Debug)]
 pub struct AggregationParseInfo {
     pub _field_name: Option<String>,    // countの括弧に囲まれた部分の文字
@@ -24,28 +42,11 @@ pub enum AggregationConditionToken {
 /// SIGMAルールでいうAggregationConditionを解析する。
 /// AggregationConditionはconditionに指定された式のパイプ以降の部分を指してます。
 #[derive(Debug)]
-pub struct AggegationConditionCompiler {
-    regex_patterns: Vec<Regex>,
-}
+pub struct AggegationConditionCompiler {}
 
 impl AggegationConditionCompiler {
     pub fn new() -> Self {
-        // ここで字句解析するときに使う正規表現の一覧を定義する。
-        // ここはSigmaのGithubレポジトリにある、toos/sigma/parser/condition.pyのSigmaConditionTokenizerのtokendefsを参考にしています。
-        let mut regex_patterns = vec![];
-        regex_patterns.push(Regex::new(r"^count\( *\w* *\)").unwrap()); // countの式
-        regex_patterns.push(Regex::new(r"^ ").unwrap());
-        regex_patterns.push(Regex::new(r"^by").unwrap());
-        regex_patterns.push(Regex::new(r"^==").unwrap());
-        regex_patterns.push(Regex::new(r"^<=").unwrap());
-        regex_patterns.push(Regex::new(r"^>=").unwrap());
-        regex_patterns.push(Regex::new(r"^<").unwrap());
-        regex_patterns.push(Regex::new(r"^>").unwrap());
-        regex_patterns.push(Regex::new(r"^\w+").unwrap());
-
-        return AggegationConditionCompiler {
-            regex_patterns: regex_patterns,
-        };
+        AggegationConditionCompiler {}
     }
 
     pub fn compile(&self, condition_str: String) -> Result<Option<AggregationParseInfo>, String> {
@@ -65,8 +66,7 @@ impl AggegationConditionCompiler {
         condition_str: String,
     ) -> Result<Option<AggregationParseInfo>, String> {
         // パイプの部分だけを取り出す
-        let re_pipe = Regex::new(r"\|.*").unwrap();
-        let captured = re_pipe.captures(&condition_str);
+        let captured = self::RE_PIPE.captures(&condition_str);
         if captured.is_none() {
             // パイプが無いので終了
             return Result::Ok(Option::None);
@@ -94,7 +94,7 @@ impl AggegationConditionCompiler {
 
         let mut tokens = Vec::new();
         while cur_condition_str.len() != 0 {
-            let captured = self.regex_patterns.iter().find_map(|regex| {
+            let captured = self::AGGREGATION_REGEXMAP.iter().find_map(|regex| {
                 return regex.captures(cur_condition_str.as_str());
             });
             if captured.is_none() {
diff --git a/src/detections/rule/condition_parser.rs b/src/detections/rule/condition_parser.rs
index 3a1e3af4..349f558f 100644
--- a/src/detections/rule/condition_parser.rs
+++ b/src/detections/rule/condition_parser.rs
@@ -1,3 +1,4 @@
+use lazy_static::lazy_static;
 use regex::Regex;
 
 use self::selectionnodes::{
@@ -6,6 +7,16 @@ use self::selectionnodes::{
 use super::selectionnodes;
 use std::{collections::HashMap, sync::Arc};
 
+lazy_static! {
+    pub static ref CONDITION_REGEXMAP: Vec<Regex> = vec![ // lexer patterns, tried in the order listed
+        Regex::new(r"^\(").unwrap(), // opening parenthesis
+        Regex::new(r"^\)").unwrap(), // closing parenthesis
+        Regex::new(r"^ ").unwrap(), // a single leading space
+        Regex::new(r"^\w+").unwrap(), // word; cf. TOKEN_ID in SigmaConditionTokenizer (sigma/tools/sigma/parser/condition.py)
+    ];
+    pub static ref RE_PIPE: Regex = Regex::new(r"\|.*").unwrap(); // the first `|` and the rest of that line
+}
+
 #[derive(Debug, Clone)]
 /// 字句解析で出てくるトークン
 pub enum ConditionToken {
@@ -92,25 +103,12 @@ impl ConditionToken {
 }
 
 #[derive(Debug)]
-pub struct ConditionCompiler {
-    regex_patterns: Vec<Regex>,
-}
+pub struct ConditionCompiler {}
 
 // conditionの式を読み取るクラス。
 impl ConditionCompiler {
     pub fn new() -> Self {
-        // ここで字句解析するときに使う正規表現の一覧を定義する。
-        let mut regex_patterns = vec![];
-        regex_patterns.push(Regex::new(r"^\(").unwrap());
-        regex_patterns.push(Regex::new(r"^\)").unwrap());
-        regex_patterns.push(Regex::new(r"^ ").unwrap());
-        // ^\w+については、sigmaのソースのsigma/tools/sigma/parser/condition.pyのSigmaConditionTokenizerを参考にしている。
-        // 上記ソースの(SigmaConditionToken.TOKEN_ID,     re.compile("[\\w*]+")),を参考。
-        regex_patterns.push(Regex::new(r"^\w+").unwrap());
-
-        return ConditionCompiler {
-            regex_patterns: regex_patterns,
-        };
+        ConditionCompiler {}
     }
 
     pub fn compile_condition(
@@ -119,8 +117,7 @@ impl ConditionCompiler {
         name_2_node: &HashMap<String, Arc<Box<dyn SelectionNode + Send + Sync>>>,
     ) -> Result<Box<dyn SelectionNode + Send + Sync>, String> {
         // パイプはここでは処理しない
-        let re_pipe = Regex::new(r"\|.*").unwrap();
-        let captured = re_pipe.captures(&condition_str);
+        let captured = self::RE_PIPE.captures(&condition_str);
         let condition_str = if captured.is_some() {
             let captured = captured.unwrap().get(0).unwrap().as_str().to_string();
             condition_str.replacen(&captured, "", 1)
@@ -192,7 +189,7 @@ impl ConditionCompiler {
 
         let mut tokens = Vec::new();
         while cur_condition_str.len() != 0 {
-            let captured = self.regex_patterns.iter().find_map(|regex| {
+            let captured = self::CONDITION_REGEXMAP.iter().find_map(|regex| {
                 return regex.captures(cur_condition_str.as_str());
             });
             if captured.is_none() {