From a03c307eeb27ab31701f39f6c6770cb7641de578 Mon Sep 17 00:00:00 2001
From: Benjamin Demetz <50681275+Benji377@users.noreply.github.com>
Date: Thu, 18 Jul 2024 15:29:50 +0200
Subject: [PATCH] Adding scripts

---
 .gitignore         |  2 +-
 scripts/builder.py | 49 ++++++++++++++++++++++++++++++++
 scripts/parser.py  | 71 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 121 insertions(+), 1 deletion(-)
 create mode 100644 scripts/builder.py
 create mode 100644 scripts/parser.py

diff --git a/.gitignore b/.gitignore
index 433621b..238eac7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,2 @@
 .idea/
-scripts/
\ No newline at end of file
+venv/
\ No newline at end of file
diff --git a/scripts/builder.py b/scripts/builder.py
new file mode 100644
index 0000000..c430ca0
--- /dev/null
+++ b/scripts/builder.py
@@ -0,0 +1,49 @@
+import yara_x
+import os
+
+# A script to compile all the Yara rules into a single binary.
+# It does this by iterating through each rule, reading its content and passing it to the YaraX compiler
+# The compiler then collects them all and finally compiles them altogether into a single binary file
+# The binary file is then saved in the directory from which the script was executed
+
+compiler = yara_x.Compiler()
+
+error_rules = []  # Names of the files the compiler rejected
+number_of_rules = 0  # Number of .yar files the compiler accepted
+
+# Iterate through the rules folder and retrieve each yar file in the folder and subfolders
+rules_folder = 'rules'
+for root, dirs, files in os.walk(rules_folder):
+    for file in files:
+        if not file.endswith('.yar'):
+            continue
+        with open(os.path.join(root, file), 'r') as rule_file:
+            content = rule_file.readlines()
+        # YaraX does not support include statements, so lines starting with include "..."
+        # become import "..."; other text (e.g. a "#include " string literal) is left untouched
+        content = [line.replace('include "', 'import "', 1)
+                   if line.lstrip().startswith('include "') else line
+                   for line in content]
+        # Add the source to the compiler; a rejected file is recorded and skipped
+        try:
+            compiler.add_source(''.join(content))
+        except Exception as e:
+            error_rules.append(file)
+            print(f'Error compiling {file}: {e}')
+        else:
+            number_of_rules += 1
+
+if error_rules:
+    print("--------------------")
+    print('The following rules failed to compile:')
+    for rule in error_rules:
+        print(rule)
+
+# Compile the accepted sources and save the result into a binary file
+rules = compiler.build()
+with open('rulepirus.yarac', 'wb') as write_file:
+    rules.serialize_into(write_file)
+
+# Print the number of rules compiled and the number of rules that failed to compile
+print(f'Compiled {number_of_rules} rules')
+print(f'Failed to compile {len(error_rules)} rules')
diff --git a/scripts/parser.py b/scripts/parser.py
new file mode 100644
index 0000000..00de578
--- /dev/null
+++ b/scripts/parser.py
@@ -0,0 +1,71 @@
+import re
+import os
+
+# A script to extract YARA rules from a single file and write them to separate files
+# The script assumes that the input file contains multiple YARA rules and imports
+# The script creates directories based on the first uppercase part of the rule name
+# The script writes each rule to a separate file named after the rule
+
+
+def extract_yara_rules(file_path):
+    """Split the YARA rules found in a file into one .yar file per rule.
+
+    Each rule is written to '<PREFIX>/<rule name>.yar' relative to the current
+    working directory, where PREFIX is the leading run of uppercase letters and
+    digits of the rule name. Rules whose name has no such prefix are written to
+    the 'OTHER' directory. Import statements located between the previous rule
+    and the current one are written in front of the rule.
+
+    Only rules declared with a tag list ('rule NAME : tags {') are recognised.
+    A line is printed for every rule that is written; nothing is returned.
+
+    :param file_path: Path of the file containing the YARA rules
+    """
+    with open(file_path, 'r') as file:
+        content = file.read()
+
+    # Matches an import at the start of any line, including the first line of the file
+    import_pattern = re.compile(r'^import\s+["\w]+', re.MULTILINE)
+
+    # Matches a whole rule, from the keyword up to the first '}' that starts a line;
+    # group 1 captures the rule name
+    rule_pattern = re.compile(r'rule\s+(\w+)\s*:\s*.*?\{.*?\n}', re.DOTALL)
+
+    # Find all imports and their positions
+    imports = list(import_pattern.finditer(content))
+
+    # End offset of the previously handled rule; imports after it belong to the next rule
+    previous_end = 0
+
+    for match in rule_pattern.finditer(content):
+        rule_text = match.group(0)
+        rule_name = match.group(1)
+        rule_start = match.start()
+
+        # Collect the imports located between the end of the previous rule and this rule
+        rule_imports = [imp.group() for imp in imports
+                        if previous_end <= imp.start() < rule_start]
+
+        # Update previous_end to the end of the current rule
+        previous_end = match.end()
+
+        # The directory is the first uppercase part of the rule name (may include numbers).
+        # A name starting with a lowercase letter or '_' has no such part, so fall back
+        # to a fixed directory instead of failing on the missing match.
+        prefix_match = re.match(r'[A-Z0-9]+', rule_name)
+        dir_path = prefix_match.group(0) if prefix_match else 'OTHER'
+        os.makedirs(dir_path, exist_ok=True)
+
+        # Create the output file path, named after the rule
+        output_file_path = os.path.join(dir_path, f"{rule_name}.yar")
+
+        # Write the rule with its preceding imports; without imports the file starts
+        # directly with the rule instead of an empty line
+        with open(output_file_path, 'w') as rule_file:
+            rule_file.write('\n'.join(rule_imports + [rule_text]))
+        print(f"Rule '{rule_name}' written to '{output_file_path}'.")
+
+
+if __name__ == "__main__":
+    # Replace 'yara.yar' with your input file path
+    extract_yara_rules('yara.yar')