"""Compile every YARA rule found under a folder into one YARA-X binary.

Origin: ``scripts/builder.py``.

The script walks the rules folder recursively, feeds the text of each
``.yar`` file to a single ``yara_x.Compiler``, builds all accepted sources
together and serializes the result into one file in the current working
directory.  Files that cannot be read or that the compiler rejects are
reported and skipped, so one bad rule does not abort the whole build.
"""
import os
import re

# An ``include "`` directive at the start of a line (indentation allowed).
# Anchoring to the line start keeps the rewrite away from string literals and
# comments that merely contain the text ``include "``.
_INCLUDE_DIRECTIVE = re.compile(r'^([ \t]*)include "', re.MULTILINE)


def find_rule_files(rules_folder):
    """Yield the path of every ``.yar`` file below *rules_folder*.

    Directories and file names are visited in sorted order so that the
    compile order (and therefore the error report) is reproducible.

    Args:
        rules_folder: Root directory to search recursively.

    Yields:
        Paths built as ``os.path.join(root, file_name)``.
    """
    for root, dirs, files in os.walk(rules_folder):
        dirs.sort()  # in-place sort steers the order os.walk descends in
        for file_name in sorted(files):
            if file_name.endswith('.yar'):
                yield os.path.join(root, file_name)


def load_rule_source(path):
    """Return the text of the rule file at *path*, ready for the compiler.

    Lines that start with ``include "`` are rewritten to start with
    ``import "``.  This keeps the original script's workaround, whose
    author noted that YARA-X did not support ``include`` statements.
    NOTE(review): confirm this is still needed (and that the resulting
    ``import`` resolves) for the YARA-X version in use.

    Args:
        path: Path of the ``.yar`` file to read.

    Returns:
        The file content as a single string.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeError: If the file cannot be decoded.
    """
    with open(path, 'r') as rule_file:
        content = rule_file.read()
    return _INCLUDE_DIRECTIVE.sub(r'\1import "', content)


def main(rules_folder='rules', output_path='rulepirus.yarac'):
    """Compile all rules under *rules_folder* and save them to *output_path*.

    Args:
        rules_folder: Directory that is searched recursively for ``.yar``
            files.
        output_path: File the serialized rules are written to.

    Returns:
        A ``(number_of_rules, error_rules)`` tuple: the number of files the
        compiler accepted and the paths of the files that failed.
    """
    # Third-party dependency, imported here so the helpers above stay
    # importable on machines without yara_x installed.
    import yara_x

    compiler = yara_x.Compiler()
    error_rules = []
    number_of_rules = 0

    for path in find_rule_files(rules_folder):
        try:
            source = load_rule_source(path)
        except (OSError, UnicodeError) as e:
            # An unreadable file is reported like a failed rule instead of
            # aborting the build.
            error_rules.append(path)
            print(f'Error reading {path}: {e}')
            continue

        try:
            compiler.add_source(source)
        except Exception as e:
            # Deliberately broad: any rejection by the compiler is reported
            # per file and the remaining rules are still processed.
            error_rules.append(path)
            print(f'Error compiling {path}: {e}')
        else:
            number_of_rules += 1

    # Print the rules that failed to compile
    if error_rules:
        print("--------------------")
        print('The following rules failed to compile:')
        for rule in error_rules:
            print(rule)

    # Compile everything that was accepted into one rule set
    rules = compiler.build()

    # Save the compiled rules into a binary file
    with open(output_path, 'wb') as write_file:
        rules.serialize_into(write_file)

    # Summary: accepted files versus failed files
    print(f'Compiled {number_of_rules} rules')
    print(f'Failed to compile {len(error_rules)} rules')
    return number_of_rules, error_rules


if __name__ == '__main__':
    main()
"""Split a file holding many YARA rules into one ``.yar`` file per rule.

Origin: ``scripts/parser.py``.

Every rule is written to ``<PREFIX>/<rule name>.yar`` where ``PREFIX`` is
the leading run of uppercase letters and digits of the rule name.  Import
statements found between the end of the previous rule and the start of a
rule are copied to the top of that rule's file.
"""
import os
import re

# ``import "module"`` at the start of a line.  ``^`` with MULTILINE also
# matches at the very beginning of the file, which a ``(?<=\n)`` look-behind
# cannot do.
_IMPORT_PATTERN = re.compile(r'^import\s+"[^"\n]*"', re.MULTILINE)

# One complete rule: optional ``private``/``global`` modifiers, the name, an
# optional ``: tag ...`` list, then the body up to the first ``}`` that
# starts a line.
_RULE_PATTERN = re.compile(
    r'(?:\b(?:private|global)\s+)*\brule\s+(?P<name>\w+)\s*(?::[^{]*)?\{.*?\n\}',
    re.DOTALL,
)

# Leading run of uppercase letters / digits used as the directory name.
_PREFIX_PATTERN = re.compile(r'[A-Z0-9]+')

# Directory for rules whose name has no uppercase/digit prefix.
_FALLBACK_DIR = 'OTHER'


def extract_yara_rules(file_path, output_dir=None):
    """Write every rule found in *file_path* to its own ``.yar`` file.

    Args:
        file_path: Path of the YARA file that contains the rules.
        output_dir: Directory below which the per-prefix folders are
            created.  ``None`` (the default) creates them relative to the
            current working directory.

    Returns:
        The paths of the files written, in the order the rules appear in
        the input.
    """
    with open(file_path, 'r') as file:
        content = file.read()

    imports = list(_IMPORT_PATTERN.finditer(content))
    written_paths = []

    # End offset of the previously handled rule; imports before it belong
    # to earlier rules.
    previous_end = 0

    for match in _RULE_PATTERN.finditer(content):
        rule_text = match.group(0)
        rule_name = match.group('name')
        rule_start = match.start()

        # Imports located between the previous rule and this one
        rule_imports = [
            imp.group()
            for imp in imports
            if previous_end <= imp.start() < rule_start
        ]
        previous_end = match.end()

        # Names that do not start with an uppercase letter or digit have no
        # prefix; they go to a common folder instead of raising.
        prefix_match = _PREFIX_PATTERN.match(rule_name)
        prefix = prefix_match.group(0) if prefix_match else _FALLBACK_DIR

        dir_path = prefix if output_dir is None else os.path.join(output_dir, prefix)
        os.makedirs(dir_path, exist_ok=True)

        output_file_path = os.path.join(dir_path, f"{rule_name}.yar")

        # Imports first (if any), one blank line, then the rule itself
        header = '\n'.join(rule_imports) + '\n\n' if rule_imports else ''
        with open(output_file_path, 'w') as rule_file:
            rule_file.write(header + rule_text + '\n')
        print(f"Rule '{rule_name}' written to '{output_file_path}'.")
        written_paths.append(output_file_path)

    return written_paths


if __name__ == "__main__":
    input_file_path = 'yara.yar'  # Replace with your input file path
    extract_yara_rules(input_file_path)