# BackDoorBuster/detection/Regexdetection.py

import re
from typing import Dict, List, Tuple
from .utils import remove_comments


# Severity buckets present in every result, in the order they are created.
_RISK_LEVELS = ("high", "medium", "low", "none")

# Per-extension rules as (compiled regex, severity) pairs. Built once at import
# time so that scanning a file does not rebuild or re-resolve the table.
_RISK_PATTERNS = {
    extension: tuple(
        (re.compile(regex), level) for regex, level in rules.items()
    )
    for extension, rules in {
        ".py": {
            r"\bsystem\(": "high",
            r"\bexec\(": "high",
            r"\bpopen\(": "medium",
            r"\beval\(": "high",
            r"\bsubprocess": "medium",
            r"\b__getattribute__\(": "high",
            r"\bgetattr\(": "medium",
            r"\b__import__\(": "high",
        },
        ".js": {
            r"\beval\(": "high",
            r"\bexec\(": "high",
            r"\bchild_process\.exec\(": "high",
        },
        ".cpp": {
            r"\bsystem\(": "high",
        },
        ".pyc": {
            r"\bexec\b": "high",
            r"\beval\b": "high",
            r"\bos\.system\b": "high",
            # The os module has no plain ``exec``/``spawn``; the real entry
            # points carry an l/v (+ optional p, e) suffix, which a bare
            # trailing ``\b`` would reject. The suffix is optional so the
            # text matched previously is still matched.
            r"\bos\.exec(?:[lv]p?e?)?\b": "high",
            r"\bos\.fork\b": "high",
            r"\bos\.kill\b": "high",
            r"\bos\.popen\b": "medium",
            r"\bos\.spawn(?:[lv]p?e?)?\b": "medium",
        },
    }.items()
}


def find_dangerous_functions(
    file_content: str, file_extension: str
) -> Dict[str, List[Tuple[int, str]]]:
    """Scan source text line by line for calls considered dangerous.

    Each line is first passed through ``remove_comments`` (imported from
    ``.utils``; presumably strips comments for the given file type) and the
    result is matched against the regex rules registered for
    ``file_extension``.

    Args:
        file_content: Text to scan. ``None`` is tolerated and yields empty
            buckets. Lines are split on ``"\\n"`` and numbered from 1.
        file_extension: Extension including the leading dot (``".py"``,
            ``".js"``, ``".cpp"``, ``".pyc"``). Extensions without rules
            yield empty buckets.

    Returns:
        A dict with the keys ``"high"``, ``"medium"``, ``"low"`` and
        ``"none"``. Each value is a list of ``(line_number, cleaned_line)``
        tuples in line order. A line is listed at most once per severity,
        even when several rules of that severity match it; it may still
        appear under more than one severity.
    """
    classified_results: Dict[str, List[Tuple[int, str]]] = {
        level: [] for level in _RISK_LEVELS
    }
    rules = _RISK_PATTERNS.get(file_extension, ())
    # Nothing to scan, or nothing to look for: skip the per-line work.
    if file_content is None or not rules:
        return classified_results

    for line_number, line in enumerate(file_content.split("\n"), start=1):
        clean_line = remove_comments(line, file_extension)
        if not clean_line:
            continue
        # Collapse to the set of severities hit so the same line is not
        # reported twice in one bucket (e.g. ``child_process.exec(`` matches
        # both the generic ``exec(`` rule and the specific one).
        matched_levels = {
            level for pattern, level in rules if pattern.search(clean_line)
        }
        for level in matched_levels:
            classified_results[level].append((line_number, clean_line))
    return classified_results