#!/usr/bin/env python3
"""Repeatedly run the ``plain`` cargo binary and summarize its answer accuracy.

Each run invokes ``cargo r -r --bin plain`` and, on success, compares the
answers in ``dataset/answer1.jsonl`` against the ground truth stored in
``dataset/answer.jsonl``. The per-run counts of matching answers are then
reported as min / max / mean and as a distribution.
"""
import subprocess
import json
from collections import Counter


def load_answers(filepath):
    """Return the value stored under the ``"answer"`` key of a JSON file.

    Args:
        filepath: Path of the JSON document to read.

    Returns:
        Whatever ``data["answer"]`` holds (the callers treat it as a sequence).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not a single valid JSON document.
        KeyError: If the document has no ``"answer"`` key.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data["answer"]


def run_plain_binary():
    """Run the ``plain`` binary once and load the answers it produced.

    Returns:
        The answers loaded from ``dataset/answer1.jsonl`` when the process
        exits with status 0, otherwise ``None``.
    """
    result = subprocess.run(
        ["cargo", "r", "-r", "--bin", "plain"],
        capture_output=True,
        text=True,
        cwd=".",
    )
    if result.returncode != 0:
        return None
    # The captured stdout/stderr is not inspected; presumably the binary
    # writes its predictions to dataset/answer1.jsonl -- TODO confirm.
    return load_answers("dataset/answer1.jsonl")


def compare_answers(predictions, ground_truth) -> int:
    """Count positions where ``predictions`` equals ``ground_truth``.

    Args:
        predictions: Sequence of predicted answers (may be ``None`` or empty).
        ground_truth: Sequence of expected answers.

    Returns:
        The number of matching positions, or 0 when ``predictions`` is
        empty/``None`` or its length differs from ``ground_truth``.
    """
    if not predictions or len(predictions) != len(ground_truth):
        return 0
    return sum(1 for p, gt in zip(predictions, ground_truth) if p == gt)


def main(num_runs=100) -> None:
    """Run the binary ``num_runs`` times and print accuracy statistics.

    Args:
        num_runs: How many times to invoke the binary (default 100).

    Runs whose process exits with a non-zero status are skipped. If no run
    succeeds, a short message is printed instead of the statistics.
    """
    ground_truth = load_answers("dataset/answer.jsonl")

    # Each entry is the number of correct answers of one successful run.
    accuracies = []
    for _ in range(num_runs):
        predictions = run_plain_binary()
        if predictions is not None:
            accuracies.append(compare_answers(predictions, ground_truth))

    print(f"\nResults ({len(accuracies)} runs):")
    if not accuracies:
        # min()/max() raise ValueError on an empty list and the mean would
        # divide by zero, so there is nothing to summarize.
        print("No successful runs; nothing to summarize.")
        return

    print(
        f"Min: {min(accuracies)}, Max: {max(accuracies)}, Mean: {sum(accuracies)/len(accuracies):.2f}"
    )

    counter = Counter(accuracies)
    print("Distribution:")
    for correct_count in sorted(counter):
        print(f" {correct_count} correct: {counter[correct_count]} times")


if __name__ == "__main__":
    main()