54 lines
1.4 KiB
Python
54 lines
1.4 KiB
Python
#!/usr/bin/env python3
|
|
import subprocess
|
|
import json
|
|
from collections import Counter
|
|
|
|
|
|
def load_answers(filepath):
    """Load the value stored under the "answer" key of a JSON file.

    The whole file is parsed as a single JSON document, so a multi-record
    JSON Lines file is not supported even if the extension is ``.jsonl``.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        Whatever the top-level object holds under its "answer" key.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not a valid JSON document.
        KeyError: If the top-level object has no "answer" key.
    """
    # JSON text is UTF-8 by specification; decode it explicitly instead of
    # relying on the platform's locale-dependent default encoding.
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)
    return data["answer"]
|
|
|
|
|
|
def run_plain_binary():
    """Run the ``plain`` cargo binary once and load the resulting answers.

    Invokes ``cargo r -r --bin plain`` (cargo's short form of
    ``cargo run --release --bin plain``) in the current directory with its
    stdout and stderr captured.

    Returns:
        The value loaded from "dataset/answer1.jsonl" when cargo exits with
        status 0, otherwise None.
    """
    import sys  # function-scope import: only needed to report failures

    result = subprocess.run(
        ["cargo", "r", "-r", "--bin", "plain"], capture_output=True, text=True, cwd="."
    )
    if result.returncode != 0:
        # The child's output is captured, so without this message a failing
        # run would be dropped by the caller without leaving any trace.
        print(
            f"cargo run failed with exit code {result.returncode}:\n{result.stderr}",
            file=sys.stderr,
        )
        return None

    # The program outputs the same results as answer1.jsonl
    # NOTE(review): presumably the binary (re)writes this file on each run;
    # confirm, since the captured stdout itself is never inspected here.
    return load_answers("dataset/answer1.jsonl")
|
|
|
|
|
|
def compare_answers(predictions, ground_truth):
    """Count the positions at which predictions and ground truth agree.

    Returns 0 when ``predictions`` is falsy (``None`` or empty) or when the
    two sequences differ in length; otherwise returns the number of
    index-aligned pairs that compare equal.
    """
    comparable = bool(predictions) and len(predictions) == len(ground_truth)
    if not comparable:
        return 0

    matches = 0
    for predicted, expected in zip(predictions, ground_truth):
        if predicted == expected:
            matches += 1
    return matches
|
|
|
|
|
|
def main(num_runs=100):
    """Run the plain binary repeatedly and summarise how many answers match.

    Each successful run is scored with ``compare_answers`` against the
    ground truth loaded from "dataset/answer.jsonl"; runs for which
    ``run_plain_binary`` returns None are skipped. The minimum, maximum,
    mean and distribution of the per-run correct counts are printed.

    Args:
        num_runs: Number of times to invoke the binary (default 100).
    """
    ground_truth = load_answers("dataset/answer.jsonl")

    accuracies = []
    for _ in range(num_runs):
        predictions = run_plain_binary()
        if predictions is not None:
            accuracies.append(compare_answers(predictions, ground_truth))

    print(f"\nResults ({len(accuracies)} runs):")
    if not accuracies:
        # min()/max() raise ValueError on an empty list and the mean would
        # divide by zero, so report the situation instead of crashing.
        print("No successful runs; no statistics to report.")
        return

    print(
        f"Min: {min(accuracies)}, Max: {max(accuracies)}, Mean: {sum(accuracies)/len(accuracies):.2f}"
    )

    counter = Counter(accuracies)
    print("Distribution:")
    for correct_count in sorted(counter):
        print(f" {correct_count} correct: {counter[correct_count]} times")
|
|
|
|
|
|
# Run the benchmark only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|