Implemented plaintext version of the HNSW algorithm

2025-07-18 22:16:57 +08:00
parent f78764da13
commit 979f6d17d7
2 changed files with 303 additions and 6 deletions
--- a/testhnsw.py
+++ b/testhnsw.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+import subprocess
+import json
+from collections import Counter
+
+
+def load_answers(filepath):
+    """Load the ``"answer"`` entry from a JSON file.
+
+    Args:
+        filepath: Path of the file to read. The whole file is parsed as a
+            single JSON document (``json.load``), not line by line.
+
+    Returns:
+        The value stored under the top-level ``"answer"`` key.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        json.JSONDecodeError: If the content is not one valid JSON value.
+        KeyError: If the parsed object has no ``"answer"`` key.
+    """
+    # JSON text is UTF-8 by specification; do not depend on the platform's
+    # locale encoding when decoding it.
+    with open(filepath, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    return data["answer"]
+
+
+def run_plain_binary():
+    """Run the ``plain`` cargo binary in release mode and load its answers.
+
+    Returns:
+        The answers loaded from ``dataset/answer1.jsonl`` when cargo exits
+        with status 0, otherwise ``None``. On failure the exit code and the
+        captured stderr are written to this process's stderr.
+    """
+    import sys
+
+    result = subprocess.run(
+        ["cargo", "r", "-r", "--bin", "plain"], capture_output=True, text=True, cwd="."
+    )
+    if result.returncode != 0:
+        # Output is captured, so without this a failing build or run would be
+        # completely invisible to the person reading the summary.
+        print(
+            f"plain binary failed with exit code {result.returncode}",
+            file=sys.stderr,
+        )
+        if result.stderr:
+            print(result.stderr.rstrip(), file=sys.stderr)
+        return None
+    # NOTE(review): the captured stdout is never parsed; this presumably
+    # relies on the binary writing its results to dataset/answer1.jsonl.
+    # Confirm, otherwise a stale file from an earlier run is read here.
+    return load_answers("dataset/answer1.jsonl")
+
+
+def compare_answers(predictions, ground_truth):
+    """Count the positions at which two answer sequences agree.
+
+    Args:
+        predictions: Sequence of predicted answers; may be ``None`` or empty.
+        ground_truth: Sequence of reference answers.
+
+    Returns:
+        The number of indices where the prediction equals the reference, or
+        0 when ``predictions`` is falsy or the two lengths differ.
+    """
+    if not predictions:
+        return 0
+    if len(predictions) != len(ground_truth):
+        return 0
+
+    matches = 0
+    for predicted, expected in zip(predictions, ground_truth):
+        if predicted == expected:
+            matches += 1
+    return matches
+
+
+def main(num_runs=100):
+    """Run the plain binary repeatedly and summarize the correct-answer counts.
+
+    Each successful run is compared against ``dataset/answer.jsonl``; runs for
+    which ``run_plain_binary`` returns ``None`` are skipped. The summary
+    prints min/max/mean and a histogram of the per-run correct counts.
+
+    Args:
+        num_runs: How many times to invoke the binary. Defaults to 100.
+    """
+    ground_truth = load_answers("dataset/answer.jsonl")
+
+    accuracies = []
+    for _ in range(num_runs):
+        predictions = run_plain_binary()
+        if predictions is not None:
+            accuracies.append(compare_answers(predictions, ground_truth))
+
+    print(f"\nResults ({len(accuracies)} runs):")
+    if not accuracies:
+        # min()/max() raise ValueError on an empty list and the mean would
+        # divide by zero, so stop here when every run failed.
+        print("No successful runs; nothing to summarize.")
+        return
+
+    print(
+        f"Min: {min(accuracies)}, Max: {max(accuracies)}, Mean: {sum(accuracies)/len(accuracies):.2f}"
+    )
+
+    counter = Counter(accuracies)
+    print("Distribution:")
+    for correct_count in sorted(counter):
+        print(f"  {correct_count} correct: {counter[correct_count]} times")
+
+
+# Script entry point: run the benchmark only when this file is executed
+# directly, not when it is imported as a module.
+if __name__ == "__main__":
+    main()