refactor: split data file into multiple

any benchmark file should end in _data.yaml
biocypher · Oct 2, 2024 · 123d231 · 123d231
1 parent 88beff8
commit 123d231
Show file tree

Hide file tree

Showing 12 changed files with 979 additions and 924 deletions.
diff --git a/benchmark/conftest.py b/benchmark/conftest.py
@@ -546,38 +546,38 @@ def pytest_generate_tests(metafunc):
     Called once for each test case in the benchmark test collection.
     If fixture is part of test declaration, the test is parametrized.
     """
-    # Load the data file
-    data_file = BENCHMARK_DATASET["benchmark_data.yaml"]
+    # Load the data
+    data = BENCHMARK_DATASET
 
     # Parametrize the fixtures with the collected rows
     if "test_data_biocypher_query_generation" in metafunc.fixturenames:
         metafunc.parametrize(
             "test_data_biocypher_query_generation",
-            data_file["biocypher_query_generation"],
+            data["biocypher_query_generation"],
         )
     if "test_data_rag_interpretation" in metafunc.fixturenames:
         metafunc.parametrize(
             "test_data_rag_interpretation",
-            data_file["rag_interpretation"],
+            data["rag_interpretation"],
         )
     if "test_data_text_extraction" in metafunc.fixturenames:
         metafunc.parametrize(
             "test_data_text_extraction",
-            data_file["text_extraction"],
+            data["text_extraction"],
         )
     if "test_data_api_calling" in metafunc.fixturenames:
         metafunc.parametrize(
             "test_data_api_calling",
-            data_file["api_calling"],
+            data["api_calling"],
         )
     if "test_data_medical_exam" in metafunc.fixturenames:
         metafunc.parametrize(
             "test_data_medical_exam",
-            data_file["medical_exam"],
+            data["medical_exam"],
         )
 
 
 @pytest.fixture
 def kg_schemas():
-    data_file = BENCHMARK_DATASET["benchmark_data.yaml"]
-    return data_file["kg_schemas"]
+    data = BENCHMARK_DATASET
+    return data["kg_schemas"]
diff --git a/benchmark/data/benchmark_api_calling_data.yaml b/benchmark/data/benchmark_api_calling_data.yaml
@@ -0,0 +1,62 @@
+# Top-level keys: benchmark modules
+# Values: list of dictionaries, each containing a test case
+#
+# Test case keys:
+# - input (for creating the test)
+# - expected (for asserting outcomes and generating a score)
+# - case (for categorizing the test case)
+#
+# If any input is a dictionary itself, it will be expanded into separate test
+# cases, using the top-level key to create a concatenated test case purpose.
+
+api_calling:
+  - case: oncokb:braf:melanoma
+    input:
+      prompt:
+        exact_spelling: "What is the consequence of the V600E BRAF variant in Melanoma?"
+    expected:
+      parts_of_query:
+        [
+          "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?",
+          "hugoSymbol=BRAF",
+          "alteration=V600E",
+          "tumorType=Melanoma",
+        ]
+  - case: oncokb:tp53:colon_adenocarcinoma
+    input:
+      prompt:
+        exact_spelling: "What is the consequence of the R273C TP53 variant in Colon Adenocarcinoma?"
+    expected:
+      parts_of_query:
+        [
+          "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?hugoSymbol=TP53",
+          "alteration=R273C",
+          "tumorType=Colon%20Adenocarcinoma",
+        ]
+  - case: oncokb:braf:histiocytosis
+    input:
+      prompt:
+        exact_spelling: "What is the consequence of the N486_P490del BRAF variant in Histiocytosis?"
+        descriptive_spelling: "What is the consequence of an N486_P490 deletion in BRAF in Histiocytosis?"
+    expected:
+      parts_of_query:
+        [
+          "https://demo.oncokb.org/api/v1/annotate/mutations/byProteinChange?",
+          "hugoSymbol=BRAF",
+          "alteration=N486_P490del",
+          "tumorType=Histiocytosis",
+        ]
+  - case: oncokb:ros1:lung_adenocarcinoma
+    input:
+      prompt:
+        exact_spelling: "What is the consequence of the functional fusion of CD74 and ROS1 in Lung Adenocarcinoma?"
+    expected:
+      parts_of_query:
+        [
+          "https://demo.oncokb.org/api/v1/annotate/structuralVariants?",
+          "hugoSymbolA=CD74",
+          "hugoSymbolB=ROS1",
+          "structuralVariantType=FUSION",
+          "isFunctionalFusion=true",
+          "tumorType=Lung%20Adenocarcinoma",
+        ]