From bb922ef2206cfd235c26f084947a92294eecbd52 Mon Sep 17 00:00:00 2001 From: Maria Mirkina Date: Wed, 25 Sep 2024 10:29:54 +0000 Subject: [PATCH] Added recipe for scrolls_gov_report_8k dataset --- data_axs.json | 3 ++- dataset_scrolls_gov_report_8k_recipe/data_axs.json | 10 ++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) create mode 100644 dataset_scrolls_gov_report_8k_recipe/data_axs.json diff --git a/data_axs.json b/data_axs.json index 26b990b..3e3cd42 100644 --- a/data_axs.json +++ b/data_axs.json @@ -62,7 +62,8 @@ "llm_hf_weights_recipe": "llm_hf_weights_recipe", "huggingface_tool_detector": "huggingface_tool_detector", "model_llama3_recipe": "model_llama3_recipe", - "model_training_llama2_recipe": "model_training_llama2_recipe" + "model_training_llama2_recipe": "model_training_llama2_recipe", + "dataset_scrolls_gov_report_8k_recipe": "dataset_scrolls_gov_report_8k_recipe" }, "repo_name": "axs2mlperf", "submodules": false diff --git a/dataset_scrolls_gov_report_8k_recipe/data_axs.json b/dataset_scrolls_gov_report_8k_recipe/data_axs.json new file mode 100644 index 0000000..e8deb0b --- /dev/null +++ b/dataset_scrolls_gov_report_8k_recipe/data_axs.json @@ -0,0 +1,10 @@ +{ + "_producer_rules": [ + [ [ "downloaded", "preprocessed", "dataset_name=scrolls_gov_report_8k" ], [["get_kernel"],["byname","downloader"],["download"]], { + "downloading_tool_query": "shell_tool,can_download_url_from_rclone", + "url": "mlc-llama2:training/scrolls_gov_report_8k", + "newborn_entry_name": "downloaded_dataset_scrolls_gov_report_8k", + "file_path": "training/scrolls_gov_report_8k" + }, [] ] + ] +}