40 lines
899 B
Python
40 lines
899 B
Python
"""Download MMLU dataset to data/mmlu/."""
|
|
|
|
import argparse
|
|
import os
|
|
import urllib.request
|
|
import zipfile
|
|
|
|
# Base URL that every archive name in FILES is appended to. It must keep its
# trailing slash because main() builds each URL by plain string concatenation.
REPO = "https://github.com/hendrycks/test/raw/master/"

# Archive names fetched from REPO, in download order. Each one is downloaded,
# extracted into the output directory, and then deleted by main().
FILES = [
    "auxiliary.zip",
    "dev.zip",
    "test.zip",
    "val.zip",
]
|
|
|
|
|
|
def main(argv=None):
    """Download every archive in FILES, extract it, and delete the archive.

    Each name in FILES is appended to REPO to form the download URL. The
    archive is saved into the output directory, extracted there, and then
    removed, so only the extracted contents remain.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None`` (the default), argparse reads
            ``sys.argv[1:]``, which matches calling ``main()`` with no
            arguments.

    Raises:
        Exceptions raised by ``urllib.request.urlretrieve`` (for example
        ``urllib.error.URLError``) and by ``zipfile.ZipFile`` (for example
        ``zipfile.BadZipFile``) propagate to the caller. The archive being
        processed is deleted before the exception leaves this function.
    """
    parser = argparse.ArgumentParser(
        description="Download the MMLU archives and extract them."
    )
    parser.add_argument(
        "--output_dir",
        type=str,
        default="data/mmlu",
        help="directory the archives are extracted into (created if missing)",
    )
    args = parser.parse_args(argv)

    os.makedirs(args.output_dir, exist_ok=True)

    for fname in FILES:
        url = REPO + fname
        zip_path = os.path.join(args.output_dir, fname)
        try:
            print(f"Downloading {url}...")
            urllib.request.urlretrieve(url, zip_path)

            print(f"Extracting {zip_path}...")
            with zipfile.ZipFile(zip_path, "r") as z:
                z.extractall(args.output_dir)
        finally:
            # Delete the archive on failure as well as success, so a partial
            # download or corrupt zip is never left in the output directory.
            # The existence check covers a download that failed before the
            # file was created.
            if os.path.exists(zip_path):
                os.remove(zip_path)

    print(f"MMLU data saved to {args.output_dir}")
|
|
|
|
|
|
# Run the download only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|