Skip to content

Commit

Permalink
feat: add convert-to-zenodo to cli tool
Browse files Browse the repository at this point in the history
  • Loading branch information
wd15 committed Aug 11, 2023
1 parent 20f6be3 commit e7b0cfe
Show file tree
Hide file tree
Showing 4 changed files with 190 additions and 36 deletions.
72 changes: 64 additions & 8 deletions _data/python-pfhub/pfhub/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,24 @@ def get_timeseries_info(meta_yaml, item):
dotwiz = lambda x: DotWiz(**x)


def meta_to_zenodo_(url):
    """Read a meta.yaml and build the file contents destined for Zenodo.

    The YAML at ``url`` is loaded with ``read_yaml``, wrapped with
    ``dotwiz`` and then handed to ``get_file_strings``.

    Args:
      url: the url of meta.yaml file

    Returns:
      the dict of file names as keys and file contents as values

    >>> yaml_file = getfixture('yaml_data_file')
    >>> data = meta_to_zenodo_(str(yaml_file))
    >>> print(sorted(data.keys()))
    ['free_energy_1a.csv', 'pfhub.yaml']

    """
    meta = dotwiz(read_yaml(url))
    return get_file_strings(meta)


def meta_to_zenodo(url):
"""Convert a meta.yaml link to pfhub.json ready for upload to Zenodo
Expand All @@ -224,7 +242,52 @@ def meta_to_zenodo(url):
"""
return pipe(url, read_yaml, dotwiz, get_file_strings, bundle("pfhub"))
return pipe(url, meta_to_zenodo_, bundle("pfhub"))


def meta_to_zenodo_no_zip(url, dest):
    """Convert a meta.yaml link to Zenodo-ready files without zipping them

    The file contents generated from the meta.yaml are written directly
    into ``dest`` instead of being bundled into an archive.

    Args:
      url: the url of meta.yaml file
      dest: destination directory of the files

    Returns:
      the paths to the pfhub.yaml and associated data files

    >>> yaml_file = getfixture('yaml_data_file')
    >>> tmpdir = getfixture('tmpdir')
    >>> files = meta_to_zenodo_no_zip(str(yaml_file), tmpdir)
    >>> file0 = os.path.join(tmpdir, files[0])
    >>> file1 = os.path.join(tmpdir, files[1])
    >>> assert file0 == os.path.join(tmpdir, "pfhub.yaml")
    >>> assert file1 == os.path.join(tmpdir, "free_energy_1a.csv")

    """
    file_strings = meta_to_zenodo_(url)
    return write_files(file_strings, dest=dest)


@curry
def write_files(string_dict, dest):
    """Write each entry of a dict to its own file

    Args:
      string_dict: dict of file names as keys and contents as values
      dest: the destination directory

    Returns:
      the list of paths of the written files, in the dict's iteration
      order
    """

    def write_one(name, contents):
        # Every file is written as UTF-8 text directly inside ``dest``.
        target = os.path.join(dest, f"{name}")
        with open(target, "w", encoding="utf-8") as handle:
            handle.write(contents)
        return target

    return [write_one(name, contents) for name, contents in string_dict.items()]


@curry
Expand All @@ -249,13 +312,6 @@ def bundle(zipname, string_dict, path="."):
"""

@curry
def write(dir_, item):
with open(dir_ + f"/{item[0]}", "w", encoding="utf-8") as fstream:
fstream.write(item[1])

write_files = lambda s, d: list(map_(write(d), s.items()))

tmpdir = tempfile.mkdtemp()

write_files(string_dict, tmpdir)
Expand Down
58 changes: 38 additions & 20 deletions _data/python-pfhub/pfhub/scripts/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,41 @@
"""

import re
import os
import tempfile
import shutil

import click
import click_params
from toolz.curried import pipe, get
from toolz.curried import map as map_
import pykwalify
import pykwalify.core
import requests

from ..convert import meta_to_zenodo, download_file
from ..convert import meta_to_zenodo_no_zip, download_file
from ..convert import download_zenodo as download_zenodo_
from ..convert import download_meta as download_meta_
from ..func import compact

import tempfile
import pykwalify
import pykwalify.core
import shutil


EPILOG = "See the documentation at \
https://github.com/usnistgov/pfhub/blob/master/CLI.md (under construction)"



@click.group(epilog=EPILOG)
def cli():
"""Submit results to PFHub and manipulate PFHub data"""



def output(local_filepaths):
"""Output formatted file names with commas to stdout
Args:
local_filepaths: list of file path strings
"""

def echo(local_filepath, newline, comma=","):
formatted_path = click.format_filename(local_filepath)
click.secho(message=f" {formatted_path}" + comma, fg="green", nl=newline)
Expand Down Expand Up @@ -84,14 +90,22 @@ def download_zenodo(url, dest):
def download_meta(url, dest):
"""Download a record from pfhub
Download a meta.yaml from any URL
Download a meta.yaml along with linked data
Args:
url: the URL of either a meta.yaml or Zenodo record
dest: the destination directory
"""
try:
is_meta = check_meta_url(url)
except requests.exceptions.ConnectionError as err:
click.secho(err, fg="red")
click.secho(f"{url} is invalid", fg="red")
return
except IsADirectoryError:
click.secho(f"{url} is not a link to a file", fg="red")
return

is_meta = check_meta_url(url)
if is_meta:
local_filepaths = download_meta_(url, dest=dest)
output(local_filepaths)
Expand All @@ -118,13 +132,12 @@ def convert_to_zenodo(file_path, dest):
"""
is_meta = check_meta(file_path)
if is_meta:
local_filepaths = [meta_to_zenodo(file_path)]
local_filepaths = meta_to_zenodo_no_zip(file_path, dest)
output(local_filepaths)
else:
click.secho(f"{file_path} is not a valid PFHub meta.yaml", fg="red")



def zenodo_regexs():
"""Regular expression for acceptable Zenodo URLs"""
return [
Expand Down Expand Up @@ -164,6 +177,9 @@ def get_zenodo_record_id(url, regexs):

def check_meta_url(url):
"""Check that a file is a valid meta.yaml
Args:
url: the url for the file
"""
tmpdir = tempfile.mkdtemp()
file_path = download_file(url, dest=tmpdir)
Expand All @@ -173,18 +189,20 @@ def check_meta_url(url):


def check_meta(path):
    """Check that a path is a valid meta.yaml

    The file is validated with pykwalify against the
    ``schema/schema_meta.yaml`` file located relative to this module.

    Args:
      path: the path to the file

    Returns:
      True if pykwalify loads the file and validates it against the
      schema, False if loading raises ``CoreError`` or validation raises
      ``SchemaError``
    """
    # Resolve the schema relative to this module so the check works
    # regardless of the current working directory.
    schema_file = os.path.join(
        os.path.split(__file__)[0], "..", "schema", "schema_meta.yaml"
    )
    try:
        obj = pykwalify.core.Core(source_file=path, schema_files=[schema_file])
    except pykwalify.errors.CoreError:
        # The source could not be loaded by pykwalify (e.g. not YAML).
        return False
    try:
        obj.validate(raise_exception=True)
    except pykwalify.errors.SchemaError:
        return False
    return True


def check_linkml(path):
    """Check that a file has a valid linkml pfhub schema

    Placeholder: no check is implemented, ``path`` is ignored and the
    function returns None.

    Args:
      path: the path to the file
    """
89 changes: 82 additions & 7 deletions _data/python-pfhub/pfhub/scripts/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

from click.testing import CliRunner

from .cli import cli, download
from .cli import cli, download_zenodo, download_meta, convert_to_zenodo


def test_cli():
Expand All @@ -19,31 +19,106 @@ def test_download_zenodo(tmpdir):
"""Test downloading a Zenodo record"""
runner = CliRunner()
result = runner.invoke(
download, ["https://zenodo.org/record/7255597", "--dest", tmpdir]
download_zenodo, ["https://zenodo.org/record/7255597", "--dest", tmpdir]
)
assert result.exit_code == 0
file1 = os.path.join(tmpdir, "phase_field_1.tsv")
file2 = os.path.join(tmpdir, "stats.tsv")
assert result.output == f"Writing: {file1}, {file2}\n"


def test_download_zenodo_bad(tmpdir):
    """A URL that is not a Zenodo link reports the regex mismatch"""
    result = CliRunner().invoke(
        download_zenodo, ["https://blah.com", "--dest", tmpdir]
    )
    expected = "https://blah.com does not match any expected regex for Zenodo\n"
    assert result.exit_code == 0
    assert result.output == expected


def test_download_zenodo_sandbox(tmpdir):
    """A record hosted on the Zenodo sandbox can be downloaded"""
    sandbox_url = "https://sandbox.zenodo.org/record/657937"
    image_name = "marines-sniper-rifle-aiming-scope-weapon-shooting-special-gun.jpg"
    result = CliRunner().invoke(download_zenodo, [sandbox_url, "--dest", tmpdir])
    assert result.exit_code == 0
    expected_path = os.path.join(tmpdir, image_name)
    assert result.output == f"Writing: {expected_path}\n"


def test_download_meta(tmpdir):
    """Test downloading a meta.yaml along with its linked data file"""
    runner = CliRunner()
    base = "https://raw.githubusercontent.com/usnistgov/pfhub"
    end = "master/_data/simulations/fenics_1a_ivan/meta.yaml"
    # URLs always use "/" as the separator; os.path.join would insert the
    # platform separator instead (a backslash on Windows).
    yaml_url = "/".join([base, end])
    result = runner.invoke(download_meta, [yaml_url, "--dest", tmpdir])
    assert result.exit_code == 0
    file1 = os.path.join(tmpdir, "meta.yaml")
    file2 = os.path.join(tmpdir, "1a_square_periodic_out.csv")
    assert result.output == f"Writing: {file1}, {file2}\n"


def test_download_broken(tmpdir):
"""Test downloading if the URL is incorrect"""
def test_download_exist(tmpdir):
    """URL doesn't exist"""
    runner = CliRunner()
    yaml_url = "https://blah.com"
    result = runner.invoke(download_meta, [yaml_url, "--dest", tmpdir])
    assert result.exit_code == 0
    # The first output line is the connection error text; the second
    # line is the message naming the invalid URL.
    assert result.output.splitlines()[1] == "https://blah.com is invalid"


def test_download_not_file(tmpdir):
    """A URL that does not point at a file is reported as such"""
    result = CliRunner().invoke(
        download_meta, ["https://google.com", "--dest", tmpdir]
    )
    expected = "https://google.com is not a link to a file\n"
    assert result.exit_code == 0
    assert result.output == expected


def test_download_not_valid(tmpdir):
    """A YAML file that is not a PFHub meta.yaml is rejected"""
    yaml_url = "https://raw.githubusercontent.com/usnistgov/pfhub/master/.travis.yml"
    result = CliRunner().invoke(download_meta, [yaml_url, "--dest", tmpdir])
    expected = f"{yaml_url} is not a valid PFHub meta.yaml\n"
    assert result.exit_code == 0
    assert result.output == expected


def test_convert_to_zenodo(tmpdir):
    """Conversion from meta.yaml to pfhub.yaml and its data file"""
    runner = CliRunner()
    base = "https://raw.githubusercontent.com/usnistgov/pfhub"
    end = "master/_data/simulations/fenics_1a_ivan/meta.yaml"
    yaml_url = "/".join([base, end])
    runner.invoke(download_meta, [yaml_url, "--dest", tmpdir])
    yaml_path = os.path.join(tmpdir, "meta.yaml")
    # Fail here, with a clear cause, if the setup download did not
    # produce the file that the conversion step depends on.
    assert os.path.exists(yaml_path)
    result = runner.invoke(convert_to_zenodo, [yaml_path, "--dest", tmpdir])
    file1 = os.path.join(tmpdir, "pfhub.yaml")
    file2 = os.path.join(tmpdir, "free_energy_1a.csv")
    assert result.exit_code == 0
    assert result.output == f"Writing: {file1}, {file2}\n"


def test_convert_to_zenodo_valid(tmpdir):
    """A YAML file that is not a valid PFHub meta.yaml is not converted"""
    here = os.path.split(__file__)[0]
    yaml_path = os.path.join(here, "..", "templates", "8a_data.yaml")
    result = CliRunner().invoke(convert_to_zenodo, [yaml_path, "--dest", tmpdir])
    expected = f"{yaml_path} is not a valid PFHub meta.yaml\n"
    assert result.exit_code == 0
    assert result.output == expected


def test_convert_to_zenodo_not_yaml(tmpdir):
    """Test if not a YAML"""
    runner = CliRunner()
    # This Python source file stands in for an input that is not a
    # meta.yaml at all.
    result = runner.invoke(convert_to_zenodo, [__file__, "--dest", tmpdir])
    assert result.exit_code == 0
    assert result.output == f"{__file__} is not a valid PFHub meta.yaml\n"
7 changes: 6 additions & 1 deletion _data/python-pfhub/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@ def setup_args():
return {
"packages": find_packages(),
"package_data": {
"": ["tests/*.py", "templates/*.mustache", "templates/*.yaml"]
"": [
"tests/*.py",
"templates/*.mustache",
"templates/*.yaml",
"schema/*.yaml",
]
},
"include_package_data": True,
"data_files": ["setup.cfg"],
Expand Down

0 comments on commit e7b0cfe

Please sign in to comment.