Skip to content

Commit

Permalink
Add missing fields, better license selector, use ordered output
Browse files (browse the repository at this point in the history)
  • Loading branch information
rlafuente committed Nov 12, 2016
1 parent 8609b81 commit 18cc2b6
Showing 1 changed file with 62 additions and 8 deletions.
70 changes: 62 additions & 8 deletions datapaka
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import unicodecsv as csv
from jsontableschema import infer
import glob
import json
from collections import OrderedDict

'''
- do we have a data/ directory?
Expand All @@ -18,7 +19,9 @@ import json
from colorama import init, Fore, Back, Style
init(autoreset=True)

# Create the package once, backing its descriptor with an OrderedDict so the
# keys keep the order in which they are filled in below (fixed output order).
dp = datapackage.DataPackage(descriptor=OrderedDict())

# Splash screen :3
print Fore.YELLOW + " ___ ___ "
Expand Down Expand Up @@ -54,7 +57,7 @@ dp.descriptor['name'] = raw_input(Style.BRIGHT + "? ") or default_slug

# Description
print "Give me a short, human " + Style.BRIGHT + "description" + Style.RESET_ALL + " for this data package."
print Style.DIM + "A description gives more information about what's inside. 'Property prices in Berlin in April 2015' or 'UN population density statistics for the 2003-2013 period' let people know more specific details. (optional field)"
print Style.DIM + "A description gives more information about what's inside. 'Property prices in Berlin in April 2015' or 'UN population density statistics for the 2003-2013 period' let people know more specific details. Markdown formatting can be used. (optional field)"
dp.descriptor['description'] = raw_input(Style.BRIGHT + "? ")

# Version
Expand All @@ -63,11 +66,34 @@ print "What's the package " + Style.BRIGHT + "version number" + Style.RESET_ALL
print Style.DIM + "The version number tracks changes and improvements on the data package. If you're starting a new one, go with the default value of 0.1.0, and update it whenever there is a change in the data layout."
dp.descriptor['version'] = raw_input(Style.BRIGHT + "? ") or default_version

# Homepage
print "What's the " + Style.BRIGHT + "homepage" + Style.RESET_ALL + " for this data package?"
print Style.DIM + "Please include the 'http://' prefix. (optional field)"
h = raw_input(Style.BRIGHT + "? ")
if h:
dp.descriptor['homepage'] = h

# License
default_license = "PDDL-1.0"
default_license = "1"
license_options = {
"1": "ODC-PDDL-1.0",
"2": "ODbL-1.0",
"3": "ODC-BY-1.0",
"4": "CC0-1.0",
"5": "CC-BY-4.0",
"6": "CC-BY-SA-4.0",
}
print "What's the package " + Style.BRIGHT + "license" + Style.RESET_ALL + "? [Leave blank for '%s'] " % default_license
print Style.DIM + "The license field is the identifier for the license this package is to be published under. We'll have a better menu to pick licenses, but in the meantime you can find the license identifiers at http://licenses.opendefinition.org/. The default is the Public Domain Dedication License."
dp.descriptor['license'] = raw_input(Style.BRIGHT + "? ") or default_license
print Style.DIM + "The license field states the license under which this package is to be published. Read more about each license at http://opendefinition.org/licenses/."
print
print('1) Public Domain Dedication and License (public domain)')
print('2) Open Database License (attribution, sharealike)')
print('3) Open Data Commons Attribution License (attribution)')
print('4) Creative Commons Zero (public domain)')
print('5) Creative Commons Attribution')
print('6) Creative Commons Attribution-ShareAlike')
selected = raw_input(Style.BRIGHT + "? ") or default_license
dp.descriptor['license'] = license_options[selected.strip()]

# Sources
dp.descriptor['sources'] = []
Expand All @@ -83,6 +109,34 @@ while another:
if add_another not in ("y", "Y"):
another = False

# Contributors
dp.descriptor['contributors'] = []
another = True
print "Now I want to know about the " + Style.BRIGHT + "contributors" + Style.RESET_ALL + " of this data package."
print Style.DIM + "I will ask you the name, e-mail and website URL for each, and then you'll be asked if you want to input another contributor or move ahead."
while another:
contrib_name = raw_input(Style.BRIGHT + "Contributor name? ")
contrib_email = raw_input(Style.BRIGHT + "Contributor e-mail (optional)? ")
contrib_url = raw_input(Style.BRIGHT + "Contributor URL (optional)? ")
c = {"name": contrib_name}
if contrib_email:
c["email"] = contrib_email
if contrib_url:
c["web"] = contrib_url
dp.descriptor['contributors'].append(c)
print
add_another = raw_input(Style.BRIGHT + "Add another contributor? [n]")
if add_another not in ("y", "Y"):
another = False

# Keywords
print "Tell me some " + Style.BRIGHT + "keywords" + Style.RESET_ALL + " (tags) for this package so that users can find it in catalogs. [optional]"
print Style.DIM + "Keywords can have spaces; separate distinct keywords with commas."
k = raw_input(Style.BRIGHT + "? ")
if k:
dp.descriptor['keywords'] = [w.strip() for w in k.split(",")]


# CSV files
dp.descriptor['resources'] = []
csv_files = glob.glob('data/*.csv')
Expand All @@ -101,14 +155,14 @@ for filepath in csv_files:
slug = raw_input(" Slug for this file? [%s]" % default_slug)
resource_name = slug or default_slug
with open(filepath, 'rb') as f:
headers = f.readline().rstrip('\n').split(',')
headers = [s.decode("utf-8") for s in f.readline().rstrip('\n').split(',')]
values = csv.reader(f, encoding="utf-8")
print " Inferring column types, this might take a bit..."
schema = infer(headers, values)

fields = []
for field in schema['fields']:
fieldname = field['name']
print type(fieldname)
field['title'] = raw_input(" Human title for field '%s'? " % fieldname)
field['description'] = raw_input(" Short description for field '%s'? " % fieldname)
fields.append(field)
Expand All @@ -127,7 +181,7 @@ if not os.path.exists('datapackage.json'):
outfilename = 'datapackage.json'
else:
outfilename = 'datapackage-new.json'
# Write to the filename chosen above (not a hard-coded one) so the file on
# disk is the same one that is reported to the user afterwards.
with codecs.open(outfilename, 'w', 'utf-8') as f:
    f.write(out)

print "All done! File saved as " + Style.BRIGHT + outfilename + Style.RESET_ALL + ". Ta!"

0 comments on commit 18cc2b6

Please sign in to comment.