Refactored the code base #2

Open · wants to merge 9 commits into master
2 changes: 1 addition & 1 deletion LICENSE
@@ -1,7 +1,7 @@
BSD-style license
=================

Copyright (c) 2010, Michael Stephens
Copyright (c) 2013, Michael Stephens

All rights reserved.

42 changes: 19 additions & 23 deletions README.rst
@@ -6,10 +6,11 @@ A Python library for querying the DuckDuckGo API.

Copyright Michael Stephens <me@mikej.st>, released under a BSD-style license.

Source: http://github.com/crazedpsyc/python-duckduckgo
Original source: http://github.com/mikejs/python-duckduckgo (outdated)
Source: http://github.com/djinn/python-duckduckgo
Previous source: http://github.com/crazedpsyc/python-duckduckgo
Original source: http://github.com/mikejs/python-duckduckgo (outdated)

This version has been forked from the original to handle some new features of the API, and switch from XML to JSON.
This version has been forked from the original to provide a cleaner interface. It uses namedtuples extensively instead of data classes, and it talks only to the JSON API.
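
Because every object in a response is a namedtuple, results are immutable and can be unpacked or converted to plain dictionaries. For illustration, a small sketch of the namedtuple interface (field names taken from this version)::

    >>> import duckduckgo
    >>> r = duckduckgo.query('DuckDuckGo')
    >>> r._fields
    ('type', 'api_version', 'heading', 'result', 'related', 'definition', 'abstract', 'redirect', 'answer', 'error_code', 'error_msg')
    >>> r.abstract._asdict()['source']
    u'Wikipedia'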

Installation
============
@@ -24,16 +25,18 @@ Usage
>>> import duckduckgo
>>> r = duckduckgo.query('DuckDuckGo')
>>> r.type
u'answer'
>>> r.results[0].text
'answer'
>>> r.result[0]
Result(html=u'<a href="https://duckduckgo.com/">Official site</a><a href="https://duckduckgo.com/"></a>', text=u'Official site', url=u'https://duckduckgo.com/', icon=Icon(url=u'https://i.duckduckgo.com/i/duckduckgo.com.ico', width=16, height=16))
>>> r.result[0].text
u'Official site'
>>> r.results[0].url
u'http://duckduckgo.com/'
>>> r.abstract
Abstract(primary=u'DuckDuckGo is an Internet search engine that uses information from many sources, such as crowdsourced websites like Wikipedia and from partnerships with other search engines like Yandex, Yahoo!, Bing and WolframAlpha to obtain its results.', url=u'https://en.wikipedia.org/wiki/DuckDuckGo', text=u'DuckDuckGo is an Internet search engine that uses information from many sources, such as crowdsourced websites like Wikipedia and from partnerships with other search engines like Yandex, Yahoo!, Bing and WolframAlpha to obtain its results.', source=u'Wikipedia')
>>> r.abstract.url
u'http://en.wikipedia.org/wiki/Duck_Duck_Go'
u'https://en.wikipedia.org/wiki/DuckDuckGo'
>>> r.abstract.source
u'Wikipedia'

>>> r = duckduckgo.query('Python')
>>> r.type
u'disambiguation'
@@ -44,26 +47,19 @@ Usage
>>> r.related[7].topics[0].text # weird, but this is how the DDG API is currently organized
u'Armstrong Siddeley Python, an early turboprop engine'


>>> r = duckduckgo.query('1 + 1')
>>> r.type
u'nothing'
>>> r.answer.text
'exclusive'
>>> r.answer.primary
u'1 + 1 = 2'
>>> r.answer.type
u'calc'

>>> print duckduckgo.query('19301', kad='es_ES').answer.text
19301 es un código postal de Paoli, PA
>>> print duckduckgo.query('how to spell test', html=True).answer.text
<b>Test</b> appears to be spelled right!<br/><i>Suggestions: </i>test, testy, teat, tests, rest, yest.

The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci::
>>> print duckduckgo.get_zci('foo')
The terms foobar /ˈfʊːbɑːr/, fubar, or foo, bar, baz and qux are sometimes used as placeholder names in computer programming or computer-related documentation. (https://en.wikipedia.org/wiki/Foobar)
>>> print ddg.get_zci('foo fighters site')
http://www.foofighters.com/us/home


>>> print duckduckgo.query('how to spell test', html=True).answer.primary
<b>Test</b> appears to be spelled correctly!<br/><i>Suggestions:</i> <a href='/?q=define+test'>test</a> <a href='/?q=define+testy'>testy</a> <a href='/?q=define+teat'>teat</a> <a href='/?q=define+tests'>tests</a> <a href='/?q=define+rest'>rest</a> <a href='/?q=define+yest'>yest</a> .
The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci::
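
    >>> print duckduckgo.get_zci('foo')
    The terms foobar /ˈfʊːbɑːr/, fubar, or foo, bar, baz and qux are sometimes used as placeholder names in computer programming or computer-related documentation. (https://en.wikipedia.org/wiki/Foobar)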

Special keyword args for query():
- useragent - string, The useragent used to make API calls. This is somewhat irrelevant, as they are not logged or used on DuckDuckGo, but it is retained for backwards compatibility.
- safesearch - boolean, enable or disable safesearch.
41 changes: 41 additions & 0 deletions dot_example.py
@@ -0,0 +1,41 @@
from duckduckgo import query, Topic
from sys import argv
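
# Example invocation (assumes Graphviz's `dot` is installed; file names are illustrative):
#   python dot_example.py "DuckDuckGo" > web.dot
#   dot -Tpng web.dot -o web.png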
visited = []  # queries that have already been expanded, to avoid revisiting nodes
depth_color = {  # node fill colour per recursion depth
    0: 'green',
    1: '#A52A2A',
    2: 'grey',
    3: 'blue'
}

def build_web_tree(node, qr, depth=0):
    # Emit a DOT node for the query `qr`, link it to its parent `node`,
    # then recurse into related topics up to a fixed depth.
    cooked_qr = qr.replace('"', '\\"')
    print '"%s" [label="%s", shape="hexagon", style="filled", color="%s"];' % (cooked_qr, cooked_qr, depth_color[depth])
    if node is not None:
        print '"%s" -> "%s";' % (node, cooked_qr)
    ds = query(qr)
    if depth == 3:
        return
    if ds.error_code != 0:
        return
    visited.append(qr)
    if not ds.related:
        return
    for r in ds.related:
        if isinstance(r, Topic):
            r_used = r.name.encode('ascii', 'ignore')
        else:
            r_used = r.text.encode('ascii', 'ignore').split('-')[0].strip()
        if r_used not in visited:
            build_web_tree(qr, r_used, depth=depth + 1)


if __name__ == '__main__':
    print """digraph G {
ranksep=3;
ratio=auto;"""
    build_web_tree(None, ' '.join(argv[1:]))
    print "}"
245 changes: 119 additions & 126 deletions duckduckgo.py
@@ -2,9 +2,31 @@
import urllib2
import json as j
import sys
from collections import namedtuple

__version__ = 0.242

Response = namedtuple('Response', ['type', 'api_version', 'heading', 'result',
                                   'related', 'definition', 'abstract',
                                   'redirect', 'answer', 'error_code',
                                   'error_msg'])
Result = namedtuple('Result', ['html', 'text', 'url', 'icon'])
Related = namedtuple('Related', ['html', 'text', 'url', 'icon'])
Definition = namedtuple('Definition', ['primary', 'url', 'source'])
Abstract = namedtuple('Abstract', ['primary', 'url', 'text', 'source'])
Redirect = namedtuple('Redirect', ['primary'])
Icon = namedtuple('Icon', ['url', 'width', 'height'])
Topic = namedtuple('Topic', ['name', 'results'])
Answer = namedtuple('Answer', ['primary', 'type'])
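
# query() returns a Response. result and related are lists of Result, Related or
# Topic entries, while abstract, definition, redirect and answer are the matching
# namedtuples, or None when that section of the API response is absent.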



def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=True, html=False, meanings=True, **kwargs):
"""
@@ -42,138 +64,109 @@ def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=Tru
    params.update(kwargs)
    encparams = urllib.urlencode(params)
    url = 'http://api.duckduckgo.com/?' + encparams

    request = urllib2.Request(url, headers={'User-Agent': useragent})
    response = urllib2.urlopen(request)
    json = j.loads(response.read())
    response.close()

    return Results(json)


class Results(object):

    def __init__(self, json):
        self.type = {'A': 'answer', 'D': 'disambiguation',
                     'C': 'category', 'N': 'name',
                     'E': 'exclusive', '': 'nothing'}.get(json.get('Type',''), '')

        self.json = json
        self.api_version = None # compat

        self.heading = json.get('Heading', '')

        self.results = [Result(elem) for elem in json.get('Results',[])]
        self.related = [Result(elem) for elem in
                        json.get('RelatedTopics',[])]

        self.abstract = Abstract(json)
        self.redirect = Redirect(json)
        self.definition = Definition(json)
        self.answer = Answer(json)

        self.image = Image({'Result':json.get('Image','')})


class Abstract(object):

    def __init__(self, json):
        self.html = json.get('Abstract', '')
        self.text = json.get('AbstractText', '')
        self.url = json.get('AbstractURL', '')
        self.source = json.get('AbstractSource')

class Redirect(object):

    def __init__(self, json):
        self.url = json.get('Redirect', '')
    # error_code: 0 = success, 1 = network/URL error, 2 = unparseable API response
    try:
        response = urllib2.urlopen(request)
    except urllib2.URLError, e:
        return Response(type='Error', api_version=__version__,
                        heading=None, redirect=None,
                        abstract=None,
                        definition=None,
                        answer=None,
                        related=None,
                        result=None, error_code=1,
                        error_msg=str(e))

    try:
        json = j.loads(response.read())
    except Exception, e:
        return Response(type='Error', api_version=__version__,
                        heading=None, redirect=None,
                        abstract=None,
                        definition=None,
                        answer=None,
                        related=None,
                        result=None, error_code=2,
                        error_msg='Malformed data from the API')

class Result(object):

    def __init__(self, json):
        self.topics = json.get('Topics', [])
        if self.topics:
            self.topics = [Result(t) for t in self.topics]
            return
        self.html = json.get('Result')
        self.text = json.get('Text')
        self.url = json.get('FirstURL')

        icon_json = json.get('Icon')
        if icon_json is not None:
            self.icon = Image(icon_json)
        else:
            self.icon = None


class Image(object):

    def __init__(self, json):
        self.url = json.get('Result')
        self.height = json.get('Height', None)
        self.width = json.get('Width', None)


class Answer(object):

    def __init__(self, json):
        self.text = json.get('Answer')
        self.type = json.get('AnswerType', '')

class Definition(object):
    def __init__(self, json):
        self.text = json.get('Definition','')
        self.url = json.get('DefinitionURL')
        self.source = json.get('DefinitionSource')


def get_zci(q, web_fallback=True, priority=['answer', 'abstract', 'related.0', 'definition'], urls=True, **kwargs):
    '''A helper method to get a single (and hopefully the best) ZCI result.
    priority=list can be used to set the order in which fields will be checked for answers.
    Use web_fallback=True to fall back to grabbing the first web result.
    passed to query. This method will fall back to 'Sorry, no results.'
    if it cannot find anything.'''

    ddg = query('\\'+q, **kwargs)
    response = ''

    for p in priority:
        ps = p.split('.')
        type = ps[0]
        index = int(ps[1]) if len(ps) > 1 else None

        result = getattr(ddg, type)
        if index is not None:
            if not hasattr(result, '__getitem__'): raise TypeError('%s field is not indexable' % type)
            result = result[index] if len(result) > index else None
        if not result: continue

        if result.text: response = result.text
        if result.text and hasattr(result,'url') and urls:
            if result.url: response += ' (%s)' % result.url
        if response: break

    # if there still isn't anything, try to get the first web result
    if not response and web_fallback:
        if ddg.redirect.url:
            response = ddg.redirect.url

    # final fallback
    if not response:
        response = 'Sorry, no results.'
    response.close()

    return response

    return process_results(json)






def result_deserialize(dataset, obj_type):
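    # Deserialize one entry of 'Results'/'RelatedTopics' into obj_type
    # (Result or Related); entries that carry a 'Topics' list come back as a
    # Topic wrapping their deserialized sub-results.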
    d = dataset
    topics = None
    if 'Topics' in d:
        results = [result_deserialize(t, Result) for t in d['Topics']]
        return Topic(d['Name'], results=results)
    text = d['Text']
    url = d['FirstURL']
    html = d['Result']
    i_url = d['Icon']['URL']
    i_width = d['Icon']['Width']
    i_height = d['Icon']['Height']
    icon = None
    if i_url != '':
        icon = Icon(url=i_url, width=i_width,
                    height=i_height)
    dt = obj_type(text=text, url=url, html=html,
                  icon=icon)
    return dt



def search_deserialize(dataset, prefix, obj_type):
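    # Gather every key that extends `prefix` (e.g. 'AbstractText', 'AbstractURL',
    # 'AbstractSource' for prefix 'Abstract') into keyword arguments for obj_type,
    # storing dataset[prefix] itself as the 'primary' field. Returns None when
    # the primary value is empty.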
    if dataset[prefix] == '':
        return None
    keys = dataset.keys()
    required = [k for k in keys if k.startswith(prefix) and k != prefix]
    unq_required = [r.split(prefix)[1].lower() for r in required]
    args = {ur: dataset[r] for ur, r in zip(unq_required, required)}
    if prefix in dataset:
        args['primary'] = dataset[prefix]
    return obj_type(**args)



def process_results(json):
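    # Translate the one-letter 'Type' code into a readable name, deserialize
    # each section of the JSON payload into its namedtuple, and wrap everything
    # in a Response.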
    resp_type = {'A': 'answer',
                 'D': 'disambiguation',
                 'C': 'category',
                 'N': 'name',
                 'E': 'exclusive',
                 '': 'nothing'}.get(json.get('Type', ''), '')
    if resp_type == 'nothing':
        return Response(type='nothing', api_version=__version__, heading=None,
                        result=None, related=None, definition=None,
                        abstract=None, redirect=None, answer=None,
                        error_code=0, error_msg=None)

    redirect = search_deserialize(json, 'Redirect', Redirect)
    abstract = search_deserialize(json, 'Abstract', Abstract)
    definition = search_deserialize(json, 'Definition', Definition)
    js_results = json.get('Results', [])
    results = [result_deserialize(jr, Result) for jr in js_results]
    js_related = json.get('RelatedTopics', [])
    related = [result_deserialize(jr, Related) for jr in js_related]
    answer = search_deserialize(json, 'Answer', Answer)
    return Response(type=resp_type, api_version=__version__,
                    heading=json.get('Heading', ''), redirect=redirect,
                    abstract=abstract,
                    definition=definition,
                    answer=answer,
                    related=related,
                    result=results, error_code=0,
                    error_msg=None)

def main():
    if len(sys.argv) > 1:
        q = query(' '.join(sys.argv[1:]))
        # Response is a namedtuple, so walk its fields rather than the raw JSON
        for field, value in q._asdict().items():
            print '%s: %s' % (field, value)
    else:
        print 'Usage: %s [query]' % sys.argv[0]
