From d9b5c5af71e21901cd22a9ddf26ac8dcbdbc6b61 Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Fri, 13 Sep 2013 16:45:08 +0530 Subject: [PATCH 1/9] Refactored the code base --- LICENSE | 2 +- README.rst | 7 +- duckduckgo.py | 210 +++++++++++++++++++++----------------------------- 3 files changed, 92 insertions(+), 127 deletions(-) diff --git a/LICENSE b/LICENSE index f3e2506..5551c6e 100644 --- a/LICENSE +++ b/LICENSE @@ -1,7 +1,7 @@ BSD-style license ================= -Copyright (c) 2010, Michael Stephens +Copyright (c) 2013, Michael Stephens All rights reserved. diff --git a/README.rst b/README.rst index 99aa8c3..013c2b2 100644 --- a/README.rst +++ b/README.rst @@ -6,10 +6,11 @@ A Python library for querying the DuckDuckGo API. Copyright Michael Stephens , released under a BSD-style license. -Source: http://github.com/crazedpsyc/python-duckduckgo -Original source: http://github.com/mikejs/python-duckduckgo (outdated) +Source: http://github.com/djinn/python-duckduckgo +Original Source: http://github.com/crazedpsyc/python-duckduckgo +Original Original Source: http://github.com/mikejs/python-duckduckgo (outdated) -This version has been forked from the original to handle some new features of the API, and switch from XML to JSON. +This version has been forked from the original to be able to allow cleaner interface. It extensively uses namedtuples instead of data classes. This API only interfaces with JSON API Installation ============ diff --git a/duckduckgo.py b/duckduckgo.py index 04ed1a5..1bf7670 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -2,6 +2,7 @@ import urllib2 import json as j import sys +from collections import namedtuple __version__ = 0.242 @@ -48,132 +49,95 @@ def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=Tru json = j.loads(response.read()) response.close() - return Results(json) - - -class Results(object): - - def __init__(self, json): - self.type = {'A': 'answer', 'D': 'disambiguation', - 'C': 'category', 'N': 'name', - 'E': 'exclusive', '': 'nothing'}.get(json.get('Type',''), '') - - self.json = json - self.api_version = None # compat - - self.heading = json.get('Heading', '') - - self.results = [Result(elem) for elem in json.get('Results',[])] - self.related = [Result(elem) for elem in - json.get('RelatedTopics',[])] - - self.abstract = Abstract(json) - self.redirect = Redirect(json) - self.definition = Definition(json) - self.answer = Answer(json) - - self.image = Image({'Result':json.get('Image','')}) - - -class Abstract(object): - - def __init__(self, json): - self.html = json.get('Abstract', '') - self.text = json.get('AbstractText', '') - self.url = json.get('AbstractURL', '') - self.source = json.get('AbstractSource') - -class Redirect(object): - - def __init__(self, json): - self.url = json.get('Redirect', '') - -class Result(object): - - def __init__(self, json): - self.topics = json.get('Topics', []) - if self.topics: - self.topics = [Result(t) for t in self.topics] - return - self.html = json.get('Result') - self.text = json.get('Text') - self.url = json.get('FirstURL') - - icon_json = json.get('Icon') - if icon_json is not None: - self.icon = Image(icon_json) - else: - self.icon = None - - -class Image(object): - - def __init__(self, json): - self.url = json.get('Result') - self.height = json.get('Height', None) - self.width = json.get('Width', None) - - -class Answer(object): - - def __init__(self, json): - self.text = json.get('Answer') - self.type = json.get('AnswerType', '') - -class Definition(object): - def __init__(self, json): - self.text = json.get('Definition','') - self.url = json.get('DefinitionURL') - self.source = json.get('DefinitionSource') - - -def get_zci(q, web_fallback=True, priority=['answer', 'abstract', 'related.0', 'definition'], urls=True, **kwargs): - '''A helper method to get a single (and hopefully the best) ZCI result. - priority=list can be used to set the order in which fields will be checked for answers. - Use web_fallback=True to fall back to grabbing the first web result. - passed to query. This method will fall back to 'Sorry, no results.' - if it cannot find anything.''' - - ddg = query('\\'+q, **kwargs) - response = '' - - for p in priority: - ps = p.split('.') - type = ps[0] - index = int(ps[1]) if len(ps) > 1 else None - - result = getattr(ddg, type) - if index is not None: - if not hasattr(result, '__getitem__'): raise TypeError('%s field is not indexable' % type) - result = result[index] if len(result) > index else None - if not result: continue - - if result.text: response = result.text - if result.text and hasattr(result,'url') and urls: - if result.url: response += ' (%s)' % result.url - if response: break - - # if there still isn't anything, try to get the first web result - if not response and web_fallback: - if ddg.redirect.url: - response = ddg.redirect.url - - # final fallback - if not response: - response = 'Sorry, no results.' - - return response + return process_results(json) + + +Response = namedtuple('Response', ['type', 'api_version', + 'heading', 'result', + 'related', 'definition', + 'abstract', 'redirect', + 'answer']) + +Result = namedtuple('Result', ['html', + 'text', 'url', + 'icon']) +Related = namedtuple('Related', ['html', 'text', + 'url', 'icon']) +Definition = namedtuple('Definition', ['primary','url', 'source']) + +Abstract = namedtuple('Abstract', ['primary', 'url', + 'text', 'source']) +Redirect = namedtuple('Redirect', ['primary',]) +Icon = namedtuple('Icon', ['url', 'width', 'height']) +Topic = namedtuple('Topic',['name', 'results']) +Answer = namedtuple('Answer', ['primary', 'type']) + + + + +def result_deserialize(dataset, obj_type): + d = dataset + topics = None + if 'Topics' in d: + results = [result_deserialize(t, Result) for t in d['Topics']] + return Topic(d['Name'], results=results) + text = d['Text'] + url = d['FirstURL'] + html = d['Result'] + i_url = d['Icon']['URL'] + i_width = d['Icon']['Width'] + i_height = d['Icon']['Height'] + icon = None + if i_url != '': + icon = Icon(url=i_url, width=i_width, + height=i_height) + dt = obj_type(text=text, url=url, html=html, + icon=icon) + return dt + + + +def search_deserialize(dataset, prefix, obj_type): + keys = dataset.keys() + required = filter(lambda x: x.startswith(prefix) and x != prefix, keys) + unq_required = [r.split(prefix)[1].lower() for r in required] + args = {ur: dataset[r] for ur, r in map(None, unq_required, required)} + if prefix in dataset: + args['primary'] = dataset[prefix] + return obj_type(**args) + + + +def process_results(json): + resp_type = {'A': 'answer', + 'D': 'disambiguation', + 'C': 'category', + 'N': 'name', + 'E': 'exclusive', + '': 'nothing'}.get(json.get('Type',''), '') + + redirect = search_deserialize(json, 'Redirect', Redirect) + abstract = search_deserialize(json, 'Abstract', Abstract) + definition = search_deserialize(json, 'Definition', Definition) + js_results = json.get('Results', []) + results = [result_deserialize(jr, Result) for jr in js_results] + js_related = json.get('RelatedTopics', []) + related = [result_deserialize(jr, Related) for jr in js_related] + answer = search_deserialize(json, 'Answer', Answer) + return Response(type=resp_type, api_version=__version__, + heading='', redirect=redirect, + abstract=abstract, + definition=definition, + answer=answer, + related=related, + result=results) def main(): if len(sys.argv) > 1: q = query(' '.join(sys.argv[1:])) - keys = q.json.keys() - keys.sort() - for key in keys: - sys.stdout.write(key) - if type(q.json[key]) in [str,unicode]: print(':', q.json[key]) - else: - sys.stdout.write('\n') - for i in q.json[key]: print('\t',i) + print q else: print('Usage: %s [query]' % sys.argv[0]) + +if __name__ == '__main__': + main() From a2504e95b0d98367215f696ce42bb7ddd6632c81 Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Fri, 13 Sep 2013 17:33:44 +0530 Subject: [PATCH 2/9] Cleaned up the documentation --- README.rst | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/README.rst b/README.rst index 013c2b2..15a28bb 100644 --- a/README.rst +++ b/README.rst @@ -25,16 +25,18 @@ Usage >>> import duckduckgo >>> r = duckduckgo.query('DuckDuckGo') >>> r.type - u'answer' - >>> r.results[0].text + 'answer' + >>> r.result[0] + Result(html=u'Official site', text=u'Official site', url=u'https://duckduckgo.com/', icon=Icon(url=u'https://i.duckduckgo.com/i/duckduckgo.com.ico', width=16, height=16)) + >>> r.result[0].text u'Official site' - >>> r.results[0].url - u'http://duckduckgo.com/' + >>> r.abstract + Abstract(primary=u'DuckDuckGo is an Internet search engine that uses information from many sources, such as crowdsourced websites like Wikipedia and from partnerships with other search engines like Yandex, Yahoo!, Bing and WolframAlpha to obtain its results.', url=u'https://en.wikipedia.org/wiki/DuckDuckGo', text=u'DuckDuckGo is an Internet search engine that uses information from many sources, such as crowdsourced websites like Wikipedia and from partnerships with other search engines like Yandex, Yahoo!, Bing and WolframAlpha to obtain its results.', source=u'Wikipedia') >>> r.abstract.url - u'http://en.wikipedia.org/wiki/Duck_Duck_Go' + u'https://en.wikipedia.org/wiki/DuckDuckGo' >>> r.abstract.source u'Wikipedia' - + >>> r = duckduckgo.query('Python') >>> r.type u'disambiguation' @@ -45,26 +47,19 @@ Usage >>> r.related[7].topics[0].text # weird, but this is how the DDG API is currently organized u'Armstrong Siddeley Python, an early turboprop engine' - >>> r = duckduckgo.query('1 + 1') >>> r.type - u'nothing' - >>> r.answer.text + 'exclusive' + >>> r.answer.primary u'1 + 1 = 2' >>> r.answer.type u'calc' - >>> print duckduckgo.query('19301', kad='es_ES').answer.text - 19301 es un código postal de Paoli, PA - >>> print duckduckgo.query('how to spell test', html=True).answer.text - Test appears to be spelled right!
Suggestions: test, testy, teat, tests, rest, yest. - + + >>> print duckduckgo.query('how to spell test', html=True).answer.primary +Test appears to be spelled correctly!
Suggestions: test testy teat tests rest yest . The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci:: - >>> print duckduckgo.get_zci('foo') - The terms foobar /ˈfʊːbɑːr/, fubar, or foo, bar, baz and qux are sometimes used as placeholder names in computer programming or computer-related documentation. (https://en.wikipedia.org/wiki/Foobar) - >>> print ddg.get_zci('foo fighters site') - http://www.foofighters.com/us/home - + Special keyword args for query(): - useragent - string, The useragent used to make API calls. This is somewhat irrelevant, as they are not logged or used on DuckDuckGo, but it is retained for backwards compatibility. - safesearch - boolean, enable or disable safesearch. From 1c06c0228460bba9fb818aa01b5fc5066bd6d60b Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Fri, 13 Sep 2013 17:38:13 +0530 Subject: [PATCH 3/9] Clearing issues with rest interface --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 15a28bb..3f528d8 100644 --- a/README.rst +++ b/README.rst @@ -57,8 +57,8 @@ Usage >>> print duckduckgo.query('how to spell test', html=True).answer.primary -Test appears to be spelled correctly!
Suggestions: test testy teat tests rest yest . -The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci:: + Test appears to be spelled correctly!
Suggestions: test testy teat tests rest yest . + The easiest method of quickly grabbing the best (hopefully) API result is to use duckduckgo.get_zci:: Special keyword args for query(): - useragent - string, The useragent used to make API calls. This is somewhat irrelevant, as they are not logged or used on DuckDuckGo, but it is retained for backwards compatibility. From d0e56c0cbedacc34f21ec08ac7cb46513b9e2a72 Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Fri, 13 Sep 2013 23:16:47 +0530 Subject: [PATCH 4/9] cleaned up duckduckgo. wrote tests one interesting example included --- duckduckgo.py | 81 +++++++++++++++++++++----------- examples.py | 26 +++++++++++ tests.py | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 206 insertions(+), 26 deletions(-) create mode 100644 examples.py create mode 100644 tests.py diff --git a/duckduckgo.py b/duckduckgo.py index 1bf7670..dd584eb 100755 --- a/duckduckgo.py +++ b/duckduckgo.py @@ -6,6 +6,27 @@ __version__ = 0.242 +Response = namedtuple('Response', ['type', 'api_version', + 'heading', 'result', + 'related', 'definition', + 'abstract', 'redirect', + 'answer', 'error_code', + 'error_msg']) +Result = namedtuple('Result', ['html', + 'text', 'url', + 'icon']) +Related = namedtuple('Related', ['html', 'text', + 'url', 'icon']) +Definition = namedtuple('Definition', ['primary','url', 'source']) + +Abstract = namedtuple('Abstract', ['primary', 'url', + 'text', 'source']) +Redirect = namedtuple('Redirect', ['primary',]) +Icon = namedtuple('Icon', ['url', 'width', 'height']) +Topic = namedtuple('Topic',['name', 'results']) +Answer = namedtuple('Answer', ['primary', 'type']) + + def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=True, html=False, meanings=True, **kwargs): """ @@ -43,34 +64,36 @@ def query(query, useragent='python-duckduckgo '+str(__version__), safesearch=Tru params.update(kwargs) encparams = urllib.urlencode(params) url = 'http://api.duckduckgo.com/?' + encparams - request = urllib2.Request(url, headers={'User-Agent': useragent}) - response = urllib2.urlopen(request) - json = j.loads(response.read()) + try: + response = urllib2.urlopen(request) + except urllib2.URLError, e: + return Response(type='Error', api_version=__version__, + heading=None, redirect=None, + abstract=None, + definition=None, + answer=None, + related=None, + result=None, error_code=1, + error_msg=str(e)) + + try: + json = j.loads(response.read()) + except Exception, e: + return Response(type='Error', api_version=__version__, + heading=None, redirect=None, + abstract=None, + definition=None, + answer=None, + related=None, + result=None, error_code=2, + error_msg='Data from api malformed') + response.close() return process_results(json) -Response = namedtuple('Response', ['type', 'api_version', - 'heading', 'result', - 'related', 'definition', - 'abstract', 'redirect', - 'answer']) - -Result = namedtuple('Result', ['html', - 'text', 'url', - 'icon']) -Related = namedtuple('Related', ['html', 'text', - 'url', 'icon']) -Definition = namedtuple('Definition', ['primary','url', 'source']) - -Abstract = namedtuple('Abstract', ['primary', 'url', - 'text', 'source']) -Redirect = namedtuple('Redirect', ['primary',]) -Icon = namedtuple('Icon', ['url', 'width', 'height']) -Topic = namedtuple('Topic',['name', 'results']) -Answer = namedtuple('Answer', ['primary', 'type']) @@ -98,6 +121,8 @@ def result_deserialize(dataset, obj_type): def search_deserialize(dataset, prefix, obj_type): + if dataset[prefix] == '': + return None keys = dataset.keys() required = filter(lambda x: x.startswith(prefix) and x != prefix, keys) unq_required = [r.split(prefix)[1].lower() for r in required] @@ -115,7 +140,12 @@ def process_results(json): 'N': 'name', 'E': 'exclusive', '': 'nothing'}.get(json.get('Type',''), '') - + if resp_type == 'Nothing': + return Response(type='nothing', api_version=0.242, heading=None, + result=None, related=None, definition=None, + abstract=None, redirect=None, answer=None, + error_code=0, error_msg=None) + redirect = search_deserialize(json, 'Redirect', Redirect) abstract = search_deserialize(json, 'Abstract', Abstract) definition = search_deserialize(json, 'Definition', Definition) @@ -130,7 +160,8 @@ def process_results(json): definition=definition, answer=answer, related=related, - result=results) + result=results, error_code=0, + error_msg=None) def main(): if len(sys.argv) > 1: @@ -139,5 +170,3 @@ def main(): else: print('Usage: %s [query]' % sys.argv[0]) -if __name__ == '__main__': - main() diff --git a/examples.py b/examples.py new file mode 100644 index 0000000..350140a --- /dev/null +++ b/examples.py @@ -0,0 +1,26 @@ +from duckduckgo import query, Topic +from sys import argv +visited = [] + +def build_web_tree(qr, depth=0): + print ' '* depth * 4 + qr + ds = query(qr) + if depth == 2: + return + if ds.error_code != 0: + return + visited.append(qr) + if ds.related == []: + return + else: + for r in ds.related: + if isinstance(r, Topic) == False: + r_used = r.text.encode('ascii', 'ignore').split('-')[0].strip() + try: + visited.index(r_used) + except: + build_web_tree(r_used, depth=depth+1) + + +if __name__ == '__main__': + build_web_tree(' '.join(argv[1:])) diff --git a/tests.py b/tests.py new file mode 100644 index 0000000..8d5fc56 --- /dev/null +++ b/tests.py @@ -0,0 +1,125 @@ +from duckduckgo import query +import unittest + + +class GrandDuckDuckGoTestSuite(unittest.TestCase): + def testDuckDuckGo(self): + dataset = query('duckduckgo') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result[0].url, 'https://duckduckgo.com/') + self.assertEqual(len(ds.related), 1) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/c/Internet_search_engines?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://en.wikipedia.org/wiki/Duck_Duck_Go') + self.assertEqual(ds.redirect, None) + + def test4_pow_10(self): + dataset = query('4 ^ 10') + ds = dataset + self.assertEqual(ds.type, 'exclusive') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 0) + self.assertEqual(ds.answer.primary, '4 ^ 10 = 1,048,576') + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def testYahoo(self): + dataset = query('Yahoo!') + ds = dataset + self.assertEqual(ds.type, 'disambiguation') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 6) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/Yahoo!?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def test42(self): + dataset = query('42') + ds = dataset + self.assertEqual(ds.type, 'disambiguation') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 7) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/42?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def testGenomeProject(self): + dataset = query('Genome Project') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 6) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/Joint_Genome_Institute?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://en.wikipedia.org/wiki/Genome_project') + self.assertEqual(ds.redirect, None) + + def testBeetle(self): + dataset = query('Beetle') + ds = dataset + self.assertEqual(ds.type, 'disambiguation') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 20) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/Beetle?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition.url, 'http://www.merriam-webster.com/dictionary/beetle') + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def testGoLang(self): + dataset = query('golang') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 7) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/Go!_(programming_language)?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://en.wikipedia.org/wiki/Go_(programming_language)') + self.assertEqual(ds.redirect, None) + + def testPythonDuckDuckGo(self): + dataset = query('python-duckduckgo') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 0) + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://github.com/mikejs/python-duckduckgo') + self.assertEqual(ds.redirect, None) + + def testPythonDjango(self): + dataset = query('python django') + ds = dataset + self.assertEqual(ds.type, 'nothing') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 0) + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract, None) + self.assertEqual(ds.redirect, None) + + def testNFAK(self): + dataset = query('NFAK') + ds = dataset + self.assertEqual(ds.type, 'answer') + self.assertEqual(ds.result, []) + self.assertEqual(len(ds.related), 8) + self.assertEqual(ds.related[0].url, 'http://duckduckgo.com/c/Harmonium_players?kp=1') + self.assertEqual(ds.answer, None) + self.assertEqual(ds.definition, None) + self.assertEqual(ds.abstract.url, 'https://en.wikipedia.org/wiki/Nusrat_Fateh_Ali_Khan') + self.assertEqual(ds.redirect, None) + + +if __name__ == '__main__': + unittest.main() From 1aa188010d9bb2d2c5c3b74bd9f44eb5f3cce980 Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Sat, 14 Sep 2013 00:22:45 +0530 Subject: [PATCH 5/9] examples moved to example1 --- examples.py => example1.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples.py => example1.py (100%) diff --git a/examples.py b/example1.py similarity index 100% rename from examples.py rename to example1.py From 732a596ad433caffb8383344144df15036083243 Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Sat, 14 Sep 2013 00:27:31 +0530 Subject: [PATCH 6/9] calculator using duckduckgo api --- example2.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 example2.py diff --git a/example2.py b/example2.py new file mode 100644 index 0000000..17c6347 --- /dev/null +++ b/example2.py @@ -0,0 +1,9 @@ +from duckduckgo import query + +def calculate(text): + """ There is bc but why not use web api to caculate""" + return query(text).answer.primary if query(text).type != 'nothing' else None + +if __name__ == '__main__': + import sys + print calculate(' '.join(sys.argv[1:])) From e3a35260f5fefa2d9f996767a24c5c2273f4fe30 Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Sat, 14 Sep 2013 00:28:12 +0530 Subject: [PATCH 7/9] changes --- example1.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/example1.py b/example1.py index 350140a..f726618 100644 --- a/example1.py +++ b/example1.py @@ -14,12 +14,14 @@ def build_web_tree(qr, depth=0): return else: for r in ds.related: - if isinstance(r, Topic) == False: + if isinstance(r, Topic) == True: + r_used = r.name.encode('ascii', 'ignore') + else: r_used = r.text.encode('ascii', 'ignore').split('-')[0].strip() - try: - visited.index(r_used) - except: - build_web_tree(r_used, depth=depth+1) + try: + visited.index(r_used) + except: + build_web_tree(r_used, depth=depth+1) if __name__ == '__main__': From 9f603e0b045a6fb7f296ebf3ab271052c2123030 Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Sat, 14 Sep 2013 00:34:19 +0530 Subject: [PATCH 8/9] wikipedia presence check --- example3.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 example3.py diff --git a/example3.py b/example3.py new file mode 100644 index 0000000..6c64f3b --- /dev/null +++ b/example3.py @@ -0,0 +1,9 @@ +from duckduckgo import query + +def wikipedia_presence(text): + """Find if a query has wikipedia article""" + return query(text).abstract.url if query(text).abstract != None and query(text).abstract.source == 'Wikipedia' else None + +if __name__ == '__main__': + import sys + print wikipedia_presence(' '.join(sys.argv[1:])) From e4bb5729cdf8c1e086226760af01e2c0c7dbb500 Mon Sep 17 00:00:00 2001 From: Supreet Sethi Date: Tue, 17 Sep 2013 16:28:33 +0530 Subject: [PATCH 9/9] dot file from related topics --- dot_example.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 dot_example.py diff --git a/dot_example.py b/dot_example.py new file mode 100644 index 0000000..5c0c80e --- /dev/null +++ b/dot_example.py @@ -0,0 +1,41 @@ +from duckduckgo import query, Topic +from sys import argv +visited = [] +depth_color = { + 0: 'green', + 1: '#A52A2A', + 2: 'grey', + 3: 'blue' + } + +def build_web_tree(node, qr, depth=0): + cooked_qr = qr.replace('"', '\\"') + print '"%s" [label="%s", shape="hexagon", style="filled", color="%s"];' % (cooked_qr, cooked_qr, depth_color[depth]) + if node != None: + print '"%s" -> "%s";' % (node, cooked_qr) + ds = query(qr) + if depth == 3: + return + if ds.error_code != 0: + return + visited.append(qr) + if ds.related == []: + return + else: + for r in ds.related: + if isinstance(r, Topic) == True: + r_used = r.name.encode('ascii', 'ignore') + else: + r_used = r.text.encode('ascii', 'ignore').split('-')[0].strip() + try: + visited.index(r_used) + except: + build_web_tree(qr, r_used, depth=depth+1) + + +if __name__ == '__main__': + print """digraph G { + ranksep=3; + ratio=auto;""" + build_web_tree(None, ' '.join(argv[1:])) + print "}"