Skip to content

Commit

Permalink
For old MediaWiki releases, use rawcontinue and wikitools query()
Browse files (browse the repository at this point in the history)
Otherwise the query continuation may fail and only the top revisions
will be exported. Tested with Wikia:
http://clubpenguin.wikia.com/api.php?action=query&prop=revisions&titles=Club_Penguin_Wiki

Also add parentid since it's available after all.

#311 (comment)
  • Loading branch information
nemobis committed May 25, 2018
1 parent d11df60 commit 73902d3
Showing 1 changed file with 16 additions and 15 deletions.
31 changes: 16 additions & 15 deletions dumpgenerator.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -805,7 +805,7 @@ def getXMLRevisions(config={}, session=None, allpages=False):

try:
for namespace in namespaces:
print "Exporting revisions from namespace %s" % namespace
print "Trying to export all revisions from namespace %s" % namespace
arvparams = {
'action': 'query',
'list': 'allrevisions',
Expand Down Expand Up @@ -864,22 +864,22 @@ def getXMLRevisions(config={}, session=None, allpages=False):
'titles': title,
'prop': 'revisions',
'rvlimit': 'max',
'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content'
'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content',
'rawcontinue': 'yes'
}
prequest = wikitools.api.APIRequest(site, pparams)
results = prequest.queryGen()
for result in results:
pages = result['query']['pages']
for page in pages:
try:
xml = makeXmlFromPage(pages[page])
except PageMissingError:
logerror(
config=config,
text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8'))
)
continue
yield xml
results = prequest.query()
pages = results['query']['pages']
for page in pages:
try:
xml = makeXmlFromPage(pages[page])
except PageMissingError:
logerror(
config=config,
text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8'))
)
continue
yield xml

except wikitools.api.APIError:
print "This wikitools version seems not to work for us. Exiting."
Expand All @@ -896,6 +896,7 @@ def makeXmlFromPage(page):
for rev in page['revisions']:
revision = E.revision(
E.id(to_unicode(rev['revid'])),
E.parentid(to_unicode(rev['parentid'])),
E.timestamp(rev['timestamp']),
E.contributor(
E.id(to_unicode(rev['userid'])),
Expand Down

0 comments on commit 73902d3

Please sign in to comment.