A minimal set of scripts to create analytical data about Wikipedia pageviews starting from a root category.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
This repo is archived. You can view files and clone it, but cannot push or open issues/pull-requests.
 

44 lines
1.3 KiB

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2013 Stefano Costa <steko@iosa.it>
import logging
import requests
class MediaWikiAPIClient(object):
    '''Collect the titles of all pages found below a MediaWiki category.

    Titles accumulate in ``self.pages`` in discovery order: the walk is
    depth-first, so a subcategory's title is immediately followed by the
    titles found inside it. Each title is recorded at most once per
    client instance, even when it is reachable through several
    categories.
    '''

    # Value of the API's ``ns`` field that marks a member as a category
    # page, i.e. something whose own members must be fetched as well.
    CATEGORY_NAMESPACE = 14

    # Seconds to wait for each HTTP request. Without a timeout a stalled
    # connection would block the whole crawl indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self, wiki, session=None):
        '''Prepare a client for a single wiki.

        wiki -- host name of the wiki, e.g. ``'it.wikipedia.org'``.
        session -- optional object exposing a requests-compatible
            ``get(url, params=..., timeout=...)`` method, for instance a
            ``requests.Session`` so that connections are reused across
            the many calls of a crawl. Defaults to the ``requests``
            module itself.
        '''
        self.wiki = wiki
        logging.info('Initialized MediaWikiAPIClient for wiki %s', wiki)
        self.pages = []
        # Every title already handled (recorded pages plus the categories
        # passed in by the caller). Guards against duplicates and against
        # endless recursion when categories contain each other.
        self._seen = set()
        self._http = requests if session is None else session

    def get_category_members(self, category):
        '''Gets category members from MediaWiki API.

        Appends the title of every not-yet-seen member of ``category``
        to ``self.pages`` and descends into members that are themselves
        categories. Returns None; results are read from ``self.pages``.

        Raises KeyError when the API response lacks the expected
        ``query``/``categorymembers`` structure, plus whatever the HTTP
        layer raises for transport or status errors.
        '''
        # The starting category is marked as seen but not recorded, so a
        # subcategory that links back to it cannot restart the walk.
        self._seen.add(category)
        for page in self._fetch_members(category):
            title = page['title']
            if title in self._seen:
                continue
            self._seen.add(title)
            logging.debug('Adding page id %s', page['pageid'])
            self.pages.append(title)
            if page['ns'] == self.CATEGORY_NAMESPACE:
                self.get_category_members(title)
        logging.info('Total pages: %s', len(self.pages))

    def _fetch_members(self, category):
        '''Return all direct members of ``category`` as a list of dicts.

        A single response carries at most ``cmlimit`` entries; further
        batches are requested for as long as the API supplies a
        ``continue`` block.
        '''
        params = {
            'action': 'query',
            'format': 'json',
            'list': 'categorymembers',
            'cmtitle': category,
            'cmlimit': 500,
        }
        url = "http://{}/w/api.php".format(self.wiki)
        members = []
        while True:
            logging.info('Requesting URL %s', url)
            response = self._http.get(
                url, params=params, timeout=self.REQUEST_TIMEOUT)
            # Fail on 4xx/5xx here instead of on an unparsable body later.
            response.raise_for_status()
            result = response.json()
            members.extend(result['query']['categorymembers'])
            continuation = result.get('continue')
            if not continuation:
                return members
            # The continuation values are sent back unchanged, merged
            # into the original query, to obtain the next batch.
            params.update(continuation)
def main():
    '''Crawl the Italian Wikipedia category "Siti archeologici d'Italia".

    Enables DEBUG-level logging, then walks the category through a
    MediaWikiAPIClient bound to it.wikipedia.org.
    '''
    logging.basicConfig(level=logging.DEBUG)
    client = MediaWikiAPIClient(wiki='it.wikipedia.org')
    client.get_category_members(u"Categoria:Siti archeologici d'Italia")


if __name__ == '__main__':
    main()