Jump to content

Wikipedia:Knowledge gaps in women's health/Navboxes/code

From Wikipedia, the free encyclopedia

Here is the Python code for an API search of articles containing a particular keyword. In the following example, the keyword is 'Pregnancy'. The search results are written to a CSV file. Please note that the API search has a 10,000-record limit.

import requests
import csv

# Search Wikipedia for SEARCHWORD through the MediaWiki API and write the
# title of every hit to a CSV file, one title per row.

S = requests.Session()
SEARCHLIST = []  # not used below; kept so the module-level names are unchanged
URL = "https://en.wikipedia.org/w/api.php"
SEARCHWORD = "Pregnancy"

OUTPUT_FILE = 'title_fgm.csv'
PAGE_SIZE = 500        # sent as "srlimit" on every request
MAX_PAGES = 20         # 20 pages * 500 results = 10,000 records at most
REQUEST_TIMEOUT = 30   # seconds; avoids hanging forever on a stalled connection

# The "with" block closes the file even if a request fails part-way through.
# newline='' is what the csv module expects (otherwise blank rows appear on
# Windows); utf-8 keeps titles with non-ASCII characters writable regardless
# of the platform's default encoding.
with open(OUTPUT_FILE, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)

    for iPage in range(MAX_PAGES):
        PARAMS = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": SEARCHWORD,
            "srlimit": PAGE_SIZE,
            "sroffset": iPage * PAGE_SIZE,
        }

        R = S.get(url=URL, params=PARAMS, timeout=REQUEST_TIMEOUT)
        R.raise_for_status()
        DATA = R.json()

        # Report an API-level error explicitly instead of failing with a
        # KeyError on DATA['query'] below.
        if "error" in DATA:
            raise RuntimeError("MediaWiki API error: {}".format(DATA["error"]))

        results = DATA['query']['search']
        for hit in results:
            # write a row to the csv file
            writer.writerow([str(hit['title'])])

        totalhits = DATA['query']['searchinfo']['totalhits']
        # Stop once every hit has been fetched (">=" avoids one extra, empty
        # request when totalhits is an exact multiple of PAGE_SIZE) or when
        # the server returns no more results.
        if not results or (iPage + 1) * PAGE_SIZE >= totalhits:
            break