User:Kumar Appaiah Bot/bot.py

From Wikipedia, the free encyclopedia
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
This is a rudimentary bot to add coordinates to some Wikipedia pages,
consisting mainly of Indian towns/villages/cities lacking coordinates.

The following parameters are supported:

&params;

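    -debug         If given, doesn't make any real changes, but only shows
                   what would have been changed.

    -latitude:X    Latitude, in decimal notation, to write to the page(s).

    -longitude:Y   Longitude, in decimal notation, to write to the page(s).

    -latlongdb:F   Path of a text file with one page per line, given as
                   tab-separated fields: article title, latitude, longitude.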

All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
"""
import re
import sys

import wikipedia
import pagegenerators

# This is required for the text that is shown when you run this script
# with the parameter -help.
# It maps the '&params;' placeholder that appears in the module docstring
# to pagegenerators.parameterHelp (presumably the help text for the shared
# page-generator options -- confirm against the pagegenerators module).
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}

def _decimal_to_dms(value):
    """
    Split a decimal-degree value into whole (degrees, minutes, seconds).

    The value is rounded to the nearest whole second first and only then
    split, so a carry propagates correctly (12.99999 becomes 13, 0, 0
    rather than 12, 60, 0) and minutes and seconds always stay in 0..59.

    For a negative value the sign is applied to every component, so that
    degrees + minutes/60 + seconds/3600 still equals the rounded input.
    NOTE(review): whether the target template accepts negative components
    is not visible from this file -- confirm before using the bot on
    southern or western coordinates.
    """
    # int() is needed because round() returns a float on Python 2.
    total_seconds = int(round(abs(value) * 3600))
    total_minutes, seconds = divmod(total_seconds, 60)
    degrees, minutes = divmod(total_minutes, 60)
    if value < 0:
        return -degrees, -minutes, -seconds
    return degrees, minutes, seconds


def convert_latlong_wikitext(latitude, longitude):
    """
    Takes latitude and longitude values in decimal notation and
    returns them in Wikitext format.

    Parameters:
        * latitude  - Latitude as a number or numeric string.
        * longitude - Longitude as a number or numeric string.

    Returns two lines of template parameters, each terminated by ' |'
    and a newline:

        latd = D | latm = M | lats = S |
        longd = D | longm = M | longs = S |

    Raises ValueError or TypeError (from float()) if either argument
    cannot be converted to a number.
    """
    latd, latm, lats = _decimal_to_dms(float(latitude))
    longd, longm, longs = _decimal_to_dms(float(longitude))
    return (
        "latd = %d | latm = %d | lats = %d |\n"
        "longd = %d | longm = %d | longs = %d |\n"
        % (latd, latm, lats, longd, longm, longs)
    )
    

class CoordReplaceBot:
    """
    Bot that writes one latitude/longitude pair into the coordinate
    template parameters (latd, latm, lats, longd, longm, longs) of every
    page produced by a generator.
    """
    # Edit summary message that should be used.
    # NOTE: Put a good description here, and add translations, if possible!
    msg = {
        'en': u'Robot: Adding coordinates, please refer to [[Wikipedia:Bots/Requests_for_approval/Kumar_Appaiah_Bot|BRFA page]]',
    }

    # Parameters removed from the page before the freshly generated block
    # (which contains all six parameters) is written in place of 'latd'.
    _stale_params = ('lats', 'longs', 'latm', 'longm', 'longd')

    # Maintenance tags removed once coordinates have been inserted.
    _coord_missing_tags = (
        "{{coord missing|India}}",
        "{{Coord missing|India}}",
        "{{Coord missing}}",
        "{{coord missing}}",
    )

    def __init__(self, generator, latitude, longitude, debug):
        """
        Constructor. Parameters:
            * generator - The page generator that determines on which pages
                          to work on.
            * latitude  - Latitude in decimal notation (number or string).
            * longitude - Longitude in decimal notation (number or string).
            * debug     - If True, doesn't do any real changes, but only shows
                          what would have been changed.
        """
        self.generator = generator
        self.debug = debug
        self.latitude = latitude
        self.longitude = longitude

    def run(self):
        """
        Set the edit summary and treat every page from the generator.
        """
        # Set the edit summary message
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.treat(page)

    def _param_span(self, text, param):
        """
        Locate the first 'param = value |' assignment in text.

        Returns (start, end), where start is the index of the parameter
        name and end is the index just past the terminating pipe, or None
        if there is no such assignment or no pipe follows it.
        """
        # Require a non-word character before the name and '=' after it, so
        # that ordinary prose (e.g. 'belongs' for 'longs') is never matched.
        pattern = r'(?<![A-Za-z0-9_])' + re.escape(param) + r'\s*='
        match = re.search(pattern, text)
        if match is None:
            return None
        pipe = text.find('|', match.end())
        if pipe == -1:
            # Without a terminating pipe the extent of the value is unknown.
            return None
        return match.start(), pipe + 1

    def strip_text(self, text_block, stripped_param):
        """
        Strip the first assignment of the parameter stripped_param from
        text_block, from the parameter name up to and including the next
        occurring pipe ('|') symbol. It expects the articles to have well
        formed structure, but that isn't a terrible assumption.

        The text is returned unchanged if the parameter is not assigned
        anywhere or if no pipe follows the assignment.
        """
        span = self._param_span(text_block, stripped_param)
        if span is None:
            return text_block
        start, end = span
        return text_block[:start] + text_block[end:]

    def _insert_coordinates(self, text):
        """
        Return text with its coordinate parameters replaced by the bot's
        latitude/longitude and the 'coord missing' tags removed.

        The text is returned unchanged when it has no usable 'latd'
        parameter, so existing coordinates are never removed without a
        replacement being written.
        """
        if self._param_span(text, 'latd') is None:
            return text
        stripped = text
        for param in self._stale_params:
            stripped = self.strip_text(stripped, param)
        # Look 'latd' up again: offsets moved, and on malformed markup one
        # of the removals above may have swallowed it.
        span = self._param_span(stripped, 'latd')
        if span is None:
            return text
        start, end = span
        coordinates = convert_latlong_wikitext(self.latitude, self.longitude)
        updated = stripped[:start] + coordinates + stripped[end:]
        for tag in self._coord_missing_tags:
            updated = updated.replace(tag, "")
        return updated

    def treat(self, page):
        """
        Loads the given page, inserts the coordinates, and saves it.

        Nothing is saved if the page is missing, is a redirect, ends up
        unchanged, or if the bot runs in debug mode. Locked pages, edit
        conflicts and spam-filter rejections are reported and skipped.
        """
        if self.latitude is None or self.longitude is None:
            # Without both values there is nothing meaningful to write.
            wikipedia.output(u"No coordinates given for %s; skipping." % page.aslink())
            return

        try:
            # Load the page
            original = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
            return

        text = self._insert_coordinates(original)

        # only save if something was changed
        if text == original:
            return

        # Show the title of the page we're working on.
        # Highlight the title in purple.
        wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title())
        # show what was changed
        wikipedia.showDiff(original, text)
        if self.debug:
            return

        try:
            # Save the page
            page.put(text, minorEdit=False)
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % page.aslink())
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % (page.title()))
        # 'as' form of except: valid from Python 2.6 onwards.
        except wikipedia.SpamfilterError as error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (page.title(), error.url))


def _argument_value(arg):
    """
    Return the text after the first ':' of a '-name:value' argument, or
    None if there is no value. Splitting only on the first colon keeps
    values that contain colons themselves (e.g. some file paths) intact.
    """
    return arg.partition(':')[2] or None


def _run_from_coordinate_file(path, debug):
    """
    Run the bot once for every line of the coordinate file at path.

    Each line holds three tab-separated fields: article title, latitude
    and longitude. Blank lines are ignored; lines with a different number
    of fields are reported and skipped.
    """
    # 'with' closes the file even if a bot run raises.
    with open(path, 'r') as coord_db:
        for line in coord_db:
            print(line)
            # Strip only the line terminator; the last line may lack one.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            fields = line.split("\t")
            if len(fields) != 3:
                wikipedia.output(u"Skipping malformed line in %s: %r" % (path, line))
                continue
            article, lat_val, long_val = fields
            page = wikipedia.Page(wikipedia.getSite(), article)
            # The preloading generator is responsible for downloading
            # multiple pages from the wiki simultaneously.
            gen = pagegenerators.PreloadingGenerator(iter([page]))
            bot = CoordReplaceBot(gen, lat_val, long_val, debug)
            bot.run()


def main():
    """
    Parse the command line and run the bot.

    Pages come from a coordinate file (-latlongdb), from a single page
    title given as free arguments, or from the standard page-generator
    options; the latter two use the values of -latitude and -longitude.
    When -latlongdb is given, the standard page-generator options are
    not used. The help text is shown if no page source was given at all.
    """
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitleParts = []
    # If debug is True, doesn't do any real changes, but only show
    # what would have been changed.
    debug = False

    # Parse command line arguments
    latitude = None
    longitude = None
    latlongdb = None
    for arg in wikipedia.handleArgs():
        if arg.startswith("-debug"):
            debug = True
        elif arg.startswith("-latitude"):
            latitude = _argument_value(arg)
        elif arg.startswith("-longitude"):
            longitude = _argument_value(arg)
        elif arg.startswith("-latlongdb"):
            latlongdb = _argument_value(arg)
        else:
            # check if a standard argument like
            # -start:XYZ or -ref:Asdf was given.
            if not genFactory.handleArg(arg):
                pageTitleParts.append(arg)

    if latlongdb:
        _run_from_coordinate_file(latlongdb, debug)

    if pageTitleParts:
        # We will only work on a single page.
        pageTitle = ' '.join(pageTitleParts)
        page = wikipedia.Page(wikipedia.getSite(), pageTitle)
        gen = iter([page])
    elif not latlongdb:
        gen = genFactory.getCombinedGenerator()

    if gen:
        if latitude is None or longitude is None:
            # These pages take their coordinates from the command line.
            wikipedia.output(u"Both -latitude and -longitude must be given; nothing done.")
            return
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = CoordReplaceBot(gen, latitude, longitude, debug)
        bot.run()
    elif not latlongdb:
        wikipedia.showHelp()

# Script entry point: main() runs only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    try:
        main()
    finally:
        # stopme() is called whether main() returned normally or raised
        # (presumably framework shutdown/cleanup -- confirm against the
        # pywikipedia documentation).
        wikipedia.stopme()