User:The wubbot/source

The extremely messy, probably very inefficient code of my bot. It uses Pywikipediabot.
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
User:The wubbot#delsortclean.py
"""
import wikipedia
import pagegenerators
import sys
import re

# Captures the bolded text that follows "result was" in a debate; the \s*
# tolerates multiple spaces or line breaks before the bold markup.
resultPattern = re.compile(r"result was\s*'''(.+?)'''")
# Captures a timestamp of the form "HH:MM, D Month YYYY (UTC)".
datePattern = re.compile(r"(\d\d:\d\d, \d+ \w+ \d{4} \(UTC\))")

# Page that informboss() appends its "failed to extract result" notes to.
wubTalk = wikipedia.Page(wikipedia.getSite(), 'User:The wubbot/notes')

def main():
    """Run clean() over every eligible deletion sorting subpage.

    Sets the edit summary, fetches the candidate pages via getsubpages()
    and processes each one whose title lies under
    'Wikipedia:WikiProject Deletion sorting/'.  The 'Anime and manga'
    subpage is skipped because it has a different archiving system.
    """
    wikipedia.setAction('Archiving closed debates')

    prefix = 'Wikipedia:WikiProject Deletion sorting/'
    excluded = 'Wikipedia:WikiProject Deletion sorting/Anime and manga'

    for page in getsubpages():
        title = page.title()
        if not title.startswith(prefix):
            continue  # not an actual deletion sorting subpage
        if title == excluded:
            continue  # manually excluded
        clean(page)

def clean(subPage):
    """Move closed AfD debates from a deletion sorting subpage to its archive.

    For every transclusion on subPage whose name starts with
    'Wikipedia:Articles for deletion/', the debate text is fetched
    (following a redirect if there is one) and passed through substify().
    A debate counts as closed when its text contains
    '<div class="boilerplate metadata'.  Closed debates are removed from
    the subpage wikitext and a summary line is queued for the archive.

    A debate is only archived if its '{{...}}' transclusion was actually
    found in, and removed from, the page text.  Otherwise it would be
    archived while still being listed, and archived again on later runs.

    If anything was removed, the subpage is saved and the queued lines are
    handed to archive() for '<subpage title>/archive'.

    subPage -- the deletion sorting subpage (a wikipedia.Page)
    """
    subPageTitle = subPage.title()

    wikipedia.output(';' + subPage.aslink())

    # Wikitext of the subpage, with underscores turned into spaces so that
    # transclusion names can be matched against the titles below.
    original = subPage.get().replace('_', ' ')
    updated = original  # working copy that debates get removed from

    archivePage = wikipedia.Page(wikipedia.getSite(), subPageTitle + '/archive')

    archival = ''  # lines to add to the archive; nothing to start with

    for transclude in subPage.templates():
        if not transclude.startswith('Wikipedia:Articles for deletion/'):
            continue

        debate = wikipedia.Page(wikipedia.getSite(), transclude)
        if debate.isRedirectPage():
            debate = debate.getRedirectTarget()  # use the actual page
        d = substify(debate.get(), debate)  # subst closure templates if needed

        if '<div class="boilerplate metadata' not in d:
            wikipedia.output('* Skipping ' + transclude + ' - still open')
            continue

        # Remove the transclusion, together with its trailing newline when
        # there is one.
        tag = '{{' + transclude + '}}'
        stripped = updated.replace(tag + '\n', '').replace(tag, '')
        if stripped == updated:
            # The transclusion is not written in the page in this exact
            # form, so nothing was removed: do not archive it.
            wikipedia.output('* Skipping ' + transclude
                             + ' - closed, but its transclusion was not found in the page text')
            continue
        updated = stripped

        result, date = getresult(d)
        wikipedia.output('* Removing ' + transclude + ' - result was ' + result)
        # transclude[32:] drops the 32-character
        # 'Wikipedia:Articles for deletion/' prefix for the link label.
        archival += ('* [[' + transclude + '|' + transclude[32:] + ']] - ('
                     + str(len(d)) + ') - ' + result
                     + ' - <small>closed ' + date + '</small> \n')

    if updated != original:  # only save if something was removed
        wikipedia.output('Saving page...')
        subPage.put(updated)
        wikipedia.output('Archive page:')
        archive(archivePage, subPageTitle, archival)
    else:
        wikipedia.output('No changes made')

    print('-' * 20 + '\n')  # divider


def archive(archivePage, subPageTitle, archival):
    """Insert archive lines into a deletion sorting archive page and save it.

    If archivePage does not exist yet, it is created from a standard intro.
    The intro is built from subPageTitle[39:], i.e. the title without its
    first 39 characters (the length of the
    'Wikipedia:WikiProject Deletion sorting/' prefix).

    The new lines go directly below the
    '<!-- add old AfD discussions at the top -->' marker.  If an existing
    page has no such marker, a new marker-delimited list holding the lines
    is appended to the end of the page, so the entries are not silently
    dropped.

    archivePage  -- page object providing exists(), get(), put() and title()
    subPageTitle -- title of the deletion sorting subpage being archived
    archival     -- wikitext lines to add, each ending in a newline
    """
    marker = '<!-- add old AfD discussions at the top -->'

    if not archivePage.exists():  # new archive page: start from the intro
        archivetext = "<noinclude>{{deletionlistarchive|" + subPageTitle[39:] + """}}</noinclude>

==""" + subPageTitle[39:] + """==

===Articles for Deletion===
<!-- add old AfD discussions at the top -->

<!-- end of old AfD discussions -->
"""
        print('Creating new archive page: ' + archivePage.title())
    else:  # existing archive page: work on its current contents
        archivetext = archivePage.get()

    if marker in archivetext:
        archivetext = archivetext.replace(marker, marker + '\n' + archival)
    else:
        # Without the marker the replace above would change nothing and
        # the entries would be lost, so add a fresh list at the end.
        print('No insertion marker found on ' + archivePage.title()
              + ', appending a new list')
        archivetext = (archivetext.rstrip('\n') + '\n\n' + marker + '\n'
                       + archival + '<!-- end of old AfD discussions -->\n')

    archivePage.put(archivetext)


def getresult(d):
    """Extract the result and a date from an AfD debate for archival.

    d -- the debate's wikitext

    Returns a (result, date) tuple.  result is the first match of the
    module-level resultPattern and date is the first match of the
    module-level datePattern (probably the closure date).  Either is '?'
    when its pattern does not match.
    """
    result = '?'  # used when the result cannot be worked out
    resultMatch = resultPattern.search(d)
    if resultMatch is not None:
        result = resultMatch.group(1)

    date = '?'  # used when no date-like text is found
    dateMatch = datePattern.search(d)
    if dateMatch is not None:
        date = dateMatch.group(1)  # first date-looking thing in the text

    return result, date


def informboss(debate, archivePage):
    """Append a note to the wubTalk page that a result could not be extracted.

    The note is a new '== Failed to extract result ==' section linking to
    the debate and to the archive page, followed by a signature.

    debate      -- page of the debate whose result was not found
    archivePage -- archive page that needs fixing by hand
    """
    wikipedia.output("* Couldn't extract result, informing the boss...")
    notes = wubTalk.get()
    pieces = [
        "\n\n",
        "== Failed to extract result ==\n",
        "Failed to extract result from ",
        debate.aslink(),
        ", please use your skillz on ",
        archivePage.aslink(),
        ". ~~~~",
    ]
    for piece in pieces:
        notes = notes + piece
    wubTalk.put(notes, 'Failed to extract a result, help!')


def substify(d, debate):
    """Substitute AfD closure templates that someone forgot to subst.

    Only text that starts with '{{' is examined.  Each known spelling of
    the top/bottom closure templates is rewritten to '{{subst:at}}' or
    '{{subst:ab}}'.  If that changed anything, the debate page is saved
    and re-fetched so the returned text is the stored result.

    If nothing changed (the page merely starts with some other template),
    the text is returned as is, with no save and no re-fetch.

    d      -- the debate's wikitext
    debate -- the debate page; put() and get(True) are called on it
              only when a template was rewritten

    Returns the (possibly re-fetched) debate wikitext.
    """
    if not d.startswith('{{'):
        return d

    topNames = ('at', 'afd top', 'afdtop', 'afdt', 'afd t', 'Afd top', 'afd-top')
    bottomNames = ('ab', 'afd bottom', 'afdbottom', 'afdb', 'afd b', 'Afd bottom', 'afd-bottom')

    fixed = d
    for name in topNames:
        fixed = fixed.replace('{{' + name + '}}', '{{subst:at}}')
    for name in bottomNames:
        fixed = fixed.replace('{{' + name + '}}', '{{subst:ab}}')

    if fixed == d:
        return d  # no unsubstituted closure template, so nothing to save

    wikipedia.output('* some eejit forgot to subst, so fix it')
    debate.put(fixed, '[[Wikipedia:Template substitution|substituting]] closure templates')
    return debate.get(True)  # force get


def getsubpages():
    """Return the pages linked from 'User:The wubbot/delsortlist'.

    That page is the compact list of subpages the bot works on.
    """
    wikipedia.output('Getting subpages')
    site = wikipedia.getSite()
    return wikipedia.Page(site, 'User:The wubbot/delsortlist').linkedPages()

# Script entry point: run the bot, and always call wikipedia.stopme()
# afterwards, even if main() raises.
if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()