# User:Ritchie333/wircsd.py

# From Wikipedia, the free encyclopedia
#!/usr/bin/python
#
# wircsd.py
#
# A pywikibot script to classify articles tagged for speedy deletion
# against a project, so that immediate attention can be applied.
# By default uses Womeninred
# Uses class AlexNewArtBotResult, which provides a Python wrapper to
#   the bot rules
#
# Usage : python pwb.py wircsd.py [Rules name]

import sys
import re
import pywikibot
from pywikibot import pagegenerators

# Raw strings keep the regex escapes (\d) out of Python's own string-escape
# processing, which warns about unknown escapes on recent interpreters.

# Threshold line of a rules page: two leading spaces, then '@@<digits>@@'.
reThreshold = re.compile( r'^  @@(\d+)@@' )
# Rule line: one leading space, a signed integer score, a space, then
# '/<pattern>/' (the pattern itself may not contain '/').
rePattern = re.compile( r'^ (-?\d+) /([^/]*)/' )

# Category whose name is handed to pywikibot.Category further down.
catName = u'Category:Candidates for speedy deletion'
# Name of the rule set; the first command-line argument overrides the default.
rulesName = 'Womeninred'
if len( sys.argv ) > 1:
  rulesName = sys.argv[ 1 ]

class AlexNewArtBotResult:
  """Python wrapper around one AlexNewArtBot rule set.

  The rules are read from the wiki page 'User:AlexNewArtBot/<rule>' using the
  module-level `site`, and parsed with the module-level `reThreshold` and
  `rePattern` regular expressions.

  Attributes:
    threshold: integer score a page must reach to count as a match; stays
      at the class default of 10 when the rules page declares none.
    patterns: list of ( value, pattern ) tuples, where `value` is the integer
      added to a page's score when the regex string `pattern` is found in it.
  """

  # Default threshold, used when the rules page has no threshold line.
  threshold = 10
  # Class-level default only; __init__ gives every instance its own list so
  # that rule sets loaded by different instances never leak into each other.
  patterns = []

  def __init__( self, rule ):
    """Load and parse the rule set called `rule`.

    Args:
      rule: name of the rules sub-page below 'User:AlexNewArtBot/'.
    """
    page = pywikibot.Page( site, 'User:AlexNewArtBot/' + rule )
    # Fresh per-instance list: appending to the class attribute would make
    # every instance share (and accumulate) the same rules.
    self.patterns = []
    gotThreshold = False
    for line in page.text.splitlines():
      if not gotThreshold:
        match = reThreshold.match( line )
        if match is not None:
          self.threshold = int( match.group( 1 ) )
          # Only stop looking once a threshold was really found, so a
          # threshold that is not on the very first line is still honoured.
          gotThreshold = True
          continue
      # Threshold and rule lines cannot both match the same line (they differ
      # in their leading whitespace), so every other line is tried as a rule.
      match = rePattern.match( line )
      if match is not None:
        value = int( match.group( 1 ) )
        pattern = match.group( 2 )
        self.patterns.append( ( value, pattern ) )

  def score( self, title ):
    """Return the total rule score of the page called `title`.

    Each pattern found anywhere in the page text (case-insensitively)
    contributes its value once; values may be negative.

    Args:
      title: title of the page to score on the module-level `site`.

    Returns:
      The integer sum of the values of all matching patterns, 0 if none match.
    """
    page = pywikibot.Page( site, title )
    # Read the text once instead of once per pattern.
    text = page.text
    return sum( value
                for ( value, pattern ) in self.patterns
                if re.search( pattern, text, re.IGNORECASE ) is not None )

# AlexNewArtBotResult reads this module-level `site` when it builds Page
# objects, so it has to exist before the rules are loaded.
site = pywikibot.Site()
rules = AlexNewArtBotResult( rulesName )

cat = pywikibot.Category( site, catName )

# Score every main-namespace (0) page in the category and flag those that
# reach the rule set's threshold with a 'MATCH ' prefix.
for page in pagegenerators.CategorizedPageGenerator( cat ):
  if 0 == page.namespace():
    title = page.title()
    score = rules.score( title )
    # Single-argument print( ... ) behaves the same as the old print
    # statement on Python 2 and is valid syntax on Python 3.
    if score >= rules.threshold:
      print( 'MATCH ' + title + ' = ' + str( score ) )
    else:
      print( title + ' = ' + str( score ) )

#print 'test case'
#print rules.score( u'Annie Jump Cannon' )