#!/usr/bin/env python
# -*- coding: utf-8 -*-
import difflib
import simplejson as json # safely retrieve json objects (and correctly handle '/' in article titles)
import pickle # save arrays in files
import re
#import string # string.atoi - variable wait when lagged
import sys # read/write files
import time # what day is it?
import urllib # read/write websites
# --- HTTP layer bootstrap -----------------------------------------------
# Pick the best available cookie-aware urlopen/Request implementation:
# stdlib cookielib + urllib2 if possible, the third-party ClientCookie
# package as a fallback, or plain urllib2 (no cookies) as a last resort.
null = 0  # NOTE(review): unused below -- presumably a leftover guard for eval()ing raw JSON replies; verify before removing
cj = None            # cookie jar (LWPCookieJar) once a cookie library is found
ClientCookie = None  # module handle if the ClientCookie fallback gets imported
cookielib = None     # module handle if stdlib cookielib imports cleanly
try:
    import cookielib
except ImportError:
    pass
else:
    import urllib2
    urlopen = urllib2.urlopen
    cj = cookielib.LWPCookieJar()
    Request = urllib2.Request
if not cookielib:
    # stdlib cookielib unavailable: try the ClientCookie package instead
    try:
        import ClientCookie
    except ImportError:
        # no cookie support at all -- fall back to bare urllib2
        import urllib2
        urlopen = urllib2.urlopen
        Request = urllib2.Request
    else:
        urlopen = ClientCookie.urlopen
        cj = ClientCookie.LWPCookieJar()
        Request = ClientCookie.Request
# If we got a cookie jar, install a global opener that carries it on every request.
if cj != None:
    if cookielib:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
    else:
        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
        ClientCookie.install_opener(opener)
### LOWER-LEVEL URL INTERFACE ###
def act (txdata, url = 'http://en.wikipedia.org/w/api.php', txheaders = {'User-agent' : 'VWBot'}):
    """POST *txdata* (urlencoded string) to *url* and return the response body.

    Retries forever, sleeping 5 seconds after any IOError, so transient
    server or connectivity failures never bubble up to the caller.
    txheaders is a shared default dict but is never mutated here.
    """
    while True: # loop until we actually reach the server
        try:
            req = Request(url, txdata, txheaders)
            handle = urlopen(req)
        except IOError:
            # Server hiccup / connection problem: back off briefly and retry.
            # (The exception object was previously bound as 'e' but never used.)
            time.sleep(5)
        else:
            # handle.info() would give the headers and handle.geturl() the
            # final URL after redirects -- only the body is needed here.
            return handle.read()
### THIS DOES NOT ACCOUNT FOR QUERY-CONTINUE RESULTS, THESE MUST BE RE-QUERIED LATER
def action (params):
    """Perform one MediaWiki API request described by the dict *params*.

    Adds format=json and assert=bot, POSTs via act(), logs the raw reply to a
    dated log file, and retries every 5 seconds until the API reports success.
    For action=edit it first (recursively) fetches an edit token plus the
    timestamps the API needs to detect edit conflicts.
    Returns the decoded JSON reply as a dict.
    """
    if 'url' in params:
        url = params['url']
        del params['url']
    else:
        url = 'http://en.wikipedia.org/w/api.php'
    while True: # loop until the API reports success
        params['format'] = 'json'
        # maxlag=5 would be the polite setting, but proved impractical for the
        # volume of requests this bot makes.
        params['assert'] = 'bot' # fail fast if the bot login was lost
        # Edits need a token and timestamps so the server can spot edit conflicts.
        if params['action'] == 'edit':
            page = action({'action': 'query', 'prop': 'info|revisions', 'intoken': 'edit', 'titles': params['title']})
            # single-title query: grab its only 'pages' entry once instead of
            # re-indexing pages.keys()[0] on every access
            info = page['query']['pages'][list(page['query']['pages'])[0]]
            params['token'] = info['edittoken']
            params['starttimestamp'] = info['starttimestamp']
            if 'revisions' in info:
                # page exists: base the edit on its latest revision
                params['basetimestamp'] = info['revisions'][0]['timestamp']
            else:
                # page doesn't exist yet
                params['basetimestamp'] = params['starttimestamp']
        page = json.loads(act(urllib.urlencode(params), url))
        # append the raw reply to today's log file
        logfile = open(time.strftime('log %Y-%m-%d.txt', time.gmtime()), 'a')
        logfile.write(time.asctime(time.gmtime()) + '\t' + str(page) + '\n\n')
        logfile.close()
        # Success replies are keyed by the action name ({'edit': ...}, {'query': ...}).
        # BUG FIX: this previously tested the substring of page.keys()[0] -- an
        # arbitrary first key in an unordered dict -- which could retry forever
        # when e.g. a 'warnings' key happened to sort first.
        if params['action'] in page:
            if params['action'] == 'edit': time.sleep(5) # throttle writes
            return page
        if page['error']['code'] == 'emptynewsection':
            return page
        # maxlag / edit conflict / unknown error: wait and retry
        time.sleep(5)
#######################
##### @ 00:00 GMT #####
#######################
startTime = time.time()  # reference point for the timed waits at 00:10 and 00:20 below
##### 2-STEP LOGIN #####
def login():
    # Standard two-step MediaWiki login: the first request yields a token,
    # the second confirms it.
    # NOTE(review): 'foo' and 'bar' are not defined anywhere in this file --
    # presumably the real credentials were stripped before publishing; they
    # must exist at module level or this raises NameError.
    page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar})
    page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar, 'lgtoken': page['login']['token']})
    if page['login']['result'] == 'Throttled':
        # too many attempts: wait the server-specified interval, then retry
        time.sleep(page['login']['wait'])
        login()
login()
##### TASK 1 #####
# TASK 2 - backlogSCV()
# Transclude today's dated subpage at the top of WP:Suspected copyright violations.
page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Suspected copyright violations', 'appendtext': time.strftime('\n{{/%Y-%m-%d}}', time.gmtime()), 'section': 0, 'summary': time.strftime('Adding new listing for %-d %B %Y ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])', time.gmtime())})
##### TASK 3 #####
# Collect every page currently in the copyvio category and diff against the
# previous run to find the newly blanked ones.
page = action({'action': 'query', 'list': 'categorymembers', 'cmtitle': 'Category:Articles tagged for copyright problems', 'cmlimit': 'max'})
blankedPages = [i['title'] for i in page['query']['categorymembers']]
# pull up the previous run
with open('todayLogCopyvio', 'rb') as logfile:
    alreadyBlankedPages = pickle.load(logfile)
# overwrite yesterday's log with today's now that we have the change in articles
with open('yesterdayLogCopyvio', 'wb') as logfile:
    pickle.dump(alreadyBlankedPages, logfile)
# save log so it can be compared to the next run
with open('todayLogCopyvio', 'wb') as logfile:
    pickle.dump(blankedPages, logfile)
# now we have our list to run searches for
newBlankedPages = [x for x in blankedPages if x not in alreadyBlankedPages]
# BUG FIX: File: entries were previously dropped with list.remove() inside a
# loop over the same list, which skips the element following each removal;
# filter into a new list instead.
# File: pages also need to be reported elsewhere - list at [[WP:PUF?]]
newBlankedPages = [i for i in newBlankedPages if i[:5] != 'File:']
##### TASK 5 #####
# Gather every page transcluding {{Close paraphrasing}} and work out which
# ones appeared since the previous run.
page = action({'action': 'query', 'list': 'embeddedin', 'eititle': 'Template:Close paraphrasing', 'eilimit': 'max'})
closeParaphrases = [entry['title'] for entry in page['query']['embeddedin']]
# pull up the previous run
with open('todayLogCloseParaphrasing', 'rb') as logfile:
    oldCloseParaphrases = pickle.load(logfile)
# overwrite yesterday's log with today's now that we have the change in articles
with open('yesterdayLogCloseParaphrasing', 'wb') as logfile:
    pickle.dump(oldCloseParaphrases, logfile)
# save log so it can be compared to the next run
with open('todayLogCloseParaphrasing', 'wb') as logfile:
    pickle.dump(closeParaphrases, logfile)
# now we have our list to run searches for
newCloseParaphrases = [title for title in closeParaphrases if title not in oldCloseParaphrases]
##### TASK 10 #####
# Gather every page transcluding {{Copypaste}} and work out which ones
# appeared since the previous run.
page = action({'action': 'query', 'list': 'embeddedin', 'eititle': 'Template:Copypaste', 'eilimit': 'max'})
copyPastes = [entry['title'] for entry in page['query']['embeddedin']]
# pull up the previous run
with open('todayLogCopypaste', 'rb') as logfile:
    oldCopyPastes = pickle.load(logfile)
# overwrite yesterday's log with today's
with open('yesterdayLogCopypaste', 'wb') as logfile:
    pickle.dump(oldCopyPastes, logfile)
# save the new log so it can be compared to the next run tomorrow
with open('todayLogCopypaste', 'wb') as logfile:
    pickle.dump(copyPastes, logfile)
# now we have our list to run searches for
newCopyPastes = [title for title in copyPastes if title not in oldCopyPastes]
#######################
##### @ 00:10 GMT #####
#######################
# Sleep until at least 10 minutes past the start of the run.
while time.time() - startTime < 600: # no earlier than 00:10 GMT
    time.sleep(600 - (time.time() - startTime))
# always update NewListings - this is only needed so Zorglbot doesn't screw up; has no actual effect
# The page body is the last 8 days of dated subpage transclusions computed two
# ways: wiki-side {{#time:}} expressions, plus an HTML-commented strftime copy.
page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems/NewListings', 'text': time.strftime('{{Wikipedia:Copyright problems/{{#time:Y F j|-7 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-6 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-5 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-4 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-3 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-2 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-1 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j}}}}<!--\n{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*168)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*144)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*120)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*96)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*72)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*48)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*24)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}', time.gmtime()), 'summary': time.strftime('Automatic addition of new listing for %-d %B %Y and archiving of listings older than 7 days ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])', time.gmtime())})
#######################
##### @ 00:20 GMT #####
#######################
# Sleep until at least 20 minutes past the start of the run.
while time.time() - startTime < 1200: # no earlier than 00:20 GMT
    time.sleep(1200 - (time.time() - startTime))
##### TASK 3 #####
# p3: marker comment DumbBOT leaves on a freshly created daily page;
# p4: any level-4 section heading.
p3 = re.compile('<!-- This is Cppage. Comment used by User:DumbBOT, do not remove or change -->')
p4 = re.compile('====.*====')
# Fetch today's dated WP:CP subpage to see whether it exists yet.
page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'rvlimit': 1})
# group new page creation AND old page archival
if 'missing' in page['query']['pages'][page['query']['pages'].keys()[0]]:
    # CREATE AND POPULATE "BOT: Automatic creation of new daily page for copyright problems"
    page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'text': '{{subst:Cppage}}\n<!-- Add new listings at the bottom of the list with the following format:\n\n* {{subst:article-cv|ArticleName}} from [http://www.WhereItWasCopiedFrom.com]. ~~~~\n\n-->\n', 'summary': 'Automatic creation of new daily page for copyright problems including automated findings ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': 'Wikipedia:Copyright problems', 'rvlimit': 1})
    # Archive: splice the 8-day-old subpage transclusion in ahead of the
    # "New listings" header on the main WP:CP page.
    newtext = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*'].replace('\n\n===New listings===', time.strftime('\n{{Wikipedia:Copyright problems/%Y %B %-d}}\n\n===New listings===', time.gmtime(time.time()-60*60*192)))
    page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems', 'text': newtext.encode('utf-8'), 'summary': 'Automatic archiving of listings older than 7 days ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
elif not re.search(p3, page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']):
    # POPULATE "adding CorenSearchBot findings"
    # Page exists but lacks DumbBOT's marker: replace its first ==== heading
    # with the substituted Cppage boilerplate.
    page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'text': page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*'].replace(re.search(p4, page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']).group(),'{{subst:Cppage}}'), 'summary': 'Adding automated findings ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
##### TASKS 3, 5, 7 and 10 #####
def isAlreadyListed(title):
    """Return True if *title* (or any redirect to it) is already linked from
    one of the live copyright-problem listing pages."""
    listingPages = ('Wikipedia:Copyright problems', 'Wikipedia:Suspected copyright violations', 'Wikipedia:Copyright problems/NewListings')
    # redirects pointing at the article, plus the article itself
    candidates = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'blfilterredir': 'redirects'})
    candidates['query']['backlinks'].append({'title': title})
    for candidate in candidates['query']['backlinks']:
        # project-namespace (4) pages linking to this candidate
        links = action({'action': 'query', 'list': 'backlinks', 'bltitle': candidate['title'].encode('utf-8'), 'bllimit': 'max', 'blnamespace': '4'})
        if any(link['title'] in listingPages for link in links['query']['backlinks']):
            return True
    return False
# replace NewListings check with one for each of the 8 always-listed days ???
def shouldBeRelisted(title):
    """Return True if *title* (or a redirect to it) appears on a dated archive
    subpage but is no longer on any live listing page -- i.e. it was listed
    once and has since fallen off without being resolved."""
    liveListings = ('Wikipedia:Copyright problems', 'Wikipedia:Suspected copyright violations', 'Wikipedia:Copyright problems/NewListings')
    # redirects pointing at the article, plus the article itself
    candidates = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'blfilterredir': 'redirects'})
    candidates['query']['backlinks'].append({'title': title})
    wasListed = False
    isListed = False
    for candidate in candidates['query']['backlinks']:
        links = action({'action': 'query', 'list': 'backlinks', 'bltitle': candidate['title'].encode('utf-8'), 'bllimit': 'max', 'blnamespace': '4'})
        for link in links['query']['backlinks']:
            # dated subpages mark a past listing; the bare pages mark a live one
            if 'Wikipedia:Suspected copyright violations/' in link['title'] or 'Wikipedia:Copyright problems/' in link['title']:
                wasListed = True
            if link['title'] in liveListings:
                isListed = True
    return wasListed and not isListed
# replace NewListings check with one for each of the 8 always-listed days ???
# Build the WP:CP listing text for newly tagged close paraphrases.
addtext = ''
p0 = re.compile('{{Close paraphras.*?}}', re.IGNORECASE | re.DOTALL)
p1 = re.compile('{{Close paraphras.*?source.*?}}', re.IGNORECASE | re.DOTALL) # gets {{Close paraphrase}} and {{Close paraphrasing}}
p1a = re.compile('\|\W*free\W*=\W*yes', re.IGNORECASE | re.DOTALL) # is source free?
for i in newCloseParaphrases:
    if not isAlreadyListed(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            if re.search(p0, pageSource): # could be tag removed before it's analyzed
                temp = re.search(p0, pageSource).group()
                tag = re.search(p1, temp)
                if not re.search(p1a, temp): # only list at WP:CP if non-free
                    if tag:
                        # The slice arithmetic extracts the value of |source= from
                        # the tag: from the character after '=' up to the next '|'
                        # if there is one, otherwise up to the closing '}}' (-2).
                        if '|' in tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):]:
                            addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tag.group()[tag.group().find('source') +\
                            tag.group()[tag.group().find('source'):].find('=') + 1:tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') +\
                            tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):].find('|')].strip() + '. ~~~~\n'
                        else:
                            addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' +\
                            tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') + 1:-2].strip() + '. ~~~~\n'
                    # no |source= parameter at all: list without attribution
                    else: addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase. ~~~~\n'
# Build the WP:CP listing text for blanked pages whose nomination was never
# completed by the tagger.
moretext = ''
p2 = re.compile('{{Copyviocore.*?}}', re.IGNORECASE | re.DOTALL)
for i in newBlankedPages:
    if not isAlreadyListed(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            tag = re.search(p2, pageSource)
            if tag:
                tagtext = tag.group()
                # eq = index of the '=' following the |url= parameter name;
                # the value runs from eq+1 to the next '|' if present,
                # otherwise to the closing '}}' (-2).
                eq = tagtext.find('url') + tagtext[tagtext.find('url'):].find('=')
                if '|' in tagtext[eq:]:
                    moretext += '* {{subst:article-cv|:' + i + '}} from ' + tagtext[eq + 1:eq + tagtext[eq:].find('|')].strip() + '. Nomination completed by ~~~~\n'
                else:
                    # BUG FIX: this branch previously located the '=' by searching
                    # for 'source' (a copy-paste from the close-paraphrase code);
                    # {{Copyviocore}} uses |url=, so the slice was wrong.
                    moretext += '* {{subst:article-cv|:' + i + '}} from ' + tagtext[eq + 1:-2].strip() + '. Nomination completed by ~~~~\n'
            # no {{Copyviocore}} tag found: list without a source link
            else: moretext += '* {{subst:article-cv|:' + i + '}} Nomination completed by ~~~~\n'
# Build the WP:CP listing text for newly tagged copy-pastes.
CopyPasteText = ''
p5 = re.compile('{{Copy.?past.*?}}|{{Copy\s*\|.*?}}|{{Copy\s*}}', re.IGNORECASE | re.DOTALL)
p6 = re.compile('{{Copy.?past.*?url.*?}}|{{Copy\s*\|.*?url.*?}}', re.IGNORECASE | re.DOTALL)
for i in newCopyPastes:
    if not isAlreadyListed(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            if re.search(p5, pageSource): # could be tag removed before it's analyzed
                temp = re.search(p5, pageSource).group()
                tag = re.search(p6, temp)
                if tag:
                    # The slice arithmetic extracts the value of |url= from the
                    # tag: from the character after '=' up to the next '|' if
                    # there is one, otherwise up to the closing '}}' (-2).
                    if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
                        CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tag.group()[tag.group().find('url') +\
                        tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
                        tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. ~~~~\n'
                    else:
                        CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' +\
                        tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. ~~~~\n'
                # tag present but no |url= parameter: list without a source link
                else: CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted. ~~~~\n'
### NOW FOR THE RELISTINGS ###
# Relist blanked pages that fell off the live listing pages without being resolved.
evenmoretext = ''
for i in blankedPages:
    if i in alreadyBlankedPages and shouldBeRelisted(i): # need to check alreadyBlankedPages as there is a delay between transclusion and backlinks
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            tag = re.search(p2, pageSource)
            if tag:
                tagtext = tag.group()
                # eq = index of the '=' following |url=; the value runs from
                # eq+1 to the next '|' if present, else to the closing '}}' (-2).
                eq = tagtext.find('url') + tagtext[tagtext.find('url'):].find('=')
                if '|' in tagtext[eq:]:
                    evenmoretext += '* {{subst:article-cv|:' + i + '}} from ' + tagtext[eq + 1:eq + tagtext[eq:].find('|')].strip() + '. Relisting. ~~~~\n'
                else:
                    # BUG FIX: this branch previously located the '=' by searching
                    # for 'source' (copy-paste from the close-paraphrase code);
                    # {{Copyviocore}} uses |url=, so the slice was wrong.
                    evenmoretext += '* {{subst:article-cv|:' + i + '}} from ' + tagtext[eq + 1:-2].strip() + '. Relisting. ~~~~\n'
            # no {{Copyviocore}} tag found: relist without a source link
            else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Relisting. ~~~~\n'
# Relist copy-pastes that fell off the live listing pages without being resolved.
for i in copyPastes:
    if i in oldCopyPastes and shouldBeRelisted(i):
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            match = re.search(p5, pageSource)
            # BUG FIX: the tag may have been removed before this runs; calling
            # .group() on a failed search crashed here (the parallel loop above
            # guards for exactly this case).
            if match:
                tag = re.search(p6, match.group())
                if tag:
                    tagtext = tag.group()
                    # eq = index of the '=' following |url=; the value runs from
                    # eq+1 to the next '|' if present, else to the closing '}}' (-2).
                    eq = tagtext.find('url') + tagtext[tagtext.find('url'):].find('=')
                    if '|' in tagtext[eq:]:
                        # BUG FIX: this branch previously appended to CopyPasteText
                        # while its siblings used evenmoretext; relist entries belong
                        # in evenmoretext, which the edit-summary logic treats as the
                        # "relisting" bucket.
                        evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tagtext[eq + 1:eq + tagtext[eq:].find('|')].strip() + '. Relisting. ~~~~\n'
                    else:
                        evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tagtext[eq + 1:-2].strip() + '. Relisting. ~~~~\n'
                # tag present but no |url= parameter: relist without a source link
                else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted. Relisting. ~~~~\n'
# Relist close paraphrases that fell off the live listing pages without being resolved.
for i in closeParaphrases:
    if i in oldCloseParaphrases and shouldBeRelisted(i): # need to check oldCloseParaphrases as there is a delay between transclusion and backlinks
        page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
        if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
            pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            match = re.search(p0, pageSource)
            # BUG FIX: the tag may have been removed before this runs; calling
            # .group() on a failed search crashed here (the listing loop above
            # guards for exactly this case).
            if match:
                temp = match.group()
                tag = re.search(p1, temp)
                if not re.search(p1a, temp): # only list at WP:CP if non-free
                    if tag:
                        tagtext = tag.group()
                        # eq = index of the '=' following |source=; the value runs from
                        # eq+1 to the next '|' if present, else to the closing '}}' (-2).
                        eq = tagtext.find('source') + tagtext[tagtext.find('source'):].find('=')
                        if '|' in tagtext[eq:]:
                            evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tagtext[eq + 1:eq + tagtext[eq:].find('|')].strip() + '. Relisting. ~~~~\n'
                        else:
                            evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tagtext[eq + 1:-2].strip() + '. Relisting. ~~~~\n'
                    # no |source= parameter: relist without attribution
                    else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase. Relisting. ~~~~\n'
#addtext should be CloseParaphraseText
#moretext should be CopyvioText
#evenmoretext should be RelistText
# Compose an edit summary naming exactly the kinds of material collected above:
# a 4-deep decision tree over (addtext, moretext, evenmoretext, CopyPasteText).
editsum = ''
if len(addtext) + len(moretext) + len(evenmoretext) + len(CopyPasteText):
    if len(addtext):
        if len(moretext):
            if len(evenmoretext):
                if len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes, close paraphrases and relisting overlooked pages'
                else: editsum = 'Adding incomplete nominations, close paraphrases and relisting overlooked pages'
            elif len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes and close paraphrases'
            else: editsum = 'Adding incomplete nominations and close paraphrases'
        elif len(evenmoretext):
            if len(CopyPasteText): editsum = 'Adding copy/pastes, close paraphrases and relisting overlooked pages'
            else: editsum = 'Adding close paraphrases and relisting overlooked pages'
        elif len(CopyPasteText): editsum = 'Adding copy/pastes and close paraphrases'
        else: editsum = 'Adding close paraphrases'
    elif len(moretext):
        if len(evenmoretext):
            if len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes and relisting overlooked pages'
            else: editsum = 'Adding incomplete nominations and relisting overlooked pages'
        elif len(CopyPasteText): editsum = 'Adding incomplete nominations and copy/pastes'
        else: editsum = 'Adding incomplete nominations'
    elif len(evenmoretext):
        if len(CopyPasteText): editsum = 'Adding copy/pastes and relisting overlooked pages'
        else: editsum = 'Relisting overlooked pages'
    # only CopyPasteText is non-empty
    else: editsum = 'Adding copy/pastes'
if len(editsum):
    # Append everything to yesterday's dated WP:CP subpage (section 2).
    page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime(time.time()-60*60*24)), 'appendtext': (u'\n' + moretext + CopyPasteText + addtext + evenmoretext).encode('utf-8'), 'section': 2, 'summary': editsum + ' ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
############################
##### USERSPACE TRIALS #####
############################
##### TASK 4: notify authors that their pages have been blanked (by {{subst:copyvio}}) in case they aren't notified by the taggers, so that the pages don't get relisted for an extra week without any action being taken on them #####
def doNotify(title):
    """Decide whether the main contributor of blanked page *title* should be
    notified, returning a wiki-markup verdict string: either a
    "'''Do Nothing'''" line explaining why, or "'''Notify contributor'''"
    naming the contributor and the tagger."""
    # Backlinks (with redirects expanded) plus creation info, and page info to tell if it's deleted.
    page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'prop': 'revisions|info', 'rvprop': 'timestamp|user', 'rvdir': 'newer', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'blredirect': 1}) # get backlinks and creation time/user as well as info to determine if it's deleted
    if 'missing' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        return "'''Do Nothing''' Article has been deleted."
    for i in page['query']['backlinks']: # check for CCIs
        if i['title'][:47] == 'Wikipedia:Contributor copyright investigations/':
            return "'''Do Nothing''' [[" + i['title'] + '|CCI]]'
        elif i['title'][:14] == 'Wikipedia:CCI/':
            return "'''Do Nothing''' [[" + i['title'] + '|CCI]]'
        # also check pages that link via a redirect
        if 'redirlinks' in i:
            for j in i['redirlinks']:
                if j['title'][:47] == 'Wikipedia:Contributor copyright investigations/':
                    return "'''Do Nothing''' [[" + j['title'] + '|CCI]]'
                elif j['title'][:14] == 'Wikipedia:CCI/':
                    return "'''Do Nothing''' [[" + j['title'] + '|CCI]]'
    for i in page['query']['backlinks']: # parse talk pages to see if already notified
        if i['title'][:10] == 'User talk:':
            page2 = action({'action': 'parse', 'page': i['title'], 'prop': 'sections'})
            for j in page2['parse']['sections']:
                if j['line'] == 'Copyright problem: ' + title: # need to see if it matches a redirect title too... :(
                    return "'''Do Nothing''' " + i['title'][10:] + ' already notified'
    # OTRS permission pending/confirmed on the talk page means no notice is needed.
    page = action({'action': 'query', 'prop': 'categories', 'clcategories': 'Category:Items pending OTRS confirmation of permission|Category:Wikipedia pages with unconfirmed permission received by OTRS|Category:Wikipedia files with unconfirmed permission received by OTRS|Category:Items with OTRS permission confirmed', 'titles': 'Talk:'+title.encode('utf-8')})
    if 'categories' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        return "'''Do Nothing''' OTRS tag"
    # Walk the revision history to find who added {{Copyviocore}} (the tagger)
    # and which earlier revision best matches the blanked text (the contributor).
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'ids|user', 'titles': title.encode('utf-8'), 'rvlimit': 'max'})
    articleRevisionIDs = []
    for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
        articleRevisionIDs.append(i['revid'])
    revisionMatch = []
    latest = ''
    for i in articleRevisionIDs:
        page = action({'action': 'query', 'prop': 'revisions', 'rvstartid': i, 'rvprop': 'content|user|timestamp', 'titles': title.encode('utf-8'), 'rvlimit': 1})
        # default tagger/tagtime to the newest revision in case no tagged revision is found
        if i == articleRevisionIDs[0]: # maybe ???
            tagger = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user'] # maybe ???
            tagtime = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp'] # maybe ??
        if '*' in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0].keys(): # ignore deleted revisions
            if latest == '': latest = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
            if '{{Copyviocore' in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']:
                tagger = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user']
                tagtime = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp']
            # similarity of each revision to the text hidden inside the copyvio blanking
            # (the +108 skips past the fixed-length marker comment itself)
            revisionMatch.append(difflib.SequenceMatcher(None, latest[latest.find('<!-- Do not use the "Copyviocore" template directly; the above line is generated by "subst:Copyvio|url" -->\n')+108:latest.find('</div>')], page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']).ratio())
    # The revision where the similarity drops the most is credited as the
    # contribution that introduced the blanked text.
    diffRevisionMatch = []
    for i in range(len(revisionMatch)):
        if i < len(revisionMatch)-1: diffRevisionMatch.append(round(revisionMatch[i]-revisionMatch[i+1], 6))
        else: diffRevisionMatch.append(round(revisionMatch[i], 6))
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'rvstartid': articleRevisionIDs[[i for i, x in enumerate(diffRevisionMatch) if x == max(diffRevisionMatch)][0]]})
    contributor = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user']
    # CHECK FOR CUSTOM NOTIFICATION
    #tagger at User talk:contributor > tagtime
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'User talk:' + contributor.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
    if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
            if i['user'] == tagger: return "'''Do Nothing''' " + contributor + ' was left a custom notification'
    #contributor at Talk:Article/Temp page > tagtime
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8') + '/Temp', 'rvend': tagtime, 'rvlimit': 'max'})
    if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
            if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' created the temporary page'
    #contributor at Talk:Article > tagtime
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
    if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
            if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' edited the article talk page after it was tagged'
    #contributor at Article > tagtime
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
    if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]:
        for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
            if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' edited the article after it was tagged'
    # nothing ruled it out: recommend notifying
    # (the trailing "" after the literal is an empty adjacent string literal -- a no-op concatenation)
    return "'''Notify contributor''': """ + contributor + ' - tagged by ' + tagger
#narrowing with 'blnamespace': '3|4' breaks the blredirect parameter
# BETTER BUGFIX - try narrowed backlinks, then get list of redirects ONLY, then get backlinks for each redirect
# look for 'Copyright problem: <title or redirect>'
# list of all blanked pages
# Build one report line per newly blanked page and post the batch to the
# trial page as a new dated section.
article = ''.join('*[[:' + blanked + ']] - ' + doNotify(blanked) + '\n' for blanked in newBlankedPages)
page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial', 'text': (article + '\n').encode('utf-8'), 'section': 'new', 'summary': time.strftime('== %-d %B %Y ==', time.gmtime())})
##### TASK 6: flag when a contributor gets a CorenSearchBot/VWBot notice if he has had a significant amount before #####
# CSBot's user talk contribs from 00:00:00 to 23:59:59 the previous day
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'CorenSearchBot', 'uclimit': 'max', 'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time()-60*60*24)), 'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time()-60*60*24)), 'ucnamespace': '3'})
users = {}  # maps 'User talk:Name' -> list of [article, tag date] pairs
for i in page['query']['usercontribs']:
    users[i['title']] = []
# VWBot's user talk contribs from 00:00:00 to 23:59:59 the previous day
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'VWBot', 'uclimit': 'max', 'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time()-60*60*24)), 'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time()-60*60*24)), 'ucnamespace': '3'})
for i in page['query']['usercontribs']:
    users[i['title']] = []
# Whitelist of prolific editors who get tagged routinely; skip them.
for i in ['Merovingian', u'Leszek Jańczuk', 'Ganeshbot', 'Starzynka', 'Ser Amantio di Nicolao', 'Kumioko', 'Packerfansam', 'Alan Liefting']:
    # BUG FIX: was a bare 'try: del ... except: pass', which swallowed any
    # error; pop() with a default only ignores the expected missing key.
    users.pop('User talk:' + i, None)
# Count past CorenSearchBot/VWBot notices on each talk page.
for user in users.keys():
    # only checks last 5,000 edits
    page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'comment|timestamp|user', 'titles': user.encode('utf-8'), 'rvlimit': 'max'})
    for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
        if 'user' in i: # needed because RevDelete can return edits with no user field...apparently
            if i['user'] == 'VWBot' or i['user'] == 'CorenSearchBot':
                # the edit summary names the tagged article after 'on '
                users[user].append([i['comment'][i['comment'].find('on')+3:], time.strftime('%Y %B %-d', time.strptime(i['timestamp'],'%Y-%m-%dT%H:%M:%SZ'))])
# Report users with more than 4 notices as a collapsed section on the trial page.
addition = u''
for user in users.keys():
    if len(users[user]) > 4:
        addition += '\n==== ' + str(len(users[user])) + ': {{User|1=' + user[10:] + '}} ====\n{{Collapse top|Tagged articles}}\n'
        for i in users[user]:
            addition += '* {{subst:article-cv|' + i[0] + '}} created on ' + i[1] + '\n'
        addition += '{{Collapse bottom}}\n'
if len(addition):
    page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial', 'appendtext': (u'\n\n=== Task 6 ===' + addition).encode('utf-8'), 'summary': 'Listing users who have had multiple articles tagged by CorenSearchBot/VWBot ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})