User:VWBot/source

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import difflib
import simplejson as json # safely retrieve json objects (and correctly handle '/' in article titles)
import pickle # save arrays in files
import re
#import string # string.atoi - variable wait when lagged
import sys # apparently unused below
import time # what day is it?
import urllib # read/write websites



null = 0 
cj = None
ClientCookie = None
cookielib = None
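# Cookie handling: prefer the standard-library cookielib, fall back to the third-party
# ClientCookie module, and as a last resort use plain urllib2 without cookie support.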
try:
    import cookielib            
except ImportError:
    pass
else:
    import urllib2    
    urlopen = urllib2.urlopen
    cj = cookielib.LWPCookieJar()
    Request = urllib2.Request

if not cookielib:
    try:                                            
        import ClientCookie 
    except ImportError:
        import urllib2
        urlopen = urllib2.urlopen
        Request = urllib2.Request
    else:
        urlopen = ClientCookie.urlopen
        cj = ClientCookie.LWPCookieJar()
        Request = ClientCookie.Request

if cj != None:
    if cookielib:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
        urllib2.install_opener(opener)
    else:
        opener = ClientCookie.build_opener(ClientCookie.HTTPCookieProcessor(cj))
        ClientCookie.install_opener(opener)

### LOWER-LEVEL URL INTERFACE ###
def act (txdata, url = 'http://en.wikipedia.org/w/api.php', txheaders = {'User-agent' : 'VWBot'}):
	while True: # retry until the server returns a response; this also rides out errors the server occasionally throws
		try:
			req = Request(url, txdata, txheaders)
			handle = urlopen(req)
		except IOError, e:
			#print 'We failed to open "%s".' % url
			#if hasattr(e, 'code'):
			#	print 'We failed with error code - %s.' % e.code
			#elif hasattr(e, 'reason'):
			#	print "The error object has the following 'reason' attribute :", e.reason
			#	print "This usually means the server doesn't exist, is down, or we don't have an internet connection."
			time.sleep(5)
		else:
			return handle.read()

# handle.info() returns headers, handle.read() returns the page, handle.geturl() returns the true url of the page fetched (in case urlopen has followed any redirects)


### THIS DOES NOT ACCOUNT FOR QUERY-CONTINUE RESULTS; THESE MUST BE RE-QUERIED LATER
def action (params):
	if 'url' in params:
		url = params['url']
		del params['url']
	else: url = 'http://en.wikipedia.org/w/api.php'
	while True: # retry loop: keep resubmitting until the API returns a usable result
		params['format'] = 'json'
		# An appropriate non-aggressive value is maxlag=5 (5 seconds), used by most of the server-side scripts.
		# Higher values mean more aggressive behaviour, lower values are nicer. 
		#params['maxlag'] = 2 - impractical due to number 
		params['assert'] = 'bot'
		# If we're trying to make an edit, get an edit token first and set the timestamps to recognize an edit conflict.
		if params['action'] == 'edit':
			page = action({'action': 'query', 'prop': 'info|revisions', 'intoken': 'edit', 'titles': params['title']})
			params['token'] = page['query']['pages'][page['query']['pages'].keys()[0]]['edittoken']
			params['starttimestamp'] = page['query']['pages'][page['query']['pages'].keys()[0]]['starttimestamp']
			if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]].keys():
				# page exists
				params['basetimestamp'] = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp']
			else:
				# page doesn't exist
				params['basetimestamp'] = params['starttimestamp']
		page = json.loads(act(urllib.urlencode(params), url))
		# log reply
		file = open(time.strftime('log %Y-%m-%d.txt', time.gmtime()), 'a')
		file.write(time.asctime(time.gmtime()) + '\t' + str(page) + '\n\n')
		file.close()
		# make sure we got a result
		if params['action'] in page.keys()[0]:
			# if 'continue' in params['action']:
			if params['action'] == 'edit': time.sleep(5)
			return page
		if page['error']['code'] == 'emptynewsection':
			return page
		# We've lagged: wait the duration of the lag (or a minimum of 5 seconds) and try again
		#if page['error']['code'] == 'maxlag':
		#	time.sleep(max(5,string.atoi(page['error']['info'][page['error']['info'].find(':')+2:page['error']['info'].find('seconds')-1])))
		# We've hit an edit conflict or some other unknown error.
		time.sleep(5)
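
# NOTE: a minimal sketch of how the query-continue results mentioned above could be
# followed automatically; action_all and its merging strategy are assumptions and are
# not used by the bot, which re-queries such results later instead.
def action_all(params):
	results = []
	while True:
		request = dict(params) # action() adds keys to its argument, so work on a copy
		page = action(request)
		results.append(page)
		if 'query-continue' not in page:
			return results
		# carry the continuation parameters (e.g. cmcontinue, eicontinue) into the next request
		for module in page['query-continue']:
			for key in page['query-continue'][module]:
				params[key] = page['query-continue'][module][key]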

#######################
##### @ 00:00 GMT #####
#######################
startTime = time.time()

##### 2-STEP LOGIN #####
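# foo and bar below are placeholders for the bot's username and password. The first
# request fetches a login token, the second submits it; on a 'Throttled' result the
# bot waits the requested interval and tries again.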
def login():
	page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar})
	page = action({'action': 'login', 'lgname': foo, 'lgpassword': bar, 'lgtoken': page['login']['token']})
	if page['login']['result'] == 'Throttled':
		time.sleep(page['login']['wait'])
		login()

login()

##### TASK 1 #####
# TASK 2 - backlogSCV()
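# Append today's dated subpage transclusion to [[Wikipedia:Suspected copyright violations]].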
page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Suspected copyright violations', 'appendtext': time.strftime('\n{{/%Y-%m-%d}}', time.gmtime()), 'section': 0, 'summary': time.strftime('Adding new listing for %-d %B %Y ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])', time.gmtime())})


##### TASK 3 #####
page = action({'action': 'query', 'list': 'categorymembers', 'cmtitle': 'Category:Articles tagged for copyright problems', 'cmlimit': 'max'})
blankedPages = []
for i in page['query']['categorymembers']:
	blankedPages.append(i['title'])

file = open('todayLogCopyvio', 'rb') # pull up the previous run
alreadyBlankedPages = pickle.load(file)
file.close()
file = open('yesterdayLogCopyvio', 'wb') # overwrite yesterday's log with today's now that we have the change in articles
pickle.dump(alreadyBlankedPages, file)
file.close()
file = open('todayLogCopyvio', 'wb') # save log so it can be compared to the next run
pickle.dump(blankedPages, file)
file.close()
newBlankedPages = []
for x in blankedPages:
	if x not in alreadyBlankedPages:
		newBlankedPages.append(x) # now we have our list to run searches for

for i in newBlankedPages[:]: # iterate over a copy, since removing items while looping over the same list skips entries
	if i[:5] == 'File:':
		newBlankedPages.remove(i) # also need to report elsewhere - list at [[WP:PUF?]]

##### TASK 5 #####
page = action({'action': 'query', 'list': 'embeddedin', 'eititle': 'Template:Close paraphrasing', 'eilimit': 'max'})
closeParaphrases = []
for i in page['query']['embeddedin']:
	closeParaphrases.append(i['title'])

file = open('todayLogCloseParaphrasing', 'rb') # pull up the previous run
oldCloseParaphrases = pickle.load(file)
file.close()
file = open('yesterdayLogCloseParaphrasing', 'wb') # overwrite yesterday's log with today's now that we have the change in articles
pickle.dump(oldCloseParaphrases, file)
file.close()
file = open('todayLogCloseParaphrasing', 'wb') # save log so it can be compared to the next run
pickle.dump(closeParaphrases, file)
file.close()

newCloseParaphrases = []
for x in closeParaphrases:
	if x not in oldCloseParaphrases:
		newCloseParaphrases.append(x) # now we have our list to run searches for

##### TASK 10 #####
page = action({'action': 'query', 'list': 'embeddedin', 'eititle': 'Template:Copypaste', 'eilimit': 'max'})
copyPastes = []
for i in page['query']['embeddedin']:
	copyPastes.append(i['title'])

file = open('todayLogCopypaste', 'rb') # pull up the previous run
oldCopyPastes = pickle.load(file)
file.close()
file = open('yesterdayLogCopypaste', 'wb') # overwrite yesterday's log with today's
pickle.dump(oldCopyPastes, file)
file.close()
file = open('todayLogCopypaste', 'wb') # save the new log so it can be compared to the next run tomorrow
pickle.dump(copyPastes, file)
file.close()

newCopyPastes = []
for x in copyPastes:
	if x not in oldCopyPastes:
		newCopyPastes.append(x) # now we have our list to run searches for
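
# The three blocks above (Tasks 3, 5 and 10) repeat the same load/rotate/compare
# pattern; a sketch of how it could be factored out. diff_against_log is an
# assumption and is not called anywhere in this script.
def diff_against_log(titles, today_log, yesterday_log):
	f = open(today_log, 'rb') # previous run
	previous = pickle.load(f)
	f.close()
	f = open(yesterday_log, 'wb') # rotate: the previous run becomes yesterday's log
	pickle.dump(previous, f)
	f.close()
	f = open(today_log, 'wb') # save the current run for tomorrow's comparison
	pickle.dump(titles, f)
	f.close()
	return [t for t in titles if t not in previous] # titles new since the last run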

#######################
##### @ 00:10 GMT #####
#######################
while time.time() - startTime < 600: # no earlier than 00:10 GMT
	time.sleep(600 - (time.time() - startTime))

# always update NewListings - this is only needed so Zorglbot doesn't screw up; has no actual effect
page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems/NewListings', 'text': time.strftime('{{Wikipedia:Copyright problems/{{#time:Y F j|-7 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-6 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-5 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-4 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-3 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-2 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j|-1 day}}}}\n{{Wikipedia:Copyright problems/{{#time:Y F j}}}}<!--\n{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*168)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*144)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*120)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*96)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*72)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*48)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}\n', time.gmtime(time.time()-60*60*24)) + time.strftime('{{Wikipedia:Copyright problems/%Y %B %-d}}', time.gmtime()), 'summary': time.strftime('Automatic addition of new listing for %-d %B %Y and archiving of listings older than 7 days ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])', time.gmtime())})

#######################
##### @ 00:20 GMT #####
#######################
while time.time() - startTime < 1200: # no earlier than 00:20 GMT
	time.sleep(1200 - (time.time() - startTime))

##### TASK 3 #####
p3 = re.compile('<!-- This is Cppage. Comment used by User:DumbBOT, do not remove  or change -->') # marker left once {{subst:Cppage}} has been substituted
p4 = re.compile('====.*====') # a level-four heading; the first match is replaced below

page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'rvlimit': 1})

# group new page creation AND old page archival
if 'missing' in page['query']['pages'][page['query']['pages'].keys()[0]]:
	# CREATE AND POPULATE "BOT: Automatic creation of new daily page for copyright problems"
	page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'text': '{{subst:Cppage}}\n<!-- Add new listings at the bottom of the list with the following format:\n\n* {{subst:article-cv|ArticleName}} from [http://www.WhereItWasCopiedFrom.com]. ~~~~\n\n-->\n', 'summary': 'Automatic creation of new daily page for copyright problems including automated findings ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': 'Wikipedia:Copyright problems', 'rvlimit': 1})
	newtext = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*'].replace('\n\n===New listings===', time.strftime('\n{{Wikipedia:Copyright problems/%Y %B %-d}}\n\n===New listings===', time.gmtime(time.time()-60*60*192)))
	page = action({'action': 'edit', 'bot': 1, 'title': 'Wikipedia:Copyright problems', 'text': newtext.encode('utf-8'), 'summary': 'Automatic archiving of listings older than 7 days ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})
elif not re.search(p3, page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']):
	# POPULATE "adding CorenSearchBot findings"
	page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime()), 'text': page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*'].replace(re.search(p4, page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']).group(),'{{subst:Cppage}}'), 'summary': 'Adding automated findings ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})

##### TASKS 3, 5, 7 and 10 #####
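# Both helpers below work the same way: fetch the redirects to a title, then check the
# Wikipedia-namespace backlinks of the title and of each redirect. A link from WP:CP,
# WP:SCV or NewListings means the page is currently listed; a link from one of their
# dated subpages means it was listed at some point.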
def isAlreadyListed(title):
	page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'blfilterredir': 'redirects'})
	page['query']['backlinks'].append({'title': title})
	for i in page['query']['backlinks']:
		page = action({'action': 'query', 'list': 'backlinks', 'bltitle': i['title'].encode('utf-8'), 'bllimit': 'max', 'blnamespace': '4'})
		for j in page['query']['backlinks']:
			if 'Wikipedia:Copyright problems' == j['title'] or 'Wikipedia:Suspected copyright violations' == j['title'] or 'Wikipedia:Copyright problems/NewListings' == j['title']:
				return True
	return False

# replace NewListings check with one for each of the 8 always-listed days ???

def shouldBeRelisted(title):
	page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'blfilterredir': 'redirects'})
	page['query']['backlinks'].append({'title': title})
	wasListed = False
	isListed = False
	for i in page['query']['backlinks']:
		page = action({'action': 'query', 'list': 'backlinks', 'bltitle': i['title'].encode('utf-8'), 'bllimit': 'max', 'blnamespace': '4'})
		for j in page['query']['backlinks']:
			if 'Wikipedia:Suspected copyright violations/' in j['title'] or 'Wikipedia:Copyright problems/' in j['title']:
				wasListed = True
			if 'Wikipedia:Copyright problems' == j['title'] or 'Wikipedia:Suspected copyright violations' == j['title'] or 'Wikipedia:Copyright problems/NewListings' == j['title']:
				isListed = True
	if wasListed and not isListed: return True
	return False

# replace NewListings check with one for each of the 8 always-listed days ???

addtext = ''
p0 = re.compile('{{Close paraphras.*?}}', re.IGNORECASE | re.DOTALL)
p1 = re.compile('{{Close paraphras.*?source.*?}}', re.IGNORECASE | re.DOTALL) # gets {{Close paraphrase}} and {{Close paraphrasing}}
p1a = re.compile('\|\W*free\W*=\W*yes', re.IGNORECASE | re.DOTALL) # is source free?

for i in newCloseParaphrases:
	if not isAlreadyListed(i):
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			if re.search(p0, pageSource): # could be tag removed before it's analyzed
				temp = re.search(p0, pageSource).group()
				tag = re.search(p1, temp)
				if not re.search(p1a, temp): # only list at WP:CP if non-free
					if tag:
						if '|' in tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):]:
							addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tag.group()[tag.group().find('source') +\
							tag.group()[tag.group().find('source'):].find('=') + 1:tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') +\
							tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):].find('|')].strip() + '. ~~~~\n'
						else:
							addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' +\
							tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') + 1:-2].strip() + '. ~~~~\n'
					else: addtext += '* {{subst:article-cv|:' + i + '}} Close paraphrase. ~~~~\n'
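
# The string slicing above (and in the later loops) digs the value of a named template
# parameter (source= or url=) out of the matched tag. A regex-based sketch of the same
# idea; extract_param is an assumption and is not called anywhere in this script.
def extract_param(tag_text, name):
	m = re.search(r'\|\s*' + name + r'\s*=\s*(.*?)\s*(?:\||}})', tag_text, re.DOTALL)
	if m:
		return m.group(1)
	return ''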

moretext = ''
p2 = re.compile('{{Copyviocore.*?}}', re.IGNORECASE | re.DOTALL)

for i in newBlankedPages:
	if not isAlreadyListed(i):
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			tag = re.search(p2, pageSource)
			if tag:
				if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
					moretext += '* {{subst:article-cv|:' + i + '}} from ' + tag.group()[tag.group().find('url') +\
					tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Nomination completed by ~~~~\n'
				else:
					moretext += '* {{subst:article-cv|:' + i + '}} from ' +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Nomination completed by ~~~~\n'
			else: moretext += '* {{subst:article-cv|:' + i + '}} Nomination completed by ~~~~\n'


CopyPasteText = ''
p5 = re.compile('{{Copy.?past.*?}}|{{Copy\s*\|.*?}}|{{Copy\s*}}', re.IGNORECASE | re.DOTALL) # {{Copypaste}}, {{Copy paste}}, {{Copy|...}} or bare {{Copy}}
p6 = re.compile('{{Copy.?past.*?url.*?}}|{{Copy\s*\|.*?url.*?}}', re.IGNORECASE | re.DOTALL) # same, but only when a url is given

for i in newCopyPastes:
	if not isAlreadyListed(i):
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			if re.search(p5, pageSource): # could be tag removed before it's analyzed
				temp = re.search(p5, pageSource).group()
				tag = re.search(p6, temp)
				if tag:
					if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
						CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tag.group()[tag.group().find('url') +\
						tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
						tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. ~~~~\n'
					else:
						CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' +\
						tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. ~~~~\n'
				else: CopyPasteText += '* {{subst:article-cv|:' + i + '}} Copied and pasted. ~~~~\n'

### NOW FOR THE RELISTINGS ###
evenmoretext = ''
for i in blankedPages:
	if i in alreadyBlankedPages and shouldBeRelisted(i): # need to check alreadyBlankedPages as there is a delay between transclusion and backlinks
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			tag = re.search(p2, pageSource)
			if tag:
				if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
					evenmoretext += '* {{subst:article-cv|:' + i + '}} from ' + tag.group()[tag.group().find('url') +\
					tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
				else:
					evenmoretext += '* {{subst:article-cv|:' + i + '}} from ' +\
					tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
			else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Relisting. ~~~~\n'

for i in copyPastes:
	if i in oldCopyPastes and shouldBeRelisted(i):
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			if re.search(p5, pageSource): # could be tag removed before it's analyzed
				temp = re.search(p5, pageSource).group()
				tag = re.search(p6, temp)
				if tag:
					if '|' in tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):]:
						evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' + tag.group()[tag.group().find('url') +\
						tag.group()[tag.group().find('url'):].find('=') + 1:tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') +\
						tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
					else:
						evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted from ' +\
						tag.group()[tag.group().find('url') + tag.group()[tag.group().find('url'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
				else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Copied and pasted. Relisting. ~~~~\n'


for i in closeParaphrases:
	if i in oldCloseParaphrases and shouldBeRelisted(i): # need to check oldCloseParaphrases as there is a delay between transclusion and backlinks
		page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'content', 'titles': i.encode('utf-8'), 'rvlimit': 1})
		if 'missing' not in page['query']['pages'][page['query']['pages'].keys()[0]]:
			pageSource = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			if re.search(p0, pageSource): # could be tag removed before it's analyzed
				temp = re.search(p0, pageSource).group()
				tag = re.search(p1, temp)
				if not re.search(p1a, temp): # only list at WP:CP if non-free
					if tag:
						if '|' in tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):]:
							evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' + tag.group()[tag.group().find('source') +\
							tag.group()[tag.group().find('source'):].find('=') + 1:tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') +\
							tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('='):].find('|')].strip() + '. Relisting. ~~~~\n'
						else:
							evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase of ' +\
							tag.group()[tag.group().find('source') + tag.group()[tag.group().find('source'):].find('=') + 1:-2].strip() + '. Relisting. ~~~~\n'
					else: evenmoretext += '* {{subst:article-cv|:' + i + '}} Close paraphrase. Relisting. ~~~~\n'

#addtext should be CloseParaphraseText
#moretext should be CopyvioText
#evenmoretext should be RelistText

editsum = ''
if len(addtext) + len(moretext) + len(evenmoretext) + len(CopyPasteText):
	if len(addtext):
		if len(moretext):
			if len(evenmoretext):
				if len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes, close paraphrases and relisting overlooked pages'
				else: editsum = 'Adding incomplete nominations, close paraphrases and relisting overlooked pages'
			elif len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes and close paraphrases'
			else: editsum = 'Adding incomplete nominations and close paraphrases'
		elif len(evenmoretext):
			if len(CopyPasteText): editsum = 'Adding copy/pastes, close paraphrases and relisting overlooked pages'
			else: editsum = 'Adding close paraphrases and relisting overlooked pages'
		elif len(CopyPasteText): editsum = 'Adding copy/pastes and close paraphrases'
		else: editsum = 'Adding close paraphrases'
	elif len(moretext):
		if len(evenmoretext):
			if len(CopyPasteText): editsum = 'Adding incomplete nominations, copy/pastes and relisting overlooked pages'
			else: editsum = 'Adding incomplete nominations and relisting overlooked pages'
		elif len(CopyPasteText): editsum = 'Adding incomplete nominations and copy/pastes'
		else: editsum = 'Adding incomplete nominations'
	elif len(evenmoretext):
		if len(CopyPasteText): editsum = 'Adding copy/pastes and relisting overlooked pages'
		else: editsum = 'Relisting overlooked pages'
	else: editsum = 'Adding copy/pastes'
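
# A sketch of how the edit summary could be assembled from a list of phrases instead of
# the nested block above; build_editsum is an assumption and is not called below, though
# it is intended to yield the same summaries.
def build_editsum(addtext, moretext, evenmoretext, CopyPasteText):
	phrases = []
	if moretext: phrases.append('incomplete nominations')
	if CopyPasteText: phrases.append('copy/pastes')
	if addtext: phrases.append('close paraphrases')
	if evenmoretext: phrases.append('relisting overlooked pages')
	if not phrases:
		return ''
	if len(phrases) == 1:
		if phrases[0] == 'relisting overlooked pages':
			return 'Relisting overlooked pages'
		return 'Adding ' + phrases[0]
	return 'Adding ' + ', '.join(phrases[:-1]) + ' and ' + phrases[-1]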

if len(editsum):
	page = action({'action': 'edit', 'bot': 1, 'title': time.strftime('Wikipedia:Copyright problems/%Y %B %-d', time.gmtime(time.time()-60*60*24)), 'appendtext': (u'\n' + moretext + CopyPasteText + addtext + evenmoretext).encode('utf-8'), 'section': 2, 'summary': editsum + ' ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})

############################
##### USERSPACE TRIALS #####
############################

##### TASK 4: notify authors that their pages have been blanked (by {{subst:copyvio}}) in case they aren't notified by the taggers, so that the pages don't get relisted for an extra week without any action being taken on them  #####

def doNotify(title):
	page = action({'action': 'query', 'list': 'backlinks', 'bltitle': title.encode('utf-8'), 'bllimit': 'max', 'prop': 'revisions|info', 'rvprop': 'timestamp|user', 'rvdir': 'newer', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'blredirect': 1}) # get backlinks and creation time/user as well as info to determine if it's deleted
	if 'missing' in page['query']['pages'][page['query']['pages'].keys()[0]]:
		return "'''Do Nothing''' Article has been deleted."
	for i in page['query']['backlinks']: # check for CCIs
		if i['title'][:47] == 'Wikipedia:Contributor copyright investigations/':
			return "'''Do Nothing''' [[" + i['title'] + '|CCI]]'
		elif i['title'][:14] == 'Wikipedia:CCI/':
			return "'''Do Nothing''' [[" + i['title'] + '|CCI]]'
		if 'redirlinks' in i:
			for j in i['redirlinks']:
				if j['title'][:47] == 'Wikipedia:Contributor copyright investigations/':
					return "'''Do Nothing''' [[" + j['title'] + '|CCI]]'
				elif j['title'][:14] == 'Wikipedia:CCI/':
					return "'''Do Nothing''' [[" + j['title'] + '|CCI]]'
	for i in page['query']['backlinks']: # parse talk pages to see if already notified
		if i['title'][:10] == 'User talk:':
			page2 = action({'action': 'parse', 'page': i['title'], 'prop': 'sections'})
			for j in page2['parse']['sections']:
				if j['line'] == 'Copyright problem: ' + title: # need to see if it matches a redirect title too... :(
					return "'''Do Nothing''' " + i['title'][10:] + ' already notified'
	page = action({'action': 'query', 'prop': 'categories', 'clcategories': 'Category:Items pending OTRS confirmation of permission|Category:Wikipedia pages with unconfirmed permission received by OTRS|Category:Wikipedia files with unconfirmed permission received by OTRS|Category:Items with OTRS permission confirmed', 'titles': 'Talk:'+title.encode('utf-8')})
	if 'categories' in page['query']['pages'][page['query']['pages'].keys()[0]]:
		return "'''Do Nothing''' OTRS tag"
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'ids|user', 'titles': title.encode('utf-8'), 'rvlimit': 'max'})
	articleRevisionIDs = []
	for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
		articleRevisionIDs.append(i['revid'])
	revisionMatch = []
	latest = ''
	for i in articleRevisionIDs:
		page = action({'action': 'query', 'prop': 'revisions', 'rvstartid': i, 'rvprop': 'content|user|timestamp', 'titles': title.encode('utf-8'), 'rvlimit': 1})
		if i == articleRevisionIDs[0]: # maybe ???
			tagger = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user'] # maybe ???
			tagtime = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp'] # maybe ??
		if '*' in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0].keys(): # ignore deleted revisions
			if latest == '': latest = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']
			if '{{Copyviocore' in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']: 
				tagger = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user']
				tagtime = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['timestamp']
			revisionMatch.append(difflib.SequenceMatcher(None, latest[latest.find('<!-- Do not use the "Copyviocore" template directly; the above line is generated by "subst:Copyvio|url" -->\n')+108:latest.find('</div>')], page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['*']).ratio())
	diffRevisionMatch = []
	for i in range(len(revisionMatch)):
		if i < len(revisionMatch)-1: diffRevisionMatch.append(round(revisionMatch[i]-revisionMatch[i+1], 6))
		else: diffRevisionMatch.append(round(revisionMatch[i], 6))
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvlimit': 1, 'rvstartid': articleRevisionIDs[[i for i, x in enumerate(diffRevisionMatch) if x == max(diffRevisionMatch)][0]]})
	contributor = page['query']['pages'][page['query']['pages'].keys()[0]]['revisions'][0]['user']
	# CHECK FOR CUSTOM NOTIFICATION
	#tagger at User talk:contributor > tagtime
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'User talk:' + contributor.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
	if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]: 
		for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
			if i['user'] == tagger: return "'''Do Nothing''' " + contributor + ' was left a custom notification'
	#contributor at Talk:Article/Temp page > tagtime
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8') + '/Temp', 'rvend': tagtime, 'rvlimit': 'max'})
	if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]: 
		for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
			if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' created the temporary page'
	#contributor at Talk:Article > tagtime
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': 'Talk:' + title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
	if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]: 
		for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
			if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' edited the article talk page after it was tagged'
	#contributor at Article  > tagtime
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'user', 'titles': title.encode('utf-8'), 'rvend': tagtime, 'rvlimit': 'max'})
	if 'revisions' in page['query']['pages'][page['query']['pages'].keys()[0]]: 
		for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
			if i['user'] == contributor: return "'''Do Nothing''' " + contributor + ' edited the article after it was tagged'
	return "'''Notify contributor''': """ + contributor + ' - tagged by ' + tagger

#narrowing with 'blnamespace': '3|4' breaks the blredirect parameter
# BETTER BUGFIX - try narrowed backlinks, then get list of redirects ONLY, then get backlinks for each redirect
# look for 'Copyright problem: <title or redirect>'

# list of all blanked pages
article = ''
for i in newBlankedPages:
	article += '*[[:' + i + ']] - ' + doNotify(i) + '\n'

page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial', 'text': (article + '\n').encode('utf-8'), 'section': 'new', 'summary': time.strftime('== %-d %B %Y ==', time.gmtime())})

##### TASK 6: flag when a contributor gets a CorenSearchBot/VWBot notice if he has had a significant amount before #####
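# Flow: collect the user-talk pages CorenSearchBot and VWBot posted to yesterday, drop a
# fixed list of exempted users, count prior CSBot/VWBot tag notices in each talk page's
# recent history, and report anyone with more than four.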
# CSBot's user talk contribs from 00:00:00 to 23:59:59 the previous day
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'CorenSearchBot', 'uclimit': 'max', 'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time()-60*60*24)), 'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time()-60*60*24)),  'ucnamespace': '3'})

users = {}
for i in page['query']['usercontribs']:
	users[i['title']] = []

# VWBot's user talk contribs from 00:00:00 to 23:59:59 the previous day
page = action({'action': 'query', 'list': 'usercontribs', 'ucuser': 'VWBot', 'uclimit': 'max', 'ucstart': time.strftime('%Y-%m-%dT23:59:59Z', time.gmtime(time.time()-60*60*24)), 'ucend': time.strftime('%Y-%m-%dT00:00:00Z', time.gmtime(time.time()-60*60*24)),  'ucnamespace': '3'})

for i in page['query']['usercontribs']:
	users[i['title']] = []

for i in ['Merovingian', u'Leszek Jańczuk', 'Ganeshbot', 'Starzynka', 'Ser Amantio di Nicolao', 'Kumioko', 'Packerfansam', 'Alan Liefting']:
	try: del users['User talk:' + i]
	except KeyError: pass

for user in users.keys():
	# only checks last 5,000 edits
	page = action({'action': 'query', 'prop': 'revisions', 'rvprop': 'comment|timestamp|user', 'titles': user.encode('utf-8'), 'rvlimit': 'max'})
	for i in page['query']['pages'][page['query']['pages'].keys()[0]]['revisions']:
		if 'user' in i: # needed because RevDelete can return edits with no user field...apparently
			if i['user'] == 'VWBot' or i['user'] == 'CorenSearchBot':
				users[user].append([i['comment'][i['comment'].find('on')+3:], time.strftime('%Y %B %-d', time.strptime(i['timestamp'],'%Y-%m-%dT%H:%M:%SZ'))])

addition = u''
for user in users.keys():
	if len(users[user]) > 4:
		addition += '\n==== ' + str(len(users[user])) + ': {{User|1=' + user[10:] + '}} ====\n{{Collapse top|Tagged articles}}\n'
		for i in users[user]:
			addition += '* {{subst:article-cv|' + i[0] + '}} created on ' + i[1] + '\n'
		addition += '{{Collapse bottom}}\n'

if len(addition):
	page = action({'action': 'edit', 'bot': 1, 'title': 'User:VWBot/Trial', 'appendtext': (u'\n\n=== Task 6 ===' + addition).encode('utf-8'), 'summary': 'Listing users who have had multiple articles tagged by CorenSearchBot/VWBot ([[WP:BOT|bot]]) ([[User:VernoWhitney|op]])'})