'''
Copyright (c) 2022 Mike Christie
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
'''
import urllib.parse
import re
import datetime
import pywikibot
pywikibot.config.max_retries=2
import pymysql
import operator
import GA_config
import sys
from dateutil.parser import parse
class Topic:
    """A top-level Good Article topic (for example "History").

    A Topic owns an ordered list of Subtopic sections and knows how to render
    its own wikitext section header.
    """

    @classmethod
    def initialize(cls):
        """Build the full GA topic/subtopic hierarchy.

        Returns a two-element list [topics, subtopics], each a dict keyed by
        name, preserving the order the sections appear on the nominations
        page. Requires the Subtopic class defined later in this module.
        """
        # (topic name, [(subtopic name, anchors, intro text, icon file), ...])
        # in display order. This replaces the previous long run of
        # copy-pasted construction code with a single data table.
        catalog = [
            ("Agriculture, food and drink", [
                ("Agriculture, food and drink", ["FARM", "FOOD"],
                 'This includes agriculture and farming, horticulture and forestry, restaurants, cuisines, food, drink, food and drink companies, food and drink people, and cookery books.',
                 'Tango icon nature.svg'),
            ]),
            ("Art and architecture", [
                ("Art and architecture", ["ART", "ARCH"],
                 'This includes art, architecture, religious architecture, artists, architects, museums, and galleries.',
                 'Nuvola apps package graphics.svg'),
            ]),
            ("Engineering and technology", [
                ("Computing and engineering", ['COMP', 'ENG'],
                 'This includes computer-related businesses and businesspeople, cryptography, engineers and inventors, engineering technology, engineering failures and disasters, hardware, standards and protocols, programming, software, websites, and the Internet.',
                 'Nuvola apps display.png'),
                ("Transport", ['TRANS'],
                 'This includes air transport, maritime transport, rail transport, rail bridges/tunnels/stations, trains and locomotives, road infrastructure, road transportation and policy, and transport by region.',
                 'Nuvola apps ksysv.png'),
            ]),
            ("Geography and places", [
                ("Geography", ["GEO"],
                 'This includes bodies of water and water formations, geographers and explorers, human geography, islands, landforms, national and state parks, nature reserves, conservation areas, countryside routes, and urban/historical sites. Note: This does not include urban public parks, which are added to the Recreation subtopic.',
                 'Gnome-globe.svg'),
                ("Places", ["PLACE"],
                 'This includes countries, states, counties, cities, neighborhoods, and other political designations in Africa, Antarctica, Asia, Australia and the Pacific, Europe, Middle East, North America, and South America.',
                 'P countries-vector.svg'),
            ]),
            ("History", [
                ("Royalty, nobility and heraldry", ["ROYAL"],
                 'This includes flags and heraldry, historical monarchs, royalty, and nobility.',
                 'Azure-Cross-Or-Heraldry.svg'),
                ("World history", ["HIST"],
                 'This includes archaeology and archaeologists, historians and chroniclers, history books, historical heads of state and government, historical politicians, historical figures, African history, North American history, South American history, Asian history, Australian and Oceania history, European history, Middle Eastern history, and global history. Note: This does not include historical royalty or nobility, who are added to the Royalty, nobility and heraldry subtopic.',
                 'Greek deity head icon.png'),
            ]),
            ("Language and literature", [
                ("Language and literature", ["LANG", "LIT"],
                 "This includes alphabets and transliteration, ancient texts, biographies, autobiographies, essays, diaries, and travelogues, characters and fictional items, children's stories, fairy tales, and nursery rhymes, comics, literary genres, literary theory, languages, nonfiction, novels, plays, poetry, short fiction/anthologies, words and linguistics, writers, publishers, and critics.",
                 'Nuvola apps fonts.svg'),
            ]),
            ("Mathematics", [
                ("Mathematics and mathematicians", ["MATH", "MATHS"],
                 'This includes mathematics, mathematical problems, and mathematicians.',
                 'Nuvola apps edu mathematics-p.svg'),
            ]),
            ("Media and drama", [
                # Bug fix: the original used SQL-style quote doubling
                # ('film''s'), which Python concatenates to "films" -- the
                # rendered text read "after the films release".
                ("Film", ["FILM"],
                 "This includes film overview articles, film franchises, and film titles. Note: Articles on films that have not yet been released are not eligible to be nominated, as details within the article will change after the film's release.",
                 'Film reel.svg'),
                ("Media and drama", ["STAGE"],
                 'This includes people in the media (including actors, directors, models, performers, and celebrities), fictional characters and technologies, animation, cinema, radio, theatre, musical theatre, dance, and opera.',
                 'Drama-icon.svg'),
                ("Television", ["TV"],
                 'This includes television overview articles, television networks, television series, television episodes and specials, and television characters.',
                 'TV-icon-2.svg'),
            ]),
            ("Music", [
                ("Albums", ["ALBUM"],
                 'This includes record albums, soundtracks, and video albums.',
                 'CD icon test.svg'),
                ("Songs", ["SONG"],
                 'This includes songs from any era.',
                 'Song icon.png'),
                ("Other music articles", ["MUS"],
                 'This includes music awards, music by nation/people/region/country, music genres, music styles, music eras, musical theory, musical instruments, music techniques, music businesses and events, music compositions, performers, groups, composers, and other music people.',
                 'Saxophone-icon.svg'),
            ]),
            ("Natural sciences", [
                ("Biology and medicine", ["BIO", "MED"],
                 "This includes biology (including biologists, biology books, and evolution/reproduction), medicine (including medicine books, diseases/medical conditions, history of medicine, medical people/institutions, and medical procedures), pharmacology (including vaccines and drug classes), viruses, and organisms (including bacterial species, protists, fungi, plants, and animals such as mammals, birds, dinosaurs, reptiles/amphibians, fish, arthropods, other invertebrates, and domestic animals).",
                 "DNA icon.svg"),
                ("Chemistry and materials science", ["CHEM", "MATSCI"],
                 "This includes chemistry theory, chemistry books, types of chemical analyses, types of chemical transformations, named reactions, chemical compounds and materials, chemical substructures and groups, elements, chemistry and materials science organizations, and chemists/materials scientists.",
                 "Nuvola apps edu science.svg"),
                ("Earth sciences", ["EARTH", "CLIMATE", "WEATHER"],
                 "This includes geology (including geologists, geophysics and geophysicists, mineralogy and mineralogists, and earthquakes) and meteorology (meteorological observatories, storm sciences, tropical cyclones, tropical cyclone seasons, storm effects, weather, and winter storms), oceanography and climate sciences (current climate change and paleoclimate).",
                 "Jordens inre.svg"),
                ("Physics and astronomy", ["PHYS", "ASTRO"],
                 "This includes physics and physicists, astronomy and astronomers, astrophysics and astrophysicists, the solar system, constellations and asterisms, stars, galaxies, extrasolar objects, and rocketry/spaceflight.",
                 "Nuvola apps katomic.svg"),
            ]),
            ("Philosophy and religion", [
                ("Philosophy and religion", ["PHIL", "REL"],
                 "This includes divinities and protohistoric figures, myths, mythology, and miracles, philosophies and philosophers, philosophical movements, philosophical doctrines/teachings/texts/symbols, religions and religious figures, religious movements, religious congregations and organizations, religious doctrines/teachings/texts/symbols. Note: This does not include religious buildings, which are added to the Art and architecture subtopic.",
                 "Yin yang.svg"),
            ]),
            ("Social sciences and society", [
                ("Culture, sociology and psychology", ["CULTURE", "SOC", "SOCIO", "PSYCH"],
                 "This includes culture and cultural studies, cultural symbols, Internet culture, cultural organizations and events, ethnic groups, psychology and psychologists, anthropology and anthropologists, sociology and sociologists, and globalization.",
                 "Nuvola apps kuser.svg"),
                ("Education", ["EDU"],
                 "This includes education, educators, and educational institutions.",
                 "Nuvola apps edu miscellaneous.svg"),
                ("Economics and business", ["ECON", "BUS"],
                 "This includes advertising and marketing, businesspeople, businesses and organizations, economics, and numismatics/currencies.",
                 "Nuvola mimetypes kchart chrt.png"),
                ("Law", ["LAW"],
                 "This includes law cases, domestic law, constitutional law, international law, crime, criminals, punishment, victims, ethics, lawyers, judges/legal academics, and legal institutions/buildings.",
                 "Icon-Rechtshinweis-blau2-Asio.png"),
                ("Magazines and print journalism", ["MEDIA", "PRINT"],
                 "This includes journalism and journalists, newspapers, magazines, and journals.",
                 "Canon EOS Rebel.svg"),
                ("Politics and government", ["POL", "GOVT"],
                 "This includes heads of state and government, intelligence and espionage, international organizations, political and governmental institutions, political districts, political direction and governance, political events and elections, political figures, political issues, political theory and analysis, and political parties/movements. Note: This does not include deceased politicians or former heads of state and government, who are added to the World history subtopic.",
                 "Vote.svg"),
            ]),
            ("Sports and recreation", [
                ("Football", ["FOOTBALL"],
                 "This includes association football (soccer), Australian rules football, Gaelic football, gridiron football (including American football, arena football, and Canadian football), international rules football, rugby league, rugby union, and historical forms of football.",
                 "Soccerball.jpg"),
                ("Other sports", ["SPORT"],
                 "This includes all sports other than football, including baseball (including baseball teams/events and baseball people), basketball (basketball teams/events and basketball people), cricket (including cricket teams/events and people), hockey (field and ice hockey teams/events/arenas and hockey people), pro wrestling (events, people, and organizations), cue sports (billiards, pool, snooker, and other cue sports), and other sports (including curling, cycling, equestrianism, golf, lacrosse, mixed martial arts/boxing, Motorsport, Olympics and Paralympics, rowing, running, track and field, skating, skiing, swimming/water sports, tennis, chess, sports mascots/supporters, and sports miscellanea).",
                 "Baseball.svg"),
                ("Recreation", ["REC"],
                 "This includes board/card/role-playing games, poker, toys, zoos, public parks, and amusements.",
                 "Playing card heart 3.svg"),
            ]),
            ("Video games", [
                ("Video games", ["VG"],
                 "This includes early video games, video game titles, video game series, video game characters, video game genres, video game systems/services, video game history and development, the video game industry, video game developers, video game terms, and game elements.",
                 "WPVG icon 2016.svg"),
            ]),
            ("Warfare", [
                ("Warfare", ["WAR"],
                 "This includes armies and military units, battles, exercises, and conflicts (from ancient history through the present), massacres, war crimes, and legal issues of warfare, military aircraft, military awards and decorations, military museums and memorials, military people, warships and naval units (including ship types, naval technology, and warships by country) weapons, equipment, and buildings (including military programs, uniforms, installations, and castles).",
                 "Miecze.svg"),
            ]),
            ("Miscellaneous", [
                ("Miscellaneous", ["MISC"],
                 "This includes anything that does not fit into one of the topics above.",
                 "Symbol dot dot dot gray.svg"),
            ]),
        ]
        topics = {}
        subtopics = {}
        for topic_name, subs in catalog:
            topic = cls(topic_name)
            topics[topic.name] = topic
            for sub_name, anchors, intro, icon in subs:
                subtopic = Subtopic(sub_name, topic, anchors, intro, icon)
                topic.add_subtopic(subtopic)
                subtopics[subtopic.name] = subtopic
        return [topics, subtopics]

    def __init__(self, name):
        """Create an empty topic named *name*."""
        self.name = name
        self.subtopics = []  # Subtopic objects, in display order

    def add_subtopic(self, subtopic):
        """Append *subtopic* to this topic's ordered list of subtopics."""
        self.subtopics.append(subtopic)

    def header(self, target):
        """Return the level-2 wikitext header for this topic.

        Only the 'Wikipedia' render target gets a header; any other target
        yields the empty string.
        """
        header = ""
        if target == 'Wikipedia':
            header = "==" + self.name + "==\n"
        return header
class Subtopic:
    """One subtopic section of the GA nominations page.

    Holds the section's display strings (header, icon, anchors, shortcut)
    and an index of the nominations filed under it.
    """

    # Maps the many abbreviated/alternate forms nominators type to the
    # canonical subtopic name. Each canonical name must itself appear here
    # (lowercased) as a key, because Nom.__init__ validates raw input
    # against this mapping before looking it up.
    subtopic_var_dict = {
        'agriculture, food and drink': 'Agriculture, food and drink',
        'agriculture': 'Agriculture, food and drink',
        'cuisine': 'Agriculture, food and drink',
        'cuisines': 'Agriculture, food and drink',
        'cultivation': 'Agriculture, food and drink',
        'drink': 'Agriculture, food and drink',
        'farming and cultivation': 'Agriculture, food and drink',
        'farming': 'Agriculture, food and drink',
        'food and drink': 'Agriculture, food and drink',
        'food': 'Agriculture, food and drink',
        'art and architecture': 'Art and architecture',
        'art': 'Art and architecture',
        'architecture': 'Art and architecture',
        'engineering and technology': 'Computing and engineering',
        'engtech': 'Computing and engineering',
        'applied sciences and technology': 'Computing and engineering',
        'applied sciences': 'Computing and engineering',
        'computers': 'Computing and engineering',
        'computing and engineering': 'Computing and engineering',
        'computing': 'Computing and engineering',
        'eng': 'Computing and engineering',
        'engineering': 'Computing and engineering',
        'technology': 'Computing and engineering',
        'transport': 'Transport',
        'geography and places': 'Geography',
        'geography': 'Geography',
        'places': 'Places',
        'history': 'World history',
        'archaeology': 'World history',
        'world history': 'World history',
        'royalty, nobility and heraldry': 'Royalty, nobility and heraldry',
        'heraldry': 'Royalty, nobility and heraldry',
        'nobility': 'Royalty, nobility and heraldry',
        'royalty': 'Royalty, nobility and heraldry',
        'language and literature': 'Language and literature',
        'langlit': 'Language and literature',
        'languages and linguistics': 'Language and literature',
        'languages and literature': 'Language and literature',
        'languages': 'Language and literature',
        'linguistics': 'Language and literature',
        'lit': 'Language and literature',
        'literature': 'Language and literature',
        'mathematics and mathematicians': 'Mathematics and mathematicians',
        'mathematics': 'Mathematics and mathematicians',
        'math': 'Mathematics and mathematicians',
        'maths': 'Mathematics and mathematicians',
        'media and drama': 'Media and drama',
        'drama': 'Media and drama',
        'ballet': 'Media and drama',
        'dance': 'Media and drama',
        'film': 'Film',
        'films': 'Film',
        'media': 'Media and drama',
        'opera': 'Media and drama',
        'television': 'Television',
        'theater': 'Media and drama',
        'theatre': 'Media and drama',
        'theatre, film and drama': 'Media and drama',
        'music': 'Other music articles',
        'classical compositions': 'Other music articles',
        'other music articles': 'Other music articles',
        'albums': 'Albums',
        'songs': 'Songs',
        'natural sciences': 'Physics and astronomy',
        'natsci': 'Physics and astronomy',
        'astronomy': 'Physics and astronomy',
        'astrophysics': 'Physics and astronomy',
        'cosmology': 'Physics and astronomy',
        'natural science': 'Physics and astronomy',
        'physics and astronomy': 'Physics and astronomy',
        'physics': 'Physics and astronomy',
        'biology and medicine': 'Biology and medicine',
        'biology': 'Biology and medicine',
        'medicine': 'Biology and medicine',
        'chemistry and materials science': 'Chemistry and materials science',
        'chemistry': 'Chemistry and materials science',
        'atmospheric science': 'Earth sciences',
        'earth science': 'Earth sciences',
        'earth sciences': 'Earth sciences',
        'geology': 'Earth sciences',
        'geophysics': 'Earth sciences',
        'meteorology and atmospheric science': 'Earth sciences',
        'mineralogy': 'Earth sciences',
        'meteorology': 'Earth sciences',
        'philosophy and religion': 'Philosophy and religion',
        'philrelig': 'Philosophy and religion',
        'mysticism': 'Philosophy and religion',
        'myth': 'Philosophy and religion',
        'mythology': 'Philosophy and religion',
        'phil': 'Philosophy and religion',
        'philosophy': 'Philosophy and religion',
        'relig': 'Philosophy and religion',
        'religion': 'Philosophy and religion',
        'religion, mysticism and mythology': 'Philosophy and religion',
        'culture, sociology and psychology': 'Culture, sociology and psychology',
        'culture, society and psychology': 'Culture, sociology and psychology',
        'culture, sociology, and psychology': 'Culture, sociology and psychology',
        'culture, society, and psychology': 'Culture, sociology and psychology',
        'social sciences and society': 'Culture, sociology and psychology',
        'socsci': 'Culture, sociology and psychology',
        'culture and society': 'Culture, sociology and psychology',
        'culture': 'Culture, sociology and psychology',
        'psychology': 'Culture, sociology and psychology',
        'social science': 'Culture, sociology and psychology',
        'social sciences': 'Culture, sociology and psychology',
        'society': 'Culture, sociology and psychology',
        'education': 'Education',
        'economics and business': 'Economics and business',
        'business and economics': 'Economics and business',
        'business': 'Economics and business',
        'economics': 'Economics and business',
        'law': 'Law',
        'journalism and media': 'Magazines and print journalism',
        'journalism': 'Magazines and print journalism',
        'magazines and print journalism': 'Magazines and print journalism',
        'media and journalism': 'Magazines and print journalism',
        'gov': 'Politics and government',
        'government': 'Politics and government',
        'politics and government': 'Politics and government',
        'politics': 'Politics and government',
        'sports and recreation': 'Other sports',
        'other sports': 'Other sports',
        'sports': 'Other sports',
        'everyday life': 'Recreation',
        'everydaylife': 'Recreation',
        'games': 'Recreation',
        'recreation': 'Recreation',
        'sports (other)': 'Other sports',
        'sport and recreation': 'Other sports',
        'sport': 'Other sports',
        'sports (football)': 'Football',
        'football': 'Football',
        'association football': 'Football',
        'soccer': 'Football',
        'american football': 'Football',
        'canadian football': 'Football',
        'gridiron football': 'Football',
        'gaelic football': 'Football',
        'australian football': 'Football',
        'australian rules football': 'Football',
        'rugby': 'Football',
        'rugby union': 'Football',
        'rugby league': 'Football',
        'video games': 'Video games',
        'video and computer games': 'Video games',
        'warfare': 'Warfare',
        'war': 'Warfare',
        'aircraft': 'Warfare',
        'battles and exercises': 'Warfare',
        'battles': 'Warfare',
        'decorations and memorials': 'Warfare',
        'military': 'Warfare',
        'military people': 'Warfare',
        'units': 'Warfare',
        'war and military': 'Warfare',
        'warships': 'Warfare',
        'weapons and buildings': 'Warfare',
        'weapons': 'Warfare',
        'miscellaneous': 'Miscellaneous',
    }

    def __init__(self, name, topic, anchors, intro_text, icon_file):
        """Store display metadata and precompute the section's wikitext parts.

        name       -- canonical subtopic name
        topic      -- owning Topic object
        anchors    -- list of WP:GAN# anchor abbreviations (may be empty)
        intro_text -- explanatory blurb shown under the section header
        icon_file  -- Commons file name for the section icon
        """
        self.topic = topic
        self.name = name
        self.anchors = anchors
        self.intro_text = intro_text
        self.icon_file = icon_file
        self.nom_list = {}  # Nom objects keyed by article title
        self.section_header_line = "=== " + self.name + " ==="
        self.icon_line = '[[File:' + self.icon_file + '|22px|left]]'
        # One {{anchor}} per abbreviation; empty string when there are none.
        self.anchor_line = ''.join('{{anchor|' + a + '}}' for a in anchors)
        if len(anchors) > 0:
            self.shortcut_line = ('{{shortcut'
                                  + ''.join('|WP:GAN#' + a for a in anchors)
                                  + '}}')
        else:
            self.shortcut_line = ''

    def add_nom(self, nom):
        """Index *nom* under its article title."""
        self.nom_list[nom.title] = nom

    def section_header(self):
        """Render this subtopic's full wikitext section header block."""
        rows = (
            "",
            self.icon_line,
            self.anchor_line,
            "",
            self.section_header_line,
            self.shortcut_line,
            "",
            "::" + self.intro_text,
            "",
        )
        return '\n'.join(rows)
class Nom:
    """A single Good Article nomination and its review state."""
    # Class variable dictionary to hold the status letter/status string relationship.
    # The empty-string status means the nomination is still waiting for a reviewer.
    status_strings = {
        'R' : 'Under review',
        '2' : '2nd opinion',
        'H' : 'On hold',
        'P' : 'Passed',
        'F' : 'Failed',
        'X' : 'Removed',
        '' : 'Start review'
    }
    # Status letter -> keyword used when rendering status templates
    # (presumably substituted into a {{GAN...}}-style template -- confirm
    # against the rendering code). Terminal statuses P/F/X have no entry;
    # __init__ falls back to '' for any letter missing here.
    status_template_strings = {
        'R' : 'onreview',
        '2' : '2ndopinion',
        'H' : 'onhold',
        '' : ''
    }
@classmethod
def is_a_GA(cls, page): # returns True if the article has the GA template on the talk page or the GA template their article page
return ('Category:Wikipedia good articles' in [x.title() for x in page.categories()] or 'Category:Good articles' in [x.title() for x in page.categories()])
@classmethod
def is_an_FA(cls, page): # returns True if the article is featured; works for both article and talk page
return ('Category:Wikipedia featured articles' in [x.title() for x in page.categories()] or 'Category:Featured articles' in [x.title() for x in page.categories()])
@classmethod
def is_an_FFA(cls, page): # returns True if the article is a former featured article; works for both article and talk page
if page.title()[:5] == "Talk:":
talk_page = page
else:
site = pywikibot.Site('en','wikipedia')
talk_page = pywikibot.Page(site,"Talk:" + page.title())
return 'Category:Wikipedia former featured articles' in [x.title() for x in talk_page.categories()]
def __init__(self, conn, topic, subtopic, title, status, page_num, nominator, nomination_ts, note, shortdesc):
    """Build a nomination record from parsed nomination-template values.

    conn          -- open DB connection, used to look up prior state in the
                     active_nominations table
    topic         -- topic name string
    subtopic      -- raw subtopic string as typed by the nominator
    title         -- article title
    status        -- one-letter status code (see Nom.status_strings)
    page_num      -- GA review page number (Talk:<title>/GA<page_num>)
    nominator     -- nominating user name
    nomination_ts -- datetime of the nomination
    note/shortdesc-- free-text note and short description

    Side effects: queries the wiki via get_review_info and may append to
    self.warnings and GA_config.current_errors.
    """
    #GAN.log(conn, "Nom:init","Called with page:",page_num)
    # Map the free-form subtopic string to its canonical name; anything
    # unrecognized is filed under 'Miscellaneous'.
    if subtopic.lower() not in Subtopic.subtopic_var_dict.keys():
        self.subtopic = 'Miscellaneous'
    else:
        self.subtopic = Subtopic.subtopic_var_dict[subtopic.lower()]
    self.topic = topic
    self.title = title
    self.status = status
    self.page_num = page_num
    self.nominator = nominator
    self.nomination_ts = nomination_ts
    self.note = note
    self.shortdesc = shortdesc
    # Reviewer/nominator statistics -- initialized to zero here and
    # presumably filled in later by other code (not visible in this class).
    self.nominator_reviews = 0
    self.nominator_GAs = 0
    self.nominator_edit_age = 0 # time in days since nominator's last edit
    self.reviewer_reviews = 0
    self.reviewer_GAs = 0
    self.reviewer_edit_age = 0 # time in days since reviewer's last edit
    self.nom_edits = 0
    self.reviewer = None
    self.review_started_ts = None
    self.custom_sort_2 = ""
    self.edit_summary = ""
    self.status_template_string = ""
    self.warnings = []
    # Populates review_page_name/review_page_exists/reviewer/review_started_ts
    # by inspecting Talk:<title>/GA<page_num> on the wiki.
    self.get_review_info(title, page_num)
    self.hold_ts = None # Time when the nomination was placed on hold
    self.second_opinion_ts = None # Time when the nomination status was changed to second opinion requested
    if status not in Nom.status_strings.keys(): # Change an invalid status to waiting for review
        status = ''
    self.status_string = Nom.status_strings[status]
    if status not in Nom.status_template_strings.keys(): # Change an invalid status to waiting for review
        self.status_template_string = Nom.status_template_strings['']
    else:
        self.status_template_string = Nom.status_template_strings[status]
        # NOTE(review): redundant -- status_string was already set identically above.
        self.status_string = Nom.status_strings[status]
    # NOTE(review): the checks from here on use self.status (the raw letter,
    # which may be invalid) rather than the normalized local 'status' --
    # confirm that is intentional.
    if not self.review_page_exists and self.status not in ('X',''):
        self.warnings.append("Status indicates review has started but there is no review page")
        GA_config.current_errors.append("Status of [[" + self.title + "]] / " + str(self.page_num) + " is " + self.status + " indicating review has started but there is no review page\n")
    # Now we need to set the second opinion and hold timestamps, if they exist. These are not shown on the review or talk pages, so we have to get them from the active_nominations table.
    if self.review_started_ts != None:
        active_nomination = Active_nomination.get_active_nomination(conn, title)
        if active_nomination == None:
            # If we're here, this is the first time we've seen this nomination, since it's not in the active_nominations stored during the last run.
            if self.status == 'H':
                self.hold_ts = datetime.datetime.utcnow()
            if self.status == '2':
                self.second_opinion_ts = datetime.datetime.utcnow()
        else:
            self.hold_ts = active_nomination['hold_ts']
            self.second_opinion_ts = active_nomination['second_opinion_ts']
    # Split the nomination's age into whole days plus leftover seconds
    # (timedelta.seconds is the sub-day remainder, 0..86399).
    age_of_nom = datetime.datetime.utcnow() - self.nomination_ts
    self.age_in_days = age_of_nom.days
    self.age_in_seconds_without_days = age_of_nom.seconds
    self.R_over_G = 0
    self.R_plus_one_over_G = 0
    # Derived sort keys; the recalculate_* implementations live elsewhere
    # in this class (not visible in this chunk).
    self.recalculate_RG()
    self.recalculate_R_plus_one_over_G()
    self.recalculate_custom_sort()
def get_review_info(self, title, page_num, conn=None):
    """Inspect Talk:<title>/GA<page_num> and record review state.

    Sets self.review_page_name, self.review_page_exists, self.reviewer and
    self.review_started_ts (reviewer/timestamp come from the review page's
    first revision). Follows a redirect to the real review page; a redirect
    pointing at a nonexistent page counts as "no review page".

    conn -- optional DB connection forwarded to GAN.log on server errors.
            (Bug fix: the log calls previously referenced an undefined name
            'conn' and would raise NameError inside the except handler;
            the new defaulted parameter is backward-compatible.)
    """
    self.review_page_name = "Talk:" + title + "/GA" + str(page_num)
    self.reviewer = None
    self.review_started_ts = None
    self.review_page_exists = False
    site = pywikibot.Site('en','wikipedia')
    review_page = pywikibot.Page(site, self.review_page_name)
    try:
        self.review_page_exists = review_page.exists()
    except pywikibot.exceptions.ServerError:
        GAN.log(conn,"get_review_info: exceptions",title,"Server error testing existence of review page")
    if self.review_page_exists:
        # If it exists we still need to check whether it's a redirect.
        try:
            review_page.get()
        except pywikibot.exceptions.IsRedirectPageError as e:
            # e.title is apparently of the form '[[Target]]'; strip the brackets.
            review_page_redir_target = pywikibot.Page(site, e.title[2:-2])
            if review_page_redir_target.exists():
                review_page = review_page_redir_target
            else:
                # Redirect to a nonexistent page: treat as no review page.
                # (Bug fix: this previously assigned a throwaway local
                # 'review_page_exists', leaving the attribute True.)
                self.review_page_exists = False
    if self.review_page_exists:
        try:
            # Fetch the oldest revision once; it supplies both the reviewer
            # and the review start time.
            first_rev = review_page.oldest_revision
            self.reviewer = first_rev['user']
            self.review_started_ts = first_rev['timestamp']
        except pywikibot.exceptions.ServerError:
            GAN.log(conn,"get_review_info: exceptions",title,"Server error retrieving oldest revision of review page")
def update_timestamps(self, old_status):
    """Stamp hold/second-opinion times when the nomination newly enters that state.

    A transition into 'H' (hold) or '2' (second opinion) from any other live
    state ('', 'R', or the other of the two) records the current UTC time.
    """
    now = datetime.datetime.utcnow()
    if self.status == 'H' and old_status in ('', '2', 'R'):
        self.hold_ts = now
    if self.status == '2' and old_status in ('', 'H', 'R'):
        self.second_opinion_ts = now
def add_GA_star(self, conn):
    """Add the {{good article}} icon to the article (if missing) and record the
    resulting oldid in the talk page's {{GA}} template.

    The star is inserted after any {{Short description}} and {{DISPLAYTITLE}}
    templates.  Hatnote handling is not currently implemented.
    """
    site = pywikibot.Site('en','wikipedia')
    article_page = pywikibot.Page(site, self.title)
    # May need to eventually check that this is not a redirect, and go to the target if it is.
    ga_offset = article_page.text.find('{{good article}}') # Check if the star is already in the article
    if ga_offset == -1:
        if GA_config.is_live:
            # Find the end offsets of the short description and DISPLAYTITLE
            # templates (0 when absent); the star goes after the later one.
            sd_offset = 0
            sd_re = re.search('{{Short description[^}]*}}', article_page.text, re.IGNORECASE)
            if sd_re is not None:
                sd_offset = sd_re.span()[1]
            DT_offset = 0
            DT_re = re.search('{{DISPLAYTITLE[^}]*}}', article_page.text)
            if DT_re is not None:
                # Bug fix: this previously read sd_re.span()[1], which used the
                # short-description offset and crashed when there was none.
                DT_offset = DT_re.span()[1]
            insert_offset = max(sd_offset, DT_offset)
            if article_page.text[insert_offset:insert_offset + 1] == "\n":
                insert_offset += 1  # insert after the newline ending the preceding template
            article_page.text = article_page.text[:insert_offset] + "{{good article}}\n" + article_page.text[insert_offset:]
            GAN.log(conn, "add_GA_star", self.title, "new article text is" + article_page.text[:200] + " ...")
            article_page.save("Add good article icon")
            article_page = pywikibot.Page(site, self.title) # Now the star is added, refresh the page since we want the newest revid for oldid
    oldid = article_page.latest_revision.revid
    talk_page = pywikibot.Page(site,'Talk:' + self.title)
    GAN_re = re.search(r'{{GA\|', talk_page.text)
    if GAN_re is None:
        # Check if it was added to article history -- if it was we don't have an error
        if talk_page.text.lower().find("{{article history") < 0 and talk_page.text.find("/GA" + str(self.page_num)) < 0:
            GAN.notify_error("Adding oldid","Searching for GA template","Can't find GA template or article history link in Talk:" + self.title)
            GAN.log(conn,"add_GA_star","Searching for GA template","Can't find GA template or article history link in Talk:" + self.title)
        return None
    GAN_start = GAN_re.span()[0]
    GAN_text = talk_page.text[GAN_start:] # Strips everything before the template
    GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA template text and nothing else
    if GAN_text.find("oldid") < 0: # Don't add oldid if it's already there
        new_GAN_text = GAN_text[0:-2] + "|oldid=" + str(oldid) + "}}"
        talk_page.text = talk_page.text[:GAN_re.span()[0]] + new_GAN_text + talk_page.text[GAN_re.span()[0] + len(GAN_text):]
        if GA_config.is_live:
            talk_page.save("Add oldid for good article")
def update_users(self, conn, users, name_changes, review_stats, wbgan):
    """Refresh cached per-user statistics for this nomination's nominator and
    reviewer, populating the shared `users` cache keyed by user name.

    Each cache entry holds 'reviews' (review count), 'GAs' (promoted GA count)
    and 'edit_age' (days since the user's last edit).  A user page redirect
    without a slash is followed to the target user.  Recalculates the derived
    sort ratios at the end.
    """
    #TODO if the namespace of a redirected user page is not user or user talk, ignore it
    site = pywikibot.Site('en','wikipedia')
    if self.nominator not in users.keys():
        self.nominator_reviews = review_stats.get_review_count(self.nominator, name_changes)
        self.nominator_GAs = WBGAN.get_GA_count(wbgan, self.nominator, name_changes)
        u = pywikibot.User(site,'User:' + self.nominator)
        target_user = GAN.get_link_redirect_target(conn, 'User:' + self.nominator)
        GAN.log(conn,"update_users", self.title, str(target_user))
        if target_user is not None:
            GAN.log(conn,"update_users", self.title, "find offset for slash = " + str(target_user.title().find("/")))
        if target_user is not None and target_user.title().find("/") < 0: # We don't want to follow the redirect if it has a slash because that's not a real user page
            u = pywikibot.User(site, target_user.title().replace(" talk",""))
        GAN.log(conn,"update_users", self.title, "nominator = " + self.nominator)
        GAN.log(conn,"update_users", self.title, "u = " + u.username)
        if target_user is None:
            GAN.log(conn,"update_users", self.title, "target_user = None")
        else:
            GAN.log(conn,"update_users", self.title, "target_user = " + target_user.title())
        u_last_edit = None
        try:
            u_last_edit = u.last_edit
        except pywikibot.exceptions.ServerError as e:
            # Bug fix: this previously logged an undefined name `link`, which
            # raised NameError whenever the handler fired.
            GAN.log(conn,"update_users:exceptions",self.title, "Pywikibot server exception " + str(e) + " when trying to get user last edit")
            u_last_edit = 'Error'
        if u_last_edit in [None, 'Error']:
            # NOTE(review): self.nominator_edit_age may not have been assigned
            # yet on this path -- confirm the constructor initialises it.
            users[self.nominator] = {
                'reviews': 0,
                'GAs': 999,
                'edit_age': self.nominator_edit_age
            }
            if u_last_edit is None:
                GA_config.current_errors.append("No user edits found for user " + str(u) + " -- user may have been renamed. See [[User:ChristieBot#What to do if your username changes]] for how to fix this.")
        else:
            # Reuse the value fetched above instead of a second u.last_edit lookup.
            ule = u_last_edit[2]
            ule_delta = datetime.datetime.utcnow() - ule
            self.nominator_edit_age = ule_delta.days
            users[self.nominator] = {
                'reviews': self.nominator_reviews,
                'GAs': self.nominator_GAs,
                'edit_age': self.nominator_edit_age
            }
    self.nominator_reviews = users[self.nominator]['reviews']
    self.nominator_GAs = users[self.nominator]['GAs']
    self.nominator_edit_age = users[self.nominator]['edit_age']
    if self.reviewer is not None and self.reviewer not in users.keys() and self.reviewer != '':
        self.reviewer_reviews = review_stats.get_review_count(self.reviewer, name_changes)
        GAs = WBGAN.get_GA_count(wbgan, self.reviewer, name_changes)
        ur = pywikibot.User(site,'User:' + str(self.reviewer))
        ure = ur.last_edit[2]
        reviewer_delta = datetime.datetime.utcnow() - ure
        reviewer_edit_age = reviewer_delta.days
        users[self.reviewer] = {
            'reviews': self.reviewer_reviews,
            'GAs': GAs,
            'edit_age': reviewer_edit_age
        }
    # The next calls may not be needed as I believe the recalc call in GANbot brings all this up to date.
    if self.reviewer is not None and self.reviewer != '':
        self.reviewer_reviews = users[self.reviewer]['reviews']
        self.reviewer_GAs = users[self.reviewer]['GAs']
        self.reviewer_edit_age = users[self.reviewer]['edit_age']
    self.recalculate_RG()
    self.recalculate_R_plus_one_over_G()
    self.recalculate_custom_sort()
def print_GAN_entry(self, review_stats=None, name_changes=None):
    """Return the wikitext lines for this nomination's entry on the GAN page.

    `review_stats` and `name_changes` are accepted (and currently unused) so
    the signature is compatible with Nom_list.print_GAN_entries, which passes
    both; previously that call raised TypeError.  Appends review status,
    notes, inactivity flags and malformed-nomination warnings as sub-lines.
    """
    entry = []
    GAN_entry_1 = "# {{GANentry|1=" + self.title + "|2=" + str(self.page_num) + "|shortdesc=" + str(self.shortdesc)
    GAN_entry_2 = "}}"
    if self.status in ('H','R','2'):
        GAN_entry_2 = "|exists=yes}}"
    # Singular/plural wording for the nominator's review and GA counts.
    GAN_entry_3 = " (" + str(self.nominator_reviews) + " reviews, "
    if self.nominator_reviews == 1:
        GAN_entry_3 = " (1 review, "
    GAN_entry_4 = str(self.nominator_GAs) + " GAs) "
    if self.nominator_GAs == 1:
        GAN_entry_4 = "1 GA) "
    GAN_entry_5 = "[[User:" + self.nominator + "|" + self.nominator + "]] ([[User talk:" + self.nominator + "|talk]]) " + self.nomination_ts.strftime("%H:%M, %-d %B %Y (UTC)")
    entry.append(GAN_entry_1 + GAN_entry_2 + GAN_entry_3 + GAN_entry_4 + GAN_entry_5)
    if self.status in ('H','R','2'):
        # Show the most relevant timestamp for the review state.
        display_ts = self.review_started_ts
        entry_status = ''
        if self.status == 'H':
            display_ts = self.hold_ts
            entry_status = '|status=on hold'
        elif self.status == '2':
            display_ts = self.second_opinion_ts
            entry_status = '|status=2nd opinion'
        display_ts_string = 'Unknown timestamp'
        if display_ts is not None:
            display_ts_string = display_ts.strftime("%H:%M, %-d %B %Y (UTC)")
        GAR_entry_1 = "#:{{GAReview" + entry_status + "}} "
        GAR_entry_2 = "(" + str(self.reviewer_reviews) + " reviews, "
        if self.reviewer_reviews == 1:
            GAR_entry_2 = "(1 review, "
        GAR_entry_3 = str(self.reviewer_GAs) + " GAs) "
        if self.reviewer_GAs == 1:
            GAR_entry_3 = "1 GA) "
        entry.append(GAR_entry_1 + GAR_entry_2 + GAR_entry_3 + "[[User:" + self.reviewer + "|" + self.reviewer + "]] ([[User talk:" + self.reviewer + "|talk]]) " + display_ts_string)
    if self.note != '':
        entry.append("#: '''Note:''' " + self.note)
    if self.nominator_edit_age > GA_config.inactivity_age:
        entry.append('#: [[File:Exclamation mark 2.svg|14px]]Nominator inactive for ' + str(self.nominator_edit_age) + ' days')
    if self.reviewer_edit_age > GA_config.inactivity_age and self.status != '2':
        entry.append('#: [[File:Exclamation mark 2.svg|14px]]Reviewer inactive for ' + str(self.reviewer_edit_age) + ' days')
    if len(self.warnings) > 0:
        entry.append("#: '''Warning:''' nomination is malformed -- " + "; ".join(self.warnings))
    return("\n".join(entry))
def is_transcluded(self):
    """Return True when this nomination's review subpage is already
    transcluded onto the article talk page, False otherwise."""
    wiki = pywikibot.Site('en', 'wikipedia')
    talk = pywikibot.Page(wiki, 'Talk:' + self.title)
    # Page moves can leave redirects behind, so match only the tail of the
    # transclusion string, which should still be unique.
    tail = "/GA" + str(self.page_num) + "}}"
    return tail in talk.text
def transclude(self, conn): # Add the new review page to the article talk page and update the status
    """Transclude the review subpage onto the article talk page and update the
    status parameter of the {{GA nominee}} template there.

    When GA_config.is_live is false, the would-be edits are written to a test
    page instead of the real talk page.  Also inserts a "Nominator:" line into
    the review page above the "Reviewer:" line.  Returns True.
    """
    site = pywikibot.Site('en','wikipedia')
    talk_page = pywikibot.Page(site,'Talk:' + self.title)
    testing_page = pywikibot.Page(site,"User talk:ChristieBot/Test")
    GAN_re = re.search('{{GA\s?nominee',talk_page.text) # Is there a nomination template on the talk page?
    # NOTE(review): the update_status flag is assigned below but never read
    # afterwards -- confirm whether it is vestigial.
    update_status = True
    new_GAN_text = ''
    transclusion = ''
    # Log some basic information about the transclusion we're about to try to perform
    GAN.log(conn,"transclude",str(self.title),"nominator is " + str(self.nominator) + "; status is " + str(self.status) + "; page is " + str(self.page_num) + "; already transcluded? " + str(self.is_transcluded()))
    if GAN_re == None:
        # If we're here we couldn't find the GA nominee template. Somebody might have removed it manually.
        # This might mean that some of the close steps might already have been done. Check for the existence of a {{Failed GA}} template or
        # article history. Check if the review is already transcluded.
        # Also set update status to False -- whatever happens in this branch we're not going to be updating the nominee template, because it's not there
        update_status = False
        fga_re = re.search('{{Failed[ ]?GA[^\}]*page\s*\=\s*' + str(self.page_num) + '[^\}]*\}\}',talk_page.text) # Search for Failed GA template
        if fga_re == None:
            GAN.log(conn,"transclude",str(self.title), "GA nominee template not on talk page -- couldn't find Failed GA")
            # Can't find FailedGA, so look for articlehistory
            # Article history will have the form {{article history ... actionNlink=Talk:Title/GAX...}} where X is the page number
            # We don't care about the outcome, just that it's recorded
            ah_re = re.search('{{article[ ]?history[^\}]*action.link\s*=\s*talk:'+self.title.lower() + '/ga' + str(self.page_num), talk_page.text.lower())
            if ah_re == None:
                GAN.log(conn,"transclude",str(self.title), "GA nominee template not on talk page -- couldn't find Article history")
                # If we're here we've given up; there's no failed GA and no article history so report an error
                # I suspect this is not an error if the status is P -- there doesn't have to be either if the GA template is there.
                # If so then an additional check here for the {{GA}} template would be useful.
                # The error is reported as "Could not find nominee template" because it's not there and we don't know why it's not there.
                GA_config.current_errors.append("\nCould not find nominee template in Talk:" + self.title + " when trying to update status")
            else:
                GAN.log(conn,"transclude",str(self.title), "Found article history") # We found article history with the GA in it so we can assume it was closed correctly
        else:
            GAN.log(conn,"transclude",str(self.title), "Found Failed GA") # We found a Failed GA template so we can assume it was closed correctly
    else:
        # Here we did find the nominee template so we need to update it
        GAN_start = GAN_re.span()[0]
        GAN_text = talk_page.text[GAN_start:]
        GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else
        x = talk_page.text[0:GAN_re.span()[0]] # x is the part of the talk page prior to the nominee template
        y = GAN_text
        z = talk_page.text[len(x) + len (y):] # z is the part of the talk page after the nominee template
        # This regex also matches when status is the last parameter (the
        # simpler original regex did not).
        status_match = re.search('\|[\s]*status[\s]*=[\s]*[^\|\}]*(\||\}\})', GAN_text)
        if status_match == None:
            GA_config.current_errors.append("\nCould not find status parameter in template in Talk:" + self.title + " when trying to update status")
        else:
            if self.status == '': # If we're transcluding a review and the reviewer didn't update the status in the nominee template, we should set the status to onreview
                self.update_status('R')
            trailing_template_text = GAN_text[status_match.span()[1]-1:] # This works if the status parameter is not Last
            if GAN_text[status_match.span()[1] - 1] != "|": # if we didn't find a pipe we found the trailing braces
                trailing_template_text = "}}"
            new_GAN_text = GAN_text[0:status_match.span()[0]] + "|status=" + self.status_template_string + trailing_template_text
            talk_page.text = x + new_GAN_text + z
    # Status is either updated or we skipped it, and the talk page text is not saved if it was updated.
    # Now append the transclusion, but only if not already transcluded
    talk_page_is_editable = True
    GAN.log(conn, "transclude:protected page",self.title,"Before check for whether it's transcluded: talk_page_is_editable is " + str(talk_page_is_editable))
    if not self.is_transcluded():
        transclusion = "\n{{Talk:" + self.title + "/GA" + str(self.page_num) + "}}\n"
        try:
            GAN.log(conn, "transclude:protected page",self.title,"About to add transclusion to text")
            talk_page.text += transclusion
        except pywikibot.exceptions.UnsupportedPageError as e:
            GAN.log(conn, "transclude:protected page",self.title,"Can't transclude the review: " + str(e))
            talk_page_is_editable = False
    if GA_config.is_live:
        if talk_page_is_editable:
            talk_page.save("Transcluding GA review")
            # Update the review page with a line showing the nominator.
            # First get the review page text
            review_page = pywikibot.Page(site,self.review_page_name)
            reviewer_line_re = re.search('\'\'\'Reviewer:\'\'\'',review_page.text) # Is the "'''Reviewer:'''" line still in the review page?
            if reviewer_line_re == None:
                # Here something has removed the reviewer info, so we don't know where to put the nominator info, and we bail OUT
                GAN.log(conn, "transclude","checking for reviewer line","No reviewer line found")
            else:
                # We found it so we can insert the nominator line.
                reviewer_line_start = reviewer_line_re.span()[0]
                review_page_first_part = review_page.text[:reviewer_line_start]
                review_page_last_part = review_page.text[reviewer_line_start:]
                review_page.text = review_page_first_part + "\'\'\'Nominator:\'\'\' {{User|" + self.nominator + "}} " + self.nomination_ts.strftime("%H:%M, %-d %B %Y (UTC)") + "\n\n" + review_page_last_part
                review_page.save("Adding nominator information")
    else:
        # Not live: record what would have been done on the test page.
        testing_page.text += "==Transcluding GA review for " + self.title + "=="
        if self.is_transcluded():
            testing_page.text += "\nNot transcluding review -- already transcluded"
        else:
            testing_page.text += "\nTransclusion text: </nowiki>" + transclusion + "</nowiki>"
        if new_GAN_text == '':
            # NOTE(review): both branches of this if are identical -- possibly
            # one was meant to report that the status was not updated; confirm.
            testing_page.text += "\nNew status text: " + new_GAN_text + ""
        else:
            testing_page.text += "\nNew status text: " + new_GAN_text + ""
        testing_page.save("Transcluding GA review for " + self.title)
    if self.status == 'R':
        self.edit_summary = "On review [[" + self.title + "]] by " + self.reviewer
    elif self.status == 'H':
        self.edit_summary = "On review and on hold [[" + self.title + "]] by " + self.reviewer
    elif self.status == '2':
        self.edit_summary = "On review and second opinion [[" + self.title + "]] by " + self.reviewer
    return True
def add_a_review(self, conn): # Add a record to the reviews table. Should only be called at the time the article is placed in "on review" status.
    """Insert a row into the GA reviews table for this (article, page, reviewer).

    If a review by the same reviewer for this article/page already exists the
    insert is skipped; a prior review by a different reviewer is reported but
    does not block the insert.  Returns True when a row was inserted, False on
    a database error or a skipped insert.  Values are now bound as query
    parameters instead of the previous hand-escaped string concatenation.
    """
    review_inserted = False
    table = GA_config.strings['GA reviews table name']
    sql = "select reviewer from " + table + " where article_title = %s and page = %s"
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(sql, (self.title, str(self.page_num)))
    except pymysql.Error as e:
        GAN.notify_error("GANbot: add_a_review",sql,e)
        GAN.log(conn, "add_a_review","fetching reviewer",str(e))
        return False
    # Report every prior review we find; only a match on the same reviewer
    # suppresses the insert.
    insert_review = True
    for row in cursor.fetchall():
        GAN.notify_error("GANbot: add_a_review","counting reviews", "found prior review for " + str(self.title) + '/' + str(self.page_num) + " by " + str(row['reviewer']), False)
        if row['reviewer'] == self.reviewer:
            insert_review = False
    if insert_review:
        rst_string = self.review_started_ts.strftime("%Y-%m-%d %H:%M:%S")
        sql = "insert into " + table + " (reviewer, article_title, page, review_ts) values (%s, %s, %s, %s)"
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql, (self.reviewer, self.title, str(self.page_num), rst_string))
        except pymysql.Error as e:
            GAN.notify_error("add_a_review",sql,e)
            conn.rollback()
            return False
        conn.commit()
        review_inserted = True
    return review_inserted
def save_nomination_state(self, conn, name_changes, wbgan, review_stats): # save the current state of the nomination to the nominations table
    """Insert a snapshot row for this nomination into the nominations table.

    Nullable timestamps are passed as None (stored as SQL NULL by the driver).
    Returns the number of rows inserted (cursor.rowcount).  Values are bound
    as query parameters instead of the previous hand-escaped string
    concatenation, and a duplicated self.note None-check has been removed.
    """
    def ts_or_none(ts):
        # Format a timestamp column value; None is passed through for SQL NULL.
        return ts.strftime("%Y-%m-%d %H:%M:%S") if ts is not None else None
    cursor = conn.cursor()
    if self.reviewer is None:
        self.reviewer = ''
    if self.note is None:
        self.note = ''
    sql = ("insert into " + GA_config.strings['nominations table name']
           + " (title, page, nominator, nomination_ts, status, reviewer, subtopic, note,"
           + " review_started_ts, hold_ts, second_opinion_ts, shortdesc,"
           + " nominator_reviews, nominator_GAs)"
           + " values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
    params = (self.title, str(self.page_num), self.nominator, str(self.nomination_ts),
              self.status, self.reviewer, self.subtopic, self.note,
              ts_or_none(self.review_started_ts), ts_or_none(self.hold_ts),
              ts_or_none(self.second_opinion_ts), self.shortdesc,
              str(review_stats.get_review_count(self.nominator, name_changes)),
              str(WBGAN.get_GA_count(wbgan, self.nominator, name_changes)))
    try:
        cursor.execute(sql, params)
    except pymysql.Error as e:
        GAN.notify_error("save",sql,e)
    conn.commit()
    return(cursor.rowcount)
#def save_nomination_state(self, conn): # save the current state of the nomination to the nominations table
# cursor = conn.cursor()
# rst_string = 'null'
# if self.review_started_ts is not None:
# rst_string = self.review_started_ts.strftime("%Y-%m-%d %H:%M:%S")
# rst_string = "'" + rst_string + "'"
# ht_string = 'null'
# if self.hold_ts is not None:
# ht_string = self.hold_ts.strftime("%Y-%m-%d %H:%M:%S")
# ht_string = "'" + ht_string + "'"
# sot_string = 'null'
# if self.second_opinion_ts is not None:
# sot_string = self.second_opinion_ts.strftime("%Y-%m-%d %H:%M:%S")
# sot_string = "'" + sot_string + "'"
# if self.reviewer == None:
# self.reviewer = ''
# if self.note == None:
# self.note = ''
# if self.note == None:
# self.note = ''
# #print("title = <" + str(self.title) + ">")
# #print("nominator = <" + str(self.nominator) + ">")
# #print("nomination_ts = <" + str(self.nomination_ts) + ">")
# #print("status = <" + str(self.status) + ">")
# #print("reviewer = <" + str(self.reviewer) + ">")
# #print("note = <" + str(self.note) + ">")
# #print("review_started_ts = <" + str(self.review_started_ts) + ">")
# #print("hold_ts = <" + str(self.hold_ts) + ">")
# #print("second_opinion_ts = <" + str(self.second_opinion_ts) + ">")
# #print("shortdesc = <" + str(self.shortdesc) + ">")
# sql = "insert into " + GA_config.strings['nominations table name'] + " (title, page, nominator, nomination_ts, status, reviewer, subtopic, note, review_started_ts, hold_ts, second_opinion_ts, shortdesc) values "
# sql += "('" + self.title.replace("'","''") + "'," + str(self.page_num) + ",'" + self.nominator.replace("'","''") + "','" + str(self.nomination_ts) + "','" + self.status.replace("'","''") + "','" + self.reviewer.replace("'","''") + "','" + self.subtopic.replace("'","''") + "','" + self.note.replace("'","''") + "'," + rst_string + "," + ht_string + "," + sot_string + ",'" + self.shortdesc.replace("'","''") + "')"
# #print(sql)
# try:
# cursor.execute(sql)
# except pymysql.Error as e:
# GAN.notify_error("save",sql,e)
# conn.commit()
# return(cursor.rowcount)
def update_status(self, new_status):
    """Set self.status (and its display/template strings) from new_status.

    Status definitions:
      R  -- on review: a review page exists.
      H  -- on hold: as R, with the GA nominee status set to onhold.
      2  -- second opinion: as R, with the status set to 2ndopinion.
      P  -- passed: the bot found a GA template instead of GA nominee.
      F  -- failed.
      '' -- nominated but not yet under review.
      X  -- the nomination has been removed.
    An unrecognised new_status leaves the status fields untouched; the link
    URL is always refreshed.
    """
    self.status_link_url = ('https://en.wikipedia.org/w/index.php?title=Talk:'
                            + urllib.parse.quote(self.title)
                            + '%2FGA' + str(self.page_num))
    # Map every accepted alias onto its canonical one-character status.
    aliases = {
        'R': 'R', 'review': 'R', 'onreview': 'R',
        'H': 'H', 'hold': 'H', 'onhold': 'H',
        '2': '2', '2nd opinion': '2', '2ndopinion': '2',
        'P': 'P', 'Passed': 'P',
        'F': 'F', 'Failed': 'F',
        'X': 'X', 'Removed': 'X',
        '': '', None: '',
    }
    # Canonical status -> (display string, GA nominee template value).
    details = {
        'R': ('Under review', 'onreview'),
        'H': ('On hold', 'onhold'),
        '2': ('2nd opinion', '2ndopinion'),
        'P': ('Passed', ''),
        'F': ('Failed', ''),
        'X': ('Removed', ''),
        '': ('Start review', ''),
    }
    canonical = aliases.get(new_status)
    if canonical is not None:
        self.status = canonical
        self.status_string, self.status_template_string = details[canonical]
        if canonical == '':
            # Not yet reviewed: link straight into starting a new review.
            self.status_link_url += '&action=edit&editintro=Template:GAN/editintro&preload=Template:GAN/preload'
    return None
def new_status_message(self, new_status, old_status):
    """Set self.edit_summary to a human-readable description of the change
    from old_status to new_status.

    old_status of None means the nomination was not previously tracked.
    NOTE(review): branches for H/R/2 concatenate self.reviewer -- they assume
    a reviewer name is set by then; confirm it cannot still be None.
    """
    # returns a string for the edit summary saying what changed
    # Needs the reviewer added for H and R
    if old_status == new_status: # Should never happen
        self.edit_summary = "No change"
    elif old_status == None:
        # Newly seen nomination: describe its initial state.
        if new_status == "":
            self.edit_summary = "New [[" + self.title + "]] (" + self.subtopic + ")"
        elif new_status == "H":
            self.edit_summary = "On hold [[" + self.title + "]] by " + self.reviewer
        elif new_status == "2":
            self.edit_summary = "Second opinion requested for [[" + self.title + "]] by " + self.reviewer
        elif new_status == "R":
            self.edit_summary = "On review [[" + self.title + "]] by " + self.reviewer
        elif new_status == "P":
            self.edit_summary = "Passed [[" + self.title + "]]"
        elif new_status == "F":
            self.edit_summary = "Failed [[" + self.title + "]]"
        elif new_status == "X":
            self.edit_summary = "Removed [[" + self.title + "]]"
    # Below here the nomination was already tracked with a different status.
    elif new_status == "P":
        self.edit_summary = "Passed [[" + self.title + "]]"
    elif new_status == "F":
        self.edit_summary = "Failed [[" + self.title + "]]"
    elif new_status == "X":
        self.edit_summary = "Removed [[" + self.title + "]]"
    elif new_status == "H":
        self.edit_summary = "On hold [[" + self.title + "]] by " + self.reviewer
    elif new_status == '2':
        self.edit_summary = "2nd opinion [[" + self.title + "]]"
    elif new_status == 'R':
        self.edit_summary = "On review [[" + self.title + "]] by " + self.reviewer
    elif new_status == '':
        # Review state was cleared: say what the nomination is no longer.
        if old_status == "H":
            self.edit_summary = "No longer on hold [[" + self.title + "]]"
        elif old_status == "2":
            self.edit_summary = "No longer on second opinion [[" + self.title + "]]"
        elif old_status == "R":
            self.edit_summary = "No longer on review [[" + self.title + "]]"
        else:
            self.edit_summary = "Status changed from <" + old_status + "> to <" + new_status + "> for [[" + self.title + "]]"
def tell_nominator(self): # Leave the nominator a talk page message about the state of the nomination
    """Post a {{GANotice}} message on the nominator's talk page describing the
    nomination's outcome (hold/pass/fail, or a plain notice).

    Skipped entirely for reviewers who notify nominators themselves and for
    removed ('X') nominations.  When not live, the message goes to the
    configured test talk page instead.
    """
    if self.reviewer in GA_config.reviewers_who_notify_nominators:
        return(None)
    site = pywikibot.Site('en','wikipedia')
    title = self.title
    page = str(self.page_num)
    if self.status == 'X':
        return()
    result = ""
    if self.status == 'H':
        result = "|result=hold"
    elif self.status == 'P':
        result = "|result=pass"
    elif self.status == "F":
        result = "|result=fail"
    msg = "\n{{subst:GANotice|article=" + title + "|days=7" + result + "|reviewlink=Talk:" + title + "/GA" + page + "}} "
    msg += "<!-- Template:GANotice --> <small>Message delivered by [[User:ChristieBot|ChristieBot]], on behalf of [[User:" + self.reviewer + "|" + self.reviewer + "]]</small> -- [[User:" + self.reviewer + "|" + self.reviewer + "]] ([[User talk:" + self.reviewer + "|talk]]) ~~~~~\n"
    if GA_config.is_live:
        talk_page_name = "User talk:" + self.nominator
    else:
        talk_page_name = GA_config.strings['GA nominator talk page']
    talk_page = pywikibot.Page(site, talk_page_name)
    talk_page.text += msg
    try:
        talk_page.save("Your [[WP:GA|GA]] nomination of [[" + self.title + "]]",minor=False)
    except Exception as e:
        # Bug fix: this was a bare `except:` that referenced an undefined `e`,
        # raising NameError while trying to report the original error.
        GAN.notify_error("tell_nominator","Saving talk page notification for " + self.title + "/" + self.status_string,e)
    return(None)
def compare(self, other_nom): # returns a dictionary of diffs between this nomination and a dictionary of active_nomination values
    """Compare this nomination against a dict of stored active-nomination
    values; return a dict of field name -> bool (True when they match).

    'snapshot_ts' is always reported True.  Page numbers are compared as
    strings, and a stored reviewer of '' is treated as matching a reviewer
    of None.
    """
    field_names = ['title', 'page', 'nomination_ts', 'nominator', 'status',
                   'reviewer', 'subtopic', 'note', 'snapshot_ts',
                   'review_started_ts', 'hold_ts', 'second_opinion_ts']
    matches = {name: True for name in field_names}
    # Fields compared with plain equality against the stored value.
    plain = {
        'title': self.title,
        'nomination_ts': self.nomination_ts,
        'nominator': self.nominator,
        'status': self.status,
        'subtopic': self.subtopic,
        'note': self.note,
        'review_started_ts': self.review_started_ts,
        'hold_ts': self.hold_ts,
        'second_opinion_ts': self.second_opinion_ts,
    }
    for name, value in plain.items():
        if value != other_nom[name]:
            matches[name] = False
    if str(self.page_num) != str(other_nom['page']):
        matches['page'] = False
    reviewer_blank = self.reviewer is None and other_nom['reviewer'] == ''
    if self.reviewer != other_nom['reviewer'] and not reviewer_blank:
        matches['reviewer'] = False
    return matches
def recalculate_custom_sort(self):
    """Recompute the three custom sort keys (string keys built so that plain
    lexicographic order gives the desired ranking).

    custom_sort_1: nominators with at most GA_config.free_GAs promoted GAs
      sort first (by GA count, then descending review count, then nomination
      time); others sort by descending R_plus_one_over_G, then GA count,
      nomination time, title.
    custom_sort_2: chronological (nomination timestamp, then title).
    custom_sort_3: new nominators (zero GAs) first, by descending review
      count then descending age; others by descending R_plus_one_over_G,
      then GA count and nomination time.

    Note: an earlier first computation of custom_sort_1 (keyed on R_over_G)
    was dead code -- it was unconditionally overwritten -- and was removed.
    """
    nom_ts_key = self.nomination_ts.strftime("%Y%m%d%H%M%S")
    self.custom_sort_2 = nom_ts_key + self.title
    free_GAs = GA_config.free_GAs
    if int(self.nominator_GAs) <= free_GAs:
        # "Free" nominators: rank by GA count, then descending review count.
        key1 = str(100 + self.nominator_GAs)[1:]
        key2 = str(99999 - self.nominator_reviews)
        key3 = nom_ts_key
        a = key1 + key2 + key3
    else:
        # Everyone else: descending (R+1)/G ratio dominates.
        key1 = '99'
        key2 = str(90000 - int(100.0 * float(self.R_plus_one_over_G)))
        key3 = str(10000 + self.nominator_GAs)
        key4 = nom_ts_key
        key5 = self.title
        a = key1 + key2 + key3 + key4 + key5
    self.custom_sort_1 = a
    if self.nominator_GAs == 0:
        # New nominators first, by descending review count then by age.
        key1 = '00'
        key2 = str(99999 - self.nominator_reviews)
        key3 = str(99999 - self.age_in_days)
        key4 = nom_ts_key
        a = key1 + key2 + key3 + key4
    else:
        key1 = '99'
        key2 = str(90000 - int(100.0 * float(self.R_plus_one_over_G)))
        key3 = str(10000 + self.nominator_GAs)
        key4 = nom_ts_key
        a = key1 + key2 + key3 + key4
    self.custom_sort_3 = a
def recalculate_RG(self):
    """Recompute the reviews-to-promoted-GAs ratio for this nominator.

    Sets self.R_over_G to the string 'Infinity' when there are reviews but no
    GAs, a two-decimal string when both are positive, and the float 0.0 when
    no reviews have been done. Also refreshes self.R_minus_G.
    """
    reviews = self.nominator_reviews
    gas = self.nominator_GAs
    if reviews > 0 and gas == 0:
        self.R_over_G = 'Infinity'
    elif reviews > 0:
        self.R_over_G = "{:.2f}".format(float(reviews) / float(gas))
    else:
        self.R_over_G = 0.0
    self.R_minus_G = reviews - gas
def recalculate_R_plus_one_over_G(self):
    """Recompute (reviews + 1) / GAs for this nominator.

    Sets self.R_plus_one_over_G to the string 'Infinity' when the nominator
    has reviews but no promoted GAs, a two-decimal string when GAs > 0, and
    the float 0.0 when both counts are zero. Also refreshes self.R_minus_G.
    """
    reviews = self.nominator_reviews
    gas = self.nominator_GAs
    if reviews > 0 and gas == 0:
        self.R_plus_one_over_G = 'Infinity'
    elif gas > 0:
        self.R_plus_one_over_G = "{:.2f}".format((float(1.0) + float(reviews)) / float(gas))
    else:
        self.R_plus_one_over_G = 0.0
    self.R_minus_G = reviews - gas
class Nom_list:
    """An ordered collection of GA nominations with a (title, page) lookup index."""
    def __init__(self):
        self.noms = []       # nominations, in insertion (later sorted) order
        self.noms_dict = {}  # (title, page_num) -> nomination object
    def add(self, nom):
        """Add a nom to this list; the lookup key is the (title, page number) tuple."""
        self.noms.append(nom)
        self.noms_dict[(nom.title, nom.page_num)] = nom
    def print_GAN_entries(self, target, sort_order, reverse_bool, review_stats, name_changes):
        """Return the rendered table rows for every nomination, one per line,
        sorted by the attribute named in *sort_order*.

        Bug fix: *reverse_bool* was previously ignored (the sort always used
        reverse=False); it is now honoured. *target* is currently unused but
        retained for interface compatibility with callers.
        """
        self.noms = sorted(self.noms, key=operator.attrgetter(sort_order), reverse=reverse_bool)
        return "\n".join(nom.print_GAN_entry(review_stats, name_changes) for nom in self.noms)
class Review_stats:
    """Builds and publishes the GA reviewing statistics.

    NOTE(review): indentation was reconstructed from a whitespace-mangled
    source; nesting of the commit/loop boundaries should be confirmed against
    the original file.
    """
    # There are three tables that hold reviewing information:
    # * GA_reviewing_baseline -- holds a record for each reviewer, showing how many reviews they had done at the time the table was created. This should never be updated again.
    # * GA_reviews -- holds one record for each review done since the baseline. A few records may precede the baseline but the code is written to ignore these.
    # * GA_reviewing -- the current reviewing statistics are in this table. It can be rebuilt by the update_statistics() method, which adds a count of the reviews in GA_reviews to the baseline.
    # * name_changes -- holds "old_name"/"new_name" pairs. Used to combine data from an old user name into the newer name
    @classmethod
    def update_statistics(cls, conn): # Delete the GA_reviewing table and recreate it from the baseline plus the table of reviews
        """Rebuild the reviewing-statistics table (baseline + post-baseline review
        counts), reload the corrections table from its wiki page, then delegate to
        write_statistics_page(). Returns False on a database error, otherwise the
        result of write_statistics_page()."""
        # Note this does *NOT* take into account the name changes. I think that would make it too complicated. It does a simple match to the review name; we can address name
        # changes when reporting to the stats page
        sql = "delete from " + GA_config.strings['GA reviewing statistics table name']
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            conn.rollback()
            GAN.notify_error("update_statistics",sql,e)
            return False
        # Baseline counts union'd with a count of post-baseline, non-superseded
        # reviews; "binary" forces case-sensitive grouping of reviewer names.
        sql = "insert into " + GA_config.strings['GA reviewing statistics table name'] + " select a.reviewer, sum(a.num_reviews) as num_reviews, '" + datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + "' as snapshot_ts from ("
        sql += " select binary b.reviewer as reviewer, b.num_reviews from " + GA_config.strings['GA reviewing baseline table name'] + " b"
        sql += " union all"
        sql += " select r.reviewer as reviewer, count(*) as num_reviews from " + GA_config.strings['GA reviews table name'] + " r where r.review_ts > (select max(snapshot_ts) as base_ts from " + GA_config.strings['GA reviewing baseline table name'] + ") and r.superseded_ts is null group by binary r.reviewer"
        sql += ") a group by binary a.reviewer order by sum(a.num_reviews) desc"
        GAN.log(conn,"update_statistics", "N/A","sql is " + sql)
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            conn.rollback()
            GAN.notify_error("update_statistics",sql,e)
            return False
        conn.commit()
        # Now read the corrections page and reload the corrections table.
        site = pywikibot.Site('en','wikipedia')
        grsc_page_title = GA_config.strings['GA reviewing stats corrections page']
        grsc_page = pywikibot.Page(site,grsc_page_title)
        corrections_text = grsc_page.text
        # Delete the existing records
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        sql = "delete from " + GA_config.strings['GA reviewing statistics corrections table name']
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            conn.rollback()
            GAN.notify_error("update_statistics: deleting old corrections",sql,e)
            return False
        GAN.log(conn,"update_statistics", None,"deleted old corrections")
        # Parse the text to find the table and split into rows
        corrections_re = re.search("!Page number\n!Old reviewer\n!New reviewer\n!Comment\n", corrections_text)
        if corrections_re is None:
            pass # There are no corrections to be made to the statistics
        else:
            remaining_text = corrections_text[corrections_re.span()[1]:]
            corrections_rows = remaining_text.split("\n|-")
            for crow in corrections_rows:
                # Normalize a wikitable row into a plain '|'-separated value list.
                crow = crow.replace('\n','').replace('|-','').replace('||','|').strip()
                crow = crow[1:]
                crow_values = crow.split('|')
                # Possibly check that the review cited does exist and the old reviewer name does match
                # NOTE(review): crow_values come from a publicly editable wiki page and are
                # interpolated into SQL without escaping -- injection/syntax-error risk;
                # TODO parameterize this insert.
                sql = "insert into " + GA_config.strings['GA reviewing statistics corrections table name'] + " (article_title, page_number, old_reviewer, new_reviewer, comment) values "
                sql += "('" + crow_values[0] + "', " + crow_values[1] + ", '" + crow_values[2] + "','" + crow_values[3] + "','" + crow_values[4] + "')"
                try:
                    cursor.execute(sql)
                except pymysql.Error as e:
                    conn.rollback()
                    GAN.notify_error("update_statistics: inserting correction",sql,e)
                    break
                GAN.log(conn,"update_statistics", None,"inserted a correction")
        conn.commit()
        # Assemble any errors and post them at the end of the corrections page? Need to delete and rewrite that section if so.
        return Review_stats.write_statistics_page(conn)
    @classmethod
    def write_statistics_page(cls, conn): # write the Wikipedia page that holds the GA reviewing stats
        """Render the per-reviewer totals (statistics merged with name changes and
        corrections) as an HTML table and save it to the stats wiki page.
        Returns True on success, False on a database error."""
        # Modify the query to include corrections
        #sql = "select reviewer, sum(num_reviews) as num_reviews_g from"
        #sql += " (select case when n.new_name is null then r.reviewer else n.new_name end as reviewer, r.num_reviews"
        #sql += " from " + GA_config.strings['GA reviewing statistics table name'] + " r"
        #sql += " left join " + GA_config.strings['name changes table name'] + " n on r.reviewer = n.old_name"
        #sql += " ) a group by a.reviewer order by num_reviews_g desc, a.reviewer"
        # Corrections are applied by subtracting one review per correction from the
        # old reviewer and adding one to the new reviewer.
        sql = "select reviewer, sum(num_reviews) as num_reviews_g from"
        sql += " (select case when n.new_name is null then r.reviewer else n.new_name end as reviewer, r.num_reviews"
        sql += " from " + GA_config.strings['GA reviewing statistics table name'] + " r"
        sql += " left join " + GA_config.strings['name changes table name'] + " n on r.reviewer = n.old_name"
        sql += " union all "
        sql += " select old_reviewer, 0-count(*) as num_reviews from " + GA_config.strings['GA reviewing statistics corrections table name']
        sql += " union all "
        sql += " select new_reviewer, count(*) as num_reviews from " + GA_config.strings['GA reviewing statistics corrections table name']
        sql += " ) a group by a.reviewer order by num_reviews_g desc, a.reviewer"
        GAN.log(conn,"write_statistics", None,"sql is " + sql)
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("write_statistics",sql,e)
            return False
        stats_page = ['<table class="wikitable">','<tr><th>User</th><th>Reviews</th></tr>']
        for row in cursor.fetchall():
            stats_page.append('<tr> <td> [[User:' + row['reviewer'] + '|' + row['reviewer'] + ']] </td> <td> ' + str(row['num_reviews_g']) + ' </td> </tr>')
        stats_page.append('</table>')
        stats_text = '\n'.join(stats_page)
        site = pywikibot.Site('en','wikipedia')
        grs_page_title = GA_config.strings['GA reviewing stats page']
        grs_page = pywikibot.Page(site,grs_page_title)
        grs_page.text = stats_text
        grs_page.save("Updating GA reviewing statistics")
        return True
    def __init__(self, conn): # New constructor for getting data from the database
        """Load the current per-reviewer review counts into self.reviewers."""
        # No attempt is made here to deal with the name changes. That has to be done by the code that looks up the review counts.
        self.reviewers = {} # Will contain a dictionary of reviewers giving the number of reviews they've done
        sql = "select reviewer, num_reviews from " + GA_config.strings['GA reviewing statistics table name']
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            # NOTE(review): no return here -- on a query error the fetchall() below
            # still runs against the failed cursor; confirm that is intended.
            GAN.notify_error("getting reviewing stats","sql",e)
        result = cursor.fetchall()
        for row in result: # dump the data into a local dictionary for use in the statistics
            self.reviewers[row['reviewer']] = row['num_reviews']
    def get_review_count(self, user_name, name_changes): # returns the number of reviews for a given user. Adds the number in the reviewing statistics to the number found for any alternate names.
        """Total reviews for user_name, including reviews recorded under the
        user's previous name(s) or newer name (via the name_changes mapping,
        old_name -> new_name)."""
        reviews = 0
        if user_name in self.reviewers.keys():
            reviews= int(self.reviewers[user_name])
        if user_name in name_changes.keys():
            # user_name is an old name: add the counts recorded under the new name.
            if name_changes[user_name] in self.reviewers.keys():
                reviews += int(self.reviewers[name_changes[user_name]])
        elif user_name in name_changes.values():
            # user_name is a new name: add the counts recorded under each old name.
            old_names = [x for x in name_changes.keys() if name_changes[x] == user_name]
            for old_name in old_names:
                if old_name in self.reviewers.keys():
                    reviews += int(self.reviewers[old_name])
        return reviews
class WBGAN:
    # All this data is derived from the tables stored in the WP:WBGAN page database
    @classmethod
    def get_wbgan(cls, config, gan_conn):
        """Return {nominator: promoted-GA count} from the historical GA reviews table.

        *config* is unused here but kept for interface compatibility.
        (The alternative source, SDZeroBot's s54328__goodarticles_p database,
        includes very old GAs but omits users with an apostrophe and old
        successful nominations that were later delisted or promoted to FA,
        so it is no longer queried here.)
        """
        wbgan = {}
        with gan_conn.cursor() as cursor:
            sql = "select nominator, count(*) as GA_count from " + GA_config.strings['historical GA reviews table name'] + " where type = 'GAN' "
            sql += " and lower(outcome) in ('pass','passed','listed', 'promoted') group by nominator order by count(*) desc"
            try:
                cursor.execute(sql)
            except pymysql.Error as e:
                GAN.notify_error("getting nominator data",sql,e)
            for row in cursor.fetchall():
                wbgan[row[0]] = row[1]
        return wbgan
    @classmethod
    def get_GA_count(cls, wbgan, user_name, name_changes):
        """Return the number of GAs for user_name, folding in counts recorded
        under the user's old name(s) or newer name via the name_changes mapping
        (old_name -> new_name)."""
        GAs = 0
        if user_name in wbgan.keys():
            GAs = int(wbgan[user_name])
        if user_name in name_changes.keys():
            if name_changes[user_name] in wbgan.keys():
                GAs += int(wbgan[name_changes[user_name]])
        # NOTE(review): unlike Review_stats.get_review_count this uses 'if' rather
        # than 'elif', so a name appearing as both an old and a new name could be
        # double-counted -- confirm whether that is intended.
        if user_name in name_changes.values():
            old_names = [x for x in name_changes.keys() if name_changes[x] == user_name]
            for old_name in old_names:
                if old_name in wbgan.keys():
                    GAs += int(wbgan[old_name])
        return GAs
    @classmethod
    def get_one(cls, config, title):
        """Fetch the single SDZeroBot nominators row for *title*.

        Returns the row dict, or None on a database error, no match, or more
        than one match (the latter is reported as an error)."""
        conn = pymysql.connections.Connection(user=config['client']['user'], password=config['client']['password'], database="s54328__goodarticles_p", host='tools.db.svc.eqiad.wmflabs')
        sql = "select nominator, date as promotion_date from nominators where article = '" + title.replace("'","''") + "'"
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        records_found = 0
        try:
            records_found = cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("WBGAN.get_one",sql,e)
            return None
        if records_found > 1:
            GAN.notify_error("WBGAN.get_one",sql,"Found more than one record")
            return None
        elif records_found == 0:
            return None
        else:
            return cursor.fetchone()
    @classmethod
    def get_promotion_date(cls, config, title, promoted_near_date):
        """Return the promotion datetime recorded for *title* if it falls within
        three days of *promoted_near_date*, otherwise None."""
        wbgan_row = WBGAN.get_one(config, title)
        if wbgan_row is None:
            return None
        wbgan_datetime = datetime.datetime.combine(wbgan_row['promotion_date'], datetime.datetime.min.time())
        # Bug fix: the second comparison previously used '>' as well, so the
        # "within three days" window was unbounded above and the check could
        # never behave as the surrounding comments describe; it must be '<'.
        if wbgan_datetime - datetime.timedelta(3) < promoted_near_date < wbgan_datetime + datetime.timedelta(3):
            return wbgan_datetime
        return None
class Active_nomination:
    """Access to the active-nominations snapshot table."""
    @classmethod
    def get_titles(cls, conn):
        """Return the titles of all nominations active at the last run.
        Returns an empty list on a database error."""
        titles = []
        sql = "select title from " + GA_config.strings['active nominations table name'] # Gets all the nominations that were active at the last run
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("getting active nominations",sql,e)
            return titles
        return [x['title'] for x in cursor.fetchall()]
    @classmethod
    def get_active_nomination(cls, conn, title): # Retrieve a row from the active nominations table
        """Return the single active-nomination row for *title*, or None when the
        title is absent, duplicated (reported as an error), or the query fails."""
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        sql = "select n.title, n.page, n.nomination_ts, n.nominator, n.status, n.reviewer, n.subtopic, n.note, n.snapshot_ts, n.review_started_ts, n.hold_ts, n.second_opinion_ts, ifnull(n.shortdesc,'') as shortdesc from " + GA_config.strings['active nominations table name'] + " n "
        sql += " where n.title = '" + title.replace("'","''") + "'"
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            # On failure rowcount is -1, so the checks below fall through to
            # fetchone(), which yields None -- the caller sees "not found".
            GAN.notify_error("get_active_nomination",sql,e)
        if cursor.rowcount > 1:
            GA_config.current_errors.append("More than one active nomination found for [[" + title + "]]\n")
            return None
        if cursor.rowcount == 0:
            return None
        return cursor.fetchone()
    @classmethod
    def update_active_nominations(cls, conn): # The active_nominations table should reflect the current state of the GA nominations. This rebuilds it by summarizing the events in the nominations table.
        """Rebuild the active-nominations table from the latest snapshot of each
        nomination that is not promoted/failed/withdrawn ('P','F','X').
        Returns the number of rows inserted."""
        sql = "delete from " + GA_config.strings['active nominations table name']
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            # Bug fix: previously passed the literal string "sql" instead of the
            # statement itself, hiding the failing SQL from the error report.
            GAN.notify_error("update_active_nominations",sql,e)
        sql = "insert into " + GA_config.strings['active nominations table name'] + " (title, page, nomination_ts, nominator, status, reviewer, subtopic, note, snapshot_ts, review_started_ts, hold_ts, second_opinion_ts, shortdesc) "
        sql += " select n.title, n.page, n.nomination_ts, n.nominator, n.status, n.reviewer, n.subtopic, n.note, n.snapshot_ts, n.review_started_ts, n.hold_ts, n.second_opinion_ts, n.shortdesc"
        sql += " from " + GA_config.strings['nominations table name'] + " n inner join (select title, page, max(snapshot_ts) as max_snapshot_ts from " + GA_config.strings['nominations table name'] + " group by title, page) nm"
        sql += " on n.title = nm.title and n.page = nm.page and n.snapshot_ts = nm.max_snapshot_ts"
        sql += " where n.status not in ('P','F','X')"
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("update_active_nominations",sql,e)
        conn.commit()
        return(cursor.rowcount)
class GAN:
@classmethod
def get_link_redirect_target(cls, conn, link):
    """Return the redirect target Page for *link*, or None when the page does
    not exist, is not a redirect, or a pywikibot error occurs."""
    GAN.log(conn,"get_link_redirect_target",link, "Called get_link_redirect_target")
    site = pywikibot.Site('en','wikipedia')
    page = pywikibot.Page(site,link)
    try:
        if page.exists():
            try:
                # page.get() raises IsRedirectPageError for redirects; a clean
                # return therefore means the page is not a redirect.
                page_text = page.get()
                return(None) # if not an error, then this is not a redirect
            except pywikibot.exceptions.IsRedirectPageError as e:
                redir_target = page.getRedirectTarget()
                return(redir_target)
            except pywikibot.exceptions.ServerError as e:
                GAN.notify_error("get_link_redirect_target","ServerError in getting page " + page.title(),e)
                return(None)
        else:
            return(None)
    except pywikibot.exceptions.Error as e:
        # Catch-all for any other pywikibot error (e.g. while checking
        # existence): log it and treat the link as not-a-redirect.
        GAN.log(conn,"get_link_redirect_target:exceptions",link, "Pywikibot server exception " + str(e))
        return(None)
@classmethod
def write_backlog_by_sort_order(cls, gan_conn, all_noms, sort_order):
    """Write the high-priority backlog page.

    Picks the first ten nominations by *sort_order*, at most one per nominator,
    skipping nominators who already have a nomination under review (non-empty
    status). The first five are published; the next five are left inside an
    HTML comment for manual updates between bot runs.
    """
    backlog_noms = sorted(all_noms, key=operator.attrgetter(sort_order), reverse=False)
    work_list = []
    nominators_represented = [] # Each nominator can only have one in the backlog list
    nominators_receiving_reviews = [] # Tracks nominators who have a nomination under review
    for n in backlog_noms:
        if n.status != '':
            # Bug fix: this previously tested the nomination object n (never
            # present in a list of names), so duplicates accumulated; test the
            # nominator name instead.
            if n.nominator not in nominators_receiving_reviews:
                nominators_receiving_reviews.append(n.nominator)
    for n in backlog_noms:
        if n.nominator not in nominators_represented and n.nominator not in nominators_receiving_reviews:
            work_list.append(n)
            nominators_represented.append(n.nominator)
            if len(work_list) >= 10:
                break
    first_list = ['[[Wikipedia:Good article nominations#'+ x.subtopic + '|' + x.title + ']]' for x in work_list[:5]]
    second_list = ['[[Wikipedia:Good article nominations#'+ x.subtopic + '|' + x.title + ']]' for x in work_list[5:]]
    comment_start = "\n<!-- If you clear an item from backlog and want to update the list before the bot next runs, here are the next 5 oldest nominations:\n• "
    comment_end = "-->"
    site = pywikibot.Site('en','wikipedia')
    backlog_by_sort_order_page = pywikibot.Page(site,GA_config.strings['GAN backlog by sort order'])
    backlog_by_sort_order_page.text = '\n• '.join(first_list) + comment_start + '\n• '.join(second_list) + comment_end
    backlog_by_sort_order_page.save("Updating high priority backlog")
@classmethod
def parse_article_history_actions(cls, article_history_text, actions):
    """For each action name in *actions* (e.g. 'action1'), extract its type,
    date, link, result and oldid values from {{Article history}} template text.

    Returns a list of dicts (keys present only when the parameter was found).
    An action whose date parameter exists but cannot be parsed is skipped
    entirely (see the `continue` below).
    """
    ah_actions = []
    for a in actions:
        one_action = {}
        # The *_str locals default to the literal 'null' -- presumably destined
        # for SQL fragments elsewhere; most are unused here. TODO confirm.
        a_date_str = 'null'
        a_link_str = 'null'
        a_result_str = 'null'
        a_oldid_str = 'null'
        # NOTE(review): assumes each name in *actions* occurs in the text; if it
        # does not, re.search returns None and the [0] subscript raises TypeError.
        a_ = re.search(a + "\s*=\s*[^\|]*",article_history_text)[0] # a_ contains 'action1 = GAx\n'
        l = re.search(a + "\s*=\s*", a_) # l contains 'action1 = '
        r = a_[l.span()[1]:] # r contains 'GAx\n'
        type = ""
        # Classify by the third character of the action value -- presumably
        # matching values like GAN/GAC, GAR, and a delisting form; confirm
        # against the Article history template documentation.
        if r[2:3] in ['n','N','c','C']:
            type = "GAN"
        elif r[2:3] in ['r','R']:
            type = "GAR"
        elif r[2:3] in ['a','A']:
            type = "DGA"
        one_action['type'] = type
        a_date = re.search(a + "date\s*=\s*[^\|]*",article_history_text)
        if a_date is not None:
            a_date = a_date[0]
            l = re.search(a + "date\s*=\s*", a_date)
            a_raw_date_str = a_date[l.span()[1]:].strip()
            try:
                a_date_ts = parse(a_raw_date_str.replace("(UTC)","").replace("(UTC","").strip())
                one_action['date'] = a_date_ts
                a_date_str = "'" + a_date_ts.strftime("%Y-%m-%d %H:%M:%S") + "'"
            except ValueError as e:
                # Unparsable date: abandon this whole action (it is not appended).
                continue
        #print("a_date_str is " + a_date_str)
        a_link = re.search(a + "link\s*=\s*[^\|]*",article_history_text)
        if a_link is not None:
            a_link = a_link[0]
            l = re.search(a + "link\s*=\s*", a_link)
            a_link_str = "'" + a_link[l.span()[1]:].strip().replace("'","''") + "'"
            # Underscores are converted to spaces to normalize the page title.
            one_action['link'] = a_link[l.span()[1]:].replace("_"," ").strip()
        #print("a_link_str is " + a_link_str)
        a_result = re.search(a + "result\s*=\s*[^\|]*",article_history_text)
        if a_result is not None:
            a_result = a_result[0]
            l = re.search(a + "result\s*=\s*", a_result)
            a_result_str = "'" + a_result[l.span()[1]:].strip().replace("'","''") + "'"
            one_action['result'] = a_result[l.span()[1]:].strip()
        #print("a_result_str is " + a_result_str)
        a_oldid = re.search(a + "oldid\s*=\s*[^\|]*",article_history_text)
        if a_oldid is not None:
            a_oldid = a_oldid[0]
            l = re.search(a + "oldid\s*=\s*", a_oldid)
            a_oldid_str = a_oldid[l.span()[1]:].strip()
            one_action['oldid'] = a_oldid_str
            if a_oldid_str == "":
                a_oldid_str = "null"
        ah_actions.append(one_action)
    return(ah_actions)
@classmethod
def get_article_history_actions(cls, article_history_text):
matches = re.findall("[Aa]ction\d*\s*=\s*[gG][aA][NnRCcr]",article_history_text)
if matches == None:
return(None)
actions = []
for m in matches:
ms = re.search("[Aa]ction[\d]*",m).span()
actions.append(m[ms[0]:ms[1]])
return(actions)
@classmethod
def get_article_history_template(cls, article_text):
    """Extract the body of the {{Article history}} template from *article_text*.

    Returns the template's inner text (braces stripped, whitespace trimmed),
    or None when the template is not present.
    """
    opening = re.search("{{[aA]rticle\s?[hH]istory", article_text)
    if opening is None:
        return None
    # Cut everything before the template, then isolate the balanced {{...}} span.
    tail = article_text[opening.span()[0]:]
    template = (GAN.find_enclosed_string(tail))[0]
    # Strip the surrounding braces and any leading/trailing whitespace.
    return template[2:-2].strip()
@classmethod
def is_redirect(cls, page):
try:
test = page.get()
except pywikibot.exceptions.IsRedirectPageError as e:
return(True)
return(False)
@classmethod
def flush_audit(cls, conn):
    """Delete audit-table rows older than GA_config.audit_days_to_keep days.

    Returns the number of rows removed (the cursor's rowcount).
    """
    retention_days = GA_config.audit_days_to_keep
    sql = "delete from " + GA_config.strings['audit table name'] + " where event_ts < now() - interval " + str(retention_days) + " day;"
    cursor = conn.cursor()
    try:
        cursor.execute(sql)
    except pymysql.Error as e:
        GAN.notify_error("flush_audit",sql,e)
    conn.commit()
    return cursor.rowcount
@classmethod
def log(cls, conn, source, title, message):
    """Write a debug/audit record to the audit table (and echo it to stdout
    when running interactively). Per-source logging can be switched off via
    GA_config.logging_flags; sources not listed there are logged."""
    # This method writes records to the audit table. Setting logging flags in GA_config allows debug messages to be limited to particular
    # spans of code.
    # If logging is disabled do nothing. If it's not explicitly disabled it's enabled.
    message = message[:1000]  # truncate to keep the SQL literal bounded
    log = True
    if source in GA_config.logging_flags.keys():
        log = GA_config.logging_flags[source]
    if log == True:
        # Single quotes in title/message are doubled to keep the SQL valid;
        # now(6) records microsecond precision.
        # NOTE(review): *source* is interpolated without escaping -- assumed to
        # always be a code-controlled literal; confirm.
        sql = "insert into " + GA_config.strings['audit table name'] + " (event_ts, source, title, message) values (now(6),'" + str(source) + "','" + str(title).replace("'","''") + "','" + str(message).replace("'","''") + "')"
        cursor = conn.cursor()
        # if we are on a tty output via print as well as writing to db
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("log",sql,e)
        conn.commit()
        if sys.stdout.isatty():
            # We are running interactively so print the log message to stdout
            print(str(title) + ' / ' + str(message))
    return(None)
@classmethod
def wiki2datetime(cls, wikistamp):
time, date = wikistamp.split(', ')
hour, minute = time.split(':')
day, month, year, UTC = date.split(' ')
month = GAN.monthConvert(month)
dtVals = [int(year), int(month), int(day), int(hour), int(minute)]
dt = datetime.datetime(*dtVals)
return(dt)
@classmethod
def monthConvert(cls, name):
'''
Takes in either the name of the month or the number of the month and returns
the opposite. An input of str(July) would return int(7) while an input of
int(6) would return str(June).
Takes: int OR string
Returns: string OR int
'''
if type(name) is str:
if name == "January": return 1
elif name == "February": return 2
elif name == "March": return 3
elif name == "April": return 4
elif name == "May": return 5
elif name == "June": return 6
elif name == "July": return 7
elif name == "August": return 8
elif name == "September": return 9
elif name == "October": return 10
elif name == "November": return 11
elif name == "December": return 12
else: raise ValueError
elif type(name) is int:
if name == 1:return('January')
elif name == 2:return('February')
elif name == 3:return('March')
elif name == 4:return('April')
elif name == 5:return('May')
elif name == 6:return('June')
elif name == 7:return('July')
elif name == 8:return('August')
elif name == 9:return('September')
elif name == 10:return('October')
elif name == 11: return('November')
elif name == 12: return('December')
else: raise ValueError
@classmethod
def check_params(cls, params, title): # Get the GA params for a nomination
param_errors = []
#title = "Undefined article title"
#if 'title' in params.keys():
# if params['title'] != None:
# title = params['title']
if params['page'] == None:
param_errors.append("invalid review page parameter")
params['page'] = '0' # set to zero so that the rest of the code will have something to work with; this will be reported as an error
if params['nominator'] == None:
param_errors.append("invalid nominator parameter")
params['nominator'] = 'Example'
if params['status'] == None or params['status'] not in ['','2','H','R']:
params['status'] = 'R' # set to onreview as the default and post an error
param_errors.append("invalid status parameter")
if len(param_errors) > 0:
malformed_details = '; '.join(param_errors)
GA_config.current_errors.append("\nMalformed nomination for [[" + title + "]]: " + malformed_details)
#print('Found malformed nomination')
return param_errors
@classmethod
def get_params(cls, conn, article): # Get the GA params for a nomination
    """Parse the {{GA nominee}} template on *article* (a talk-page Page object).

    Returns a dict with keys nominator, status, page, subtopic, timestamp,
    note, shortdesc -- or None when the template is absent or malformed
    (recording the problem in GA_config.current_errors where appropriate).
    """
    GAN.log(conn,"get_params",article.title(), "Called get_params")
    try:
        article_text = article.text
    except pywikibot.exceptions.ServerError as e:
        GA_config.current_errors.append('\nGot a pywikibot server error when trying to read the text of ' + article.title())
        GAN.log(conn,"get_params:exceptions",article.title(), "Got a pywikibot server error when trying to read the article text")
        return None
    title = article.title()[5:]  # drop the first 5 characters (the 'Talk:' prefix)
    GAN_re = re.search('{{GA[ ]?nominee',article.text)
    if GAN_re == None:
        return None
    GAN_start = GAN_re.span()[0]
    GAN_text = article.text[GAN_start:] # Strips everything before the template
    GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else
    # The steps are: find all the params that are named, sort them by their offsets, then loop through in offset order and set the param values.
    # This is necessary because the param values can include more template calls.
    param_offsets = {}   # character offset in GAN_text -> parameter name
    found_params = {}    # parameter name -> match span
    rt_match = re.search('\|[\s]*time[\s]*=[\s]*',GAN_text)
    if rt_match != None:
        param_offsets[rt_match.span()[0]] = 'rtime'
        found_params['rtime']=rt_match.span()
    nm_match = re.search('\|[\s]*nominator[\s]*=[\s]*',GAN_text)
    if nm_match != None:
        param_offsets[nm_match.span()[0]] = 'nominator'
        found_params['nominator']=nm_match.span()
    pg_match = re.search('\|[\s]*page[\s]*=[\s]*',GAN_text)
    if pg_match != None:
        param_offsets[pg_match.span()[0]] = 'page'
        found_params['page']=pg_match.span()
    su_match = re.search('\|[\s]*subtopic[\s]*=[\s]*',GAN_text)
    if su_match != None:
        param_offsets[su_match.span()[0]] = 'subtopic'
        found_params['subtopic']=su_match.span()
    st_match = re.search('\|[\s]*status[\s]*=[\s]*',GAN_text)
    if st_match != None:
        param_offsets[st_match.span()[0]] = 'status'
        found_params['status']=st_match.span()
    nt_match = re.search('\|[\s]*note[\s]*=[\s]*',GAN_text)
    if nt_match != None:
        param_offsets[nt_match.span()[0]] = 'note'
        found_params['note']=nt_match.span()
    sd_match = re.search('\|[\s]*shortdesc[\s]*=[\s]*',GAN_text)
    if sd_match != None:
        param_offsets[sd_match.span()[0]] = 'shortdesc'
        found_params['shortdesc']=sd_match.span()
    sorted_param_keys = sorted(param_offsets.keys())
    if sorted_param_keys == []:
        return None
    # The nomination timestamp lives in the unnamed text before the first
    # named parameter.
    up_to_first_named_param = GAN_text[:sorted_param_keys[0]]
    first_bar = up_to_first_named_param.find('|')
    try:
        tm_text = up_to_first_named_param[first_bar+1:]
    except:
        GA_config.current_errors.append('\nCould not parse timestamp for ' + title)
        return None
    tm_match = re.search("\d\d:\d\d,.* \(UTC\)",tm_text)
    if tm_match == None:
        GA_config.current_errors.append('\nCould not parse timestamp for ' + title)
        return None
    else:
        tm_text = tm_text[tm_match.span()[0]:tm_match.span()[1]]
        try:
            timestamp = GAN.wiki2datetime(tm_text)
        except:
            GA_config.current_errors.append('\nCould not parse timestamp for ' + title)
            return None
    # Slice GAN_text into one string per named parameter (offset to next offset).
    named_param_strings = []
    for a in range(len(sorted_param_keys)-1):
        named_param_strings.append(GAN_text[sorted_param_keys[a]:sorted_param_keys[a+1]])
    named_param_strings.append(GAN_text[sorted_param_keys[len(sorted_param_keys)-1]:])
    # Defaults used when a parameter is missing or unparsable.
    status = ''
    user_nom = None
    page = None
    note = ''
    subtopic = 'Miscellaneous'
    shortdesc = ''
    for i in range(len(sorted_param_keys)):
        param = param_offsets[sorted_param_keys[i]]
        param_i = named_param_strings[i]
        # Trim a trailing '}}' (end of template) and surrounding pipes.
        if param_i[-2:] == "}}":
            param_i = param_i[:-2]
        if param_i[-1:] == '|':
            param_i = param_i[:-1]
        # NOTE(review): param_i[0] raises IndexError if the string is empty
        # after trimming -- presumably never happens in practice; confirm.
        if param_i[0] == '|':
            param_i = param_i[1:]
        if param == 'nominator':
            user_search = re.search('(User:|user:|User talk:|User Talk:|user Talk:|user talk:)[^\|\]]+',param_i)
            if user_search == None:
                continue
            else:
                user_span = user_search.span()
                user_text = param_i[user_span[0]:user_span[1]]
                user_nom_span = (re.search(':',user_text)).span()
                user_nom = user_text[user_nom_span[1]:]
                site = pywikibot.Site('en','wikipedia') # Users sometimes sign with something other than their exact user name. If we follow the link to their user page and extract that page's title that resolves any differences.
                upage = pywikibot.Page(site, "User:" + user_nom)
                user_nom = upage.title()[5:]
        elif param == 'rtime':
            # The time parameter is ignored; the timestamp was parsed above.
            continue
        elif param == 'subtopic':
            subtopic = param_i.replace('|','').replace('subtopic','').replace('=','').strip()
        elif param == 'page':
            page_match = re.search('[0-9]+',param_i)
            if page_match == None:
                page = None
            else:
                page_n_span = page_match.span()
                page = param_i[page_n_span[0]:page_n_span[1]]
        elif param == 'note':
            note = param_i.replace('note','').replace('=','').strip()
        elif param == 'status':
            status_string = param_i.replace('|','').replace('status','').replace('=','').strip()
            if status_string.lower() in ['onreview','review','on review']:
                status = 'R'
            elif status_string.lower() in ['onhold','hold','on hold']:
                status = 'H'
            elif status_string.lower() in ['2ndopinion', '2nd opinion']:
                status = '2'
            elif status_string == '':
                status = ''
            else:
                # Unrecognized value: None is flagged later by check_params().
                status = None
        elif param == 'shortdesc':
            shortdesc = param_i.replace('shortdesc','').replace('=','').strip()
    param_dict = {}
    param_dict['nominator']=user_nom
    param_dict['status']=status
    param_dict['page']=page
    param_dict['subtopic']=subtopic
    param_dict['timestamp']=timestamp
    param_dict['note']=note
    param_dict['shortdesc']=shortdesc
    return(param_dict)
@classmethod
def get_params_from_text_X(cls, title, rev_text): # Get the GA params for a nomination
    """Parse the {{GA nominee}} template parameters out of raw revision text.

    Parameters:
        title: article title, used only in error messages.
        rev_text: full wikitext of the revision to scan.

    Returns a dict with keys 'nominator', 'status', 'page', 'subtopic',
    'timestamp', 'note' and 'shortdesc', or None when no {{GA nominee}}
    template (or no named parameter) is found.
    """
    #TODO delete this -- I don't think anything uses it.
    #Marked with_X to see if it's used
    GAN_re = re.search('{{GA\s?[nN]ominee',rev_text)
    if GAN_re == None:
        #print("Can't find template")
        return None
    GAN_start = GAN_re.span()[0]
    GAN_text = rev_text[GAN_start:] # Strips everything before the template
    GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else
    # The steps are: find all the params that are named, sort them by their offsets, then loop through in offset order and set the param values.
    # This is necessary because the param values can include more template calls.
    #print(GAN_text)
    param_offsets = {}  # maps offset within GAN_text -> parameter name
    found_params = {}   # maps parameter name -> regex span; only used for debugging below
    # The template's "time" parameter; recorded as 'rtime' and skipped in the value loop.
    rt_match = re.search('\|[\s]*time[\s]*=[\s]*',GAN_text)
    if rt_match != None:
        param_offsets[rt_match.span()[0]] = 'rtime'
        found_params['rtime']=rt_match.span()
    nm_match = re.search('\|[\s]*nominator[\s]*=[\s]*',GAN_text)
    if nm_match != None:
        param_offsets[nm_match.span()[0]] = 'nominator'
        found_params['nominator']=nm_match.span()
    pg_match = re.search('\|[\s]*page[\s]*=[\s]*',GAN_text)
    if pg_match != None:
        param_offsets[pg_match.span()[0]] = 'page'
        found_params['page']=pg_match.span()
    su_match = re.search('\|[\s]*subtopic[\s]*=[\s]*',GAN_text)
    if su_match != None:
        param_offsets[su_match.span()[0]] = 'subtopic'
        found_params['subtopic']=su_match.span()
    st_match = re.search('\|[\s]*status[\s]*=[\s]*',GAN_text)
    if st_match != None:
        param_offsets[st_match.span()[0]] = 'status'
        found_params['status']=st_match.span()
    nt_match = re.search('\|[\s]*note[\s]*=[\s]*',GAN_text)
    if nt_match != None:
        param_offsets[nt_match.span()[0]] = 'note'
        found_params['note']=nt_match.span()
    sd_match = re.search('\|[\s]*shortdesc[\s]*=[\s]*',GAN_text)
    if sd_match != None:
        param_offsets[sd_match.span()[0]] = 'shortdesc'
        found_params['shortdesc']=sd_match.span()
    sorted_param_keys = sorted(param_offsets.keys())
    #print(sorted_param_keys)
    #print(param_offsets)
    #print(found_params)
    if sorted_param_keys == []:
        return None
    # The nomination timestamp is the positional text between the first '|'
    # and the first named parameter.
    up_to_first_named_param = GAN_text[:sorted_param_keys[0]]
    #print(up_to_first_named_param)
    first_bar = up_to_first_named_param.find('|')
    timestamp = None
    try:
        tm_text = up_to_first_named_param[first_bar+1:]
    except:
        # NOTE(review): a plain slice cannot raise, so this handler looks unreachable.
        GA_config.current_errors.append('\nCould not parse timestamp for ' + title)
        return None
    #print("TM <" + tm_text + ">")
    try:
        # dateutil.parser.parse; "(UTC)" is removed first because parse can't handle it.
        timestamp = parse(tm_text.replace("(UTC)","").strip())
    except ValueError as e:
        pass # For this version we allow invalid timestamps to still return the other params
    # Slice the template into one string per named parameter, in offset order.
    named_param_strings = []
    for a in range(len(sorted_param_keys)-1):
        named_param_strings.append(GAN_text[sorted_param_keys[a]:sorted_param_keys[a+1]])
    named_param_strings.append(GAN_text[sorted_param_keys[len(sorted_param_keys)-1]:])
    #print(named_param_strings)
    # Defaults used when a parameter is absent or unparsable.
    status = ''
    user_nom = None
    page = None
    note = ''
    subtopic = 'Miscellaneous'
    shortdesc = ''
    for i in range(len(sorted_param_keys)):
        param = param_offsets[sorted_param_keys[i]]
        param_i = named_param_strings[i]
        # Trim the closing braces and stray pipe delimiters around the value.
        if param_i[-2:] == "}}":
            param_i = param_i[:-2]
        if param_i[-1:] == '|':
            param_i = param_i[:-1]
        if param_i[0] == '|':
            param_i = param_i[1:]
        if param == 'nominator':
            # The value is expected to be a signature containing a user-page link.
            user_search = re.search('(User:|user:|User talk:|User Talk:|user Talk:|user talk:)[^\|\]]+',param_i)
            if user_search == None:
                continue
            else:
                user_span = user_search.span()
                user_text = param_i[user_span[0]:user_span[1]]
                user_nom_span = (re.search(':',user_text)).span()
                user_nom = user_text[user_nom_span[1]:]
                #print('U1 = '+ user_nom)
                site = pywikibot.Site('en','wikipedia') # Users sometimes sign with something other than their exact user name. If we follow the link to their user page and extract that page's title that resolves any differences.
                # NOTE(review): this reuses the local `page` for a pywikibot.Page;
                # if no numeric 'page' parameter occurs later in offset order, the
                # returned param_dict['page'] is that Page object, not a number.
                page = pywikibot.Page(site, "User:" + user_nom)
                user_nom = page.title()[5:] # drop the "User:" prefix
                #print('U2 = '+ user_nom)
        elif param == 'rtime':
            continue # the 'time' value is not used; the positional timestamp was parsed above
        elif param == 'subtopic':
            # Keep only the text up to the first remaining '|' (values can embed links/templates).
            if param_i[0] == '|':
                param_i = param_i[1:]
            if param_i[-1] == '|':
                param_i = param_i[:-1]
            if param_i.find("|") > -1:
                param_i = param_i[:param_i.find("|")]
            subtopic = param_i.replace('|','').replace('subtopic','').replace('=','').strip()
            #print('Su ='+subtopic)
        elif param == 'page':
            # Only a numeric page value is accepted.
            page_match = re.search('[0-9]+',param_i)
            if page_match == None:
                page = None
            else:
                page_n_span = page_match.span()
                page = param_i[page_n_span[0]:page_n_span[1]]
            #print('P =' + page)
        elif param == 'note':
            note = param_i.replace('note','').replace('=','').strip()
            #print('Nt ='+note)
        elif param == 'status':
            # Map the free-text status to a single-letter code:
            # 'R' = on review, 'H' = on hold, '2' = second opinion,
            # '' = awaiting review, None = unrecognized value.
            status_string = param_i.replace('|','').replace('status','').replace('=','').strip()
            #print('St string = <'+status_string+'>')
            if status_string.lower() in ['onreview','review','on review']:
                status = 'R'
            elif status_string.lower() in ['onhold','hold','on hold']:
                status = 'H'
            elif status_string.lower() in ['2ndopinion', '2nd opinion']:
                status = '2'
            elif status_string == '':
                status = ''
            else:
                status = None
            #print('Stat = '+ status)
        elif param == 'shortdesc':
            shortdesc = param_i.replace('shortdesc','').replace('=','').strip()
            #print('Sd ='+shortdesc)
    param_dict = {}
    param_dict['nominator']=user_nom
    param_dict['status']=status
    param_dict['page']=page
    param_dict['subtopic']=subtopic
    param_dict['timestamp']=timestamp
    param_dict['note']=note
    param_dict['shortdesc']=shortdesc
    return(param_dict)
@classmethod
def get_failed_params(cls, article, offset): # Get the GA params for a failed GA
#print("Calling get_failed_params with article " + article.title())
title = article.title()
text = article.text[offset:]
#print("in gfp, stripped text is " + text)
GAN_re = re.search('{{Failed\s?GA',text)
if GAN_re == None:
return None
GAN_start = GAN_re.span()[0]
GAN_text = text[GAN_start:] # Strips everything before the template
GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else
# The steps are: find all the params that are named, sort them by their offsets, then loop through in offset order and set the param values.
# This is necessary because the param values can include more template calls.
#print("in gfp, template only text is "+GAN_text)
param_offsets = {}
found_params = {}
tp_match = re.search('\|[\s]*topic[\s]*=[\s]*',GAN_text)
if tp_match != None:
param_offsets[tp_match.span()[0]] = 'topic'
found_params['topic']=tp_match.span()
st_match = re.search('\|[\s]*subtopic[\s]*=[\s]*',GAN_text)
if st_match != None:
param_offsets[st_match.span()[0]] = 'subtopic'
found_params['subtopic']=st_match.span()
pg_match = re.search('\|[\s]*page[\s]*=[\s]*',GAN_text)
if pg_match != None:
param_offsets[pg_match.span()[0]] = 'page'
found_params['page']=pg_match.span()
ol_match = re.search('\|[\s]*oldid[\s]*=[\s]*',GAN_text)
if ol_match != None:
param_offsets[ol_match.span()[0]] = 'oldid'
found_params['oldid']=ol_match.span()
sm_match = re.search('\|[\s]*small[\s]*=[\s]*',GAN_text)
if sm_match != None:
param_offsets[sm_match.span()[0]] = 'small'
found_params['small']=sm_match.span()
dt_match = re.search('\|[\s]*date[\s]*=[\s]*',GAN_text)
if dt_match != None:
param_offsets[dt_match.span()[0]] = 'date'
found_params['small']=dt_match.span()
sorted_param_keys = sorted(param_offsets.keys())
#print(sorted_param_keys)
#print(param_offsets)
#print(found_params)
if sorted_param_keys == []:
return None
named_param_strings = []
for a in range(len(sorted_param_keys)-1):
named_param_strings.append(GAN_text[sorted_param_keys[a]:sorted_param_keys[a+1]])
named_param_strings.append(GAN_text[sorted_param_keys[len(sorted_param_keys)-1]:])
#print(named_param_strings)
page = None
topic = 'Miscellaneous'
subtopic = 'Miscellaneous'
date = ''
small = ''
oldid = ''
for i in range(len(sorted_param_keys)):
param = param_offsets[sorted_param_keys[i]]
param_i = named_param_strings[i]
if param_i[-2:] == "}}":
param_i = param_i[:-2]
if param_i[-1:] == '|':
param_i = param_i[:-1]
if param_i[0] == '|':
param_i = param_i[1:]
if param == 'topic':
topic = param_i.replace('|','').replace('topic','').replace('=','').strip()
#print('Tp ='+topic)
elif param == 'subtopic':
subtopic = param_i.replace('|','').replace('subtopic','').replace('=','').strip()
#print('Su ='+subtopic)
elif param == 'page':
page_match = re.search('[0-9]+',param_i)
if page_match == None:
page = None
else:
page_n_span = page_match.span()
page = param_i[page_n_span[0]:page_n_span[1]]
#print('P =' + str(page))
elif param == 'oldid':
oldid = param_i.replace('|','').replace('oldid','').replace('=','').strip()
#print('Ol ='+ str(oldid))
elif param == 'small':
small = param_i.replace('|','').replace('small','').replace('=','').strip()
#print('Su ='+small)
elif param == 'date':
date = param_i.replace('|','').replace('date','').replace('=','').strip()
#print('Dt ='+date)
param_dict = {}
param_dict['topic']=topic
param_dict['subtopic']=subtopic
param_dict['oldid']=oldid
param_dict['page']=page
param_dict['small']=small
param_dict['date']=date
return(param_dict)
@classmethod
def get_delisted_params(cls, article): # Get the GA params for a delisted GA
#print("Calling get_delisted_params with article " + article.title())
title = article.title()
GAN_re = re.search('{{[dD]elisted\s?GA',article.text)
if GAN_re == None:
return None
GAN_start = GAN_re.span()[0]
GAN_text = article.text[GAN_start:] # Strips everything before the template
GAN_text = (GAN.find_enclosed_string(GAN_text))[0] # This means GAN_text has the GA nominee template text and nothing else
# The steps are: find all the params that are named, sort them by their offsets, then loop through in offset order and set the param values.
# This is necessary because the param values can include more template calls.
print("In get_delisted_params: " + GAN_text)
param_offsets = {}
found_params = {}
tp_match = re.search('\|[\s]*topic[\s]*=[\s]*',GAN_text)
if tp_match != None:
param_offsets[tp_match.span()[0]] = 'topic'
found_params['topic']=tp_match.span()
st_match = re.search('\|[\s]*subtopic[\s]*=[\s]*',GAN_text)
if st_match != None:
param_offsets[st_match.span()[0]] = 'subtopic'
found_params['subtopic']=st_match.span()
pg_match = re.search('\|[\s]*page[\s]*=[\s]*',GAN_text)
if pg_match != None:
param_offsets[pg_match.span()[0]] = 'page'
found_params['page']=pg_match.span()
ol_match = re.search('\|[\s]*oldid[\s]*=[\s]*',GAN_text)
if ol_match != None:
param_offsets[ol_match.span()[0]] = 'oldid'
found_params['oldid']=ol_match.span()
sm_match = re.search('\|[\s]*small[\s]*=[\s]*',GAN_text)
if sm_match != None:
param_offsets[sm_match.span()[0]] = 'small'
found_params['small']=sm_match.span()
dt_match = re.search('\|[\s]*date[\s]*=[\s]*',GAN_text)
if dt_match != None:
param_offsets[dt_match.span()[0]] = 'date'
found_params['date']=dt_match.span()
sorted_param_keys = sorted(param_offsets.keys())
#print(sorted_param_keys)
#print(param_offsets)
#print(found_params)
if sorted_param_keys == []:
return None
#up_to_first_named_param = GAN_text[:sorted_param_keys[0]]
#print(up_to_first_named_param)
#first_bar = up_to_first_named_param.find('|')
#try:
# tm_text = up_to_first_named_param[first_bar+1:]
#except:
# GA_config.current_errors.append('\nCould not parse timestamp for ' + title)
# return None
#print("TM <" + tm_text + ">")
#tm_match = re.search("\d\d:\d\d,.* \(UTC\)",tm_text)
#if tm_match == None:
# GA_config.current_errors.append('\nCould not parse timestamp for ' + title)
# return None
#else:
# #print(str(tm_match.span()))
# tm_text = tm_text[tm_match.span()[0]:tm_match.span()[1]]
#try:
# timestamp = GAN.wiki2datetime(tm_text)
#except:
# GA_config.current_errors.append('\nCould not parse timestamp for ' + title)
# return None
#print(timestamp)
named_param_strings = []
for a in range(len(sorted_param_keys)-1):
named_param_strings.append(GAN_text[sorted_param_keys[a]:sorted_param_keys[a+1]])
named_param_strings.append(GAN_text[sorted_param_keys[len(sorted_param_keys)-1]:])
#print(named_param_strings)
page = None
topic = 'Miscellaneous'
subtopic = 'Miscellaneous'
small = ''
oldid = ''
date = ''
for i in range(len(sorted_param_keys)):
param = param_offsets[sorted_param_keys[i]]
param_i = named_param_strings[i]
if param_i[-2:] == "}}":
param_i = param_i[:-2]
if param_i[-1:] == '|':
param_i = param_i[:-1]
if param_i[0] == '|':
param_i = param_i[1:]
if param == 'topic':
topic = param_i.replace('|','').replace('topic','').replace('=','').strip()
#print('Tp ='+topic)
elif param == 'subtopic':
subtopic = param_i.replace('|','').replace('subtopic','').replace('=','').strip()
#print('Su ='+subtopic)
elif param == 'page':
page_match = re.search('[0-9]+',param_i)
if page_match == None:
page = None
else:
page_n_span = page_match.span()
page = param_i[page_n_span[0]:page_n_span[1]]
#print('P =' + str(page))
elif param == 'oldid':
oldid = param_i.replace('|','').replace('oldid','').replace('=','').strip()
#print('Ol ='+ str(oldid))
elif param == 'small':
small = param_i.replace('|','').replace('small','').replace('=','').strip()
#print('Sm ='+small)
elif param == 'date':
date = param_i.replace('|','').replace('date','').replace('=','').strip()
#print('Dt ='+date)
param_dict = {}
param_dict['topic']=topic
param_dict['subtopic']=subtopic
param_dict['oldid']=oldid
param_dict['page']=page
param_dict['small']=small
param_dict['date']=date
return(param_dict)
@classmethod
def move_miscellaneous_to_end(cls,topic_list):
new_list = []
misc_found = False
for i in topic_list:
if i != 'Miscellaneous':
new_list.append(i)
else:
misc_found = True
if misc_found:
new_list.append('Miscellaneous')
return(new_list)
@classmethod
def GAN_page_top_text(cls, target):
top_text = ["{{Short description|Wikipedia page for submissions and discussions on Good article distinction candidates}}"]
top_text.append("<noinclude>{{pp-semi-indef}}{{pp-move-indef}}</noinclude>")
top_text.append("<!-- Putting categories and inter language links here to avoid confusion from people adding to the bottom of last articles section. -->")
top_text.append("[[Category:WikiProject Good articles|Good article nominations]]")
top_text.append("[[Category:Non-talk pages that are automatically signed]]")
top_text.append("<!-- [[Category:Articles needing attention]] -->")
top_text.append("{{User:Dispenser/Checklinks/config|interval=fortnightly|generator=all|convert=None|namespaces=0}}")
top_text.append("<!-- End of categories and interwikis-->")
top_text.append("{{/guidelines}}")
top_text.append("")
top_text.append("={{anchor|NomsByTopic}}Nominations=")
top_text.append("[[File:GA candidate.svg|40px|left|Good article nominations]]")
top_text.append("<div style=\"font-size: 20px; text-align: center;\">Alternative lists of articles awaiting review</div>")
top_text.append("<div style=\"font-size: 16px; text-align: center;\">[[User:SDZeroBot/GAN sorting|Sortable list annotated with more detailed topic information]]</div>")
top_text.append("<div style=\"font-size: 16px; text-align: center;\">[[User:ChristieBot/SortableGANoms|List allowing sorting by nominator, review count, and other fields]]</div>")
top_text.append("")
top_text.append("To add good article nominations to this page, please see the [[Wikipedia:Good article nominations/Instructions|instructions]].")
top_text.append("")
top_text.append("Note: For guidance in locating and [[Wikipedia:Citing sources|citing sources]] for articles, please see [[Wikipedia:Identifying reliable sources]]. For guidance in locating and citing sources for medicine and psychology-related articles, see [[Wikipedia:Identifying reliable sources (medicine)]]. For guidance in locating and citing sources for science and math-related articles, please see [[Wikipedia:Scientific citation guidelines]].")
top_text.append("__NOTOC__")
top_text.append("<!-- NOMINATION CATEGORIES BEGIN HERE -->")
top_text.append("<!-- EVERYTHING BELOW THIS COMMENT IS UPDATED AUTOMATICALLY BY A BOT -->")
top_text.append("")
return("\n".join(top_text))
@classmethod
def GAN_page_bottom_text(cls):
bottom_text= ["<!-- EVERYTHING ABOVE THIS COMMENT IS UPDATED AUTOMATICALLY BY A BOT -->"]
bottom_text.append("{{-}}<!-- For proper page length when Misc is empty -->")
bottom_text.append("{{Wikipedia:Good article nominations/Topic lists}}")
bottom_text.append("{{Wikipedia community}}")
bottom_text.append("<!-- NOMINATION CATEGORIES END HERE -->")
return("\n".join(bottom_text))
@classmethod
def notify_error(cls, location, command, error, fatal = False):
    """Report an error to the on-wiki bugs page and flag the run as errored.

    Parameters:
        location: name of the function/check where the error occurred.
        command: the command (usually SQL) that failed.
        error: the exception or message; rendered with str().
        fatal: when True, terminate the process after reporting.
    """
    GA_config.errors_found = True
    site = pywikibot.Site('en','wikipedia')
    page = pywikibot.Page(site, GA_config.strings['GAN bugs page'])
    page.text += "\n==Error notification==\n* Location: " + location + "\n* Command: " + command + "\n* Error: " + str(error) + "\n-- ~~~~\n"
    page.save("Reporting an error in " + location)
    if fatal:
        # BUG FIX: was the bare exit() site-module helper, which is not
        # guaranteed to exist outside the interactive interpreter.
        sys.exit()
@classmethod
def clear_errors(cls):
    """Blank the on-wiki GAN errors page at the start of a run."""
    wiki = pywikibot.Site('en', 'wikipedia')
    errors_page = pywikibot.Page(wiki, GA_config.strings['GAN errors page'])
    errors_page.text = ""
    errors_page.save("Clearing errors at start of run")
@classmethod
#def find_enclosed_string(cls, string): # Used to search for matching right brace pairs when getting params.
# print("String passed to find_enclosed_string is <" + string + ">")
# left_brace_cnt = 0
# enclosed_list = []
# enclosed_str_range = [0, 0]
# for i, s in enumerate(string):
# print(string[i:i+1])
# if s == "{":
# if left_brace_cnt == 0:
# enclosed_str_range[0] = i
# left_brace_cnt += 1
# elif s == "}":
# left_brace_cnt -= 1
# if left_brace_cnt == 0:
# enclosed_str_range[1] = i
# if enclosed_str_range[1] > enclosed_str_range[0]:
# enclosed_list.append(string[enclosed_str_range[0]:enclosed_str_range[1]+1])
# enclosed_str_range = [0, 0]
# return enclosed_list
def find_enclosed_string(cls, string): # Used to search for matching right brace pairs when getting params.
#print("String passed to find_enclosed_string is <" + string + ">")
left_brace_pair_cnt = 0
enclosed_list = []
enclosed_str_range = [0, 0]
for i, s in enumerate(string):
s2 = (string[i:i+2])
if s2 == "{{":
if left_brace_pair_cnt == 0:
enclosed_str_range[0] = i
left_brace_pair_cnt += 1
elif s2 == "}}":
left_brace_pair_cnt -= 1
if left_brace_pair_cnt == 0:
enclosed_str_range[1] = i
if enclosed_str_range[1] > enclosed_str_range[0]:
enclosed_list.append(string[enclosed_str_range[0]:enclosed_str_range[1]+2])
enclosed_str_range = [0, 0]
return enclosed_list
@classmethod
def write_errors_page(cls, error_page_text):
    """Update the on-wiki GAN errors page.

    Appends *error_page_text* to the page; when there is no new text and no
    errors were recorded this run, the page is blanked instead.
    """
    wiki = pywikibot.Site('en', 'wikipedia')
    errors_page = pywikibot.Page(wiki, GA_config.strings['GAN errors page'])
    if error_page_text == '' and GA_config.errors_found == False:
        # Nothing new and nothing outstanding: clear the page.
        errors_page.text = ""
        summary = "All errors cleared"
    else:
        errors_page.text += error_page_text
        summary = "Updating list of GAN errors"
    errors_page.save(summary)
    return None
@classmethod
def mark_superseded_reviews(cls, conn):
    """Mark all but the most recent review of each article/page as superseded.

    Finds article/page combinations with more than one non-superseded review
    row, keeps the newest (by review_ts) and stamps the rest with the current
    UTC time in superseded_ts. Commits at the end of the run.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)    # outer duplicate scan
    cursor2 = conn.cursor(pymysql.cursors.DictCursor)   # per-article review list
    cursor3 = conn.cursor(pymysql.cursors.DictCursor)   # UPDATE statements
    sql = "select article_title, page from " + GA_config.strings['GA reviews table name'] + " where superseded_ts is null group by article_title, page having count(*) > 1"
    try:
        cursor.execute(sql)
    except pymysql.Error as e:
        GAN.notify_error("mark_superseded_reviews",sql,e)
    if cursor.rowcount != 0:
        # At least one article/page has multiple live reviews.
        GAN.log(conn,"mark_superseded_reviews","N/A", "Found supersession candidates")
        for row in cursor.fetchall():
            GAN.log(conn,"mark_superseded_reviews","N/A", "Supersession candidates: " + str(row['article_title']) + " / " + str(row['page']))
            # Newest first, so the first row fetched is the one to keep.
            sql2 = "select reviewer, review_ts from " + GA_config.strings['GA reviews table name'] + " where article_title = '" + row['article_title'].replace("'","''") + "' and page = " + str(row['page']) + " order by review_ts desc"
            try:
                cursor2.execute(sql2)
            except pymysql.Error as e:
                GAN.notify_error("mark_superseded_reviews",sql2,e)
            if cursor2.rowcount == 0:
                # BUG FIX: previously passed the undefined name `e` here, which
                # raised NameError when no exception had occurred.
                GAN.notify_error("mark_superseded_reviews","Marking superseded reviews: can't find " + row['article_title'].replace("'","''") + " / " + str(row['page']),None)
            else:
                skipped_first = False
                for row2 in cursor2.fetchall():
                    if skipped_first == False:
                        # The most recent review survives untouched.
                        skipped_first = True
                        continue
                    # Everything after the first is marked superseded.
                    sql3 = "update " + GA_config.strings['GA reviews table name'] + " set superseded_ts = '" + datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S") + "' where article_title = '"
                    sql3 += row['article_title'].replace("'","''") + "' and page = " + str(row['page']) + " and reviewer = '" + row2['reviewer'].replace("'","''") + "' and review_ts = '" + str(row2['review_ts']) + "'"
                    GAN.log(conn,"mark_superseded_reviews", row['article_title'],"marking review " + str(row['page']) + " as superseded")
                    try:
                        # BUG FIX: was executed on the outer `cursor` (cursor3 was
                        # allocated but never used) and preceded by a stray `pass`.
                        cursor3.execute(sql3)
                    except pymysql.Error as e:
                        conn.rollback()
                        GAN.notify_error("mark_superseded_reviews",sql3,e)
    else:
        GAN.log(conn,"mark_superseded_reviews", "N/A","nothing to mark as superseded")
    conn.commit()
@classmethod
def integrity_checks(cls, conn):
    """Run database consistency checks and report each violation.

    Checks: (1) nomination records with no P/F/X closing record that are
    also missing from active nominations; (2) articles with multiple active
    nomination records; (3) blank reviewer names in the nominations table;
    (4) blank reviewer names in the GA reviews table; (5) duplicate
    reviewer rows for the same review. Violations are reported through
    GAN.notify_error; nothing is modified.

    BUG FIX: every violation branch used to pass the undefined name `e` to
    notify_error (the except-clause binding is deleted when the handler
    exits), raising NameError exactly when a violation was found. `None` is
    passed instead.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    # Check 1: a nomination record without a P/F/X to close it that does not appear in active nominations.
    sql = "select n.title, n.page from"
    sql += " (select distinct n1.title, n1.page from " + GA_config.strings['nominations table name'] + " n1"
    sql += " left join " + GA_config.strings['active nominations table name'] + " a"
    sql += " on n1.title = a.title"
    sql += " and n1.page = a.page"
    sql += " where a.title is null) n"
    sql += " left join (select distinct n2.title, n2.page from " + GA_config.strings['nominations table name'] + " n2 where status in ('P','F','X')) n2"
    sql += " on n.title = n2.title"
    sql += " and n.page = n2.page"
    sql += " where n2.title is null"
    try:
        cursor.execute(sql)
    except pymysql.Error as e:
        GAN.notify_error("integrity_checks",sql,e)
    if cursor.rowcount != 0:
        for row in cursor.fetchall():
            GAN.notify_error("integrity check","Found unclosed inactive nomination " + row['title'] + '/' + str(row['page']),None)
    # Check 2: more than one active nomination record for any article.
    sql = "select title from " + GA_config.strings['active nominations table name'] + " group by title having count(*) > 1"
    try:
        cursor.execute(sql)
    except pymysql.Error as e:
        GAN.notify_error("integrity_checks",sql,e)
    if cursor.rowcount != 0:
        for row in cursor.fetchall():
            GAN.notify_error("integrity check","Found multiple active nominations for " + row['title'],None)
    # Check 3: blank reviewer names in the nominations table.
    sql = "select title, page from " + GA_config.strings['nominations table name'] + " where reviewer = '' and status in ('H','2','R','P','F');"
    try:
        cursor.execute(sql)
    except pymysql.Error as e:
        GAN.notify_error("integrity_checks",sql,e)
    if cursor.rowcount != 0:
        for row in cursor.fetchall():
            GAN.notify_error("integrity check","Found blank reviewer in " + GA_config.strings['nominations table name'] + " table for " + str(row['title']) + " / " + str(row['page']),None)
    # Check 4: blank reviewer names in the GA_reviews table.
    sql = "select article_title, page from " + GA_config.strings['GA reviews table name'] + " where reviewer = '';"
    try:
        cursor.execute(sql)
    except pymysql.Error as e:
        GAN.notify_error("integrity_checks",sql,e)
    if cursor.rowcount != 0:
        for row in cursor.fetchall():
            # BUG FIX: this query returns article_title, not title; row['title'] raised KeyError.
            GAN.notify_error("integrity check","Found blank reviewer in " + GA_config.strings['GA reviews table name'] + " for " + str(row['article_title']) + " / " + str(row['page']),None)
    # Check 5: duplicate GA_review records -- same reviewer recorded more than once for a given review.
    sql = "select article_title, page, reviewer from " + GA_config.strings['GA reviews table name'] + " group by article_title, page, reviewer having count(*) > 1"
    try:
        cursor.execute(sql)
    except pymysql.Error as e:
        GAN.notify_error("integrity_checks",sql,e)
    if cursor.rowcount != 0:
        for row in cursor.fetchall():
            GAN.notify_error("integrity check","Found duplicate reviewer for " + str(row['article_title']) + " / " + str(row['page']),None)
    return None
class Name_changes:
    """Lookup helper for renamed wiki users."""
    @classmethod
    def get_name_changes(cls, conn): # Build a dictionary from the name_changes table
        """Return a dict mapping each old user name to its new user name,
        loaded from the name changes table."""
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        sql = "select n.old_name, n.new_name from " + GA_config.strings['name changes table name'] + " n "
        try:
            cursor.execute(sql)
        except pymysql.Error as e:
            GAN.notify_error("initialize name changes dictionary",sql,e)
        return {row['old_name']: row['new_name'] for row in cursor.fetchall()}
class GAN_history:
@classmethod
def analyse_a_page(cls, conn, site, GA_page_title):
    """Record the reviewer and start time of one historical GA review page.

    Follows redirects, derives the article title and review page number from
    the "Talk:<article>/GA<n>" naming scheme, and inserts a row into the
    historical reviews table unless one already exists.
    """
    review_page = pywikibot.Page(site, GA_page_title)
    # BUG FIX: the page number used to be taken from only the final character
    # of the title, so ".../GA12" was read as page 2 (and the fixed [5:-4]
    # title strip produced a garbled article title). Capture the whole number.
    num_match = re.search(r'/GA([0-9]+)$', review_page.title())
    if not review_page.exists():
        print("Review page " + GA_page_title + " no longer exists")
    elif num_match is None:
        GAN.log(conn,"analyse_a_page",review_page.title(),"Not a GA subpage")
    else:
        review_page_num = int(num_match.group(1))
        try:
            review_page.get()
            # Here we know it's not a redirect page because the get didn't raise an error.
            # The first revision identifies the reviewer and the review start time.
            reviewer = review_page.oldest_revision['user']
            review_ts = review_page.oldest_revision['timestamp']
            article_title = review_page.title()[5:num_match.start()]  # drop "Talk:" prefix and "/GA<n>" suffix
            if GAN_history.is_already_inserted(conn, site, reviewer, article_title, review_page_num, review_ts):
                GAN.log(conn,"analyse_a_page",review_page.title(),"Skipping review for page " + str(review_page_num) + " -- already inserted")
            else:
                GAN_history.insert_historical_GA_review(conn, site, reviewer, article_title, review_page_num, review_ts)
        except pywikibot.exceptions.IsRedirectPageError:
            review_page_redir_target = review_page.getRedirectTarget()
            if review_page_redir_target.exists():
                reviewer = review_page_redir_target.oldest_revision['user']
                review_ts = review_page_redir_target.oldest_revision['timestamp']
                target_match = re.search(r'/GA([0-9]+)$', review_page_redir_target.title())
                if target_match is not None:
                    article_title = review_page_redir_target.title()[5:target_match.start()]
                else:
                    # Target is not a GA subpage; keep the old fixed-width strip as a fallback.
                    article_title = review_page_redir_target.title()[5:-4]
                if GAN_history.is_already_inserted(conn, site, reviewer, article_title, review_page_num, review_ts):
                    GAN.log(conn,"analyse_a_page",review_page.title(),"Skipping review for page " + str(review_page_num) + " -- already inserted")
                else:
                    GAN_history.insert_historical_GA_review(conn, site, reviewer, article_title, review_page_num, review_ts)
            else:
                GAN.log(conn,"analyse_a_page",review_page.title(), "Redirect target page " + review_page_redir_target.title() + " does not exist")
@classmethod
def is_already_inserted(cls, conn, site, reviewer, article_title, page_num, review_ts):
    """Return True if this review is already in the historical reviews table.

    On a database error the error is reported and True is returned, so the
    caller conservatively skips the insert rather than risking a duplicate.
    """
    # Parameterized query: titles and user names routinely contain quotes,
    # so let the driver do the escaping instead of doubling apostrophes.
    sql = "select * from " + GA_config.strings['historical GA reviews table name'] + \
          " where reviewer = %s and article_title = %s and page = %s and review_ts = %s"
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(sql, (reviewer, article_title, page_num, review_ts.strftime("%Y-%m-%d %H:%M:%S")))
    except pymysql.Error as e:
        GAN.notify_error("is_already_inserted",sql,e)
        return(True)  # conservative: treat as present so we don't double-insert
    if cursor.rowcount != 0:
        return(True)
    else:
        return(False)
@classmethod
def insert_historical_GA_review(cls, conn, site, reviewer, article_title, page_num, review_ts):
    """Insert one row into the historical GA reviews table and commit.

    On a database error the error is reported and the transaction is rolled
    back.
    """
    # Location label fixed: this used to log as "scan_for_new_pages".
    GAN.log(conn,"insert_historical_GA_review", article_title,"inserting into historical reviews table")
    # Parameterized query: the driver escapes quotes in titles and user names.
    sql = "insert into " + GA_config.strings['historical GA reviews table name'] + \
          " (reviewer, article_title, page, review_ts) values (%s, %s, %s, %s)"
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(sql, (reviewer, article_title, page_num, str(review_ts)))
    except pymysql.Error as e:
        GAN.notify_error("insert_historical_GA_review",sql,e)
        conn.rollback()
    conn.commit()
    return(None)
@classmethod
def record_historical_reviewer_run(cls, conn, reviewer, method):
    """Record that a historical scan was run for *reviewer* via *method*.

    Inserts a timestamped row into the historical reviewer runs table and
    commits; on a database error the error is reported and the transaction
    is rolled back.
    """
    # BUG FIX: this used to log the undefined name `article_title` (a
    # copy-paste from insert_historical_GA_review), raising NameError on
    # every call; the location label was also wrong.
    GAN.log(conn,"record_historical_reviewer_run", "N/A","inserting into historical reviewer runs table")
    # Parameterized query: the driver escapes quotes in user names.
    sql = "insert into " + GA_config.strings['historical reviewer_runs table name'] + \
          " (reviewer, method, run_ts) values (%s, %s, %s)"
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(sql, (reviewer, method, str(datetime.datetime.utcnow())))
    except pymysql.Error as e:
        GAN.notify_error("record_historical_reviewer_run",sql,e)
        conn.rollback()
    conn.commit()
    return(None)
@classmethod
def run_exists(cls, conn, reviewer, method):
    """Return True if a historical run for *reviewer*/*method* is recorded.

    On a database error the error is reported and True is returned, so the
    caller conservatively skips re-running the scan.
    """
    # Parameterized query: the driver escapes quotes in user names.
    sql = "select * from " + GA_config.strings['historical reviewer_runs table name'] + \
          " where reviewer = %s and method = %s"
    GAN.log(conn,"run_exists","N/A", "sql is " + sql)
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(sql, (reviewer, method))
    except pymysql.Error as e:
        GAN.notify_error("run_exists",sql,e)
        return(True)  # conservative: treat as already run
    if cursor.rowcount != 0:
        return(True)
    else:
        return(False)