User:ZackBot/single cleanup

From Wikipedia, the free encyclopedia
#!/usr/bin/env ruby
# encoding: utf-8

require 'mediawiki_api'
require 'HTTParty'
require 'csv'
require 'open-uri'
require './helper'
require 'fileutils'

# Matches one complete `{{...}}` template whose opening looks like an infobox.
# The zero-width lookahead requires `{{`, an optional literal `Single`,
# optional whitespace, `Infobox` or `infobox`, optional whitespace, and an
# optional `Single` followed by any number of `s`.
# Capture group 1 is the whole template; `\g<1>` re-enters group 1 so nested
# `{{...}}` pairs are consumed as balanced units.
# NOTE(review): both `Single` parts are optional, so any template starting
# with `{{Infobox`/`{{infobox` passes the lookahead — confirm this is intended.
INFOBOX_REGEX = /(?=\{\{(?:Single)?\s*[Ii]nfobox\s*(?:Single[s]*)?)(\{\{(?>[^{}]++|\g<1>)*}})/

# Matches a `| Certification = value` parameter (case-sensitive) together with
# the `|` that opens the following parameter. Group 1 is the value, which may
# not contain braces or pipes; group 2 is that trailing `|`. A literal space
# must follow the `=`, and a newline must separate the value from the next `|`.
CERT_REGEX = /\|\s*Certification\s*= ([^{}\|]*)\n\s*(\|)/

# PetScan query (JSON output) that supplies the list of page titles to process.
QUERY_URL = "https://petscan.wmflabs.org/?psid=632339&format=json"

# Project helper from ./helper; presumably loads USERNAME/PASSWORD into ENV
# (not visible here — verify against helper.rb).
Helper.read_env_vars

# Log in to the English Wikipedia API with the bot credentials from ENV.
client = MediawikiApi::Client.new 'https://en.wikipedia.org/w/api.php'
client.log_in ENV['USERNAME'], ENV['PASSWORD']

# Download the work list. `URI.open` is used instead of bare `open` because
# open-uri's Kernel#open override for URLs was deprecated in Ruby 2.7 and
# removed in 3.0. The block form (`&:read`) closes the stream once the body
# has been read. `JSON.parse` is preferred over `JSON.load` for remote data.
json = JSON.parse(URI.open(QUERY_URL, &:read))

# Extract the page titles from the PetScan result, turning the underscores
# used in the returned titles into spaces.
titles = json["*"].first["a"]["*"].map { |page| page["title"].tr("_", " ") }
puts titles.size

# For every listed page: fetch its wikitext, strip the deprecated
# `Certification` parameter from each matching infobox, and save the page
# only when the text actually changed.
titles.each do |title|
  title.strip!
  puts title

  full_text = client.get_wikitext(title).body
  # Snapshot taken before any processing so changes can be detected later.
  original = full_text.dup

  # Skip pages rejected by the project helpers (bot-exclusion check and
  # infobox-presence check; both are defined in ./helper).
  next if Helper.no_bots?(full_text)
  next unless Helper.at_least_once(full_text, "Infobox Single", INFOBOX_REGEX)

  # `scan` yields one array of capture groups per match; group 1 is the
  # complete infobox template.
  full_text.scan(INFOBOX_REGEX).each do |match|
    original_text = match.first
    # Replacing the match with group 2 keeps only the `|` of the next
    # parameter, which removes the Certification parameter.
    updated_text = original_text.gsub(CERT_REGEX, '\2')
    # Block form on purpose: a replacement *string* would have sequences such
    # as `\1`, `\0` or `\\` occurring in the wikitext interpreted as
    # back-references, corrupting the page. A block's result is inserted
    # verbatim.
    full_text.sub!(original_text) { updated_text }
  end

  if original == full_text
    puts "- NO CHANGES"
    next
  end

  client.edit(title: title, text: full_text, summary: "Fixing infobox not to use [[:Category:Pages using Infobox single with deprecated certification parameter|deprecated certification parameter]]")
  puts "- SUCCESS"
end

puts "DONE"