User:Plastikspork/frenchcommune.pl

From Wikipedia, the free encyclopedia
#!/usr/bin/perl
# Module: strict -- All variables must be declared
use strict;

# Module: utf8 -- Extended characters
use utf8;

# Module: MediaWiki::Bot
use MediaWiki::Bot;

# Module: Encode
use Encode;

# Edit metadata handed to the edit call in the main loop: the summary text
# and a flag (1) asking for the edit to be marked as minor
my ($edit_summary, $is_minor) =
  ('Copy coordinates from French Wikipedia, and update map', 1);

# Account name used to create and log in both editors; it is echoed so the
# operator can see which account the password prompt is for
# NOTE(review): spelled with a "c" here -- confirm this is the intended account
my $user = 'Plasticspork';
print "User: $user\n";

# Ask for the password on standard input (nothing here suppresses terminal
# echo), then drop any trailing spaces, tabs, carriage returns and newlines
print "Password: ";
my $pass = <STDIN>;
$pass =~ s/[ \t\r\n]+$//;

#Create an editor for the English Language Wikipedia
my $editor=MediaWiki::Bot->new($user);
$editor->set_wiki('en.wikipedia.org','w');

#Create an editor for the French Language Wikipedia
my $freditor=MediaWiki::Bot->new($user);
$freditor->set_wiki('fr.wikipedia.org','w');

#Turn debugging on for both editors
$editor->{debug} = 1;
$freditor->{debug} = 1;

#Log in to both Wikipedia projects
# NOTE(review): this script treats a TRUE return value from login() as a
# failure. The meaning of login()'s return value has differed between
# MediaWiki::Bot releases -- confirm against the installed version.

# The English wiki is the one being edited, so a failed login there is fatal.
# Exit with a non-zero status so a calling shell can tell the run failed.
if( $editor->login($user, $pass) ) {
  print "Failed to login!\n";
  exit 1;
}

# The French wiki is only read from (get_text), so a failed login there is
# reported instead of being silently ignored, but the run continues.
if( $freditor->login($user, $pass) ) {
  print "Warning: failed to login to fr.wikipedia.org; continuing anyway\n";
}

# Work list: every page that the English wiki reports as a member of the
# maintenance category named below
my $category = "Category:Infobox French commune with missing latitude or longitude";
my @articlelist = $editor->get_pages_in_category($category);

# Scratch variables available to the code below, each starting out as an
# empty string
my ($article, $frarticle, $frtext, $lat, $lon, $coord, $latx, $lonx) = ("") x 8;

# Loop through the list of articles.
#
# For each English article this:
#   1. fetches its wikitext and works out the matching French article name
#      (from an [[fr:...]] interwiki link, else the same title),
#   2. reads decimal latitude/longitude parameters from the French wikitext,
#   3. isolates the {{Infobox French commune}} template in the English text,
#   4. fills in (or inserts) the latitude/longitude parameters,
#   5. removes the "lat long" parameter and the old France_jms.png map
#      parameters once decimal coordinates are present,
#   6. prints old/new infoboxes plus a coordinate comparison, and
#   7. commits the edit only after the operator confirms.
foreach my $article (@articlelist) {

  # All per-article state is declared inside the loop so that nothing found
  # for a previous article (in particular the old {{coord}} values used by
  # the sanity check) can leak into this one.
  my $lat   = "";
  my $lon   = "";
  my $coord = "";
  my $latx;          # old latitude in decimal degrees, undef if not parsed
  my $lonx;          # old longitude in decimal degrees, undef if not parsed

  # Print a link to the article being processed
  print "Processing: ".$article."\n";

  #Pull the article wikitext from the English WP
  my $text = $editor->get_text($article);
  if ( !defined $text ) {
    print "Could not fetch the article text; skipping...\n";
    next;
  }

  #Determine the French article name
  my $frarticle = $article;
  if ($text =~ /\[\[fr:([^\]\[]*)\]\]/ ) {
    $frarticle = $1;
  }

  #Pull the French article wikitext from the French WP
  my $frtext = $freditor->get_text($frarticle);
  $frtext = "" unless defined $frtext;

  #Grab the latitude and longitude from the French article
  if ($frtext =~ /\|[ ]*latitude[ ]*=[ ]*(-?[0-9]+\.[0-9]+)[\|\r\n]/m) {
    $lat = $1;
  }
  if ($frtext =~ /\|[ ]*longitude[ ]*=[ ]*(-?[0-9]+\.[0-9]+)[\|\r\n]/m) {
    $lon = $1;
  }

  # Without both values there is nothing useful to copy; carrying on would
  # write empty "latitude =" / "longitude =" parameters into the article.
  if ( $lat eq "" or $lon eq "" ) {
    print "No decimal latitude/longitude found in fr:".$frarticle."; skipping...\n";
    next;
  }

  #Isolate the infobox in the English WP article
  # The capturing group makes split return (head, infobox, foot). The -1
  # limit keeps a trailing empty field, so an infobox at the very end of
  # the text still yields three fields. Literal braces are escaped because
  # an unescaped "{" in a pattern is deprecated/fatal on some Perl versions.
  my $head = "";
  my $foot = "";
  my @sections = split(/(\{\{[_ ]*Infobox[_ ]+French[_ ]+commune(?:[^{}]|\{\{[^{}]*\}\}|[\r\n])*\}\})/im, $text, -1);
  if ( scalar(@sections) == 3 ) {
    ($head, $text, $foot) = @sections;
  }
  # Otherwise (no infobox, or more than one) the whole article text is
  # processed as if it were the infobox, with empty head and foot.

  # Show the old infobox
  print "------- Old Infobox -----\n";
  print $text;

  #Add the latitude and longitude to the English article
  # First choice: fill in an existing empty parameter. Fallback: insert a
  # new parameter line immediately before the "lat long" parameter, reusing
  # its leading "| " and " =" spacing.
  if ($text =~ /\|[ ]*latitude[ ]*=[ ]*[\r\n]/m ) {
    $text =~ s/(\|[ ]*latitude[ ]*=)[ ]*([\r\n])/$1 $lat$2/m;
  } elsif ( $text =~ /\|[ ]*lat long[ ]*=(?:[^{}]|\{\{[^{}]*\}\})*[\r\n]/m ) {
    $text =~ s/(\|[ ]*)(lat long)([ ]*=)((?:[^{}]|\{\{[^{}]*\}\})*)([\r\n])/${1}latitude$3 $lat$5$1$2$3$4$5/m;
  }
  # Same pattern as the latitude branch: the space between "lat long" and
  # "=" is optional, so "|lat long=..." also gets a longitude line.
  if ($text =~ /\|[ ]*longitude[ ]*=[ ]*[\r\n]/m ) {
    $text =~ s/(\|[ ]*longitude[ ]*=)[ ]*([\r\n])/$1 $lon$2/m;
  } elsif ( $text =~ /\|[ ]*lat long[ ]*=(?:[^{}]|\{\{[^{}]*\}\})*[\r\n]/m ) {
    $text =~ s/(\|[ ]*)(lat long)([ ]*=)((?:[^{}]|\{\{[^{}]*\}\})*)([\r\n])/${1}longitude$3 $lon$5$1$2$3$4$5/m;
  }

  #Save the old coordinates for error checking purposes
  # Only the degrees|minutes|seconds|hemisphere form of {{coord}} is
  # parsed. South latitudes and west longitudes are made negative so they
  # can be compared with the signed decimal values taken from the French
  # article.
  if ($text =~ /(\{\{[_ ]*[Cc]oord[^{}]*\}\})/) {
    $coord = $1;
    if( $coord =~ /\{\{[_ ]*[Cc]oord[ ]*\|[ ]*([0-9]+)[ ]*\|[ ]*([0-9]+)[ ]*\|[ ]*([0-9]+)[ ]*\|[ ]*([NS])[ ]*\|[ ]*([0-9]+)[ ]*\|[ ]*([0-9]+)[ ]*\|[ ]*([0-9]+)[ ]*\|[ ]*([EW])/ ) {
      my ($latd, $latm, $lats, $ns, $lond, $lonm, $lons, $ew) = ($1, $2, $3, $4, $5, $6, $7, $8);
      $latx = $latd + $latm/60 + $lats/3600;
      $lonx = $lond + $lonm/60 + $lons/3600;
      $latx = -$latx if $ns eq 'S';
      $lonx = -$lonx if $ew eq 'W';
    }
  }

  #Remove lat long if no longer needed
  if ($text =~ /latitude[ ]*=[ ]*-?[0-9]+\.[0-9]+/m and $text =~ /longitude[ ]*=[ ]*-?[0-9]+\.[0-9]+/m) {
    $text =~ s/\|[ ]*lat long[ ]*=[ ]*(?:\{\{[_ ]*[Cc]oord[^{}]*\}\}|[^{}\|])*[\r\n]//;
  }
  #Remove image map, x, y, if no longer needed
  if ($text =~ /\|[ ]*latitude[ ]*=[ ]*-?[0-9]+\.[0-9]+/m and $text =~ /\|[ ]*longitude[ ]*=[ ]*-?[0-9]+\.[0-9]+/m and $text =~ /\|[ ]*image map[ ]*=[ ]*France[_ ]jms\.png/m ) {
    $text =~ s/\|[ ]*x[ ]*=[ ]*[0-9]*[ ]*[\r\n]//;
    $text =~ s/\|[ ]*y[ ]*=[ ]*[0-9]*[ ]*[\r\n]//;
    $text =~ s/\|[ ]*image map[ ]*=[ ]*France[_ ]jms\.png[ ]*[\r\n]//;
  }

  # Show revised infobox
  print "\n------- New Infobox -----\n";
  print $text;

  # Print the change in coordinates as a sanity check
  # NOTE(review): old values come from whole arc-seconds, so they will
  # rarely match the decimal values to within 1e-9 degrees; the threshold
  # is kept as it was and the operator is expected to eyeball the numbers.
  print "\n------- Check -------\n";
  printf("     %12s %12s\n", "latitude", "longitude");
  printf("New: %12.9f %12.9f\n", $lat,  $lon);
  if ( defined $latx and defined $lonx ) {
    printf("Old: %12.9f %12.9f\n", $latx, $lonx);
    if( (abs($latx - $lat) > 1e-9) ||
      (abs($lonx - $lon) > 1e-9) ) {
      print "*** COORDINATES DO NOT AGREE ***\n";
    }
  } else {
    print "Old: no parsable {{coord}} found in the infobox\n";
  }

  # Insert the infobox back into the article
  $text = $head.$text.$foot;

  # Ask for confirmation to commit the changes
  print "Press enter to commit or s to skip\n";
  my $response = <STDIN>;

  # End of input means nobody is there to confirm; stop rather than
  # committing this and every remaining edit unreviewed.
  if ( !defined $response ) {
    print "No more input; stopping without committing.\n";
    last;
  }

  if ($response =~ /s/i) {
    print "Skipping...\n";
  } else {
    # Submit to Wikipedia.
    # Note: This does not warn of edit conflicts, it just overwrites existing text.
    print "Submitting...\n";
    $editor->edit($article, $text, $edit_summary, $is_minor);
  }

  # Take a break (frequent edits are forbidden per bot policy)
  print "Sleep 5\n";
  sleep 5;
}