# User:FACBot/fanmp.pl
#
# From Wikipedia, the free encyclopedia
#!/usr/bin/perl -w
#
# fanmp.pl -- Create the WP:FANMP page from the FA page
# Usage: fanmp.pl
#    21 Sep 14 Created

use English;
use strict;
use utf8;
use warnings;

use Carp;
use Data::Dumper;
use File::Basename qw(dirname);
use File::Spec;
use MediaWiki::Bot;
use POSIX qw(strftime);
use XML::Simple;

# Write UTF-8 on both standard streams.
binmode (STDERR, ':utf8');
binmode (STDOUT, ':utf8');

# Pages used
my $fa_page = "Wikipedia:Featured articles";
my $fanmp_page = "Wikipedia:Featured articles that haven't been on the Main Page";

# Client for the English Wikipedia API over https; 'assert => bot' is passed
# through to MediaWiki::Bot.
my $editor = MediaWiki::Bot->new ({
        assert        => 'bot',
        host        => 'en.wikipedia.org',
        protocol     => 'https',
}) or die "new MediaWiki::Bot failed";

# Make the script's own directory searchable so that 'require Cred' can find
# a Cred.pm kept next to this file.  dirname() takes a single path; the
# suffix argument that used to be passed here was silently ignored.
my $dirname = dirname (__FILE__);
push @INC, $dirname;
require Cred;

# Direct method call rather than indirect-object syntax ("new Cred ()"):
# Cred is only loaded at run time, so the indirect form depended on the
# parser guessing that "Cred" is a class name.
my $cred = Cred->new ();
my $log = $cred->log ();

# Decide whether a page's {{bots}} / {{nobots}} markup permits an edit.
#
#   $text - wikitext of the page
#   $user - bot account name looked up in the allow= / deny= lists
#   $opt  - optional message type looked up in an optout= list
#
# Returns 1 when editing is allowed and 0 when it is not.  The first
# matching rule wins, checked in this order: {{nobots}}, bare {{bots}},
# allow=, deny=, optout= (only when $opt is given).  A page without any of
# these is open to bots.
sub allow_bots ($$;$) {
    my ($page_text, $bot_name, $opt_out) = @ARG;

    # Blanket templates settle the question immediately.
    if ($page_text =~ /{{[nN]obots}}/) {
        return 0;
    }
    if ($page_text =~ /{{[bB]ots}}/) {
        return 1;
    }

    # allow=: only the listed bots (or 'all') may edit.
    if ($page_text =~ /{{[bB]ots\s*\|\s*allow\s*=\s*(.*?)\s*}}/s) {
        my $allowed = $1;
        return 1 if $allowed eq 'all';
        return 0 if $allowed eq 'none';
        foreach my $name (split /\s*,\s*/, $allowed) {
            return 1 if $name eq $bot_name;
        }
        return 0;
    }

    # deny=: everyone except the listed bots (or nobody for 'all') may edit.
    if ($page_text =~ /{{[bB]ots\s*\|\s*deny\s*=\s*(.*?)\s*}}/s) {
        my $denied = $1;
        return 0 if $denied eq 'all';
        return 1 if $denied eq 'none';
        foreach my $name (split /\s*,\s*/, $denied) {
            return 0 if $name eq $bot_name;
        }
        return 1;
    }

    # optout=: consulted only when the caller names a message type.
    if (defined $opt_out
        && $page_text =~ /{{[bB]ots\s*\|\s*optout\s*=\s*(.*?)\s*}}/s) {
        my $opted_out = $1;
        return 0 if $opted_out eq 'all';
        foreach my $type (split /\s*,\s*/, $opted_out) {
            return 0 if $type eq $opt_out;
        }
        return 1;
    }

    return 1;
}

# Hand a failure message to $cred->error, appending the code and details of
# the last MediaWiki::Bot error when the editor has recorded one.
#
#   $message - description of what failed
#
# Returns whatever $cred->error returns.
# NOTE(review): the name suggests $cred->error terminates the script --
# confirm in Cred.pm.
sub error_exit ($) {
    my @report = @ARG;
    if (my $code = $editor->{error}->{code}) {
        push @report, ' (', $code, ') : ', $editor->{error}->{details};
    }
    return $cred->error (@report);
}

# Mask templates nested inside a {{<tag> ...}} template.
#
# Every "{{...}}" found inside the first run of non-"}" text that follows
# "{{<tag>" is rewritten in place to "%%<...>%%", so that afterwards the
# first "}}" following "{{<tag>" is the tag's own terminator.
# reset_nested_text() reverses the masking.
#
#   $tag  - template name; interpolated into the pattern as a regex fragment
#   $text - scalar variable holding the wikitext (passed by reference through
#           the prototype and modified in place)
#
# Returns 1 if at least one nested template was masked, otherwise 0.
sub has_nested_text ($\$) {
    my ($tag, $text) = @ARG;
    my $has_nested_text = 0;
    # ${tag} keeps the following "[" from being read as an array subscript.
    while ($$text =~ /\{\{${tag}[^}]+(\{\{[^}]+\}\})/) {
        # Rewrite exactly the span that matched.  Substituting by content
        # could hit an identical template elsewhere on the page, and the old
        # newline-blind inner pattern left multi-line templates untouched,
        # which made this loop spin forever.
        my ($from, $to) = ($-[1], $+[1]);
        my $inner = substr ($$text, $from + 2, $to - $from - 4);
        substr ($$text, $from, $to - $from, "%%<$inner>%%");
        $has_nested_text = 1;
    }
    return $has_nested_text;
}

# Turn the "%%<" and ">%%" markers back into "{{" and "}}".
#
#   $text - scalar variable holding the wikitext (passed by reference through
#           the prototype and modified in place)
#
# The opening markers are restored first, then the closing ones; the result
# of the second substitution is the return value.
sub reset_nested_text (\$) {
    my $text_ref = shift;
    ${$text_ref} =~ s/%%</{{/g;
    return ${$text_ref} =~ s/>%%/}}/g;
}

# Record today's featured article in the ArticleHistory of its talk page.
#
# The article title is read from "Template:TFA title/<Month D, YYYY>" for the
# current UTC date.  If the {{ArticleHistory}} template on "Talk:<title>"
# does not yet carry a maindate parameter for today, one is appended
# (maindate, maindate2, maindate3, ... - the first number not in use) and the
# talk page is saved.
#
# Returns the title of today's featured article.
# Failures are reported through error_exit / $cred->error.
# NOTE(review): the code assumes those calls do not return -- confirm in
# Cred.pm.
sub todays_featured_article () {
    # Sample the clock once so both date formats describe the same day.
    my @now = gmtime;

    # "Month D, YYYY" without a leading zero on the day.
    my $date = strftime "%B %d, %Y", @now;
    $date =~ s/  / /;
    $date =~ s/0(\d,)/$1/;

    my $template = "Template:TFA title/$date";
    my $todays_page = $editor->get_text ($template) or
        error_exit ("Unable to find '$template')");

    $cred->showtime ("Todays' page is $todays_page\n");

    my $talk = "Talk:$todays_page";

    # "D Month YYYY" without a leading zero on the day.
    my $date2 = strftime "%d %B %Y", @now;
    $date2 =~ s/0(\d )/$1/;

    my $text = $editor->get_text ($talk) or
        error_exit ("Unable to find '$talk')");
    $cred->error ("no bots allowed on '$talk'") unless allow_bots ($text, $cred->user);

    # Normalise the template name, then mask nested templates so that the
    # first "}}" after "{{ArticleHistory" is the template's own terminator.
    $text =~ s/\{\{Article\s*History/{{ArticleHistory/is;
    my $has_nested_text = has_nested_text ('ArticleHistory', $text);

    # Non-greedy on purpose: with /s a greedy match would run on to the last
    # "}}" on the whole talk page.
    my ($articleHistory) = $text =~ /\{\{ArticleHistory(.+?)\}\}/is;

    if (!defined $articleHistory) {
        # Nothing to update; saving would only re-submit the page as it is.
        $cred->showtime ("\tno ArticleHistory found on '$talk'\n");
        return $todays_page;
    }

    if ($articleHistory =~ /maindate(\d*)\s*=\s*0*(\Q$date\E|\Q$date2\E)/) {
        $cred->showtime ("\tmaindate$1 already updated manually\n");
    } else {
        # Pick the first free parameter name: maindate, maindate2, ...
        my $id = '';
        if ($articleHistory =~ /maindate/) {
            $id = 2;
            # (?!\d) stops "maindate2" from matching "maindate20".
            ++$id while $articleHistory =~ /maindate${id}(?!\d)/;
        }

        $cred->showtime ("\tupdating article history\n");
        # Append the new parameter just before ArticleHistory's own "}}".
        $text =~ s/(\{\{ArticleHistory.+?)\}\}/$1\n|maindate$id=$date2\n}}/is;

        if ($has_nested_text) {
            reset_nested_text ($text);
        }

        $editor->edit ({
            page => $talk,
            text => $text,
            summary => "$todays_page is today's featured article",
            bot => 1,
            minor => 0,
        }) or
            error_exit ("unable to edit '$talk'");
    }

    return $todays_page;
}

# Log in with the account held by $cred; give up with the API error otherwise
unless ($editor->login ({
    username => $cred->user,
    password => $cred->password
})) {
    die $editor->{error}->{code} . ': ' . $editor->{error}->{details};
}

# Update today's featured article
my $todays_page = todays_featured_article ();

# Fetch the list of featured articles
my $fa_text = $editor->get_text ($fa_page);
error_exit ("Unable to find '$fa_page')") unless $fa_text;
unless (allow_bots ($fa_text, $cred->user)) {
    $cred->error ("no bots allowed on '$fa_page'");
}

# Wrap today's article in {{FA/BeenOnMainPage|...}} unless it already is.
# Both the plain link [[Title]] and the piped link [[Title|label]] are
# recognised, with optional surrounding quote marks.
my $update = 0;
my $already_marked =
       $fa_text =~ /{{FA\/BeenOnMainPage\|'*\[\[\Q$todays_page\E\]\]'*/s
    || $fa_text =~ /{{FA\/BeenOnMainPage\|'*\[\[\Q$todays_page\E\|.+?\]\]'*/s;

if ($already_marked) {
    $cred->showtime ("\tBeenOnMainPage already updated manually\n");
}
else {
    # The plain link is tried first; the piped link only if that fails
    $update = 1
        if $fa_text =~ s/('*\[\[\Q$todays_page\E\]\]'*)/{{FA\/BeenOnMainPage|$1}}/s
        || $fa_text =~ s/('*\[\[\Q$todays_page\E\|.+?\]\]'*)/{{FA\/BeenOnMainPage|$1}}/s;
}

# Save the list only when something was actually wrapped
if ($update) {
    $cred->showtime ("\tupdating $fa_page\n");
    my $saved = $editor->edit ({
        page => $fa_page,
        text => $fa_text,
        summary => "$todays_page is today's featured article",
        bot => 1,
        minor => 0,
    });
    error_exit ("unable to edit '$fa_page'") unless $saved;
}

# Reduce the FA list to its headings and its bare article links.  Lines in
# which the link sits inside a template (such as FA/BeenOnMainPage) match
# neither link pattern and are dropped.
my @fanmp;
for my $fa_line (split /\n/, $fa_text) {
    # Section headings are carried over unchanged
    if ($fa_line =~ /^==/) {
        push @fanmp, $fa_line;
        next;
    }

    # Images and categories are not articles
    next if $fa_line =~ /\[\[(File|Category):/;

    # Already a bullet item: keep it as it is
    if ($fa_line =~ /\* "*'*\[\[[^#].+\]\]'*"*/) {
        push @fanmp, $fa_line;
        next;
    }

    # A link at the start of the line: turn it into a bullet item
    push @fanmp, "\* $fa_line"
        if $fa_line =~ /^"*'*\[\[[^#].+\]\]'*"*/;
}

# Close each section with a "<!-- n -->" comment holding its item count, put
# "* '''None'''" into sections without items, and add up the grand total.
my $total = 0;
my $count = 0;
my $first = 1;
my @lines;
for my $line (@fanmp) {
    # Ordinary item: count it and pass it through
    unless ($line =~ /^==/) {
        ++$count;
        push @lines, $line;
        next;
    }

    # A heading ends the previous section; the first heading has none
    if ($first) {
        $first = 0;
    }
    else {
        push @lines, "* '''None'''" if 0 == $count;
        push @lines, "<!-- $count -->";
        $total += $count;
        $count = 0;
    }
    push @lines, $line;
}

# Close the final section
push @lines, "<!-- $count -->\n";
$total += $count;

my $lines = join "\n", @lines;

# Fetch the current FANMP page
my $fanmp_text = $editor->get_text ($fanmp_page);
error_exit ("Unable to find '$fanmp_page')") unless $fanmp_text;
unless (allow_bots ($fanmp_text, $cred->user)) {
    $cred->error ("no bots allowed on '$fanmp_page'");
}

# Rebuild the page body.  The order of the three steps matters.
for ($fanmp_text) {
    s/==.+\|}/|}/s;                                 # cut from the first "==" up to the last "|}"
    s/{{formatnum:\d+}}/{{formatnum:$total}}/;      # refresh the first formatnum total
    s/\|}/$lines|}/;                                # put the new list before the first "|}"
}

# Update the FANMP page
my $fanmp_saved = $editor->edit ({
    page => $fanmp_page,
    text => $fanmp_text,
    summary => "Updating $fanmp_page to remove $todays_page",
    bot => 1,
    minor => 0,
});
error_exit ("unable to edit $fanmp_page") unless $fanmp_saved;

$cred->showtime ("done\n");
exit 0;