User:AnomieBOT/source/tasks/TemplateReplacer16.pm

From Wikipedia, the free encyclopedia
package tasks::TemplateReplacer16;

=pod

=begin metadata

Bot:      AnomieBOT
Task:     TemplateReplacer16
BRFA:     Wikipedia:Bots/Requests for approval/AnomieBOT 29
Status:   Approved 2009-05-03
Created:  2009-04-30
OnDemand: true

When consensus exists for deprecating external link parameters from an infobox,
the bot will go through all transclusions of the infobox, remove the external
link parameters, and add the corresponding external link or external link
template to the article's External links section if that section does not
already contain the corresponding link. An External links section will be
created if necessary. Any issues encountered will be logged. Each page will
only be processed once (a local database holds the pageids of all
successfully-processed pages).

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

my $extlink_templates_re=undef;
my $no_edit_just_to_remove_parameters=0;

my $req="[[User:AnomieBOT/req/Template Infobox adult biography 1|request]]";
my $seq=2;
my $template='Infobox adult biography';

# Transclusion iterator
my %iter=(
    list        => 'embeddedin',
    eititle     => "Template:$template",
    einamespace => 0,
    eilimit     => 'max',
);

# Matching external links; "%X" is the text of the template parameter
my @to_process=qw/homepage imdb iafd egafd bgafd afdb eurobabeindex/;
my %ext_links=(
    'iafd' => {
        fix  => sub { my $x=shift; $x=~s{/$}{}; return $x; },
        link => 'http://(?:www\.)?iafd\.com/(?:person\.rme/perfid|person\.asp\?PerfID)=%X(?:/.*)?',
    },
    'egafd' => {
        fix  => sub { my $x=shift; $x=~s{/$}{}; return $x; },
        link => 'http://(?:www\.)?egafd\.(?:com|co\.uk)/actresses/details\.php/id/%X(?:/|/gender=f)?',
    },
    'bgafd' => {
        fix  => sub { my $x=shift; $x=~s{/$}{}; return $x; },
        link => '(?:http://(?:www\.)?bgafd\.co\.uk/actresses/details\.php/id/%X(?:/|/gender=f)?|http://(?:www\.)?iafd\.com/(?:person\.rme/perfid|person\.asp\?PerfID)=%X(?:/|/gender=[mf])?)',
    },
    'afdb' => {
        fix  => sub { my $x=shift; $x=~s{/$}{}; $x=~s/^0+//; return $x; },
        link => 'http://(?:www\.)?adultfilmdatabase.com/+(?:actor\.cfm\?actorid=0*%X|actor/.*-0*%X/|index\.cfm/Action/DA/ActorID/0%X/.*)',
    },
    'eurobabeindex' => {
        fix  => sub { my $x=shift; return $x; },
        link => 'http://(?:www\.)?eurobabeindex\.com/sbandoindex/%X\.html',
    },
    'imdb' => {
        fix  => sub { my $x=shift; $x=~s{/$}{}; $x=~s/^0+//; return $x; },
        link => 'http://(?:www\.)?imdb\.com/name/nm0*%X/?',
    },
    'homepage' => {
        fix  => sub { my $x=shift; $x=~s{/$}{}; return $x; },
        link => '%X/?',
        putfirst => 1,
        keepparam => 1,
    },
);

# External link generators
sub xx {
    my ($t,$x,$n)=@_;
    if($x=~/=/ || ($n//'')=~/=/){
        $x="1=$x";
        $n="2=$n" if defined($n);
    }
    my $ret="{{$t|$x";
    $ret.="|$n" if defined($n);
    $ret.="}}";
    return $ret;
}
sub xx2 {
    my ($t,$x,$n,$g)=@_;
    my $ret="{{$t|id=$x";
    $ret.="|gender=$g" if(($g//'')=~/^(?:male|female)$/);
    $ret.="|name=$n" if defined($n);
    $ret.="}}";
    return $ret;
}
sub xx3 {
    my ($l,$x,$n,$g,$gg)=@_;
    my $ret="[$l$x";
    if($g eq 'male'){
        $ret.=$gg->[1];
    } elsif($g eq 'female'){
        $ret.=$gg->[2];
    } else {
        $ret.=$gg->[0];
    }
    $ret.=" $n]";
    return $ret;
}
my %ext_templates=(
    'iafd'          => sub { return xx2('iafd name', $_[0], $_[1], $_[2]); },
    'egafd'         => sub { return xx3('http://www.egafd.co.uk/actresses/details.php/id/', $_[0], $_[1], $_[2], ['', '', '/gender=f']).' at EGAFD'; },
    'bgafd'         => sub { return ($_[2] eq 'male') ? xx2('iafd name', $_[0], $_[1], $_[2]) : xx3('http://www.bgafd.co.uk/actresses/details.php/id/', $_[0], $_[1], $_[2], ['', '/gender=m', '/gender=f']).' at BGAFD'; },
    'afdb'          => sub { return xx2('afdb name', $_[0], $_[1], $_[2]); },
    'eurobabeindex' => sub { return xx3('http://www.eurobabeindex.com/sbandoindex/', $_[0], $_[1], '', ['.html']).' at Eurobabeindex'; },
    'imdb'          => sub { return xx('IMDb name', sprintf("%07s", $_[0]), $_[1]); },
    'homepage'      => sub { return xx('official', $_[0], undef); },
);

sub get_extlink_templates_re {
    my $re=qr/\{\{\s*(?i:imdb name|iafd name|afdb name)\s*(?:\||\}\})/o;
    return $re;
}

sub new {
    my $class=shift;
    my $self=$class->SUPER::new();
    bless $self, $class;
    return $self;
}

=pod

=for info
Approved 2009-05-03<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 29]]

=cut

sub approved {
    return -1;
}

sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('TemplateReplacer16', 0, 10, qw/d::Sections d::IWNS d::Redirects/);

    return 60 if(!defined($api->load_IWNS_maps($api)));

    if(!defined($extlink_templates_re)){
        $extlink_templates_re=get_extlink_templates_re($api);
    }

    # Cleanup database
    if(($api->store->{'seq'}//0)!=$seq){
        %{$api->store}=(seq=>$seq);
    }

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    $self->_output_log($api);

    # Get a list of templates redirecting to our target
    my %templates=$api->redirects_to_resolved("Template:$template");
    if(exists($templates{''})){
        $api->warn("Failed to get redirects to target templates: ".$templates{''}{'error'}."\n");
        return 60;
    }

    # Get the list of pages to check
    my $iter=$api->iterator(%iter);
    while(defined($_=$iter->next)){
        if(!$_->{'_ok_'}){
            $api->warn("Could not retrieve backlinks from iterator: ".$_->{'error'}."\n");
            return 60;
        }

        my $pageid=$_->{'pageid'};
        next if exists($api->store->{$pageid});

        # Cleanup the log
        my $log={};
        $log=$api->store->{'log'} if exists($api->store->{'log'});
        delete $log->{$_}{$pageid} foreach (keys %$log);
        $api->store->{'log'}=$log;

        my $title=$_->{'title'};
        $api->log("Processing $title");

        # WTF?
        if(exists($_->{'missing'})){
            $api->warn("$title is missing? WTF?\n");
            next;
        }

        # Ok, check the page
        my $tok=$api->edittoken($title);
        if($tok->{'code'} eq 'shutoff'){
            $api->warn("Task disabled: ".$tok->{'content'}."\n");
            return 300;
        }
        if($tok->{'code'} ne 'success'){
            $api->warn("Failed to get edit token for $title: ".$tok->{'error'}."\n");
            next;
        }
        next if exists($tok->{'missing'});

        # Get page text
        my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

        # Step 1: Find the parameters for the infobox. Also, strip the
        # parameters we are intending to process.
        my %infobox_params=();
        my $ct=0;
        my @process=();
        my $outtxt=$api->process_templates($intxt, sub {
            my $name=shift;
            my @params=@{shift()};
            shift; # $wikitext
            shift; # $data
            my $oname=shift;

            return undef unless exists($templates{"Template:$name"});
            if($ct++>0){ # More than one infobox?
                $self->_log($api, 'Multiple infoboxen', $pageid, $title, "$ct instances of the infobox detected.");
                $api->store->{$pageid}=1;
                return undef;
            }
            my @out=();
            foreach ($api->process_paramlist(@params)){
                $_->{'value'}=~s/^\s+|\s+$//g;
                $infobox_params{$_->{'name'}}=$_->{'value'} unless $_->{'value'} eq '';
                if(exists($ext_links{$_->{'name'}})){
                    push @process, $_->{'name'} unless $_->{'value'}=~/^(?><!--.*?-->\s*)*$/;
                    push @out, $_->{'text'} if($ext_links{$_->{'name'}}{'keepparam'} // 0);
                } else {
                    push @out, $_->{'text'};
                }
            }
            return "{{$oname|".join("|", @out)."}}";
        });
        next if $ct>1;
        if($ct<1){
            $self->_log($api, 'No infobox', $pageid, $title, "No instance of the infobox was found in the page.");
            $api->store->{$pageid}=2;
            next;
        }
        unless(@process){
            # Nothing to do here.
            $api->store->{$pageid}=1000000;
            next;
        }

        # Step 2: Extract the external links section
        my $nowiki;
        ($outtxt,$nowiki)=$api->strip_nowiki($outtxt);
        my $comments=[];
        while(my ($k,$v)=each(%$nowiki)){
            push @$comments, $k if $v=~/^<!--/;
        }
        $comments=join("|", @$comments);
        my @sections=();
        my $extlink_section=undef;
        my @split=("", "", split /((?:^|\n)==(=?)[^=\n](?:.*[^=\n])?\2==)(?=(?:\s*(?:$comments))*\s*(?:\n|$))/, $outtxt);
        for(my $i=0; $i<@split; $i+=3){
            my $h=$api->replace_nowiki($split[$i+0], $nowiki);
            $h=~s/^(\n?==)(=?)\s*External\s*(\2==)$/$1$2 External links $3/i;
            $h=~s/^(\n?==)(=?)(.*)External link\(s\)(.*\2==)$/$1$2$3External links$4/i;
            $h=~s/^(\n?==)(=?)(.*)External link((?!s).*\2==)$/$1$2$3External links$4/i;
            $h=~s/^(\n?==)(=?)(.*)External references?(.*\2==)$/$1$2$3External links$4/i;
            my $s=$h.$api->replace_nowiki($split[$i+2], $nowiki);
            push @sections, \$s;
            $extlink_section=\$s if $h=~/External links/i;
        }
        if(!defined($extlink_section)){
            # Crap, we have to create an external links section.
            $self->_log($api, 'Added "External links"', $pageid, $title, "No \"External links\" section was found in the page. Check if one was added in the right place.");
            my $x=pop @sections;
            my ($pre,$post)=$api->extract_end_content($$x);
            return 60 if(!defined($pre));
            $pre=~s/\s+$/\n/;
            push @sections, \$pre;
            my $dummy="\n== External links ==\n\n";
            $extlink_section=\$dummy;
            push @sections, $extlink_section;
            push @sections, \$post;
        } elsif($extlink_section==$sections[-1]){
            # Last section, strip off the post-content junk
            my $x=pop @sections;
            my ($pre,$post)=$api->extract_end_content($$x);
            return 60 if(!defined($pre));
            $extlink_section=\$pre;
            push @sections, $extlink_section;
            push @sections, \$post;
        }

        # Step 3: Process our parameters
        my $res=$api->query([],
            action=>'parse',
            text=>$$extlink_section,
            prop=>'externallinks',
        );
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to parse external links section for $title: ".$res->{'error'}."\n");
            return 60;
        }
        my @el=();
        @el=@{$res->{'parse'}{'externallinks'}} if exists($res->{'parse'}{'externallinks'});
        my $add='';
        my $put_first=0;
        foreach my $param (@to_process){
            next unless grep($_ eq $param, @process);
            my $id=$infobox_params{$param};
            my $fix = $ext_links{$param}{'fix'} // undef;
            $id=$fix->($id) if defined($fix);
            next if $id eq '';
            my $link = $ext_links{$param}{'link'};
            $link=~s/%X/\Q$id\E/g;
            next if grep(/^$link$/, @el);
            my $tmpl=$ext_templates{$param}->($id, $infobox_params{'name'} // undef, $infobox_params{'gender'} // undef);
            $add.="\n* $tmpl";
            $put_first=1 if($ext_links{$param}{'putfirst'}//0);
        }

        # Step 4: Reassemble the page, if anything changed in step 3
        if($add ne ''){
            if($put_first && $$extlink_section=~s/\n\*/$add\n*/){
                # Move "homepage" to the top of the external links
            } elsif($$extlink_section=~s/(\n\*\s*$extlink_templates_re.*?)\n/$1$add\n/){
                # Put it after any other existing external link templates
            } elsif($$extlink_section=~s/(\s*\n===)/$add$1/){
                # There is a subsection in there (e.g. "Reviews"), put the
                # links before it.
            } else {
                # Just tack it on the end.
                $$extlink_section=~s/(\s*)$/$add$1/;
            }
            $outtxt=join('', map { $$_ } @sections);
        } elsif($no_edit_just_to_remove_parameters){
            $api->store->{$pageid}=1000001;
            next;
        } else {
            $outtxt=$api->replace_nowiki($outtxt, $nowiki);
        }

        # Step 5: Perform the edit.
        $process[-1]='and '.$process[-1] if @process>1;
        my $summary='Moving deprecated '.join((@process>2)?', ':' ', @process)." from {{$template}} to External links per $req";
        $api->log("$summary in $title");
        my $r=$api->edit($tok, $outtxt, $summary, 0, 1);
        if($r->{'code'} ne 'success'){
            $api->warn("Write failed on $title: ".$r->{'error'}."\n");
            next;
        }

        # Mark this page as done
        $api->store->{$pageid}=2000000;

        # If we've been at it long enough, let another task have a go.
        if(time()>=$endtime){
            $self->_output_log($api);
            return 0;
        }
    }

    # No more pages to check, try again in 10 minutes or so in case of errors.
    $self->_output_log($api);
    return 600;
}

sub _log {
    my $self=shift;
    my $api=shift;
    my $section=shift;
    my $pageid=shift;
    my $title=shift;
    my $message=shift;

    my $log={};
    $log=$api->store->{'log'} if exists($api->store->{'log'});
    $log->{$section}={} unless exists($log->{$section});
    $log->{$section}{$pageid}=[$title, $message];
    $api->store->{'log'}=$log;
    $api->log("LOG: $title: $message");
}

sub _output_log {
    my $self=shift;
    my $api=shift;

    $api->log("Updating log");
    my $tok=$api->edittoken("User:AnomieBOT/TemplateReplacer16 log/$seq");
    if($tok->{'code'} eq 'shutoff'){
        $api->warn("Task disabled: ".$tok->{'content'}."\n");
        return 300;
    }
    if($tok->{'code'} ne 'success'){
        $api->warn("Failed to get edit token for log: ".$tok->{'error'}."\n");
        return;
    }
    my $header="This is a log of issues encountered during the processing of the task TemplateReplacer16/$seq. Do not edit this page, the bot will overwrite it.\n";
    my $intxt=exists($tok->{'missing'})?$header:$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};
    my $outtxt=$header;
    my $log={};
    $log=$api->store->{'log'} if exists($api->store->{'log'});
    foreach my $section (sort keys %$log){
        my @out=();
        foreach my $pageid (keys %{$log->{$section}}){
            next unless exists($api->store->{$pageid});
            my ($title,$message)=@{$log->{$section}{$pageid}};
            push @out, "* [[:$title]]: $message\n";
        }
        next unless @out;
        $outtxt.="\n== $section ==\n".join('', @out) if @out;
    }
    if($outtxt ne $intxt){
        my $r=$api->edit($tok, $outtxt, 'Updating log', 0, 0);
        if($r->{'code'} ne 'success'){
            $api->warn("Could not write log: ".$r->{'error'}."\n");
            return;
        }
    }
}

1;