# User:AnomieBOT/source/tasks/TalkTagger.pm
#
# From Wikipedia, the free encyclopedia
package tasks::TalkTagger;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    TalkTagger
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 69
Status:  Completed 2013-07-17
Created: 2013-06-12

Tag talk pages with {{tl|Reliable sources for medical articles}} when they meet
the criteria laid out by [[WP:WPMED]].

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task qw/:time/;
use Digest::SHA qw/sha256_base64/;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Template categories that influence where the new tag is placed on a talk
# page. The value is the bit flag that _chk_template ORs into its result for a
# template found in that category:
#   1 - a header-type template; run places the new tag after these
#   2 - a WikiProject banner; run places the new tag before the first of these
my %aftercats = (
    ( map { ( $_ => 1 ) } (
        'Category:Talk header templates',
        'Category:Non-subject-matter-related article-talk header templates',
        'Category:Article talk header templates',
        'Category:Portal talk header templates',
        'Category:Script talk header templates',
        'Category:Template talk header templates',
        'Category:User talk header templates',
        'Category:Wikipedia talk header templates',
        'Category:Wikipedia GA templates',
        'Category:Wikipedia featured content templates',
        'Category:Wikipedia release version templates',
    ) ),
    ( map { ( $_ => 2 ) } (
        'Category:WikiProject banners',
        'Category:WikiProject banners with quality assessment',
        'Category:WikiProject banners without quality assessment',
    ) ),
);

# Page-list queries that run hands, one at a time, to $api->iterator. The
# single query here lists pages in namespace 0 that embed any of the listed
# infobox templates.
my @iterdef = (
    {
        list        => 'embeddedin',
        eititle     => [
            map { "Template:$_" } (
                'Infobox disease',
                'Infobox symptom',
                'Interventions infobox',
                'Diagnostic infobox',
                'Drugbox',
                'Drugclassbox',
            )
        ],
        einamespace => 0,
        eilimit     => 'max',
    },
);

# Name (without the "Template:" prefix) of the template this task adds.
my $template = 'Reliable sources for medical articles';

# Edit summary used for every tagging edit; it also points at the task's
# shutoff page.
my $summary = 'Tagging with {{' . $template . '}} for [[WP:WPMED]].'
    . ' Errors? [[User:AnomieBOT/shutoff/TalkTagger]]';

# Constructor. The object is built by the parent class's constructor and then
# given this task's iteration state:
#   iterdef - a private copy of the file-level query list; run consumes it
#   iter    - the iterator currently being drained (none yet)
sub new {
    my ($class) = @_;

    my $self = $class->SUPER::new();
    @{$self}{ 'iterdef', 'iter' } = ( [ @iterdef ], undef );

    return bless $self, $class;
}

=pod

=for info
Approved 2013-07-15, task completed 2013-07-17<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 69]]

=cut

# Approval status reported for this task.
# NOTE(review): -1 presumably tells the framework not to run the task any more
# (the metadata above lists it as completed) -- confirm against AnomieBOT::Task.
sub approved {
    my $status = -1;
    return $status;
}

# Main work loop of the task.
#
# For every page produced by the queries in $self->{'iterdef'}, looks at the
# corresponding "Talk:" page and, unless that page already transcludes
# {{$template}}, inserts the template near the top:
#   * directly before the first WikiProject banner, if the leading run of
#     comments/templates contains one; otherwise
#   * directly after the leading run of header-type templates (those that
#     _chk_template flags with bit 1).
# Talk pages that were handled are remembered in $api->store under the key
# "done <title>" and skipped on later passes.
#
# Parameters:
#   $self - the task object ('iterdef' and 'iter' hold the iteration state)
#   $api  - the bot API object
#
# Returns a number: 60 after a page-list failure, 300 when the task is shut
# off, 3600 after all queries have been exhausted.
# NOTE(review): these look like "seconds until the next call" -- confirm
# against the AnomieBOT task contract.
sub run {
    my ($self, $api) = @_;
    # NOTE(review): $res is declared but never used in this sub.
    my $res;

    # NOTE(review): the trailing d::Templates / d::Redirects arguments
    # presumably load the helpers that provide process_templates and friends;
    # the meaning of 0 and 10 is not visible here -- confirm against the API.
    $api->task( 'TalkTagger', 0, 10, qw/d::Templates d::Redirects/ );
    my $ptemplate = "Template:$template";

    # Outer loop: one pass per query definition. The current iterator is kept
    # in $self->{'iter'} so an early return from this sub can resume with it.
    while ( 1 ) {
        my $iter = $self->{'iter'};
        if ( !defined( $iter ) ) {
            # No iterator in progress: start the next query, or finish when
            # every query definition has been consumed.
            last unless @{$self->{'iterdef'}};
            $iter = $api->iterator( %{shift @{$self->{'iterdef'}}} );
            $self->{'iter'} = $iter;
        }

        while ( my $p = $iter->next ) {
            if ( !$p->{'_ok_'} ) {
                $api->warn( "Failed to retrieve page list: " . $p->{'error'} . "\n" );
                return 60;
            }

            # The query lists articles; the tag goes on the article's talk page.
            my $title = 'Talk:' . $p->{title};
            next if $api->store->{"done $title"} // 0;

            # Fetch the edit token and page data. The extra 'templates' request
            # is restricted to $ptemplate, so a non-empty result below means
            # the talk page already transcludes the tag.
            my $tok = $api->edittoken( $title, EditRedir => 1,
                templates => { templates => $ptemplate, limit => 'max' },
            );
            if ( $tok->{'code'} eq 'shutoff' ) {
                $api->warn( "Task disabled: " . $tok->{'content'} . "\n" );
                return 300;
            }
            if ( $tok->{'code'} ne 'success' ) {
                $api->warn( "Failed to get edit token for $title: " . $tok->{'error'} . "\n" );
                next;
            }
            if ( exists( $tok->{'redirect'} ) ) {
                $api->warn( "Skipping redirect $title\n" );
                next;
            }
            # Already tagged: nothing to do. Note the page is not marked as
            # done here, so it is looked at again on a later pass.
            next if @{$tok->{'templates'} // []};

            # Current wikitext; empty when there is no revision content.
            my $intxt = $tok->{'revisions'}[0]{'slots'}{'main'}{'*'} // '';

            # Look for content or a template that we don't go after
            # Templates are replaced by \x02...\x03 placeholder tags (recorded
            # in $outtmpl by _strip_templates) so the scans below can walk the
            # top of the page one template at a time.
            my ($outtxt,$nowiki) = $api->strip_nowiki( $intxt );
            my $outtmpl = {};
            my $after = $api->process_templates( $outtxt, \&_strip_templates, $outtmpl );
            # From here on $outtxt is the text that will precede the new tag
            # and $after is the text that will follow it.
            $outtxt = '';

            # First, look for a WikiProject banner
            my $chk = 0;
            while(1){
                # Reset so that leaving the loop for any reason other than
                # "banner found" leaves bit 2 clear.
                $chk = 0;
                # Move any leading HTML comments (with the whitespace before
                # them) over to the prefix.
                $outtxt .= $1 if $after =~ s/^((?:\s*<!--.*?-->)*)//s;
                # Stop at the first thing that is not a placeholder tag.
                last unless $after =~ s/^(\s*)(\x02[0-9A-Za-z_-]+\x03)//;
                my ($sp, $tag) = ($1, $2);
                if ( !exists( $outtmpl->{$tag} ) ) {
                    # Not a template, so put it back and stop looking.
                    $after=$sp.$tag.$after;
                    last;
                }

                $chk = _chk_template( $api, $outtmpl->{$tag}{'name'} );

                # It's a WikiProject banner! Go before it
                if ( $chk & 2 ) {
                    $after=$sp.$tag.$after;
                    last;
                }

                # It's some other template, keep looking
                $outtxt.=$sp.$tag;
            }

            # If we didn't find a WikiProject banner, look at the other templates.
            unless ( $chk & 2 ) {
                # Undo the first scan and start over from the top of the page.
                $after = $outtxt . $after;
                $outtxt = '';
                while(1){
                    $outtxt .= $1 if $after =~ s/^((?:\s*<!--.*?-->)*)//s;
                    last unless $after =~ s/^(\s*)(\x02[0-9A-Za-z_-]+\x03)//;
                    my ($sp, $tag) = ($1, $2);
                    if ( !exists( $outtmpl->{$tag} ) ) {
                        # Not a template, so put it back and stop looking.
                        $after=$sp.$tag.$after;
                        last;
                    }

                    $chk = _chk_template( $api, $outtmpl->{$tag}{'name'} );

                    # It's a template we should go after, continue
                    if ( ( $chk & 3 ) == 1 ) {
                        $outtxt.=$sp.$tag;
                        next;
                    }

                    # It's some other template. End!
                    $after=$sp.$tag.$after;
                    last;
                }
            }

            # Now, insert the new tag
            # It goes on its own line: a newline separates it from any prefix,
            # and one is added after it unless the remainder already starts
            # with (optional whitespace and) a newline.
            $outtxt .= "\n" if $outtxt ne '';
            $outtxt .= "{{$template}}";
            $outtxt .= "\n" unless $after=~/^\s*\n/;
            $outtxt .= $after;
            # Put the original template and nowiki text back in place of the
            # placeholders.
            $outtxt = _unstrip_templates( $outtxt, $outtmpl );
            $outtxt = $api->replace_nowiki( $outtxt, $nowiki );

            if ( $intxt ne $outtxt ) {
                $api->log("$summary in $title");
                # NOTE(review): the two trailing 1s are presumably the minor
                # and bot edit flags -- confirm against the API's edit method.
                my $r = $api->edit( $tok, $outtxt, $summary, 1, 1 );
                if($r->{'code'} ne 'success'){
                    # Not marked as done, so the page is retried later.
                    $api->warn("Write failed on $title: ".$r->{'error'}."\n");
                    next;
                }
            }
            $api->store->{"done $title"} = 1;
        }

        # This query is exhausted; the next pass of the outer loop starts the
        # following one.
        $self->{'iter'} = undef;
    }

    $api->log( "TalkTagger may be DONE!" );
    # For restart
    $self->{'iterdef'} = [ @iterdef ];

    return 3600;
}

# Callback for process_templates: swaps a template for an opaque placeholder
# tag and records the template in the hash passed as the fourth argument.
#
# Parameters:
#   $name     - template name
#   $params   - template parameters (not used here)
#   $wikitext - wikitext of the template; it may already contain placeholder
#               tags for nested templates
#   $data     - hashref keyed by placeholder tag; receives
#               { name => ..., text => ... } for this template
#
# Returns the placeholder tag: "\x02", the URL-safe unpadded base64 SHA-256 of
# the template's fully expanded wikitext, "\x03". Returns undef for
# {{#tag:ref}}; NOTE(review): presumably undef tells process_templates to
# leave that text alone -- confirm against its documentation.
sub _strip_templates {
    my ($name, $params, $wikitext, $data) = @_;

    if ( $name =~ /^#tag:\s*ref$/is ) {
        return undef;
    }

    # Expand nested placeholders first so both the digest and the stored text
    # describe the complete original wikitext.
    my $expanded = _unstrip_templates( $wikitext, $data );

    # The digest function works on octets, so encode a copy of character data.
    my $octets = $expanded;
    utf8::encode( $octets ) if utf8::is_utf8( $octets );

    # Map the base64 alphabet onto the characters the placeholder patterns
    # accept: "+" becomes "-", "/" becomes "_", "=" is dropped.
    ( my $digest = sha256_base64( $octets ) ) =~ tr!+/=!-_!d;

    my $tag = "\x02" . $digest . "\x03";
    $data->{$tag} = { name => $name, text => $expanded };
    return $tag;
}

# Inverse of _strip_templates: replaces every placeholder tag in the text with
# the wikitext recorded for it.
#
# Parameters:
#   $wikitext - text that may contain "\x02...\x03" placeholder tags
#   $templ    - hashref keyed by tag, each value holding the original wikitext
#               under 'text'
#
# Returns the text with known tags expanded; tags without an entry in $templ
# are left exactly as they were.
sub _unstrip_templates {
    my ($wikitext, $templ) = @_;

    $wikitext =~ s{(\x02[a-zA-Z0-9_-]+\x03)}{
        my $tag = $1;
        exists( $templ->{$tag} ) ? $templ->{$tag}{'text'} : $tag;
    }ge;

    return $wikitext;
}

# Classifies a template by the categories its template page belongs to.
#
# Parameters:
#   $api  - bot API object; its cache, query and warn methods are used
#   $name - template name without the "Template:" prefix
#
# Returns a bitmask: the %aftercats values of all categories the template page
# is in, ORed together (bit 1 = header-type template, bit 2 = WikiProject
# banner). The result is cached for 86400 seconds under the name as given and,
# when the query resolved a redirect, under the resolved title as well.
#
# When the category query fails, a warning is logged and 0 is returned without
# caching, so the lookup is attempted again the next time the template is seen.
sub _chk_template {
    my ($api, $name) = @_;

    my $cachekey = "TalkTagger:chk<<$name>>";
    my $chk = $api->cache->get( $cachekey );
    return $chk if defined( $chk );

    my $res = $api->query(
        titles    => "Template:$name",
        prop      => 'categories',
        cllimit   => 'max',
        redirects => 1,
    );
    if ( $res->{'code'} ne 'success' ) {
        $api->warn( "Failed to get cats for Template:$name: " . $res->{'error'} . "\n" );
        # Callers treat the result as a bitmask, so report "no flags" rather
        # than an unrelated number.
        return 0;
    }

    # A single title was requested, so at most one page entry is expected.
    my ($pg) = values %{ $res->{'query'}{'pages'} // {} };
    $pg //= {};

    $chk = 0;
    for my $cat ( @{ $pg->{'categories'} // [] } ) {
        $chk |= $aftercats{ $cat->{'title'} // '' } // 0;
    }
    $api->cache->set( $cachekey, $chk, 86400 );

    # The query follows redirects, so the returned title can differ from the
    # requested one; cache under it too to save a lookup for the target name.
    # Work on a copy so the API response is left untouched.
    if ( defined( my $resolved = $pg->{'title'} ) ) {
        $resolved =~ s/^Template://;
        $api->cache->set( "TalkTagger:chk<<$resolved>>", $chk, 86400 )
            if $resolved ne $name;
    }

    return $chk;
}

1;