User:AnomieBOT/source/tasks/RandomPagePicker.pm

From Wikipedia, the free encyclopedia
package tasks::RandomPagePicker;

=pod

=begin metadata

Bot:     AnomieBOT
Task:    RandomPagePicker
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 21
Status:  Approved 2009-01-14
Created: 2009-01-09

Periodically choose a random article from a category or union/intersection of
categories and write it to a page, as instructed by {{tlu|User:AnomieBOT/RandomPage}}.

=end metadata

=cut

use utf8;
use strict;

use Data::Dumper;
use POSIX;
use Date::Parse;
use AnomieBOT::Task qw/:time ns2cmtype/;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Name of the template this task looks for and rewrites on subscriber pages.
my $template='User:AnomieBOT/RandomPage';
# Category whose members are the pages run() scans for $template.
my $category='Category:AnomieBOT RandomPage subscriptions';
# Lower bound, in seconds, on the interval between two updates of one page
# (enforced in add_frequency() and when rendering "minimum frequency").
my $minimum_frequency=3600; # 1 hour
# Upper bound, in seconds, on the delay run() returns when nothing is due.
my $max_next=1800; # must be less than $minimum_frequency

# Default configuration (template parameters read by run()):
#   frequency: How often to edit, for example "2 days" or "2 hours 30 minutes".
#       Recognized values: minutes, hours, days, weeks, months, years.
#   categories: Either a single "Category:Name", or an expression written in
#       template-like prefix notation as parsed by load_pages(). Recognized
#       operators are AND, OR, AND NOT and SUBCATS. For example, to do
#       "(A or B) and (C or D)", use
#       {{AND|{{OR|Category:A|Category:B}}|{{OR|Category:C|Category:D}}}}.
#       SUBCATS takes a category and an optional integer depth.
#   namespaces: Namespaces to choose from, as a comma-separated list of
#       namespace numbers (joined with "|" for the API cmnamespace).
#   summary: Edit summary to use
#   minor:   Boolean, whether to mark the edit as minor.
#   botflag: Boolean.
#   template, date: Also read from the template; "date" is the time of the
#       last pick and is rewritten on every update.
#   repeat:  Boolean, if false then articles will not be repeated until all
#       other articles have had a chance.
#       NOTE(review): no code in this file reads a "repeat" parameter; pages
#       named in the bot's recent edit summaries are always excluded.
my %default_cfg=(
    frequency   => '1 week',
    categories  => '',
    namespaces  => '0',
);

# Cache so we don't look up the same category multiple times
# (emptied at the start of every run()).
my %cache=();

# Constructor: builds the object via the parent class constructor (called
# with no arguments) and re-blesses it into the requested class.
sub new {
    my ($class)=@_;
    my $task=$class->SUPER::new();
    return bless($task, $class);
}

=pod

=for info
Approved 2009-01-14.<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 21]]

=cut

# Returns the constant 5.
# NOTE(review): presumably an approval/status code interpreted by the bot
# framework -- confirm against AnomieBOT::Task.
sub approved {
    my $status=5;
    return $status;
}

# Main entry point of the task.
#
# Walks every page in $category, and for each page that is due (or has been
# edited since it was last checked) rewrites its $template transclusion with
# a freshly picked random page, or with an "error" parameter describing what
# is wrong with the template's configuration.
#
# Per-page state is kept in $api->store under the page id as a hash with:
#   revid   - revision id the state was computed for
#   nextrun - epoch time at which the page is next due
#   min     - minimum update interval in seconds, derived from the number of
#             category lookups the last evaluation needed
# plus an optional "reset<pageid>" timestamp limiting how far back edit
# summaries are scanned for previously picked pages.
#
# Parameters:
#   $self - the task object
#   $api  - the bot API object used for queries, edits, logging and storage
#
# Returns a number of seconds: 0 when halting or when the five-minute budget
# is used up, 60 when the subscription list cannot be fetched, 300 when the
# task is shut off, otherwise the time until the earliest page is due
# (capped at $max_next).
# NOTE(review): the caller presumably treats this as the delay before the
# next call -- confirm against the framework.
sub run {
    my ($self, $api)=@_;
    my $res;

    $api->task('RandomPagePicker', 0, 10, qw/d::Templates d::IWNS/);

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    # Load namespaces
    $self->{'namespaces'}={$api->namespace_reverse_map};

    my $next=$max_next;
    # Category results are only reused within a single run.
    %cache=();
    my %q=(
        generator       => 'categorymembers',
        gcmtitle        => $category,
        gcmsort         => 'sortkey',
        gcmlimit        => 'max',
        prop            => 'info',
    );
    do {
        $res=$api->query(%q);
        if($res->{'code'} ne 'success'){
            $api->warn("Failed to retrieve transclusion list for $template: ".$res->{'error'}."\n");
            return 60;
        }
        # Remember (or clear) the continuation token for the next batch.
        if(exists($res->{'query-continue'})){
            $q{'gcmcontinue'}=$res->{'query-continue'}{'categorymembers'}{'gcmcontinue'};
        } else {
            delete $q{'gcmcontinue'};
        }

        # Process found pages
        foreach (values %{$res->{'query'}{'pages'}}){
            return 0 if $api->halting;

            my $page=$_->{'title'};
            my $pageid=$_->{'pageid'};
            my $revid=$_->{'lastrevid'} // 0;
            my $check=$api->store->{$pageid} // undef;

            # If the page has been edited, we have to check it because they
            # might have edited the template parameters.
            $check=undef if(defined($check) && $check->{'revid'}!=$revid);

            # If the page hasn't been edited since the last check, we can use
            # the saved data to possibly skip loading the page.
            if(defined($check) && $check->{'nextrun'}>time()){
                my $t=$check->{'nextrun'}-time();
                $next=$t if $t<$next;
                next;
            }
            # Carry the previously computed minimum interval forward, if any.
            my $min=(defined($check) && exists($check->{'min'}))?$check->{'min'}:0;

            $api->log("Checking for $template in $page");

            # Ok, check the page
            my $tok=$api->edittoken($page, EditRedir => 1);
            if($tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$tok->{'content'}."\n");
                return 300;
            }
            if($tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for $page: ".$tok->{'error'}."\n");
                next;
            }
            next if exists($tok->{'missing'});

            # Get page text
            my $intxt=$tok->{'revisions'}[0]{'slots'}{'main'}{'*'};

            # These are shared with, and may be overwritten by, the callback.
            my $summary="Automatically updating $template";
            my $minor=0;
            my $bot=0;
            my $done=0;
            $check={
                revid => $tok->{'lastrevid'} // 0,
                nextrun => 0,
                min => $min,
            };
            # The callback is invoked with a template name and an array ref
            # of its raw parameters. Judging from its return sites, undef
            # means "leave this template unchanged" and a string is the
            # replacement wikitext.
            my $outtxt=$api->process_templates($intxt, sub {
                my $name=shift;
                my @params=@{shift()};

                return undef unless $name eq $template;
                my %cfg=%default_cfg;
                foreach (@params){
                    $cfg{$1}=$2 if /^\s*([^=]+?)\s*=\s*(.*?)\s*$/s;
                }
                # Normalize the boolean parameters to 1/0. Note the pattern
                # reads as "starts with 1 or y" OR "ends with yes".
                $cfg{'minor'}=($cfg{'minor'}=~/^[1y]|yes$/i)?1:0 if(exists($cfg{'minor'}));
                $cfg{'botflag'}=($cfg{'botflag'}=~/^[1y]|yes$/i)?1:0 if(exists($cfg{'botflag'}));

                # Start rebuilding the template in canonical form; each exit
                # below appends either an error or the new pick and closes it.
                my $out="{{$template\n";
                $out.=' | frequency  = '.$cfg{'frequency'}."\n";
                $out.=' | categories = '.$cfg{'categories'}."\n";
                $out.=' | namespaces = '.$cfg{'namespaces'}."\n";
                $out.=' | summary    = '.$cfg{'summary'}."\n" if exists($cfg{'summary'});
                $out.=' | minor      = '.($cfg{'minor'}?'yes':'no')."\n" if exists($cfg{'minor'});
                $out.=' | botflag    = '.($cfg{'botflag'}?'yes':'no')."\n" if exists($cfg{'botflag'});
                $out.=' | template   = '.$cfg{'template'}."\n" if exists($cfg{'template'});
                $out.=" | this page  = $page\n";
                if($done){
                    $out.=" | error      = Only one $template is allowed per page\n}}";
                    return $out;
                }
                $done=1;

                # "date" is the time of the previous pick; unparsable or
                # missing dates count as the epoch, making the page due now.
                if(exists($cfg{'date'})){
                    $cfg{'date'}=str2time($cfg{'date'});
                    $cfg{'date'}=0 unless defined($cfg{'date'});
                } else {
                    $cfg{'date'}=0;
                }
                my $t=add_frequency($cfg{'date'}, $cfg{'frequency'}, $check->{'min'});
                if(!defined($t)){
                    $out.=" | error      = Invalid frequency\n}}";
                    $check->{'nextrun'}=time()+86400;
                    $summary="{{[[User:AnomieBOT/RandomPage]]}} error: Invalid frequency";
                    return $out;
                }
                $check->{'nextrun'}=$t;
                if($t>time()){
                    # Not due yet: leave the template alone.
                    $t-=time();
                    $next=$t if $t<$next;
                    return undef;
                }

                if($cfg{'namespaces'}!~/^\d+(?:,\d+)*$/){
                    $out.=" | error      = Invalid namespaces parameter\n}}";
                    $check->{'nextrun'}=time()+86400;
                    $summary="{{[[User:AnomieBOT/RandomPage]]}} error: (Invalid namespaces parameter)";
                    return $out;
                }
                # For every requested namespace, select both members of its
                # even/odd pair (n&~1 and n|1) when they exist.
                my %ns=();
                foreach (split /,/, $cfg{'namespaces'}){
                    if(exists($self->{'namespaces'}{$_})){
                        $ns{$_&~1}=1 if exists($self->{'namespaces'}{$_&~1});
                        $ns{$_|1}=1 if exists($self->{'namespaces'}{$_|1});
                    } else {
                        $out.=" | error      = Invalid namespace number $_, see [[Help:Namespaces#List of namespaces|Help:Namespaces]].\n}}";
                        $check->{'nextrun'}=time()+86400;
                        $summary="{{[[User:AnomieBOT/RandomPage]]}} error: (Invalid namespace number $_, see [[Help:Namespaces#List of namespaces|Help:Namespaces]].)";
                        return $out;
                    }
                }

                # load_pages() dies on a malformed expression and returns
                # undef when an API query fails.
                $self->{'lookups'}=0;
                my $pages;
                eval {
                    $pages=$self->load_pages($api, join('|',sort { $a<=>$b } keys %ns), $cfg{'categories'});
                };
                if($@){
                    my $x=$@;
                    $x=~s/\s+$//;
                    $out.=" | error      = Invalid categories parameter: <nowiki>$x</nowiki>\n}}";
                    $check->{'nextrun'}=time()+86400;
                    $summary="{{[[User:AnomieBOT/RandomPage]]}} error: (Invalid categories parameter: <nowiki>$x</nowiki>)";
                    return $out;
                }
                if(!defined($pages)){
                    $next=60 if $next>60;
                    return undef;
                }

                # Each category lookup performed adds 10 minutes to this
                # page's minimum update interval.
                $check->{'min'}=$self->{'lookups'}*600;
                if(!@$pages){
                    $out.=" | error      = No pages match.\n}}";
                    $check->{'nextrun'}=time()+$check->{'min'};
                    $summary="{{[[User:AnomieBOT/RandomPage]]}} error: (No pages match.)";
                    return $out;
                }

                # Find the list of pages we've picked in the last 5000 edits
                # (and since the last time we ran out of pages), to avoid
                # picking them again too soon
                my %pages;
                @pages{@$pages}=undef;
                my %qc=(
                    titles  => $page,
                    prop    => "revisions",
                    rvprop  => "comment",
                    rvuser  => $api->user,
                    rvlimit => "max",
                );
                $qc{'rvend'}=$api->store->{"reset$pageid"} if exists($api->store->{"reset$pageid"});
                my $resc=$api->query(%qc);
                if($resc->{'code'} ne 'success'){
                    $api->warn("Failed to retrieve edit summaries for $page: ".$resc->{'error'}."\n");
                    # This is a transient failure inside the template
                    # callback. Returning a number here would hand that
                    # number back as the template's replacement text, so
                    # leave the template untouched and ask to be run again
                    # soon, exactly like a failed category lookup above.
                    $next=60 if $next>60;
                    return undef;
                }
                # Our own edit summaries end in "[[Picked page]]"; drop every
                # page named that way from the candidate set.
                foreach (@{(values %{$resc->{'query'}{'pages'}})[0]{'revisions'}}){
                    next unless($_->{'comment'} && $_->{'comment'}=~/\[\[([^]]*)\]\]$/);
                    delete $pages{$1};
                    last unless %pages;
                }
                if(%pages){
                    $pages=[keys %pages];
                } else {
                    # Ran out of pages, reset the date for "recently"
                    $api->store->{"reset$pageid"}=$tok->{'revisions'}[0]{'timestamp'};
                }

                my $pg=$pages->[int rand(@$pages)];
                $t=time();
                $out.=" | page       = $pg\n";
                $out.=" | date       = ".strftime("%F %T +0000", gmtime $t)."\n";
                # Render the effective minimum interval in human-readable
                # weeks/days/hours/minutes.
                my $min=$check->{'min'};
                $min=$minimum_frequency if($min<$minimum_frequency);
                my @m=();
                if($min>=7*86400){
                    my $w=POSIX::floor($min/(7*86400));
                    $min-=$w*7*86400;
                    push @m, "$w week".(($w==1)?'':'s');
                }
                if($min>=86400){
                    my $d=POSIX::floor($min/86400);
                    $min-=$d*86400;
                    push @m, "$d day".(($d==1)?'':'s');
                }
                if($min>=3600){
                    my $h=POSIX::floor($min/3600);
                    $min-=$h*3600;
                    push @m, "$h hour".(($h==1)?'':'s');
                }
                if($min>0){
                    my $m=POSIX::ceil($min/60);
                    push @m, "$m minute".(($m==1)?'':'s');
                }
                $out.=" | minimum frequency = ".join(' ', @m)."\n";

                $out.="}}";

                $minor=$cfg{'minor'} if exists($cfg{'minor'});
                $bot=$cfg{'botflag'} if exists($cfg{'botflag'});
                $summary=$cfg{'summary'} if exists($cfg{'summary'});
                # Truncate the summary so that ": [[page]]" still fits; the
                # trailing link is what later runs parse to find past picks.
                $summary=substr($summary,0,250-length($pg)-6).": [[$pg]]";

                $check->{'nextrun'}=add_frequency($t, $cfg{'frequency'}, $check->{'min'});
                return $out;
            });

            # Need to edit?
            if($outtxt ne $intxt){
                $api->log("$summary in $page");
                my $r=$api->edit($tok, $outtxt, $summary, $minor, $bot);
                if($r->{'code'} ne 'success'){
                    $api->warn("Write failed on $page: ".$r->{'error'}."\n");
                    next;
                }
                # Record our own revision so the next run does not treat the
                # page as having been edited by someone else.
                $check->{'revid'}=$r->{'edit'}{'newrevid'} // 0;
            } else {
                $api->log("Nothing to do in $page");
            }

            # Store data
            $api->store->{$pageid}=$check;

            # If we've been at it long enough, let another task have a go.
            return 0 if time()>=$endtime;
        }
    } while(exists($q{'gcmcontinue'}));

    return $next;
}

# Work out when a template is next due.
#
# Parameters:
#   $lastrun - epoch time of the previous update
#   $freq    - frequency string such as "2 days" or "2 hours 30 minutes";
#              each term is "<number> <unit>" with unit one of minute, hour,
#              day, week, month or year (optionally plural, any case)
#   $min     - minimum interval in seconds; raised to $minimum_frequency if
#              smaller
#
# Returns the epoch time of the next update, never earlier than $lastrun plus
# the effective minimum interval, or undef if $freq contains anything that is
# not a recognized term.
sub add_frequency {
    my ($lastrun,$freq,$min)=@_;

    # For each unit: which gmtime() field it increments, and by how many of
    # that field per unit (a week is seven days).
    my %field_for=(
        minute => [1,1],
        hour   => [2,1],
        day    => [3,1],
        week   => [3,7],
        month  => [4,1],
        year   => [5,1],
    );

    my @when=gmtime $lastrun;

    # Consume "<number> <unit>" terms from the front of the string; the
    # leading space lets the first term match the same pattern as the rest.
    my $rest=' '.$freq;
    while($rest=~s/^\s+(\d+)\s+(minute|hour|day|week|month|year)s?//i){
        my ($count,$unit)=($1,lc($2));
        my ($idx,$mult)=@{$field_for{$unit}};
        $when[$idx]+=$mult*$count;
    }

    # Leftover non-whitespace means the frequency string was malformed.
    return undef unless($rest=~/^\s*$/);

    my $due=timegm($when[0],$when[1],$when[2],$when[3],$when[4],$when[5]);

    # Enforce the minimum interval.
    my $floor=($min<$minimum_frequency)?$minimum_frequency:$min;
    my $earliest=$lastrun+$floor;
    return ($due<$earliest)?$earliest:$due;
}

# Evaluate a "categories" expression and return the matching page titles.
#
# The expression is either a plain "Category:Name", or a template-like
# operator call "{{OP|param|param...}}" whose parameters are themselves
# expressions:
#   AND      - pages present in every parameter
#   OR       - pages present in any parameter
#   AND NOT  - pages in the first parameter and in none of the others
#   SUBCATS  - a category plus its subcategories, followed to the optional
#              integer depth given as second parameter (default -1, which
#              never reaches 0 and so does not limit the depth); the
#              resulting categories are then combined with OR
#
# Titles of pages in odd-numbered namespaces are rewritten to drop the
# "talk" part of their prefix before being collected.
#
# Parameters:
#   $self - task object; $self->{'lookups'} is incremented once per API
#           request made
#   $api  - bot API object used for queries and warnings
#   $ns   - namespace list passed as cmnamespace ("|"-separated numbers)
#   $text - the expression
#
# Returns an array ref of titles, or undef if an API query failed.
# Dies with a message describing the problem if the expression is malformed.
sub load_pages {
    my $self=shift;
    my $api=shift;
    my $ns=shift;
    my $text=shift;
    $text=~s/^\s+|\s+$//g;

    if($text=~/^Category:/i){
        # The member list depends on the namespace filter, so the cache is
        # keyed by both. SUBCATS looks categories up with namespace 14
        # first; keying by title alone would hand those subcategory lists
        # back when the same category is later requested for the real
        # namespaces (or by another subscription with different namespaces).
        if(!exists($cache{$ns}{$text})){
            my %q=(
                list        => 'categorymembers',
                cmtitle     => $text,
                cmprop      => 'title',
                cmnamespace => $ns,
                cmtype      => ns2cmtype($ns),
                cmlimit     => 'max'
            );
            my %x=();
            do {
                my $res=$api->query(%q);
                if($res->{'code'} ne 'success'){
                    $api->warn("Failed to retrieve categories for $text: ".$res->{'error'});
                    return undef;
                }
                if(exists($res->{'query-continue'})){
                    $q{'cmcontinue'}=$res->{'query-continue'}{'categorymembers'}{'cmcontinue'};
                } else {
                    delete $q{'cmcontinue'};
                }
                foreach (@{$res->{'query'}{'categorymembers'}}){
                    # Map talk pages onto their subject pages.
                    $_->{'title'}=~s/^([^:]+) talk:/$1:/ if(($_->{'ns'}&1)==1);
                    $_->{'title'}=~s/^Talk:// if $_->{'ns'}==1;
                    $x{$_->{'title'}}=1;
                }
                $self->{'lookups'}++;
            } while(exists($q{'cmcontinue'}));
            $cache{$ns}{$text}=[keys %x];
        }
        return $cache{$ns}{$text};
    }

    die "Invalid parameter \"$text\"\n" unless $text=~/\{\{\s*(AND|OR|AND NOT|SUBCATS)\s*\|(.+?)\}\}$/is;
    my ($op,$params)=(uc($1),$2);
    $params=~s/^\s+|\s+$//g;

    # Split the parameter string on top-level "|" only, tracking brace depth
    # so that nested {{...}} expressions stay intact.
    my @params=();
    my $depth=0;
    my $l=length($params);
    my $j=0;
    for(my $i=0; $i<$l; $i++){
        my $c=substr($params,$i,1);
        if($c eq '{'){
            $depth++;
        } elsif($c eq '}'){
            $depth--;
            die "Unexpected '}' in \"$text\"\n" if $depth<0;
        } elsif($c eq '|' && $depth==0){
            push @params, substr($params, $j, $i-$j);
            $j=$i+1;
        }
    }
    push @params, substr($params, $j, $l-$j) if $j<$l;
    die "Unmatched '{' in \"$text\"\n" if $depth!=0;

    if($op eq 'SUBCATS'){
        # These messages end in a newline, like the ones above, so that Perl
        # does not append "at FILE line N." to text the caller shows to
        # users.
        die "$op first parameter must be a category\n" unless(@params>=1 && $params[0]=~/^Category:/i);
        push @params, -1 if(@params==1);
        die "$op optional second parameter must be an integer depth\n" if(@params>=2 && $params[1]!~/^[+-]?\d+$/);
        die "$op takes only 1 or 2 parameters\n" if @params>=3;
        # Walk the subcategory tree using a queue of [category, remaining
        # depth] pairs, visiting each category once.
        my %cats=();
        my @cats=( [$params[0], $params[1]] );
        while(my $x=shift(@cats)){
            my ($cat,$depth)=@$x;
            next if exists($cats{$cat});
            $cats{$cat}=1;
            next if $depth==0;
            my $res=$self->load_pages($api, '14', $cat);
            return undef unless defined($res);
            push @cats, [ $_, $depth-1 ] foreach (@$res);
        }
        # From here on, behave as an OR over all collected categories.
        $op='OR';
        @params=keys %cats;
    }

    # Tally pages: every parameter adds 1 per page, except that for AND NOT
    # all parameters after the first subtract 1 instead.
    my %pages=();
    my $add=1;
    foreach my $p (@params){
        my $res=$self->load_pages($api, $ns, $p);
        return undef unless defined($res);
        foreach (@$res){
            $pages{$_}=0 unless exists($pages{$_});
            $pages{$_}+=$add;
        }
        $add=-1 if $op eq 'AND NOT';
    }

    if($op eq 'AND'){
        my $ct=@params;
        return [grep($pages{$_}>=$ct, keys %pages)];
    } elsif($op eq 'OR'){
        return [keys %pages];
    } elsif($op eq 'AND NOT'){
        return [grep($pages{$_}>=1, keys %pages)];
    } else {
        die "Invalid op \"$op\"";
    }
}

1;