# User:AnomieBOT/source/tasks/EditorReviewArchiver.pm
#
# From Wikipedia, the free encyclopedia
package tasks::EditorReviewArchiver;

=pod

=for warning
Due to breaking changes in AnomieBOT::API, this task will probably not run
anymore. If you really must run it, try getting a version from before
2018-08-12.

=begin metadata

Bot:     AnomieBOT
Task:    EditorReviewArchiver
BRFA:    Wikipedia:Bots/Requests for approval/AnomieBOT 46
Status:  Inactive 2014-05-29
Created: 2010-10-02

Archive reviews at [[WP:Editor review]].

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task qw/:time/;
use Data::Dumper;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Tuning knobs for run(); all values are in days.

# Minimum age of a review (measured from its first revision) before it may be
# closed; reviews are examined once they are within $notification_days of it.
my $archive_days=30;
# Backlog reviews that are not yet known to the bot are automatically listed
# on the main page only if their first revision is younger than this.
my $autolist_days=21;
# A review must have gone this long without a new revision before it is closed.
my $inactive_days=7;
# Lead time between the talk-page notice and the actual closing.
my $notification_days=3;

# Constructor. Builds the object through the parent class, starts the
# in-memory 'lasttime' marker at 0 (run() treats 0 as "not loaded from the
# store yet"), and blesses the result into the requested class.
sub new {
    my ($class) = @_;

    my $self = $class->SUPER::new();
    $self->{'lasttime'} = 0;

    return bless($self, $class);
}

=pod

=for info
Approved 2010-10-14<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT 46]]

=for info
Bot is currently inactive, as Editor Review has been closed.

=cut

# Approval status reported for this task. Always -2.
# NOTE(review): presumably the framework's code for an approved task that is
# currently inactive (see the POD above) -- confirm against AnomieBOT::Task.
sub approved {
    my $status = -2;
    return $status;
}

# Perform one pass of the Editor review archiver.
#
# Parameters:
#   $self - this task object; 'lasttime' and 'broken' are cached on it
#           between calls.
#   $api  - the API object used for every wiki query/edit and for the
#           persistent store ($api->store).
#
# Returns a number of seconds (presumably the delay the framework should wait
# before calling run again -- confirm against AnomieBOT::Task):
#   60   after an API failure,
#   300  when the task is shut off, or [[Wikipedia:Editor review]] is missing
#        or lacks the marker comments,
#   otherwise the time left until the next scheduled pass (7200 seconds after
#   the start of a clean pass, 600 if a notification failed during the pass).
#
# Outline of a pass:
#   1. Throttle using the saved 'lasttime' / 'broken' values.
#   2. Load the main page and collect the transcluded review subpages.
#   3. Drop entries whose page is a redirect.
#   4. Walk the backlog category to flag unreviewed reviews and auto-list
#      recent ones that are not transcluded yet.
#   5. For old, reviewed, inactive reviews: notify the user, or, once the
#      notice period has passed, mark the review for archival.
#   6. Rewrite the main page list.
#   7. Merge archived reviews into the per-year archive pages.
#   8. Save the timestamp and the 'broken' flag.
sub run {
    my ($self, $api)=@_;
    my $res;

    # Argument meanings are defined by the API object, not visible here.
    $api->task('EditorReviewArchiver', 0, 10, qw/d::Talk d::Templates d::Redirects/);

    # Step 1: throttle. Load the last run time from the store once, accepting
    # it only if it is a plain integer that is not in the future.
    if($self->{'lasttime'}==0){
        if(exists($api->store->{'lasttime'})){
            my $saved=$api->store->{'lasttime'};
            $self->{'lasttime'}=$saved if($saved=~/^\d+$/ && $saved<=time());
        }
    }
    $self->{'broken'}=$api->store->{'broken'}//0 unless exists($self->{'broken'});
    my $starttime=time();
    my $t=$self->{'lasttime'}+($self->{'broken'}?600:7200)-$starttime;
    return $t if $t>0;

    # Suffix appended to every edit summary, pointing at the shutoff page.
    my $screwup=' ([[User:'.$api->user.'/shutoff/EditorReviewArchiver|errors?]])';
    my $broken=0;

    # Step 2: load the editor review page
    my $tok=$api->edittoken('Wikipedia:Editor review');
    if($tok->{'code'} eq 'shutoff'){
        $api->warn("Task disabled: ".$tok->{'content'}."\n");
        return 300;
    }
    if($tok->{'code'} ne 'success'){
        $api->warn("Failed to get edit token for Wikipedia:Editor review: ".$tok->{'error'}."\n");
        return 60;
    }
    return 300 if exists($tok->{'missing'});
    my $intxt=$tok->{'revisions'}[0]{'*'};

    # Get the list of current reviews. The callback is invoked per template
    # with the template name as its first argument.
    # NOTE(review): presumably returning '' removes the transclusion from the
    # returned text and undef leaves it untouched -- confirm against
    # process_templates.
    my @requests=();
    my %rr=();
    my $fail=0;
    my $outtxt=$api->process_templates($intxt, sub {
        return undef if $fail;
        my $name=shift;

        return undef unless $name=~m!^Wikipedia:Editor review/(.+)$!;
        my $name2=$1;
        return undef if $name2 eq 'Header';

        # Reuse the stored record for this review if there is one.
        my $rdata;
        if(exists($api->store->{$name2})){
            $rdata=$api->store->{$name2};
        } else {
            $rdata={ name=>$name2, page=>$name, notified=>undef, user=>undef };
        }
        # NOTE(review): 'user' legitimately stays undef when the subpage name
        # does not start with the creator's name, so such records are
        # re-queried on every pass.
        if(!defined($rdata->{'user'})){
            # Oldest revision gives the creation time and the creator.
            my $first=$api->query(
                titles  => $name,
                prop    => 'revisions',
                rvlimit => 1,
                rvdir   => 'newer',
                rvprop  => 'timestamp|user',
            );
            if($first->{'code'} ne 'success'){
                $api->warn("Failed to get first revision timestamp for $name: ".$first->{'error'}."\n");
                $fail=60;
                return undef;
            }
            my $p = (values %{$first->{'query'}{'pages'}})[0];
            if(exists($p->{'missing'})){
                # Keep the entry listed, but flagged unreviewed so that it is
                # never considered for notification or archival below.
                $rdata->{'reviewed'}=0;
                push @requests, $rdata;
                return '';
            }
            my $rev=$p->{'revisions'}[0];
            my $u=$rev->{'user'};
            # Trust the creator as "the reviewed user" only when the subpage
            # is named after them (optionally followed by whitespace + text).
            $rdata->{'user'}=($name2=~/^\Q$u\E(?:\s.*)?$/?$u:undef);
            $rdata->{'timestamp'}=ISO2timestamp($rev->{'timestamp'});
            $api->store->{$name2}=$rdata;
        }

        # Assume reviewed until the backlog category says otherwise.
        $rdata->{'reviewed'}=1;
        push @requests, $rdata;
        $rr{$name}=$name;
        return '';
    });
    return $fail if $fail;
    # Strip horizontal-rule separator lines; they are regenerated below.
    $outtxt=~s/\n-----*\s*?\n/\n/g;

    # Step 3: replace redirects
    my $rmredir=scalar @requests;
    %rr=$api->resolve_redirects(keys %rr);
    if(exists($rr{''})){
        $api->warn("Failed to check for redirects: ".$rr{''}{'error'}."\n");
        # Retry shortly, like every other API failure in this method. (This
        # path used to return undef, which is not a delay value.)
        return 60;
    }
    # Keep only entries whose page resolves to itself.
    @requests=grep { ($rr{$_->{'page'}}//$_->{'page'}) eq $_->{'page'} } @requests;
    $rmredir-=scalar @requests;

    # Step 4: now find which are unreviewed, and find any untranscluded
    # unreviewed requests
    my $iter=$api->iterator(
        generator    => 'categorymembers',
        gcmtitle     => 'Category:Wikipedians on editor review/Backlog',
        gcmnamespace => 4,
        gcmtype      => 'page',
        gcmlimit     => 'max',
        gcmsort      => 'timestamp',
    );
    my $new=0;
    while(my $member=$iter->next){
        if(!$member->{'_ok_'}){
            $api->warn("Could not retrieve category members from iterator: ".$member->{'error'}."\n");
            return 60;
        }
        my $name=$member->{'title'};
        next unless $name=~m!^Wikipedia:Editor review/(.+)$!;
        my $name2=$1;
        next if $name2 eq 'Editor-reviewsubst';
        next if $name2=~m/^Archive /;
        if(!exists($api->store->{$name2})){
            # Never seen before: record it, and list it on the main page if
            # it is recent enough.
            my $rdata={ name=>$name2, page=>$name, notified=>undef };
            my $first=$api->query(
                titles  => $name,
                prop    => 'revisions',
                rvlimit => 1,
                rvdir   => 'newer',
                rvprop  => 'timestamp|user',
            );
            if($first->{'code'} ne 'success'){
                $api->warn("Failed to get first revision timestamp for $member->{title}: $first->{error}\n");
                return 60;
            }
            my $rev=(values %{$first->{'query'}{'pages'}})[0]{'revisions'}[0];
            my $u=$rev->{'user'};
            $rdata->{'user'}=($name2=~/^\Q$u\E(?:\s.*)?$/?$u:undef);
            $rdata->{'timestamp'}=ISO2timestamp($rev->{'timestamp'});
            $api->store->{$name2}=$rdata;
            if(time()<$rdata->{'timestamp'}+$autolist_days*86400){
                # New requests go to the top of the list.
                unshift @requests, $rdata;
                $new++;
            }
        }
        # Anything in the backlog category is unreviewed.
        foreach my $req (@requests) {
            $req->{'reviewed'}=0 if $req->{'page'} eq $name;
        }
    }

    # Step 5: find which ones might need attention (reviewed, and old enough
    # that the notification window has opened)
    $t=time()-($archive_days-$notification_days)*86400;
    my @pages=map $_->{'page'}, grep { $_->{'reviewed'} && $_->{'timestamp'}<$t } @requests;

    # Process them
    my %archive=%{$api->store->{'archive'} // {}}; # Load saved archivals
    while(@pages){
        # Fetch the current revision of up to 500 pages per query.
        my $res2=$api->query(
            titles => join('|', splice(@pages, 0, 500)),
            prop   => 'revisions',
            rvprop => 'timestamp|content',
        );
        if($res2->{'code'} ne 'success'){
            $api->warn("Failed to load page data: ".$res2->{'error'}."\n");
            return 60;
        }
        foreach my $p (values %{$res2->{'query'}{'pages'}}) {
            next if exists($p->{'missing'});
            my $rdata=(grep $_->{'page'} eq $p->{'title'}, @requests)[0] // next;
            # Opt-out marker: an HTML comment containing "noautoarchive".
            next if ($p->{'revisions'}[0]{'*'}//'')=~/<!--[^>]*noautoarchive[^>]*-->/i;
            # Time of the most recent revision, i.e. of the last activity.
            my $t=ISO2timestamp($p->{'revisions'}[0]{'timestamp'});
            if(!$rdata->{'notified'}){
                # Need to notify?
                next unless $t+($inactive_days-$notification_days)*86400 < time();
                # Take the user from the first {{User}}, {{User2}} or
                # {{Admin}} template with a non-blank first parameter.
                my $user=undef;
                $api->process_templates(($p->{'revisions'}[0]{'*'}//''), sub {
                    return undef if defined($user);
                    my $name=shift;
                    my $params=shift;
                    return undef unless $name=~/^(?:User2?|Admin)$/;
                    # A missing or blank parameter must not count as a user
                    # name, or the notice would be addressed to "User talk:".
                    (my $candidate=$params->[0]//'')=~s/^\s*|\s*$//g;
                    $user=$candidate if $candidate ne '';
                    return undef;
                });
                # Fall back to the page creator, when the page is named
                # after them.
                $user//=$rdata->{'user'};
                if(!defined($user)){
                    $api->warn("Could not find user for $p->{title}\n");
                    $api->whine("Cannot find user for [[$p->{title}]]", "The editor review page [[$p->{title}]] does not have any of the templates {{tl|user}}, {{tl|user2}}, or {{tl|admin}}, so I cannot determine the editor to notify. Please either fix the page to use one of those templates or add <code>&lt;!--noautoarchive--&gt;</code>.");
                    next;
                }
                # %e pads single-digit days with a space; strip it.
                my $dt=strftime("%e %B %Y", gmtime(time()+$notification_days*86400));
                $dt=~s/^\s+//;
                $res=$api->whine('Automatic processing of your editor review', "This is an automated message. Your [[".$p->{'title'}."|editor review]] is scheduled to be closed on $dt because it will have been open for more than $archive_days days and inactive for more than $inactive_days days. You can keep it open longer by posting a comment to the review page requesting more input. Adding <code>&lt;!--noautoarchive--&gt;</code> to the review page will prevent further automated actions. ", Summary=>"Your review is scheduled to be closed on $dt", Pagename=>"User talk:$user", NoSmallPrint=>1);
                if($res->{'code'} eq 'shutoff'){
                    $api->warn("Task disabled: ".$res->{'content'}."\n");
                    return 300;
                }
                if($res->{'code'} eq 'pageprotected' || $res->{'code'} eq 'botexcluded'){
                    # If they really don't want messages, let them. The
                    # review still counts as notified below.
                    $api->warn("Cannot notify $user: ".$res->{'error'}."\n");
                } elsif($res->{'code'} ne 'success'){
                    $api->warn("Failed to get edit User talk:$user: ".$res->{'error'}."\n");
                    # Shortens the delay before the next pass (600 vs 7200).
                    $broken=1;
                    next;
                }
                $rdata->{'notified'}=time();
                $api->store->{$rdata->{'name'}}=$rdata;
            } else {
                # Time to close? Both the notice period and the inactivity
                # period must have elapsed.
                next unless $rdata->{'notified'}+$notification_days*86400 < time();
                next unless $t+$inactive_days*86400 < time();

                # Mark for archival and removal from the main page. Value is
                # [ year of creation, archive-list entry ]; saved to the
                # store right away so it survives a failure later in the pass.
                $archive{$rdata->{'page'}}=[ (gmtime $rdata->{'timestamp'})[5]+1900, { page=>$rdata->{'page'}, name=>$rdata->{'name'}, txt=>', '.strftime('%e %B %Y', gmtime $rdata->{'timestamp'}) } ];
                $api->store->{'archive'}=\%archive;
            }
        }
    }

    # Step 6: reconstruct the main page from the surviving requests, placed
    # between the two marker comments
    my $r=join("\n----\n", map "{{$_->{page}}}", grep !exists($archive{$_->{'page'}}), @requests);
    unless($outtxt=~s/(<!-- Add new requests below this line. Make sure to add ---- under the request -->).*(<!--If you are submitting your username for a review, please put it at the top of the list rather than here at the bottom-->)/$1\n$r\n$2/s){
        $api->whine('[[WP:Editor review]] is broken', 'In order to process [[WP:Editor review]], I rely on the comments <code><nowiki><!-- Add new requests below this line. Make sure to add ---- under the request --></nowiki></code> and <code><nowiki><!--If you are submitting your username for a review, please put it at the top of the list rather than here at the bottom--></nowiki></code> to know where the reviews are supposed to go. Please fix it!');
        return 300;
    }
    if($intxt ne $outtxt){
        # Build an edit summary such as
        # "Listing 1 new review, archiving 2 closed reviews, and correcting page formatting".
        my @summary=();
        push @summary, "listing $new new review".($new==1?'':'s') if $new;
        my $ct=scalar keys %archive;
        push @summary, "archiving $ct closed review".($ct==1?'':'s') if $ct;
        push @summary, "removing $rmredir redirect".($rmredir==1?'':'s') if $rmredir;
        push @summary, 'correcting page formatting';
        $summary[$#summary]='and '.$summary[$#summary] if @summary>1;
        my $summary=ucfirst(join(@summary>2?', ':' ', @summary));
        $res=$api->edit($tok, $outtxt, "$summary. $screwup", 0, 0);
        if($res->{'code'} ne 'success'){
            $api->warn("Edit of WP:Editor review failed: ".$res->{'error'}."\n");
            return 60;
        }
    }

    # Step 7: need to archive?
    if(%archive){
        # First, split archivals by year
        my %years=();
        foreach my $entry (values %archive) {
            $years{$entry->[0]}{$entry->[1]{'page'}}=$entry->[1];
        }

        foreach my $year (sort keys %years) {
            my $data=$years{$year};
            my $arch_tok=$api->edittoken("Wikipedia:Editor review/Archive ($year)");
            if($arch_tok->{'code'} eq 'shutoff'){
                $api->warn("Task disabled: ".$arch_tok->{'content'}."\n");
                return 300;
            }
            if($arch_tok->{'code'} ne 'success'){
                $api->warn("Failed to get edit token for Wikipedia:Editor review/Archive ($year): ".$arch_tok->{'error'}."\n");
                return 60;
            }
            # Number of newly archived reviews, counted before the links
            # already on the page are merged in.
            my $arch_ct=scalar keys %$data;

            # Load or create archive page text
            my $arch_in=$arch_tok->{'revisions'}[0]{'*'} // "{{Editor Review Navigation}}\n{{bots|deny=HagermanBot}}\nThese are links to reviews of editors from [[Wikipedia:Editor review]] from $year. They are in alphabetical order and include multiple submissions. The date listed is the date of the original submission by the editor.\n\n{{compactTOC8|side=yes|top=yes|num=yes}}\n";
            # Keep only the page header: everything before the first section.
            my $arch_out=$arch_in; $arch_out=~s/\s*\n==.*/\n/s;

            # Load all existing links from the page. Each match yields three
            # items: full page title, subpage name, trailing text of the
            # line. An existing link replaces a new entry with the same key.
            my @l=($arch_in=~m!(?<=\n)(?:[:*]+\s*)\[\[((?i:Wikipedia|WP):[eE]ditor[ _]review/([^|\]]+))(?:\|[^]]+)?\]\]([^\n]*)!g);
            for(my $i=0; $i<@l; $i+=3){
                $l[$i]=~s/_/ /g;
                $l[$i+1]=~s/_/ /g;
                $data->{$l[$i]}={ page=>$l[$i], name=>$l[$i+1], txt=>$l[$i+2] };
            }

            # Sort the mess, and reconstruct the page: one section per
            # initial letter, case-insensitive order.
            @l=sort { uc($a->{'name'}) cmp uc($b->{'name'}) } values %$data;
            my ($x,$k,$prev)=('','','');
            foreach my $link (@l) {
                if($k ne substr(uc($link->{'name'}),0,1)){
                    $k=substr(uc($link->{'name'}),0,1);
                    $arch_out.="\n==$k==\n";
                }
                # Repeat submissions ("Name (2)") are nested one bullet
                # level deeper under the preceding entry of the same name.
                ($x=$link->{'name'})=~s/\s*\(\d+\)$//;
                $arch_out.='*' if $x eq $prev;
                $arch_out.="*[[$link->{page}|$link->{name}]]$link->{txt}\n";
                $prev=$x;
            }

            if($arch_in ne $arch_out){
                $res=$api->edit($arch_tok, $arch_out, "Archiving $arch_ct closed review".($arch_ct==1?'':'s').". $screwup", 0, 0);
                if($res->{'code'} ne 'success'){
                    $api->warn("Edit of WP:Editor review/Archive ($year) failed: ".$res->{'error'}."\n");
                    return 60;
                }
            }
        }

        # Done archiving, clear store
        delete $api->store->{'archive'};
    }

    # Step 8: save timestamp and broken flag
    $self->{'lasttime'}=$starttime;
    $self->{'broken'}=$broken;
    $api->store->{'lasttime'}=$starttime;
    $api->store->{'broken'}=$broken;
    return $starttime+($broken?600:7200)-time();
}

1;