# User:AnomieBOT/source/tasks/MassDeleter.pm
#
# From Wikipedia, the free encyclopedia
package tasks::MassDeleter;

=pod

=begin metadata

Bot:       AnomieBOT III
Task:      MassDeleter
BRFA:      Wikipedia:Bots/Requests for approval/AnomieBOT III 2
Status:    Approved 2016-04-13
Created:   2016-03-26
Exclusion: false
OnDemand:  true

Performs mass deletions after consensus at XfD or other processes. Can also CSD
associated talkpages and their subpages.

=end metadata

=cut

use utf8;
use strict;

use AnomieBOT::Task qw/ISO2timestamp/;
use Data::Dumper;
use URI::Escape;
use vars qw/@ISA/;
@ISA=qw/AnomieBOT::Task/;

# Malfunction-report notice appended to every deletion summary. Declared empty
# here; run() fills it in on each invocation once the bot's user name is known.
my $screwup;

# Deletion summary used for the pages returned by get_pages().
my $reason = 'Deleting per [[Wikipedia:Templates for discussion/Log/2019 February 27#Template:PBB]]';

# Retry time returned by run() when no pages are left to delete
# (presumably seconds -- confirm against the task scheduler).
my $retryTime = 3600;

# When true, run() also deletes (as CSD G8) the talk page of each deleted
# non-talk page, along with that talk page's subpages.
my $csdTalk = 1;

# Fetch the next batch of pages needing deletion from the enwiki replica.
#
# Parameters:
#   $self - the task object (not used by this method).
#   $api  - bot API object; connectToReplica() and warn() are called on it.
#   $from - optional hashref with 'ns' and 'title' of the row to resume from
#           (inclusive), or undef to start from the beginning.
#
# Returns an arrayref of hashrefs for pages needing deletion and a continuation
# hashref (undef when no further batch exists), or undef and a retry time when
# the replica cannot be reached or queried. The hashrefs contain an 'id' field
# being the page_id to be deleted, and 'ns' and 'title' for logging.
sub get_pages {
    my ($self, $api, $from) = @_;

    # Rows handed back per call. One extra row is fetched so that its presence
    # signals that another batch exists; that row becomes the continuation.
    my $batch = 500;
    my $fetch = $batch + 1;

    my $dbh;
    my $connected = eval {
        ($dbh) = $api->connectToReplica( 'enwiki' );
        # Treat "no handle, no exception" as a failure too, so that the method
        # calls below never hit an undefined value outside an eval.
        die "no database handle returned\n" unless defined( $dbh );
        1;
    };
    if ( !$connected ) {
        $api->warn( "Error connecting to replica: $@\n" );
        return (undef, 300);
    }

    # Continuation is passed through placeholders rather than interpolated, and
    # the columns are qualified so the clause stays unambiguous if another
    # "page" join is ever added to the query.
    my $cont = '';
    my @bind;
    if ( defined( $from ) ) {
        $cont = ' AND (p1.page_namespace > ? OR p1.page_namespace = ? AND p1.page_title >= ?)';
        @bind = ( $from->{'ns'}, $from->{'ns'}, $from->{'title'} );
    }

    my $rows = eval {
        my $res = $dbh->selectall_arrayref( qq{
            SELECT p1.page_id AS id, p1.page_namespace AS ns, p1.page_title AS title
                FROM page as p1
                 LEFT JOIN (
                   templatelinks AS tl JOIN linktarget AS lt ON(tl.tl_target_id = lt.lt_id) -- JOIN page AS p2 ON (p2.page_id = tl.tl_from AND p2.page_namespace NOT IN (2,3))
                 ) ON (lt_namespace = p1.page_namespace AND lt_title = p1.page_title)
                WHERE p1.page_namespace=10 AND p1.page_title LIKE 'PBB/%' AND tl_from IS NULL $cont
                ORDER BY p1.page_namespace, p1.page_title
                LIMIT $fetch
            }, { Slice => {} }, @bind );
        # Without RaiseError a failed query yields undef; surface the real
        # database error instead of a dereference failure.
        die( ( $dbh->errstr // 'query returned no result' ) . "\n" ) unless defined( $res );
        $res;
    };
    if ( !defined( $rows ) ) {
        $api->warn( "Error fetching page list from replica: $@\n" );
        return (undef, 300);
    }

    my $last = @$rows > $batch ? pop( @$rows ) : undef;
    return ($rows, $last);
}

# Constructor: build the object via the parent class, make sure it is blessed
# into the requested class, and start with no continuation row recorded.
sub new {
    my ($class) = @_;
    my $self = bless $class->SUPER::new(), $class;
    $self->{'cont'} = undef;
    return $self;
}

=pod

=for info
Approved 2016-04-13<br />[[Wikipedia:Bots/Requests for approval/AnomieBOT III 2]]

=cut

# Report this task's approval value to the bot framework.
# NOTE(review): the meaning of the specific value -500 is defined by the
# framework, not by this file -- confirm before changing it.
sub approved {
    my $approval = -500;
    return $approval;
}

# Process one batch of deletions.
#
# Fetches a batch of pages via get_pages(), deletes each one by page id, and,
# when $csdTalk is set, also deletes the associated talk page and its subpages.
# The row currently being handled is stored in $self->{'cont'} so a later call
# resumes from it.
#
# Parameters:
#   $self - the task object.
#   $api  - bot API object.
#
# Returns the delay before the task should be run again (presumably seconds):
# 0 to continue right away, 300 after a token/shutoff problem, the retry time
# from get_pages() when the replica is unavailable, or $retryTime when nothing
# is left to delete.
sub run {
    my ($self, $api)=@_;

    $screwup='If this bot is malfunctioning, please report it at [[User:'.$api->user.'/shutoff/MassDeleter]]';

    $api->task('MassDeleter', 0, 10, qw/d::IWNS/);

    my %rns = $api->namespace_reverse_map();

    my ($rows, $cont) = $self->get_pages( $api, $self->{'cont'} );
    if ( !defined( $rows ) ) {
        # On failure the second return value is a retry time, not a row.
        return $cont;
    }

    if ( !@$rows ) {
        $api->log( 'No more pages to delete, stopping.' );
        $self->{'cont'} = undef;
        return $retryTime;
    }

    # Spend a max of 5 minutes on this task before restarting
    my $endtime=time()+300;

    for my $row (@$rows) {
        $self->{'cont'} = $row;

        return 0 if $api->halting;

        # If we've been at it long enough, let another task have a go. Checked
        # before each row so that skipped or failed rows count against the
        # budget too; the row recorded above is picked up on the next run.
        return 0 if time()>=$endtime;

        my $id = $row->{'id'};
        my $nsname = $row->{'ns'} ? $rns{$row->{'ns'}} // "<$row->{ns}>" : '';
        # Decode a copy only: the raw title in $row is what get_pages() sends
        # back to the database as the continuation point.
        my $title = $row->{'title'};
        utf8::decode( $title );
        my $page = ( $nsname ? "$nsname:" : '' ) . $title;

        my %q = ( NoExclusion => 1 );
        $q{'Title'} = $page if exists( $rns{$row->{'ns'}} );

        my $tok=$api->gettoken('csrf', %q );
        if ( $tok->{'code'} eq 'shutoff' ) {
            $api->warn( "Task disabled: " . $tok->{'content'} . "\n" );
            return 300;
        }
        if ( $tok->{'code'} ne 'success' ) {
            $api->warn( "Failed to get delete token for $page (id=$id): " . $tok->{'error'} . "\n" );
            return 300;
        }
        if ( exists( $tok->{'missing'} ) ) {
            #$api->log("$page (id=$id) no longer exists, skipping");
            next;
        }

        $api->log( "Deleting $page (id=$id): $reason" );
        my $res = $api->action( $tok,
            action => 'delete',
            pageid => $id,
            reason => "$reason. $screwup",
        );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Failed to delete $page (id=$id): " . $res->{'error'} . "\n" );
            next;
        }

        # Only non-talk (even-numbered) namespaces that have a known talk
        # namespace get the G8 follow-up.
        my $talkns = $row->{'ns'} | 1;
        if ( $csdTalk && ($row->{'ns'} & 1) == 0 && exists( $rns{$talkns} ) ) {
            $self->_delete_talk_pages( $api, $tok, $talkns, $rns{$talkns}, $title );
        }
    }

    # Whole batch handled: resume from the look-ahead row get_pages() returned,
    # if there was one, instead of re-querying from the last processed row.
    $self->{'cont'} = $cont if defined( $cont );

    return 0;
}

# Delete, as CSD G8, the talk page of an already-deleted page together with
# that talk page's subpages. Failures are logged via $api->warn; nothing is
# returned.
#
# Parameters:
#   $self       - the task object.
#   $api        - bot API object.
#   $tok        - token structure from gettoken, reused for every deletion.
#   $talkns     - numeric id of the talk namespace.
#   $talknsname - name of the talk namespace.
#   $title      - title of the deleted page, without namespace prefix.
sub _delete_talk_pages {
    my ($self, $api, $tok, $talkns, $talknsname, $title) = @_;

    my $res = $api->query(
        titles => $talknsname . ':' . $title,
    );
    # NOTE(review): assumes query results carry 'code'/'error' like gettoken
    # and action results do -- confirm against the API wrapper.
    if ( $res->{'code'} ne 'success' ) {
        $api->warn( "Failed to look up $talknsname:$title: " . ( $res->{'error'} // 'unknown error' ) . "\n" );
        return;
    }

    my ($talk) = values %{$res->{'query'}{'pages'}};
    if ( $talk && exists( $talk->{'pageid'} ) ) {
        my $id2 = $talk->{'pageid'};
        my $page2 = $talk->{'title'};
        $api->log( "Deleting $page2 (id=$id2): Talk page of a deleted page" );
        $res = $api->action( $tok,
            action => 'delete',
            pageid => $id2,
            reason => "[[WP:CSD#G8|G8]]: Talk page of deleted page. $screwup",
        );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Failed to delete $page2 (id=$id2): " . $res->{'error'} . "\n" );
            # Leave the subpages alone if the talk page itself could not go.
            return;
        }
    }

    my $iter = $api->iterator(
        generator => 'allpages',
        gapnamespace => $talkns,
        gapprefix => "$title/",
        gaplimit => 'max',
        prop => 'info',
        inprop => 'subjectid',
    );

    # Titles whose whole subtree must be left alone: either the subpage has a
    # subject page of its own, or deleting it failed.
    my %skip = ();
    ITER: while( my $sub = $iter->next ) {
        if ( !$sub->{'_ok_'} ) {
            $api->warn( "Failed to list subpages of $talknsname:$title: " . ( $sub->{'error'} // 'unknown error' ) . "\n" );
            last ITER;
        }

        # Skip anything underneath an already-skipped ancestor.
        my @parts = split( m!/!, $sub->{'title'} );
        for my $i ( 0 .. $#parts ) {
            next ITER if exists( $skip{ join( '/', @parts[0..$i] ) } );
        }
        if ( exists( $sub->{'subjectid'} ) ) {
            $skip{$sub->{'title'}} = 1;
            next ITER;
        }

        my $id2 = $sub->{'pageid'};
        my $page2 = $sub->{'title'};
        $api->log( "Deleting $page2 (id=$id2): Subpage of a deleted page" );
        $res = $api->action( $tok,
            action => 'delete',
            pageid => $id2,
            reason => "[[WP:CSD#G8|G8]]: Subpage of deleted page. $screwup",
        );
        if ( $res->{'code'} ne 'success' ) {
            $api->warn( "Failed to delete $page2 (id=$id2): " . $res->{'error'} . "\n" );
            $skip{$sub->{'title'}} = 1;
        }
    }

    return;
}

1;