User:ImageRemovalBot/removebot-badlinks.pl

From Wikipedia, the free encyclopedia

ImageRemovalBot's code for removing invalid file links. Requires User:FairuseBot/Pearle.pm, User:FairuseBot/Pearle/WikiPage.pm and User:FairuseBot/libBot.pm.

#!/usr/bin/perl


# RemoveBot-badlinks
#
# A bot to remove bad image links, i.e. external URLs or local Windows paths
# used as file names (e.g. 'File:http://' or 'File:C:\')

use strict;
use warnings;

use lib '/home/bot/perllib';

use libBot;

#exit;

# When true, the run uses a short hard-coded page list instead of the category.
my $test = 0;

# Directory that holds this bot's log and cookie files.
my $homedir = '/home/bot/removebot';

# Hand the account credentials plus the log/cookie locations to Pearle,
# then set the options this bot runs with.
Pearle::init(
	"ImageRemovalBot",
	"<INSERT PASSWORD HERE>",
	"$homedir/removebot-badlinks.log",
	"$homedir/cookies-badlinks.txt",
);
Pearle::config(nullOK => 1, printlevel => 4, loglevel => 2);
config(username => "ImageRemovalBot");

# Stop right here if the login attempt did not succeed.
exit unless Pearle::login();

# Main pass: fetch the pages listed in 'Category:Articles with missing files'
# (or a fixed test list when $test is set), pick out the image links whose
# title contains an http(s) URL or a C:/D: drive path, and remove each of them
# from the page via RemoveImageFromPage().
{
	my @articles;
	my $images_removed = 0;		# Running total of removals reported by RemoveImageFromPage

	Pearle::myLog(2, "Beginning set at " . time() . "\n");

	# Get the list of pages with redlinked images
	if($test)
	{
		@articles = ("User:Carnildo/sandbox4", "Hussain Sagar");
	}
	else
	{
		@articles = Pearle::getCategoryArticles('Category:Articles with missing files');
	}

	if(scalar(@articles) == 0)
	{
		Pearle::myLog(2, "No files in category.\n");
		exit;
	}

	Pearle::myLog(4, join("\n", @articles) . "\n");
	Pearle::myLog(2, scalar(@articles) . " pages found\n");

	foreach my $article (@articles)
	{
		my $removal_prefix = undef;
		my $removal_comment = "Removing external link used as image";

		# Keep only the images whose title contains a URL or a Windows drive path
		my @images = Pearle::getPageImages($article);
		@images = grep {$_ =~ /(?:File:Https?:\/\/|File:(?:C|D):\\)/} @images;
		if(scalar(@images) == 0)
		{
			Pearle::myLog(4, "Article $article has no URL images.\n");
			next;
		}
		# Build the message as ONE string, like every other myLog call here;
		# passing the pieces as separate arguments risks losing the image list.
		Pearle::myLog(3, "External-linked images: " . join(", ", @images) . "\n");

		foreach my $image (@images)
		{
			# Strip the namespace prefix; only turn the remainder into a regex
			# if the match actually produced something.
			my ($raw_image) = $image =~ /(?:Image|File):(.*)/;
			$raw_image = MakeWikiRegex($raw_image) if defined($raw_image);

			# Sanity check, done BEFORE the value is interpolated anywhere:
			# the generated pattern must exist and must match the title it
			# was generated from.
			if(!defined($raw_image) or $image !~ /$raw_image/)
			{
				my $shown = defined($raw_image) ? $raw_image : '';
				botwarnlog("\n*Parse error on image [[:$image]] ($shown)");
				exit;
			}

			# Accept either namespace name in any letter case, with optional
			# spaces/underscores around the colon and the title.
			my $image_regex = "[ _]*(?:[Ii][Mm][Aa][Gg][Ee]|[Ff][Ii][Ll][Ee])[ _]*:[ _]*${raw_image}[ _]*";
			Pearle::myLog(3, "Image regex: $image_regex\n");

			eval
			{
				my $hits = 0;
				Pearle::myLog(3, "Page for removal: $article\n");
				# Pearle::limit() is only called when something was removed
				# (presumably it is the edit throttle -- confirm in Pearle.pm).
				if($hits = RemoveImageFromPage($image, $article, $image_regex, $removal_prefix, $removal_comment))
				{
					Pearle::myLog(2, "Removed external link from article $article ($hits times)\n");
					Pearle::limit();
				}
				$images_removed += $hits;
			};
			if($@)
			{
				my $is_protected;
				{
					# $@ may be a string rather than a bare number, so the
					# numeric comparison is kept as-is and only the
					# "isn't numeric" warning it can trigger is silenced.
					no warnings 'numeric';
					$is_protected = (925 == $@);
				}

				if($is_protected)
				{
					# 925 is treated as "page is protected": report and move on
					botwarnlog("\n*Page [[:$article]] is protected removing external link.");
				}
				else
				{
					# Bare die re-raises the exception still held in $@
					die;
				}
			}
		}
	}
	Pearle::myLog(2, "Finished with set.  Removed $images_removed images.\n");
}