User:CLT20RecordsUpdateBot/Source/update.php

From Wikipedia, the free encyclopedia
<?php
 
# Never print errors into the response, and allow a long run time (2500 seconds)
ini_set('display_errors', 0);
ini_set('max_execution_time', 2500);
 
# Log every error except notices to error_log.txt, one record per line in the form
#   code|message|file|line
# '&' and '|' in the message are encoded so that '|' stays usable as the field separator,
# and line breaks are turned into <br /> so that each record stays on a single line.
set_error_handler(
    function($code, $msg, $file, $line) {
        if ( strpos($msg, 'DOMDocument') !== false ) {  # Do not log HTML parsing warnings
            return false;  # Hand the error back to PHP's standard error handler
        }
        file_put_contents(
            'error_log.txt',
            # "\r\n" is replaced before "\n" so that a Windows line break becomes one <br />, not two
            $code . '|' . (str_replace(['&', '|', "\r\n", "\n"], ['&amp;', '&#124;', '<br />', '<br />'], $msg)) . '|' . $file . '|' . $line . "\r\n",
            FILE_APPEND
        );
    }, E_ALL ^ E_NOTICE
);
 
# Delete the status and error logs and backup file if any (not if using resume)
# empty() is used instead of @-suppression: the registered error handler is still invoked for
# @-suppressed errors, so a missing "resume" parameter could otherwise end up in the error log
# on PHP versions that report an undefined array key as a warning.
if ( empty($_GET['resume']) ) {
    foreach ( ['status.txt', 'error_log.txt', 'edit_failed_backup.txt'] as $staleFile ) {
        if ( file_exists($staleFile) ) {
            unlink($staleFile);
        }
    }
    unset($staleFile);
}
 
# Send one request to the English Wikipedia API and return the raw response body.
#
# $method   - HTTP method ('POST' sends $postdata as a form-encoded body; anything else sends no body)
# $headers  - array of raw header lines, joined with CRLF
# $getdata  - parameters placed in the query string
# $postdata - parameters placed in the request body (POST only)
#
# A random "requestid" parameter is added to every request: to the body for POST requests,
# otherwise to the query string. The function sleeps for 3 seconds after each request.
# Returns whatever file_get_contents() returns: the response body, or false on failure.
function queryWikiAPI($method, $headers = [], $getdata = [], $postdata = []) {

    $isPost = ( $method == 'POST' );

    # Tag the request with a random ID
    $requestID = mt_rand();
    if ( $isPost ) {
        $postdata['requestid'] = $requestID;
    }
    else {
        $getdata['requestid'] = $requestID;
    }

    # Only POST requests carry a body, together with the headers describing it
    $requestBody = null;
    if ( $isPost ) {
        $requestBody = http_build_query($postdata);
        $headers[] = 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8';
        $headers[] = 'Content-Length: ' . strlen($requestBody);
    }

    $httpOptions = [
        'method' => $method,
        'header' => implode("\r\n", $headers),
    ];
    if ( $isPost ) {
        $httpOptions['content'] = $requestBody;
    }

    # Append the query string only when there is something to put in it
    $uri = 'https://en.wikipedia.org/w/api.php';
    if ( $getdata ) {
        $uri .= '?' . http_build_query($getdata);
    }

    $response = file_get_contents($uri, 0, stream_context_create(['http' => $httpOptions]));

    # Pause after every request
    sleep(3);

    return $response;

}
 
# Header lines sent with every API request. The functions below access this array as a global;
# the login function adds a 'cookie' entry to it once a session has been obtained.
$wikiAPIRequestHeaders = [
    'Accept: text/xml',
    'DNT: 1',
    'User-Agent: ',  # Sensitive information removed
];
 
# Unix timestamp of the start of the script. Used for the run time shown in the edit summary
# and, when resuming, as the base time for edit conflict detection.
$startTime = time();
 
# Log in
# Log in to the Wikipedia API as CLT20RecordsUpdateBot.
#
# Two action=login requests are sent: the first returns a login token, session ID and cookie
# prefix; the second repeats the credentials together with that token. The session cookie and,
# after a successful login, the user name, user ID and token cookies are stored in
# $wikiAPIRequestHeaders['cookie'] for use by later requests.
#
# The script is stopped with die() if a request fails, a response cannot be parsed or has no
# <login> element, the API reports an error, or the login result is not 'Success'.
# Takes no parameters and returns nothing; sets the globals $username and $password.
function CLT20RecordsUpdateBot_login() {
 
    global $wikiAPIRequestHeaders, $wikiAPIEditToken, $username, $password;
 
    # Username and password
    $username = 'CLT20RecordsUpdateBot';
    $password = '';  // Password removed

    # Parameters shared by both login requests
    $loginParameters = [
        'format' => 'xml',
        'action' => 'login',
        'lgname' => $username,
        'lgpassword' => $password,
    ];

    # Validate a raw API response and return its <login> element.
    # Every unusable response ends the script with a message, instead of a fatal
    # "call to a member function on null" error further down.
    $getLoginElement = function($response) {
        if ( $response === false ) {
            die('Failed to log in: Query to Wikipedia API failed');
        }

        $XMLDOMDoc = new DOMDocument();
        # The empty string is tested separately because loadXML() does not accept empty input
        if ( $response === '' || ! $XMLDOMDoc->loadXML($response) ) {
            die('Failed to log in: Invalid response from Wikipedia API');
        }

        $errors = $XMLDOMDoc->getElementsByTagName('error');
        if ( $errors->length ) {
            $errorCode = $errors->item(0)->getAttribute('code');
            $errorMessage = $errors->item(0)->getAttribute('info');
            die("[{$errorCode}] {$errorMessage}");
        }

        $loginInfo = $XMLDOMDoc->getElementsByTagName('login')->item(0);
        if ( $loginInfo === null ) {
            die('Failed to log in: Invalid response from Wikipedia API');
        }

        return $loginInfo;
    };

    # First request: obtain the login token and the session details
    $obtainLoginTokenResult = queryWikiAPI('POST', $wikiAPIRequestHeaders, [], $loginParameters);
    $loginInfo = $getLoginElement($obtainLoginTokenResult);

    $cookiePrefix = $loginInfo->getAttribute('cookieprefix');
    $sessionID = $loginInfo->getAttribute('sessionid');
    $loginToken = $loginInfo->getAttribute('token');
 
    # Construct the sessionID cookie
    # Use a unique 'cookie' key rather than a numeric key, so that additional headers can be added to $wikiAPIRequestHeaders
    # without deleting this one. It does not break the implode() function used to assemble the headers
    $wikiAPIRequestHeaders['cookie'] = "Cookie: {$cookiePrefix}_session={$sessionID}";

    # Send a second request with the login token
    $loginWithTokenResult = queryWikiAPI('POST', $wikiAPIRequestHeaders,
        [],
        $loginParameters + ['lgtoken' => $loginToken]
    );
    $loginInfo = $getLoginElement($loginWithTokenResult);
 
    $loginResult = $loginInfo->getAttribute('result');
    if ( $loginResult != 'Success' ) {
        die("Login unsuccessful (result: {$loginResult})");
    }
 
    $loginUserName = $loginInfo->getAttribute('lgusername');
    $loginUserID = $loginInfo->getAttribute('lguserid');
    $loginToken = $loginInfo->getAttribute('lgtoken');
 
    # Set additional cookies after login
    $wikiAPIRequestHeaders['cookie'] .= "; {$cookiePrefix}UserName={$loginUserName}; {$cookiePrefix}UserID={$loginUserID}; {$cookiePrefix}Token={$loginToken}";
 
}
# Log in before anything else is requested from the API
CLT20RecordsUpdateBot_login();


# The bot is now logged in: send a logout request when the script terminates
register_shutdown_function(
    function() {
        global $wikiAPIRequestHeaders;

        $logoutParameters = [
            'format' => 'xml',
            'action' => 'logout',
        ];

        queryWikiAPI('GET', $wikiAPIRequestHeaders, $logoutParameters);
    }
);
 
 
# Get the text of the page, the latest revision timestamp and edit token

# Title of the page to update. Used as the "titles" parameter when the page is fetched
# and as the "title" parameter when the edit is saved.
$PageTitle = 'List of Champions League Twenty20 records and statistics';
 
# Fetch the page to be updated and store what is needed to edit it in global variables:
#   $wikiAPIEditToken     - the edit token
#   $PageText             - the wikitext of the revision returned
#   $PageLatestRevisionTS - the timestamp of that revision
#
# The bot's talk page is checked first: the script stops if there are new messages. A failure
# of that check itself only produces a warning. Any failure while fetching the page (request
# failed, unusable response, API error, missing page, bad edit token) stops the script.
# Takes no parameters and returns nothing.
function CLT20RecordsUpdateBot_getPageInfo() {
 
    global $wikiAPIRequestHeaders, $wikiAPIEditToken, $PageTitle, $PageText, $PageLatestRevisionTS, $username, $password;
 
    # Before proceeding, check for any new messages on the user talk page
    $hasNewMessagesResult = queryWikiAPI('GET', $wikiAPIRequestHeaders,
        [
            'format' => 'xml',
            'action' => 'query',
            'meta' => 'userinfo',
            'uiprop' => 'hasmsg',
        ]
    );

    # Don't stop the script if this check cannot be carried out, only give a warning.
    # The response is parsed only when there is one, so a failed request can no longer
    # cause a fatal error below.
    $XMLDOMDoc = new DOMDocument();
    if ( $hasNewMessagesResult === false ) {
        trigger_error('Cannot get info about new talk page messages: Query to Wikipedia API failed', E_USER_WARNING);
    }
    elseif ( $hasNewMessagesResult === '' || ! $XMLDOMDoc->loadXML($hasNewMessagesResult) ) {
        trigger_error('Cannot get info about new talk page messages: Invalid response from Wikipedia API', E_USER_WARNING);
    }
    elseif ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
        $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
        $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');
        trigger_error("Cannot get info about new talk page messages: Error: [{$errorCode}] {$errorMessage}", E_USER_WARNING);
    }
    else {
        $userInfo = $XMLDOMDoc->getElementsByTagName('userinfo')->item(0);
        if ( $userInfo === null ) {
            trigger_error('Cannot get info about new talk page messages: Invalid response from Wikipedia API', E_USER_WARNING);
        }
        elseif ( $userInfo->hasAttribute('messages') ) {
            die('New message on user talk page (<a href="https://en.wikipedia.org/wiki/User_talk:' . urlencode($username) . '" target="_blank">view</a> | '
                . '<a href="https://en.wikipedia.org/w/index.php?title=User_talk:' . urlencode($username) . '&amp;diff=cur" target="_blank">last edit</a>)');
        }
    }
 
    # Request the page content, its timestamp and an edit token in a single query
    $getPageInfoResult = queryWikiAPI('GET', $wikiAPIRequestHeaders,
        [
            'action' => 'query',
            'format' => 'xml',
            'prop' => 'info|revisions',
            'titles' => $PageTitle,
            'intoken' => 'edit',
            'rvprop' => 'content|timestamp'
        ]
    );
    if ( $getPageInfoResult === false ) {
        die('Failed to obtain page text: Query to Wikipedia API failed');
    }
 
    $XMLDOMDoc = new DOMDocument();
    # The empty string is tested separately because loadXML() does not accept empty input
    if ( $getPageInfoResult === '' || ! $XMLDOMDoc->loadXML($getPageInfoResult) ) {
        die('Failed to obtain page text: Invalid response from Wikipedia API');
    }
 
    if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
        $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
        $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');
        die("[{$errorCode}] {$errorMessage}");
    }
 
    $pagesElement = $XMLDOMDoc->getElementsByTagName('pages')->item(0);
    $pageInfo = ( $pagesElement === null ) ? null : $pagesElement->getElementsByTagName('page')->item(0);
    if ( $pageInfo === null ) {
        die('Failed to obtain page text: Invalid response from Wikipedia API');
    }
 
    # Stop if the page is missing
    if ( $pageInfo->hasAttribute('missing') ) {
        die('Failed to obtain page text (page does not exist or has been deleted)');
    }
 
    # Get the edit token
    # A token consisting only of '+\', or one that does not contain '+\' at all, is rejected
    $wikiAPIEditToken = $pageInfo->getAttribute('edittoken');
    if ( $wikiAPIEditToken == '+\\' || strpos($wikiAPIEditToken, '+\\') === false ) {
        die('Bad edit token obtained');
    }
 
    $revisionInfo = $pageInfo->getElementsByTagName('rev')->item(0);
    if ( $revisionInfo === null ) {
        die('Failed to obtain page text: No revision returned by Wikipedia API');
    }
 
    # textContent is used rather than the first child node, so that a <rev> element
    # without any child node (an empty page) gives an empty string instead of a fatal error
    $PageText = $revisionInfo->textContent;
    $PageLatestRevisionTS = $revisionInfo->getAttribute('timestamp');
 
}

# Fetch the wikitext, latest revision timestamp and edit token into the global variables
CLT20RecordsUpdateBot_getPageInfo();
 
# A page whose text starts with a redirect must not be edited
if ( preg_match('/^#\s*+REDIRECT\s*+\[\[.*\]\]/isu', $PageText) ) {
    die('Redirect page obtained');
}
 
# Honour {{bots}} and {{nobots}} templates. Each of the three cases below stops the script.

# Case 1: {{nobots}}, or {{bots}} with deny=all or allow=none
if ( preg_match('/\{\{\s*+(?:[Nn]obots|[Bb]ots\s*+\|(?:.*?\|)?(?:deny\s*+\=\s*+all|allow\s*+\=\s*+none))/su', $PageText) ) {
    die('A {{bots}} or {{nobots}} template does not allow CLT20RecordsUpdateBot to edit this page');
}

# Case 2: {{bots}} with a deny list that names this bot
if ( preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?deny\s*+\=(?:[^\|]*?,)?\s*+CLT20RecordsUpdateBot\s*+(?:,|\||\}\})/su', $PageText) ) {
    die('A {{bots}} or {{nobots}} template does not allow CLT20RecordsUpdateBot to edit this page');
}

# Case 3: {{bots}} with an allow list that does not name this bot
if ( preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?allow\s*+\=[^\|]*?(?:\||\}\})/su', $PageText) ) {
    if ( ! preg_match('/\{\{\s*+[Bb]ots\s*+\|(?:.*?\|)?allow\s*+\=(?:[^\|]*?,)?\s*+CLT20RecordsUpdateBot\s*+(?:,|\||\}\})/su', $PageText) ) {
        die('A {{bots}} or {{nobots}} template does not allow CLT20RecordsUpdateBot to edit this page');
    }
}
 
 
# If the "resume" GET parameter is true, get the text of the backup file and use it to edit.
# This backup file is saved in the event of an edit conflict or other error when editing
# so that all updates do not have to be redone in the next attempt.
if ( ! empty($_GET['resume']) ) {
    # Check readability first so that a missing backup does not also add a warning to the error log
    $PageText = is_readable('edit_failed_backup.txt') ? file_get_contents('edit_failed_backup.txt') : false;
    if ( $PageText === false ) {
        die("Cannot find the backup file");
    }

    # Set the edit conflict detection time to the start time of the script.
    # The timestamp is written as ISO 8601 in UTC: gmdate() is needed because the literal 'Z'
    # means UTC, and the date part is separated by hyphens, not colons.
    $PageLatestRevisionTS = gmdate('Y-m-d\TH:i:s\Z', $startTime);

    # $endTime is otherwise only set after a full update run, which is skipped here. Without it
    # the run time in the edit summary would be computed from an unset value.
    $endTime = time();

    CLT20RecordsUpdateBot_editPage();
 
    # Only reached when the edit succeeded: CLT20RecordsUpdateBot_editPage() stops the script on failure
    unlink('edit_failed_backup.txt');
    exit;
}
 
 
# Encode areas which should not be edited
# These will be decoded with html_entity_decode() before the wikitext is sent back to the server
#
# In every replacement list '&' comes first, so that entities already present in the text are
# themselves escaped and come back unchanged after decoding.
# preg_replace_callback() returns null when the regular expression engine fails. Each result is
# checked, because continuing with a null $PageText would discard the whole page text.

# HTML comments
$PageText = preg_replace_callback('/\<\!--(.*?)--\>/us',
                                    function($match) {
                                        return '<!--' . str_replace(['&', '<', '>', '{', '}', '|', '!', '='],
                                                                    ['&amp;', '&lt;', '&gt;', '&#123;', '&#125;', '&#124;', '&#33;', '&#61;'],
                                                                    $match[1]) . '-->';
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to encode HTML comments in the page text (PCRE error code ' . preg_last_error() . ')');
}
 
# Tags where wikitext is not parsed
$PageText = preg_replace_callback('/(\<(nowiki|pre|math|source|syntaxhighlight)(?(?=\s)[^\>]*+)\>)(.*?)\<\/\2\>/us',  # Allow attributes only if there is a space after the tag name
                                    function($match) {
                                        return $match[1] . str_replace(['&', '<', '>', '{', '}', '|', '!', '='],
                                                                       ['&amp;', '&lt;', '&gt;', '&#123;', '&#125;', '&#124;', '&#33;', '&#61;' ],
                                                                       $match[3]) . '</' . $match[2] . '>' ;
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to encode unparsed tags in the page text (PCRE error code ' . preg_last_error() . ')');
}
 
# Characters in template calls which may conflict with header and table syntax
# The pattern is recursive ((?R)) so that nested templates are matched as part of the outer one
$PageText = preg_replace_callback('/\{\{(?:[^\{\}]++|(?<!\{)\{|\}(?!\})|(?R))*?\}\}/u',
                                    function($match) {
                                        return str_replace(['&', '|', '!', '='], ['&amp;', '&#124;', '&#33;', '&#61;'], $match[0]);
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to encode template calls in the page text (PCRE error code ' . preg_last_error() . ')');
}
 
# Page text is obtained and encoded, now update it

$updateStartTime = time();
 
# NOTE(review): presumably this file defines $StatsUpdateFunctions (name => callable), which is
# used below but not set anywhere in this script - confirm
include 'StatsUpdateFunctions.php';
 
# Filter the stats GET parameter
# Remove non-existent function names and place valid ones in correct order
# (array_intersect() keeps the order of its first argument, the known function names).
# A missing or non-string parameter is treated as an empty request, so that it is never
# passed to explode().
$StatsToUpdate = array_values(array_intersect(
    array_keys($StatsUpdateFunctions),
    ( isset($_GET['stats']) && is_string($_GET['stats']) ) ? explode('|', $_GET['stats']) : []
));
 
# Start updating
foreach ( $StatsToUpdate as $funcName ) {
 
    # An exception thrown by an update function is logged as a warning and counted as a failed
    # update; the remaining functions are still run
    try {
        $funcCallResult = call_user_func($StatsUpdateFunctions[$funcName]);
    }
    catch ( Exception $error ) {
        trigger_error('Exception thrown: <div class="exception-msg">' . $error->getMessage() . "</div>in function {$funcName}", E_USER_WARNING);
        $funcCallResult = false;
    }
 
    # Record the outcome as "name|1" or "name|0"; the status file is read back when the edit summary is built
    file_put_contents('status.txt', $funcName . '|' . ((int) $funcCallResult) . "\r\n", FILE_APPEND);
 
}
unset($funcName, $funcCallResult);
 
# Decode encoded comments, nowiki tags etc. before committing the edit
# The three kinds of region are decoded in the reverse of the order in which they were encoded.
# preg_replace_callback() returns null when the regular expression engine fails. Each result is
# checked, because a null $PageText would otherwise be sent as the new page text.

# Template calls
$PageText = preg_replace_callback('/\{\{(?:[^\{\}]++|(?<!\{)\{|\}(?!\})|(?R))*?\}\}/u',
                                    function($match) {
                                        return html_entity_decode($match[0], ENT_QUOTES | ENT_HTML5, 'UTF-8');
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to decode template calls in the page text (PCRE error code ' . preg_last_error() . ')');
}
 
# Tags where wikitext is not parsed
$PageText = preg_replace_callback('/(\<(syntaxhighlight|source|math|pre|nowiki)(?(?=\s)[^\>]*+)\>)(.*?)\<\/\2\>/us',
                                    function($match) {
                                        return $match[1] . html_entity_decode($match[3], ENT_QUOTES | ENT_HTML5, 'UTF-8') . '</' . $match[2] . '>' ;
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to decode unparsed tags in the page text (PCRE error code ' . preg_last_error() . ')');
}
 
# HTML comments
$PageText = preg_replace_callback('/\<\!--(.*?)--\>/us',
                                    function($match) {
                                        return '<!--' . html_entity_decode($match[1], ENT_QUOTES | ENT_HTML5, 'UTF-8') . '-->';
                                    }, $PageText);
if ( $PageText === null ) {
    die('Failed to decode HTML comments in the page text (PCRE error code ' . preg_last_error() . ')');
}
 
 
# Updating finished, now edit

$endTime = time();
 
 
# Save the global $PageText to the page named by $PageTitle.
#
# The edit summary reports how many updates succeeded and failed (read from status.txt) and the
# run time; a generic summary is used if status.txt is not available. $PageLatestRevisionTS is
# sent as the base timestamp and the edit never creates the page.
#
# On success "#<old revision ID>|<new revision ID>" is printed. On any failure the wikitext is
# first saved to edit_failed_backup.txt and the script is then stopped with die().
# Takes no parameters and returns nothing.
function CLT20RecordsUpdateBot_editPage() {
 
    global $wikiAPIEditToken, $wikiAPIRequestHeaders, $PageTitle, $PageText, $PageLatestRevisionTS, $startTime, $endTime;
 
    # Save the wikitext to a backup file before ending. Can be retrieved by adding &resume=1 in the URL
    # Used for every failure below, including a failed request, so that no update work is lost
    $failWithBackup = function($message) use ($PageText) {
        file_put_contents('edit_failed_backup.txt', $PageText);
        die($message);
    };
 
    # Get the update results (to be used in the edit summary)
    # Readability is checked first so that a missing status file does not add a warning to the error log
    $updateResults = is_readable('status.txt') ? file('status.txt') : false;
 
    if ( $updateResults !== false ) {
        # Each line has the form "function name|1" (successful) or "function name|0" (failed)
        $successfulUpdates = 0;
        $failedUpdates = 0;
        foreach ( $updateResults as $line ) {
            $fields = explode('|', trim($line));
            if ( count($fields) < 2 ) {  # Not a result line
                continue;
            }
            if ( $fields[1] == 1 ) {
                $successfulUpdates++;
            }
            elseif ( $fields[1] == 0 ) {
                $failedUpdates++;
            }
        }
 
        # $endTime is not set when this function is reached without a full update run; use the current time then
        $elapsedSeconds = ( isset($endTime) ? $endTime : time() ) - $startTime;
        $updateTime = ((int) ($elapsedSeconds / 60)) . ':' . str_pad($elapsedSeconds % 60, 2, '0', STR_PAD_LEFT);
 
        $editSummary = "[[WP:BOT|Bot]]: Updating statistics ({$successfulUpdates} updates successful, {$failedUpdates} failed, {$updateTime})";
    }
    else {  # Use a generic edit summary if the status file is not available for some reason
        $editSummary = "[[WP:BOT|Bot]]: Updating statistics";
    }
    
    # Edit the page
    $editPageResult = queryWikiAPI('POST', $wikiAPIRequestHeaders,
        [],
        [
            'format' => 'xml',
            'action' => 'edit',
            'title' => $PageTitle,
            'summary' => $editSummary,
            'text' => $PageText,
            'basetimestamp' => $PageLatestRevisionTS,
            'nocreate' => true,
            'md5' => md5($PageText),
            'token' => $wikiAPIEditToken,
        ]
    );
    if ( $editPageResult === false ) {
        $failWithBackup('Failed to edit: Query to Wikipedia API failed');
    }
 
    $XMLDOMDoc = new DOMDocument();
    # The empty string is tested separately because loadXML() does not accept empty input
    if ( $editPageResult === '' || ! $XMLDOMDoc->loadXML($editPageResult) ) {
        $failWithBackup('Failed to edit: Invalid response from Wikipedia API');
    }
 
    if ( $XMLDOMDoc->getElementsByTagName('error')->length ) {
        $errorCode = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('code');
        $errorMessage = $XMLDOMDoc->getElementsByTagName('error')->item(0)->getAttribute('info');
        $failWithBackup("[{$errorCode}] {$errorMessage}");
    }
 
    # A response without an <edit> element is treated like an unsuccessful result
    $editInfo = $XMLDOMDoc->getElementsByTagName('edit')->item(0);
    if ( $editInfo === null || $editInfo->getAttribute('result') != 'Success' ) {
        $failWithBackup('Failed to edit: Unknown error');
    }
 
    $oldRevision = $editInfo->getAttribute('oldrevid');
    $newRevision = $editInfo->getAttribute('newrevid');
 
    echo "#{$oldRevision}|{$newRevision}";
 
}

# Save the updated wikitext. On success this prints "#<old revision ID>|<new revision ID>";
# on failure the function stops the script after writing edit_failed_backup.txt.
CLT20RecordsUpdateBot_editPage();
 
?>