User:Mr. Stradivarius/findargdups.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
jQuery(document).ready(function($) {

// Main edit-form textarea; undefined when the page has no 'wpTextbox1' element.
var myContent = document.getElementsByName('wpTextbox1')[0];

// Install the "Find dups" portlet link only when the namespace number is not -1
// and an edit box is present on the page.
if (mw.config.get('wgNamespaceNumber') != -1 && myContent) {
  var findDupsLink = mw.util.addPortletLink('p-tb', '#', 'Find dups', 't-fdup');
  $(findDupsLink).on('click', function(event) {
    // Keep the '#' href from being followed, then run the scan.
    event.preventDefault();
    wpFindDuplicateArgs(0);
  });
}

// -------------------------------------------------------------------------------- //
/**
 * Scan the wikitext in the edit box for template calls that pass the same
 * argument name more than once, and report them through alert() popups.
 *
 * The text is read from the enclosing `myContent` textarea and only a copy is
 * modified. The first four templates with duplicates each get their own popup,
 * the fifth triggers a single "more duplicates" notice, and later ones are
 * silent. If anything was reported, a clean-up note is appended to the
 * 'wpSummary' field when that field exists; the note text can be overridden by
 * defining a global string `findargdupseditsummary`.
 *
 * @param {*} debugflag Not used by this function; kept so existing callers
 *                      that pass it keep working.
 */
function wpFindDuplicateArgs(debugflag)
{
  // Number of templates found so far that contain duplicate arguments
  var alertissued = 0;
  // Loop indices and the pass counter used to bound the rewrite loops
  var i = 0; var j = 0; var loopcount = 0;
  // Templates extracted innermost-first; none contains a nested template
  var tlist = [];
  // Regular expression which matches a template arg ("| name =") and captures
  // the name without surrounding whitespace (the captured name may be empty)
  var argexp = new RegExp("\\|[\\s]*([^\\s=\\|\\[\\]\\{\\}][^=\\|\\[\\]\\{\\}]*[^\\s=\\|\\[\\]\\{\\}]|[^\\s=\\|\\[\\]\\{\\}]|)[\\s]*=", "gm");

  // Copy the contents of the text window so we can modify it without problems
  var mytxt = myContent.value;
  // Remove some includeonly, noinclude, and onlyinclude tags
  mytxt = mytxt.replace(/<\/?[ ]*(?:includeonly|noinclude|onlyinclude)[ ]*>/gi, '');
  // Remove PAGENAME, BASEPAGENAME, ... nested inside of triple braces
  mytxt = mytxt.replace(/\{\{\{[^\{\}]*\|[ ]*\{\{[A-Z]+\}\}\}\}\}/g, '');
  // Remove some triple braces and parserfunctions inside of triple braces
  // (at most 5 passes, each pass peeling one level of nesting)
  loopcount = 0;
  while ((mytxt.search(/\{\{\{[^\{\}]*\}\}\}/g) >= 0) && (loopcount < 5)) {
    mytxt = mytxt.replace(/\{\{\{[^\{\}]*\}\}\}/g, '');
    mytxt = mytxt.replace(/\{\{#[a-z]+:[^{}=]*\}\}/gi, '');
    loopcount++;
  }
  // Replace some bare braces with HTML equivalent
  mytxt = mytxt.replace(/([^\{])\{([^\{])/g, '$1&#123;$2');
  mytxt = mytxt.replace(/([^\}])\}([^\}])/g, '$1&#125;$2');
  // Remove newlines and tabs which confuse the regexp search
  mytxt = mytxt.replace(/[\s]/gm, ' ');
  // Compress whitespace
  mytxt = mytxt.replace(/[\s][\s]+/gm, ' ');
  // Remove some nowiki and pre text
  mytxt = mytxt.replace(/<nowiki[^<>]*>(?:<[^\/]|[^<])*<\/nowiki[^<>]*>/gi, '');
  mytxt = mytxt.replace(/<pre[^<>]*>(?:<[^\/]|[^<])*<\/pre[^<>]*>/gi, '');
  // Remove some HTML comments
  mytxt = mytxt.replace(/<!--(?:[^>]|[^\-]>|[^\-]->)*-->/gm, '');
  // Modify some = inside of file/image/wikilinks which cause false positives
  // (each pass escapes one "=" per link, at most 5 passes)
  loopcount = 0;
  while ((mytxt.search(/\[\[[^\[\]\{\}]*=/gi) >= 0) && (loopcount < 5)) {
    mytxt = mytxt.replace(/(\[\[[^\[\]\{\}]*)=/gi, '$1&#61;');
    loopcount++;
  }

  // Now start unnesting the templates: each pass pulls out every template
  // that has no braces inside it, so outer templates surface on later passes
  loopcount = 0;
  while ((mytxt.search(/(?:\{\{|\}\})/g) >= 0) && (loopcount < 20)) {
    // Split into chunks, isolating the unnested templates
    var strlist = mytxt.split(/(\{\{[^\{\}]*\}\})/);
    // Loop through the chunks, removing the unnested templates
    for (i = 0; i < strlist.length; i++) {
      if (strlist[i].search(/^\{\{[^\{\}]*\}\}$/) >= 0) {
        tlist.push(strlist[i]);
        strlist[i] = '';
      }
    }
    // Join the chunks back together for the next iteration
    mytxt = strlist.join('');
    loopcount++;
  }

  // Preprocess some = signs inside of non-citation-templated citations
  for (i = 0; i < tlist.length; ++i) {
    j = 0;
    while ((tlist[i].search(/<ref[^<>\/]*>(?:<[^\/]|[^<])*=/gi) >= 0)
           && (j < 50)) {
      tlist[i] = tlist[i].replace(/(<ref[^<>\/]*>(?:<[^\/]|[^<])*)=/gi, '$1&#61;');
      // Count the pass so the 50-pass cap in the loop condition takes effect
      j++;
    }
  }

  // Now find duplicates in the list of unnested templates
  for (i = 0; i < tlist.length; ++i) {
    // Add numbers for unnamed parameters (at most 25 per template)
    var unp = 0;
    tlist[i] = tlist[i].replace(/(\{\{[\s_]*#invoke[\s ]*:[^{}\|]*)\|([^{}\|=]*\|)/gi, '$1|0=$2');
    while ((tlist[i].search(/(\{\{(?:[^{}\[\]]|\[\[[^\[\]]*\]\])*?\|)((?:[^{}\[\]=\|]|\[\[[^\[\]]*\]\])*(?:\||\}\}))/) >= 0)
           && (unp < 25)) {
      unp++;
      tlist[i] = tlist[i].replace(/(\{\{(?:[^{}\[\]]|\[\[[^\[\]]*\]\])*?\|)((?:[^{}\[\]=\|]|\[\[[^\[\]]*\]\])*(?:\||\}\}))/, '$1' + unp + '=$2');
    }
    // Array to hold any found duplicate args (reduce number of alerts)
    var f = [];
    // Split the template into an array of | arg = ... strings
    var p = tlist[i].match(argexp);
    if (p) {
      // Reduce each "| name =" match to the bare argument name
      for (j = 0; j < p.length; ++j) {
        p[j] = p[j].replace(argexp, '$1');
      }
      // Sorting puts equal names next to each other
      p.sort();
      for (j = 0; j < p.length - 1; ++j) {
        // Report each duplicated name once, even if it occurs three or more times
        if (p[j] === p[j + 1] && f[f.length - 1] !== p[j]) {
          f.push(p[j]);
        }
      }
    }
    if (f.length > 0) {
      alertissued = alertissued + 1;
      if (alertissued < 5) {
        alert('\"' + f.join('\", \"') + '\" in\n' + tlist[i]);
      } else if (alertissued === 5) {
        // Fifth offending template: show one summary notice, then stay quiet
        alert('More duplicates found, fix some and run again!');
      }
    }
  }

  if (alertissued) {
    var editsummary = document.getElementsByName('wpSummary')[0];
    var mysummary = 'Clean up [[:Category:Pages using duplicate arguments in template calls|duplicate template arguments]] using [[User:Frietjes/findargdups|findargdups]]';
    if (typeof editsummary === 'object') {
      // Optional user override for the edit summary text
      if (typeof findargdupseditsummary === 'string') {
        mysummary = findargdupseditsummary;
      }
      // Do not add the note twice
      if (editsummary.value.indexOf(mysummary) === -1) {
        // Use a "; " separator when the regex below finds existing summary
        // text at the end of the field; otherwise append the note directly
        if (editsummary.value.match(/[^\*\/\s][^\/\s]?\s*$/)) {
          editsummary.value += '; ' + mysummary;
        } else {
          editsummary.value += mysummary;
        }
      }
    }
  }
}
// -------------------------------------------------------------------------------- //

});