User:N8wilson/EggHunt.js

From Wikipedia, the free encyclopedia
Note: After saving, you have to bypass your browser's cache to see the changes. Google Chrome, Firefox, Microsoft Edge and Safari: Hold down the ⇧ Shift key and click the Reload toolbar button. For details and instructions about other browsers, see Wikipedia:Bypass your cache.
$.when( mw.loader.using( 'mediawiki.util' ), $.ready ).then( function () {
  
  // Cease and desist when outside of article space
  if ( mw.config.get( 'wgCanonicalNamespace' ) !== '') return;
  
  mcss = 3;        // minimum common substring to count
  numEggs = 0;     // will exclude "hidden" category
  marked = false;  // flag to keep from double-marking
  
  // Recursively find total length of *ordered* common sequences using
  // a greedy approach that always selects the longest unused sequence first.
  //
  // mat    -> mat[x][y] holds the length of the common run that ENDS at
  //           row x / column y (0 where the characters differ)
  // xs, xe -> x_start (inclusive), x_end (exclusive) of the window to search
  // ys, ye -> same pattern for the y axis
  // mx, my -> position of the longest run found in the window
  //
  // Returns the summed length of the runs selected inside the window; runs
  // shorter than mcss are ignored, so the result may be 0.
  var inCommon = function(mat, xs, xe, ys, ye) {
    // A window narrower than mcss in either direction cannot hold a countable run.
    if (xe - xs < mcss || ye - ys < mcss) return 0;

    // Loop variables are declared locally so recursion and other code cannot clobber them.
    var max = 0, mx = xs, my = ys;
    var xi, yi, run;
    for (xi = xs; xi < xe; xi++) {
      for (yi = ys; yi < ye; yi++) {
        // mat was filled for the whole strings, so a run ending here may have
        // started before this window. Only the part inside the window may be
        // counted, otherwise characters already used by an earlier (longer)
        // run would be counted a second time.
        run = Math.min(mat[xi][yi], xi - xs + 1, yi - ys + 1);
        if (run > max) {
          max = run;
          mx = xi;
          my = yi;
        }
      }
    }
    if (max < mcss) return 0;

    // Keep the longest run, then search strictly before it and strictly after
    // it on both axes so the selected runs stay in order and never overlap.
    return max +
      inCommon(mat, xs, mx - max + 1, ys, my - max + 1) +
      inCommon(mat, mx + 1, xe, my + 1, ye);
  };
  
  // Score a link using the total length of ordered common substrings (as
  // computed by inCommon) as a fraction of the length of the shortest string.
  //
  // short, long -> the two strings to compare; despite the names, either one
  //                may be the shorter (the divisor is the smaller length)
  //
  // Returns 0 when either argument is not a string or is empty; otherwise
  // returns inCommon's total divided by the shorter length.
  var strScore = function(short, long) {
    if (typeof short !== "string" || typeof long !== "string") return 0;
    // Empty input has nothing in common; bail out before indexing row 0 and
    // before dividing by a zero length.
    if (short.length === 0 || long.length === 0) return 0;

    var rows = short.length, cols = long.length;
    var table = new Array(rows);
    var s, l;
    for (s = 0; s < rows; s++) {
      table[s] = new Array(cols);
      for (l = 0; l < cols; l++) {
        if (short[s] !== long[l]) {
          table[s][l] = 0;
        } else {
          // Extend the run ending on the previous diagonal cell; cells in the
          // first row/column have no predecessor and start a run of length 1.
          table[s][l] = (s > 0 && l > 0) ? table[s - 1][l - 1] + 1 : 1;
        }
      }
    }

    var sharedSeqs = inCommon(table, 0, rows, 0, cols);
    return sharedSeqs / Math.min(rows, cols);
    // Alternative kept for reference (single longest common substring only):
    //return table.flat().reduce(function(a,b){return Math.max(a,b);}, 0) / Math.min(short.length, long.length);
  };
  
  // Score tiers, ordered from highest threshold to lowest. A link is filed
  // under the first tier whose `min` its score reaches.
  //   min  -> lowest score that still belongs to the tier
  //   mark -> text inserted after a tagged link (empty for the hidden tier)
  //   cnt  -> running tally of links filed under the tier
  //   name -> suffix of the "eggHunt-" CSS class added to the link
  var egg_cats = [
    {
      name: 'hidden',
      min: 0.85,
      mark: '',
      cnt: 0
    },
    {
      name: 'unlikely',
      min: 0.40,
      mark: '🥚',
      cnt: 0
    },
    {
      name: 'possible',
      min: 0.20,
      mark: '🐣',
      cnt: 0
    },
    {
      name: 'probable',
      min: 0.02,
      mark: '🐥',
      cnt: 0
    },
    {
      name: 'unmatched',
      min: 0.00,
      mark: '🐤',
      cnt: 0
    }
  ];
  
  // Filter level 1: links in paragraph tags of the article content
  $("#mw-content-text p a").filter(
    function() {
      // Filter level 2: links must have title attribute, visible text, and a target beginning with /wiki/....
      // (mostly so we can use these assumptions later)
      var link = $(this);
      var href = link.attr("href");
      // An anchor without an href yields undefined here; check the type so a
      // single such anchor cannot throw and abort the whole scan.
      return Boolean(link.attr("title") && link.text()) &&
        typeof href === "string" && href.indexOf("/wiki/") === 0;
    }
  ).filter(
    function() {
      // Filter level 3: Remove inline templates
      return $(this).parents(".Inline-Template").length === 0;
    }
  ).each(
    function() {
      var link = $(this);
      var title = link.attr("title");
      var text = link.text();

      // build a lower case text and a title with any disambig clarifiers removed (trailing parens)
      var loc = title.search(/[ _]\(.+\)$/);
      var title_lc = (loc >= 0 ? title.substring(0, loc) : title).toLowerCase();
      var text_lc = text.toLowerCase();

      // short-circuit if either the title or link text is fully contained in the other (not EGG)
      if (text_lc.indexOf(title_lc) >= 0 || title_lc.indexOf(text_lc) >= 0) return;

      // otherwise report possible EGG: file the link under the first
      // category whose minimum score it reaches
      var score = strScore(text_lc, title_lc);
      var c;
      for (c = 0; c < egg_cats.length; c++) {
        if (score >= egg_cats[c].min) {
          egg_cats[c].cnt++;
          link.addClass("eggHunt-" + egg_cats[c].name);
          break;
        }
      }
      console.log('[' + score.toFixed(3) + '], "' + text + '", "' + title + '"');
      numEggs++;
    }
  );
  
  // remove count of hidden eggs (egg_cats[0] is the "hidden" category)
  numEggs -= egg_cats[0].cnt; 
  
  // Install UI hook: a portlet link labelled with the remaining count.
  var node = mw.util.addPortletLink('p-cactions', "#", numEggs + ' possible 🥚s', 'ca-egghunt', 'Tag '+numEggs+' possible EASTEREGGs in article');
  $(node).on('click', function() {
    // Loop counter is local so it does not leak as an implicit global.
    var c;
    // Only the first click inserts markers; later clicks would duplicate them.
    if (!marked) { 
      for (c = 0; c < egg_cats.length; c++) {
        // Insert each category's mark right after every link tagged with it.
        $("a.eggHunt-" + egg_cats[c].name).after(egg_cats[c].mark); 
      }
      marked = true;
    }
    // Returning false keeps the "#" link from navigating.
    return false;
  });
  
});