// User:Harej/citation-watchlist.js

/* Per-wiki configuration */

const LANGUAGE = 'en';
const FAMILY = 'wikipedia';
const actionApiEndpoint = `https://${LANGUAGE}.${FAMILY}.org/w/api.php`;
const restApiEndpoint = `https://api.wikimedia.org/core/v1`;
const publicSuffixList = "Wikipedia:Citation_Watchlist/Public_Suffix_List";
const listOfLists = "Wikipedia:Citation_Watchlist/Lists";
const msgWarning = "Warning";
const msgCaution = "Caution";
const msgInspect = "Inspect";
const warnEmoji = '\u2757';
const cautionEmoji = '\u270B';
const inspectEmoji = '\uD83D\uDD0E';
const warnSectionHeader = "==Warn==";
const cautionSectionHeader = "==Caution==";
const inspectSectionHeader = "==Inspect==";


/*
Citation Watchlist Script – Highlights watchlist entries when questionable sources are added

author:  Hacks/Hackers
license: GPL 4.0
*/

let publicSuffixSet = new Set();
let warnList = new Set();
let cautionList = new Set();
let inspectList = new Set();

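// Prepends an indicator emoji (with a tooltip listing the matched domains) to a
// change list entry, and marks the element so the same indicator type is not added twice.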
function prependEmojiWithTooltip(element, emoji, domains, tooltipText) {
  let processedType = '';
  if (emoji === warnEmoji) {
    processedType = 'warn';
  } else if (emoji === cautionEmoji) {
    processedType = 'caution';
  } else if (emoji === inspectEmoji) {
    processedType = 'inspect';
  } else {
    console.error('Unsupported emoji type');
    return;
  }

  if (element.getAttribute(`data-processed-${processedType}`) === 'true') {
    return;
  }

  const emojiSpan = document.createElement('span');
  emojiSpan.textContent = emoji + " ";
  emojiSpan.title = tooltipText + ": " + domains.join(", ");
  element.parentNode.insertBefore(emojiSpan, element);
  element.setAttribute(`data-processed-${processedType}`, 'true');
}

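// Walks each watchlist, recent changes, or page history entry, determines which
// revision(s) it represents, and hands them to fetchDiffAndProcess.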
async function parseWatchlist() {
  const entriesContainers = document.querySelectorAll('.mw-changeslist-links');
  for (const container of entriesContainers) {
    const diffLink = container.querySelector('a.mw-changeslist-diff');
    const histLink = container.querySelector('a.mw-changeslist-history');
    const prevLink = container.querySelector('a.mw-history-histlinks-previous');
    const curLink = container.querySelector('a.mw-history-histlinks-current');
    
    // Despite the names, oldrevision holds the revision whose added URLs are checked,
    // and newrevision (when present) holds the older revision it is compared against.
    let revision = null;
    let urlParams = null;

    if (diffLink) {
      urlParams = new URL(diffLink.href).searchParams;
      revision = {
        oldrevision: urlParams.get('diff'),
        newrevision: urlParams.get('oldid'),
        element: diffLink.parentNode.parentNode
      };
    } else if (histLink) {
      urlParams = new URL(histLink.href).searchParams;
      const pageID = urlParams.get('curid');
      const firstID = await fetchFirstRevisionId(pageID);
      revision = {
        oldrevision: firstID,
        element: histLink.parentNode.parentNode
      };
    } else if (prevLink) {
      urlParams = new URL(prevLink.href).searchParams;
      const previousRevisionMap = await fetchPreviousRevisionIds([urlParams.get('oldid')]);
      revision = {
        oldrevision: urlParams.get('oldid'),
        newrevision: previousRevisionMap[urlParams.get('oldid')],
        element: prevLink.parentNode.parentNode
      };
    } else if (curLink) {
      urlParams = new URL(curLink.href).searchParams;
      revision = {
        oldrevision: urlParams.get('oldid'),
        element: curLink.parentNode.parentNode
      };
    }
    if (revision) {
      await fetchDiffAndProcess(revision);
    }
  }
}

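// Builds an Action API request URL from a plain object of query parameters.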
async function buildURL(params) {
  const url = new URL(actionApiEndpoint);
  Object.keys(params).forEach(key => url.searchParams.append(key, params[key]));
  return url;
}

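// Reduces a hostname to its registrable (root) domain using the Public Suffix List,
// e.g. "www.example.co.uk" becomes "example.co.uk".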
function getRootDomain(hostname, publicSuffixSet) {
  const domainParts = hostname.split('.');
  for (let i = 0; i < domainParts.length; i++) {
    const candidate = domainParts.slice(i).join('.');
    if (publicSuffixSet.has(candidate) || publicSuffixSet.has(`!${candidate}`)) {
      return domainParts.slice(i - 1).join('.');
    }
  }
  return hostname;
}

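// Extracts every http(s) URL found in a blob of wikitext; the "added" filtering
// against the comparison revision happens later in fetchDiffAndProcess.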
function extractAddedURLs(wikitext) {
  const addedURLs = [];
  const urlRegex = /https?:\/\/[^\s<"]+/g;
  let match;
  while ((match = urlRegex.exec(wikitext)) !== null) {
    try {
      const url = new URL(match[0]);
      addedURLs.push(url.href);
    } catch (error) {
      console.error(`Invalid URL rejected: ${match[0]}`);
    }
  }
  return addedURLs;
}

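// Performs a GET request against the Action API and returns the parsed JSON response.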
async function fetchFromActionAPI(params) {
  const url = await buildURL(params);
  console.log(`Action API request: ${url}`);

  try {
    const response = await fetch(url);
    if (!response.ok) {
      throw new Error(`Network response was not ok: ${response.statusText}`);
    }
    return await response.json();
  } catch (error) {
    console.error('Error fetching data from MediaWiki API:', error);
    throw error;
  }
}

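// Fetches the wikitext of a single revision using mw.Api.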
async function fetchRevisionContent(revId) {
  const api = new mw.Api();
  try {
    const data = await api.get({
      action: 'query',
      prop: 'revisions',
      revids: revId,
      rvprop: 'content'
    });
    const page = Object.values(data.query.pages)[0];
    if (page.revisions && page.revisions.length > 0) {
      return page.revisions[0]['*'];
    } else {
      throw new Error("Revision not found.");
    }
  } catch (error) {
    console.error(error);
    throw error;
  }
}

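// Determines which URLs the revision added, matches their root domains against the
// warn, caution, and inspect lists, and prepends the corresponding emoji to the entry.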
async function fetchDiffAndProcess(revision) {
  const oldWikitext = await fetchRevisionContent(revision.oldrevision);
  const fromURLs = extractAddedURLs(oldWikitext);

  // First revisions of articles are designated as having an oldrevision but not
  // a newrevision.
  let newWikitext = null;
  let toURLs = [];
  if (revision.newrevision) {
    newWikitext = await fetchRevisionContent(revision.newrevision);
    toURLs = extractAddedURLs(newWikitext);
  }

  // URLs present in the checked revision but absent from the comparison revision,
  // i.e. newly added links.
  const toURLSet = new Set(toURLs);
  const addedURLs = fromURLs.filter(url => !toURLSet.has(url));

  console.log(`Old revision: ${revision.oldrevision}
  New revision: ${revision.newrevision}
  Revision element: ${revision.element.innerHTML}
  Added URLs: ${addedURLs.join(' ')}
  `);

  const matchedWarnDomains = [];
  const matchedCautionDomains = [];
  const matchedInspectDomains = [];

  for (const url of addedURLs) {
    const hostname = new URL(url).hostname;
    const domain = getRootDomain(hostname, publicSuffixSet);

    if (warnList.has(domain) && !matchedWarnDomains.includes(domain)) {
      matchedWarnDomains.push(domain);
    } else if (cautionList.has(domain) && !matchedCautionDomains.includes(domain)) {
      matchedCautionDomains.push(domain);
    } else if (inspectList.has(domain) && !matchedInspectDomains.includes(domain)) {
      matchedInspectDomains.push(domain);
    }
  }

  if (matchedWarnDomains.length > 0) {
    prependEmojiWithTooltip(revision.element, warnEmoji, matchedWarnDomains, msgWarning);
  }
  if (matchedCautionDomains.length > 0) {
    prependEmojiWithTooltip(revision.element, cautionEmoji, matchedCautionDomains, msgCaution);
  }
  if (matchedInspectDomains.length > 0) {
    prependEmojiWithTooltip(revision.element, inspectEmoji, matchedInspectDomains, msgInspect);
  }
}

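// Fetches the given domain list pages and sorts their bulleted entries into warn,
// caution, and inspect sets according to the ==Warn==, ==Caution==, and ==Inspect== sections.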
async function fetchAndOrganizeDomainLists(pageNames) {
  const params = {
    action: 'query',
    prop: 'revisions',
    titles: pageNames.join('|'), // Join all page names
    rvprop: 'content',
    rvslots: '*',
    format: 'json',
    origin: '*'
  };

  try {
    const data = await fetchFromActionAPI(params);
    const pages = data.query.pages;
    
    const warnList = new Set();
    const cautionList = new Set();
    const inspectList = new Set();

    for (const pageId in pages) {
      const content = pages[pageId].revisions[0].slots.main['*'];
      let currentList = null;

      const lines = content.split('\n');
      for (let line of lines) {
        if (line.trim() === warnSectionHeader) {
          currentList = warnList;
        } else if (line.trim() === cautionSectionHeader) {
          currentList = cautionList;
        } else if (line.trim() === inspectSectionHeader) {
          currentList = inspectList;
        }

        if (line.startsWith('*') && currentList) {
          const domain = line.substring(1).trim();
          currentList.add(domain);
        }
      }
    }

    return {
      warnList,
      cautionList,
      inspectList
    };
  } catch (error) {
    console.error('Error fetching or parsing the page content:', error);
    throw error;
  }
}

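// Maps each given revision ID to the ID of its parent (previous) revision.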
async function fetchPreviousRevisionIds(revisionIds) {
  const params = {
    action: 'query',
    prop: 'revisions',
    revids: revisionIds.join('|'), // join all revision IDs
    rvprop: 'ids',
    format: 'json',
    origin: '*'
  };

  try {
    const data = await fetchFromActionAPI(params);
    const pages = data.query.pages;
    const revisionMap = {};
    for (const pageId in pages) {
      const revisions = pages[pageId].revisions;
      if (revisions && revisions.length > 0) {
        for (const revision of revisions) {
          revisionMap[revision.revid] = revision.parentid;
        }
      }
    }
    return revisionMap;
  } catch (error) {
    console.error('Error fetching previous revision IDs:', error);
    return {};
  }
}

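// Returns the ID of the earliest revision of a page, or null on failure.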
async function fetchFirstRevisionId(pageID) {
  const params = {
    action: 'query',
    pageids: pageID,
    prop: 'revisions',
    rvlimit: 1,
    rvdir: 'newer',
    format: 'json',
    origin: '*'
  };

  try {
    const data = await fetchFromActionAPI(params);
    const pages = data.query.pages;
    const pageId = Object.keys(pages)[0];
    const revisions = pages[pageId].revisions;

    if (revisions && revisions.length > 0) {
      return revisions[0].revid;
    } else {
      throw new Error('No revisions found for this page.');
    }
  } catch (error) {
    console.error('Error fetching first revision ID:', error);
    return null;
  }
}

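// Reads the "list of lists" page and returns the titles of the linked domain list
// pages, caching the result in localStorage for four hours.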
async function fetchDomainListPages(pageName) {
  const cacheKey = `citationWatchlistFetchDomainListPages_${pageName}`;
  const cacheExpiration = 4 * 60 * 60 * 1000; // 4 hours in milliseconds
  const now = Date.now();
  const cachedData = localStorage.getItem(cacheKey);
  const cachedTimestamp = localStorage.getItem(`${cacheKey}_timestamp`);
  if (cachedData && cachedTimestamp && (now - parseInt(cachedTimestamp, 10)) < cacheExpiration) {
    console.log("Loaded list of lists from cache");
    return JSON.parse(cachedData);
  } else {
    const params = {
      action: 'query',
      prop: 'revisions',
      titles: pageName,
      rvprop: 'content',
      rvslots: '*',
      format: 'json',
      origin: '*'
    };
    try {
      const data = await fetchFromActionAPI(params);
      const page = data.query.pages;
      const pageId = Object.keys(page)[0];
      const content = page[pageId].revisions[0].slots.main['*'];
      const pageTitles = [];
      const lines = content.split('\n');
      for (let line of lines) {
        if (line.startsWith('* [[')) {
          const match = line.match(/\[\[([^\]]+)\]\]/); // Matches the first instance of [[Page Title]]
          if (match) {
            pageTitles.push(match[1]);
          }
        }
      }
      localStorage.setItem(cacheKey, JSON.stringify(pageTitles));
      localStorage.setItem(`${cacheKey}_timestamp`, now.toString());
      console.log("Loaded from API and stored in cache");
      return pageTitles;
    } catch (error) {
      console.error('Error fetching or parsing the page content:', error);
      throw error;
    }
  }
}

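// Loads the on-wiki copy of the Public Suffix List and returns its entries as a Set
// (empty on failure).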
async function fetchPublicSuffixList() {
  const pslUrl = `https://${LANGUAGE}.${FAMILY}.org/wiki/${publicSuffixList}?action=raw`;
  console.log(`Raw page text request: ${pslUrl}`);
  try {
    const response = await fetch(pslUrl);
    const content = await response.text();
    const suffixSet = new Set();
    const lines = content.split('\n');
    for (const line of lines) {
      if (line.trim() && !line.trim().startsWith('//')) {
        suffixSet.add(line.trim());
      }
    }
    return suffixSet;
  } catch (error) {
    console.error("Error fetching Public Suffix List:", error);
    return new Set();
  }
}

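// Entry point: loads the Public Suffix List and the domain lists, merges their
// entries into the module-level sets, then annotates the entries on the current page.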
async function runScript() {
  publicSuffixSet = await fetchPublicSuffixList();
  if (publicSuffixSet.size === 0) {
    console.error('Public Suffix List loading failed');
    return;
  }
  console.log("Welcome to Citation Watchlist");
  const listPages = await fetchDomainListPages(listOfLists);
  try {
    const lists = await fetchAndOrganizeDomainLists(listPages);
    lists.warnList.forEach(warnList.add, warnList);
    lists.cautionList.forEach(cautionList.add, cautionList);
    lists.inspectList.forEach(inspectList.add, inspectList);
  } catch (error) {
    console.error('Error fetching domain lists:', error);
  }
  await parseWatchlist();
}

runScript().then(() => console.log('Citation Watchlist script finished executing'));