User:Harej/citation-watchlist.js
Appearance
Code that you insert on this page could contain malicious content capable of compromising your account. If you import a script from another page with "importScript", "mw.loader.load", "iusc", or "lusc", take note that this causes you to dynamically load a remote script, which could be changed by others. Editors are responsible for all edits and actions they perform, including by scripts. User scripts are not centrally supported and may malfunction or become inoperable due to software changes. A guide to help you find broken scripts is available. If you are unsure whether code you are adding to this page is safe, you can ask at the appropriate village pump. This code will be executed when previewing this page.
This user script seems to have a documentation page at User:Harej/citation-watchlist.
/* Per-wiki configuration: edit these values to run the script on another wiki. */

// Which wiki to talk to, and the API endpoints derived from it.
const LANGUAGE = "en";
const FAMILY = "wikipedia";
const actionApiEndpoint = `https://${LANGUAGE}.${FAMILY}.org/w/api.php`;
const restApiEndpoint = "https://api.wikimedia.org/core/v1";

// On-wiki pages holding the Public Suffix List copy and the index of domain lists.
const publicSuffixList = 'Wikipedia:Citation_Watchlist/Public_Suffix_List';
const listOfLists = 'Wikipedia:Citation_Watchlist/Lists';

// Tooltip prefixes, one per severity level.
const msgWarning = 'Warning';
const msgCaution = 'Caution';
const msgInspect = 'Inspect';

// Marker glyphs, one per severity level (written as escapes to survive re-encoding).
const warnEmoji = "\u2757";
const cautionEmoji = "\u270B";
const inspectEmoji = "\u{1F50E}";

// Exact heading lines that open each severity section on a domain list page.
const warnSectionHeader = '==Warn==';
const cautionSectionHeader = '==Caution==';
const inspectSectionHeader = '==Inspect==';
/*
Citation Watchlist Script – Highlights watchlist entries when questionable sources are added
author: Hacks/Hackers
license: GPL 4.0
*/
// Shared script state. runScript() replaces publicSuffixSet with the fetched list and adds
// the fetched domains to the three sets below; fetchDiffAndProcess() reads all four.
let publicSuffixSet = new Set(); // rules from the on-wiki Public Suffix List page
let warnList = new Set(); // domains flagged with warnEmoji
let cautionList = new Set(); // domains flagged with cautionEmoji
let inspectList = new Set(); // domains flagged with inspectEmoji
/**
 * Inserts a marker <span> (emoji plus tooltip) immediately before `element`.
 *
 * The emoji must be one of warnEmoji, cautionEmoji or inspectEmoji; anything else
 * is reported on the console and ignored. Each element receives at most one marker
 * per severity: a `data-processed-<severity>` attribute records that it was added.
 *
 * @param {Element} element - Node the marker is placed in front of.
 * @param {string} emoji - One of the three configured marker glyphs.
 * @param {string[]} domains - Matched domains, listed in the tooltip.
 * @param {string} tooltipText - Tooltip prefix shown before the domain list.
 */
function prependEmojiWithTooltip(element, emoji, domains, tooltipText) {
  // Ordered pairs so the first matching glyph wins, as in a chained comparison.
  const severityByGlyph = [
    [warnEmoji, 'warn'],
    [cautionEmoji, 'caution'],
    [inspectEmoji, 'inspect'],
  ];
  const hit = severityByGlyph.find(([glyph]) => glyph === emoji);
  if (!hit) {
    console.error('Unsupported emoji type');
    return;
  }

  const flagName = `data-processed-${hit[1]}`;
  const alreadyMarked = element.getAttribute(flagName) === 'true';
  if (alreadyMarked) {
    return;
  }

  const marker = document.createElement('span');
  marker.textContent = `${emoji} `;
  marker.title = `${tooltipText}: ${domains.join(", ")}`;
  element.parentNode.insertBefore(marker, element);
  element.setAttribute(flagName, 'true');
}
/**
 * Returns the query-string parameters of a link.
 *
 * The href is parsed as a URL first: handing a whole URL to `new URLSearchParams()`
 * would fuse everything before the first "=" (scheme, host, path) into the first key,
 * making the first query parameter unreadable.
 */
function linkQueryParams(link) {
  return new URL(link.href).searchParams;
}

/**
 * Works out which revision a changes-list entry refers to.
 *
 * In the returned object `oldrevision` is the revision that gets inspected and
 * `newrevision` (optional) is the revision it is compared against; an entry without
 * `newrevision` is treated by fetchDiffAndProcess() as having no baseline.
 * Resolves to null when the entry has no recognised link or a lookup failed.
 */
async function resolveEntryRevision(container) {
  const diffLink = container.querySelector('a.mw-changeslist-diff');
  if (diffLink) {
    const query = linkQueryParams(diffLink);
    return {
      oldrevision: query.get('diff'),
      newrevision: query.get('oldid'),
      element: diffLink.parentNode.parentNode
    };
  }

  const histLink = container.querySelector('a.mw-changeslist-history');
  if (histLink) {
    const pageID = linkQueryParams(histLink).get('curid');
    const firstID = await fetchFirstRevisionId(pageID);
    if (firstID == null) {
      // fetchFirstRevisionId() already logged the failure; there is nothing to inspect.
      return null;
    }
    return {
      oldrevision: firstID,
      element: histLink.parentNode.parentNode
    };
  }

  const prevLink = container.querySelector('a.mw-history-histlinks-previous');
  if (prevLink) {
    const revisionId = linkQueryParams(prevLink).get('oldid');
    const previousRevisionMap = await fetchPreviousRevisionIds([revisionId]);
    if (!Object.prototype.hasOwnProperty.call(previousRevisionMap, revisionId)) {
      // Parent lookup failed. Going on without a baseline would report every URL
      // already in the page as newly added.
      return null;
    }
    return {
      oldrevision: revisionId,
      newrevision: previousRevisionMap[revisionId],
      element: prevLink.parentNode.parentNode
    };
  }

  const curLink = container.querySelector('a.mw-history-histlinks-current');
  if (curLink) {
    return {
      oldrevision: linkQueryParams(curLink).get('oldid'),
      element: curLink.parentNode.parentNode
    };
  }

  return null;
}

/**
 * Walks every `.mw-changeslist-links` container on the page, resolves the revision
 * each one points at and hands it to fetchDiffAndProcess().
 *
 * Entries are handled one after another. A failure in one entry is logged and the
 * scan moves on, so a single bad revision cannot stop the rest from being checked.
 *
 * @returns {Promise<void>}
 */
async function parseWatchlist() {
  const entriesContainers = document.querySelectorAll('.mw-changeslist-links');
  for (const container of entriesContainers) {
    try {
      const revision = await resolveEntryRevision(container);
      if (revision) {
        await fetchDiffAndProcess(revision);
      }
    } catch (error) {
      console.error('Citation Watchlist: skipping an entry that could not be processed:', error);
    }
  }
}
/**
 * Builds an Action API request URL: actionApiEndpoint plus one query parameter per
 * key of `params`, appended in key order.
 *
 * @param {Object} params - Query parameter names mapped to their values.
 * @returns {Promise<URL>} The assembled URL object.
 */
async function buildURL(params) {
  const endpoint = new URL(actionApiEndpoint);
  for (const name of Object.keys(params)) {
    endpoint.searchParams.append(name, params[name]);
  }
  return endpoint;
}
/**
 * Reduces a hostname to its registrable ("root") domain using Public Suffix List rules.
 *
 * Three rule kinds are recognised in `publicSuffixSet`:
 *  - plain rules ("co.uk"): the suffix itself;
 *  - wildcard rules ("*.ck"): any single label in front of "ck" is also a suffix;
 *  - exception rules ("!www.ck"): the named domain is registrable despite a wildcard.
 *
 * The registrable domain is the matched public suffix plus one more label. When the
 * hostname is itself a public suffix, or no rule matches at all, the hostname is
 * returned unchanged.
 *
 * @param {string} hostname - Host to reduce, e.g. "news.example.co.uk".
 * @param {Set<string>} publicSuffixSet - Rule lines of the Public Suffix List.
 * @returns {string} The registrable domain, or `hostname` if none can be derived.
 */
function getRootDomain(hostname, publicSuffixSet) {
  const labels = hostname.split('.');

  // Exception rules win over every other rule, so look for them first.
  for (let i = 0; i < labels.length; i++) {
    const candidate = labels.slice(i).join('.');
    if (publicSuffixSet.has(`!${candidate}`)) {
      return candidate;
    }
  }

  // Otherwise the longest matching suffix prevails; i = 0 is the longest candidate.
  for (let i = 0; i < labels.length; i++) {
    const candidate = labels.slice(i).join('.');
    const parent = labels.slice(i + 1).join('.');
    const isPublicSuffix =
      publicSuffixSet.has(candidate) ||
      (parent !== '' && publicSuffixSet.has(`*.${parent}`));
    if (isPublicSuffix) {
      // With i === 0 there is no label left to prepend (slice(-1) would wrongly
      // yield just the last label), so the hostname is returned as-is.
      return i === 0 ? hostname : labels.slice(i - 1).join('.');
    }
  }

  return hostname;
}
/**
 * Extracts every http(s) URL found in a piece of wikitext.
 *
 * Despite its name the function does no diffing: it returns all URLs in the text,
 * normalised through the URL parser, in order of appearance (duplicates included).
 * Deciding which of them were "added" is left to the caller.
 *
 * A URL ends at whitespace, `<`, `>`, `"` or at the wikitext delimiters `[ ] { } |`.
 * Without the delimiter stop-set, template usage such as `url=https://example.com|title=…`
 * or `url=https://example.com}}` was captured together with the trailing markup and
 * then either rejected as invalid or given a bogus hostname.
 *
 * @param {string} wikitext - Revision text; null/undefined is treated as empty.
 * @returns {string[]} Normalised URL strings.
 */
function extractAddedURLs(wikitext) {
  if (wikitext == null) {
    return [];
  }
  const urlRegex = /https?:\/\/[^\s<>"\[\]{}|]+/g;
  const candidates = String(wikitext).match(urlRegex) || [];
  const addedURLs = [];
  for (const candidate of candidates) {
    try {
      addedURLs.push(new URL(candidate).href);
    } catch (error) {
      console.error(`Invalid URL rejected: ${candidate}`);
    }
  }
  return addedURLs;
}
/**
 * Sends a GET request to the Action API and returns the decoded JSON body.
 *
 * The request URL comes from buildURL(params) and is logged before sending.
 * A non-OK HTTP status is turned into an Error; every failure is logged and
 * then rethrown to the caller.
 *
 * @param {Object} params - Action API query parameters.
 * @returns {Promise<*>} Parsed JSON response.
 * @throws {Error} On network failure, non-OK status or invalid JSON.
 */
async function fetchFromActionAPI(params) {
  const requestUrl = await buildURL(params);
  console.log(`Action API request: ${requestUrl}`);
  try {
    const reply = await fetch(requestUrl);
    if (reply.ok) {
      return await reply.json();
    }
    throw new Error(`Network response was not ok: ${reply.statusText}`);
  } catch (failure) {
    console.error('Error fetching data from MediaWiki API:', failure);
    throw failure;
  }
}
/**
 * Fetches the content of one revision through `mw.Api`.
 *
 * Only the first page of the response is examined; the value returned is the `*`
 * field of that page's first revision.
 *
 * @param {number|string} revId - Revision ID to load.
 * @returns {Promise<string>} Content of the revision.
 * @throws {Error} "Revision not found." when the response has no revision, or
 *   whatever the API call itself rejects with. Errors are logged, then rethrown.
 */
async function fetchRevisionContent(revId) {
  const client = new mw.Api();
  const request = {
    action: 'query',
    prop: 'revisions',
    revids: revId,
    rvprop: 'content'
  };
  try {
    const response = await client.get(request);
    const firstPage = Object.values(response.query.pages)[0];
    const revisions = firstPage.revisions;
    if (!(revisions && revisions.length > 0)) {
      throw new Error("Revision not found.");
    }
    return revisions[0]['*'];
  } catch (failure) {
    console.error(failure);
    throw failure;
  }
}
/**
 * Compares the URLs of two revisions and marks the entry when newly added URLs
 * point at listed domains.
 *
 * Naming note: `revision.oldrevision` is the revision being inspected and
 * `revision.newrevision` is the revision it is compared against. URLs present in
 * `oldrevision` but absent from `newrevision` count as added. When `newrevision`
 * is missing or falsy there is no baseline, so every URL counts as added.
 *
 * Each added URL is reduced to its root domain and checked against warnList,
 * cautionList and inspectList in that order; a domain is reported once, under the
 * first (most severe) list that contains it.
 *
 * @param {{oldrevision: (string|number), newrevision: (string|number|undefined), element: Element}} revision
 * @returns {Promise<void>}
 * @throws Whatever fetchRevisionContent() rejects with.
 */
async function fetchDiffAndProcess(revision) {
  // The two revisions are independent of each other, so load them together.
  const [inspectedWikitext, baselineWikitext] = await Promise.all([
    fetchRevisionContent(revision.oldrevision),
    revision.newrevision ? fetchRevisionContent(revision.newrevision) : null
  ]);
  const fromURLs = extractAddedURLs(inspectedWikitext);
  const toURLs = revision.newrevision ? extractAddedURLs(baselineWikitext) : [];

  const toURLSet = new Set(toURLs);
  // Declared locally: the bare assignment used before created an implicit global,
  // which is a ReferenceError in strict mode.
  const addedURLs = fromURLs.filter((url) => !toURLSet.has(url));
  console.log([
    `Old revision: ${revision.oldrevision}`,
    `New revision: ${revision.newrevision}`,
    `Revision element: ${revision.element.innerHTML}`,
    `Added URLs: ${addedURLs.join(' ')}`,
    ''
  ].join('\n'));

  const matchedWarnDomains = new Set();
  const matchedCautionDomains = new Set();
  const matchedInspectDomains = new Set();
  for (const url of addedURLs) {
    const hostname = new URL(url).hostname;
    const domain = getRootDomain(hostname, publicSuffixSet);
    // List membership alone picks the severity. Mixing the "already reported" test
    // into these conditions let a repeated warn-listed domain fall through and be
    // reported a second time as caution/inspect.
    if (warnList.has(domain)) {
      matchedWarnDomains.add(domain);
    } else if (cautionList.has(domain)) {
      matchedCautionDomains.add(domain);
    } else if (inspectList.has(domain)) {
      matchedInspectDomains.add(domain);
    }
  }

  if (matchedWarnDomains.size > 0) {
    prependEmojiWithTooltip(revision.element, warnEmoji, [...matchedWarnDomains], msgWarning);
  }
  if (matchedCautionDomains.size > 0) {
    prependEmojiWithTooltip(revision.element, cautionEmoji, [...matchedCautionDomains], msgCaution);
  }
  if (matchedInspectDomains.size > 0) {
    prependEmojiWithTooltip(revision.element, inspectEmoji, [...matchedInspectDomains], msgInspect);
  }
}
/**
 * Loads the given domain list pages in a single Action API request and sorts their
 * bullet entries into warn / caution / inspect sets.
 *
 * Page format: a line equal (after trimming) to warnSectionHeader,
 * cautionSectionHeader or inspectSectionHeader switches the current section; every
 * following line that starts with "*" contributes one domain to that section.
 * Bullets before the first recognised header are ignored. Domains are lower-cased
 * because they are later compared with URL hostnames, which are always lower case.
 *
 * Pages that are missing or carry no content are skipped with a warning, so one bad
 * title no longer discards all lists. An empty `pageNames` yields empty sets
 * without a request.
 *
 * @param {string[]} pageNames - Titles of the domain list pages.
 * @returns {Promise<{warnList: Set<string>, cautionList: Set<string>, inspectList: Set<string>}>}
 * @throws Whatever fetchFromActionAPI() rejects with (logged, then rethrown).
 */
async function fetchAndOrganizeDomainLists(pageNames) {
  const warnList = new Set();
  const cautionList = new Set();
  const inspectList = new Set();
  const organized = { warnList, cautionList, inspectList };
  if (!pageNames || pageNames.length === 0) {
    return organized;
  }

  const params = {
    action: 'query',
    prop: 'revisions',
    titles: pageNames.join('|'), // Join all page names
    rvprop: 'content',
    rvslots: '*',
    format: 'json',
    origin: '*'
  };
  const listByHeader = new Map([
    [warnSectionHeader, warnList],
    [cautionSectionHeader, cautionList],
    [inspectSectionHeader, inspectList]
  ]);

  try {
    const data = await fetchFromActionAPI(params);
    const pages = (data.query && data.query.pages) || {};
    for (const page of Object.values(pages)) {
      const revision = page.revisions && page.revisions[0];
      const mainSlot = revision && revision.slots && revision.slots.main;
      const content = mainSlot && mainSlot['*'];
      if (typeof content !== 'string') {
        console.warn(`Skipping domain list page without content: ${page.title}`);
        continue;
      }

      let currentList = null;
      for (const line of content.split('\n')) {
        const heading = line.trim();
        if (listByHeader.has(heading)) {
          currentList = listByHeader.get(heading);
          continue;
        }
        if (currentList && line.startsWith('*')) {
          const domain = line.substring(1).trim().toLowerCase();
          if (domain !== '') {
            currentList.add(domain);
          }
        }
      }
    }
    return organized;
  } catch (error) {
    console.error('Error fetching or parsing the page content:', error);
    throw error;
  }
}
/**
 * Looks up the parent revision of each given revision via the Action API.
 *
 * @param {Array<string|number>} revisionIds - Revision IDs, sent as one "|"-joined query.
 * @returns {Promise<Object>} Map of revision ID to its `parentid`. If the request or
 *   the parsing fails, the error is logged and an empty object is returned instead.
 */
async function fetchPreviousRevisionIds(revisionIds) {
  const query = {
    action: 'query',
    prop: 'revisions',
    revids: revisionIds.join('|'), // all revision IDs in one request
    rvprop: 'ids',
    format: 'json',
    origin: '*'
  };
  try {
    const response = await fetchFromActionAPI(query);
    const pages = response.query.pages;
    const parentByRevision = {};
    for (const page of Object.values(pages || {})) {
      for (const entry of page.revisions || []) {
        parentByRevision[entry.revid] = entry.parentid;
      }
    }
    return parentByRevision;
  } catch (failure) {
    console.error('Error fetching previous revision IDs:', failure);
    return {};
  }
}
/**
 * Asks the Action API for the earliest revision of a page (rvdir=newer, rvlimit=1).
 *
 * @param {string|number} pageID - Page ID to look up.
 * @returns {Promise<number|null>} The `revid` of that revision, or null when the page
 *   has no revisions or the request fails (the error is logged either way).
 */
async function fetchFirstRevisionId(pageID) {
  const query = {
    action: 'query',
    pageids: pageID,
    prop: 'revisions',
    rvlimit: 1,
    rvdir: 'newer',
    format: 'json',
    origin: '*'
  };
  try {
    const response = await fetchFromActionAPI(query);
    const pages = response.query.pages;
    const firstKey = Object.keys(pages)[0];
    const history = pages[firstKey].revisions;
    if (!(history && history.length > 0)) {
      throw new Error('No revisions found for this page.');
    }
    return history[0].revid;
  } catch (failure) {
    console.error('Error fetching first revision ID:', failure);
    return null;
  }
}
/**
 * Returns the titles of the domain list pages linked from the "list of lists" page.
 *
 * The result is cached in localStorage for four hours under a key derived from
 * `pageName`. The cache is best-effort: an unreadable or corrupt entry is ignored
 * and the page is fetched again, and a failed cache write (storage full or
 * disabled) does not discard titles that were fetched successfully.
 *
 * Index page format: every line that starts with "*" followed by a wikilink
 * contributes the link target. For piped links ("[[Title|Label]]") only the title
 * part is used; the label must not be sent on, because "|" separates titles in the
 * follow-up API query.
 *
 * @param {string} pageName - Title of the index page.
 * @returns {Promise<string[]>} Titles of the linked list pages.
 * @throws When the page cannot be fetched or has no content (logged, then rethrown).
 */
async function fetchDomainListPages(pageName) {
  const cacheKey = `citationWatchlistFetchDomainListPages_${pageName}`;
  const timestampKey = `${cacheKey}_timestamp`;
  const cacheExpiration = 4 * 60 * 60 * 1000; // 4 hours in milliseconds
  const now = Date.now();

  try {
    const cachedData = localStorage.getItem(cacheKey);
    // A missing timestamp parses to NaN, which fails the freshness comparison.
    const cachedAt = parseInt(localStorage.getItem(timestampKey), 10);
    if (cachedData && (now - cachedAt) < cacheExpiration) {
      const cachedTitles = JSON.parse(cachedData);
      if (Array.isArray(cachedTitles)) {
        console.log("Loaded list of lists from cache");
        return cachedTitles;
      }
    }
  } catch (error) {
    console.warn('Ignoring unreadable list-of-lists cache:', error);
  }

  const params = {
    action: 'query',
    prop: 'revisions',
    titles: pageName,
    rvprop: 'content',
    rvslots: '*',
    format: 'json',
    origin: '*'
  };
  let pageTitles;
  try {
    const data = await fetchFromActionAPI(params);
    const page = data.query.pages;
    const pageId = Object.keys(page)[0];
    const content = page[pageId].revisions[0].slots.main['*'];
    // Bullet, optional spaces, then the first [[link]]; stop the title at "|" or "]".
    const bulletLink = /^\*\s*\[\[([^\]|]+)(?:\|[^\]]*)?\]\]/;
    pageTitles = [];
    for (const line of content.split('\n')) {
      const match = line.match(bulletLink);
      if (match && match[1].trim() !== '') {
        pageTitles.push(match[1].trim());
      }
    }
  } catch (error) {
    console.error('Error fetching or parsing the page content:', error);
    throw error;
  }

  try {
    localStorage.setItem(cacheKey, JSON.stringify(pageTitles));
    localStorage.setItem(timestampKey, now.toString());
    console.log("Loaded from API and stored in cache");
  } catch (error) {
    console.warn('Loaded from API but could not store the result in cache:', error);
  }
  return pageTitles;
}
/**
 * Downloads the wiki-hosted copy of the Public Suffix List (raw page text) and
 * returns its rules as a Set.
 *
 * Blank lines and lines starting with "//" are skipped; every other line is added
 * after trimming. A non-OK HTTP status is treated as a failure, so an error page
 * is not mistaken for a list of suffixes.
 *
 * @returns {Promise<Set<string>>} The suffix rules; an empty Set when the download
 *   fails (the error is logged).
 */
async function fetchPublicSuffixList() {
  const pslUrl = `https://${LANGUAGE}.${FAMILY}.org/wiki/${publicSuffixList}?action=raw`;
  console.log(`Raw page text request: ${pslUrl}`);
  try {
    const response = await fetch(pslUrl);
    if (!response.ok) {
      throw new Error(`Network response was not ok: ${response.status} ${response.statusText}`);
    }
    const content = await response.text();
    const suffixSet = new Set();
    for (const line of content.split('\n')) {
      const rule = line.trim();
      if (rule && !rule.startsWith('//')) {
        suffixSet.add(rule);
      }
    }
    return suffixSet;
  } catch (error) {
    console.error("Error fetching Public Suffix List:", error);
    return new Set();
  }
}
/**
 * Script entry point: loads the Public Suffix List and the domain lists, then scans
 * the changes list on the current page.
 *
 * Steps:
 *  1. Replace publicSuffixSet with the fetched list; stop if it came back empty.
 *  2. Fetch the index of list pages and the lists themselves, merging the results
 *     into the shared warnList / cautionList / inspectList sets.
 *  3. Run parseWatchlist(), unless no domain at all is loaded: with empty lists
 *     nothing could match, and the scan would only fire one API request per entry.
 *
 * Failures while loading the lists are logged, not thrown; the index lookup is
 * inside the same try block as the list download so neither can surface as an
 * unhandled rejection.
 *
 * @returns {Promise<void>}
 */
async function runScript() {
  publicSuffixSet = await fetchPublicSuffixList();
  if (publicSuffixSet.size === 0) {
    console.error('Public Suffix List loading failed');
    return;
  }
  console.log("Welcome to Citation Watchlist");

  try {
    const listPages = await fetchDomainListPages(listOfLists);
    const lists = await fetchAndOrganizeDomainLists(listPages);
    for (const domain of lists.warnList) {
      warnList.add(domain);
    }
    for (const domain of lists.cautionList) {
      cautionList.add(domain);
    }
    for (const domain of lists.inspectList) {
      inspectList.add(domain);
    }
  } catch (error) {
    console.error('Error fetching domain lists:', error);
  }

  if (warnList.size + cautionList.size + inspectList.size === 0) {
    console.error('No domain lists loaded; skipping watchlist scan');
    return;
  }
  await parseWatchlist();
}
// Start the script. The chain ends in a rejection handler so that an unexpected
// failure is reported on the console instead of surfacing as an unhandled rejection.
runScript()
  .then(() => console.log('Citation Watchlist script finished executing'))
  .catch((error) => console.error('Citation Watchlist script failed:', error));