Wikipedia:Odd links/SQL

From Wikipedia, the free encyclopedia

This page contains information on how to regenerate the Wikipedia:Odd links report.



DROP TABLE IF EXISTS oddlinks;

CREATE TABLE oddlinks (
  title varchar(255) BINARY NOT NULL,
  link varchar(255) BINARY NOT NULL,
  from_ns int(8) unsigned NOT NULL,
  to_ns int(8) unsigned NOT NULL
);

DROP TABLE IF EXISTS namespaces;

CREATE TABLE namespaces (
  ns int(8) unsigned NOT NULL,
  prefix varchar(255) BINARY NOT NULL
);

INSERT INTO namespaces VALUES ( 0, '' );
INSERT INTO namespaces VALUES ( 1, 'Talk'  );
INSERT INTO namespaces VALUES ( 2, 'User' );
INSERT INTO namespaces VALUES ( 3, 'User Talk' );
INSERT INTO namespaces VALUES ( 4, 'Wikipedia' );
INSERT INTO namespaces VALUES ( 5, 'Wikipedia Talk' );
INSERT INTO namespaces VALUES ( 6, 'File' );
INSERT INTO namespaces VALUES ( 7, 'File Talk' );
INSERT INTO namespaces VALUES ( 8, 'MediaWiki' );
INSERT INTO namespaces VALUES ( 9, 'MediaWiki Talk' );
INSERT INTO namespaces VALUES ( 10, 'Template' );
INSERT INTO namespaces VALUES ( 11, 'Template Talk' );
INSERT INTO namespaces VALUES ( 12, 'Help' );
INSERT INTO namespaces VALUES ( 13, 'Help Talk' );
INSERT INTO namespaces VALUES ( 14, 'Category' );
INSERT INTO namespaces VALUES ( 15, 'Category Talk' );
INSERT INTO namespaces VALUES ( 100, 'Portal' );
INSERT INTO namespaces VALUES ( 101, 'Portal Talk' );

// Capture links from namespace 0 to various other namespaces
// This is done in sections as each million rows take around 3 minutes to process

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN  000000 AND 999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 1000000 AND 1999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 2000000 AND 2999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 3000000 AND 3999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 4000000 AND 4999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 5000000 AND 5999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 6000000 AND 6999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 7000000 AND 7999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 8000000 AND 8999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 9000000 AND 9999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 10000000 AND 10999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 11000000 AND 11999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 12000000 AND 12999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 13000000 AND 13999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 14000000 AND 14999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 15000000 AND 15999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 16000000 AND 16999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 17000000 AND 17999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 18000000 AND 18999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 19000000 AND 19999999;

INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 20000000 AND 20999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 21000000 AND 21999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 22000000 AND 22999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 23000000 AND 23999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 24000000 AND 24999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 25000000 AND 25999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 26000000 AND 26999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 27000000 AND 27999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 28000000 AND 28999999;
INSERT INTO oddlinks SELECT f.page_title, l.pl_title, f.page_namespace, l.pl_namespace FROM enwiki_p.pagelinks l INNER JOIN enwiki_p.page f ON l.pl_from = f.page_id WHERE f.page_namespace = 0 AND l.pl_namespace IN ( 2, 3, 7, 15, 101 ) AND f.page_id BETWEEN 29000000 AND 29999999;

// Do not consider pages with 'under construction' notices on them

DROP TABLE IF EXISTS pages_under_contruction;

CREATE TABLE pages_under_contruction
AS
   SELECT p.page_title
   FROM enwiki_p.page p
   INNER JOIN enwiki_p.templatelinks t ON p.page_id = t.tl_from
   WHERE t.tl_title IN ( 'Underconstruction','UnderConstruction','Under_construction','Under_Construction','Construction','Undercon','UnderCon' )
   AND   t.tl_namespace = 10;
   

SELECT concat( '* [[', o.title, ']] ? [[', n.prefix, ':', o.link, ']]' )
FROM oddlinks o
INNER JOIN namespaces n ON o.to_ns = n.ns
WHERE title IN ( 
   SELECT *
   FROM pages_under_contruction
);
ORDER BY n.ns ASC, o.title ASC;
   

DELETE FROM oddlinks WHERE title IN ( 
   SELECT *
   FROM pages_under_contruction
);


// Do not consider pages which link to User:CorenSearchBot
SELECT *
FROM oddlinks
WHERE from_ns = 0
AND to_ns = 2
AND link = 'CorenSearchBot';

DELETE
FROM oddlinks
WHERE from_ns = 0
AND to_ns = 2
AND link = 'CorenSearchBot';


// Pick out signaturs in articles - paired links to User: and matching User_Talk: namespaces
DROP TABLE IF EXISTS sigs;

CREATE TABLE sigs AS
SELECT o1.title, o1.link
FROM   oddlinks o1, oddlinks o2
WHERE o1.title = o2.title
AND   o1.link = o2.link
AND   o1.to_ns = 2
AND   o2.to_ns = 3;

SELECT concat( '* [[', title, ']] ? [[User:', link, ']] [[User talk:', link, ']]' )
FROM sigs
ORDER BY 1 ASC;

DELETE
FROM oddlinks
WHERE EXISTS
( SELECT *
  FROM sigs
  WHERE oddlinks.title = sigs.title
  AND oddlinks.link = sigs.link )
AND oddlinks.to_ns IN ( 2, 3 );

// Report remaining odd links

SELECT concat( '* [[', o.title, ']] ? [[', n.prefix, ':', o.link, ']]' )
FROM oddlinks o
INNER JOIN namespaces n ON o.to_ns = n.ns
ORDER BY n.ns ASC, o.title ASC;