Jump to content

User:Wakebrdkid/Popular category browsing

From Wikipedia, the free encyclopedia

Demonstration video: http://www.youtube.com/watch?v=f3QXwY-XR28

Working on adding language splits:

The most popular premier football league articles from April 2013, split by language.

Mathematica 10.1 code

(* Download and extract the monthly traffic file from
   http://dumps.wikimedia.org/other/pagecounts-ez/merged/ and point the path
   below at the uncompressed file. *)
str = OpenRead[
   "E:\\Wiki\\traffic\\Uncompressed\\pagecounts-2015-02-views-ge-5-totals"];
(* OpenRead already reports the failure; stop here, because Read on $Failed
   stays unevaluated and the loop below would never terminate. *)
If[str === $Failed, Abort[]];

(* Build the association  decoded article title -> view count  for records
   whose first field is "en.z".
   Can take almost an hour to generate and uses about 4 GB of memory. *)
pageTraffic =
  Module[{record},
   Association@Flatten[
     Last@Reap[
       While[True,
        (* each record is read as: project code, encoded title, count *)
        record = Read[str, {Word, Word, Number}];
        (* stop on a bare EndOfFile as well as on a partially read last record *)
        If[! FreeQ[record, EndOfFile], Break[]];
        If[record[[1]] === "en.z",
         Sow[URLDecode@record[[2]] -> record[[3]]]]]],
     (* Last@Reap is {} when nothing was sown, {{rules...}} otherwise;
        flattening one level handles both without a Part error *)
     1]];

(* release the file handle once the whole file has been consumed *)
Close[str];

(* traffic[category] gives one association per member of the Wikipedia
   category `category`, of the form <|"name" -> title, "traffic" -> views|>.
   Titles are looked up in pageTraffic with spaces replaced by underscores.
   Titles that are not present in pageTraffic get a traffic of 0 instead of
   Missing["KeyAbsent", ...], so the value can be displayed and negated for
   sorting like any other count. *)
traffic[category_] :=
 Function[title,
   <|"name" -> title,
    "traffic" ->
     Lookup[pageTraffic, StringReplace[title, " " -> "_"], 0]|>] /@
  WikipediaData["Category" -> category, "CategoryMembers"]

(* updatePages[category]: navigate to `category`.
   Records it at the end of `history`, makes it the `current` category and
   replaces `pages` with the traffic records of its members. *)
updatePages[category_] := (
  history = Append[history, category];
  current = category;
  pages = traffic[category])

(* updatePages[category, "Append"]: merge the members of `category` into the
   pages already listed, dropping exact duplicates; `history` and `current`
   are left untouched. *)
updatePages[category_, "Append"] :=
 pages = DeleteDuplicates[Join[pages, traffic[category]]]

(* Initial browser state: empty navigation history, category filter off,
   then load the root category (which also sets `current` and `pages`). *)
history = {};
onlyCategories = False;
updatePages["Main topic classifications"];

(* True when a page record (an association with a "name" key) names a
   category page, i.e. its name starts with "Category:". *)
categoryQ[page_] := StringMatchQ[page["name"], "Category:*"]

(* Name of a category page record with the leading "Category:" removed,
   which is the form updatePages is called with. *)
categoryTitle[page_] := StringDrop[page["name"], StringLength@"Category:"]

(* Records currently listed: all of `pages`, or only the category pages when
   the "Only categories" checkbox is ticked. *)
visiblePages[] := If[onlyCategories, Select[pages, categoryQ], pages]

(* Category browser.
   Top: number of listed pages.
   Middle: back button, editable current category, category-only checkbox.
   Bottom: scrollable grid sorted by descending traffic; each row has
     "x"  remove the row,
     "+"  (categories only) merge the category's members into the list and
          remove the category row itself,
     ">"  (categories only) navigate into the category,
     the row's rank, a link to the article, and its traffic. *)
Panel@Column@{
   Dynamic[ToString@Length@visiblePages[] <> " pages"],
   Row@{
     (* updatePages appends the target to `history` again, so after going
        back the last two entries (the page we left and the duplicate) are
        dropped *)
     Button["<",
      updatePages[current = history[[-2]]];
      history = history[[;; -3]],
      Enabled -> Dynamic[Length@history > 1]],
     InputField[Dynamic[current, updatePages@# &], String],
     " Only categories:", Checkbox@Dynamic@onlyCategories},
   Pane[
    Dynamic@Grid@MapIndexed[
       Function[{page, index},
        {Button["x", pages = DeleteCases[pages, page]],
         If[categoryQ@page,
          Button["+",
           updatePages[categoryTitle@page, "Append"];
           pages = DeleteCases[pages, page]]],
         If[categoryQ@page,
          Button[">", updatePages@categoryTitle@page]],
         index[[1]],
         Hyperlink[page["name"],
          "http://en.wikipedia.org/wiki/" <>
           URLEncode@StringReplace[page["name"], " " -> "_"]],
         page["traffic"]}],
       SortBy[visiblePages[], -#traffic &]],
    ImageSize -> {500, 600},
    Scrollbars -> {False, Automatic}]}