User:CleanupListingBot/Source

From Wikipedia, the free encyclopedia

The source of the bot, written in VB.NET 2.0. Requires the DotNetWikiBot library. Currently alpha... not all exceptions are caught. Smallman12q (talk) 17:43, 30 August 2010 (UTC)

Imports DotNetWikiBot
Imports System.Collections
Imports System.IO.File
Imports System.IO



Module Module1

    'Structure articlesubcategory
    '    'declaring a structure named articlesubcategory
    '    Dim article As String
    '    Dim subcategory As String
    'End Structure


    'Module-wide state shared by the routines below.

    'Wiki session; assigned by Main once the login has succeeded.
    Private enWiki As Site = Nothing
    'Running number used to name the per-category folders ("<n>#").
    Private catfilecounter As Integer = 0
    'Base directory of the running application; every file and folder is placed under it.
    Private appbase As String = AppDomain.CurrentDomain.SetupInformation.ApplicationBase
    'Titles of the articles in the category that is being checked.
    Private categorytochecktitles As ArrayList = New ArrayList()
    'Text of the report, built up piece by piece.
    Private report As String = Nothing
    'Category names; reportoncat looks them up by the number in front of a folder name.
    Private directoryindex As String() = Nothing

    ''' <summary>
    ''' Entry point. Logs in, downloads the maintenance category tree when it is not
    ''' present locally, asks for a category to check, intersects its articles with
    ''' every downloaded cleanup category and appends the resulting report to
    ''' "Report.txt" in the application directory.
    ''' </summary>
    Sub Main()

        'Log in
        Dim username, password As String

        Console.Write("Please enter username: ")
        username = Console.ReadLine()

        Console.Write("Please enter password: ")
        password = Console.ReadLine()

        Console.Clear()

        Console.WriteLine("Attempting to log into the english wiki as: " + username)
        Console.WriteLine("Please note that https is not used.")

        Try
            enWiki = New Site("http://en.wikipedia.org", username, password)
        Catch e As Exception
            Console.WriteLine("Login error: " + e.Message)
            quit()
        End Try

        Console.WriteLine("Log in successful....clearing user name and password for security...")

        username = Nothing
        password = Nothing
        Console.Clear()

        'Get the cleanup categories:
        ' - create a directory for each (sub)category
        ' - copy the articles of each category to a text file
        ' - load the text files and compare
        Console.WriteLine("Loading category: Wikipedia maintenance categories sorted by month")

        Dim categoryname As String = "Wikipedia maintenance categories sorted by month"
        Dim currentdirectory As String = appbase + "\" + categoryname
        'Index file written by createandrecorddirectory, one "<number>#<category name>" line per folder
        Dim indexfile As String = appbase + "\Category directory.txt"

        If Not Directory.Exists(currentdirectory) Then

            'A fresh download numbers the folders from the counter's initial value again,
            'so lines left in the index by an earlier download would no longer line up
            'with the folder numbers. Start with an empty index.
            If File.Exists(indexfile) Then
                File.Delete(indexfile)
            End If

            Directory.CreateDirectory(currentdirectory)
            Console.WriteLine("Category not found locally...begin downloading...")
            getcat(categoryname, currentdirectory)
            Console.WriteLine("Done downloading cleanup...")
            pause()

        End If

        'Get category to check
        Console.Write("Enter category to check: ")
        Dim categorytocheck As String = Console.ReadLine()

        'Get the category
        Dim categorytochecklist As PageList = Nothing
        Try
            Console.WriteLine("Loading category pages...")
            categorytochecklist = New PageList(enWiki)
            categorytochecklist.FillAllFromCategoryTree(categorytocheck) 'Get category titles
            Console.WriteLine("Category loading complete....removing non-articles.")
            categorytochecklist.FilterNamespaces({0}) 'Remove non-articles
            Console.WriteLine("Category filtering...complete.")
            Console.WriteLine("There are " + categorytochecklist.Count.ToString + " articles.")
        Catch ex As Exception
            'exceptionquit ends the program, so the code below never sees a missing list
            exceptionquit("Loading category error", ex)
        End Try

        'Send the page titles to the module-level arraylist
        For Each article As Page In categorytochecklist
            categorytochecktitles.Add(article.title)
        Next
        categorytochecklist = Nothing 'Clear out pagelist

        'Compare: check each directory for articles.txt and write the matches to 0.txt
        intersectcat(currentdirectory)

        'Report
        report += "The following is a cleanup report generated on " + Date.UtcNow.ToString

        'Load the folder-number -> category-name index. It has to come from the file
        'createandrecorddirectory appends to; the category folder's articles.txt holds
        'article titles, not index lines.
        Dim index As New ArrayList
        Try
            Using sr As New StreamReader(indexfile)
                Dim line As String = sr.ReadLine()
                While line IsNot Nothing
                    'Split on the first "#" only, so a category name containing "#" stays whole
                    Dim parts() As String = line.Split(New Char() {"#"c}, 2)
                    If parts.Length > 1 Then
                        index.Add(parts(1))
                    Else
                        'Keep a malformed line as-is so later entries keep their position
                        index.Add(line)
                    End If
                    line = sr.ReadLine()
                End While
            End Using
        Catch e As Exception
            ' Let the user know what went wrong.
            Console.WriteLine("The file could not be read:")
            Console.WriteLine(e.Message)
        End Try

        directoryindex = arraylisttostring(index)
        index = Nothing

        reportoncat(currentdirectory, 1)

        'Append the report; Using closes the file even when the write fails
        Using objWriter As New System.IO.StreamWriter(appbase + "\Report.txt", True)
            objWriter.WriteLine(report)
        End Using

        'Logout missing?
    End Sub



    ''' <summary>
    ''' Returns the entries that occur in both lists, as a string array.
    ''' The shorter list is walked (list1 when both are equally long), so the result
    ''' follows that list's order and repeats an entry as often as that list does.
    ''' </summary>
    ''' <param name="list1">First list; its entries are expected to be strings.</param>
    ''' <param name="list2">Second list; its entries are expected to be strings.</param>
    ''' <returns>The common entries, converted by arraylisttostring.</returns>
    Function intersect(ByRef list1 As ArrayList, ByRef list2 As ArrayList) As String()
        Dim smaller As ArrayList
        Dim larger As ArrayList
        If (list1.Count > list2.Count) Then
            smaller = list2
            larger = list1
        Else
            smaller = list1
            larger = list2
        End If

        'Index the longer list once; ArrayList.Contains inside the loop would rescan
        'the whole list for every entry of the shorter one.
        Dim lookup As New Hashtable(larger.Count)
        Dim largerhasnothing As Boolean = False
        For Each entry As Object In larger
            If entry Is Nothing Then
                'A Hashtable key may not be Nothing, so remember it separately
                largerhasnothing = True
            Else
                lookup(entry) = True
            End If
        Next

        Dim intersection As New ArrayList
        For Each piece As Object In smaller
            Dim found As Boolean
            If piece Is Nothing Then
                found = largerhasnothing
            Else
                found = lookup.ContainsKey(piece)
            End If
            If found Then
                intersection.Add(piece)
            End If
        Next
        Return arraylisttostring(intersection)
    End Function

    ''' <summary>
    ''' Copies the entries of an ArrayList into a new String array.
    ''' </summary>
    ''' <param name="array">List whose entries are all strings.</param>
    ''' <returns>A String array with the same entries in the same order.</returns>
    Function arraylisttostring(ByRef array As ArrayList) As String()
        'ToArray(Type) hands back a System.Array whose element type is String
        Dim copied As System.Array = array.ToArray(GetType(String))
        Return DirectCast(copied, String())
    End Function

    ''' <summary>
    ''' Wraps a title in wiki heading markup: <paramref name="depth"/> "=" characters
    ''' before and after the text.
    ''' </summary>
    ''' <param name="input">Heading text.</param>
    ''' <param name="depth">Number of "=" characters on each side; must not be negative.</param>
    ''' <returns>The heading line, e.g. "==Title==" for depth 2.</returns>
    ''' <exception cref="ArgumentOutOfRangeException">depth is negative.</exception>
    Function header(ByVal input As String, ByVal depth As Integer) As String
        'Build the marker at the requested length; cutting it out of a fixed-length
        'string threw for any depth beyond that string's length.
        Dim equalstoadd As String = New String("="c, depth)
        Return equalstoadd + input + equalstoadd
    End Function

    ''' <summary>
    ''' Adds one report section for the given category folder and then recurses into
    ''' its subfolders. The section is a heading followed by one "* [[title]]" line
    ''' per line of the folder's 0.txt, when that file exists.
    ''' </summary>
    ''' <param name="currentdirectory">Full path of the category folder.</param>
    ''' <param name="depth">Heading level for this folder; subfolders get depth + 1.</param>
    Sub reportoncat(ByVal currentdirectory As String, ByVal depth As Integer)
        Dim Root As New DirectoryInfo(currentdirectory)
        Dim Dirs As DirectoryInfo() = Root.GetDirectories()

        'Find category real name: folders are named "<number>#", and the number is a
        'position in directoryindex. A folder without such a number (the top-level
        'folder, for one) or with a number outside the index keeps its own name
        'instead of raising an exception.
        Dim rootname As String = Root.Name
        Dim position As Integer
        If Integer.TryParse(rootname.Split("#"c)(0), position) _
           AndAlso directoryindex IsNot Nothing _
           AndAlso position >= 0 AndAlso position < directoryindex.Length Then
            rootname = directoryindex(position)
        End If
        report += vbNewLine + header(rootname, depth) + vbNewLine

        'List this category's matching articles
        Dim matchesfile As String = currentdirectory + "\0.txt"
        If File.Exists(matchesfile) Then
            Try
                ' The Using statement also closes the StreamReader.
                Using sr As New StreamReader(matchesfile)
                    Dim line As String = sr.ReadLine()
                    While line IsNot Nothing
                        report += "* [[" + line + "]]" + vbNewLine
                        line = sr.ReadLine()
                    End While
                End Using
            Catch e As Exception
                ' Let the user know what went wrong.
                Console.WriteLine("The file could not be read:")
                Console.WriteLine(e.Message)
            End Try
        End If

        'Each subcat
        For Each DirectoryName As DirectoryInfo In Dirs
            Try
                reportoncat(DirectoryName.FullName, depth + 1)
            Catch E As Exception
                'Say which folder failed and why, then carry on with its siblings
                Console.WriteLine("Error accessing " + DirectoryName.FullName + ": " + E.Message)
            End Try
        Next
    End Sub

    ''' <summary>
    ''' For the given category folder and, recursively, every subfolder: reads the
    ''' titles in articles.txt, intersects them with categorytochecktitles and saves
    ''' the common titles to 0.txt in the same folder.
    ''' </summary>
    ''' <param name="thecurrentdirectory">Full path of the category folder.</param>
    Sub intersectcat(ByVal thecurrentdirectory As String)
        Dim Root As New DirectoryInfo(thecurrentdirectory)
        Dim Dirs As DirectoryInfo() = Root.GetDirectories()

        Dim articlesfile As String = thecurrentdirectory + "\articles.txt"
        'Same spelling of the path as the one reportoncat reads
        Dim matchesfile As String = thecurrentdirectory + "\0.txt"

        'A 0.txt left by an earlier run would otherwise survive whenever this run
        'finds no matches, and the report would list the old matches.
        If File.Exists(matchesfile) Then
            Try
                File.Delete(matchesfile)
            Catch e As Exception
                Console.WriteLine("The file could not be deleted:")
                Console.WriteLine(e.Message)
            End Try
        End If

        'check this cat's articles
        If File.Exists(articlesfile) Then
            Dim pages As New ArrayList
            Try
                ' The Using statement also closes the StreamReader.
                Using sr As New StreamReader(articlesfile)
                    Dim line As String = sr.ReadLine()
                    While line IsNot Nothing
                        pages.Add(line)
                        line = sr.ReadLine()
                    End While
                End Using
            Catch e As Exception
                ' Let the user know what went wrong.
                Console.WriteLine("The file could not be read:")
                Console.WriteLine(e.Message)
            End Try

            'Now intersect them
            Dim intersection() As String = intersect(categorytochecktitles, pages)
            pages = Nothing

            'We now write the intersection to file
            If intersection.Length > 0 Then
                Dim x As New PageList(enWiki, intersection)
                x.SaveTitlesToFile(matchesfile)
            End If
        End If

        'Each subcat
        For Each DirectoryName As DirectoryInfo In Dirs
            Try
                intersectcat(DirectoryName.FullName)
            Catch E As Exception
                'Say which folder failed and why, then carry on with its siblings
                Console.WriteLine("Error accessing " + DirectoryName.FullName + ": " + E.Message)
            End Try
        Next
    End Sub

    ''' <summary>
    ''' Downloads one category into its own numbered folder and then does the same
    ''' for each of its subcategories, nesting their folders inside this one.
    ''' </summary>
    ''' <param name="categoryname">Name of the category to download.</param>
    ''' <param name="thecurrrentdirectory">Folder in which this category's folder is created.</param>
    Sub getcat(ByVal categoryname As String, ByVal thecurrrentdirectory As String)
        'Pages of the category and, separately, its subcategories
        Dim members As New PageList(enWiki)
        Dim subcategories As New PageList(enWiki)
        members.FillFromCategory(categoryname)
        subcategories.FillSubsFromCategory(categoryname)

        'This category's folder is "<counter>#" below the folder that was passed in
        thecurrrentdirectory = thecurrrentdirectory + "\" + catfilecounter.ToString + "#"
        createandrecorddirectory(categoryname, thecurrrentdirectory)

        'NOTE(review): presumably strips the "Category:" prefix from the titles - confirm against DotNetWikiBot
        For Each subcategorypage As Page In subcategories
            subcategorypage.RemoveNSPrefix()
        Next

        'Write articles to file in directory
        Dim hasmembers As Boolean = (members.Count > 0)
        If hasmembers Then
            members.SaveTitlesToFile(thecurrrentdirectory + "\articles.txt")
            Console.WriteLine("Saved files of category" + categoryname)
            members = Nothing 'clear out category when done
        End If

        'Check the subcategories
        For Each child As Page In subcategories
            Dim childtitle As String = child.title
            Console.WriteLine("Getting subcategory: '" + childtitle + " of '" + categoryname + "'")
            getcat(childtitle, thecurrrentdirectory)
        Next

    End Sub

    ''' <summary>
    ''' Creates the folder for a category and appends a "<number>#<category name>"
    ''' line to "Category directory.txt" in the application directory. The number is
    ''' the current value of catfilecounter, which is then increased by one.
    ''' </summary>
    ''' <param name="categoryname">Category name to record in the index file.</param>
    ''' <param name="thecurrentdirectory">Full path of the folder to create.</param>
    Sub createandrecorddirectory(ByVal categoryname As String, ByVal thecurrentdirectory As String)
        Dim piece As String = catfilecounter.ToString + "#"
        catfilecounter += 1
        Directory.CreateDirectory(thecurrentdirectory)

        'Append to the index; Using closes the file even when the write fails
        Using objWriter As New System.IO.StreamWriter(appbase + "\Category directory.txt", True)
            objWriter.WriteLine(piece + categoryname)
        End Using

    End Sub

    ''' <summary>
    ''' Prints a prompt, waits for a line of input and then ends the program.
    ''' </summary>
    Sub quit()
        Console.Out.WriteLine("Press any key to quit...")
        'The text that was typed is of no interest; only the wait matters
        Dim discarded As String = Console.In.ReadLine()
        End
    End Sub

    ''' <summary>
    ''' Prints a prompt and waits for a line of input before returning.
    ''' </summary>
    Sub pause()
        Console.Out.WriteLine("Press any key to continue...")
        'The text that was typed is of no interest; only the wait matters
        Dim discarded As String = Console.In.ReadLine()
    End Sub

    ''' <summary>
    ''' Prints "<errorwith>:<exception message>" and then ends the program via quit.
    ''' </summary>
    ''' <param name="errorwith">Short description of what failed.</param>
    ''' <param name="ex">The exception that was caught.</param>
    Sub exceptionquit(ByVal errorwith As String, ByVal ex As Exception)
        Dim message As String = String.Concat(errorwith, ":", ex.Message)
        Console.WriteLine(message)
        quit()
    End Sub



End Module