User:RonBot/8/Source1

DatRem.py

# Shared mutable state for the RonBot task 8 scripts. The main program
# imports this module and reads/writes these names as DatRem.<name>.

# Category titles queued for scanning.
mycatlist = []
# Titles of the pages that will be examined.
pagelist = []
# Start / end offsets of the date headers found in the page being examined.
datelistst = []
datelistend = []
# Running counters.
removed = 0
added = 0


Main Program

from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import DatRem

# Module-level wiki session used by every function below; it is created and
# logged in at import time with the credentials held in the userpassbot module.
site = wiki.Wiki() #Tell Python to use the English Wikipedia's API
site.login(userpassbot.username, userpassbot.password) #login

#routine to autoswitch some of the output - as filenames have accented chars!
def pnt(s):
    """Print *s*, falling back to its UTF-8 encoding if the console rejects it.

    Some titles contain accented characters that the output encoding cannot
    represent; in that case the UTF-8 encoded form is printed instead of
    letting UnicodeEncodeError propagate.
    """
    try:
        print(s)
    except UnicodeEncodeError:
        encoded = s.encode('utf-8')
        print(encoded)

      
def startAllowed():
    """Read the on-wiki run flag for this task.

    Fetches the wikitext of "User:RonBot/8/Run" and returns the string "run"
    when that text is exactly "Run"; any other content returns "no".
    """
    flag_text = page.Page(site, "User:RonBot/8/Run").getWikiText()
    return "run" if flag_text == "Run" else "no"

def allow_bots(text, user):
    """Decide whether *user* may edit a page, based on {{bots}} / {{nobots}}.

    Only the first {{bots}} or {{nobots}} template found in *text* is
    consulted.

    text -- wikitext of the page.
    user -- bot account name; compared lower-cased and stripped.

    Returns True when the bot may edit and False when the page opts out.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    for tl in text.filter_templates():
        if tl.name.matches(['bots', 'nobots']):
            break
    else:
        # No exclusion template on the page at all.
        return True
    print("template found") #Have we found one
    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        # Strip the name so that "| allow = X" is recognised as well as
        # "|allow=X".
        pname = param.name.strip()
        if pname == 'allow':
            print("We have an ALLOW") # allow found
            if ''.join(bots) == 'none':
                return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
            # An allow= list is a whitelist: a bot that is not named in it
            # (and no "all") is excluded, so do not fall through to True.
            return False
        elif pname == 'deny':
            print("We have a DENY") # deny found
            if ''.join(bots) == 'none':
                print("none - true")
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    pnt(bot)
                    pnt(user)
                    print("all - false")
                    return False
    # A bare {{nobots}} (no parameters) bans every bot.
    if (tl.name.matches('nobots') and len(tl.params) == 0):
        print("match - false")
        return False
    return True

def findpages(nextcat):
    """Sort the members of category *nextcat* into the DatRem work lists.

    Every member whose unprefixed title contains "Category" is queued on
    DatRem.mycatlist; every other member goes on DatRem.pagelist. Titles
    already present in the respective list are not added again. Results are
    fetched batch by batch using the API's cmcontinue token.
    """
    print(nextcat)
    cont_token = ''
    while True:
        query = {
            'action': 'query',
            'list': 'categorymembers',
            'cmtitle': nextcat,
            'cmlimit': 'max',
            'cmcontinue': cont_token,
        }
        # NOTE(review): query(False) presumably turns off wikitools' own
        # continuation handling, since paging is done by hand below - confirm.
        result = api.APIRequest(site, query).query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])
        pnt(members)
        for member in members:
            title = member.unprefixedtitle
            if "Category" not in title:
                # Ordinary page: keep each title once only.
                if title in DatRem.pagelist:
                    print("page in list")
                else:
                    DatRem.pagelist.append(title)
                    pnt(title)
            elif title in DatRem.mycatlist:
                pnt("NOT APPENDING "+title)
            else:
                # Newly seen category: queue it for a later scan.
                DatRem.mycatlist.append(title)
                pnt("APPENDING "+title)
                print(len(DatRem.mycatlist))
        if 'continue' not in result:
            return
        cont_token = result['continue']['cmcontinue']
        print("continue")

def examinetext(text):
    """Remove date headers that are followed only by whitespace.

    A date header is a line that starts with "= <Month>" and ends with "=".
    When fewer than 10 characters, all of them whitespace, separate the end
    of one header from the start of the next, the earlier header and that
    whitespace are cut out. The last header on the page is never removed.

    text -- wikitext of the page.

    Returns the (possibly shortened) wikitext. As side effects,
    DatRem.datelistst / DatRem.datelistend are rebuilt with the header
    offsets (latest header first) and DatRem.removed is incremented once per
    removed header.
    """
    # Start from empty offset lists for this page.
    DatRem.datelistst=list()
    DatRem.datelistend=list()
    last=0
    # NOTE(review): [\S\s]*? can run across line breaks, so a header line
    # without a closing "=" would match through to a later line ending in
    # "=" - confirm whether that is intended.
    for match in re.finditer(r'^=\s(January|February|March|April|May|June|July|August|September|October|November|December)[\S\s]*?=$',text,re.MULTILINE):
        pnt(text[match.start():match.end()])
        DatRem.datelistst.append(match.start())
        DatRem.datelistend.append(match.end())
        print("%s %s %s" % (match.start(), match.end(), match.start()-last))
        last=match.start()
    if not DatRem.datelistst:
        # No date headers on this page: nothing to remove, and indexing the
        # empty list below would raise IndexError.
        return text
    #We need list in reverse as numbers will change as we remove text, so work from bottom up.
    print("Reverse")
    DatRem.datelistst.reverse()
    DatRem.datelistend.reverse()
    # Sentinel "next header" position for the last header on the page: 11
    # past its end, so the <10 test below can never select it for removal.
    LastStart=DatRem.datelistend[0]+11
    for start, end in zip(DatRem.datelistst, DatRem.datelistend):
        print("%s %s" % (start, end))
        print(LastStart-end)
        print(LastStart)
        if LastStart-end<10:
            print("Remove")
            # Everything between this header and the following one.
            gap=text[end:LastStart]
            losetext=text[start:LastStart]
            print(repr(losetext))
            print(repr(gap))
            if gap.isspace():
                print("All WhiteSpace")
                pnt("++++"+losetext+"++++")
                print("-------------------------------")
                print(repr(text[:start]))
                print("-------------------------------")
                print(repr(text[LastStart:]))
                print("-------------------------------")
                # Cut the header and the whole gap, so the following header
                # now begins exactly at `start`.
                text=text[:start]+text[LastStart:]
                pnt(text)
                DatRem.removed += 1
        # Whether or not this header was removed, the next header towards
        # the end of the page begins at (or after) this offset.
        LastStart=start
        print(LastStart)
        print(len(text))
    return text


def checkpage():
    """Run the empty-date-header removal over every page in DatRem.pagelist.

    Each page's wikitext is fetched and checked with allow_bots(); pages that
    exclude the bot are skipped. For the others the text is passed through
    examinetext(), and the page counts as changed only when that call removed
    at least one header from this particular page. The actual edit call is
    still commented out pending bot approval.
    """
    size=len(DatRem.pagelist)
    print(size)
    for pagetitle in DatRem.pagelist:
        pagetitletext = pagetitle.encode('utf-8')
        print(pagetitletext)
        pagepage = page.Page(site, pagetitle)
        pagetext = pagepage.getWikiText()
        go = allow_bots(pagetext, 'RonBot')# does page allow bots
        if not go:
            continue
        print("++++++++++++++++++++++++++++++++++++++++")
        print("REMOVAL bot allowed on article")
        pnt(pagetext)
        print(len(pagetext))
        # DatRem.removed is a running total over all pages, so remember its
        # value before this page to detect removals made on this page only.
        removed_before = DatRem.removed
        pagetext=examinetext(pagetext)
        pnt(pagetext)
        print(len(pagetext))
        if DatRem.removed>removed_before:
            try:
                #pagepage.edit(text=pagetext, bot=True, summary="(Task 8 - uesrpace trial) - Removal of unused date headers") #(DO NOT UNCOMMENT UNTIL BOT IS APPROVED)
                print("writing changed page")
            except Exception:
                # A failed write on one page should not stop the whole run.
                print("Failed to write")
            print("++++++++++++++++++++++++++++++++++++++++")

def main():
    """Run task 8: gather the target pages, then strip empty date headers.

    The run is abandoned immediately unless startAllowed() reports "run".
    Otherwise the start category is scanned with findpages(), along with any
    categories that scan appends to DatRem.mycatlist, and checkpage() then
    processes the collected pages. The total number of removed headers and
    the finish time are printed at the end.
    """
    go = startAllowed() #Check if task is enabled
    if go != "run":
        # Honour the on-wiki switch: do nothing when the task is disabled.
        print("Task not enabled at User:RonBot/8/Run - stopping")
        return
    DatRem.mycatlist=list()
    DatRem.pagelist=list()
    # NOTE(review): "Wikpedia" looks like a typo for "Wikipedia" - confirm
    # the real category title before changing this string.
    DatRem.mycatlist.append("Category:Wikpedia Help pages with dated sections")
    DatRem.removed=0
    listnum=0
    # Index loop on purpose: findpages() may append to mycatlist while it is
    # being walked, and those new entries must be visited too.
    while listnum<len(DatRem.mycatlist):
        pnt( "CAT" + DatRem.mycatlist[listnum])
        findpages(DatRem.mycatlist[listnum])
        listnum+=1
        print("LIST No.  %s" % listnum)
        print(len(DatRem.pagelist))
    #Test System - KILL NEXT TWO LINES
    #DatRem.pagelist=list()
    #DatRem.pagelist.append("User:Ronhjones/Sandbox5")
    if len(DatRem.pagelist)>0:
        checkpage()

    print(DatRem.removed)
    print(time.ctime())
      
# Script entry point: run main() with FutureWarning messages suppressed.
if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for re-presentation here. We strive to provide accurate and relevant information; however, please note the following:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.