User:RonBot/8/Source1
DatRem.py
# Shared mutable state for the bot. The main program imports this module and
# reads/writes these names as DatRem.<name>.
mycatlist = []  # category titles queued for scanning
pagelist = []  # page titles collected from the scanned categories
datelistst = []  # start offsets of the date headers found in the current text
datelistend = []  # end offsets of the same date headers
removed = 0  # running count of date headers removed
added = 0  # not referenced by the main program shown with this module
Main Program
from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import DatRem
site = wiki.Wiki() #Tell Python to use the English Wikipedia's API
site.login(userpassbot.username, userpassbot.password) #login
#routine to autoswitch some of the output - as filenames have accented chars!
def pnt(s):
    """Print ``s``, retrying with its UTF-8 encoding if printing fails.

    Args:
        s: The value to print.
    """
    try:
        print(s)
    except UnicodeEncodeError:
        # The output stream could not encode ``s``; print the UTF-8 form instead.
        fallback = s.encode('utf-8')
        print(fallback)
def startAllowed():
    """Report whether the on-wiki run page enables this task.

    Fetches the wikitext of "User:RonBot/8/Run" and compares it with the
    exact string "Run" (no trimming or case folding is applied).

    Returns:
        "run" when the page text is exactly "Run", otherwise "no".
    """
    run_page = page.Page(site, "User:RonBot/8/Run")
    return "run" if run_page.getWikiText() == "Run" else "no"
def allow_bots(text, user):
    """Decide whether ``user`` may edit a page, honouring {{bots}}/{{nobots}}.

    Only the first ``bots`` or ``nobots`` template found in ``text`` is
    examined. Progress messages are printed for debugging.

    Args:
        text: Wikitext of the page.
        user: Bot user name; compared case-insensitively after stripping.

    Returns:
        True if the bot may edit the page, False if it is excluded.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    for tl in text.filter_templates():
        if tl.name.matches(['bots', 'nobots']):
            break
    else:
        # No exclusion template on the page at all.
        return True
    print("template found") #Have we found one
    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        # Normalise the parameter name so "| allow = X" and "|Allow=X" are
        # recognised as well as "|allow=X".
        name = param.name.strip().lower()
        if name == 'allow':
            print("We have an ALLOW") # allow found
            if ''.join(bots) == 'none':
                return False
            if user in bots or 'all' in bots:
                return True
            # An allow list bans every compliant bot that is not on it, so a
            # miss must refuse rather than fall through to the default True.
            print("not in allow list - false")
            return False
        elif name == 'deny':
            print("We have a DENY") # deny found
            if ''.join(bots) == 'none':
                print("none - true")
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    pnt(bot)
                    pnt(user)
                    print("all - false")
                    return False
    # A bare {{nobots}} bans every bot.
    if (tl.name.matches('nobots') and len(tl.params) == 0):
        print("match - false")
        return False
    return True
def findpages(nextcat):
    """Collect the members of category ``nextcat`` into the DatRem lists.

    Queries ``list=categorymembers`` repeatedly, following the API's
    ``continue``/``cmcontinue`` token until no more results are reported.
    A member whose unprefixed title contains the substring "Category" is
    queued in ``DatRem.mycatlist``; every other member goes into
    ``DatRem.pagelist``. Titles already present are not added again.

    Args:
        nextcat: Title passed to the API as ``cmtitle``.
    """
    cont_token = ''
    print(nextcat)
    while True:
        query = {'action':'query',
                 'list':'categorymembers',
                 'cmtitle':nextcat,
                 'cmlimit':'max',
                 'cmcontinue':cont_token
                 }
        result = api.APIRequest(site, query).query(False) #Send the API request
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])
        pnt(members)
        for member in members:
            name = member.unprefixedtitle
            # NOTE(review): this is a substring test on the unprefixed title,
            # not a namespace check - confirm that is what is intended.
            if "Category" not in name:
                if name in DatRem.pagelist: #Have we a unique page name?
                    print("page in list")
                else:
                    DatRem.pagelist.append(name)
                    pnt(name)
                continue
            if name in DatRem.mycatlist:
                pnt("NOT APPENDING "+name)
                continue
            DatRem.mycatlist.append(name)
            pnt("APPENDING "+name)
            print(len(DatRem.mycatlist))
        if 'continue' not in result:
            break
        cont_token = result['continue']['cmcontinue']
        print("continue")
    return
def examinetext(text):
    """Remove date headers that have only whitespace before the next date header.

    A date header is a match of the pattern below: a line starting with
    "= <Month name>" and running to the first "=" that ends a line. Headers
    are processed from the bottom of the text upwards so that the offsets of
    headers still to be examined stay valid after a removal. The last header
    in the text is never removed.

    Side effects: ``DatRem.datelistst`` and ``DatRem.datelistend`` are
    replaced with the start/end offsets of the headers (in reverse order when
    any were found), and ``DatRem.removed`` is incremented once per header
    removed. Progress is printed for debugging.

    Args:
        text: Wikitext of the page.

    Returns:
        The text with the unused date headers removed; the input unchanged
        when it contains no date headers.
    """
    DatRem.datelistst = list()
    DatRem.datelistend = list()
    last = 0
    # NOTE(review): [\S\s]*? also matches newlines, so a header line that does
    # not end in "=" makes the match run on to a later line ending in "=".
    # Confirm whether the pattern should be restricted to a single line.
    for match in re.finditer(r'^=\s(January|February|March|April|May|June|July|August|September|October|November|December)[\S\s]*?=$',text,re.MULTILINE):
        pnt(match.group(0))
        DatRem.datelistst.append(match.start())
        DatRem.datelistend.append(match.end())
        print("%s %s %s" % (match.start(), match.end(), match.start()-last))
        last = match.start()
    if not DatRem.datelistst:
        # No date headers at all: nothing to remove (previously an IndexError).
        return text
    #We need list in reverse as numbers will change as we remove text, so work from bottom up.
    print("Reverse")
    DatRem.datelistst.reverse()
    DatRem.datelistend.reverse()
    # Sentinel: puts the "next header" 11 characters past the last header's
    # end, so the <10 test below can never select the last header.
    LastStart = DatRem.datelistend[0]+11
    #Look for (start of later date - end of earlier date) < 10, with just white space in between
    for start, end in zip(DatRem.datelistst, DatRem.datelistend):
        print("%s %s" % (start, end))
        print(LastStart-end)
        print(LastStart)
        if LastStart-end < 10:
            print("Remove")
            # Everything from this header up to the start of the next one.
            losetext = text[start:LastStart]
            print(repr(losetext))
            # The text strictly between this header and the next header.
            gap = text[end:LastStart]
            print(repr(gap))
            if gap.isspace():
                print("All WhiteSpace")
                pnt("++++"+losetext+"++++")
                print("-------------------------------")
                print(repr(text[0:start]))
                print("-------------------------------")
                print(repr(text[LastStart:]))
                print("-------------------------------")
                # Drop the header and the whole gap, so the next header now
                # begins exactly at ``start`` and LastStart below stays exact.
                text = text[0:start]+text[LastStart:]
                pnt(text)
                DatRem.removed += 1
        LastStart = start
        print(LastStart)
        print(len(text))
    return text
def checkpage():
    """Run the date-header clean-up over every page in ``DatRem.pagelist``.

    For each title the wikitext is fetched, ``allow_bots`` is consulted for
    'RonBot', and ``examinetext`` is applied when the bot is allowed. A page
    is reported as changed only if ``examinetext`` removed at least one
    header from that page. The actual save call is still commented out, so
    nothing is written to the wiki.
    """
    size = len(DatRem.pagelist)
    print(size)
    for pagetitle in DatRem.pagelist:
        pagetitletext = pagetitle.encode('utf-8')
        print(pagetitletext)
        pagepage = page.Page(site, pagetitle)
        pagetext = pagepage.getWikiText()
        go = allow_bots(pagetext, 'RonBot')# does page allow bots
        if not go:
            continue
        print("++++++++++++++++++++++++++++++++++++++++")
        print("REMOVAL bot allowed on article")
        pnt(pagetext)
        print(len(pagetext))
        # DatRem.removed is a running total across pages, so compare against
        # its value before this page rather than against zero.
        removed_before = DatRem.removed
        pagetext = examinetext(pagetext)
        pnt(pagetext)
        print(len(pagetext))
        if DatRem.removed > removed_before:
            try:
                #pagepage.edit(text=pagetext, bot=True, summary="(Task 8 - userspace trial) - Removal of unused date headers") #(DO NOT UNCOMMENT UNTIL BOT IS APPROVED)
                print("writing changed page")
            except Exception:
                print("Failed to write")
        print("++++++++++++++++++++++++++++++++++++++++")
    return
def main():
    """Run the task: gather pages from the category tree and clean them.

    Does nothing unless ``startAllowed()`` returns "run". Otherwise it resets
    the DatRem state, seeds the category queue, scans every queued category
    with ``findpages`` (which may queue further categories), runs
    ``checkpage`` when any pages were collected, and finally prints the
    removal count and the current time.
    """
    go = startAllowed() #Check if task is enabled
    if go != "run":
        # The result used to be ignored, so the on-wiki switch had no effect.
        print("Disabled Task")
        return
    DatRem.mycatlist = list()
    DatRem.pagelist = list()
    # NOTE(review): "Wikpedia" is spelled this way in the original - confirm
    # it matches the on-wiki category title before changing it.
    DatRem.mycatlist.append("Category:Wikpedia Help pages with dated sections")
    DatRem.removed = 0
    listnum = 0
    # Index-based loop: findpages() may append to mycatlist while we walk it.
    while listnum < len(DatRem.mycatlist):
        pnt( "CAT" + DatRem.mycatlist[listnum])
        findpages(DatRem.mycatlist[listnum])
        listnum += 1
        # Two spaces, matching the output of the original print statement.
        print("LIST No.  %d" % listnum)
    print(len(DatRem.pagelist))
    #Test System - KILL NEXT TWO LINES
    #DatRem.pagelist=list()
    #DatRem.pagelist.append("User:Ronhjones/Sandbox5")
    if len(DatRem.pagelist) > 0:
        checkpage()
    print(DatRem.removed)
    print(time.ctime())
if __name__ == "__main__":
    # Script entry point: run main() with FutureWarning messages suppressed;
    # the previous warning filters are restored when the block exits.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", FutureWarning)
        main()
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
- The information displayed on this website is sourced in whole or in part from Wikipedia and has been adapted for presentation here. We strive to provide accurate and relevant information; however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.