User:RonBot/7/Source1

foot.py

# Shared mutable state for the bot. The main program imports this module as
# `foot` and reads/writes these names as module attributes.
mycatlist1 = []   # category queue walked by findpages()
mycatlist2 = []   # category queue walked by findexclude()
allplayers = []   # every distinct non-category title seen by findpages()
excludelist = []  # titles gathered by findexclude() and ProcessDoneCat()
donelist = []     # titles in the target category that are also excluded
nomatchlist = []  # titles seen by findpages() that are not in excludelist
stublist = []     # titles whose wikitext contains "stub" (filled by AddCat)
redirlist = []    # redirect pages that AddCat() skipped
removed = 0       # count of successful category-removal edits
added = 0         # count of successful category-addition edits

Main Program

from wikitools import *
import time
import datetime
import urllib
import json
import userpassbot #Bot password
import warnings
import re
import mwparserfromhell
import datetime
import sys
import foot

# Module-level wiki handle shared by every function below; created and logged
# in as soon as this script is loaded.
site = wiki.Wiki() #Tell Python to use the English Wikipedia's API
# Credentials come from the local userpassbot module, not from this file.
site.login(userpassbot.username, userpassbot.password) #login

#routine to autoswitch some of the output - as filenames have accented chars!
def pnt(s):
    """Print s; if the output stream cannot encode it, print its UTF-8 bytes."""
    try:
        print(s)
    except UnicodeEncodeError:
        as_utf8 = s.encode('utf-8')
        print(as_utf8)

      
def startAllowed():
    """Read the on-wiki run switch.

    Fetches the wikitext of "User:RonBot/7/Run" and returns "run" when it is
    exactly "Run"; any other content returns "no".
    """
    switch_page = page.Page(site, "User:RonBot/7/Run")
    switch_text = switch_page.getWikiText()
    return "run" if switch_text == "Run" else "no"

def allow_bots(text, user):
    """Decide whether `user` may edit a page, based on {{bots}}/{{nobots}}.

    Only the first template named "bots" or "nobots" in `text` is examined.

    Args:
        text: wikitext of the page.
        user: bot account name; compared case-insensitively after stripping.

    Returns:
        True when editing is allowed, False when the template denies it.
    """
    user = user.lower().strip()
    text = mwparserfromhell.parse(text)
    for tl in text.filter_templates():
        if tl.name.matches(['bots', 'nobots']):
            break
    else:
        # Loop finished without a break: no exclusion template on the page.
        return True
    print("template found") #Have we found one
    for param in tl.params:
        bots = [x.lower().strip() for x in param.value.split(",")]
        if param.name == 'allow':
            print("We have an ALLOW") # allow found
            if ''.join(bots) == 'none':
                return False
            for bot in bots:
                if bot in (user, 'all'):
                    return True
            # NOTE(review): an allow list that does not name this bot falls
            # through and ends up allowed below - confirm that is intended.
        elif param.name == 'deny':
            print("We have a DENY") # deny found
            if ''.join(bots) == 'none':
                print("none - true")
                return True
            for bot in bots:
                if bot in (user, 'all'):
                    pnt(bot)
                    pnt(user)
                    print("all - false")
                    return False
    # A bare {{nobots}} (no parameters) denies every bot.
    if tl.name.matches('nobots') and len(tl.params) == 0:
        print("match - false")
        return False
    return True

def findpages(nextcat):
    """Sort the members of category `nextcat` into the shared foot lists.

    Requests members in namespaces 0 and 14, following API continuation until
    the category is exhausted. Titles containing "Category" are queued on
    foot.mycatlist1 (once each). Any other title not seen before is recorded
    in foot.allplayers and, when it is not in foot.excludelist, also in
    foot.nomatchlist.
    """
    cm_continue = ''
    while True:
        query = {'action':'query',
                 'list':'categorymembers',
                 'cmtitle':nextcat,
                 'cmlimit':'max',
                 'cmnamespace':'0|14',
                 'cmcontinue':cm_continue
                 }
        request = api.APIRequest(site, query)
        result = request.query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])
        for member in members:
            title = member.unprefixedtitle
            if "Category" in title:
                # Sub-category: queue it for a later pass unless already queued.
                if title in foot.mycatlist1:
                    pnt("NOT APPENDING "+title)
                else:
                    foot.mycatlist1.append(title)
                    pnt("APPENDING "+title)
                    print(len(foot.mycatlist1))
                continue
            if title in foot.allplayers:
                continue
            foot.allplayers.append(title)
            if title not in foot.excludelist and title not in foot.nomatchlist:
                foot.nomatchlist.append(title)
        if 'continue' not in result:
            break
        cm_continue = result['continue']['cmcontinue']
        print("continue")
    return

def splittextpoint(pagetext):
    """Find the newline just above the trailing "}}" block of a page.

    Scans backwards from the end of `pagetext`, looking at no more than the
    last 100 characters and never going past the start of the text.

    Args:
        pagetext: the page's wikitext.

    Returns:
        The index of the first newline met after a "}" has been seen (scanning
        backwards). If a "]" is met first, or no such newline lies inside the
        scanned window, returns len(pagetext) - 1.
    """
    size = len(pagetext) - 1
    print(size)
    seen_curly = False
    # Stop at index 0. Letting the index go negative would wrap around to the
    # end of the string and could hand back a negative cut point, or raise
    # IndexError on very short pages.
    for loopvar in range(size, max(size - 100, -1), -1):
        mychar = pagetext[loopvar]
        print("%d %s" % (loopvar, repr(mychar)))
        if mychar == "]":
            # A link closes the page, so there is no trailing template block.
            return size
        if mychar == "}":
            seen_curly = True
        elif seen_curly and mychar == "\n":
            return loopvar
    return size

def ProcessDoneCat(nextcat):
    """Split the members of category `nextcat` between donelist and excludelist.

    Requests members in namespaces 0 and 14, following API continuation until
    the category is exhausted. A title already in foot.excludelist is appended
    to foot.donelist; every other title is appended to foot.excludelist.
    """
    cm_continue = ''
    print("PDC")
    while True:
        query = {'action':'query',
                 'list':'categorymembers',
                 'cmtitle':nextcat,
                 'cmlimit':'max',
                 'cmnamespace':'0|14',
                 'cmcontinue':cm_continue
                 }
        request = api.APIRequest(site, query)
        result = request.query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])
        for member in members:
            pnt(member.unprefixedtitle)
            title = member.unprefixedtitle
            if title not in foot.excludelist:
                pnt("EXCUDE "+title)
                foot.excludelist.append(title)
                continue
            pnt("REMOVE THE CAT IN "+title)
            foot.donelist.append(title)
        if 'continue' not in result:
            break
        cm_continue = result['continue']['cmcontinue']
        print("continue")
    return

def RemoveCat():
    """Strip the tracking category from every page listed in foot.donelist.

    Pages whose {{bots}}/{{nobots}} template denies RonBot are skipped. Pages
    whose wikitext does not contain the literal category link are left alone,
    so they are neither saved nor counted. foot.removed is incremented once
    per successful save.
    """
    print(len(foot.donelist))
    for pagetitle in foot.donelist:
        pagetitletext = pagetitle.encode('utf-8')
        print(pagetitletext)
        pagepage = page.Page(site, pagetitle)
        pagetext = pagepage.getWikiText()
        if not allow_bots(pagetext, 'RonBot'): # does the page allow bots
            continue
        print("++++++++++++++++++++++++++++++++++++++++")
        print("REMOVAL bot allowed on article")
        pnt(pagetext)
        newtext = re.sub(r'\[\[Category:Association footballers not categorized by position\]\]\n*', '', pagetext)
        pnt(newtext)
        if newtext == pagetext:
            # Nothing matched: saving would be a no-op and would inflate the
            # removal counter.
            print("Category link not found in page text - nothing to remove")
        else:
            try:
                pagepage.edit(text=newtext, bot=True, summary="(Task 7) - Removal of [[:Category:Association footballers not categorized by position]]")
            except Exception as err:
                # Best effort: report the failure and carry on with the next
                # page. KeyboardInterrupt/SystemExit are deliberately not caught.
                print("Failed to write")
                pnt(repr(err))
            else:
                foot.removed += 1
                print("writing changed page")
        print("++++++++++++++++++++++++++++++++++++++++")
    return

def AddCat():
    """Add the tracking category to every page listed in foot.nomatchlist.

    For each title the page is loaded without following redirects. Pages whose
    text contains "stub" are recorded in foot.stublist and the category is
    inserted at the point chosen by splittextpoint(); otherwise it is appended
    to the end of the text. Redirects are recorded in foot.redirlist and not
    edited. Pages whose {{bots}}/{{nobots}} template denies RonBot are skipped.
    foot.added is incremented once per successful save.

    Processing stops once foot.added + foot.removed reaches 13000.
    """
    print(time.ctime())
    print(len(foot.nomatchlist))
    category_link = "[[Category:Association footballers not categorized by position]]"
    for pagetitle in foot.nomatchlist:
        pagetitletext = pagetitle.encode('utf-8')
        pagepage = page.Page(site, pagetitle, True, False) # dont follow redirects!
        pageredir = pagepage.isRedir()
        pagetext = pagepage.getWikiText()
        lastindex = len(pagetext) - 1
        cutplace = lastindex
        if "stub" in pagetext:
            foot.stublist.append(pagetitle)
            cutplace = splittextpoint(pagetext)
        if allow_bots(pagetext, 'RonBot'): # does the page allow bots
            print(pagetitletext + " ADDITION bot allowed on article")
            if pageredir:
                print("REDIRECT " + pagetitletext) # show that page, but don't add the cat.
                foot.redirlist.append(pagetitle)
            else:
                if 0 <= cutplace < lastindex:
                    # Replace the newline at cutplace with the category line so
                    # it sits above the trailing template block. A negative cut
                    # point is never used for slicing.
                    newtext = pagetext[0:cutplace] + "\n" + category_link + "\n" + pagetext[cutplace+1:]
                else:
                    newtext = pagetext + "\n" + category_link
                try:
                    pagepage.edit(text=newtext, bot=True, summary="(Task 7) - Addition of [[:Category:Association footballers not categorized by position]]")
                except Exception as err:
                    # Best effort: report the failure and carry on with the
                    # next page. KeyboardInterrupt/SystemExit are not caught.
                    print("Failed to write")
                    pnt(repr(err))
                else:
                    foot.added += 1
                    print("writing changed page")
                print("++++++++++++++++++++++++++++++++++++++++")
        if foot.added + foot.removed >= 13000: #Termination for trials. comment out this line and next for full run
            return
    return

def findexclude(nextcat):
    """Collect the members of category `nextcat` into the exclude list.

    Requests members in namespaces 0 and 14, following API continuation until
    the category is exhausted. Titles containing "Category" are queued on
    foot.mycatlist2 (once each); every other title is added to
    foot.excludelist if it is not already there.
    """
    cm_continue = ''
    while True:
        query = {'action':'query',
                 'list':'categorymembers',
                 'cmtitle':nextcat,
                 'cmlimit':'max',
                 'cmnamespace':'0|14',
                 'cmcontinue':cm_continue
                 }
        request = api.APIRequest(site, query)
        result = request.query(False)
        members = pagelist.listFromQuery(site, result['query']['categorymembers'])
        for member in members:
            title = member.unprefixedtitle
            if "Category" not in title:
                if title not in foot.excludelist:
                    foot.excludelist.append(title)
                continue
            # Sub-category: queue it for a later pass unless already queued.
            if title in foot.mycatlist2:
                pnt("NOT APPENDING "+title)
            else:
                foot.mycatlist2.append(title)
                pnt("APPENDING "+title)
                print(len(foot.mycatlist2))
        if 'continue' not in result:
            break
        cm_continue = result['continue']['cmcontinue']
        print("continue")
    return

def TestMainIO():
    """Manual trial: run AddCat() with two sandbox pages queued, then exit.

    Clears foot.donelist, appends the two sandbox titles to foot.nomatchlist,
    calls AddCat() and terminates the interpreter with sys.exit().
    """
    foot.donelist = []
    for sandbox_title in ("User:Ronhjones/Sandbox4", "User:Ronhjones/Sandbox5"):
        foot.nomatchlist.append(sandbox_title)
    AddCat()
    sys.exit()
    

# Directory that receives the per-run title lists.
_LOG_DIR = 'C:\\Python27\\bot\\log7\\'

# Categories whose members (including sub-category members) are excluded from
# receiving the tracking category.
_EXCLUDE_ROOT_CATEGORIES = (
    "Category:Association football defenders",
    "Category:Association football central defenders",
    "Category:Association football fullbacks",
    "Category:Association football sweepers",
    "Category:Association football forwards",
    "Category:Association football inside forwards",
    "Category:Association football outside forwards",
    "Category:Association football goalkeepers",
    "Category:Association football midfielders",
    "Category:Association football wing halves",
    "Category:Association football wingers",
    "Category:Women's association football defenders",
    "Category:Women's association football forwards",
    "Category:Women's association football goalkeepers",
    "Category:Women's association football midfielders",
    "Category:Association football player non-biographical articles",
    "Category:Association football utility players",
    "Category:Women's association football central defenders",
    "Category:Women's association football fullbacks",
    "Category:Women's association football wingers",
    "Category:Women's association football utility players",
)

# Categories walked to build the list of all players.
# NOTE(review): the first entry is also an exclude root - confirm it belongs here.
_PLAYER_ROOT_CATEGORIES = (
    "Category:Association football defenders",
    "Category:Footballers by city or town",
    "Category:Association football players by club",
    "Category:Association football players by competition",
    "Category:Association football players by country",
    "Category:Association football players by national team",
    "Category:Association football players by nationality",
    "Category:Women's association football players",
    "Category:Expatriate association football players",
)


def _write_title_list(path, titles):
    """Write one title per line to `path`, closing the file afterwards."""
    with open(path, 'w') as myfile:
        for item in titles:
            try:
                myfile.write("%s\n" % item)
            except UnicodeEncodeError:
                # Non-ASCII title: write its UTF-8 bytes instead.
                myfile.write("%s\n" % item.encode('utf-8'))


def main():
    """Run the whole task.

    Steps:
      1. Stop immediately unless the on-wiki run switch says "run".
      2. Walk the exclude categories to fill foot.excludelist.
      3. Process the tracking category, then remove it from pages listed in
         foot.donelist.
      4. Walk the player categories to fill foot.allplayers and
         foot.nomatchlist, then add the tracking category to the latter.
      5. Write the five title lists to the log directory and print the
         edit counters.
    """
    go = startAllowed() #Check if task is enabled
    if go != "run":
        # The switch was previously read but never acted on.
        print("Task not enabled on wiki - stopping")
        return
    #TestMainIO() # - test run was OK.

    #Get the exclude list
    foot.mycatlist2 = list(_EXCLUDE_ROOT_CATEGORIES)
    foot.excludelist = []
    foot.stublist = []
    foot.redirlist = []
    # Index loop on purpose: findexclude() appends sub-categories to the list
    # while it is being walked.
    listnum = 0
    while listnum < len(foot.mycatlist2):
        pnt( "CAT " + foot.mycatlist2[listnum])
        findexclude(foot.mycatlist2[listnum])
        listnum += 1
        print("LIST No.  %d" % listnum)
        print(len(foot.excludelist))

    #Get the target cat, if not in exclude then add to that list
    #Otherwise add to donelist - these will need to have the cat removed.
    foot.removed = 0
    foot.added = 0
    print("check the done cat")
    ProcessDoneCat("Category:Association footballers not categorized by position")
    print(len(foot.donelist))
    print(len(foot.excludelist))
    if len(foot.donelist) > 0:
        RemoveCat()

    #write local file
    print("OPEN FILE 1")
    _write_title_list(_LOG_DIR + 'articlelist1.txt', foot.excludelist)

    #Now ready to process Mainlist
    #Make a list of players that are NOT in the exclude list
    foot.mycatlist1 = list(_PLAYER_ROOT_CATEGORIES)
    foot.allplayers = []
    foot.nomatchlist = []
    # Index loop on purpose: findpages() appends sub-categories to the list
    # while it is being walked.
    listnum = 0
    while listnum < len(foot.mycatlist1):
        pnt( "CAT" + foot.mycatlist1[listnum])
        findpages(foot.mycatlist1[listnum])
        listnum += 1
        print("LIST No.  %d" % listnum)
        print(len(foot.allplayers))
        print(len(foot.nomatchlist))

    foot.nomatchlist.sort()

    if len(foot.nomatchlist) > 0:
        AddCat()
        #write local file
        print("OPEN FILE 2")
        _write_title_list(_LOG_DIR + 'articlelist2.txt', foot.nomatchlist)

    #write local files
    print("OPEN FILE 3")
    _write_title_list(_LOG_DIR + 'articlelist3.txt', foot.allplayers)

    print("OPEN FILE 4")
    _write_title_list(_LOG_DIR + 'articlelist4.txt', foot.stublist)

    print("OPEN FILE 5")
    _write_title_list(_LOG_DIR + 'articlelist5.txt', foot.redirlist)

    print(foot.added)
    print(foot.removed)
    print(time.ctime())
      
# Script entry point: run main() with FutureWarning messages suppressed for
# the duration of the call.
if __name__ == "__main__":
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=FutureWarning)
        main()

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted and restated here. We strive to provide accurate and relevant information; however:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.