User:Somepinkdude/PyWikiBot failed.py

####################################################################
#This was a PyWikiBot which I tried to create, but it turns out    #
#to have duplicated AnomieBOT's reference task.  My original PWB   #
#page will be used for a future bot I am working on.               #
####################################################################

import pywikibot
import re
import urllib.request as web

# Text MediaWiki renders for a named reference that is used but never
# defined; group 1 captures the reference name.
_CITE_ERROR_RE = re.compile(
    r"Cite error: The named reference <code>([^<>]+)</code> "
    r"was invoked but never defined"
)

# Bot-exclusion markers: {{nobots}} or {{bots|...deny=all...}}.
_BOT_EXCLUSION_RE = re.compile(r"{{ ?nobots ?}}|{{ ?bots[^}]+deny ?= ?all")

# NOTE(review): a descriptive User-Agent is sent because generic library
# agents are presumably rejected by Wikimedia servers -- confirm against the
# current User-Agent policy and add real contact details before running.
_USER_AGENT = "UndefinedRefFixer/0.1 (pywikibot script; urllib)"


def _fetch_rendered_html(pagename):
    """Return the rendered HTML of *pagename* on English Wikipedia as str."""
    # Local import keeps this block self-contained; the file only imports
    # urllib.request at module level.
    from urllib.parse import quote

    # Percent-encode the title so non-ASCII or reserved characters form a
    # valid URL.
    url = 'https://en.wikipedia.org/wiki/' + quote(pagename.replace(" ", "_"))
    request = web.Request(url, headers={"User-Agent": _USER_AGENT})
    # urlopen() returns a response object, not text: read and decode it, and
    # close the connection deterministically.
    with web.urlopen(request) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


def _undefined_ref_names(html):
    """Return the distinct undefined reference names found in *html*.

    Names are kept in order of first appearance.  Duplicates are dropped
    because every invocation of the same undefined name renders its own
    error, and fixing a name twice would insert its definition twice.
    """
    return list(dict.fromkeys(_CITE_ERROR_RE.findall(html)))


def _definition_pattern(refname):
    """Compile a pattern for a non-self-closing <ref name="refname">...</ref>."""
    # re.escape: reference names may contain regex metacharacters.
    # "[^></]*" (not "+") so that <ref name="x"> with nothing after the
    # closing quote is matched as well.
    return re.compile(
        rf'< ?ref[^<>/]+name ?= ?"{re.escape(refname)}"[^></]*>'
        r'[^<>]+< ?/ ?ref ?>'
    )


def _invocation_pattern(refname):
    """Compile a pattern for a self-closing <ref name="refname" /> tag."""
    return re.compile(
        rf'< ?ref[^<>/]+name ?= ?"{re.escape(refname)}"[^></]*/ ?>'
    )


def _collect_definitions(page, refnames):
    """Scan the revision history of *page* once for definitions of *refnames*.

    Returns a dict mapping each name to the list of matching reference
    definitions, in the order the revisions are yielded.
    """
    patterns = {name: _definition_pattern(name) for name in refnames}
    found = {name: [] for name in refnames}
    for rev in page.revisions(content=True):
        text = rev.text
        if not text:
            # Presumably the content of this revision is unavailable
            # (e.g. hidden) -- nothing to search.
            continue
        for name, pattern in patterns.items():
            found[name].extend(pattern.findall(text))
    return found


def main(pagename):
    """Fill in undefined named references on the Wikipedia page *pagename*.

    The rendered page is fetched and searched for "Cite error" messages
    naming references that are invoked but never defined.  For each such
    name, the revision history is searched for earlier definitions; when
    compare_title() reports that they agree, the longest one replaces the
    first self-closing ``<ref name="..." />`` in the current wikitext and
    the page is saved.  If no agreed definition is found, the placeholder
    ``{{unreferenced}}`` is inserted instead.

    Pages carrying a bot-exclusion template are left untouched.
    """
    # Basic definitions valid for each reference fixed.
    site = pywikibot.Site('en', 'wikipedia')
    page = pywikibot.Page(site, pagename)

    missing = _undefined_ref_names(_fetch_rendered_html(pagename))
    if not missing:
        return

    # Respect bot exclusion before downloading the whole revision history.
    if _BOT_EXCLUSION_RE.search(page.text):
        print(f"{page} excludes bots; nothing changed")
        return

    # One pass over the history for all names instead of one pass per name.
    definitions = _collect_definitions(page, missing)

    for refname in missing:
        print(page)

        txt = page.text
        print(txt)

        references = definitions[refname]

        # NOTE(review): {{unreferenced}} is kept as the fallback from the
        # original script; confirm it is the intended inline placeholder.
        finalref = "{{unreferenced}}"
        if references and compare_title(references):
            # Longest definition wins; max() keeps the first of equal length.
            finalref = max(references, key=len)

        # Replace the first self-closing reference with the chosen
        # definition.  A callable replacement is used so that backslashes
        # in the citation are inserted literally rather than being
        # interpreted as regex escapes/group references.
        newtxt = _invocation_pattern(refname).sub(
            lambda _match: finalref, txt, count=1
        )
        if newtxt == txt:
            # No self-closing invocation found in the wikitext; skip the
            # pointless save.
            continue

        page.text = newtxt
        page.save("Bot edit: fixing undefined references")
        print(newtxt)

# First "title" parameter of a reference: either quoted (title="...") or a
# bare template argument (|title=... up to the next "|" or "}").
_TITLE_RE = re.compile(r'title\s*=\s*(?:"([^"<>]+)"|([^"|{}<>]+))')

# Characters ignored when comparing titles: quotes and whitespace.
_TITLE_NOISE_RE = re.compile(r"[\"'\s]")


def _normalized_title(ref):
    """Return the first title found in *ref* with quotes/whitespace removed.

    Returns an empty string when the reference has no title parameter.
    """
    match = _TITLE_RE.search(ref)
    if match is None:
        return ""
    raw = match.group(1) or match.group(2)
    return _TITLE_NOISE_RE.sub("", raw)


def compare_title(reflist):
    """Report whether all references in *reflist* carry the same title.

    Only the first ``title`` parameter of each reference is considered
    (ideally there is just one, inside the {{cite}} template).  Titles are
    compared after removing quotes and whitespace.  A reference without a
    title counts as having an empty title, so it only agrees with other
    untitled references.

    Args:
        reflist: iterable of reference strings (wikitext).

    Returns:
        True if every reference has the same normalized title (also for an
        empty or single-element list), False otherwise.
    """
    distinct_titles = {_normalized_title(ref) for ref in reflist}
    return len(distinct_titles) <= 1

if __name__ == "__main__":
    # Script entry point: process the single hard-coded page.
    main(pagename="Mr. Jaggers")
# 'Mr. Jaggers' is used as a test page so that the bot does not mess up another user's articles. I tried testing in userspace instead, but the script does NOT work there.

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for re-presentation. We strive to provide accurate and relevant information; however:

  1. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  2. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  3. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  4. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.