User:Somepinkdude/PyWikiBot failed.py
####################################################################
#This was a PyWikiBot which I tried to create, but it turns out #
#to have duplicated AnomieBOT's reference task. My original PWB #
#page will be used for a future bot I am working on. #
####################################################################
import pywikibot
import re
import urllib.request as web
def _undefined_ref_names(rendered_html):
    """Return the reference names that rendered HTML reports as undefined.

    Names are taken from the "Cite error" messages in *rendered_html*, HTML
    entities are decoded, and duplicates are dropped while keeping the order
    of first appearance.
    """
    import html  # local import: only this helper needs it

    found = re.findall(
        r"Cite error: The named reference <code>([^<>]+)</code>"
        r" was invoked but never defined",
        rendered_html,
    )
    # The name is captured from HTML, so undo entity escaping (e.g. "&amp;")
    # before it is matched against wikitext.
    return list(dict.fromkeys(html.unescape(name) for name in found))


def _replace_first_self_closing(text, refname, definition):
    """Replace the first self-closing <ref name="refname"/> in *text*."""
    ref_tag = rf'< ?ref[^<>/]+name ?= ?"{re.escape(refname)}"[^></]*/ ?>'
    # A callable replacement inserts the definition verbatim; a plain string
    # would have its backslashes interpreted as group references.
    return re.sub(ref_tag, lambda _match: definition, text, count=1)


def main(pagename):
    """Fill in named references that a page invokes but never defines.

    The rendered article is downloaded and scanned for "Cite error" messages
    to learn which reference names are undefined. For each such name the
    page's revision history is searched for full
    ``<ref name="...">...</ref>`` definitions. The first self-closing use of
    the name in the current wikitext is then replaced by the longest
    definition found (provided ``compare_title`` accepts the candidates), or
    by the ``{{unreferenced}}`` placeholder when no usable definition exists.
    The page is saved unless its text opts out of bot edits or nothing
    changed.

    Args:
        pagename: Title of the English Wikipedia page to repair.
    """
    from urllib.parse import quote  # local import: only needed here

    # Basic definitions valid for each reference fixed.
    site = pywikibot.Site('en', 'wikipedia')
    page = pywikibot.Page(site, pagename)

    # Get the HTML code of the rendered page. The title is percent-encoded so
    # that non-ASCII or reserved characters produce a valid URL.
    wikified_pagename = pagename.replace(" ", "_")
    url = 'https://en.wikipedia.org/wiki/' + quote(wikified_pagename)
    # NOTE(review): the server may reject urllib's default User-Agent;
    # confirm whether a descriptive one must be sent with this request.
    with web.urlopen(url) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        rendered = response.read().decode(charset, errors="replace")

    error_names = _undefined_ref_names(rendered)

    # Start the actual program, which is repeated for each reference name.
    for refname in error_names:
        print(page)
        txt = page.text
        print(txt)

        # Matches references named refname that are NOT self-closing, i.e.
        # that carry an actual definition.
        definition_regex = re.compile(
            rf'< ?ref[^<>/]+name ?= ?"{re.escape(refname)}"[^></]*>'
            r'[^<>]+< ?/ ?ref ?>'
        )

        # Compile a list of all definitions of refname in the revision
        # history.
        references = []
        for rev in page.revisions(content=True):
            # NOTE(review): presumably rev.text can be None for hidden
            # revisions; treat that as empty text. Confirm against pywikibot.
            references.extend(definition_regex.findall(rev.text or ""))

        # Fallback used when no trustworthy definition can be recovered.
        finalref = "{{unreferenced}}"
        if references and compare_title(references):
            # Use the largest definition; max() keeps the first of equals.
            finalref = max(references, key=len)

        # Replace the first self-closing reference with the definition.
        newtxt = _replace_first_self_closing(txt, refname, finalref)

        # Save as long as something changed and the page does not exclude
        # bots.
        excludes_bots = re.search(
            r"{{ ?nobots ?}}|{{ ?bots[^}]+deny ?= ?all", txt
        )
        if newtxt != txt and not excludes_bots:
            page.text = newtxt
            page.save("Bot edit: fixing undefined references")
        print(newtxt)
def compare_title(reflist):
    """Report whether every reference in *reflist* has the same title.

    Each reference string is searched for a quoted ``title="..."`` property.
    Only the first such property is used; ideally there is just one, inside
    the {{cite}} template. Quotes and spaces are stripped from the title
    before comparison. A reference without a quoted title is treated as
    having an empty title.

    Args:
        reflist: Reference strings (wikitext) to compare.

    Returns:
        True if all normalised titles are identical (including when
        *reflist* is empty or has a single entry), False otherwise.
    """
    titles = []
    for ref in reflist:
        # Capture only the text between the quotes of title="...".
        quoted_titles = re.findall(r'title ?= ?"([^"><]+)"', ref)
        first_title = quoted_titles[0] if quoted_titles else ""
        # Remove quotes and spaces so formatting differences do not matter.
        titles.append(re.sub(r"[\"' ]", "", first_title))
    # All titles are identical exactly when at most one distinct value exists.
    return len(set(titles)) <= 1
if __name__ == "__main__":
    # 'Mr. Jaggers' is used as a test page so that the script does not mess
    # up another user's articles. Userspace pages were tried instead, but the
    # script does NOT work on them.
    main('Mr. Jaggers')
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted and restated. We strive to provide accurate and relevant information; however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.