User:GemmaBot/ruth.py
Jump to navigation
Jump to search
# Bug fixes: In dictionary generation, if repeat chapter name exists, place [2] at the end of it. Put in some logic here at PLB to read that and cut it off the end.
# Add redirect from "The" at the beginning
# Go thru Index and mark pages as not needing to be proofread
# Fix some possible transclusion stuff?
import re
import time
print("Importing pywikibot...")
import pywikibot
print("Imported!")
# Handle on pywikibot's default site.
site = pywikibot.Site()

# Title text placed in the running header ({{rvh}}) of non-chapter-opening pages.
book_name = "RUTH OF THE U. S. A."
# File name of the scan; individual pages are addressed as "Page:<pageroot>/<n>".
pageroot = "Ruth of the U.S.A. (IA ruthofusa00balm).pdf"
# Marker that must be present in the wikitext of every chapter's first page;
# used as a sanity check against the table of contents below.
chapter_begin_root = "|CHAPTER "
# Table of contents: chapter title -> printed page number on which it begins.
# Insertion order matters: chapters are walked by position.
chapters = {
    'A Beggar and a Passport': 1,
    'The Wand of War': 17,
    'The New {{SIC|Role|Rôle}}': 30,
    "At Mrs. Corliss'": 42,
    '"You\'re Not Like Anyone Else"': 48,
    '"We\'re Fighting"': 70,
    '"One of Our Own!"': 91,
    'France': 110,
    'To Picardy': 127,
    'The Great Attack': 141,
    'The Resistance': 157,
    '"How Could This {{SIC|Happen|Happen?}}"': 175,
    'Byrne Arrives': 197,
    'Full Confession': 212,
    "Gerry's Problem": 229,
    'Into Germany': 244,
    'The Road to Lauengratz': 260,
    'The Message in Cipher': 274,
    'The Underground Railway': 291,
    "An Officers' Prison": 305,
    'The Raid on the Schloss': 323,
    '"The War\'s Over"': 348,
}
# Descriptions of every clean-up job the bot knows about. The edit summary is
# assembled from the entries whose job ran on a page. Entries are looked up by
# position (jobs[0], jobs[7], ...), so the order of this list must not change.
jobs = [
    'add/fix headers and footers',                    # 0
    'convert en dashes to em dashes',                 # 1
    'templatize quote-apostrophe combinations',       # 2
    'convert double em dash to [[Template:bar]]',     # 3
    'convert two returns to [[Template:dhr]]',        # 4
    'take out spaces before returns',                 # 5
    'shrink plate image size to 500px, since that seems more accurate throughout',  # 6
    "replace ’ with '",                               # 7
    "replace ‘ with '",                               # 8
    'replace ” with "',                               # 9
    'replace “ with "',                               # 10
    '" ?" -> "?"',                                    # 11
    '" !" -> "!"',                                    # 12
    '" :" -> ":"',                                    # 13
    '" ;" -> ";"',                                    # 14
    '" — " -> "—"',                                   # 15
    '" —" -> "—"',                                    # 16
    '" -" at end of line -> "—"',                     # 17
    '"-" at end of line -> "—"',                      # 18
    '" - " -> "—"',                                   # 19
    'fix double line break before {{nop}}',           # 20
    '',                                               # 21 (empty trailing entry)
]
# IF "{{c|PLATE " in
# Highest scan index that will be processed.
last_page = 381
# CHANGE THIS BACK NEXT TIME
# Scan index immediately before the first page to process; the main loop
# increments `num` before using it. `start_num` keeps the starting value so
# the printed page number can be derived as num - start_num - extra.
num = start_num = 16
# Number of scan pages skipped so far because they are not numbered book pages.
extra = 0
# 1-based position of the current chapter within `chapters`.
chapter_num = 1
# ---------------------------------------------------------------------------
# Main loop: walk every scan page after `num` up to `last_page`, normalise its
# typography, rebuild its <noinclude> header/footer, and save it.
# ---------------------------------------------------------------------------

# One site handle is enough for the whole run.
site = pywikibot.Site()

# Chapter titles in table-of-contents order, so chapters can be addressed by
# position without rebuilding the list on every page.
chapter_titles = list(chapters)

# Plain substring replacements as (needle, replacement, index into `jobs`).
# They run in this exact order; later rules see the output of earlier ones.
# En dash -> em dash (jobs[1]) is currently not applied.
QUOTE_NORMALISATION_RULES = (
    ("’", "'", 7),
    ("‘", "'", 8),
    ("”", "\"", 9),
    ("“", "\"", 10),
)
SPACING_AND_DASH_RULES = (
    ("——", "{{bar|2}}", 3),
    ("\n\n\n", "\n{{dhr}}\n", 4),
    (" \n", "\n", 5),
    (" ?", "?", 11),
    (" !", "!", 12),
    (" :", ":", 13),
    (" ;", ";", 14),
    (" — ", "—", 15),
    (" —", "—", 16),
    (" -\n", "—\n", 17),
    ("-\n", "—\n", 18),
    (" - ", "—", 19),
    ("\n\n{{nop}}", "\n{{nop}}", 20),
)

for num in range(num + 1, last_page + 1):
    jobs_done = []
    pagename = f"Page:{pageroot}/{num}"
    page = pywikibot.Page(site, pagename)
    print(f"Checking {pagename}...")

    # Scan pages that carry no page number are counted in `extra` and skipped:
    #   * a page containing an image whose following page is quality level 0
    #   * a page that is itself quality level 0
    # The next page is only fetched when the current page contains an image.
    nextpage = pywikibot.Page(site, f"Page:{pageroot}/{num + 1}")
    image_before_level0 = (
        "[[File:" in page.text and "pagequality level=\"0\"" in nextpage.text
    )
    if image_before_level0 or "pagequality level=\"0\"" in page.text:
        print("Not a page. Skipping...")
        print("----")
        extra += 1
        continue

    text = page.text

    # Curly quotes/apostrophes -> straight ones.
    for needle, replacement, job_index in QUOTE_NORMALISATION_RULES:
        if needle in text:
            text = text.replace(needle, replacement)
            jobs_done.insert(0, jobs[job_index])

    # Adjacent quote/apostrophe pairs -> spacing templates. The last two
    # replacements undo the templating where the apostrophe was really half
    # of a '' (italic) marker.
    if "\"'" in text or "'\"" in text:
        before_templating = text
        text = text.replace("\"'\"", "{{\" ' \"}}")
        text = text.replace("'\"'", "{{' \" '}}")
        text = text.replace("\"'", "{{\" '}}")
        text = text.replace("'\"", "{{' \"}}")
        text = text.replace("'{{' \"}}", "''\"")
        text = text.replace("{{\" '}}'", "\"''")
        if before_templating != text:
            jobs_done.insert(0, jobs[2])

    # Dashes, stray spaces and blank-line handling.
    for needle, replacement, job_index in SPACING_AND_DASH_RULES:
        if needle in text:
            text = text.replace(needle, replacement)
            jobs_done.insert(0, jobs[job_index])

    # --- headers / footers -------------------------------------------------
    page_num = num - start_num - extra

    # `chapter_end` is the first page of the *next* chapter; reaching it moves
    # us on to that chapter. The last chapter has no successor, so there is
    # nothing to compare against and `chapter_end` is None.
    if chapter_num < len(chapter_titles):
        chapter_end = int(chapters[chapter_titles[chapter_num]])
        if chapter_end == page_num:
            chapter_num += 1
    else:
        chapter_end = None

    chapter_title = chapter_titles[chapter_num - 1]
    chapter_begin = int(chapters[chapter_title])
    chapter_name = chapter_title.upper()
    # Upper-casing turns "Mc" into "MC"; restore it.
    if " MC" in chapter_name or chapter_name.startswith("MC"):
        chapter_name = chapter_name.replace("MC", "Mc")

    if page_num == chapter_begin:
        # Chapter-opening page: no running header, page number in the footer.
        if chapter_begin_root not in text:
            print("FATAL ERROR: Chapter does not begin where the table of contents you gave says it does!")
            print("\nError log:\n")
            print(f"Page name: {pagename}")
            print(f"Chapter: {chapter_name}")
            print(f"Chapter begin: {chapter_begin}")
            print(f"Chapter end: {chapter_end}")
            print(f"Page number: {page_num}")
            print(f"Extra: {extra}")
            # Non-zero status so callers can tell the run was aborted.
            raise SystemExit(1)
        header = ""
        footer = f"{{{{rh/1|{page_num}|class=__pageno}}}}"
    else:
        # Ordinary page: running header, empty footer.
        header = f"{{{{rvh|{page_num}|{chapter_name}|{book_name}}}}}"
        footer = ""
    jobs_done.insert(0, jobs[0])

    # Carry over block templates that are opened/closed inside the existing
    # <noinclude> sections, so rebuilding header/footer does not drop them.
    if "{{fine block/s}}</noinclude>" in text:
        header = header + "{{fine block/s}}"
    if "{{fine block/e}}</noinclude>" in text:
        footer = "{{fine block/e}}" + footer
    if "{{block center/s}}</noinclude>" in text:
        header = header + "{{block center/s}}"
    if "{{block center/e}}</noinclude>" in text:
        footer = "{{block center/e}}" + footer
    if "{{smallrefs}}</noinclude>" in text:
        footer = "{{smallrefs}}" + footer

    # Splice the new header in after the <pagequality .../> tag. Splitting on
    # "<" makes piece 2 look like
    #   pagequality level="3" user="..." />{{rh|2|A WILD-GOOSE CHASE}}
    # and the part after the first ">" is the old header.
    # NOTE(review): this assumes the page starts with
    # <noinclude><pagequality .../>, and that neither the old header nor the
    # old footer contains "<" - confirm before running on pages with
    # <references/> in the footer.
    pieces = text.split("<")
    header_parts = pieces[2].split(">")
    header_parts[1] = header
    pieces[2] = ">".join(header_parts)
    # Drop the old trailing "<noinclude>...</noinclude>" (the last two pieces)
    # and append the rebuilt footer.
    body_without_footer = "<".join(pieces[:-2])
    page.text = body_without_footer + f"<noinclude>{footer}</noinclude>"

    summary_jobs = ", ".join(jobs_done) if jobs_done else "absolutely nothing!"

    print(f"Chapter: {chapter_name}")
    print(f"Chapter begin: {chapter_begin}")
    print(f"Chapter end: {chapter_end}")
    print(f"Page number: {page_num}")
    print(f"Extra: {extra}")
    edit_summary = f"Bot: {summary_jobs}"
    print(edit_summary)
    # Pause before every save.
    print("Sleeping 50 seconds...")
    time.sleep(50)
    page.save(edit_summary)
    print("----")
# I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.
# In case this is not legally possible:
# I grant anyone the right to use this work for any purpose, without any conditions, unless such conditions are required by law.
# Public domain