User:GemmaBot/keeban.py

From Wikisource
Jump to navigation Jump to search
# Bug fixes: In dictionary generation, if a repeated chapter name exists, place [2] at the end of it. Add some logic here at PLB to read that suffix and cut it off the end.
# Add a redirect from "The" at the beginning
# Go through the Index and mark pages as not needing to be proofread

# Fix some possible transclusion stuff?

import re
import time
print("Importing pywikibot...")
import pywikibot
print("Imported!")

# Default pywikibot site object.
site = pywikibot.Site()

# Book title; placed in the running header of every non-chapter-opening page.
book_name = "KEEBAN"
# Title of the scan file; individual scans are addressed as "Page:<pageroot>/<n>".
pageroot = "Keeban (IA keeban00balm).pdf"
# Wikitext that must appear on the first page of every chapter.
chapter_begin_root = "{{Plain heading|"

# Scan index just before the first scan to process (the main loop increments
# it before use) and the last scan index to process.
num = 12
last_page = 307
# Offset subtracted from a scan index to obtain the printed page number.
start_num = num
# Printed page number of the last scan; used for the progress display.
last_page_num = last_page - start_num
# 1-based position of the current chapter within `chapters`.
chapter_num = 1
# Number of scans skipped so far (they do not carry a printed page number).
extra = 0

# Chapter title -> printed page number on which the chapter begins.
# Insertion order is significant: the main loop walks the chapters by position.
chapters = {
    'In Two Places at Once': 1,
    'And Escapes from Both': 14,
    'Encounter by the River': 31,
    'I Sit in on Fate': 48,
    'The Underworld Intrudes': 60,
    'Fail to Prevent a Bump-Off': 72,
    'I Keep My Own Counsel': 87,
    'A Lady Discredits Me': 98,
    'I Seek the Underworld': 107,
    'The Ways of Its Logic': 116,
    "The Thieves' Ball": 134,
    'I Discover "The Queer"': 153,
    'Soothing Effects': 173,
    'I Take Government Orders': 185,
    'I Assist a Get-away': 196,
    'I Walk into a Parlor': 210,
    'A Gas Called KX': 219,
    'Doris Appears and Vanishes': 239,
    'I Hear of the Glass Room': 248,
    'Doris and I Are Taken to It': 256,
    'Doris Enters Glass Room': 267,
    'A Croaking and Finis': 287,
}

# Edit-summary fragments. The main loop refers to them BY INDEX, so entries
# must never be reordered or removed; only append new ones at the end.
jobs = [
    "add/fix headers and footers",                    # 0
    "convert en dashes to em dashes",                 # 1
    "templatize quote-apostrophe combinations",       # 2
    "convert double em dash to [[Template:bar]]",     # 3
    "convert two returns to [[Template:dhr]]",        # 4
    "take out spaces before returns",                 # 5
    "shrink plate image size to 500px, since that seems more accurate throughout",  # 6
    "replace ’ with '",                               # 7
    "replace ‘ with '",                               # 8
    "replace ” with \"",                              # 9
    "replace “ with \"",                              # 10
    "\" ?\" -> \"?\"",                                # 11
    "\" !\" -> \"!\"",                                # 12
    "\" :\" -> \":\"",                                # 13
    "\" ;\" -> \";\"",                                # 14
    "\" — \" -> \"—\"",                               # 15
    "\" —\" -> \"—\"",                                # 16
    "\" -\" at end of line -> \"—\"",                 # 17
    "\"-\" at end of line -> \"—\"",                  # 18
    "\" - \" -> \"—\"",                               # 19
    "fix double line break before {{nop}}",           # 20
    "",                                               # 21
]

def timedisplay(sec):
    """Format a duration as ``"H hours, M minutes, S seconds"``.

    Args:
        sec: Duration in seconds (int or float). Fractional seconds are
            truncated and negative durations are shown as zero, so the
            output always consists of whole, non-negative numbers.

    Returns:
        The formatted string; minutes and seconds are always in 0-59.
    """
    # Normalise once so every magnitude is formatted the same way.
    total_seconds = max(0, int(sec))
    minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours} hours, {minutes} minutes, {seconds} seconds"

# IF "{{c|PLATE " in

# Main loop: walk every scan from num+1 to last_page, normalise typography in
# the page text, rebuild the running header/footer, and save the page.

# Plain find-and-replace clean-ups as (search, replacement, index into jobs).
# They are applied in this order; several later rules rely on earlier ones
# (e.g. curly quotes are straightened before quote templates are applied).
_pre_quote_fixes = (
    ("’", "'", 7),
    ("‘", "'", 8),
    ("”", "\"", 9),
    ("“", "\"", 10),
    ("–", "—", 1),   # en dash -> em dash
)
_post_quote_fixes = (
    ("——", "{{bar|2}}", 3),
    ("\n\n\n", "\n{{dhr}}\n", 4),
    (" \n", "\n", 5),
    (" ?", "?", 11),
    (" !", "!", 12),
    (" :", ":", 13),
    (" ;", ";", 14),
    (" — ", "—", 15),
    (" —", "—", 16),
    (" -\n", "—\n", 17),
    ("-\n", "—\n", 18),
    (" - ", "—", 19),
    ("\n\n{{nop}}", "\n{{nop}}", 20),
)
# Block templates opened/closed in the existing header/footer; they are
# carried over into the rebuilt header/footer.
_header_carry_over = ("{{fine block/s}}", "{{block center/s}}")
_footer_carry_over = ("{{fine block/e}}", "{{block center/e}}", "{{smallrefs}}")

site = pywikibot.Site()
chapter_titles = list(chapters)
# Printed page on which the NEXT chapter starts; stays None if there is none.
chapter_end = None

while True:
    num += 1
    if num > last_page:
        break
    jobs_done = []  # collected in order of application, reversed for the summary
    pagename = f"Page:{pageroot}/{num}"
    page = pywikibot.Page(site, pagename)
    print(f"Checking {pagename}...")
    nextpage = pywikibot.Page(site, f"Page:{pageroot}/{num+1}")
    text = page.text

    # Scans without a printed page number are skipped and counted in `extra`
    # so that later page numbers stay aligned: either the page itself is
    # marked pagequality level 0, or it holds an image and is followed by a
    # level-0 page.
    if ("[[File:" in text and 'pagequality level="0"' in nextpage.text) \
            or 'pagequality level="0"' in text:
        print("Not a page. Skipping...")
        extra += 1
        print("----")
        continue

    # --- typography clean-up -------------------------------------------
    for search, replacement, job in _pre_quote_fixes:
        if search in text:
            text = text.replace(search, replacement)
            jobs_done.append(jobs[job])

    # Adjacent quote/apostrophe characters are wrapped in spacing templates.
    if "\"'" in text or "'\"" in text:
        before_quotes = text
        text = text.replace("\"'\"", "{{\" ' \"}}")
        text = text.replace("'\"'", "{{' \" '}}")
        text = text.replace("\"'", "{{\" '}}")
        text = text.replace("'\"", "{{' \"}}")
        # Undo the template where the apostrophe was really one half of a
        # '' pair (presumably wiki italic markup) next to a quote mark.
        text = text.replace("'{{' \"}}", "''\"")
        text = text.replace("{{\" '}}'", "\"''")
        if before_quotes != text:
            jobs_done.append(jobs[2])

    for search, replacement, job in _post_quote_fixes:
        if search in text:
            text = text.replace(search, replacement)
            jobs_done.append(jobs[job])

    # --- page / chapter bookkeeping ------------------------------------
    page_num = num - start_num - extra
    percent = int((page_num / last_page_num) * 100) if last_page_num else 100
    # NOTE(review): 52 and 112 are the original's timing constants
    # (presumably ~52 s per page, including the 50 s sleep, plus a fixed
    # overhead) -- confirm.
    eta = ((last_page_num - page_num) * 52) + 112
    # Advance to the next chapter when this page is its first page. After
    # the last chapter there is no "next" entry and chapter_end keeps its
    # previous value.
    if chapter_num < len(chapter_titles):
        chapter_end = int(chapters[chapter_titles[chapter_num]])
        if chapter_end == page_num:
            chapter_num += 1
    chapter_begin = int(chapters[chapter_titles[chapter_num - 1]])
    chapter_name = chapter_titles[chapter_num - 1].upper()
    if " MC" in chapter_name or chapter_name.startswith("MC"):
        chapter_name = chapter_name.replace("MC", "Mc")
    # NOTE(review): the original overrode the chapter title with the book
    # title before building the header, so the header shows the book title
    # in both slots; presumably intentional for this book. chapter_name is
    # kept for the log output only.
    running_title = book_name

    # --- header / footer -------------------------------------------------
    if page_num == chapter_begin:
        if chapter_begin_root not in text:
            print("FATAL ERROR: Chapter does not begin where the table of contents you gave says it does!")
            print("\nError log:\n")
            print(f"Page name: {pagename}")
            print(f"Chapter: {chapter_name}")
            print(f"Chapter begin: {chapter_begin}")
            print(f"Chapter end: {chapter_end}")
            print(f"Page number: {page_num}")
            print(f"Extra: {extra}")
            # Non-zero status so callers can tell the run was aborted.
            raise SystemExit(1)
        # Chapter-opening pages: no header, page number in the footer.
        header = ""
        footer = f"{{{{rh/1|{page_num}|class=__pageno}}}}"
    else:
        header = f"{{{{rvh|{page_num}|{running_title}|{book_name}}}}}"
        footer = ""
    jobs_done.append(jobs[0])

    for template in _header_carry_over:
        if template + "</noinclude>" in text:
            header = header + template
    for template in _footer_carry_over:
        if template + "</noinclude>" in text:
            footer = template + footer

    # Expected layout:
    #   <noinclude><pagequality ... />HEADER</noinclude>BODY<noinclude>FOOTER</noinclude>
    # Locate the sections by position instead of splitting on "<" / ">", so
    # markup inside the old header or footer (e.g. <references/>) cannot
    # shift the pieces and corrupt the page.
    quality_start = text.find("<pagequality")
    quality_end = text.find(">", quality_start) + 1 if quality_start != -1 else 0
    header_close = text.find("</noinclude>", quality_end) if quality_end else -1
    footer_open = text.rfind("<noinclude>")
    if (header_close == -1 or footer_open < header_close
            or not text.rstrip().endswith("</noinclude>")):
        # Do not guess on a page without the usual wrapper; leave it alone.
        print("Unexpected page layout (header/footer <noinclude> sections not found). Skipping...")
        print("----")
        continue
    new_text = (
        text[:quality_end]
        + header
        + text[header_close:footer_open]
        + f"<noinclude>{footer}</noinclude>"
    )

    # Most recently applied job first, matching the original summary order.
    # The fallback cannot trigger while jobs[0] is always recorded above.
    summary_jobs = ", ".join(reversed(jobs_done)) or "absolutely nothing!"

    print(f"Chapter: {chapter_name}")
    print(f"Chapter begin: {chapter_begin}")
    print(f"Chapter end: {chapter_end}")
    print(f"Page number: {page_num}")
    print(f"Extra: {extra}")
    print(f"{percent}% done (page {page_num} of {last_page_num}).")
    print(f"Estimated time remaining: {timedisplay(eta)}")
    edit_summary = f"Bot: {summary_jobs}"
    print(edit_summary)
    print("Sleeping 50 seconds...")
    time.sleep(50)
    page.text = new_text
    page.save(edit_summary)
    print("----")

I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.

In case this is not legally possible:

I grant anyone the right to use this work for any purpose, without any conditions, unless such conditions are required by law.

Public domain