User:GemmaBot/films.py

From Wikisource
Jump to navigation Jump to search
#This is gonna be worse'n a sure 'nuff funeral.
# Ask for the page name (or a mode keyword) before the slow pywikibot import,
# so that an empty answer fails fast.
pageq = input("Page name: ")
if not pageq:
    print("You nincompoop! Don't type nothing!")
    exit()
import time
import re
import random
print("Importing pywikibot...")
import pywikibot
print("Imported!")

print("Processing data...")

# Remove item from requests list (see below)

sections = ["Uploaded to Commons", "Not uploaded to Commons", "Lost films"]


def requestremove(film, year):
    """Remove the entry for *film* from the WikiProject Film request lists.

    For each name in ``sections`` the page
    ``Wikisource:WikiProject Film/<section>/<year>`` is loaded. The first
    bullet line that links to *film* (``* [[film|`` or ``* [[film]]``) is
    dropped together with the continuation lines that follow it, up to (but
    not including) the next bullet, heading or empty line. The page is saved
    only when something was actually removed.

    Args:
        film: Page title of the film as it appears inside the wikilink.
        year: Year used as the last path component of the request pages.
    """
    edit_summary = f"Removing [[{film}]] from request lists, as it is already done..."
    print(edit_summary)
    entry_prefixes = (f"* [[{film}|", f"* [[{film}]]")
    site = pywikibot.Site()
    for section in sections:
        pagename = f"Wikisource:WikiProject Film/{section}/{year}"
        page = pywikibot.Page(site, pagename)
        if page.text == "":
            continue
        print(f"Checking {pagename}")
        lines = page.text.split("\n")
        kept_lines = []
        # The removal state is per page, so an entry that runs to the end of
        # one page can no longer leak into the next section's page.
        inside_entry = False
        removed_any = False
        for position, line in enumerate(lines):
            if line.startswith(entry_prefixes):
                inside_entry = True
                removed_any = True
                continue
            if inside_entry:
                if line.startswith(("* ", "==")) or line == "":
                    # The entry has ended: keep this line and everything after it.
                    kept_lines.extend(lines[position:])
                    break
                # Continuation line belonging to the entry being removed.
                continue
            kept_lines.append(line)
        # Saving after the loop also covers an entry that is the last thing
        # on the page, where no terminating line is ever seen.
        if removed_any:
            page.text = "\n".join(kept_lines)
            page.save(edit_summary)

# Pick a random film

list_to_shuffle = []


def filmpick(section):
    """Collect every bullet line from the 1900-1999 request pages of *section*.

    Each bullet line (a line starting with ``"* "``) found on
    ``Wikisource:WikiProject Film/<section>/<year>`` is appended to the
    module-level ``list_to_shuffle``, prefixed with the name of the page it
    came from. Pages whose text is empty are skipped.
    """
    for offset in range(100):
        pagename = f"Wikisource:WikiProject Film/{section}/{1900 + offset}"
        site = pywikibot.Site()
        page = pywikibot.Page(site, pagename)
        if page.text == "":
            continue
        print(f"Checking {pagename}")
        list_to_shuffle.extend(
            f"\n\n({pagename})\n\n{entry}"
            for entry in page.text.split("\n")
            if entry.startswith("* ")
        )

# "p" / "pick" mode: print one random entry from the request lists and stop.
if pageq in ("p", "pick"):
    print("Picking a random film...")
    filmpick("Uploaded to Commons")
    filmpick("Not uploaded to Commons")
    if not list_to_shuffle:
        # Nothing was collected, so there is nothing to choose from.
        print("No films found on the request lists.")
        exit()
    # random.choice keeps the `random` module name intact (the old code
    # rebound it to an int) and needs no manual index arithmetic.
    print(random.choice(list_to_shuffle))
    exit()

#Correction of Motion Pictures, 1912-1939 page

# A purely numeric answer selects a page of the Motion Pictures 1912-1939
# scan; anything else is taken as the name of a film draft. Only the int()
# probe sits inside the try, so a ValueError raised while fixing or saving
# the page is no longer mistaken for "not a number".
try:
    int(pageq)
except ValueError:
    pagename = f"Wikisource:WikiProject Film/Drafts/{pageq}"
else:
    print("Processing correction of Motion Pictures, 1912-1939 page...")
    pagename = f"Page:Motion Pictures 1912 to 1939 (IA Motionpict19121939librrich0010).djvu/{pageq}"
    site = pywikibot.Site()
    page = pywikibot.Page(site, pagename)
    # The substitutions below are applied strictly in this order; later ones
    # act on the output of earlier ones.
    page.text = page.text.replace("”", "\"")
    page.text = page.text.replace(" \n", "\n")
    page.text = page.text.replace("\n**SEE", " SEE")
    page.text = page.text.replace("\n** SEE", " SEE")
    page.text = page.text.replace("SEE\n** ", " SEE ")
    page.text = page.text.replace("SEE\n**", " SEE ")
    page.text = page.text.replace(",\n", ".\n")
    page.text = page.text.replace("\n\n\n", "\n\n")
    page.text = page.text.replace(", ©", ". ©")
    page.text = page.text.replace("..", ".")
    page.text = page.text.replace(",.", ".")
    page.text = page.text.replace("\n** SEE", " SEE")
    # Abbreviations where a comma stands in place of the full stop, plus a
    # few recurring misreadings.
    page.text = page.text.replace("Bros,", "Bros.")
    page.text = page.text.replace("Co,", "Co.")
    page.text = page.text.replace("Corp,", "Corp.")
    page.text = page.text.replace("Credits;", "Credits:")
    page.text = page.text.replace("descr,", "descr.")
    page.text = page.text.replace("deser,", "descr.")
    page.text = page.text.replace(" deser.", " descr.")
    page.text = page.text.replace("Eclair", "Éclair")
    page.text = page.text.replace("Frangaise", "Française")
    page.text = page.text.replace(", From", ". From")
    page.text = page.text.replace(" ft,", " ft.")
    page.text = page.text.replace("Inc,", "Inc.")
    page.text = page.text.replace("Ju1", "Jul")
    page.text = page.text.replace("Metro-Goldwyn- ", "Metro-Goldwyn-")
    page.text = page.text.replace("Mfg,", "Mfg.")
    page.text = page.text.replace("Mr,", "Mr.")
    page.text = page.text.replace("Mrs,", "Mrs.")
    page.text = page.text.replace("0ct", "Oct")
    page.text = page.text.replace("Players- ", "Players-")
    page.text = page.text.replace("Prod,", "Prod.")
    page.text = page.text.replace("pseud,", "pseud.")
    page.text = page.text.replace("Ltd,", "Ltd.")
    page.text = page.text.replace(" v, ", " v. ")
    page.text = page.text.replace(" no,", " no.")
    # NOTE(review): as written, each of these four patterns is replaced by
    # the text it matched, so they leave the page unchanged. Possibly a
    # special space character was lost from the replacement; confirm.
    page.text = re.sub(r" LP ([0-9])", r" LP \1", page.text)
    page.text = re.sub(r" LU ([0-9])", r" LU \1", page.text)
    page.text = re.sub(r" MP ([0-9])", r" MP \1", page.text)
    page.text = re.sub(r" MU ([0-9])", r" MU \1", page.text)
    # A single capital followed by a comma becomes an initial with a full stop.
    page.text = re.sub(r" ([A-Z]), ", r" \1. ", page.text)
    page.text = re.sub(r" \(([A-Z]), ", r" (\1. ", page.text)
    # A line that ends in a digit gets a closing full stop.
    page.text = re.sub(r"([0-9])\n", r"\1.\n", page.text)
    page.text = page.text.replace("  ", " ")
    print("Processed!")
    page.save("Fixed something on the Internet that's from before the Internet existed: autofixed some OCR errors on this page")
    print("All done!")
    exit()
# pagename = f"{pageq}"
# Load the draft page chosen above.
site = pywikibot.Site()
page = pywikibot.Page(site, pagename)

# An empty draft is an error, except in "r" mode, where the page that gets
# processed is asked for later.
if not page.text and pageq != "r":
    print(f"Page {pagename} does not exist. Please check the name and try again.")
    exit()

# Correct GeoCities transcript

# A draft whose first line is just "g" is a raw GeoCities transcript: it is
# reflowed into {{ft/d|...}} / {{ft/s|...}} blocks separated by "----" lines,
# saved back, and the script stops.
if page.text.startswith("g\n"):
    print("Fixing GeoCities transcript...")
    geocities_parts = []
    for line in page.text.split("\n"): #initial conversion from everything
        line = line.lstrip()
        try:
            int(line)
        except ValueError:
            # Ordinary text: keep it unless it is empty or starts with "[".
            if line != "" and not line.startswith("["):
                geocities_parts.append(line)
        else:
            # A line holding only a number closes the current block and
            # starts a new one.
            geocities_parts.append("\n}}\n\n----\n\n")
    geocities_text = " ".join(geocities_parts)
    geocities_text = geocities_text.replace("  ", " ")
    geocities_text = geocities_text.replace("\n ", "\n")
    geocities_text = geocities_text.replace(" }", "}")
    # A block that opens with a double quote becomes {{ft/d|, one that opens
    # with a capital letter becomes {{ft/s|.
    geocities_text = geocities_text.replace("----\n\n\"", "----\n\n{{ft/d|\n\"")
    geocities_text = re.sub(r"----\n\n([A-Z])", r"----\n\n{{ft/s|\n\1", geocities_text)
    geocities_final = []
    # Splitting on the separators keeps the "----" lines themselves out of
    # the dash clean-up below. The unused `twodash` value has been removed.
    for chunk in geocities_text.split("----\n"):
        chunk = chunk.replace("---", "—")
        chunk = chunk.replace("--", "—")
        chunk = chunk.replace(" - ", " — ")
        chunk = chunk.replace("\n- ", "— ")
        chunk = chunk.replace("- ", "")
        geocities_final.append(chunk)
    geocities_final = "----\n".join(geocities_final)
    # Drop the first eight characters of the result and close the last block.
    geocities_final = geocities_final[8:]
    geocities_final += "}}\n}}"
    page.text = geocities_final
    print("Done. Saving...")
    page.save("Fixed something on the Internet that's from before the Internet existed")
    print("All done! Please remember to use me again when this is all proofread.")
    exit()

# The answer is used twice: as the page title in "r" mode below, and later
# as the starting page count.
titles_q = input("Did you make sure to add the titles? ")

if pageq == "r":
    print("Processing correction of a copyright renewal page...")
    pagename = titles_q
    site = pywikibot.Site()
    page = pywikibot.Page(site, pagename)

    # (wrong, right) pairs, applied in exactly this order.
    renewal_fixes = (
        ("  ", " "),
        ("\n*", "\n\n*"),
        ("\n\n*#", "\n*#"),
        ("\n=", "\n\n="),
        ("®", "©"),
        (", ©", ". ©"),
        ("Bros,", "Bros."),
        ("Co,", "Co."),
        ("Corp,", "Corp."),
        ("Inc.", "inc."),
        ("Inc,", "inc."),
        ("0ct", "Oct"),
    )
    corrected = page.text
    for wrong, right in renewal_fixes:
        corrected = corrected.replace(wrong, right)
    page.text = corrected

    print(page.text)
    page.save("Fixed something on the Internet that's from before the Internet existed: autofixed some OCR errors on this page")
    exit()

# Normalise blank lines so that the draft splits cleanly into items.
page.text = page.text.replace("\n\n\n", "\n\n")
page.text = page.text.replace("}}\n----", "}}\n\n----")
page.text = re.sub(r"----\n([0-9])", r"----\n\n\1", page.text)

# Items are the chunks of the draft separated by one blank line.
parse = page.text.split("\n\n")
dashesbool = False

# After the first "----" separator, every item must be a separator, start
# with a digit 0-5, or end with "}}". Anything else is reported and aborts
# the run.
for item_number, item in enumerate(parse):
    if item == "----":
        dashesbool = True
    if (
        dashesbool
        and item != "----"
        and not item.endswith("}}")
        and not item.startswith(("0", "1", "2", "3", "4", "5"))
    ):
        # enumerate gives the true position even when the same text occurs
        # twice, and "}}}}" prints the literal "}}" the message means.
        print(f"ERROR: The following item ({item_number}) does not end with }}}}:\n\n{item}")
        exit()
# Working state for the draft parser. These names are read or updated by the
# parsing loop and by the page-building code further down.
data = []
single_line_data = []
time_bool = False
line_bool = False
title = pageq
if "(" in title:
    # For a title with a parenthesised part, realtitle is the text before
    # " (", and the wikilink is piped so that only realtitle is displayed.
    realtitle = title[:title.find("(")-1]
    realtitlewithsymbol = f"{title}|{realtitle}"
    similar = f"{{{{similar|{realtitle}}}}}\n"
else:
    realtitle = title
    realtitlewithsymbol = title
    similar = ""
# print("Title: " + title)
file = ""
author = ""
author_proper = ""
author_override = ""
publisher = ""
year = ""
pd = ""
note = ""
# Sort under the first word after a leading article. The "An " test used to
# be nested inside the "A " branch, where it could never match.
if title.startswith("A "):
    defaultsort = f"{{{{DEFAULTSORT:{realtitle[2:]}}}}}"
elif title.startswith("An "):
    defaultsort = f"{{{{DEFAULTSORT:{realtitle[3:]}}}}}"
elif title.startswith("The "):
    defaultsort = f"{{{{DEFAULTSORT:{realtitle[4:]}}}}}"
else:
    defaultsort = ""
categories = ""
portals = ""
section = ""
next = ""
user = ""
problematic = False
problematic_list = []
time_num = 0

## remove page from Uploaded or Not uploaded list

def CartoonOrNot(medium):
    """Append a "<medium> cartoons" or "<medium> film" entry to ``categories``.

    The global ``categories`` string is extended in place of being returned:
    ", <medium> cartoons" is added when it already contains "Cartoons",
    otherwise ", <medium> film".
    """
    global categories
    kind = "cartoons" if "Cartoons" in categories else "film"
    categories = categories + f", {medium} {kind}"

# put all the data from the page into list and variables
# Each item of `parse` is checked against the "Key: value" prefixes below and
# is also fed through a small state machine: a "----" item arms time_bool,
# the next item is read as a timestamp and arms line_bool, and the item after
# that is stored as the text belonging to that timestamp.
for line in parse:
    if line.startswith("User: "):
        user = line[6:]
    if line.startswith("File: "):
        if line.startswith("File: File:"):
            # The message is only a warning: the prefix is stripped and
            # processing continues.
            print("ERROR: Filename began with \"File:\"! PLEASE don't do this, it's bad form!")
            file = line[11:]
        else:
            file = line[6:]
        # NOTE(review): `source` is assigned only for these three endings;
        # with any other filename it is not set in this loop.
        if file.endswith("webm"):
            source = "webm"
        elif file.endswith("ogv"):
            source = "ogv"
        elif file.endswith("ogg"):
            source = "ogg"
    if line.startswith("Section: "):
        section = line[9:]
    if line.startswith("Next: "):
        next = f"[[{line[6:]}]]"
    if line.startswith("Author: "):
        author = line[8:]
        if ", " in line:
            # Several authors, separated by ", ": wrap each in {{al|...}}
            # and join them into an English list.
            author_proper = author.split(", ")
            if len(author_proper) == 2:
                author_proper = f"{{{{al|{author_proper[0]}}}}} and {{{{al|{author_proper[1]}}}}}"
            else:
                for auth in author_proper:
                    author_proper[author_proper.index(auth)] = f"{{{{al|{auth}}}}}"
                author_proper_last = author_proper.pop()
                author_proper = f"{', '.join(author_proper)}, and {author_proper_last}"
            # With several authors the header's author value carries an
            # override_author parameter instead of a plain name.
            author_override = f"|override_author = {author_proper}"
        else:
            author_proper = f"{{{{al|{author}}}}}"
            author_override = author
        print(author_proper)
        print(author_override)
    if line.startswith("Publisher: "):
        publisher = line[11:]
    if line.startswith("Year: "):
        year = line[6:]
    if line.startswith("PD: "):
        pd = line[4:]
    if line.startswith("Note: "):
        note = line[6:]
    if line.startswith("Cat: "):
        categories = line[5:]
        # Silent vs. Sound categorizing
        if "Silent film" not in categories and "Sound film" not in categories and "Silent cartoons" not in categories and "Sound cartoons" not in categories:
            # NOTE(review): int(year) needs a "Year: " item to have been read
            # before this "Cat: " item; year is still "" otherwise.
            if int(year) < 1927:
                CartoonOrNot("Silent")
            else:
                # From 1927 on the medium cannot be derived from the year, so
                # keep asking until the answer is recognised.
                while 1:
                    silentorsound = input("Silent or sound? ")
                    if silentorsound == "si" or silentorsound == "silent":
                        CartoonOrNot("Silent")
                        break
                    elif silentorsound == "so" or silentorsound == "sound":
                        CartoonOrNot("Sound")
                        break
                    else:
                        print("Sorry, I didn't understand that. Type \"si\" for silent and \"so\" for sound.")
        # Romance subset of drama
        if "Romance film" in categories and "Drama film" not in categories:
            categories += ", Drama film"
        # Turn the comma-separated names into a "/"-joined portal list and
        # one [[Category:...]] link per line.
        if ", " in categories:
            categories = categories.split(", ")
            categories.sort()
            portals = "/".join(categories)
            newcats = []
            for category in categories:
                newcats.append(f"[[Category:{category}]]")
            categories = "\n".join(newcats)
        else:
            portals = categories
            categories = f"[[Category:{categories}]]"
    if line_bool == True:
        # The previous item was a timestamp, so this item is its text.
        single_line_data.append(line)
        if "{{" in single_line_data[0]:
            print(f"\nERROR: The following timestamp is incorrectly entered:\n\n{single_line_data[0]}")
            exit()
        if len(single_line_data[0]) <= 3:
            print(f"\nERROR: Looks like you forgot to fill in this timestamp:\n\n{single_line_data[0]}\n\n{single_line_data[1]}")
            exit()
        data.append({
            "time": single_line_data[0],
            "line": single_line_data[1]
        })
        single_line_data.clear()
        line_bool = False
    if time_bool == True:
        # The previous item was a separator, so this item is a timestamp.
        time_num += 1
        if line.endswith("p") or line.endswith("P"):
            # A trailing p/P flags the entry as problematic; only the part
            # before the first space is kept as the timestamp.
            linesplit = line.split(" ")
            single_line_data.append(linesplit[0])
            problematic = True
            problematic_list.append(time_num)
        else:
            single_line_data.append(line)
        time_bool = False
        line_bool = True
    if line == "----" or line == "-----":
        time_bool = True

# print(data[5])
# print(file)

# Edit summary shared by the Index page, every Page: page and the final
# transclusion.
edit_summary = f"Entering transcription of a film that's generations older than the Internet, on the Internet itself (proofread by {user} from [[Wikisource:WikiProject Film/Drafts/{title}]])"

# check if any important data missing
# Each of these fields must be filled in; the first empty one stops the run.
# A missing user now stops the run like the others, instead of only printing.
required_fields = (
    (user, "User not specified!"),
    (pd, "PD tag not specified!"),
    (year, "Year of publication not specified!"),
    (publisher, "Publisher not specified!"),
    (author, "Author not specified!"),
    (file, "Filename not specified!"),
)
for field_value, problem in required_fields:
    if field_value == "":
        print(f"\nERROR: {problem}")
        exit()
if categories == "":
    # Categories are optional: the prompt only pauses so the run can be
    # aborted, and the answer itself is not used.
    input("No categories/portals were specified. Are you sure you want to continue? If not just CTRL+C or CTRL+Z to exit out. ")
if note == "TBA":
    print("\nERROR: Note is TBA!")
    exit()




# Create index page

# Progress "C" when any timestamp was flagged as problematic, otherwise "V".
progress = "C" if problematic else "V"

print("Processed!")
print("----")

pagename = f"Index:{file}"
page = pywikibot.Page(site, pagename)

# One {{time}} entry per transcript entry, numbered from 1. enumerate keeps
# the numbering right even if two entries are identical, which
# data.index() did not.
indexpagescreate = [
    f"{{{{time|{page_number}|t={datum['time']}}}}}"
    for page_number, datum in enumerate(data, start=1)
]

index_pages = "\n".join(indexpagescreate)

# NOTE(review): `source` is set by the parsing loop only for filenames
# ending in webm, ogv or ogg.
page.text = f"""{{{{:MediaWiki:Proofreadpage_index_template
|Type=book
|Title=''[[{realtitlewithsymbol}]]''
|Language=en
|Volume=
|Author={author_proper}
|Translator=
|Editor=
|Illustrator=
|School=
|Publisher=[[Portal:{publisher}|{publisher}]]
|Address=
|Year={year}
|Key=
|ISBN=
|OCLC=
|LCCN=
|BNF_ARK=
|ARC=
|Source={source}
|Image=1
|Progress={progress}
|Pages={index_pages}
|Volumes=
|Remarks=
|Width=
|Css=
|Header=
|Footer=
|Transclusion=yes
}}}}
"""
print("Creating Index page...")
print(edit_summary)
page.save(edit_summary)
print("----")

# print(pagename)
# print(page.text)




# Create Pages

# The answer to the titles prompt doubles as a resume point: a number N makes
# page creation continue with page N + 1. An empty or non-numeric answer
# (for example "yes") starts from the first page. int() raises ValueError,
# not TypeError, for such strings.
try:
    count = int(titles_q)
except ValueError:
    count = 0

def timedisplay(sec):
    """Format a number of seconds as "H hours, M minutes, S seconds".

    Below one whole minute the seconds value is shown exactly as passed in;
    from one minute upwards the seconds and minutes are the integer
    remainders after dividing by 60.
    """
    whole_minutes = int(sec / 60)
    hours = int(whole_minutes / 60)
    if whole_minutes < 1:
        minutes, seconds = whole_minutes, sec
    else:
        seconds = int(sec % 60)
        minutes = whole_minutes if whole_minutes < 60 else int(whole_minutes % 60)
    return f"{hours} hours, {minutes} minutes, {seconds} seconds"

# Create one Page: page per transcript entry, continuing after `count`.
# The loop condition is tested before any arithmetic, so an empty `data`
# no longer divides by zero.
total_pages = len(data)
while count < total_pages:
    count += 1
    percent = int(((count - 1) / total_pages) * 100)
    eta = ((total_pages - count) * 52) + 112
    line_now = data[count - 1]["line"]
    pagename = f"Page:{file}/{count}"
    page = pywikibot.Page(site, pagename)
    # Entries flagged as problematic get quality level 2, all others level 3.
    quality_level = "2" if count in problematic_list else "3"
    page.text = f"""<noinclude><pagequality level="{quality_level}" user="{user}" /></noinclude>{line_now}<noinclude></noinclude>"""
    print("Doing: " + pagename)
    print(f"{percent}% done (page {count} of {total_pages}).")
    print(f"Estimated time remaining: {timedisplay(eta)}")
    print("Sleeping 50 seconds...")
    time.sleep(50)
    page.save(edit_summary)
    print("----")




# Generate final transclusion

pagename = title
page = pywikibot.Page(site, pagename)

# One {{page}} call per transcript entry, numbered from 1 to match the Page:
# pages. enumerate keeps the numbering right even if two entries are
# identical, which data.index() did not.
transclusionpagescreate = [
    f"{{{{page|{file}/{page_number}|num={datum['time']}}}}}"
    for page_number, datum in enumerate(data, start=1)
]

transclusion_pages = "\n".join(transclusionpagescreate)

transcluded_content = f"""{similar}{{{{header
 | title       = {realtitle}
 | author      = {author_override}
 | translator  =
 | section     = {section}
 | previous    =
 | next        = {next}
 | year        = {year}
 | portal      = {portals}
 | notes       = {note} {{{{Film|{file}|thumbtime=2|size=400px}}}}
}}}}{defaultsort}

<div style="margin-left: 3em; margin-right: 3em;">
{transclusion_pages}
</div>


{{{{{pd}}}}}

{categories}"""

print(transcluded_content)

page.text = transcluded_content

print("All pages done! Transcluding...")
page.save(edit_summary)
print("----")





##### Remove from request lists

# The film now has its own page, so drop its entry from the request lists
# for its year.
requestremove(pageq, year)



##### Archiving section

# Part 1: keep a copy of the draft text, then turn the draft page into a
# redirect to its archive location.
archivep1 = "Archiving draft page (part 1, redirect)..."
print(archivep1)

site = pywikibot.Site()
pagename = f"Wikisource:WikiProject Film/Drafts/{pageq}"
# pagename = f"{pageq}"
page = pywikibot.Page(site, pagename)

contents_to_archive = page.text
page.text = f"#REDIRECT [[Wikisource:WikiProject Film/Drafts/Archives/{pageq}]]"
page.save(archivep1)

print("----")

# Part 2: write the saved draft text to the archive page.
archivep2 = "Archiving draft page (part 2, archive page)..."
print(archivep2)

site = pywikibot.Site()
pagename = f"Wikisource:WikiProject Film/Drafts/Archives/{pageq}"
# pagename = f"{pageq}"
page = pywikibot.Page(site, pagename)

page.text = contents_to_archive
page.save(archivep2)




print("----")

# Part 3: take the draft's bullet line off the drafts list and remember it
# in page_just_archived, so it can be appended to the archives list.
archivep3 = "Archiving draft page (part 3, removal from drafts list)..."
print(archivep3)

pagename = "Wikisource:WikiProject Film/Drafts"

site = pywikibot.Site()

page = pywikibot.Page(site, pagename)

entry_prefix = f"* [[/{pageq}|"
page_just_archived = ""
# Build a new list rather than removing from the one being iterated, which
# skips the line after each removal.
remaining_lines = []
for line in page.text.split("\n"):
    if line.startswith(entry_prefix):
        page_just_archived = line
    else:
        remaining_lines.append(line)
page.text = "\n".join(remaining_lines)

if f"/{pageq}|" not in page_just_archived:
    print("ERROR: Somethin' ain't right on that draft page. Maybe a draft was moved and the draft list not updated? Go check that out.")
    # exit()

page.save(archivep3)




print("----")

# Part 4: append the line taken off the drafts list to the archives list.
pagename = "Wikisource:WikiProject Film/Drafts/Archives"

site = pywikibot.Site()

page = pywikibot.Page(site, pagename)

archivep4 = "Archiving draft page (part 4, add entry to archives list)..."
print(archivep4)

if page_just_archived:
    page.text = f"{page.text}\n{page_just_archived}"
    page.save(archivep4)
else:
    # No entry was found on the drafts list, so appending would only add a
    # blank line to the archives list.
    print("No drafts-list entry was found, so the archives list was left unchanged.")


# Add page to Template:New texts if proofread fully

# pagename = "Template:New texts"
# page = pywikibot.Page(site, pagename)
#
# newtextparse = page.text.split("\n")
# newtextparse.insert(3, f"{{{{new texts/item|{title}|{author}|{year}}}}}")
# text_to_move = newtextparse[10]
# newtextparse.pop(10)
# newtextparse.insert(15, text_to_move)
# page.text = "\n".join(newtextparse)
#
# print("Adding to Template:New texts...")
# page.save(edit_summary)


#|Progress=X - To be verified
#|Progress=C - To be proofread
#|Progress=V - To be validated

I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.

In case this is not legally possible:

I grant anyone the right to use this work for any purpose, without any conditions, unless such conditions are required by law.

Public domain