User:GemmaBot/lostfilms.py
Jump to navigation
Jump to search
import re
import string
# y = open('SFF.html', 'r')
# y = y.read()
# hcount = 0
# f = re.findall('href="(http://memory\.loc\.gov/diglib/ihas/loc\.mbrs\.sfdb\..+?)"', y)
# f.remove('http://memory.loc.gov/diglib/ihas/loc.mbrs.sfdb.8950/')
# x = open('allfilms.txt', 'r')
# v = x.readlines()
# z = x.read()
# # list_of_more_parens = []
# lines = []
# count = 0
# for l in v:
# if l != '':
# print("Line is: '" + l + "'")
# l = l.replace("\n", "")
# l = "'''" + l + "'''"
# l = re.sub("'''[0-9]+? (.+?) (\(19.+?\))", "'''["+f[count]+r" \1] \2", l)
# lines.append(l)
# # if '(' not in l:
# # print("GOT HERE", l)
# # if l.count('(') > 1:
# # print("GOT TO MORE THAN ONE PARENS", l)
# # # list_of_more_parens.append(l)
# count += 1
# print(lines)
# print(len(lines))
# ultimate_string = '\n\n'.join(lines)
# w = open('newfilms.txt', 'w+')
# w.write(ultimate_string)
# w.close()
# exit()
# print(count)
# for
# s = list(set(f))
# print(len(s))
# g = re.findall('[0-9]+? (.+? \(19.+?\)', z)
# # g = re.findall('\n[0-9]+? .+? \(19', z)
# # g = re.findall('1', z)
# print(g)
# print("is g")
# print(len(g))
# ultimate_string = 'a'
# w = open('newfilms.txt', 'w+')
# w.write(ultimate_string)
# w.close()
# Number of counted lines emitted before a page break is inserted.
LINES_PER_PAGE = 46
# Lines at least this many characters long are counted twice
# (presumably because they wrap onto a second line on the page -- TODO confirm).
LONG_LINE_LENGTH = 137
# Marker that exempts a long line from the double count; removed from the output.
IGNORE_MARKER = "<!--bot ignore-->"
# Captures the text between the first space and the closing bracket of a
# "'''[<target> <label>]" entry. Raw string so that "\[" is a regex escape,
# not an (invalid) Python string escape.
ENTRY_PATTERN = re.compile(r"'''\[.+? (.+?)\]")


def paginate(lines, lines_per_page=LINES_PER_PAGE):
    """Insert letter headings and page breaks into a list of film lines.

    Args:
        lines: Iterable of text lines, each normally ending in a newline,
            as returned by ``file.readlines()``.
        lines_per_page: How many counted lines go on a page before a break
            is emitted. Every line other than a bare "\\n" counts once; a
            line of ``LONG_LINE_LENGTH`` characters or more that does not
            contain ``IGNORE_MARKER`` counts twice.

    Returns:
        The assembled text as one string, with every ``IGNORE_MARKER``
        removed.

    Raises:
        ValueError: If ``lines_per_page`` is smaller than 1.
    """
    if lines_per_page < 1:
        raise ValueError("lines_per_page must be at least 1")

    output = []
    remaining = lines_per_page
    # No heading is emitted for the initial letter; only changes produce one.
    current_letter = 'A'

    for line in lines:
        match = ENTRY_PATTERN.search(line)
        if match:
            initial = match.group(1)[0]
            # Start a new section when the entry's first letter changes.
            if initial in string.ascii_uppercase and initial != current_letter:
                current_letter = initial
                output.append(f"##{current_letter}##\n\n")

        # "<= 0" rather than "== 0": a long line decrements the counter twice
        # in one iteration, so the counter can step from 1 straight to -1.
        if remaining <= 0:
            output.append('\n{{nop}}\n')
            output.append('\n----\n')
            # Repeat the current letter heading at the top of the new page.
            output.append(f"\n##{current_letter}##\n\n")
            remaining = lines_per_page
            # NOTE(review): the line that triggers the break is discarded.
            # This presumably relies on entries being separated by blank
            # lines, so that the discarded line is a separator -- confirm
            # against the input file.
            continue

        if line != "\n":
            remaining -= 1
        output.append(line)
        if len(line) >= LONG_LINE_LENGTH and IGNORE_MARKER not in line:
            remaining -= 1

    return ''.join(output).replace(IGNORE_MARKER, "")


def main():
    """Read 'newfilms.txt', paginate it, and write 'filmparse.txt'."""
    with open('newfilms.txt', 'r') as source:
        lines = source.readlines()
    result = paginate(lines)
    with open('filmparse.txt', 'w') as target:
        target.write(result)


if __name__ == "__main__":
    main()
I, the copyright holder of this work, hereby release it into the public domain. This applies worldwide.
In case this is not legally possible:
I grant anyone the right to use this work for any purpose, without any conditions, unless such conditions are required by law.
Public domain