User:Polbot/code/Function 2
From Wikisource
For importing works and Wikifying them, the code has to be modified for each work. For each section below, the code given is a rather generic example, but you'll have to modify the code through trial and error to produce code that works for your source.
Contents |
[edit] Gutenberg texts
# Note: This script is for works from Project Gutenberg, www.gutenberg.org, although it works pretty well for other texts.
# I download the html or txt file, and place {{curly braces}} around chapter names to tell the script where to divide pages.
# I also start each text file with a line containing the name of the work, and a line containing the name of the author.
#
# Usage: perl <this script> <input file> [test]
#   <input file>  line 1 = title, line 2 = author (optionally "author|translator"),
#                 then "{{Page name}}" marker lines, each followed by that page's text.
#   [test]        any true value: do not log in or edit; write the first page to test.txt and stop.
#
# Page-name conventions (see the TOC and header code below):
#   "#Name"   the page is its own top-level page instead of a subpage of the title.
#   "##Name"  as above, and the page body is wrapped in <poem> tags.
#   "Name: Subtitle"  only "Name" is used for the page path; the subtitle is display text.
use strict;
use warnings;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;

my $file = shift;
my $test = shift;
my $pw;

# Read one line from the given handle and strip its line ending (LF or CRLF).
# Returns undef at end of file.  Replaces the old "chop", which removed a real
# character from an unterminated last line and left "\r" on CRLF files.
sub read_line {
    my ($fh) = @_;
    my $text = <$fh>;
    return unless defined $text;
    $text =~ s/\r?\n\z//;
    return $text;
}

print "Reading file. . .\n";
die "File not found\n" unless defined $file;
open(my $in, '<', $file) or die "File not found\n";

# The first two lines are the work's title and author.
my $title  = read_line($in);
my $author = read_line($in);
die "Improper format\n" unless defined($title) && defined($author);

my $translator = "";
if ($author =~ m/^(.*)\|(.*)$/) {
    $author     = $1;
    $translator = $2;
}

my @pagenames = ();
my @wikis     = ();

# Skip blank lines before the first {{page}} marker.  The defined() test stops
# the loop at end of file (previously this spun forever on a file with no marker).
my $line = read_line($in);
$line = read_line($in) while defined($line) && $line =~ m/^\s*$/;
(defined($line) && $line =~ m/^\{\{(.*)\}\}$/) or die "Improper format\n";
my $pagename = $1;
my $wiki     = "";

# Every {{marker}} line closes the current page and opens the next one.
while (defined($line = read_line($in))) {
    if ($line =~ m/^\{\{(.*)\}\}/) {
        my $temp = $1;
        push @pagenames, $pagename;
        push @wikis,     $wiki;
        $pagename = $temp;
        $wiki     = "";
    }
    else {
        $wiki .= "$line\n";
    }
}
push @pagenames, $pagename;
push @wikis,     $wiki;
close($in);
print scalar(@pagenames) . " pages.\n";

if (!$test) {
    # login
    $pw = Perlwikipedia->new();
    #$pw->{debug} = 1;
    $pw->{mech}->agent('Firefox/2.0.0.6');
    $pw->set_wiki('en.wikisource.org', 'w');
    print "Logging in as Polbot\n";
    my $login_status = $pw->login('Polbot','[password]');
    die "I can't log in." unless ($login_status == 0);

    # Output automatic TOC
    open(my $toc, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
    print $toc "Contents:\n\n";
    foreach my $entry (@pagenames) {
        if ($entry =~ m/^\#+/) {
            # if a chapter starts with #, it's its own page, not a subpage.
            # if it starts with ##, it's its own page, and it gets <poem> tags.
            if ($entry =~ m/^\#+(.*)\: (.*)$/) {
                print $toc "* [[$1]]: $2\n";
            }
            elsif ($entry =~ m/^\#+(.*)$/) {
                print $toc "* [[$1]]\n";
            }
        }
        elsif ($entry =~ m/^(.*)\/(.*)$/) {
            # "Book/Chapter" page names become two-level subpages.
            my $book    = $1;
            my $chapter = $2;
            if ($chapter =~ m/^(.*)\: (.*)$/) {
                print $toc "* [[$title/$book/$1|$1]]: $2\n";
            }
            else {
                print $toc "* [[$title/$book/$chapter|$chapter]]\n";
            }
        }
        elsif ($entry =~ m/^(.*)\: (.*)$/) {
            print $toc "* [[$title/$1|$1]]: $2\n";
        }
        else {
            print $toc "* [[$title/$entry|$entry]]\n";
        }
    }
    close($toc) or die "Cannot write test.txt: $!\n";
    print "Wrote TOC\n";
    sleep 5;
}

foreach my $start (0 .. $#pagenames) {
    my $pagename = $pagenames[$start];
    my $wiki     = $wikis[$start];

    # "##" marks a poem page; reduce it to a single "#" for the path logic below.
    my $poem = 0;
    $poem = 1 if $pagename =~ s/^\#\#/\#/;
    print " $start. " . $pagenames[$start] . "\n";

    $wiki =~ s/<\!--.*?-->//gi; # remove html comments
    $wiki =~ s/<span class=[\"\']?pagenum[\"\']?>.*?<\/span>//gi; # remove page numbers
    $wiki =~ s/<span class=[\"\']?smallcaps[\"\']?>(.*?)<\/span>/{{small-caps|$1}}/gi; # format smallcaps
    $wiki =~ s/<p class=[\"\']?authorsc[\"\']?>(.*?)<\/p>/<div align=right>{{bbsc|$1}}<\/div>/gi; # author smallcaps
    #$wiki =~ s/\s*<b>(.*?)<\/b>\s*/\n==$1==\n/gis; # Make bolds into headers
    $wiki =~ s/\s*<h\d[^>]*>\s*([^\s].*?[^\s])\s*<\/h\d>\s*/\n'''$1'''\n\n/gis; # Make headers into bold
    $wiki =~ s/<\/?u>//gi; # remove underlines
    $wiki =~ s/<ins.*?>(.*?)<\/ins>/$1/gis; #remove ins
    $wiki =~ s/<br( \/)?>//gi; # take out BR
    $wiki =~ s/<\/?(i|em)>/\'\'/gi; # italics
    $wiki =~ s/<hr[^>]*>//gi; # remove HRs.
    # Fixed spaces.  NOTE(review): the published copy of this script showed a literal
    # space here, presumably because the page rendering decoded the entity -- confirm.
    $wiki =~ s/&nbsp;/ /g;
    $wiki =~ s/<div class=[\"\']?blockquot[\"\']?>(.*?)<\/div>/<div style=\"margin-left: 5%; margin-right: 10%;\">\n$1\n<\/div>/gis; # blockquotes
    #$wiki =~ s/<(\/?)pre>/<$1poem>/gi; # pre to poem

    # footnotes (alternative form, for sources that use <div class="footnote">):
    # while ($wiki =~ m/<a href=\"\#(.*?)\".*?<\/a>/gi) {
    #     my $footnote_name = $1;
    #     $wiki =~ s/<a href=\"\#$footnote_name\".*?<\/a>(.*)<div class=\"footnote\"><a name=\"$footnote_name\".*?<\/a>.*?<\/a>\s*(.*?)<\/div>/<ref>$2<\/ref>$1/si;
    # }
    # $wiki =~ s/<hr( style=\"[^\"]*\" )?\/>\n\'\'\'FOOTNOTES:\'\'\'//gis;

    # footnotes:
    # <a name="Ibr" id="Ibr"></a><a href="#Ib"><sup>b</sup></a>
    # <p class="footnote"><a name="Ia" id="Ia"></a>\n<a href="#Iar">a</a> See <a href="#pagex">page x</a>.</p>
    # Each in-text anchor is replaced by <ref>footnote text</ref> and the footnote
    # paragraph is removed.  \Q..\E keeps regex metacharacters in the anchor id literal.
    while ($wiki =~ m/<a href=\"\#(.*?)\".*?<\/a>/gi) {
        my $footnote_name = $1;
        $wiki =~ s/<a href=\"\#\Q$footnote_name\E\".*?<\/a>(.*)<p class=\"footnote\"><a name=\"\Q$footnote_name\E\".*?<\/a>.*?<\/a>\s*(.*?)<\/p>/<ref>$2<\/ref>$1/si;
    }
    # <span class="i0">And<a name="FNanchor_46:2_77" id="FNanchor_46:2_77"></a><a href="#Footnote_46:2_77" class="fnanchor">[46:2]</a></span><br />

    # poems:
    #$wiki =~ s/<p class=\"i0">(.*?)<\/p>/ $1/gis;
    $wiki =~ s/^<span class="i1">(.*)<\/span>/ $1/gim;
    #$wiki =~ s/<p class=\"i8">(.*?)<\/p>/ $1/gis;
    $wiki =~ s/<div class=\"stanza\">(.*?)<\/div>/$1/gis;
    $wiki =~ s/<div class=\"poem\">(.*?)<\/div>/<poem>$1<\/poem>/gis;

    # other divs and spans
    $wiki =~ s/<\/?p*?>/\n/gi; # remove paragraphs
    $wiki =~ s/^(\s*\*)+\s*$/{{star-divider}}/gm; # * * *
    $wiki =~ s/\n?<a [^>]*?\/>/ /gi; # remove lone anchors
    $wiki =~ s/<a .*?>(.*?)<\/a>/$1/gi; # links
    $wiki =~ s/<img .*?>/\n[image not included]\n/gi; # images
    $wiki =~ s/\_/''/g; # convert _ into italics
    #$wiki =~ s/^ +//gm if (!$poem); #leading spaces
    $wiki =~ s/\n\s*\n\s*\n/\n\n/g; # Take out excess whitespace

    $wiki = "<poem>\n$wiki</poem>\n[[Category:Poems]]\n" if $poem;
    # $wiki .= "\n==Footnotes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>" if $wiki =~ m/<ref>/;
    $wiki .= "\n==Footnotes==\n<references />" if $wiki =~ m/<ref>/;

    my $header = "{{header2\n|title=[[$title]]\n|author=$author\n";
    $header .= "|translator=$translator\n" if $translator;

    # Neighbouring page names, with any ": Subtitle" part dropped.
    my $prev = "";
    if ($start > 0) {
        $prev = $pagenames[$start-1];
        $prev =~ s/\: .*$//;
    }
    # The last page has no successor; do not read past the end of the list.
    my $next = $start < $#pagenames ? $pagenames[$start+1] : "";
    $next =~ s/\: .*$//;

    # if a chapter starts with #, it's its own page, not a subpage.
    if ($prev) {
        if ($prev =~ m/^\#+(.*)$/) {
            $prev = "[[$1]]";
        }
        else {
            $prev = "[[$title/$prev|$prev]]";
        }
    }
    if ($next) {
        if ($next =~ m/^\#+(.*)$/) {
            $next = "[[$1]]";
        }
        else {
            $next = "[[$title/$next|$next]]";
        }
    }

    # The section shown in the header is the page name without its leading "#" marks.
    (my $section = $pagename) =~ s/^\#+//;
    $header .= "|section=$section\n";
    $header .= "|previous=$prev\n|next=$next\n|notes=\n}}\n";
    $wiki = $header . $wiki;
    #$wiki = "$header<poem>\n$wiki</poem>"; # THIS TIME ONLY

    if ($test) {
        # Test mode: dump the first converted page for inspection and stop.
        open(my $out, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
        print $out $wiki;
        close($out) or die "Cannot write test.txt: $!\n";
        die "done";
    }
    else {
        my $writeto = $pagename;
        $writeto =~ s/^(.*)\: .*$/$1/;
        # A leading "#" means a top-level page; everything else is a subpage of the title.
        $writeto = "$title/$writeto" unless $writeto =~ s/^\#//;
        print "Writing to [[$writeto]]\n";
        $pw->edit("$writeto", $wiki, "Importing from Project Gutenberg using an automated script");
        #sleep 5;
    }
}
print "done\n";
[edit] Bartleby texts
#This script is for importing public domain works at www.bartleby.com
# I create a text file containing the names of chapters, and URLs for their sources.
use strict;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;
# --- Per-work configuration -------------------------------------------------
my $title="[[The American Language]]"; # or whatever
my $author="H. L. Mencken"; # or whoever
# Line treated as the predecessor of the first chapter (used for the "previous" link).
my $before_first = "";
# Regex fragments marking where the chapter text starts and ends in the fetched HTML.
my $chapterbegin = '<\!\-\- BEGIN CHAPTER \-\->';
my $chapterend = '<\!\-\- BOTTOM CHAPTER\/SECTION NAV CODE \-\->';
my $poem = 0; # true: wrap each chapter in <poem> tags and keep leading spaces
my $test = 0; # true: write the first chapter to test.txt and stop instead of editing
# ------------------------------------------------------------------------------------------------------------------------
#login
my $pw=Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Firefox/2.0.0.6');
$pw->set_wiki('en.wikisource.org', 'w');
print "Logging in as Polbot\n";
my $login_status=$pw->login('Polbot','[Password]');
die "I can't log in." unless ($login_status == 0);

# chapters.txt holds one chapter per line: "[[Page name|Shown name]]URL".
# Lines starting with "#" are skipped.
open(my $chapters, '<', 'chapters.txt') or die "Could not open chapters.txt: $!\n";
my $previous_line;
my $current_line = $before_first;
my $next_line = <$chapters>;
$next_line = "" unless defined $next_line;
chomp $next_line; # the first line used to keep its newline

# Sliding window over the file: previous / current / next line, so each page
# can link to its neighbours.  The loop ends at end of file or at an empty line.
while ($next_line) {
    #cycle
    $previous_line = $current_line;
    $current_line = $next_line;
    $next_line = <$chapters>;
    $next_line = "" unless defined $next_line;
    chomp $next_line;
    print "$current_line\n";
    my $footnoted = 0;
    next if $current_line =~ /^\#/;

    #see how to show them
    # Captures are only read when the match succeeded; otherwise $1 would still
    # hold the value from an earlier successful match.
    my $previous_show = $previous_line =~ m/^(\[\[.*\]\])/ ? $1 : "";
    my $next_show     = $next_line     =~ m/^(\[\[.*\]\])/ ? $1 : "";

    $current_line =~ m/^\[\[(?:[^\|]*\|)?(.*)\]\](.*)$/
        or die "Improper chapter line: $current_line\n";
    my $current_show = $1;
    my $url = $2;
    $current_line =~ m/^\[\[([^\|]*)(\|.*)?\]\]/
        or die "Improper chapter line: $current_line\n";
    my $article_name = $1;
    print "Article=[[$article_name]]. Show=$current_show. Prev=$previous_show. Next=$next_show. url=$url\n\n";

    # write header.
    my $wiki_out = "{{header2\n| title=$title\n|author=$author\n";
    $wiki_out .= "|section=$current_show\n|previous=$previous_show\n|next=$next_show\n";
    $wiki_out .= "|notes=\n}}\n";
    $wiki_out .= "<poem>\n" if $poem;

    # get chapter.
    print "Reading chapter. . .\n";
    my $ua = LWP::UserAgent->new;
    $ua->agent("Mozilla/6.0");
    my $res = $ua->get($url);
    die "Could not connect\n" unless ($res->is_success);
    my $work = $res->content;

    # Excerpt just the chapters
    $work =~ s/^.*?($chapterbegin.*?)$chapterend.*$/$1/si or die "chapter not excerpted.";
    # take out fixed spaces.  NOTE(review): the published copy showed a literal space as
    # the second alternative, presumably a decoded entity -- confirm.
    $work =~ s/&\#160;|&nbsp;/ /gi;
    $work =~ s/<\!--.*?-->//gi; # remove html comments
    $work =~ s/<TD(\s+VALIGN\s*=\s*"?TOP"?\s*|\s+ALIGN\s*=\s*"?RIGHT"?)+>.*?<\/TD>//ig; # Remove page numbers

    # Fix footnotes
    # <A HREF="/185/44.html#note95">95</A>
    # <A NAME="note95"><B>Note 95.</B></A> <I>Cf.</I> Vogue Affixes in Present-Day Word-Coinage [<A HREF="/185/44.html#txt95">back</A>]</TD></TR></TABLE>
    # Each numbered link is replaced by <ref>note text</ref> and the note itself is removed.
    # \Q..\E keeps the "." that may occur in a note id from acting as a wildcard.
    while ($work =~ m/<A HREF=".*?#(note(\d|\.)+)">\d+<\/A>/gi) {
        my $footnote_name = $1;
        $footnoted = 1;
        $work =~ s/^(.*)<A HREF=".*?\#\Q$footnote_name\E">\d+<\/A>(.*)<A NAME="\Q$footnote_name\E"><B>Note \d+\.\s*<\/B><\/A>\s*(.*?) \[<A .*?>back<\/A>\](.*)$/$1<ref>$3<\/ref>$2$4/si;
    }

    $work =~ s/<font size="?\+1"?><B>(.*?)<\/B><\/font>/==$1==/gi; # wikiformat headers
    $work =~ s/\n?<CENTER><B><A.*?>(.*?)<\/A><\/B><\/CENTER>\n?/\n==$1==\n/gi; # more headers
    $work =~ s/<i>(.*?)<\/i>/''$1''/gi; # wikiformat italics
    $work =~ s/<b>(.*?)<\/b>/'''$1'''/gi; # wikiformat bold

    # Wikiformat interior tables
    # One table per pass: convert its rows and cells to wiki table syntax, then
    # substitute it back, so the loop ends once no matching <TABLE> remains.
    while ($work =~ m/<TABLE WIDTH=\"45\%\" CELLPADDING=3 CELLSPACING=1 BORDER=\"0\">\n?(.*?)\n?<\/TABLE>/is) {
        my $intable = $1;
        $intable =~ s/\n?<tr([^>]*)>\n?/\n|- $1\n/gi;
        $intable =~ s/\n?<td([^>]*)>\n?/\n| $1|/gi;
        $work =~ s/\n?<TABLE WIDTH=\"45\%\" CELLPADDING=3 CELLSPACING=1 BORDER=\"0\">\n?(.*?)\n?<\/TABLE>\n?/\n{|\n$intable\n|}\n/is;
    }

    $work =~ s/<\/?T[DR].*?>//gi; # remove table tags
    $work =~ s/<\/?TABLE.*?>//gi; # ditto
    $work =~ s/<\/?FONT.*?>//gi; # remove font tags
    $work =~ s/<a .*?>(.*?)<\/a>/$1/gi; # remove links
    $work =~ s/<[hb]r>//gi; # remove lines and hard returns
    $work =~ s/<\/?dl>//gi; # remove other stuff
    $work =~ s/<img .*?>/\n[image not included]\n/gi; # remove images
    $work =~ s/<\!--.*?-->//gi; # remove html comments
    $work =~ s/<\/?FORM>//gi; # remove forms
    $work =~ s/<center><center>/<center>/gi; #fix bug
    $work =~ s/^\s+/\n/gm if !$poem; # remove initial spaces for prose
    $work =~ s/^:\s*$//gm; # remove lone colons
    $work =~ s/<PAGE NUM=".*?">//gi; # remove page numbers
    $work =~ s/\s*\n\s*\n\s+/\n\n/g; # remove excess whitespace

    $wiki_out .= "$work\n";
    $wiki_out .= "</poem>\n" if $poem;
    if ($footnoted) {
        $wiki_out .= "\n==Notes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>\n";
    }

    if ($test) {
        # Test mode: dump the first converted chapter for inspection and stop.
        open(my $out, '>', 'test.txt') or die "Could not write test.txt: $!\n";
        print $out $wiki_out;
        close($out) or die "Could not write test.txt: $!\n";
        die "done";
    }

    #write to Wikisource.
    print "Writing [[$article_name]].\n";
    $pw->edit($article_name, $wiki_out, "Importing a work from a website, using a perl script.");
    sleep 1;
}
close($chapters);
[edit] CCEL texts
# This script is for works at the Christian Classics Ethereal Library at www.ccel.org
# Again, it must be modified and tested for each work.
use strict;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;
# Command line: <ThML file> [start]
#   start  index of the first section to upload; a value containing "test"
#          makes the script stop after writing the table of contents.
my $file = shift;
my $start = shift;
my $pw;
my $title = "";
my $author = "";
# Section titles (regex alternatives) whose children are renamed "<Whatwecallsections> N".
my $usechaptersfor = "Book [IXVLC]+|On the Making of Man";
# Child titles (regex alternatives) that keep their own name and are not numbered.
my $dontcount = "Prologue|Introduction";
my $Whatwecallsections = "Chapter";
# Parallel arrays, one entry per <divN> section, in file order.
my @articlenames = ();   # full subpage path; filled in after parsing
my @articletitles = ();  # display title
my @ranks = ();          # the N of <divN>
my @wikis = ();          # raw section text

# Normalise a <divN title="..."> value: drop everything from the first em dash
# entity on, transliterate a few character entities, and drop one trailing full stop.
sub clean_title {
    my ($raw) = @_;
    $raw =~ s/^(.*?)\&\#8212;.*$/$1/;
    $raw =~ s/\&\#230;/ae/g;
    $raw =~ s/\&\#198;/AE/g;
    $raw =~ s/\&\#339;/oe/g;
    $raw =~ s/\&\#8217;/'/g;
    $raw =~ s/\&\#235;/e/g;
    $raw =~ s/\.$//;
    return $raw;
}

die "File not found\n" unless defined $file;
open(my $in, '<', $file) or die "File not found\n";

# Get header (for title and author)
print "Reading header. . .\n";
while (my $line = <$in>) {
    $line =~ s/\r?\n\z//;
    if ($line =~ m/<DC\.Title>[A-Z0-9]+\.\s+(.*)<\/DC\.Title>/) {
        $title = $1;
    }
    if ($line =~ m/<DC\.Creator sub=\"Author\" scheme=\"short\-form\">(.*)<\/DC\.Creator>/) {
        $author = $1;
    }
    last if $line =~ m/<ThML\.body>/;
}
# Per-work override of whatever the header contained.
$title = "Nicene and Post-Nicene Fathers: Series II/Volume V";
$author = "[[Author:Philip Schaff|Philip Schaff]] et al.";

my $line;
my $wiki = "";
my $articletitle = "";
my $rank = "";

# <div3 type="Book" n="I" title="Book I."
my $div_tag = qr/<div(\d)(?: type=\"[^\"]*\")?(?: n=\"[^\"]*\")? title=\"(.*?)\"/;

# Find the first section heading.  Stop with an error at end of file instead of
# looping forever when the file has no titled <divN>.
until ($articletitle) {
    $line = <$in>;
    defined $line or die "Improper format\n";
    if ($line =~ $div_tag) {
        $rank = $1;
        $articletitle = clean_title("$2");
    }
}

# Every further heading closes the current section and opens the next one;
# all other lines are collected as the current section's text.
while (defined($line = <$in>)) {
    $line =~ s/\r?\n\z//;
    if ($line =~ $div_tag) {
        my $temprank = "$1";
        my $temptitle = clean_title("$2");
        push @articletitles, $articletitle;
        push @articlenames, "";
        push @ranks, $rank;
        push @wikis, $wiki;
        $articletitle = $temptitle;
        $rank = $temprank;
        $wiki = "";
    }
    else {
        $wiki .= "$line\n";
    }
}
close($in);

# Store the last section.
push @articletitles, $articletitle;
push @articlenames, "";
push @ranks, $rank;
push @wikis, $wiki;
# Build the table of contents and the full subpage path of every section.
# Writes a nested bullet list to test.txt, fills @articlenames, and appends a
# "== Contents ==" link list to the text of each section that has children.
open(outfile, ">test.txt");
print outfile "Contents:\n\n";
print scalar(@articletitles) . " pages.\n";
# Title of the most recent section seen at ranks 1-4 (the current ancestors).
my $div1 = "";
my $div2 = "";
my $div3 = "";
my $div4 = "";
# Index in @wikis of the most recent section seen at ranks 1-4.
my $container1 = 0;
my $container2 = 0;
my $container3 = 0;
my $container4 = 0;
# Next chapter number to hand out at ranks 2-5; 0 means "do not renumber at this rank".
my $chaptercount2 = 0;
my $chaptercount3 = 0;
my $chaptercount4 = 0;
my $chaptercount5 = 0;
for(my $i=0;$i<=scalar(@articletitles)-1;$i++) {
my $articletitle = $articletitles[$i];
my $articlename = "";
if ($ranks[$i] eq "1") {
# Rank 1: a direct subpage of the work.
$articlename = "$articletitle";
print outfile "* [[$title/$articlename|$articletitle]]\n";
$div1 = $articletitle;
$container1 = $i;
# Children are renumbered only when this title matches $usechaptersfor.
if ($articletitle =~ /^($usechaptersfor)$/) {
$chaptercount2 = 1;
} else {
$chaptercount2 = 0;
}
} elsif ($ranks[$i] eq "2") {
# Rank 2: optionally renamed "<Whatwecallsections> N", then filed under $div1.
if ($chaptercount2 and ($articletitle !~ /^($dontcount)$/i)) {
$articletitle = "$Whatwecallsections $chaptercount2";
$articletitles[$i] = "$Whatwecallsections $chaptercount2";
$chaptercount2++;
}
$articlename = "$div1/$articletitle";
print outfile "** [[$title/$articlename|$articletitle]]\n";
$div2 = $articletitle;
$container2 = $i;
# Add this section to the contents list on its rank-1 parent's page.
if ($wikis[$container1] !~ m/== Contents ==/) {
$wikis[$container1] .= "\n== Contents ==\n";
}
$wikis[$container1] .= "* [[$title/$articlename|$articletitle]]\n";
if ($articletitle =~ /^($usechaptersfor)$/) {
$chaptercount3 = 1;
} else {
$chaptercount3 = 0;
}
} elsif ($ranks[$i] eq "3") {
# Rank 3: same scheme, filed under $div1/$div2.
if ($chaptercount3 and ($articletitle !~ /^($dontcount)$/i)) {
$articletitle = "$Whatwecallsections $chaptercount3";
$articletitles[$i] = "$Whatwecallsections $chaptercount3";
$chaptercount3++;
}
if ($articletitle =~ /^($usechaptersfor)$/) {
$chaptercount4 = 1;
} else {
$chaptercount4 = 0;
}
$articlename = "$div1/$div2/$articletitle";
# Ranks 3 and deeper are listed in test.txt only when $start contains "test".
print outfile "*** [[$title/$articlename|$articletitle]]\n" if ($start =~ /test/);
$div3 = $articletitle;
$container3 = $i;
if ($wikis[$container2] !~ m/== Contents ==/) {
$wikis[$container2] .= "\n== Contents ==\n";
}
$wikis[$container2] .= "* [[$title/$articlename|$articletitle]]\n";
} elsif ($ranks[$i] eq "4") {
# Rank 4: same scheme, filed under $div1/$div2/$div3.
if ($chaptercount4 and ($articletitle !~ /^($dontcount)$/i)) {
$articletitle = "$Whatwecallsections $chaptercount4";
$articletitles[$i] = "$Whatwecallsections $chaptercount4";
$chaptercount4++;
}
if ($articletitle =~ /^($usechaptersfor)$/) {
$chaptercount5 = 1;
} else {
$chaptercount5 = 0;
}
$articlename = "$div1/$div2/$div3/$articletitle";
print outfile "**** [[$title/$articlename|$articletitle]]\n" if ($start =~ /test/);
$div4 = $articletitle;
$container4 = $i;
if ($wikis[$container3] !~ m/== Contents ==/) {
$wikis[$container3] .= "\n== Contents ==\n";
}
$wikis[$container3] .= "* [[$title/$articlename|$articletitle]]\n";
} else { # 5
# Any other rank is treated as the deepest level; it never becomes a container.
if ($chaptercount5 and ($articletitle !~ /^($dontcount)$/i)) {
$articletitle = "$Whatwecallsections $chaptercount5";
$articletitles[$i] = "$Whatwecallsections $chaptercount5";
$chaptercount5++;
}
$articlename = "$div1/$div2/$div3/$div4/$articletitle";
print outfile "***** [[$title/$articlename|$articletitle]]\n" if ($start =~ /test/);
if ($wikis[$container4] !~ m/== Contents ==/) {
$wikis[$container4] .= "\n== Contents ==\n";
}
$wikis[$container4] .= "* [[$title/$articlename|$articletitle]]\n";
}
# Record the path (relative to $title) for the upload step.
$articlenames[$i] = $articlename;
}
close (outfile);
# In test mode, stop here so the generated test.txt can be inspected.
die "Wrote TOC" if ($start =~ /test/);
print "Wrote TOC\n\n";
sleep 5;
# login
# $pw is already declared at the top of the script; assign it rather than redeclare it.
$pw = Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Firefox/2.0.0.6');
$pw->set_wiki('en.wikisource.org', 'w');
print "Logging in as Polbot\n";
my $login_status=$pw->login('Polbot','[password]');
die "I can't log in." unless ($login_status == 0);

# Convert every section from $start onwards to wikitext and upload it.
# A missing or non-numeric $start means "begin with the first section".
my $first_index = (defined $start && $start =~ /^(\d+)/) ? $1 : 0;
for (my $i = $first_index; $i <= $#articletitles; $i++) {
    my $articletitle = $articletitles[$i];
    my $articlename = $articlenames[$i];
    my $wiki = defined $wikis[$i] ? $wikis[$i] : "";
    print "$i: $articletitle\n";

    #change <h2>, <i>, bbsc, and refs
    #$wiki =~ s/<h\d[^>]*>(.*?)<\/h\d>/'''{{bbsc|$1}}'''\n/gis; # Make headers into smallcaps, just for this one
    $wiki =~ s/<span class=\"c\d\d[^>]*>(.*?)<\/span>/{{small-caps|$1}}/gs; #make c24 span into smallcaps, just for this one
    $wiki =~ s/<h\d[^>]*>(.*?)<\/h\d>/'''$1'''\n/gis; # Make headers into bold
    $wiki =~ s/<\/?(i|em)>/\'\'/gi; # change italics
    $wiki =~ s/<span class=\"(?:upper|sc)\"[^>]*>(.*?)<\/span>/{{small-caps|$1}}/gs; # bbsc
    $wiki =~ s/<note[^>]*>(.*?)<\/note>/<ref>$1<\/ref>/gs; # refs
    $wiki =~ s/<\/l>/<br>/g; # poem lines
    $wiki =~ s/<p( [^>]*)?>/\n/gi; # p to hard return

    #take out p, index, </div>, hr, verse, l, a, etc.
    #keep br, ref
    $wiki =~ s/<\!--.*?-->//gi; # html comments
    $wiki =~ s/<hr[^>]*>//gi; # hr
    $wiki =~ s/<\/?p( [^>]*)?>//gi; # p
    $wiki =~ s/<\/?pb( [^>]*)?>//gi; # pb
    $wiki =~ s/<\/?index[^>]*>//gi; # index
    $wiki =~ s/<\/?div[^>]*>//gi; # div
    $wiki =~ s/<\/?scrip[^>]*>//gs; # scripRef
    $wiki =~ s/<\/?span( [^>]*)?>//gi; # span
    $wiki =~ s/<verse [^>]*>//g; # poem start
    $wiki =~ s/<\/verse>//g; # poem end
    $wiki =~ s/<l [^>]*>//gi; # l
    $wiki =~ s/<\/?a[^>]*>//gs; # a
    $wiki =~ s/<\/?name( [^>]*)?>//gi; # name
    $wiki =~ s/^ +//gm; # Take out initial spaces
    $wiki =~ s/\n\s*\n\s*\n/\n\n/g; # Take out excess whitespace

    #$wiki .= "\n==Footnotes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>" if $wiki =~ m/<ref>/;
    # No closing </div> here: all div tags were stripped above and this variant
    # (unlike the commented-out one) never opens one.
    $wiki .= "\n==Footnotes==\n<references />" if $wiki =~ m/<ref>/;

    # Split the linked path into "everything up to the last slash" (shown as the
    # title) and the last component (shown as the section).
    my $showtitle = linkslashes("$title/$articlename");
    $showtitle =~ m/(.*)\/(.*)/;
    my $showsection = $2;
    $showtitle=$1;

    my $header = "{{header2\n|title=$showtitle\n|author=$author\n";
    $header .= "|section=$showsection\n";
    if ($i > 0) {
        $header .= "|previous=[[$title/" . $articlenames[$i-1] . "|" . $articletitles[$i-1] . "]]\n";
    }
    else {
        $header .= "|previous=\n";
    }
    if ($i < $#articlenames && $articlenames[$i+1]) {
        $header .= "|next=[[$title/" . $articlenames[$i+1] . "|" . $articletitles[$i+1] . "]]\n|notes=\n}}\n";
    }
    else {
        $header .= "|next=\n|notes=\n}}\n";
    }
    $wiki = $header . $wiki;

    print "Writing to [[$title/$articlename]]\n";
    $pw->edit("$title/$articlename", $wiki, "Importing from Christian Classics Etherial Library, using an automated script");
    #sleep 5;
}
# Turn a slash-separated page path into a chain of wiki links.
# Every component except the last becomes a link to the path leading up to it:
#   "T/A/B"  ->  "[[T]]/[[T/A|A]]/B"
# A path without any slash is returned unchanged.
sub linkslashes {
    my ($path) = @_;

    # Repeatedly link the right-most not-yet-linked middle component; the
    # character classes stop at text that has already been turned into a link.
    1 while $path =~ s/^([^\[\]\|]*)\/([^\[\]\|]*)\/(.*)$/$1\/\[\[$1\/$2\|$2\]\]\/$3/;

    # Finally link the first component on its own.
    $path =~ s/^([^\/]*)\//[[$1]]\//;

    return $path;
}