User:Polbot/code/Function 2

From Wikisource
Jump to navigation Jump to search

For importing works and Wikifying them, the code has to be modified for each work. For each section below, the code given is a rather generic example, but you'll have to modify the code through trial and error to produce code that works for your source.

Gutenberg texts[edit]

# Note: This script is for works from Project Gutenberg, www.gutenberg.org, although it works pretty well for other texts.
#  I download the html or txt file, and place {{curly braces}} around chapter names to tell the script where to divide pages.
#  I also start each text file with a line containing the name of the work, and a line containing the name of the author.

use strict;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;

# Command-line arguments: the prepared text file, then an optional flag.
# When the flag is true, the script stops after writing the first rendered
# page to test.txt instead of uploading anything.
my $file = shift;
my $test = shift;
my $pw;

print "Reading file. . .\n";
die "File not found\n" unless defined $file;
# Three-argument open with a lexical handle, so that an unusual file name
# cannot be misread as an open mode.
open(my $in, '<', $file) or die "File not found\n";

# First line of the file: the name of the work.
my $line = <$in>;
die "Improper format\n" unless defined $line;
chomp $line;	# chomp, not chop: only ever strips a trailing newline
my $title = $line;

# Second line: the author, optionally followed by "|translator".
$line = <$in>;
die "Improper format\n" unless defined $line;
chomp $line;
my $author = $line;
my $translator = "";

if ($author =~ /^(.*)\|(.*)$/) {
	$author = $1;
	$translator = $2;
}

# Parallel arrays: $pagenames[$n] is the {{chapter name}} of page $n and
# $wikis[$n] is the raw text collected for that page.
my @pagenames = ();
my @wikis = ();

# Skip blank lines up to the first chapter marker. The defined() test stops
# the loop at end of file; an undefined line would otherwise match /^\s*$/
# forever.
$line = <$in>;
while (defined $line and $line =~ /^\s*$/) {
	$line = <$in>;
}
die "Improper format\n" unless defined $line;
chomp $line;

# The first non-blank line must be a {{chapter name}} marker on its own.
$line =~ /^\{\{(.*)\}\}$/ or die "Improper format\n";

my $pagename = $1;
my $wiki = "";

# Every later line either starts a new page (a line beginning with a
# {{marker}}) or belongs to the text of the current page.
while (defined($line = <$in>)) {
	chomp $line;

	if ($line =~ /^\{\{(.*)\}\}/) {
		my $temp = $1;

		push @pagenames, $pagename;
		push @wikis, $wiki;
		$pagename = $temp;
		$wiki = "";
	} else {
		$wiki .= "$line\n";
	}
}

# Store the page that was still being collected when the file ended.
push @pagenames, $pagename;
push @wikis, $wiki;

close($in);

print scalar(@pagenames) . " pages.\n";

# When not in test mode: log in to en.wikisource.org and write an automatic
# table of contents for all pages to test.txt.
if (!$test) {
	# login
	$pw=Perlwikipedia->new();
	#$pw->{debug} = 1;
	$pw->{mech}->agent('Firefox/2.0.0.6');
	$pw->set_wiki('en.wikisource.org', 'w');
	print "Logging in as Polbot\n";
	my $login_status=$pw->login('Polbot','[password]');
	die "I can't log in." unless $login_status == 0;

	# Output automatic TOC
	open(my $toc, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
	print $toc "Contents:\n\n";
	foreach my $pagename (@pagenames) {
		if ($pagename =~ /^\#+/) {
			# if a chapter starts with #, it's its own page, not a subpage.
			# if it starts with ##, it's its own page, and it gets <poem> tags.
			if ($pagename =~ /^\#+(.*)\: (.*)$/) {
				# "Name: subtitle" -- link the name, show the subtitle after it
				print $toc "* [[$1]]: $2\n";
			} elsif ($pagename =~ /^\#+(.*)$/) {
				print $toc "* [[$1]]\n";
			}
		} else {
			if ($pagename =~ /^(.*)\/(.*)$/) {
				# "Book/Chapter" -- a subpage nested two levels below the title
				my $book = $1;
				my $chapter = $2;

				if ($chapter =~ /^(.*)\: (.*)$/) {
					print $toc "* [[$title/$book/$1|$1]]: $2\n";
				} else {
					print $toc "* [[$title/$book/$chapter|$chapter]]\n";
				}
			} else {
				if ($pagename =~ /^(.*)\: (.*)$/) {
					print $toc "* [[$title/$1|$1]]: $2\n";
				} else {
					print $toc "* [[$title/$pagename|$pagename]]\n";
				}
			}
		}
	}
	# Buffered write errors only surface at close, so check it.
	close($toc) or die "Cannot finish writing test.txt: $!\n";
	print "Wrote TOC\n";
	sleep 5;
}

# Convert each collected page from Gutenberg HTML/text to wikitext, prepend a
# {{header2}} navigation template, and either upload it or (in test mode)
# write the first page to test.txt and stop.
foreach my $start (0 .. $#pagenames) {
	my $pagename = $pagenames[$start];
	my $wiki = $wikis[$start];
	my $poem = 0;
	# A leading "##" marks a poem page; reduce it to a single "#" and remember.
	$poem = 1 if $pagename =~ s/^\#\#/\#/;
	print " $start. " . $pagenames[$start] . "\n";

	$wiki =~ s/<\!--.*?-->//gi; # remove html comments
	$wiki =~ s/<span class=[\"\']?pagenum[\"\']?>.*?<\/span>//gi; # remove page numbers
	$wiki =~ s/<span class=[\"\']?smallcaps[\"\']?>(.*?)<\/span>/{{small-caps|$1}}/gi; # format smallcaps
	$wiki =~ s/<p class=[\"\']?authorsc[\"\']?>(.*?)<\/p>/<div align=right>{{bbsc|$1}}<\/div>/gi; # author smallcaps
	#$wiki =~ s/\s*<b>(.*?)<\/b>\s*/\n==$1==\n/gis; # Make bolds into headers
	$wiki =~ s/\s*<h\d[^>]*>\s*([^\s].*?[^\s])\s*<\/h\d>\s*/\n'''$1'''\n\n/gis; # Make headers into bold
	$wiki =~ s/<\/?u>//gi; # remove underlines
	$wiki =~ s/<ins.*?>(.*?)<\/ins>/$1/gis; #remove ins
	$wiki =~ s/<br( \/)?>//gi; # take out BR
	$wiki =~ s/<\/?(i|em)>/\'\'/gi; # italics
	$wiki =~ s/<hr[^>]*>//gi; # remove HRs.
	$wiki =~ s/\&nbsp\;/ /g;
	$wiki =~ s/<div class=[\"\']?blockquot[\"\']?>(.*?)<\/div>/<div style=\"margin-left: 5%; margin-right: 10%;\">\n$1\n<\/div>/gis; # blockquotes
	#$wiki =~ s/<(\/?)pre>/<$1poem>/gi; # pre to poem

	# footnotes:
#	while ($wiki =~ /<a href=\"\#(.*?)\".*?<\/a>/gi) {
#		my $footnote_name = $1;
#
#		$wiki =~ s/<a href=\"\#$footnote_name\".*?<\/a>(.*)<div class=\"footnote\"><a name=\"$footnote_name\".*?<\/a>.*?<\/a>\s*(.*?)<\/div>/<ref>$2<\/ref>$1/si;
#	}
#	$wiki =~ s/<hr( style=\"[^\"]*\" )?\/>\n\'\'\'FOOTNOTES:\'\'\'//gis;

	# footnotes:
	#    <a name="Ibr" id="Ibr"></a><a href="#Ib"><sup>b</sup></a>
	#    <p class="footnote"><a name="Ia" id="Ia"></a>\n<a href="#Iar">a</a>&nbsp;&nbsp;&nbsp;See <a href="#pagex">page x</a>.</p>
	# For every in-page link, try to move the text of the footnote paragraph
	# with the same name into a <ref> at the position of the link.
	while ($wiki =~ /<a href=\"\#(.*?)\".*?<\/a>/gi) {
		my $footnote_name = $1;

		# \Q...\E: the anchor name is literal text, not a pattern, so any
		# regex metacharacters in it must be matched verbatim.
		$wiki =~ s/<a href=\"\#\Q$footnote_name\E\".*?<\/a>(.*)<p class=\"footnote\"><a name=\"\Q$footnote_name\E\".*?<\/a>.*?<\/a>\s*(.*?)<\/p>/<ref>$2<\/ref>$1/si;
	}

	# <span class="i0">And<a name="FNanchor_46:2_77" id="FNanchor_46:2_77"></a><a href="#Footnote_46:2_77" class="fnanchor">[46:2]</a></span><br />

	# poems:
	#$wiki =~ s/<p class=\"i0">(.*?)<\/p>/    $1/gis;
	$wiki =~ s/^<span class="i1">(.*)<\/span>/   $1/gim;
	#$wiki =~ s/<p class=\"i8">(.*?)<\/p>/            $1/gis;
	$wiki =~ s/<div class=\"stanza\">(.*?)<\/div>/$1/gis;
	$wiki =~ s/<div class=\"poem\">(.*?)<\/div>/<poem>$1<\/poem>/gis;

	# other divs and spans
	$wiki =~ s/<\/?p*?>/\n/gi; # remove paragraphs
	$wiki =~ s/^(\s*\*)+\s*$/{{star-divider}}/gm; #  * * *
	$wiki =~ s/\n?<a [^>]*?\/>/ /gi; # remove lone anchors
	$wiki =~ s/<a .*?>(.*?)<\/a>/$1/gi; # links
	$wiki =~ s/<img .*?>/\n[image not included]\n/gi; # images
	$wiki =~ s/\_/''/g; # convert _ into italics
	#$wiki =~ s/^ +//gm if !$poem; #leading spaces

	$wiki =~ s/\n\s*\n\s*\n/\n\n/g; # Take out excess whitespace
	$wiki = "<poem>\n$wiki</poem>\n[[Category:Poems]]\n" if $poem;

	#	$wiki .= "\n==Footnotes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>" if $wiki =~ /<ref>/;
	$wiki .= "\n==Footnotes==\n<references />" if $wiki =~ /<ref>/;

	my $header = "{{header2\n|title=[[$title]]\n|author=$author\n";
	$header .= "|translator=$translator\n" if $translator;

	# Neighbouring page names with any ": subtitle" part removed. The first
	# page has no predecessor and the last page has no successor; both are
	# left empty instead of reading outside the array.
	my $prev = "";
	if ($start > 0) {
		$prev = $pagenames[$start-1];
		$prev =~ s/\: .*$//;
	}
	my $next = "";
	if ($start < $#pagenames) {
		$next = $pagenames[$start+1];
		$next =~ s/\: .*$//;
	}

	# if a chapter starts with #, it's its own page, not a subpage.
	if ($prev) {
		if ($prev =~ /^\#+(.*)$/) {
			$prev = "[[$1]]";
		} else {
			$prev = "[[$title/$prev|$prev]]";
		}
	}

	if ($next) {
		if ($next =~ /^\#+(.*)$/) {
			$next = "[[$1]]";
		} else {
			$next = "[[$title/$next|$next]]";
		}
	}

	# The section shown in the header is the page name without leading "#".
	if ($pagename =~ /\#*(.*)/) {
		$header .= "|section=$1\n";
	}
	$header .= "|previous=$prev\n|next=$next\n|notes=\n}}\n";

	$wiki = $header . $wiki;
	#$wiki = "$header<poem>\n$wiki</poem>";  # THIS TIME ONLY

	if ($test) {
		# Test mode: dump the first rendered page for inspection and stop.
		open(my $out, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
		print $out $wiki;
		close($out) or die "Cannot finish writing test.txt: $!\n";
		die "done";
	} else {
		# Target page: the name without its ": subtitle"; a leading "#" means
		# a top-level page, anything else is a subpage of the work.
		my $writeto = $pagename;
		$writeto =~ s/^(.*)\: .*$/$1/;
		if ($writeto =~ s/^\#//) {
			#nothing
		} else {
			$writeto = "$title/$writeto";
		}
		print "Writing to [[$writeto]]\n";
		$pw->edit("$writeto", $wiki, "Importing from Project Gutenberg using an automated script");
		#sleep 5;
	}

}

print "done\n";

Bartleby texts[edit]

#This script is for importing public domain works at www.bartleby.com
# I create a text file containing the names of chapters, and URLs for their sources.

use strict;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;

# Per-work settings; adjust these for each import.
my $title="[[The American Language]]"; # or whatever
my $author="H. L. Mencken"; # or whoever
my $before_first = "";	# stands in for the line "before" the first chapter
# Regex fragments (as strings) for the markers that enclose the chapter text
# in the downloaded page.
my $chapterbegin = '<\!\-\- BEGIN CHAPTER \-\->';
my $chapterend = '<\!\-\- BOTTOM CHAPTER\/SECTION NAV CODE \-\->';
my $poem = 0;	# true: wrap every chapter in <poem> tags and keep indentation
my $test = 0;	# true: write the first chapter to test.txt and stop

# ------------------------------------------------------------------------------------------------------------------------

#login
my $pw=Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Firefox/2.0.0.6');
$pw->set_wiki('en.wikisource.org', 'w');
print "Logging in as Polbot\n";
my $login_status=$pw->login('Polbot','[Password]');
die "I can't log in." unless $login_status == 0;

# chapters.txt: one chapter per line, a [[page name]] or [[page name|shown
# name]] link immediately followed by the source URL. Lines starting with "#"
# are not imported.
open(my $chapters, '<', 'chapters.txt') or die "Cannot read chapters.txt: $!\n";

# One user agent serves every chapter download.
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/6.0");

# Sliding window of three lines, so that each chapter knows its neighbours.
my $previous_line;
my $current_line = $before_first;
my $next_line = <$chapters>;
chomp $next_line if defined $next_line;	# the first line needs chomping too

while ($next_line) {
	#cycle
	$previous_line = $current_line;
	$current_line = $next_line;
	$next_line = <$chapters>;
	chomp $next_line if defined $next_line;
	print "$current_line\n";
	my $footnoted = 0;

	next if $current_line =~ /^\#/;

	#see how to show them
	# Only read $1 after a successful match: after a failed match it still
	# holds the capture of whichever earlier match succeeded.
	my $previous_show = ($previous_line =~ /^(\[\[.*\]\])/) ? $1 : "";
	my $next_show = (defined $next_line and $next_line =~ /^(\[\[.*\]\])/) ? $1 : "";

	my ($current_show, $url);
	if ($current_line =~ /^\[\[(?:[^\|]*\|)?(.*)\]\](.*)$/) {
		$current_show = $1;
		$url = $2;
	} else {
		# Without a parsable link there is no page name to write to.
		warn "Skipping malformed line: $current_line\n";
		next;
	}
	my $article_name = ($current_line =~ /^\[\[([^\|]*)(\|.*)?\]\]/) ? $1 : "";

	print "Article=[[$article_name]]. Show=$current_show. Prev=$previous_show. Next=$next_show. url=$url\n\n";

	# write header.
	my $wiki_out = "{{header2\n| title=$title\n|author=$author\n";
	$wiki_out .= "|section=$current_show\n|previous=$previous_show\n|next=$next_show\n";
	$wiki_out .= "|notes=\n}}\n";
	$wiki_out .= "<poem>\n" if $poem;

	# get chapter.
	print "Reading chapter. . .\n";
	my $res = $ua->get($url);
	die "Could not connect\n" unless $res->is_success;
	my $work = $res->content;

	# Excerpt just the chapters
	$work =~ s/^.*?($chapterbegin.*?)$chapterend.*$/$1/si or die "chapter not excerpted.";
	$work =~ s/&\#160;|&nbsp;/ /gi;	# take out fixed spaces
	$work =~ s/<\!--.*?-->//gi; # remove html comments
	$work =~ s/<TD(\s+VALIGN\s*=\s*"?TOP"?\s*|\s+ALIGN\s*=\s*"?RIGHT"?)+>.*?<\/TD>//ig; # Remove page numbers

	# Fix	footnotes
	# <A HREF="/185/44.html#note95">95</A>
	# <A NAME="note95"><B>Note 95.</B></A>  <I>Cf.</I> Vogue Affixes in Present-Day Word-Coinage [<A HREF="/185/44.html#txt95">back</A>]</TD></TR></TABLE>

	while ($work =~ /<A HREF=".*?#(note(\d|\.)+)">\d+<\/A>/gi) {
		my $footnote_name = $1;
		$footnoted = 1;

		# \Q...\E: the note name may contain "." and must match literally.
		$work =~ s/^(.*)<A HREF=".*?\#\Q$footnote_name\E">\d+<\/A>(.*)<A NAME="\Q$footnote_name\E"><B>Note \d+\.\s*<\/B><\/A>\s*(.*?) \[<A .*?>back<\/A>\](.*)$/$1<ref>$3<\/ref>$2$4/si;
	}

	$work =~ s/<font size="?\+1"?><B>(.*?)<\/B><\/font>/==$1==/gi; # wikiformat headers
	$work =~ s/\n?<CENTER><B><A.*?>(.*?)<\/A><\/B><\/CENTER>\n?/\n==$1==\n/gi; # more headers
	$work =~ s/<i>(.*?)<\/i>/''$1''/gi; # wikiformat italics
	$work =~ s/<b>(.*?)<\/b>/'''$1'''/gi; # wikiformat bold

	# Wikiformat interior tables
	# Each pass converts the first remaining table of this exact form into
	# {| ... |} wiki table markup.
	while ($work =~ /<TABLE WIDTH=\"45\%\" CELLPADDING=3 CELLSPACING=1 BORDER=\"0\">\n?(.*?)\n?<\/TABLE>/is) {
		my $intable = $1;
		$intable =~ s/\n?<tr([^>]*)>\n?/\n|- $1\n/gi;
		$intable =~ s/\n?<td([^>]*)>\n?/\n| $1|/gi;

		$work =~ s/\n?<TABLE WIDTH=\"45\%\" CELLPADDING=3 CELLSPACING=1 BORDER=\"0\">\n?(.*?)\n?<\/TABLE>\n?/\n{|\n$intable\n|}\n/is;
	}

	$work =~ s/<\/?T[DR].*?>//gi; # remove table tags
	$work =~ s/<\/?TABLE.*?>//gi; # ditto
	$work =~ s/<\/?FONT.*?>//gi; # remove font tags
	$work =~ s/<a .*?>(.*?)<\/a>/$1/gi; # remove links
	$work =~ s/<[hb]r>//gi; # remove lines and hard returns
	$work =~ s/<\/?dl>//gi; # remove other stuff
	$work =~ s/<img .*?>/\n[image not included]\n/gi; # remove images
	$work =~ s/<\!--.*?-->//gi; # remove html comments
	$work =~ s/<\/?FORM>//gi; # remove forms
	$work =~ s/<center><center>/<center>/gi; #fix bug
	$work =~ s/^\s+/\n/gm if !$poem; # remove initial spaces for prose
	$work =~ s/^:\s*$//gm; # remove lone colons
	$work =~ s/<PAGE NUM=".*?">//gi; # remove page numbers
	$work =~ s/\s*\n\s*\n\s+/\n\n/g; # remove excess whitespace

	$wiki_out .= "$work\n";
	$wiki_out .= "</poem>\n" if $poem;

	if ($footnoted) {
		$wiki_out .= "\n==Notes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>\n";
	}

	if ($test) {
		# Test mode: dump the first rendered chapter for inspection and stop.
		open(my $out, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
		print $out $wiki_out;
		close($out) or die "Cannot finish writing test.txt: $!\n";
		die "done";
	}

	#write to Wikisource.
	print "Writing [[$article_name]].\n";
	$pw->edit($article_name, $wiki_out, "Importing a work from a website, using a perl script.");
	sleep 1;
}

close($chapters);

CCEL texts[edit]

# This script is for works at the Christian Classics Ethereal Library at www.ccel.org
# Again, it must be modified and tested for each work.

use strict;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;

# Command-line arguments: the ThML source file, then either the index of the
# first section to upload or a value containing "test" (TOC only, no upload).
my $file = shift;
my $start = shift;
my $pw;
my $title = "";
my $author = "";

# Per-work settings (all three are used as regex fragments or labels below):
#   sections whose title matches $usechaptersfor get their children renamed
#   to "$Whatwecallsections N", except children matching $dontcount.
my $usechaptersfor = "Book [IXVLC]+|On the Making of Man";
my $dontcount = "Prologue|Introduction";
my $Whatwecallsections = "Chapter";

# Parallel arrays, one entry per <divN> section of the source.
my @articlenames = ();	# full subpage path; filled in by the TOC pass
my @articletitles = ();	# cleaned-up title attribute
my @ranks = ();	# the N of <divN>
my @wikis = ();	# raw text of the section

# Opening tag of a section, e.g. <div3 type="Book" n="I" title="Book I."
# Captures the rank digit and the title attribute.
my $div_re = qr/<div(\d)(?: type=\"[^\"]*\")?(?: n=\"[^\"]*\")? title=\"(.*?)\"/;

# Normalise a section title for use as a page name: keep only the part before
# an em dash, transliterate a few character entities, drop a trailing period.
sub clean_div_title {
	my ($cleaned) = @_;
	$cleaned =~ s/^(.*?)\&\#8212;.*$/$1/;
	$cleaned =~ s/\&\#230;/ae/g;
	$cleaned =~ s/\&\#198;/AE/g;
	$cleaned =~ s/\&\#339;/oe/g;
	$cleaned =~ s/\&\#8217;/'/g;
	$cleaned =~ s/\&\#235;/e/g;
	$cleaned =~ s/\.$//;
	return $cleaned;
}

open(my $in, '<', $file) or die "File not found\n";

# Get header (for title and author)
print "Reading header. . .\n";
while (my $line = <$in>) {
	chomp $line;	# chomp, not chop: only ever strips a trailing newline

	if ($line =~ /<DC\.Title>[A-Z0-9]+\.\s+(.*)<\/DC\.Title>/) {
		$title = $1;
	}
	if ($line =~ /<DC\.Creator sub=\"Author\" scheme=\"short\-form\">(.*)<\/DC\.Creator>/) {
		$author = $1;
	}
	last if $line =~ /<ThML\.body>/;
}
# Hard-coded override of whatever the header supplied; adjust per work.
$title = "Nicene and Post-Nicene Fathers: Series II/Volume V";
$author = "[[Author:Philip Schaff|Philip Schaff]] et al.";

my $line;
my $wiki;
my $articletitle = "";
my $rank = "";

# Find the first section. Stop with an error at end of file instead of
# looping forever when the source contains no usable <divN title="...">.
until ($articletitle) {
	$line = <$in>;
	die "No <divN ... title=\"...\"> section found in $file\n" unless defined $line;
	if ($line =~ $div_re) {
		$rank = $1;
		# The first title gets the same clean-up as all later ones.
		$articletitle = clean_div_title($2);
	}
}

# Each further section tag closes the section being collected and starts a
# new one; every other line is appended to the current section's text.
while (defined($line = <$in>)) {
	chomp $line;
	# <div3 type="Book" n="I" title="Book I."
	if ($line =~ $div_re) {
		my $temprank = "$1";
		my $temptitle = clean_div_title("$2");

		push @articletitles, $articletitle;
		push @articlenames, "";
		push @ranks, $rank;
		push @wikis, $wiki;

		$articletitle = $temptitle;
		$rank = $temprank;
		$wiki = "";
	} else {
		$wiki .= "$line\n";
	}
}
close($in);

# Store the section that was still being collected when the file ended.
push @articletitles, $articletitle;
push @articlenames, "";
push @ranks, $rank;
push @wikis, $wiki;

# TOC pass: derive each section's subpage path from its rank, renumber
# chapters where configured, append a "Contents" list to the parent section's
# text, and write an outline to test.txt.
open(my $toc, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
print $toc "Contents:\n\n";

print scalar(@articletitles) . " pages.\n";

# State per rank; index 0 is unused. Ranks other than 1-4 are handled as 5.
my @div = ("") x 5;	# title of the most recent section of rank 1..4
my @container = (0) x 5;	# index of the most recent section of rank 1..4
my @chaptercount = (0) x 6;	# next chapter number for rank 2..5; 0 = keep titles

for my $i (0..$#articletitles) {
	my $articletitle = $articletitles[$i];
	my $level = ($ranks[$i] =~ /^[1-4]\z/) ? $ranks[$i] : 5;

	# Inside a parent that matched $usechaptersfor, replace the title by a
	# running "Chapter N" unless the title is on the $dontcount list.
	if ($level > 1 and $chaptercount[$level] and ($articletitle !~ /^($dontcount)$/i)) {
		$articletitle = "$Whatwecallsections $chaptercount[$level]";
		$articletitles[$i] = $articletitle;
		$chaptercount[$level]++;
	}

	# Decide whether the children of this section are to be renumbered.
	if ($level < 5) {
		$chaptercount[$level+1] = ($articletitle =~ /^($usechaptersfor)$/) ? 1 : 0;
	}

	# Path below $title: the current ancestors' titles, then this title.
	my $articlename = join("/", @div[1 .. $level-1], $articletitle);

	# Ranks 1 and 2 always go into the outline, deeper ranks only on a
	# "test" run.
	if ($level <= 2 or $start =~ /test/) {
		my $stars = "*" x $level;
		print $toc "$stars [[$title/$articlename|$articletitle]]\n";
	}

	# Remember this section as the current ancestor for deeper ranks.
	if ($level < 5) {
		$div[$level] = $articletitle;
		$container[$level] = $i;
	}

	# List this section under "Contents" on its parent's page.
	if ($level > 1) {
		my $parent = $container[$level-1];
		if ($wikis[$parent] !~ /== Contents ==/) {
			$wikis[$parent] .= "\n== Contents ==\n";
		}
		$wikis[$parent] .= "* [[$title/$articlename|$articletitle]]\n";
	}

	$articlenames[$i] = $articlename;
}

# Buffered write errors only surface at close, so check it.
close($toc) or die "Cannot finish writing test.txt: $!\n";
die "Wrote TOC" if $start =~ /test/;
print "Wrote TOC\n\n";
sleep 5;

# login
# $pw is already declared with the other settings at the top of this script.
$pw=Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Firefox/2.0.0.6');
$pw->set_wiki('en.wikisource.org', 'w');
print "Logging in as Polbot\n";
my $login_status=$pw->login('Polbot','[password]');
die "I can't log in." unless $login_status == 0;

# Convert every section from index $start onwards from ThML to wikitext,
# prepend a {{header2}} navigation template and upload it as a subpage.
for my $i($start..$#articletitles) {
	my $articletitle = $articletitles[$i];
	my $articlename = $articlenames[$i];
	my $wiki = $wikis[$i];
	print "$i: $articletitle\n";

	#change <h2>, <i>, bbsc, and refs
	#$wiki =~ s/<h\d[^>]*>(.*?)<\/h\d>/'''{{bbsc|$1}}'''\n/gis; # Make headers into smallcaps, just for this one
	$wiki =~ s/<span class=\"c\d\d[^>]*>(.*?)<\/span>/{{small-caps|$1}}/gs; #make c24 span into smallcaps, just for this one
	$wiki =~ s/<h\d[^>]*>(.*?)<\/h\d>/'''$1'''\n/gis; # Make headers into bold
	$wiki =~ s/<\/?(i|em)>/\'\'/gi; # change italics
	$wiki =~ s/<span class=\"(?:upper|sc)\"[^>]*>(.*?)<\/span>/{{small-caps|$1}}/gs; # bbsc
	$wiki =~ s/<note[^>]*>(.*?)<\/note>/<ref>$1<\/ref>/gs; # refs
	$wiki =~ s/<\/l>/<br>/g;               # poem lines
	$wiki =~ s/<p( [^>]*)?>/\n/gi;  # p to hard return

	#take out p, index, </div>, hr, verse, l, a, etc.
	#keep br, ref
	$wiki =~ s/<\!--.*?-->//gi;      # html comments
	$wiki =~ s/<hr[^>]*>//gi;        # hr
	$wiki =~ s/<\/?p( [^>]*)?>//gi;  # p
	$wiki =~ s/<\/?pb( [^>]*)?>//gi; # pb
	$wiki =~ s/<\/?index[^>]*>//gi;  # index
	$wiki =~ s/<\/?div[^>]*>//gi;    # div
	$wiki =~ s/<\/?scrip[^>]*>//gs;  # scripRef
	$wiki =~ s/<\/?span( [^>]*)?>//gi;    # span
	$wiki =~ s/<verse [^>]*>//g;     # poem start
	$wiki =~ s/<\/verse>//g;         # poem end
	$wiki =~ s/<l [^>]*>//gi;        # l
	$wiki =~ s/<\/?a[^>]*>//gs;      # a
	$wiki =~ s/<\/?name( [^>]*)?>//gi;  # name

	$wiki =~ s/^ +//gm;              # Take out initial spaces
	$wiki =~ s/\n\s*\n\s*\n/\n\n/g;  # Take out excess whitespace
	#$wiki .= "\n==Footnotes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>" if $wiki =~ /<ref>/;
	# No closing </div> here: every div was stripped above and this variant
	# opens none, so a closing tag would be unbalanced.
	$wiki .= "\n==Footnotes==\n<references />" if $wiki =~ /<ref>/;

	# Link every ancestor in the page path, then split off the last component
	# as the section name; everything before it becomes the header title.
	my $showtitle = linkslashes("$title/$articlename");
	my $showsection = "";
	if ($showtitle =~ /(.*)\/(.*)/) {
		$showsection = $2;
		$showtitle = $1;
	}

	my $header = "{{header2\n|title=$showtitle\n|author=$author\n";
	$header .= "|section=$showsection\n";
	if ($i > 0) {
		$header .= "|previous=[[$title/" . $articlenames[$i-1] . "|" . $articletitles[$i-1] . "]]\n";
	} else {
		$header .= "|previous=\n";
	}
	if ($articlenames[$i+1]) {
		$header .= "|next=[[$title/" . $articlenames[$i+1] . "|" . $articletitles[$i+1] . "]]\n|notes=\n}}\n";
	} else {
		# last section: nothing follows
		$header .= "|next=\n|notes=\n}}\n";
	}

	$wiki = $header . $wiki;

	print "Writing to [[$title/$articlename]]\n";

	$pw->edit("$title/$articlename", $wiki, "Importing from Christian Classics Etherial Library, using an automated script");
	#sleep 5;

}

# Turn a slash-separated page path into wiki links for its ancestors:
# "A/B/C" becomes "[[A]]/[[A/B|B]]/C". The first component links to itself,
# each middle component links to its full path but shows only its own name,
# and the last component is left as plain text. A path without any slash is
# returned unchanged.
sub linkslashes {
	my ($path) = @_;

	# Each pass links the deepest middle component that is still plain text
	# (free of "[", "]" and "|"); repeat until none is left.
	1 while $path =~ s/^([^\[\]\|]*)\/([^\[\]\|]*)\/(.*)$/$1\/\[\[$1\/$2\|$2\]\]\/$3/;

	# Finally link the root component.
	$path =~ s/^([^\/]*)\//[[$1]]\//;

	return $path;
}