User:Polbot/code/Function 2

To import and wikify a work, the code has to be adapted for that particular work. The code in each section below is a fairly generic example; expect to modify it through trial and error until it works for your source.

Gutenberg texts[edit]

# Note: This script is for works from Project Gutenberg, www.gutenberg.org, although it works pretty well for other texts.
#  I download the html or txt file, and place {{curly braces}} around chapter names to tell the script where to divide pages.
#  I also start each text file with a line containing the name of the work, and a line containing the name of the author.

use strict;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;

# Command line: <input file> [test flag].
#   $file - prepared text/html file (title line, author line, then chapters
#           introduced by {{chapter name}} lines).
#   $test - when true, nothing is uploaded; the first converted page is written
#           to test.txt instead.
my $file = shift;
my $test = shift;
my $pw;

print "Reading file. . .\n";
defined $file or die "File not found\n";
open(my $in, '<', $file) or die "Cannot open '$file': $!\n";

# Line 1: name of the work.
my $line = <$in>;
defined $line or die "Improper format\n";
$line =~ s/\r?\n\z//;	# strip only the line ending (chop would eat a real character)
my $title = $line;

# Line 2: author, optionally followed by "|translator".
$line = <$in>;
defined $line or die "Improper format\n";
$line =~ s/\r?\n\z//;
my $author = $line;
my $translator = "";

if ($author =~ /^(.*)\|(.*)$/) {
	$author = $1;
	$translator = $2;
}

# Parallel arrays: $wikis[$n] is the raw text of the page named $pagenames[$n].
my @pagenames = ();
my @wikis = ();

# Skip blank lines before the first chapter marker.  The defined() test stops
# the loop at end of file; an undefined line would otherwise match /^\s*$/
# forever.
$line = <$in>;
while (defined $line && $line =~ /^\s*$/) {
	$line = <$in>;
}
defined $line or die "Improper format\n";
$line =~ s/\r?\n\z//;

# The first non-blank line must be a {{chapter name}} marker.
$line =~ /^\{\{(.*)\}\}$/ or die "Improper format\n";

my $pagename = $1;
my $wiki = "";

# Collect text until the next {{chapter name}} marker, then start a new page.
while (defined($line = <$in>)) {
	$line =~ s/\r?\n\z//;

	if ($line =~ /^\{\{(.*)\}\}/) {
		my $temp = $1;

		push @pagenames, $pagename;
		push @wikis, $wiki;
		$pagename = $temp;
		$wiki = "";
	} else {
		$wiki .= "$line\n";
	}
}

# Store the final page, which has no marker after it.
push @pagenames, $pagename;
push @wikis, $wiki;

close($in);

print scalar(@pagenames) . " pages.\n";

# Only when not in test mode: log in to Wikisource and write a table of
# contents (as wikitext) for all pages to test.txt.
if (!$test) {
	# login
	$pw=Perlwikipedia->new();
	#$pw->{debug} = 1;
	$pw->{mech}->agent('Firefox/2.0.0.6');
	$pw->set_wiki('en.wikisource.org', 'w');
	print "Logging in as Polbot\n";
	my $login_status=$pw->login('Polbot','[password]');
	# The script treats a return value of 0 from login() as success.
	die "I can't log in." unless $login_status == 0;

	# Output automatic TOC
	open(my $toc, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
	print $toc "Contents:\n\n";
	foreach my $pagename (@pagenames) {
		if ($pagename =~ /^\#+/) {
			# if a chapter starts with #, it's its own page, not a subpage.
			# if it starts with ##, it's its own page, and it gets <poem> tags.
			if ($pagename =~ /^\#+(.*)\: (.*)$/) {
				# "name: description" -> link the name, show the description after it
				print $toc "* [[$1]]: $2\n";
			} elsif ($pagename =~ /^\#+(.*)$/) {
				print $toc "* [[$1]]\n";
			}
		} else {
			if ($pagename =~ /^(.*)\/(.*)$/) {
				# "book/chapter": everything before the last slash is the book
				my $book = $1;
				my $chapter = $2;

				if ($chapter =~ /^(.*)\: (.*)$/) {
					print $toc "* [[$title/$book/$1|$1]]: $2\n";
				} else {
					print $toc "* [[$title/$book/$chapter|$chapter]]\n";
				}
			} else {
				# plain chapter: a subpage directly under the work's title
				if ($pagename =~ /^(.*)\: (.*)$/) {
					print $toc "* [[$title/$1|$1]]: $2\n";
				} else {
					print $toc "* [[$title/$pagename|$pagename]]\n";
				}
			}
		}
	}
	close($toc) or die "Cannot finish writing test.txt: $!\n";
	print "Wrote TOC\n";
	sleep 5;
}

# Convert every collected page from Gutenberg HTML/text to wikitext, prepend a
# {{header2}} navigation template, and either upload it (normal mode) or write
# the first page to test.txt and stop (test mode).
foreach my $start (0 .. $#pagenames) {
	my $pagename = $pagenames[$start];
	my $wiki = $wikis[$start];
	my $poem = 0;
	# "##name" marks a poem page; reduce it to "#name" (a page of its own).
	$poem = 1 if $pagename =~ s/^\#\#/\#/;
	print " $start. " . $pagenames[$start] . "\n";

	$wiki =~ s/<\!--.*?-->//gi; # remove html comments
	$wiki =~ s/<span class=[\"\']?pagenum[\"\']?>.*?<\/span>//gi; # remove page numbers
	$wiki =~ s/<span class=[\"\']?smallcaps[\"\']?>(.*?)<\/span>/{{small-caps|$1}}/gi; # format smallcaps
	$wiki =~ s/<p class=[\"\']?authorsc[\"\']?>(.*?)<\/p>/<div align=right>{{bbsc|$1}}<\/div>/gi; # author smallcaps
	#$wiki =~ s/\s*<b>(.*?)<\/b>\s*/\n==$1==\n/gis; # Make bolds into headers
	$wiki =~ s/\s*<h\d[^>]*>\s*([^\s].*?[^\s])\s*<\/h\d>\s*/\n'''$1'''\n\n/gis; # Make headers into bold
	$wiki =~ s/<\/?u>//gi; # remove underlines
	$wiki =~ s/<ins.*?>(.*?)<\/ins>/$1/gis; #remove ins
	$wiki =~ s/<br( \/)?>//gi; # take out BR
	$wiki =~ s/<\/?(i|em)>/\'\'/gi; # italics
	$wiki =~ s/<hr[^>]*>//gi; # remove HRs.
	$wiki =~ s/\&nbsp\;/ /g;
	$wiki =~ s/<div class=[\"\']?blockquot[\"\']?>(.*?)<\/div>/<div style=\"margin-left: 5%; margin-right: 10%;\">\n$1\n<\/div>/gis; # blockquotes
	#$wiki =~ s/<(\/?)pre>/<$1poem>/gi; # pre to poem

	# footnotes (alternative markup, kept as a template for other sources):
#	while ($wiki =~ /<a href=\"\#(.*?)\".*?<\/a>/gi) {
#		my $footnote_name = $1;
#
#		$wiki =~ s/<a href=\"\#$footnote_name\".*?<\/a>(.*)<div class=\"footnote\"><a name=\"$footnote_name\".*?<\/a>.*?<\/a>\s*(.*?)<\/div>/<ref>$2<\/ref>$1/si;
#	}
#	$wiki =~ s/<hr( style=\"[^\"]*\" )?\/>\n\'\'\'FOOTNOTES:\'\'\'//gis;

	# footnotes:
	#    <a name="Ibr" id="Ibr"></a><a href="#Ib"><sup>b</sup></a>
	#    <p class="footnote"><a name="Ia" id="Ia"></a>\n<a href="#Iar">a</a>&nbsp;&nbsp;&nbsp;See <a href="#pagex">page x</a>.</p>
	# For every in-page link, look for a footnote paragraph carrying the same
	# anchor name; if found, move its text inline as <ref>...</ref> at the
	# position of the link.
	while ($wiki =~ /<a href=\"\#(.*?)\".*?<\/a>/gi) {
		my $footnote_name = $1;

		# \Q...\E: the anchor name is data from the file, not a pattern.
		$wiki =~ s/<a href=\"\#\Q$footnote_name\E\".*?<\/a>(.*)<p class=\"footnote\"><a name=\"\Q$footnote_name\E\".*?<\/a>.*?<\/a>\s*(.*?)<\/p>/<ref>$2<\/ref>$1/si;
	}

	# <span class="i0">And<a name="FNanchor_46:2_77" id="FNanchor_46:2_77"></a><a href="#Footnote_46:2_77" class="fnanchor">[46:2]</a></span><br />

	# poems:
	#$wiki =~ s/<p class=\"i0">(.*?)<\/p>/    $1/gis;
	$wiki =~ s/^<span class="i1">(.*)<\/span>/   $1/gim;
	#$wiki =~ s/<p class=\"i8">(.*?)<\/p>/            $1/gis;
	$wiki =~ s/<div class=\"stanza\">(.*?)<\/div>/$1/gis;
	$wiki =~ s/<div class=\"poem\">(.*?)<\/div>/<poem>$1<\/poem>/gis;

	# other divs and spans
	# NOTE(review): this pattern only matches attribute-less tags such as <p>
	# and </p>; <p class="..."> is left in place -- confirm that is intended.
	$wiki =~ s/<\/?p*?>/\n/gi; # remove paragraphs
	$wiki =~ s/^(\s*\*)+\s*$/{{star-divider}}/gm; #  * * *
	$wiki =~ s/\n?<a [^>]*?\/>/ /gi; # remove lone anchors
	$wiki =~ s/<a .*?>(.*?)<\/a>/$1/gi; # links
	$wiki =~ s/<img .*?>/\n[image not included]\n/gi; # images
	$wiki =~ s/\_/''/g; # convert _ into italics
	#$wiki =~ s/^ +//gm if !$poem; #leading spaces

	$wiki =~ s/\n\s*\n\s*\n/\n\n/g; # Take out excess whitespace
	$wiki = "<poem>\n$wiki</poem>\n[[Category:Poems]]\n" if $poem;

	#	$wiki .= "\n==Footnotes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>" if $wiki =~ /<ref>/;
	$wiki .= "\n==Footnotes==\n<references />" if $wiki =~ /<ref>/;

	my $header = "{{header2\n|title=[[$title]]\n|author=$author\n";
	$header .= "|translator=$translator\n" if $translator;

	# Names of the neighbouring pages, without any ": description" suffix.
	my $prev = "";
	if ($start > 0) {
		$prev = $pagenames[$start-1] ;
		$prev =~ s/\: .*$//;
	}
	# The last page has no successor; use "" rather than undef.
	my $next = $pagenames[$start+1];
	$next = "" unless defined $next;
	$next  =~ s/\: .*$//;

	# if a chapter starts with #, it's its own page, not a subpage.
	if ($prev) {
		if ($prev =~ /^\#+(.*)$/) {
			$prev = "[[$1]]";
		} else {
			$prev = "[[$title/$prev|$prev]]";
		}
	}

	if ($next) {
		if ($next =~ /^\#+(.*)$/) {
			$next = "[[$1]]";
		} else {
			$next = "[[$title/$next|$next]]";
		}
	}

	# Section name is the page name without its leading # markers.
	my ($section) = $pagename =~ /\#*(.*)/;
	$header .= "|section=$section\n";
	$header .= "|previous=$prev\n|next=$next\n|notes=\n}}\n";

	$wiki = $header . $wiki;
	#$wiki = "$header<poem>\n$wiki</poem>";  # THIS TIME ONLY

	if ($test) {
		# Test mode: dump the first converted page for inspection and stop.
		open(my $out, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
		print $out $wiki;
		close($out) or die "Cannot finish writing test.txt: $!\n";
		die "done";
	} else {
		# Target page: drop the ": description" suffix; "#name" pages live at
		# top level, everything else is a subpage of the work.
		my $writeto = $pagename;
		$writeto =~ s/^(.*)\: .*$/$1/;
		$writeto = "$title/$writeto" unless $writeto =~ s/^\#//;

		print "Writing to [[$writeto]]\n";
		$pw->edit("$writeto", $wiki, "Importing from Project Gutenberg using an automated script");
		#sleep 5;
	}

}

print "done\n";

Bartleby texts[edit]

#This script is for importing public domain works at www.bartleby.com
# I create a text file containing the names of chapters, and URLs for their sources.

use strict;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;

# Per-work settings; edit these before every run.
# $title and $author are inserted verbatim into the {{header2}} template of
# every page written.
my $title="[[The American Language]]"; # or whatever
my $author="H. L. Mencken"; # or whoever
# Used as the "previous" chapter line for the first chapter in chapters.txt
# (leave empty when the first chapter has no predecessor).
my $before_first = "";
# Regex fragments marking where the chapter text starts and ends in the
# downloaded HTML; everything outside them is discarded.
my $chapterbegin = '<\!\-\- BEGIN CHAPTER \-\->';
my $chapterend = '<\!\-\- BOTTOM CHAPTER\/SECTION NAV CODE \-\->';
# Set to 1 to wrap each chapter in <poem> tags and keep leading whitespace.
my $poem = 0; 
# Set to 1 to write the first converted chapter to test.txt and stop instead
# of uploading.
my $test = 0;

# ------------------------------------------------------------------------------------------------------------------------

#login
my $pw=Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Firefox/2.0.0.6');
$pw->set_wiki('en.wikisource.org', 'w');
print "Logging in as Polbot\n";
my $login_status=$pw->login('Polbot','[Password]');
# The script treats a return value of 0 from login() as success.
die "I can't log in." unless $login_status == 0;

# Import every chapter listed in chapters.txt.
# Each line is expected to start with a wiki link naming the target page,
# "[[Page name]]" or "[[Page name|Shown name]]", followed by the source URL.
# Lines starting with # are skipped.  The loop keeps a sliding window of
# previous/current/next line so each page can link to its neighbours.
open(my $chapters, '<', 'chapters.txt') or die "Cannot open chapters.txt: $!\n";

my $previous_line;
my $current_line = $before_first;
my $next_line = <$chapters>;
$next_line = "" unless defined $next_line;
chomp $next_line;	# the first line needs chomping just like all the others

while ($next_line) {
	#cycle
	$previous_line = $current_line;
	$current_line = $next_line;
	$next_line = <$chapters>;
	$next_line = "" unless defined $next_line;	# end of file
	chomp $next_line;
	print "$current_line\n";
	my $footnoted = 0;

	next if $current_line =~ /^\#/;

	#see how to show them
	# Captures are taken in list context so a failed match yields nothing
	# instead of a stale $1 from an earlier match.
	my ($previous_show) = $previous_line =~ /^(\[\[.*\]\])/;
	$previous_show = "" unless defined $previous_show;
	my ($next_show) = $next_line =~ /^(\[\[.*\]\])/;
	$next_show = "" unless defined $next_show;
	my ($current_show, $url) = $current_line =~ /^\[\[(?:[^\|]*\|)?(.*)\]\](.*)$/;
	my ($article_name) = $current_line =~ /^\[\[([^\|]*)(\|.*)?\]\]/;
	die "Improper format: $current_line\n"
		unless defined $article_name && defined $url;

	print "Article=[[$article_name]]. Show=$current_show. Prev=$previous_show. Next=$next_show. url=$url\n\n";

	# write header.
	my $wiki_out = "{{header2\n| title=$title\n|author=$author\n";
	$wiki_out .= "|section=$current_show\n|previous=$previous_show\n|next=$next_show\n";
	$wiki_out .= "|notes=\n}}\n";
	$wiki_out .= "<poem>\n" if $poem;

	# get chapter.
	print "Reading chapter. . .\n";
	my $ua = LWP::UserAgent->new;
	$ua->agent("Mozilla/6.0");
	my $res = $ua->get($url);
	die "Could not connect\n" unless $res->is_success;
	my $work = $res->content;

	# Excerpt just the chapters
	$work =~ s/^.*?($chapterbegin.*?)$chapterend.*$/$1/si or die "chapter not excerpted.";
	$work =~ s/&\#160;|&nbsp;/ /gi;	# take out fixed spaces
	$work =~ s/<\!--.*?-->//gi; # remove html comments
	$work =~ s/<TD(\s+VALIGN\s*=\s*"?TOP"?\s*|\s+ALIGN\s*=\s*"?RIGHT"?)+>.*?<\/TD>//ig; # Remove page numbers

	# Fix	footnotes
	# <A HREF="/185/44.html#note95">95</A>
	# <A NAME="note95"><B>Note 95.</B></A>  <I>Cf.</I> Vogue Affixes in Present-Day Word-Coinage [<A HREF="/185/44.html#txt95">back</A>]</TD></TR></TABLE>
	# For every note link, move the text of the matching note inline as
	# <ref>...</ref> at the position of the link.
	while ($work =~ /<A HREF=".*?#(note(\d|\.)+)">\d+<\/A>/gi) {
		my $footnote_name = $1;
		$footnoted = 1;

		# \Q...\E: note names may contain "." which must match literally.
		$work =~ s/^(.*)<A HREF=".*?\#\Q$footnote_name\E">\d+<\/A>(.*)<A NAME="\Q$footnote_name\E"><B>Note \d+\.\s*<\/B><\/A>\s*(.*?) \[<A .*?>back<\/A>\](.*)$/$1<ref>$3<\/ref>$2$4/si;
	}

	$work =~ s/<font size="?\+1"?><B>(.*?)<\/B><\/font>/==$1==/gi; # wikiformat headers
	$work =~ s/\n?<CENTER><B><A.*?>(.*?)<\/A><\/B><\/CENTER>\n?/\n==$1==\n/gi; # more headers
	$work =~ s/<i>(.*?)<\/i>/''$1''/gi; # wikiformat italics
	$work =~ s/<b>(.*?)<\/b>/'''$1'''/gi; # wikiformat bold

	# Wikiformat interior tables
	# One table per pass: convert its rows/cells to wiki table syntax, then
	# replace the first remaining HTML table with the converted text.
	while ($work =~ /<TABLE WIDTH=\"45\%\" CELLPADDING=3 CELLSPACING=1 BORDER=\"0\">\n?(.*?)\n?<\/TABLE>/is) {
		my $intable = $1;
		$intable =~ s/\n?<tr([^>]*)>\n?/\n|- $1\n/gi;
		$intable =~ s/\n?<td([^>]*)>\n?/\n| $1|/gi;

		$work =~ s/\n?<TABLE WIDTH=\"45\%\" CELLPADDING=3 CELLSPACING=1 BORDER=\"0\">\n?(.*?)\n?<\/TABLE>\n?/\n{|\n$intable\n|}\n/is;
	}

	$work =~ s/<\/?T[DR].*?>//gi; # remove table tags
	$work =~ s/<\/?TABLE.*?>//gi; # ditto
	$work =~ s/<\/?FONT.*?>//gi; # remove font tags
	$work =~ s/<a .*?>(.*?)<\/a>/$1/gi; # remove links
	$work =~ s/<[hb]r>//gi; # remove lines and hard returns
	$work =~ s/<\/?dl>//gi; # remove other stuff
	$work =~ s/<img .*?>/\n[image not included]\n/gi; # remove images
	$work =~ s/<\!--.*?-->//gi; # remove html comments
	$work =~ s/<\/?FORM>//gi; # remove forms
	$work =~ s/<center><center>/<center>/gi; #fix bug
	$work =~ s/^\s+/\n/gm if !$poem; # remove initial spaces for prose
	$work =~ s/^:\s*$//gm; # remove lone colons
	$work =~ s/<PAGE NUM=".*?">//gi; # remove page numbers
	$work =~ s/\s*\n\s*\n\s+/\n\n/g; # remove excess whitespace

	$wiki_out .= "$work\n";
	$wiki_out .= "</poem>\n" if $poem;

	if ($footnoted) {
		$wiki_out .= "\n==Notes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>\n";
	}

	if ($test) {
		# Test mode: dump the first converted chapter for inspection and stop.
		open(my $out, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
		print $out $wiki_out;
		close($out) or die "Cannot finish writing test.txt: $!\n";
		die "done";
	}

	#write to Wikisource.
	print "Writing [[$article_name]].\n";
	$pw->edit($article_name, $wiki_out, "Importing a work from a website, using a perl script.");
	sleep 1;
}

close($chapters);

CCEL texts[edit]

# This script is for works at the Christian Classics Ethereal Library at www.ccel.org
# Again, it must be modified and tested for each work.

use strict;
use LWP::UserAgent;
use Encode;
use Perlwikipedia;

# Command line: <ThML file> <start>.
#   $file  - ThML source file downloaded from CCEL.
#   $start - either a string containing "test" (only the table of contents is
#            written) or the index of the first page to upload.
my $file = shift;
my $start = shift;
my $pw;
my $title = "";
my $author = "";

# Per-work settings.
# Divisions whose title matches $usechaptersfor have their children renamed to
# "$Whatwecallsections <n>", except children whose title matches $dontcount.
my $usechaptersfor = "Book [IXVLC]+|On the Making of Man";
my $dontcount = "Prologue|Introduction";
my $Whatwecallsections = "Chapter";

# Parallel arrays, one entry per <divN> element found in the file.
my @articlenames = ();
my @articletitles = ();
my @ranks = ();
my @wikis = ();

# Opening tag of a division, e.g. <div3 type="Book" n="I" title="Book I."
# Captures the division level (digit) and the title.
my $div_re = qr/<div(\d)(?: type=\"[^\"]*\")?(?: n=\"[^\"]*\")? title=\"(.*?)\"/;

# Turn a raw division title into a page title: cut at the first em dash,
# transliterate a few character entities, drop a trailing period.
my $clean_title = sub {
	my ($raw) = @_;
	$raw =~ s/^(.*?)\&\#8212;.*$/$1/;
	$raw =~ s/\&\#230;/ae/g;
	$raw =~ s/\&\#198;/AE/g;
	$raw =~ s/\&\#339;/oe/g;
	$raw =~ s/\&\#8217;/'/g;
	$raw =~ s/\&\#235;/e/g;
	$raw =~ s/\.$//;
	return $raw;
};

defined $file or die "File not found\n";
open(my $in, '<', $file) or die "Cannot open '$file': $!\n";

# Get header (for title and author)
print "Reading header. . .\n";
while (my $line = <$in>) {
	$line =~ s/\r?\n\z//;

	if ($line =~ /<DC\.Title>[A-Z0-9]+\.\s+(.*)<\/DC\.Title>/) {
		$title = $1;
	}
	if ($line =~ /<DC\.Creator sub=\"Author\" scheme=\"short\-form\">(.*)<\/DC\.Creator>/) {
		$author = $1;
	}
	last if $line =~ /<ThML\.body>/;
}
# Per-work override: these replace whatever the header provided.
$title = "Nicene and Post-Nicene Fathers: Series II/Volume V";
$author = "[[Author:Philip Schaff|Philip Schaff]] et al.";

my $line;
my $wiki = "";
my $articletitle = "";
my $rank = "";

# Skip ahead to the first division with a usable title.  Stop with an error at
# end of file instead of looping forever.
until ($articletitle) {
	$line = <$in>;
	defined $line or die "No titled <div> element found in '$file'\n";
	if ($line =~ $div_re) {
		my ($firstrank, $firsttitle) = ($1, $2);
		$rank = $firstrank;
		$articletitle = $clean_title->($firsttitle);
	}
}

# Collect the text of each division; a new division tag closes the current one.
while (defined($line = <$in>)) {
	$line =~ s/\r?\n\z//;
	# <div3 type="Book" n="I" title="Book I."
	if ($line =~ $div_re) {
		my ($temprank, $rawtitle) = ($1, $2);
		my $temptitle = $clean_title->($rawtitle);

		push @articletitles, $articletitle;
		push @articlenames, "";
		push @ranks, $rank;
		push @wikis, $wiki;

		$articletitle = $temptitle;
		$rank = $temprank;
		$wiki = "";
	} else {
		$wiki .= "$line\n";
	}
}
close($in);

# Store the final division, which has no tag after it.
push @articletitles, $articletitle;
push @articlenames, "";
push @ranks, $rank;
push @wikis, $wiki;

# Work out the full page name of every division, write a table of contents to
# test.txt, and append a "== Contents ==" list of children to the text of each
# containing division.  In test mode the script stops after this step.
my $toc_test_mode = (defined $start && $start =~ /test/) ? 1 : 0;

open(my $toc, '>', 'test.txt') or die "Cannot write test.txt: $!\n";
print $toc "Contents:\n\n";

print scalar(@articletitles) . " pages.\n";

# State carried from division to division:
#   $divN         - title of the most recent level-N division
#   $containerN   - index (into the arrays) of the most recent level-N division
#   $chaptercountN - next chapter number for level-N children, or 0 when the
#                    children of the current parent keep their own titles
my $div1 = "";
my $div2 = "";
my $div3 = "";
my $div4 = "";
my $container1 = 0;
my $container2 = 0;
my $container3 = 0;
my $container4 = 0;
my $chaptercount2 = 0;
my $chaptercount3 = 0;
my $chaptercount4 = 0;
my $chaptercount5 = 0;

for my $i(0..$#articletitles) {
	my $articletitle = $articletitles[$i];
	my $articlename = "";

	if ($ranks[$i] eq "1") {
		$articlename = "$articletitle";
		print $toc "* [[$title/$articlename|$articletitle]]\n";
		$div1 = $articletitle;
		$container1 = $i;

		$chaptercount2 = ($articletitle =~ /^($usechaptersfor)$/) ? 1 : 0;
	} elsif ($ranks[$i] eq "2") {
		# Rename to "Chapter N" when the parent asked for numbered children.
		if ($chaptercount2 and ($articletitle !~ /^($dontcount)$/i)) {
			$articletitle = "$Whatwecallsections $chaptercount2";
			$articletitles[$i] = "$Whatwecallsections $chaptercount2";
			$chaptercount2++;
		}

		$articlename = "$div1/$articletitle";
		print $toc "** [[$title/$articlename|$articletitle]]\n";
		$div2 = $articletitle;
		$container2 = $i;

		if ($wikis[$container1] !~ /== Contents ==/) {
			$wikis[$container1] .= "\n== Contents ==\n";
		}
		$wikis[$container1] .= "* [[$title/$articlename|$articletitle]]\n";

		$chaptercount3 = ($articletitle =~ /^($usechaptersfor)$/) ? 1 : 0;
	} elsif ($ranks[$i] eq "3") {
		if ($chaptercount3 and ($articletitle !~ /^($dontcount)$/i)) {
			$articletitle = "$Whatwecallsections $chaptercount3";
			$articletitles[$i] = "$Whatwecallsections $chaptercount3";
			$chaptercount3++;
		}

		$chaptercount4 = ($articletitle =~ /^($usechaptersfor)$/) ? 1 : 0;

		$articlename = "$div1/$div2/$articletitle";
		# Levels 3 and deeper are listed in test.txt only in test mode.
		print $toc "*** [[$title/$articlename|$articletitle]]\n" if $toc_test_mode;
		$div3 = $articletitle;
		$container3 = $i;

		if ($wikis[$container2] !~ /== Contents ==/) {
			$wikis[$container2] .= "\n== Contents ==\n";
		}
		$wikis[$container2] .= "* [[$title/$articlename|$articletitle]]\n";

	} elsif ($ranks[$i] eq "4") {
		if ($chaptercount4 and ($articletitle !~ /^($dontcount)$/i)) {
			$articletitle = "$Whatwecallsections $chaptercount4";
			$articletitles[$i] = "$Whatwecallsections $chaptercount4";
			$chaptercount4++;
		}

		$chaptercount5 = ($articletitle =~ /^($usechaptersfor)$/) ? 1 : 0;

		$articlename = "$div1/$div2/$div3/$articletitle";
		print $toc "**** [[$title/$articlename|$articletitle]]\n" if $toc_test_mode;
		$div4 = $articletitle;
		$container4 = $i;

		if ($wikis[$container3] !~ /== Contents ==/) {
			$wikis[$container3] .= "\n== Contents ==\n";
		}
		$wikis[$container3] .= "* [[$title/$articlename|$articletitle]]\n";

	} else { # 5
		# Any rank other than 1-4 is handled as level 5.
		if ($chaptercount5 and ($articletitle !~ /^($dontcount)$/i)) {
			$articletitle = "$Whatwecallsections $chaptercount5";
			$articletitles[$i] = "$Whatwecallsections $chaptercount5";
			$chaptercount5++;
		}

		$articlename = "$div1/$div2/$div3/$div4/$articletitle";
		print $toc "***** [[$title/$articlename|$articletitle]]\n" if $toc_test_mode;
		if ($wikis[$container4] !~ /== Contents ==/) {
			$wikis[$container4] .= "\n== Contents ==\n";
		}
		$wikis[$container4] .= "* [[$title/$articlename|$articletitle]]\n";
	}

	$articlenames[$i] = $articlename;
}

close($toc) or die "Cannot finish writing test.txt: $!\n";
die "Wrote TOC" if $toc_test_mode;
print "Wrote TOC\n\n";
sleep 5;

# login
# $pw is already declared at the top of the script; assign to it rather than
# declaring a second variable that masks the first.
$pw = Perlwikipedia->new();
#$pw->{debug} = 1;
$pw->{mech}->agent('Firefox/2.0.0.6');
$pw->set_wiki('en.wikisource.org', 'w');
print "Logging in as Polbot\n";
my $login_status=$pw->login('Polbot','[password]');
# The script treats a return value of 0 from login() as success.
die "I can't log in." unless $login_status == 0;

# Convert each division from ThML to wikitext, prepend a {{header2}} template
# with previous/next links, and upload it.  Uploading begins at index $start
# (0 when no start index was given), so an interrupted run can be resumed.
my $first_index = $start || 0;

for my $i($first_index..$#articletitles) {
	my $articletitle = $articletitles[$i];
	my $articlename = $articlenames[$i];
	my $wiki = $wikis[$i];
	print "$i: $articletitle\n";

	#change <h2>, <i>, bbsc, and refs
	#$wiki =~ s/<h\d[^>]*>(.*?)<\/h\d>/'''{{bbsc|$1}}'''\n/gis; # Make headers into smallcaps, just for this one
	$wiki =~ s/<span class=\"c\d\d[^>]*>(.*?)<\/span>/{{small-caps|$1}}/gs; #make c24 span into smallcaps, just for this one
	$wiki =~ s/<h\d[^>]*>(.*?)<\/h\d>/'''$1'''\n/gis; # Make headers into bold
	$wiki =~ s/<\/?(i|em)>/\'\'/gi; # change italics
	$wiki =~ s/<span class=\"(?:upper|sc)\"[^>]*>(.*?)<\/span>/{{small-caps|$1}}/gs; # bbsc
	$wiki =~ s/<note[^>]*>(.*?)<\/note>/<ref>$1<\/ref>/gs; # refs
	$wiki =~ s/<\/l>/<br>/g;               # poem lines
	$wiki =~ s/<p( [^>]*)?>/\n/gi;  # p to hard return

	#take out p, index, </div>, hr, verse, l, a, etc.
	#keep br, ref
	$wiki =~ s/<\!--.*?-->//gi;      # html comments
	$wiki =~ s/<hr[^>]*>//gi;        # hr
	$wiki =~ s/<\/?p( [^>]*)?>//gi;  # p
	$wiki =~ s/<\/?pb( [^>]*)?>//gi; # pb
	$wiki =~ s/<\/?index[^>]*>//gi;  # index
	$wiki =~ s/<\/?div[^>]*>//gi;    # div
	$wiki =~ s/<\/?scrip[^>]*>//gs;  # scripRef
	$wiki =~ s/<\/?span( [^>]*)?>//gi;    # span
	$wiki =~ s/<verse [^>]*>//g;     # poem start
	$wiki =~ s/<\/verse>//g;         # poem end
	$wiki =~ s/<l [^>]*>//gi;        # l
	$wiki =~ s/<\/?a[^>]*>//gs;      # a
	$wiki =~ s/<\/?name( [^>]*)?>//gi;  # name

	$wiki =~ s/^ +//gm;              # Take out initial spaces
	$wiki =~ s/\n\s*\n\s*\n/\n\n/g;  # Take out excess whitespace
	#$wiki .= "\n==Footnotes==\n" . '<div style="-moz-column-count: 2; column-count: 2;">' . "<references /></div>" if $wiki =~ /<ref>/;
	# No closing </div> here: this variant opens no <div>.
	$wiki .= "\n==Footnotes==\n<references />" if $wiki =~ /<ref>/;

	# Link every ancestor in the page path; the part after the last slash is
	# the section name, everything before it goes into the title field.
	my $showtitle = linkslashes("$title/$articlename");
	$showtitle =~ /(.*)\/(.*)/;
	my $showsection = $2;
	$showtitle=$1;

	my $header = "{{header2\n|title=$showtitle\n|author=$author\n";
	$header .= "|section=$showsection\n";
	if ($i > 0) {
		$header .= "|previous=[[$title/" . $articlenames[$i-1] . "|" . $articletitles[$i-1] . "]]\n";
	} else {
		$header .= "|previous=\n";
	}
	if ($articlenames[$i+1]) {
		$header .= "|next=[[$title/" . $articlenames[$i+1] . "|" . $articletitles[$i+1] . "]]\n|notes=\n}}\n";
	} else {
		# last page: nothing follows
		$header .= "|next=\n|notes=\n}}\n";
	}

	$wiki = $header . $wiki;

	print "Writing to [[$title/$articlename]]\n";

	$pw->edit("$title/$articlename", $wiki, "Importing from Christian Classics Etherial Library, using an automated script");
	#sleep 5;

}

# Turn a slash-separated page path into wikitext in which every component
# except the last is a link to the corresponding ancestor page.
#   "T/A/B/C"  ->  "[[T]]/[[T/A|A]]/[[T/A/B|B]]/C"
# Takes the path as its only argument and returns the linked string; a path
# without any slash is returned unchanged.
sub linkslashes {
	my ($path) = @_;

	# Work from right to left: each pass links the last not-yet-linked
	# component that still has a plain component after it.  The character
	# classes keep the match inside the part that contains no link syntax yet.
	1 while $path =~ s{^([^\[\]\|]*)/([^\[\]\|]*)/(.*)$}{$1/\[\[$1/$2\|$2\]\]/$3};

	# Finally link the root component (everything before the first slash).
	$path =~ s{^([^/]*)/}{[[$1]]/};

	return $path;
}

User:Polbot/code/Function 2

Gutenberg texts[edit]

Bartleby texts[edit]

CCEL texts[edit]

Navigation menu

Search