#!/usr/bin/perl -w
#Superscripter program designed by Shas'o'Kais using Perl
#Designed for use in cleaning up pages on the Warhammer 40k Wikia
#	1. Paste wikia text to edit into a plaintext file 
#	2. Run program with name of file as argument
#	3. Voila, paste cleaned up text back into the wiki editor
#Sublime Text Editor recommended as it can automatically detect changes in any opened file
#This Program:
#	Adds <sup></sup> to any ordinal (1st, 2nd, 3rd, 4th, etc.) where it is missing, except in the Sources section
#	Removes <sup>st/nd/rd/th</sup> after letters i.e. XIX<sup>th</sup> Legion (As per guidelines set by Montonius)
#	Removes any <sup></sup> present in the Sources section
use strict;
use warnings;

#Main loop: cleans up every file named on the command line, in place.
#Each file is read completely and closed before it is reopened for writing.
foreach my $file (@ARGV) {
	open my $in, '<', $file or die "Cannot read '$file': $!";
	my @source = <$in>;
	close $in;

	my @output = superscript_lines(@source);

	#prints file output to terminal
	print "$_\n" foreach @output;

	#writes to file (overwrites contents)
	open my $out, '>', $file or die "Cannot write '$file': $!";
	print {$out} "$_\n" foreach @output;
	#buffered write errors only surface at close, so check it
	close $out or die "Cannot finish writing '$file': $!";
}

#Wraps the suffix of every ordinal in <sup></sup>, i.e. 1st -> 1<sup>st</sup>.
#Any digit followed by st/nd/rd/th counts, so 10th, 11th, 13th, 20th and 101st
#are handled as well as 1st-9th. Text that is already superscripted is left
#alone because the digit is then followed by "<" rather than the suffix.
#	Parameter: one string of wiki text
#	Returns:   the modified copy
sub add_ordinal_sups {
	my ($text) = @_;
	$text =~ s/([0-9])(st|nd|rd|th)/$1<sup>$2<\/sup>/g;
	return $text;
}

#Applies the superscript clean-up rules to a list of wiki text lines.
#The section a line belongs to decides which rules apply:
#	Sources section - every <sup></sup> is removed
#	Gallery section - sups are added only after the first "|" (the caption)
#	anywhere else   - sups are added to every ordinal on the line
#A heading line is processed under the rules of the section it opens.
#	Parameter: the lines of the page, with or without trailing newlines
#	Returns:   the cleaned lines, without trailing newlines
sub superscript_lines {
	my @lines   = @_;
	my $section = 'body';
	my @cleaned;

	foreach my $line (@lines) {
		#strip the newline so the caller can add exactly one back; keeping it
		#would double the line breaks every time a file is processed
		chomp $line;

		if ($line =~ /==\s*Sources\s*==/) {
			#sources section reached
			$section = 'sources';
		} elsif ($line =~ /==\s*Gallery\s*==/) {
			#gallery section reached
			$section = 'gallery';
		} elsif ($line =~ /==.*==/) {
			#any other heading ends the sources/gallery section
			$section = 'body';
		}

		#changes sups with extra attributes to be normal sups i.e. <sup title="Fall of Prospero"> -> <sup>
		#[^>]* keeps the match inside a single tag
		$line =~ s/<sup\s[^>]*>/<sup>/g;

		#removes sups if not on the end of a number i.e. XIX<sup>th</sup> Legion -> XIX Legion
		$line =~ s/([A-Za-z])<sup>[a-z]{2}<\/sup>/$1/g;

		if ($section eq 'body') {
			$line = add_ordinal_sups($line);
		} elsif ($section eq 'sources') {
			#if in sources section, remove any sups
			$line =~ s/<\/?sup>//g;
		} elsif ($line =~ /\A([^|]*\|)(.*)\z/s) {
			#gallery: the text before the first "|" is the image file name and
			#must stay untouched; every ordinal in the caption gets a sup
			my ($image, $caption) = ($1, $2);
			$line = $image . add_ordinal_sups($caption);
		}

		#removes sups if inserted in the first section of an image or link tag i.e. [[1<sup>st</sup>_Company.png|Pic of a Rhino]] -> [[1st_Company.png|Pic of a Rhino]]
		#repeated until nothing changes so a target with several ordinals is fully cleaned
		1 while $line =~ s/(\[\[[^\|\]\[]*)<sup>([a-z]{2})<\/sup>(.*?[\|\]])/$1$2$3/;

		push @cleaned, $line;
	}

	return @cleaned;
}