#!/usr/bin/perl -w
#Superscripter program designed by Shas'o'Kais using Perl
#Designed for use in cleaning up pages on the Warhammer 40k Wikia
#Usage:
# 1. Paste wikia text to edit into a plaintext file
# 2. Run program with name of file as argument
# 3. Voila, paste cleaned up text back into the wiki editor
#Sublime Text Editor recommended as it can automatically detect changes in any opened file
#This Program:
# Adds <sup></sup> to any 1st, 2nds, 3rds, 4ths, etc where it is missing, except for the Sources section
# Removes <sup>st/nd/rd/th</sup> after letters i.e. XIX<sup>th</sup> Legion (As per guidelines set by Montonius)
# Removes any <sup></sup> present in the Sources section
use strict;
use warnings;

# Returns a copy of $text in which the suffix of every numeric ordinal is
# wrapped in <sup></sup>.  Handles 1st, 2nd, 3rd and any digit followed by
# "th", so 10th, 11th, 20th etc. are covered as well as 4th-9th.
sub _add_ordinal_sups {
    my ($text) = @_;
    $text =~ s/1st/1<sup>st<\/sup>/g;
    $text =~ s/2nd/2<sup>nd<\/sup>/g;
    $text =~ s/3rd/3<sup>rd<\/sup>/g;
    $text =~ s/([0-9])th/$1<sup>th<\/sup>/g;
    return $text;
}

# Applies the superscript clean-up rules to a list of wikitext lines and
# returns the cleaned lines (line endings are left exactly as passed in).
#
# Rules, applied per line in this order:
#   1. A heading line switches the current section: "==Sources==",
#      "==Gallery==", or any other "==...==" heading (normal body text).
#      The heading line itself is processed under the section it opens.
#   2. <sup ...> tags carrying attributes are reduced to a plain <sup>.
#   3. A two-letter <sup>xx</sup> directly after a letter is removed,
#      i.e. XIX<sup>th</sup> Legion -> XIX Legion.
#   4. Body text: ordinals get <sup></sup> added.
#      Sources:   every <sup> and </sup> tag is stripped.
#      Gallery:   ordinals get <sup></sup> only after the first "|", so the
#                 image file name in front of it is never touched.
#   5. Any <sup></sup> that ended up in the first section of a [[...]] link
#      or image tag is taken out again so the link target stays valid.
sub _superscript_lines {
    my @lines = @_;

    my $section = 'body';
    my @cleaned;

    for my $line (@lines) {
        if ($line =~ m/==\s*Sources\s*==/) {
            $section = 'sources';
        }
        elsif ($line =~ m/==\s*Gallery\s*==/) {
            $section = 'gallery';
        }
        elsif ($line =~ m/==.*==/) {
            # any other heading ends the Sources/Gallery section
            $section = 'body';
        }

        my $text = $line;

        # <sup title="Fall of Prospero"> -> <sup>
        $text =~ s/<sup\s.+?>/<sup>/g;
        # XIX<sup>th</sup> Legion -> XIX Legion
        $text =~ s/([A-Za-z])<sup>[a-z]{2}<\/sup>/$1/g;

        if ($section eq 'body') {
            $text = _add_ordinal_sups($text);
        }
        elsif ($section eq 'sources') {
            $text =~ s/<sup>//g;
            $text =~ s/<\/sup>//g;
        }
        elsif ($section eq 'gallery') {
            # Split at the first pipe instead of using a greedy (\|.*)
            # prefix, which only ever reached the LAST ordinal on the line.
            my $pipe_at = index $text, '|';
            if ($pipe_at >= 0) {
                my $file_part = substr $text, 0, $pipe_at;
                my $caption   = substr $text, $pipe_at;
                $text = $file_part . _add_ordinal_sups($caption);
            }
        }

        # [[1<sup>st</sup>_Company.png|Pic of a Rhino]]
        #   -> [[1st_Company.png|Pic of a Rhino]]
        # One pass only repairs the first <sup> of each link target, so
        # repeat until nothing is left; every pass removes at least one
        # tag pair, which guarantees termination.
        1 while $text =~ s/(\[\[[^\|\]\[]*)<sup>([a-z]{2})<\/sup>(.*?[\|\]])/$1$2$3/g;

        push @cleaned, $text;
    }

    return @cleaned;
}

# Entry point: every file named on the command line is read, cleaned,
# echoed to the terminal and then overwritten in place with the result.
foreach my $file (@ARGV) {
    # three-arg open: the name is never interpreted as a mode or a pipe
    open my $in, '<', $file
        or die "Cannot open '$file' for reading: $!\n";
    my @source = <$in>;
    close $in
        or die "Cannot close '$file' after reading: $!\n";

    my @output = _superscript_lines(@source);
    chomp @output;

    #prints file output to terminal
    print "$_\n" for @output;

    #writes to file (overwrites contents)
    open my $out, '>', $file
        or die "Cannot open '$file' for writing: $!\n";
    print {$out} "$_\n" for @output;
    # buffered write errors only surface at close, so check it
    close $out
        or die "Cannot finish writing '$file': $!\n";
}
# NOTE: two stray "Advertisement" lines followed the script here; they were web-page residue, not Perl, and are commented out.