User:AllyUnion/did you know.pl

From Wikipedia, the free encyclopedia

The following code is licensed under the GPL and the Creative Commons Attribution License. -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)

  • This code is not working -- AllyUnion (talk) 03:21, 9 Apr 2005 (UTC)
  • FYI: The system calls to python2.3 invoke the pywikipediabot framework. Perl would need many extra packages just to download the pages, and I don't have access to a version of a Perl module that allows me to post to the English Wikipedia. -- AllyUnion (talk) 03:27, 9 Apr 2005 (UTC)

Basic idea:

  1. Leave three blocks of "...that" on Template talk:Did you know
  2. Move all other blocks of "...that" to Wikipedia:Recent additions
  3. Move oldest blocks on Wikipedia:Recent additions to an archive page if Wikipedia:Recent additions exceeds 50 "...that" lines.

didyouknow.pl[edit]

#!/usr/bin/perl -w

# Author: Jason Y. Lee
# Purpose: Wikipedia's Did you know archival process

# Special thanks to dysprosia, and the person who helped me in #wikipedia

# Assumptions:

# On Template talk:Did you know:
# A did you know line is in the following format:
# *...that <TEXT><br>
# Where <TEXT> is any text of any length, no matter if there is a newline or not.

# On Wikipedia:Recent additions and any archive pages after Wikipedia:Recent additions 25:
# A did you know line starts either with:
# [[Image: OR ...that
# A did you know line ends with either:
# A question mark or a HTML line break (<br>)

use Tie::File;

# Filesystem layout. All paths below are built by plain concatenation, so
# each directory component carries its own trailing slash.
my $HOME   = '<insert home directory>';
my $BOTDIR = 'wikipedia/bots/kurando-san/';
my $LOGDIR = 'dyklogs/';

# Persistent bot state (holds the "lastarchive = N" line).
my $configfile = "${HOME}${BOTDIR}didyouknow.cfg";

# Scratch files: 1 = talk page, 2 = recent additions, 3 = current archive,
# 4 = rebuilt recent additions, 5 = filename prefix for rebuilt archives.
my ($logfile1, $logfile2, $logfile3, $logfile4, $logfile5) =
	map { "${HOME}${BOTDIR}${LOGDIR}" . $_ }
	('dyk1.log', 'dyk2.log', 'dyk3.log', 'newra.log', 'newra-');

# Wiki page titles. $archive and $archivenum are placeholders until the
# config file has been read.
my $talkpage   = 'Template talk:Did you know';
my $pagename   = 'Wikipedia:Recent additions';
my $archive    = 'Wikipedia:Recent additions';
my $archivenum = '';

# First four lines of every archive page.
my $archiveheader = join("\n",
	'{{DYK archive header}}',
	'{{DYK archive nav}}',
	'',
	'==Did you know...==',
) . "\n";

# Python Page Existence program (currently disabled)
#$pageexist1 = "\"import config, wikipedia\nimport sys\nmysite = wikipedia.getSite()\nif (wikipedia.PageLink(mysite, \'";
#$pageexist2 = "\').exists()):\n\tsys.exit(0)\nelse:\n\tsys.exit(1)\n\"";

# Posting a page
#
# Fragments of a Python program that is assembled later as
#   $postprog1 . ($postprog2 . LOGFILE . $postprog3 . PAGE . $postprog4 . COMMENT . $postprog5)...
# $postprog1 opens the shell double quote around the whole program; the
# caller appends the closing quote. Fragments 2-5 form one "read LOGFILE
# and put it on PAGE with COMMENT" stanza and may be repeated.
$postprog1 = '"' . join("\n",
	'import config, wikipedia',
	'mysite = wikipedia.getSite()',
	'',
);
$postprog2 = q{logfile = '};
$postprog3 = qq{'\npagename = '};
$postprog4 = qq{'\ncomment = '};
$postprog5 = join("\n",
	q{'},
	q{log = file(logfile, 'r')},
	q{page = log.read()},
	q{log.close()},
	q{wikipedia.PageLink(mysite, pagename).put(page, comment)},
	'',
);

# Read the bot state from the config file.
#
# The file is tied rather than slurped so that the "lastarchive" line can
# be rewritten in place at the end of the run: $lastarchive keeps a
# reference to that line's array element.
#
# Sets:
#   $archivenum  - the number following "lastarchive =", surrounding
#                  whitespace removed
#   $archive     - "Wikipedia:Recent additions <archivenum>"
#   $lastarchive - reference to the matching config line, or "" when the
#                  file has no such line
tie @config, 'Tie::File', $configfile
	or die "Cannot tie config file '$configfile': $!";
$lastarchive = "";
foreach $line (@config)
{
	# Accept any spacing around '=' (the value extraction below always
	# did), and only at the start of a line so that a comment mentioning
	# the setting is not mistaken for it.
	next unless $line =~ /^\s*lastarchive\s*=/;

	$archivenum = $line;
	$archivenum =~ s/^\s*lastarchive\s*=\s*//;
	# Trailing blanks would otherwise end up in the archive page title.
	$archivenum =~ s/\s+$//;
	$archive = "Wikipedia:Recent additions " . $archivenum;
	$lastarchive = \$line;
}

# Get the pages
#
# fetch_page_to_log($logfile, $title)
#
# Runs a small pywikipediabot program under python2.3 that downloads the
# wiki page $title and writes its text, encoded as iso-8859-1, to
# $logfile.
#
# The program is handed to python as a single argument (list-form
# system), so no shell is involved and no shell quoting is needed.
# $logfile and $title are still embedded in single-quoted Python string
# literals and therefore must not contain an apostrophe or a backslash.
#
# Returns true when python exited with status 0. A failure is reported
# with warn() rather than die(): NOTE(review) a page that does not exist
# yet (for example a brand-new archive) presumably makes python exit
# non-zero, and that must not abort the run -- confirm against
# pywikipediabot.
sub fetch_page_to_log
{
	my ($logfile, $title) = @_;

	my $program = "import config, wikipedia\n"
		. "mysite = wikipedia.getSite()\n"
		. "logfile = '" . $logfile . "'\n"
		. "pagename = '" . $title . "'\n"
		. "log = file(logfile, 'w')\n"
		. "log.write(wikipedia.getPage(mysite, pagename, True, True, False).encode('iso-8859-1'))\n"
		. "log.close()\n";

	my $status = system('python2.3', '-c', $program);
	if ($status != 0)
	{
		warn "Fetching '$title' into '$logfile' failed (system() returned $status)\n";
	}
	return $status == 0;
}

fetch_page_to_log($logfile1, $talkpage);
fetch_page_to_log($logfile2, $pagename);
fetch_page_to_log($logfile3, $archive);

# Analysis of 'Template talk:Did you know'
#
# Tie the downloaded talk page to @dyklog and advance $x to the first
# line after the archiving instructions. The block parser that follows
# starts reading at $x.
tie @dyklog, 'Tie::File', $logfile1
	or die "Cannot tie talk page log '$logfile1': $!";

# @dykmat is the block matrix filled by the parser: $dykmat[block][0] is
# the block size, $dykmat[block][1..] are its entries.
@dykmat = ();
$dykblockcount = 0;
$y = 0;
$line = "";
$x = 0;

# Find the archive section heading. The index is checked first so that a
# page without the heading ends the search instead of looping forever.
while (($x < scalar(@dyklog)) && ($dyklog[$x] !~ m/=+ARCHIVE USED SUGGESTIONS HERE=+/i))
{
	$x++;
}

# Find the last line of the instructions below that heading.
while (($x < scalar(@dyklog)) && ($dyklog[$x] !~ m/\'\'include a link to the used picture behind the fact in which it has been \'\'\'used\'\'\' on the front page\.\'\'<br>/i))
{
	$x++;
}

if ($x >= scalar(@dyklog))
{
	die "Archive section markers not found in '$logfile1' ($talkpage)\n";
}

# Step past the instruction line itself.
$x++;

# Walk the talk page from $x until the "All older items have been archived"
# footer (or the end of the file), grouping "*...that" entries into blocks
# that are separated by blank lines.
#
#   $dykmat[block][1..]  the entries of a block; continuation lines are
#                        joined onto the entry with a single space
#   $dykmat[block][0]    (number of entries + 1), stored when the blank
#                        line that closes the block is reached
#   $dykblockcount       index of the block currently being filled
#   $y                   number of entries stored so far in that block
#
# @dyklog is tied to the talk page log, so every substitution and splice
# below changes that file. While $dykblockcount is below 3 a handled line
# is kept and $x moves on; from 3 upwards the line is spliced out instead,
# which leaves $x pointing at the following line.
while (($dyklog[$x] !~ m/All older items have been archived at \[\[Wikipedia:Recent additions\]\]/ig) && ($x < scalar(@dyklog)))
{
	# Normalise the line in place:
	#  - "* ... that" in any spacing becomes "*...that"
	$dyklog[$x] =~ s/^\*\s*\.\.\.\s*that/\*\.\.\.that/ig;
	#  - drop a signature: optional dash, "[[User...:" up to "(UTC)"
	$dyklog[$x] =~ s/(?:--|&[mn]dash;|&#15[10];|)\s*\[\[User(.*?):(.*?)\(UTC\)//ig;
	#  - drop empty parentheses
	$dyklog[$x] =~ s/\(\)//g;
	#  - drop one whitespace character between "?" and "<br"
	$dyklog[$x] =~ s/\?\s<br/\?<br/ig;
	#  - "[[Image" becomes "[[:Image" (presumably so the talk page links
	#    to the image instead of displaying it)
	$dyklog[$x] =~ s/\[\[Image/\[\[:Image/ig;
	$loopflag = 1;
	$line = "";

#	print "$x. 1\n";

	# Case 1: the line starts a new entry.
	if ($dyklog[$x] =~ m/^\*\.\.\.that/ig)
	{
#		print "$x. 2\n";
		$line = $dyklog[$x];

		if ($dykblockcount >= 3)
		{
			splice(@dyklog, $x, 1);
		}
		else
		{
			$x++;
		}

		# Collect continuation lines until the next entry, an empty line
		# or a whitespace-only line. The terminating line is not consumed
		# here; the outer loop handles it on its next pass.
		while (($loopflag == 1) && ($x < scalar(@dyklog)))
		{
#			print "$x. 3\n";
			# Same normalisation as at the top of the outer loop.
			$dyklog[$x] =~ s/^\*\s*\.\.\.\s*that/\*\.\.\.that/ig;
			$dyklog[$x] =~ s/(?:--|&[mn]dash;|&#15[10];|)\s*\[\[User(.*?):(.*?)\(UTC\)//ig;
			$dyklog[$x] =~ s/\(\)//g;
			$dyklog[$x] =~ s/\?\s<br/\?<br/ig;
			$dyklog[$x] =~ s/\[\[Image/\[\[:Image/ig;
			if ($dyklog[$x] =~ m/^\*\.\.\.that/ig)
			{
#				print "$x. 4\n";
				$loopflag = 0;
			}
			elsif ($dyklog[$x] eq "")
			{
#				print $x, ". Block detected\n";
#				print "$x. 5\n";
				$loopflag = 0;
			}
			else
			{
#				print "$x. 6\n";
				# $test is the line with all whitespace removed.
				$test = $dyklog[$x];
				$test =~ s/\s+//g;
				if ($test eq "")
				{
#					print "$x. 7\n";
#					print $x, ". Block detected\n";
					$loopflag = 0;
				}
				else
				{
#					print "$x. 8\n";
					# Continuation text: append it to the entry.
					$line .= " " . $dyklog[$x];
					if ($dykblockcount >= 3)
					{
						splice(@dyklog, $x, 1);
					}
					else
					{
						$x++;
					}
				}
			}
		}
#		print "$x. 9\n";
#		print $x, ". -> (", $dykblockcount, ", ", $y, "): ", $line, "\n\n";
		# Entries are stored from index 1; index 0 is kept for the size.
		$y++;
		$dykmat[$dykblockcount][$y] = $line;
	}
	# Case 2: an empty line closes the current block.
	elsif ($dyklog[$x] eq "")
	{
#		print "$x. 10\n";
		if ($y == 0)
		{
			# No entries since the previous separator: cancel the
			# increment below so that no empty block is created.
			$dykblockcount--;
		}
		else
		{
			$dykmat[$dykblockcount][0] = $y + 1;
		}
		$y = 0;
		$dykblockcount++;
		# The keep/remove decision uses the block index after the
		# increment, so the separator that closes the third block is
		# itself removed.
		if ($dykblockcount >= 3)
		{
			splice(@dyklog, $x, 1);
		}
		else
		{
			$x++;
		}
	}
	# Case 3: any other line.
	else
	{
#		print "$x. 11\n";
		$test = $dyklog[$x];
		$test =~ s/\s+//g;
		# A whitespace-only line closes the block just like an empty one.
		if ($test eq "")
		{
#			print "$x. 12\n";
			if ($y == 0)
			{
				$dykblockcount--;
			}
			else
			{
				$dykmat[$dykblockcount][0] = $y + 1;
			}
			$y = 0;
			$dykblockcount++;
		}
		# Text that is neither an entry nor a separator is not stored in
		# @dykmat; it is only kept or removed according to the block index.
		if ($dykblockcount >= 3)
		{
			splice(@dyklog, $x, 1);
		}
		else
		{
			$x++;
		}
	}

#	print "Exit";
}

#print $dykblockcount, "\n";

#die;

#for ($x = 0; $x < $dykblockcount; $x++)
#{
#	for ($y = 1; $y < $dykmat[$x][0]; $y++)
#	{
#		print $y, ". ", $dykmat[$x][$y], "\n";
#	}
#	print "\n";
#}

# The first three blocks stay on the talk page, so there is only work to
# do when more than three complete blocks were found. Stop here with a
# message that says why (the bare die printed only "Died at ...").
if ($dykblockcount <= 3)
{
	die "Nothing to archive: found $dykblockcount complete block(s) on '$talkpage', need more than 3\n";
}

# Find the image left and right.
#
# Tie the downloaded 'Wikipedia:Recent additions' page to @wralog and
# look at its first line containing "[[Image:". $side becomes "left" when
# that line mentions "left" (case-insensitive) and "right" otherwise.
# When the page has no image line at all, $side keeps its default of
# "left"; the search used to run past the end of the file and never
# terminate in that case.
tie @wralog, 'Tie::File', $logfile2
	or die "Cannot tie recent additions log '$logfile2': $!";

$side = "left";

$x = 0;
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/\[\[Image:/i))
{
	$x++;
}
if ($x < scalar(@wralog))
{
	$side = ($wralog[$x] =~ m/left/i) ? "left" : "right";
}

# Process the talk page.
#
# Rewrite every stored talk-page entry in place: the leading "*" bullet
# is dropped, and when the entry carries exactly one image link, that
# link is turned into a 100px thumbnail on a line of its own in front of
# the text. Thumbnails alternate between the two sides, starting with the
# current value of $side.
for my $block (0 .. $dykblockcount-1)
{
	for my $entry (1 .. $dykmat[$block][0]-1)
	{
		my $text = $dykmat[$block][$entry];
		$text =~ s/^\*\.\.\.that/\.\.\.that/ig;

		# Exactly two pieces means exactly one image link in the entry.
		my @pieces = split /\(?\[\[:?Image:/i, $text;

		if (scalar(@pieces) != 2)
		{
			# No (single) image: only tidy what sits between "?" and "<br".
			$text =~ s/\?\s*?\)*?<br/\?<br/ig;
			$dykmat[$block][$entry] = "$text";
			next;
		}

		# Reduce the piece after "Image:" to the bare image name.
		my $picture = $pieces[1];
		$picture =~ s/\]\]\)?<br\s*\/?>//ig;
		$picture =~ s/\]\]\)?//g;
		$picture =~ s/\n//g;
		my $thumb = "[[Image:" . $picture . "|100px|" . $side . "]]";

		# The next thumbnail goes on the other side.
		$side = ($side eq "left") ? "right" : "left";

		# Remove the original image link from the text, then tidy as above.
		$text =~ s/\(*?\s*?\[\[:*?Image:.*?\]\]\s*?\)*?//ig;
		$text =~ s/\?\s*?\)*?<br/\?<br/ig;
		$dykmat[$block][$entry] = "$thumb\n$text";
	}
}

# Analysis of 'Wikipedia:Recent additions'
#
# Parse the entries that sit between the two
# "<!-- newly archived items should go in at the top -->" marker lines of
# the tied page @wralog into the block matrix @wramat:
#
#   $wramat[block][1..]  the entries of a block, each ending in "\n".
#                        An entry starts on a line containing "[[Image:"
#                        or starting with "...that" and runs up to the
#                        first line containing "?" or a <br> tag.
#   $wramat[block][0]    (number of entries + 1)
#   $wrablockcount       number of blocks closed so far (-1 until the
#                        first entry or separator has been seen)
#   $wracount            total number of entries found
#
# Blocks are separated by empty or whitespace-only lines. Every array
# index is checked against the end of the file before the line is
# examined, so a missing marker or an unterminated entry ends the scan
# instead of looping forever.

@wramat = ();
$wrablockcount = -1;
$y = 1;
$line = "";
$wracount = 0;
$x = 0;

# Skip everything up to and including the first marker line.
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
	$x++;
}
$x++;

while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
	$wralog[$x] =~ s/^\s*?\.\.\.\s*?that/\.\.\.that/ig;

	if ($wralog[$x] =~ m/\[\[Image:/i)
	{
		if ($wrablockcount == -1)
		{
			$wrablockcount++;
		}
		# Entry that starts with an image line: keep appending the
		# following lines until one ends the entry.
		$line = $wralog[$x] . "\n";
		while (($x < $#wralog) && !(($wralog[$x] =~ m/\?/i) || ($wralog[$x] =~ m/<br\s*\/{0,1}>/i)))
		{
			$x++;
			$line .= $wralog[$x] . "\n";
		}
		$wramat[$wrablockcount][$y] = $line;
		$y++;
		$wracount++;
	}
	elsif ($wralog[$x] =~ m/^\.\.\.that/i)
	{
		if ($wrablockcount == -1)
		{
			$wrablockcount++;
		}
		# Entry without an image: collect lines up to and including the
		# one that ends it.
		$line = "";
		while (($x < $#wralog) && !(($wralog[$x] =~ m/\?/i) || ($wralog[$x] =~ m/<br\s*\/{0,1}>/i)))
		{
			$line .= $wralog[$x] . "\n";
			$x++;
		}
		$line .= $wralog[$x] . "\n";
		$wramat[$wrablockcount][$y] = $line;
		$y++;
		$wracount++;
	}
	elsif ($wralog[$x] !~ m/\S/)
	{
		# Empty or whitespace-only line: close the current block.
		if ($wrablockcount != -1)
		{
			$wramat[$wrablockcount][0] = $y;	# Save the size
		}
		$y = 1;				# Reset the line count for the block
		$wrablockcount++;
	}
	# Any other line is not part of an entry and is skipped.

	$x++;
}

# A last block that runs straight into the marker (no blank line after
# it) was never closed above and would be dropped from the rebuilt page;
# close it here.
if (($wrablockcount != -1) && ($y > 1))
{
	$wramat[$wrablockcount][0] = $y;
	$y = 1;
	$wrablockcount++;
}

# Put a blank line in front of the closing marker.
# NOTE(review): the replacement record contains a newline; Tie::File
# documents records containing the record separator as unsupported, so
# this relies on its current behaviour.
splice(@wralog, $x, 1, "\n<!-- newly archived items should go in at the top -->");

# Append each closed "Wikipedia:Recent additions" block to the end of the
# "Did you know" matrix. Slot 0 (the block size) is copied along with the
# entries, and $dykblockcount grows by one per copied block.
for (my $src = 0; $src < $wrablockcount; $src++, $dykblockcount++)
{
	my $size = $wramat[$src][0];
	for (my $slot = 0; $slot < $size; $slot++)
	{
		$dykmat[$dykblockcount][$slot] = $wramat[$src][$slot];
	}
}

# Build the new 'Wikipedia:Recent additions' text in $logfile4:
#
#   1. the existing page up to and including the first marker line,
#   2. blocks 3, 4, ... of @dykmat (blocks 0-2 stay on the talk page),
#      each followed by a blank line, until the running total of block
#      sizes reaches 60,
#   3. the existing page from the second marker line to the end.
#
# The old entries between the two markers are not copied: they are
# already part of @dykmat. $i is left at the index of the first block
# that did NOT fit; the archive steps that follow start from it.
$x = 0;
open(my $ralog, ">", $logfile4)
	or die "Cannot open '$logfile4' for writing: $!";

while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
	print {$ralog} $wralog[$x], "\n";
	$x++;
}
# The first marker line itself (absent when the page has no marker).
if ($x < scalar(@wralog))
{
	print {$ralog} $wralog[$x], "\n";
}
$x++;

# Slot 0 of a block holds (entries + 1), so $total counts one extra per
# block on top of the entries written.
$total = 0;
for ($i = 3; (($i < $dykblockcount) && ($total < 60)); $i++)
{
	foreach my $j (1 .. $dykmat[$i][0]-1)
	{
		print {$ralog} $dykmat[$i][$j], "\n";
	}
	print {$ralog} "\n";
	$total += $dykmat[$i][0];
}

# Skip the old entries, then copy the rest of the page from the second
# marker onwards.
while (($x < scalar(@wralog)) && ($wralog[$x] !~ m/<!-- newly archived items should go in at the top -->/))
{
	$x++;
}
while ($x < scalar(@wralog))
{
	print {$ralog} $wralog[$x], "\n";
	$x++;
}
# Buffered write errors only show up at close time.
close($ralog) or die "Cannot finish writing '$logfile4': $!";

#print "----\n";

#print $wracount, "\n";

# Post the new pages
#
# One python2.3 run posts both pages: the contents of $logfile1 go to
# $talkpage and the contents of $logfile4 go to $pagename. The run stops
# here if python reports a failure. Continuing would archive (and later
# clean up) entries although the pages they were taken from were never
# updated.
my $postcommand = "python2.3 -c " . $postprog1
	. $postprog2 . $logfile1 . $postprog3 . $talkpage . $postprog4 . "Testing archival bot" . $postprog5
	. $postprog2 . $logfile4 . $postprog3 . $pagename . $postprog4 . "Testing archival bot" . $postprog5
	. "\"";
system($postcommand) == 0
	or die "Posting '$talkpage' and '$pagename' failed (system() status $?)\n";

# Analysis of the archive pages
#
# Count the entries already on the current archive page ($logfile3) into
# $arccount. The page is only read when there is something left to
# archive ($i < $dykblockcount); otherwise $arccount stays 0 and @arclog
# is not tied.
#
# An entry starts on a line containing "[[Image:" or starting with
# "...that" and ends on the first line containing "?" or a <br> tag.

$arccount = 0;

if ($i < $dykblockcount)
{
	tie @arclog, 'Tie::File', $logfile3
		or die "Cannot tie archive log '$logfile3': $!";

	# Start counting at the "==Did you know...==" heading.
	$x = 0;
	while (($x < scalar(@arclog)) && ($arclog[$x] !~ m/==Did you know\.\.\.==/i))
	{
		$x++;
	}

	# The loop header is the only place that steps to the next line. An
	# additional increment at the end of the body used to skip every
	# other line, so entries were missed.
	for ($arccount = 0; $x < scalar(@arclog); $x++)
	{
		if (($arclog[$x] =~ m/\[\[Image:/i) || ($arclog[$x] =~ m/^\.\.\.that/i))
		{
			# Move to the line that ends this entry, but never past the
			# last line of the file.
			while (($x < $#arclog) && !(($arclog[$x] =~ m/\?/i) || ($arclog[$x] =~ m/<br\s*\/{0,1}>/i)))
			{
				$x++;
			}
			$arccount++;
		}
	}
}

# start_archive_log($path)
#
# Create (or truncate) $path so that it contains only the archive page
# header. Dies when the file cannot be written.
sub start_archive_log
{
	my ($path) = @_;

	open(my $fh, ">", $path) or die "Cannot create archive log '$path': $!";
	print {$fh} $archiveheader;
	close($fh) or die "Cannot finish writing archive log '$path': $!";
	return;
}

# Write the blocks that did not fit on 'Wikipedia:Recent additions'
# (blocks $i .. $dykblockcount-1) into archive log files named
# $logfile5 . <archive number> . ".log".
#
# - If the current archive holds fewer than 50 entries, the blocks go
#   into that archive and its existing entries are appended after them.
#   Otherwise a new archive number is started straight away.
# - Blocks are handled from the highest index down and each one is
#   inserted directly below the 4-line header, so the lowest-numbered
#   block ends up at the top of the page.
# - Whenever the running size count reaches 50, the next archive number
#   is started.
#
# $startingarchive records the archive number before any increment;
# $archivenum ends up as the last archive number started.
$startingarchive = $archivenum;
$testcount = $arccount;

# Nothing is written when there is nothing to archive. Previously a
# header-only log was still produced for the current archive, and that
# log is later posted over the existing archive page.
if ($i < $dykblockcount)
{
	my $archive_has_room = ($arccount < 50);
	if (!$archive_has_room)
	{
		$archivenum++;
		$testcount = 0;
	}

	$currentlogfile = $logfile5 . $archivenum . ".log";
	start_archive_log($currentlogfile);
	tie @newlog, 'Tie::File', $currentlogfile
		or die "Cannot tie archive log '$currentlogfile': $!";

	for (my $block = $dykblockcount - 1; $block >= $i; $block--)
	{
		# Records 0-3 are the header; new entries start at record 4.
		my $start = 4;
		foreach my $entry (1 .. $dykmat[$block][0]-1)
		{
			splice(@newlog, $start, 0, $dykmat[$block][$entry]);
			$start++;
		}
		splice(@newlog, $start, 0, "");
		$testcount += $dykmat[$block][0];

		if ($testcount >= 50)
		{
			# This archive is full: continue in the next one.
			$testcount = 0;
			$archivenum++;
			$currentlog = $logfile5 . $archivenum . ".log";
			start_archive_log($currentlog);
			untie @newlog or die "Cannot untie archive log: $!";
			tie @newlog, 'Tie::File', $currentlog
				or die "Cannot tie archive log '$currentlog': $!";
		}
	}
	# Release the tie before the first log is reopened for appending.
	untie @newlog;

	if ($archive_has_room)
	{
		# Keep what was already on the starting archive page (everything
		# below its 4-line header) underneath the new entries.
		open(my $fh, ">>", $currentlogfile)
			or die "Cannot append to archive log '$currentlogfile': $!";
		foreach my $n (4 .. $#arclog)
		{
			print {$fh} $arclog[$n], "\n";
		}
		close($fh) or die "Cannot finish writing archive log '$currentlogfile': $!";
	}
}
#print "\n$testcount\n";
#print $archivenum, "\n";

# Post every archive page for which a log file was written. An archive
# number without a log file (for example a starting archive that was
# already full) is skipped. The run stops on the first failed post, so
# the config file is not advanced and the log files are left in place
# for inspection.
foreach $x ($startingarchive .. $archivenum)
{
	$currentlog = $logfile5 . $x . ".log";
	next unless -e $currentlog;

	system("python2.3 -c " . $postprog1 . $postprog2 . $currentlog . $postprog3 . "Wikipedia:Recent additions " . $x . $postprog4 . "Testing archival bot" . $postprog5 . "\"") == 0
		or die "Posting 'Wikipedia:Recent additions $x' failed (system() status $?)\n";
}

# Record the last archive number in the config file. $lastarchive is a
# reference to the config line only when such a line was found; writing
# through anything else would be a symbolic reference to an unrelated
# variable.
if (ref $lastarchive)
{
	$$lastarchive = "lastarchive = " . $archivenum;
}
else
{
	warn "No 'lastarchive' line found in '$configfile'; config not updated\n";
}

# Remove the log files. List-form system("rm", "-f", ".../*.log") never
# went through a shell, so the "*.log" pattern was passed to rm literally
# and nothing was deleted. The directory is read directly instead; names
# starting with "." are skipped, as a shell glob would do.
untie @dyklog;
untie @wralog;
untie @arclog;
untie @newlog;

my $logdirectory = $HOME . $BOTDIR . $LOGDIR;
if (opendir(my $dh, $logdirectory))
{
	foreach my $name (readdir($dh))
	{
		next unless $name =~ /^[^.].*\.log$/;
		unlink($logdirectory . $name)
			or warn "Cannot remove '$logdirectory$name': $!\n";
	}
	closedir($dh);
}
else
{
	warn "Cannot open log directory '$logdirectory': $!\n";
}

didyouknow.cfg[edit]

# Last archive that the program is on, do not attempt to update manually.
lastarchive = 25