User:Polbot/source/stats.pl

use strict;
use Perlwikipedia;
use LWP::UserAgent;

# Last-name prefix to process, taken from the first command-line argument.
# It is interpolated into the search URL and the edit summary further down,
# so refuse to run without it instead of silently using an undefined value.
my $firstletter = shift;
die "Usage: $0 <first letter(s) of last name>\n"
	unless (defined $firstletter && length $firstletter);

print "\nStarting polbot\n" ;
# Wiki client object used for logging in, reading and editing pages.
my $pw=Perlwikipedia->new();
#$pw->{debug} = 1;
# Identify the bot through the user-agent string of the underlying mech object.
$pw->{mech}->agent('Bot/WP/EN/Quadell/polbot');

print "Logging in\n";
my $login_status=$pw->login('Polbot','(bot password)');
# This script treats a login result that stringifies to "0" as success.
# NOTE(review): confirm against the Perlwikipedia version in use.
die "I can't log in." unless ($login_status eq 0);

# Pull from FJC
# Fetch the search-results page for the requested last-name prefix and collect
# the numeric judge id (jid) from every tGetInfo link found on it.
print "Getting list of all judges starting with $firstletter\n";
my $url = 'http://www.fjc.gov/servlet/tAsearch?lname=' . $firstletter;
print " $url\n";
my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/6.0");
my $res = $ua->get($url);
# Include the URL and HTTP status so a failed run can be diagnosed from the console.
die "could not connect to $url: " . $res->status_line unless ($res->is_success);
my $html = $res->content;
# A /g match in list context returns every captured jid in page order.
# The link text is matched but not captured, since only the ids are used.
my @judge_ids = ($html =~ m/<A HREF="\/servlet\/tGetInfo\?jid=(\d+)">[^<]*</g);

print scalar(@judge_ids) . " judges found.\n\n";
# Accumulates one wiki table row per recognised judgeship, across all judges.
# Starts as an empty string so appending it later is well-defined even when
# no judgeship was matched.
my $wiki_out = '';

# For each judge id: fetch the biography page, pull out name, gender and race,
# then emit one table row for every judgeship whose court could be identified.
foreach my $jid (@judge_ids) {
	my $judge_url = "http://www.fjc.gov/servlet/tGetInfo?jid=$jid";
	print "\n$jid: ";
	my $judge_res = $ua->get($judge_url);
	die "could not connect to $judge_url: " . $judge_res->status_line
		unless ($judge_res->is_success);
	my $judge_html = $judge_res->content;
	# Normalise backticks to apostrophes before any text is extracted.
	$judge_html =~ s/\`/'/g;
	
	# Output variables ("-" means "not found on the page")
	my $name = "-";
	my $race = "-";
	my $gender = "-";
	
	# extract name
	# The page heading holds the name as "Last, First [Suffix]". Every capture
	# is read only after a successful match, so a page with an unexpected
	# layout can never pick up stale $1/$2/$3 values from an earlier regex.
	if ($judge_html =~ m/<FONT SIZE=\+1 COLOR=BLACK><B>([^\n]*?) *<\/B><\/FONT>/) {
		my $rev_name = $1;
		$rev_name =~ s/ +/ /g;     # collapse runs of spaces
		$rev_name =~ tr/[]//d;     # drop square brackets, which would break the wiki link
		if ($rev_name =~ m/^(.*?)\, (.*?)( Jr\.| Sr\.| II| III| IV)?$/) {
			my ($last_name, $first_names, $suffix) = ($1, $2, $3);
			# The suffix group is optional and is undef when absent.
			$suffix = '' unless defined $suffix;
			$name = "[[$first_names $last_name$suffix]]";
		} else {
			# No "Last, First" shape: link the heading text as it stands.
			$name = "[[$rev_name]]";
		}
	}
	print "$name\n";
	
	# extract gender
	if ($judge_html =~ m/<BR><B>Gender:<\/B> *([^<]+)</) {
		$gender = $1;
	}

	# extract race
	if ($judge_html =~ m/<BR><BR><B>Race or Ethnicity:<\/B> *([^<]+)</) {
		$race = $1;
	}
	
	# Extract judgeships
	# The "Federal Judicial Service" section runs up to the next bold heading
	# that follows a blank line; entries inside it are separated by <BR><BR>.
	next unless ($judge_html =~ m/<B>Federal Judicial Service:<\/B><BR>(.*?)<BR>\s*<BR>\s*<B>/si);
	my $ju_string = $1;

	foreach my $ju (split(/ *<[Bb][Rr]><[Bb][Rr]> */, $ju_string)) {

		# Identify the court. The order of the tests matters: the specific
		# District of Columbia patterns sit between the general ones exactly
		# as in the original chain, and the first match wins.
		my $court = "-";
		if ($ju =~ m/Judge, U\. S\. District Court, ([^<]*)<[Bb][Rr]>/) {
			$court = "[[United States District Court for the $1]]";
		} elsif ($ju =~ m/U\. S\. District Court for the District of Columbia/) {
			$court = "[[United States District Court for the District of Columbia]]";
		} elsif ($ju =~ m/Judge, U\. S\. Circuit Courts ([^<]*)<[Bb][Rr]>/) {
			$court = "[[United States circuit court]] $1";
		} elsif ($ju =~ m/Judge, Circuit Court for the District of Columbia[^<]*<[Bb][Rr]>/) {
			$court = "[[United States circuit court]] for the D.C. Circuit";
		} elsif ($ju =~ m/U\. S\. Court of Appeals for District of Columbia Circuit<[Bb][Rr]>/) {
			$court = "[[United States Court of Appeals for the D.C. Circuit]]";
		} elsif ($ju =~ m/Judge, U\. S\. Court of Appeals ([^<]*)<[Bb][Rr]>/) {
			$court = "[[United States Court of Appeals $1]]";
		} elsif ($ju =~ m/Supreme Court of the United States/) {
			$court = "[[Supreme Court of the United States]]";
		}

		# Entries whose court is not recognised produce no row, so the
		# remaining fields need not be parsed for them.
		next if ($court eq "-");

		# Output variables ("-" means "not stated in this entry")
		my $president = "-";
		my $nominated_date = "-";
		my $recess_date = "-";
		my $confirmed_date = "-";
		my $commission_date = "-";
		my $senior_date = "-";
		my $end_date = "-";
		my $end_reason = "-";

		# Appointing president, with either a nomination date or a recess
		# appointment date (optionally followed by a later nomination date).
		if ($ju =~ m/Nominated by (.*?) on (\w+ \d+, \d+), to/) {
			$president = "[[$1]]";
			$nominated_date = $2;
		} elsif ($ju =~ m/Received a recess appointment from (.*?) on (\w+ \d+, \d+), to/) {
			$president = "[[$1]]";
			$recess_date = $2;
			if ($ju =~ m/; nominated on (\w+ \d+, \d+);/) {
				$nominated_date = $1;
			}
		}

		if ($ju =~ m/Confirmed by the Senate on (\w+ \d+, \d+), and received commission on (\w+ \d+, \d+)\./) {
			$confirmed_date = $1;
			$commission_date = $2;
		}

		if ($ju =~ m/Assumed senior status on (\w+ \d+, \d+)\./) {
			$senior_date = $1;
		}

		if ($ju =~ m/Service terminated on (\w+ \d+, \d+), due to (.*?)\./) {
			$end_date = $1;
			$end_reason = $2;
			# Shorten the long reason phrase to keep the table column narrow.
			$end_reason =~ s/appointment to another judicial position/reappointment/;
		}

		# One wiki table row: a "|-" row separator, then the cells joined by "||".
		my @cells = (
			$name, $court, $president, $recess_date, $nominated_date,
			$confirmed_date, $commission_date, $senior_date, $end_date,
			$end_reason, $race, $gender,
		);
		$wiki_out .= "|-\n| " . join(' || ', @cells) . " \n";
	}
}

# Append the collected rows to the on-wiki statistics table. When no row was
# collected there is nothing to add, so the page is left untouched instead of
# being re-saved with unchanged text.
my $stats_page = "Wikipedia:WikiProject United States courts and judges/judgestats";
if (defined $wiki_out && length $wiki_out) {
	print "Writing... ";
	# NOTE(review): the value returned by get_text is used as-is; confirm what
	# it returns when the page cannot be read, so a failed read cannot overwrite
	# the existing table.
	my $listsofar = $pw->get_text($stats_page);
	$listsofar .= $wiki_out;
	$pw->edit($stats_page, $listsofar, "Adding judges that start with $firstletter");
	print "done.\n";
} else {
	print "No judgeship rows collected; nothing to write.\n";
}

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in whole or in part from Wikipedia and has been adapted for re-presentation. We strive to provide accurate and relevant information; however, please note the following:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. This content is not intended to constitute professional advice. It is displayed for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.