#! /usr/bin/perl -w
#
# Downloads a list of page URLs, fetches every page and prints a MediaWiki
# table to STDOUT.  Each page becomes one table row; each column corresponds
# to one committee abbreviation from @committee_array.
#
# The interpreter line must stay on a line of its own: anything following
# "#!" on the same physical line is a comment and is never executed.

use strict;
use warnings;
use diagnostics;
use utf8;

binmode STDOUT, ":utf8";

use LWP::Simple;

# One URL per line.  LWP::Simple::get returns undef when the download fails,
# so stop with a clear message instead of splitting an undefined value.
my @urls = do {
    my $url_list = get 'http://www.erikjosefsson.eu/sites/default/files/urltabell3.txt';
    defined $url_list
        or die "Could not download the URL list\n";
    split /\n/, $url_list;
};

# Committee name (as captured from a page) => committee abbreviation.
# Several keys are only the tail of a full committee name (for example
# 'Public Health and Food Safety' next to 'Environment, Public Health and
# Food Safety').  The capture group used in the page loop below,
# (\w[\w\s]*\w), cannot span a comma or an apostrophe, so presumably these
# tails are what actually gets captured for such names.
my %commities_abbr = (
    'Conciliation Committee'                     => 'CODE',
    'Foreign Affairs'                            => 'AFET',
    'Human Rights'                               => 'DROI',
    'Security and Defence'                       => 'SEDE',
    'Development'                                => 'DEVE',
    'International Trade'                        => 'INTA',
    'Budgets'                                    => 'BUDG',
    'Budgetary Control'                          => 'CONT',
    'Economic and Monetary Affairs'              => 'ECON',
    'Employment and Social Affairs'              => 'EMPL',
    'Environment, Public Health and Food Safety' => 'ENVI',
    'Public Health and Food Safety'              => 'ENVI',
    'Industry, Research and Energy'              => 'ITRE',
    'Research and Energy'                        => 'ITRE',
    'Internal Market and Consumer Protection'    => 'IMCO',
    'Transport and Tourism'                      => 'TRAN',
    'Regional Development'                       => 'REGI',
    'Agriculture and Rural Development'          => 'AGRI',
    'Fisheries'                                  => 'PECH',
    'Culture and Education'                      => 'CULT',
    'Legal Affairs'                              => 'JURI',
    'Civil Liberties, Justice and Home Affairs'  => 'LIBE',
    'Justice and Home Affairs'                   => 'LIBE',
    'Constitutional Affairs'                     => 'AFCO',
    'Women\'s Rights and Gender Equality'        => 'FEMM',
    'Rights and Gender Equality'                 => 'FEMM',
    'S Rights and Gender Equality'               => 'FEMM',
    's Rights and Gender Equality'               => 'FEMM',
    'Petitions'                                  => 'PETI',
    'Financial, Economic and Social Crisis'      => 'CRIS'
);

# Column order of the generated table.
my @committee_array = ("CODE", "AFET", "DROI", "SEDE", "DEVE", "INTA",
                       "BUDG", "CONT", "ECON", "EMPL", "ENVI", "ITRE",
                       "IMCO", "TRAN", "REGI", "AGRI", "PECH", "CULT",
                       "JURI", "LIBE", "AFCO", "FEMM", "PETI", "CRIS");

# Table opening plus header row.  The committee columns are generated from
# @committee_array so the header cannot drift from the column order.
# NOTE(review): the header row ("! ...") is emitted on a line of its own,
# because MediaWiki only recognises "!" as a header marker at the start of a
# line - confirm against the intended wiki output.
my $table_header = "{| border='1' cellspacing='0' class='wikitable sortable'\n"
    . "! tratten link !! summary !! "
    . join(' !! ', @committee_array) . "\n";

my $table_footer = "|}\n";

# display_results COMMITTEE, MEP, STATUS
#
# Prints one cell fragment per entry of @committee_array.  The column whose
# abbreviation equals the one registered for COMMITTEE in %commities_abbr
# gets " | (STATUS) MEP | "; every other column gets " || ".
#
# A COMMITTEE that is undefined or missing from %commities_abbr matches no
# column, so only " || " fragments are printed (and no "uninitialized value"
# warning is raised for it).
#
# Returns nothing.
sub display_results {
    my ($commitee, $MEP, $status) = @_;

    my $abbr = defined $commitee ? $commities_abbr{$commitee} : undef;
    $abbr = '' unless defined $abbr;

    # Exactly one fragment per column: a ternary cannot fall through to the
    # second print the way "cond and print A or print B" does when the first
    # print reports failure.
    for my $column (@committee_array) {
        print $abbr eq $column ? " | ($status) $MEP | " : " || ";
    }

    return;
}

print $table_header;

# One table row per URL: the row starts with a wiki link built from the first
# capture of the title match, then display_results is called with the three
# captures of the pattern below (committee, MEP, status - in that order).
# NOTE(review): get returns undef on failure and $1 is used without checking
# that the title match succeeded - consider guarding both.
# NOTE(review): the empty group "(?:)?" and the title pattern look as if
# markup was stripped from them - verify against the original script.
for (@urls) {
    my $page_content = get $_;
    $page_content =~ m/\s(\S*?)<\/title>/;
    print "|-\n | [[$1]] |";
    display_results $1, $3, $2 while ($page_content =~ /\s*(?:)?.*?(\w[\w\s]*\w)\s+\((responsible|opinion)\)(?:<\/i>)?\s*<\/span>\s*<\/td>\s*