package GCPlugins::GCPluginsBase;
###################################################
#
# Copyright 2005-2010 Christian Jodar
# Copyright 2015-2016 Kérénoc (kerenoc01 on Google mail)
#
# This file is part of GCstar.
#
# GCstar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# GCstar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCstar; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
#
###################################################
use strict;
use utf8;
{
package GCPluginParser;
use base qw(HTML::Parser);
use LWP::Simple qw($ua);
use HTTP::Cookies::Netscape;
use URI::Escape;
use HTML::Entities;
use Encode;
use File::Spec;
# Constructor. Builds the underlying HTML::Parser object, configures the
# $ua user agent imported from LWP::Simple and resets the result list.
# Works both as a class method and on an existing instance.
sub new
{
    my $proto = shift;
    my $class = ref($proto) || $proto;
    my $self = $class->SUPER::new();

    # $ua is the single agent object exported by LWP::Simple, so these
    # settings are shared by every parser created in this process.
    $ua->agent('Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.5) Gecko/20041111 Firefox/1.0');
    $ua->default_header('Accept-Encoding' => 'x-gzip');
    $ua->default_header('Accept' => 'text/html');
    $self->{ua} = $ua;

    $self->{itemIdx} = -1;
    # Must be an array reference: assigning "()" to a hash slot stores
    # undef, and dereferencing that later dies under "use strict".
    $self->{itemsList} = [];

    bless ($self, $class);
    return $self;
}
# Number of items found so far. itemIdx holds the zero-based index of the
# last stored item, so the count is that index plus one.
sub getItemsNumber
{
    my $self = shift;
    my $lastIdx = $self->{itemIdx};
    return $lastIdx + 1;
}
# Returns the items found by the last search as a list (or their count in
# scalar context). Yields an empty list when no item list has been set,
# instead of dying on an undefined array reference.
sub getItems
{
    my ($self) = @_;
    return @{ $self->{itemsList} || [] };
}
# Runs a search: downloads the result page, parses it, and converts the
# collected fields of every item in $self->{itemsList} to Perl strings.
# Reads $self->{title} (search text) and $self->{nextUrl} (set by
# multi-pass plugins). Returns early, leaving the list empty, when the
# page could not be downloaded or is empty.
sub load
{
    my $self = shift;

    $self->checkProxy;
    $self->checkCookieJar;

    $self->{itemIdx} = -1;
    $self->{isInfo} = 0;
    # Array reference rather than "()", which would store undef here.
    $self->{itemsList} = [];

    #my $word = uri_escape_utf8($self->{title});
    my $title2 = encode($self->getSearchCharset, $self->{title});
    my $word = uri_escape($title2);
    $word =~ s/%20/+/g;

    my $html;
    # For multi-pass plugins, the plugin will have set the url to load for
    # the next pass as nextUrl. If this doesn't exist, we're either on the
    # first pass, or only using a one-pass plugin, so call getSearchUrl
    # to find the url to retrieve
    if ($self->{nextUrl})
    {
        $html = $self->loadPage($self->{nextUrl});
    }
    else
    {
        $html = $self->loadPage($self->getSearchUrl($word));
    }

    # loadPage may return undef on failure; "length undef" is undef on
    # recent perls, so the undefined case has to be tested explicitly.
    return if !defined($html) || length($html) == 0;

    $self->{parsingList} = 1;
    $html = $self->preProcess($html);
    # decode_entities in void context modifies its argument in place.
    decode_entities($html)
        if $self->decodeEntitiesWanted;
    $self->{inside} = undef;
    $self->parse($html);

    my @noConversion = @{$self->getNotConverted};
    foreach my $item (@{$self->{itemsList}})
    {
        foreach my $field (keys %{$item})
        {
            # The url is kept exactly as found on the page.
            next if $field eq 'url';
            $item->{$field} = $self->convertCharset($item->{$field})
                if ! GCUtils::inArrayTest($field, @noConversion);
        }
    }
}
# Downloads a page and returns its content.
#
#   $url    - address to fetch
#   $post   - optional form data; when true the page is requested with POST
#   $noSave - when true, $self->{loadedUrl} is not updated with $url
#
# The environment variable GCS_DEBUG_PLUGIN_PHASE drives a file cache kept
# in the temporary directory (one file per url):
#   0/unset - always download, never touch the cache
#   1       - always download and save the page
#   2       - read the saved page when there is one, otherwise download
#   3       - read the saved page when there is one, otherwise download
#             and save it
# Returns the decoded content, or the raw content when decoding failed.
sub loadPage
{
    my ($self, $url, $post, $noSave) = @_;

    my $debugPhase = $ENV{GCS_DEBUG_PLUGIN_PHASE} || 0;
    my $debugFile;
    $debugFile = File::Spec->tmpdir.'/'.GCUtils::getSafeFileName($url)
        if ($debugPhase > 0);

    $self->{loadedUrl} = $url if ! $noSave;

    my $response;
    my $result;
    if ($debugPhase < 2 || (!(-f $debugFile)))
    {
        if ($post)
        {
            $response = $ua->post($url, $post);
        }
        else
        {
            $response = $ua->get($url);
        }

        #UnclePetros 03/07/2011:
        #code to handle correctly 302 response messages
        if ($response->code == 302)
        {
            my $location = $response->header("location");
            # Only follow the redirection when the server told us where to go.
            if (defined $location)
            {
                $response = $ua->get($location);
                $self->{loadedUrl} = $location;
            }
        }

        # decoded_content may die on an unknown encoding; the raw content
        # is used as a fallback at the end of the sub.
        eval {
            $result = $response->decoded_content;
        };

        if ($debugPhase == 1 || $debugPhase == 3)
        {
            # Saving the page is a debugging aid only: report problems but
            # never abort the download because of them.
            if (open my $debugHandle, '>:utf8', $debugFile)
            {
                print {$debugHandle} (defined $result ? $result : '');
                close $debugHandle
                    or warn "Cannot close debug file $debugFile: $!\n";
            }
            else
            {
                warn "Cannot write debug file $debugFile: $!\n";
            }
        }
    }
    else
    {
        if (open my $debugHandle, '<', $debugFile)
        {
            # Slurp the whole file at once.
            local $/;
            $result = <$debugHandle>;
            close $debugHandle;
            utf8::decode($result) if defined $result;
        }
        else
        {
            warn "Cannot read debug file $debugFile: $!\n";
        }
    }
    return $result || ($response && $response->content);
}
# Lower-cases the text, then upper-cases the first letter of every word
# that starts the string or follows whitespace or a comma. The pattern
# needs at least two word characters, so one-letter words stay lower case.
# Case mapping follows the current locale.
sub capWord
{
    my $self = shift;
    my $msg = shift;
    use locale;
    my $newmsg = lc $msg;
    $newmsg =~ s/(\s|,|^)(\w)(\w)(\w*?)/$1\U$2\E$3$4/gi;
    return $newmsg;
}
# Default list of search fields: a reference to an array holding a single
# empty field name.
sub getSearchFieldsArray
{
    my @fields = ('');
    return \@fields;
}
# Builds a human readable, comma separated list of the search fields,
# using the label that $model displays for each field.
sub getSearchFields
{
    my ($self, $model) = @_;
    my @labels = map { $model->getDisplayedLabel($_) } @{$self->getSearchFieldsArray};
    return join(', ', @labels);
}
# Returns the value stored for $field in the hasField table of this
# plugin (undef when the field is not listed).
sub hasField
{
    my $self = shift;
    my $field = shift;
    return $self->{hasField}{$field};
}
# Default extra information for a plugin: an empty string.
sub getExtra
{
    my $extra = '';
    return $extra;
}
# Name of the character set used by the text of the web pages.
# The default defined here is ISO-8859-1.
sub getCharset
{
    my ($self) = @_;
    my $charset = "ISO-8859-1";
    return $charset;
}
# Character set for encoding search term, can sometimes be different
# to the page encoding, but we default to the same as the page set.
# The page charset is looked up through the object so that a plugin
# overriding getCharset is honoured; a plain function call (no invocant)
# still gets the base class value.
sub getSearchCharset
{
    my $self = shift;
    return $self->getCharset if defined $self;
    return getCharset();
}
# Some plugins need extra checks to decide whether a url belongs to the
# language they were written for, so that a dragged and dropped url is
# attributed to the right plugin. Such plugins return 1 here and provide
# a suitable testURL. The default is 0: no extra check.
sub needsLanguageTest
{
    my $required = 0;
    return $required;
}
# Tells whether the given url is handled by the plugin. Only required
# when needsLanguageTest returns true. This default accepts every url.
sub testURL
{
    my $self = shift;
    my $url = shift;
    return 1;
}
# Tells whether the plugin is one of the default plugins GCstar uses.
# Preferred plugins appear first in the plugin list and are highlighted
# with a star icon.
#   0 - not preferred (the default)
#   1 - preferred when its language matches the user's language
#   2 - preferred regardless of the language
sub isPreferred
{
    my $preference = 0;
    return $preference;
}
# Returns the preference level of the plugin. The value is obtained
# through the object so that a plugin overriding isPreferred is honoured;
# a plain function call (no invocant) still gets the base class value.
sub getPreferred
{
    my $self = shift;
    return $self->isPreferred if defined $self;
    return isPreferred();
}
# Names of the fields whose values must not go through the charset
# conversion. The default is an empty list (returned as an array ref).
sub getNotConverted
{
    my ($self) = @_;
    my @untouched = ();
    return \@untouched;
}
# Tells whether HTML entities are decoded in a page before it is parsed.
# The default is 1 (decode).
sub decodeEntitiesWanted
{
    my $wanted = 1;
    return $wanted;
}
# Default suffix for pictures: an empty string.
sub getDefaultPictureSuffix
{
    my $suffix = '';
    return $suffix;
}
# Decodes a field value from the charset of the page (getCharset) into a
# Perl character string.
#
# A plain scalar is decoded and the decoded copy is returned. An array
# reference is treated as a list of rows, each row being an array
# reference whose cells are decoded in place; the same reference is
# returned. Rows that are not array references are left untouched.
# Decoding errors are ignored: the value (or the rest of the row) is then
# kept as it was.
sub convertCharset
{
    my ($self, $value) = @_;

    if (ref($value) eq 'ARRAY')
    {
        foreach my $line (@{$value})
        {
            # Skip anything that is not a row instead of relying on eval
            # to swallow the resulting dereference error.
            next if ref($line) ne 'ARRAY';
            eval {
                my $charset = $self->getCharset;
                $_ = decode($charset, $_) foreach @{$line};
            };
        }
        return $value;
    }

    my $result = $value;
    eval {
        $result = decode($self->getCharset, $result);
    };
    return $result;
}
# Collects all the information about the item selected by
# $self->{wantedIdx} and returns it as a hash reference ($self->{curInfo}).
sub getItemInfo
{
    my ($self) = @_;

    # init is optional: a failure (or a missing method) is ignored.
    eval { $self->init };

    my $url = $self->getItemUrl($self->{itemsList}[$self->{wantedIdx}]->{url});
    $self->{curInfo} = {};
    $self->loadUrl($url);

    # Multi-pass plugins need several web pages to gather everything about
    # one item (for example Allmovie or Allocine, where the cast is on a
    # separate tab). Such a plugin sets {nextUrl} in {curInfo} to request
    # another page; the information accumulates in {curInfo}.
    while (my $followUp = $self->{curInfo}->{nextUrl})
    {
        $self->{curInfo}->{nextUrl} = 0;
        $self->loadUrl($followUp);
    }

    return $self->{curInfo};
}
# Hook giving plugins a chance to rewrite a url before it is downloaded.
# The default returns the url unchanged.
sub changeUrl
{
    my $self = shift;
    my $url = shift;
    return $url;
}
# Downloads and parses one page describing a single item, merging what is
# found into $self->{curInfo}, which is also the return value.
#
# The field named by $self->{urlField} receives $url on the first page of
# an item, followed by the link-name separator and the plugin name.
# Every other scalar field is converted from the page charset (unless the
# plugin lists it in getNotConverted) and tidied up.
sub loadUrl
{
    my ($self, $url) = @_;

    $self->checkProxy;
    $self->checkCookieJar;

    my $realUrl = $self->changeUrl($url);
    my $html = $self->loadPage($realUrl);
    $self->{parsingList} = 0;

    #$html = $self->convertCharset($html);
    # $self->{curInfo} = {} if (!$self->{curInfo}->{title});
    # once the urlField is set don't change it (plugins fetching multiple pages for one item)
    $self->{curInfo}->{$self->{urlField}} = $url if (!$self->{curInfo}->{$self->{urlField}});

    $html = $self->preProcess($html);
    # decode_entities in void context modifies its argument in place.
    decode_entities($html)
        if $self->decodeEntitiesWanted;
    $self->{inside} = undef;
    $self->parse($html);

    my @noConversion = @{$self->getNotConverted};
    foreach my $field (keys %{$self->{curInfo}})
    {
        next if $field eq $self->{urlField};
        $self->{curInfo}->{$field} = $self->convertCharset($self->{curInfo}->{$field})
            if ! GCUtils::inArrayTest($field, @noConversion);
        if (ref($self->{curInfo}->{$field}) ne 'ARRAY')
        {
            # Pipes become commas, carriage returns and trailing blanks
            # of every line are removed.
            $self->{curInfo}->{$field} =~ s/\|/,/gm;
            $self->{curInfo}->{$field} =~ s/\r//gm;
            $self->{curInfo}->{$field} =~ s/[ \t]*$//gm;
        }
    }

    # Tag the url with the plugin name, but only once: this sub runs again
    # for every extra page of a multi-pass item and would otherwise repeat
    # the suffix each time.
    my $linkSuffix = $GCModel::linkNameSeparator.$self->getName;
    $self->{curInfo}->{$self->{urlField}} .= $linkSuffix
        if $self->{curInfo}->{$self->{urlField}} !~ /\Q$linkSuffix\E\z/;

    return $self->{curInfo};
}
# Remembers the proxy to use; it is handed to the user agent by
# checkProxy.
sub setProxy
{
    my $self = shift;
    my $proxy = shift;
    $self->{proxy} = $proxy;
}
# Applies the proxy stored by setProxy to the user agent, for the http
# scheme only.
sub checkProxy
{
    my ($self) = @_;
    my @schemes = ('http');
    $ua->proxy(\@schemes, $self->{proxy});
    #$self->{ua}->proxy(['http'], $self->{proxy});
}
# Remembers the cookie file name; it is handed to the user agent by
# checkCookieJar.
sub setCookieJar
{
    my $self = shift;
    my $cookieJar = shift;
    $self->{cookieJar} = $cookieJar;
}
# Gives the user agent a Netscape-format cookie jar backed by the file
# stored by setCookieJar, with automatic saving enabled. A new jar object
# is created on every call.
sub checkCookieJar
{
    my ($self) = @_;
    my $jar = HTTP::Cookies::Netscape->new(
        'file'     => "$self->{cookieJar}",
        'autosave' => 1,
    );
    $ua->cookie_jar($jar);
}
# Number of search passes the plugin requires.
sub getNumberPasses
{
    # A single search is enough for most plugins, hence the default of 1.
    my $passes = 1;
    return $passes;
}
# Name of the field used for searches with a barcode scanner.
# The default is undef: barcode search is not supported.
sub getEanField
{
    my $eanField = undef;
    return $eanField;
}
}
1;