use strict;
package NF;
use WWW::Netflix::API;
use XML::Simple;
use Statistics::Descriptive::Discrete;
use Data::Dumper;
use Cache::FileCache;
# Cache switch: when false, the on-disk cache is emptied once, as this file
# is loaded.
my $CACHE_ON = 1;

# Expiry string handed to every $cache->set() call in this module.
my $cache_time = '4 hour';

if (!$CACHE_ON) {
    Cache::FileCache->new->clear();
}

# Method names invoked, in this order, on a Statistics::Descriptive::Discrete
# object when ratings_map_group logs its per-user summary.
my @stats_methods = (
    'count',              'min',      'max',
    'mode',               'median',   'mean',
    'standard_deviation', 'variance', 'text',
    'sample_range',       'sum',      'uniq',
);
# Convert a rating into five star states for display.
#
#   $mode  - 'normalized' when $value is a 0..1 fraction, in which case it is
#            scaled up to the 0..5 star range; any other mode leaves $value
#            untouched.
#   $value - the rating to render.
#
# Returns an array ref of five strings, each 'full', 'half' or 'blank'.
sub stars {
    my ($mode, $value) = @_;

    if (defined $mode && $mode eq 'normalized') {
        $value = $value * 100 / 20;
    }

    my @stars;
    for my $position (1 .. 5) {
        if ($position <= $value) {
            push @stars, 'full';
        }
        elsif ($position - $value < 1) {
            # The rating ends part-way through this star.
            push @stars, 'half';
        }
        else {
            push @stars, 'blank';
        }
    }
    return \@stars;
}
# Build a WWW::Netflix::API client for one user.
#
#   $name - key into the hash ref returned by users(). users() is not defined
#           in the visible part of this file; presumably each entry holds that
#           user's API credentials -- TODO confirm.
#
# Returns the new client object.
sub fetch {
    my ($name) = @_;

    my $users = users();

    # Responses are parsed with XMLin; ForceArray keeps the listed elements in
    # list form even when a response contains only one of them.
    my $xml_filter = sub {
        XMLin(@_, ForceArray => ['links','category','ratings_item','queue_item','rental_history_item']);
    };

    return WWW::Netflix::API->new({
        %{ $users->{$name} },
        content_filter => $xml_filter,
    });
}
# Fetch the first 50 entries of a user's disc queue, ordered by position.
#
#   $name - user name understood by fetch().
#
# Returns an array ref of hashes with the keys position, title,
# average_rating and box_art. Items without a true position are dropped.
sub queue {
    my ($name) = @_;

    my $netflix = fetch($name);
    $netflix->REST->Users->Queues->Disc;
    $netflix->Get(start_index => 0, max_results => 50);
    my $content = $netflix->content;

    my @data =
        map {
            +{
                position       => $_->{position},
                title          => $_->{title}->{regular},
                average_rating => $_->{average_rating},
                box_art        => $_->{box_art}->{medium},
            }
        }
        sort { $a->{position} <=> $b->{position} }
        grep { $_->{position} }
        values %{$content->{queue_item}};

    return \@data;
}
# Return a user's disc and instant queues combined into one hash, cached
# under "mixed_queue:$name" for $cache_time.
#
#   $name - user name understood by fetch().
#
# Returns a hash ref mapping each queue_item key to its item. When both
# queues share a key, the instant-queue item wins because it is copied last.
# The result is undef when neither response contains any queue_item.
sub mixed_queues {
    my ($name) = @_;

    my $cache_key = "mixed_queue:$name";
    my $cache     = Cache::FileCache->new();

    my $cached = $cache->get($cache_key);
    return $cached if defined $cached;

    warn "Mixed_Queus $name Not Cached!";

    my $netflix = fetch($name);

    $netflix->REST->Users->Queues->Disc;
    $netflix->Get(start_index => 0, max_results => 50);
    my $disc_content = $netflix->content;

    $netflix->REST->Users->Queues->Instant;
    $netflix->Get(start_index => 0, max_results => 50);
    my $instant_content = $netflix->content;

    my $combined;
    for my $content ($disc_content, $instant_content) {
        for my $item_key (keys %{$content->{queue_item}}) {
            $combined->{$item_key} = $content->{queue_item}->{$item_key};
        }
    }

    $cache->set($cache_key, $combined, $cache_time);
    return $combined;
}
# Normalize one rating against a user's own rating distribution, caching the
# answer under "normal:$name:$given" for $cache_time.
#
#   $name  - user whose ratings_map() supplies the comparison data.
#   $given - the rating to normalize.
#
# Returns whatever NF::normalize() yields for ($name's ratings, $given).
sub normalized_rating {
    my ($name, $given) = @_;

    my $cache_key = "normal:$name:$given";
    my $cache     = Cache::FileCache->new();

    my $normalized = $cache->get($cache_key);
    return $normalized if defined $normalized;

    warn "Normalized Not Cached!";

    my $dataset = NF::ratings_map($name);
    my @ratings = values(%$dataset);

    $normalized = NF::normalize(\@ratings, $given);
    $cache->set($cache_key, $normalized, $cache_time);
    return $normalized;
}
# Merge the (disc + instant) queues of several users into one per-title table
# and compute joint predicted ratings across those users.
#
#   $users - array ref of user names understood by fetch().
#   $depth - accepted so the signature matches merged_recs(); not used here.
#
# Returns a two-element list ($merged, $reference):
#   $merged->{$id}    - per-title record with id, title, count (number of
#                       users who queued it), names->{$user}, a per-user
#                       sub-hash (actual, predicted, predicted_normalized),
#                       joint_predicted, joint_predicted_normalized,
#                       joint_standard_deviation and
#                       joint_standard_deviation_normalized.
#   $reference->{$id} - { data => raw queue item }, from the last user whose
#                       queue contained the title.
sub merged_queues {
    my ($users, $depth) = @_;
    my @users = @$users;

    my $merged;
    my $reference;

    foreach my $name (@users) {
        my $queue = mixed_queues($name);
        warn "--building queues for $name\n";

        foreach my $ref (keys %$queue) {
            # The title id is the run of digits after the last slash of the
            # item key. Skip keys that do not end that way instead of reusing
            # a stale capture.
            my ($id) = $ref =~ m{/(\d+)$};
            next unless defined $id;
            next if $merged->{$id}->{names}->{$name};

            my $item = $queue->{$ref};
            $merged->{$id}->{names}->{$name}++;
            $merged->{$id}->{count}++;
            $merged->{$id}->{id}                 = $id;
            $merged->{$id}->{$name}->{actual}    = $item->{actual_rating};
            $merged->{$id}->{$name}->{predicted} = $item->{predicted_rating};
            $reference->{$id}->{data}            = $item;

            # Reading the title can die when the item is not shaped as
            # expected; log the item and carry on.
            eval {
                $merged->{$id}->{title} = $item->{title}->{regular};
            };
            if ($@) {
                warn Dumper($reference->{$id});
            }
        }
    }

    # Ask for a predicted rating for every title a user did not queue.
    foreach my $name (@users) {
        my @ids = grep { !$merged->{$_}->{$name}->{predicted} } keys %$merged;
        next unless @ids;

        my $predictions = predicted_from_ids($name, \@ids);
        foreach my $id (keys %$predictions) {
            $merged->{$id}->{$name}->{predicted} = $predictions->{$id};
        }
    }

    warn "--Computing Joint predicted rating \n";
    my $user_total = scalar @users;
    foreach my $id (keys %$merged) {
        my $title = $merged->{$id};
        my ($sum_predicted, $sum_predicted_normalized) = (0, 0);
        my (@predicted, @predicted_normalized);

        foreach my $name (@users) {
            my $rating = ($title->{$name} ||= {});
            $rating->{predicted_normalized} =
                normalized_rating($name, $rating->{predicted});

            $sum_predicted            += $rating->{predicted};
            $sum_predicted_normalized += $rating->{predicted_normalized};
            push @predicted,            $rating->{predicted};
            push @predicted_normalized, $rating->{predicted_normalized};
        }

        $title->{joint_predicted} =
            sprintf("%.5f", $sum_predicted / $user_total);
        $title->{joint_predicted_normalized} =
            sprintf("%.5f", $sum_predicted_normalized / $user_total);

        # Use fresh accumulators for every title: add_data() appends, so a
        # shared object would mix this title's ratings with those of every
        # title processed before it.
        my $stats = Statistics::Descriptive::Discrete->new();
        $stats->add_data(@predicted);
        $title->{joint_standard_deviation} = $stats->standard_deviation();

        my $stats_normalized = Statistics::Descriptive::Discrete->new();
        $stats_normalized->add_data(@predicted_normalized);
        $title->{joint_standard_deviation_normalized} =
            $stats_normalized->standard_deviation();
    }

    return ($merged, $reference);
}
# Merge the recommendation lists of several users into one per-title table
# and compute joint predicted ratings across those users.
#
#   $users - array ref of user names understood by fetch().
#   $depth - number of recommendations requested per user (default 10).
#
# Returns a two-element list ($merged, $reference):
#   $merged->{$id}    - per-title record with id, title, count (number of
#                       users it was recommended to), names->{$user}, a
#                       per-user sub-hash (actual, predicted,
#                       predicted_normalized), joint_predicted,
#                       joint_predicted_normalized, joint_standard_deviation
#                       and joint_standard_deviation_normalized.
#   $reference->{$id} - { data => raw recommendation item }, from the last
#                       user whose list contained the title.
sub merged_recs {
    my ($users, $depth) = @_;
    $depth ||= 10;
    my @users = @$users;

    my $merged;
    my $reference;

    foreach my $name (@users) {
        my $content = grab_recs($name, $depth);
        my $recs    = $content->{recommendation};
        warn "--building recs for $name\n";

        foreach my $ref (keys %$recs) {
            # The title id is the run of digits after the last slash of the
            # item key. Skip keys that do not end that way instead of reusing
            # a stale capture.
            my ($id) = $ref =~ m{/(\d+)$};
            next unless defined $id;
            next if $merged->{$id}->{names}->{$name};

            my $item = $recs->{$ref};
            $merged->{$id}->{names}->{$name}++;
            $merged->{$id}->{count}++;
            $merged->{$id}->{id}                 = $id;
            $merged->{$id}->{$name}->{actual}    = $item->{actual_rating};
            $merged->{$id}->{$name}->{predicted} = $item->{predicted_rating};
            $reference->{$id}->{data}            = $item;
            $merged->{$id}->{title}              = $item->{title}->{regular};
        }
    }

    # Ask for a predicted rating for every title a user was not recommended.
    foreach my $name (@users) {
        my @ids = grep { !$merged->{$_}->{$name}->{predicted} } keys %$merged;
        next unless @ids;

        my $predictions = predicted_from_ids($name, \@ids);
        foreach my $id (keys %$predictions) {
            $merged->{$id}->{$name}->{predicted} = $predictions->{$id};
        }
    }

    warn "--Computing Joint predicted rating \n";
    my $user_total = scalar @users;
    foreach my $id (keys %$merged) {
        my $title = $merged->{$id};
        my ($sum_predicted, $sum_predicted_normalized) = (0, 0);
        my (@predicted, @predicted_normalized);

        foreach my $name (@users) {
            my $rating = ($title->{$name} ||= {});
            $rating->{predicted_normalized} =
                normalized_rating($name, $rating->{predicted});

            $sum_predicted            += $rating->{predicted};
            $sum_predicted_normalized += $rating->{predicted_normalized};
            push @predicted,            $rating->{predicted};
            push @predicted_normalized, $rating->{predicted_normalized};
        }

        $title->{joint_predicted} =
            sprintf("%.5f", $sum_predicted / $user_total);
        $title->{joint_predicted_normalized} =
            sprintf("%.5f", $sum_predicted_normalized / $user_total);

        # Use fresh accumulators for every title: add_data() appends, so a
        # shared object would mix this title's ratings with those of every
        # title processed before it.
        my $stats = Statistics::Descriptive::Discrete->new();
        $stats->add_data(@predicted);
        $title->{joint_standard_deviation} = $stats->standard_deviation();

        my $stats_normalized = Statistics::Descriptive::Discrete->new();
        $stats_normalized->add_data(@predicted_normalized);
        $title->{joint_standard_deviation_normalized} =
            $stats_normalized->standard_deviation();
    }

    return ($merged, $reference);
}
# Rank merged queue titles, best first.
#
#   $merged                - hash ref of per-title records (as built by
#                            merged_queues).
#   $min_intersect         - accepted but not used.
#   $upper_limit_intersect - accepted but not used.
#   $sort                  - 'normalized' ranks on the *_normalized fields;
#                            anything else ranks on the raw fields.
#
# Titles with a false joint_predicted are dropped. The result is ordered by
# count (highest first), then joint predicted rating (highest first), then
# joint standard deviation (lowest first). Returns a list of records.
sub sort_predicted_queues {
    my ($merged, $min_intersect, $upper_limit_intersect, $sort) = @_;

    my $suffix     = $sort eq 'normalized' ? '_normalized' : '';
    my $rating_key = "joint_predicted$suffix";
    my $spread_key = "joint_standard_deviation$suffix";

    # Sort worst-to-best, then flip the whole list.
    my @ascending =
        sort {
               $a->{count}       <=> $b->{count}
            || $a->{$rating_key} <=> $b->{$rating_key}
            || $b->{$spread_key} <=> $a->{$spread_key}
        }
        grep { $_->{joint_predicted} }
        values %{$merged};

    return reverse @ascending;
}
# Rank merged titles by joint predicted rating, best first.
#
#   $merged                - hash ref of per-title records (as built by
#                            merged_recs or merged_queues).
#   $min_intersect         - accepted but not used.
#   $upper_limit_intersect - accepted but not used.
#   $sort                  - 'normalized' ranks and filters on the
#                            *_normalized fields; anything else uses the raw
#                            fields.
#
# Titles whose ranking field is false are dropped. The result is ordered by
# joint predicted rating (highest first), then joint standard deviation
# (lowest first). Returns a list of records.
sub sort_predicted {
    my ($merged, $min_intersect, $upper_limit_intersect, $sort) = @_;

    my $suffix     = $sort eq 'normalized' ? '_normalized' : '';
    my $rating_key = "joint_predicted$suffix";
    my $spread_key = "joint_standard_deviation$suffix";

    # Sort worst-to-best, then flip the whole list.
    my @ascending =
        sort {
               $a->{$rating_key} <=> $b->{$rating_key}
            || $b->{$spread_key} <=> $a->{$spread_key}
        }
        grep { $_->{$rating_key} }
        values %{$merged};

    return reverse @ascending;
}
# Look up a user's predicted rating for a set of titles, cached per
# (user, id set) for $cache_time.
#
#   $name - user name understood by fetch().
#   $ids  - array ref of numeric title ids.
#
# Returns a hash ref mapping title id to predicted_rating, or undef when no
# ratings_item came back (including when $ids is empty).
sub predicted_from_ids {
    my ($name, $ids) = @_;
    my @ids = @$ids;

    # Sort the ids for the key only: callers usually pass `keys %hash`, whose
    # order varies, and an order-dependent key would miss the cache for the
    # very same set of titles.
    my $key   = "predicted:$name:" . join('^', sort @ids);
    my $cache = Cache::FileCache->new();

    my $struc = $cache->get($key);
    return $struc if defined $struc;

    my @refs = map { "http://api.netflix.com/catalog/titles/movies/$_" } @ids;

    # Titles are requested 25 at a time.
    my $batch_size = 25;
    my $netflix    = fetch($name);
    while (my @batch = splice(@refs, 0, $batch_size)) {
        $netflix->REST->Users->Ratings->Title->Predicted();
        $netflix->Get(title_refs => join(',', @batch));
        my $content = $netflix->content;

        foreach my $item_key (keys %{$content->{ratings_item}}) {
            # The title id is the run of digits after the last slash; skip
            # keys that do not end that way.
            my ($id) = $item_key =~ m{/(\d+)$};
            next unless defined $id;
            $struc->{$id} =
                $content->{ratings_item}->{$item_key}->{predicted_rating};
        }
    }

    $cache->set($key, $struc, $cache_time);
    return $struc;
}
# Look up the ratings a user actually gave to a set of titles (uncached).
#
#   $name - user name understood by fetch().
#   $ids  - array ref of numeric title ids.
#
# Returns a hash ref mapping title id to the rating, or undef when no
# ratings_item came back (including when $ids is empty).
sub actual_from_ids {
    my ($name, $ids) = @_;
    my @ids = @$ids;

    my @refs = map { "http://api.netflix.com/catalog/titles/movies/$_" } @ids;

    # Titles are requested 25 at a time.
    my $batch_size = 25;
    my $netflix    = fetch($name);
    my $struc;
    while (my @batch = splice(@refs, 0, $batch_size)) {
        $netflix->REST->Users->Ratings->Title->Actual();
        $netflix->Get(title_refs => join(',', @batch));
        my $content = $netflix->content;

        foreach my $item_key (keys %{$content->{ratings_item}}) {
            # The title id is the run of digits after the last slash; skip
            # keys that do not end that way.
            my ($id) = $item_key =~ m{/(\d+)$};
            next unless defined $id;

            my $item = $content->{ratings_item}->{$item_key};

            # This is the "actual" endpoint, so prefer the user's own rating
            # (rated() reads the same user_rating field from ratings_item).
            # NOTE(review): this sub used to read predicted_rating only; it is
            # kept as a fallback so existing callers see no fewer values --
            # confirm against the API response and drop it if unnecessary.
            $struc->{$id} = defined $item->{user_rating}
                ? $item->{user_rating}
                : $item->{predicted_rating};
        }
    }

    return $struc;
}
# Fetch a user's recommendations, cached under "grab_recs:$name:$count" for
# $cache_time.
#
#   $name  - user name understood by fetch().
#   $count - maximum number of recommendations to request (default 50).
#
# Returns the parsed response content.
sub grab_recs {
    my ($name, $count) = @_;
    $count ||= 50;

    my $cache     = Cache::FileCache->new();
    my $cache_key = "grab_recs:$name:$count";

    my $data = $cache->get($cache_key);
    return $data if defined $data;

    warn "GrabRecs $name Not Cached!";

    my $netflix = fetch($name);
    $netflix->REST->Users->Recommendations();
    $netflix->Get(max_results => $count);

    $data = $netflix->content;
    $cache->set($cache_key, $data, $cache_time);
    return $data;
}
# List a user's recommendations, highest predicted rating first.
#
#   $name - user name understood by fetch().
#
# Returns an array ref of hashes with the keys position, title,
# average_rating, box_art and predicted_rating. Items without a true
# predicted_rating are dropped.
sub recs {
    my ($name) = @_;

    my $content = grab_recs($name);

    # Sort ascending, build the output rows, then flip to best-first.
    my @data = reverse
        map {
            +{
                position         => $_->{position},
                title            => $_->{title}->{regular},
                average_rating   => $_->{average_rating},
                box_art          => $_->{box_art}->{medium},
                predicted_rating => $_->{predicted_rating},
            }
        }
        sort { $a->{predicted_rating} <=> $b->{predicted_rating} }
        grep { $_->{predicted_rating} }
        values %{$content->{recommendation}};

    return \@data;
}
# Fetch up to 50 of a user's title ratings.
#
#   $name - user name understood by fetch().
#
# Returns the parsed response content unchanged.
#
# An earlier version went on to build a list of { user_rating, title,
# average_rating, box_art } sorted by user_rating (highest first), but that
# code sat after an unconditional return and never ran; it has been removed.
sub rated {
    my ($name) = @_;

    my $netflix = fetch($name);
    $netflix->REST->Users->Ratings->Title();
    $netflix->Get(max_results => 50);

    my $content = $netflix->content;
    return $content;
}
# Fetch up to 500 entries of a user's rental history.
#
#   $name - user name understood by fetch().
#
# Returns the parsed response content unchanged.
sub rental_history {
    my ($name) = @_;

    my $client = fetch($name);
    $client->REST->Users->Rental_History();
    $client->Get(max_results => 500);

    my $history = $client->content;
    return $history;
}
# For a group of users, predict every user's rating for every title that any
# of them has rented, and log summary statistics per user.
#
#   $users - array ref of user names understood by fetch().
#
# Returns a hash ref mapping user name to that user's
# predicted_from_ids() result (title id => predicted rating).
sub ratings_map_group {
    my ($users) = @_;

    # Collect the set of title ids across all rental histories.
    my %ids;
    foreach my $name (@$users) {
        my $rh = rental_history($name);
        foreach my $ref (keys %{$rh->{rental_history_item}}) {
            # The title id is the run of digits after the last slash. Take it
            # from this match only, so a failed match cannot reuse an older
            # capture.
            my ($id) = $ref =~ m{/(\d+)$};
            $ids{$id}++ if $id;
        }
    }
    my @ids = keys %ids;

    my $data;
    foreach my $name (@$users) {
        my $predictions = predicted_from_ids($name, \@ids);
        $data->{$name} = $predictions;

        my @ratings = values %$predictions;
        next unless @ratings;

        my $stats = Statistics::Descriptive::Discrete->new();
        $stats->add_data(@ratings);

        warn $name . ":\n ";
        foreach my $method (@stats_methods) {
            warn "\t$method:\t" . $stats->$method . "\n";
        }
    }
    return $data;
}
# Predict a user's rating for every title in their rental history, cached
# under "rm:$name" for $cache_time.
#
#   $name - user name understood by fetch().
#
# Returns the predicted_from_ids() result (title id => predicted rating).
#
# A block of statistics logging used to follow the return statement; it could
# never run (and contained a `next` outside any loop), so it has been removed.
sub ratings_map {
    my ($name) = @_;

    my $cache = Cache::FileCache->new();
    my $key   = "rm:$name";

    my $data = $cache->get($key);
    if (defined $data) {
        warn "Cached!";
        return $data;
    }

    warn "Rating_Map Not Cached!";

    # Collect the set of title ids in the rental history.
    my %ids;
    my $rh = rental_history($name);
    foreach my $ref (keys %{$rh->{rental_history_item}}) {
        # The title id is the run of digits after the last slash. Take it from
        # this match only, so a failed match cannot reuse an older capture.
        my ($id) = $ref =~ m{/(\d+)$};
        $ids{$id}++ if $id;
    }
    my @ids = keys %ids;

    $data = predicted_from_ids($name, \@ids);
    $cache->set($key, $data, $cache_time);
    return $data;
}
# Request a user's predicted ratings for the given title references.
#
#   $name - user name understood by fetch().
#   @refs - title reference strings; they are sent comma-joined as the
#           title_refs parameter.
#
# Returns the parsed response content.
sub rating_for {
    my ($name, @refs) = @_;

    my $title_refs = join(',', @refs);

    my $netflix = fetch($name);
    $netflix->REST->Users->Ratings->Title->Predicted();
    $netflix->Get(title_refs => $title_refs);

    return $netflix->content;
}
# Build a synthetic list of star ratings that follows a fixed percentage
# distribution.
#
#   $total - nominal size of the list. Each rating's share is truncated to a
#            whole number, so the result can hold fewer than $total entries.
#
# Returns an array ref of ratings (1..5), grouped by rating but in no
# particular group order.
sub netflix_normal_set {
    my ($total) = @_;

    # Percentage of the list given to each star rating.
    my %percent_of = (
        5 => 26,
        4 => 33,
        3 => 28,
        2 => 9,
        1 => 4,
    );

    my @holder =
        map { ($_) x int($percent_of{$_}/100 * $total) }
        keys %percent_of;

    return \@holder;
}
# Express a rating as its relative position (a fraction up to 1) within a
# user's ratings, after padding those ratings with netflix_normal_set(50).
#
#   $dataset - array ref of the user's ratings.
#   $given   - the rating to place.
#
# The position is the average of two 1-based indexes into the sorted list:
# the first entry >= $given and the first entry > $given (each defaults to
# the list size when no such entry exists). Returns position / list size.
# Progress is logged with warn() throughout.
sub normalize {
    my ($dataset, $given) = @_;

    my @normal = @{netflix_normal_set(50)};
    warn Dumper(\@normal);

    # Weight the user's ratings with the synthetic set, then order them.
    my @dataset = sort { $a <=> $b } (@$dataset, @normal);
    warn Dumper(\@dataset);

    my $size = scalar(@dataset);
    my $accum;
    my ($pos1, $pos2);
    warn "Size: $size\n";

    foreach my $rating (@dataset) {
        $accum++;
        $pos1 = $accum if !$pos1 && $rating >= $given;
        if ($pos1 && ($rating > $given)) {
            warn 'here';
            $pos2 = $accum;
            last;
        }
        warn "$rating\t$accum\t$pos1\t$pos2\t$given";
    }

    # Midpoint of the two positions, as a fraction of the list length.
    my $pos = (($pos1 || $size) + ($pos2 || $size)) / 2;
    return $pos / $size;
}
# Older normalization: fit a spline through the per-star rating counts and
# return the share of the sampled curve that lies below $given.
#
#   $dataset - array ref of ratings (values 1..5 are counted).
#   $given   - the rating to place.
#
# Returns accumulated / total of the spline values sampled from 1 to 5 in
# steps of .05. Also prints the intermediate figures to STDOUT.
sub old_normalize {
    my ($dataset, $given) = @_;

    # This file never loads Math::Spline, so load it here; without this the
    # constructor call below dies unless some other module happened to load it.
    require Math::Spline;

    # Pad with three of each star value so every count is non-zero.
    my @dataset = (@$dataset, (1 .. 5) x 3);

    my $map = {};
    $map->{$_}++ for @dataset;

    my @x = qw(1 2 3 4 5);
    my @y = @{$map}{@x};
    my $spline = Math::Spline->new(\@x, \@y);

    # NOTE(review): $i is advanced by repeated floating-point addition, so
    # whether the final sample at 5 is included depends on rounding.
    my $i = 1;
    my $total;
    my $accum;
    while ($i <= 5) {
        my $area = $spline->evaluate($i);
        if ($i < $given) { $accum += $area }
        $total += $area;
        $i += .05;
    }

    print "\nAccum: $accum, total: $total\n\n";
    print "\nNomalized: " . $accum/$total . "\n";
    print Dumper($map);
    return $accum/$total;
}
1;