# Netflix Group Recommender Prototype

use strict;
package NF;
use WWW::Netflix::API;
use XML::Simple;
 use Statistics::Descriptive::Discrete;
use Data::Dumper;
use Cache::FileCache;
# Caching switch.  It is only consulted here, at load time: when it is false
# the on-disk cache is emptied once.  The lookup functions below always read
# from and write to the cache regardless of this flag.
my $CACHE_ON = 1;

# Expiry handed to every $cache->set() call in this file.
my $cache_time = '4 hour';

if (!$CACHE_ON) {
    Cache::FileCache->new->clear();
}


# Names of the Statistics::Descriptive::Discrete accessors that the debug
# report in ratings_map_group() calls, in the order they are printed.
my @stats_methods = (
    'count',
    'min', 'max', 'mode', 'median', 'mean',
    'standard_deviation', 'variance', 'text', 'sample_range', 'sum', 'uniq',
);



# Convert a rating into five star states for display.
#
# Arguments:
#   $mode  - when equal to 'normalized', $value is multiplied by 5
#            ($value * 100 / 20) before comparison (presumably a 0..1
#            fraction such as normalize() returns); any other mode uses
#            $value unchanged.
#   $value - the rating to render.
#
# Returns an array reference of five strings, one per star position 1..5:
#   'full'  - the position is at or below the rating,
#   'half'  - the position is above the rating by less than one star,
#   'blank' - the position is a whole star or more above the rating.
sub stars {
    my ($mode, $value) = @_;

    if (defined $mode && $mode eq 'normalized') {
        $value = $value * 100 / 20;
    }

    my @stars;
    for my $position (1 .. 5) {
        if ($position <= $value) {
            push @stars, 'full';
        }
        # Compare position minus value: the reverse difference is always
        # negative here, which would make every remaining star a 'half'.
        elsif ($position - $value < 1) {
            push @stars, 'half';
        }
        else {
            push @stars, 'blank';
        }
    }

    return \@stars;
}
    


# Build a WWW::Netflix::API client for the named user.
#
# The constructor arguments are the hash stored under $name in the structure
# returned by users() (defined outside this part of the file), plus a content
# filter that parses responses with XMLin and forces the listed elements to
# be arrays.
sub fetch {
    my ($name) = @_;

    my $users = users();

    my $parse_xml = sub {
        XMLin(@_, ForceArray => ['links','category','ratings_item','queue_item','rental_history_item']);
    };

    return WWW::Netflix::API->new({
        %{$users->{$name}},
        content_filter => $parse_xml,
    });
}

# Return the first 50 entries of a user's disc queue as an array reference
# of summary hashes (position, title, average_rating, box_art), ordered by
# ascending queue position.  Items without a true position are dropped.
sub queue {
    my ($name) = @_;

    my $netflix = fetch($name);
    $netflix->REST->Users->Queues->Disc;
    $netflix->Get(start_index => 0, max_results => 50);
    my $content = $netflix->content;

    my @by_position =
        sort { $a->{position} <=> $b->{position} }
        grep { $_->{position} }
        values %{$content->{queue_item}};

    my @data = map {
        +{
            position       => $_->{position},
            title          => $_->{title}->{regular},
            average_rating => $_->{average_rating},
            box_art        => $_->{box_art}->{medium},
        }
    } @by_position;

    return \@data;
}


# Return a user's disc and instant queues merged into one hash reference,
# keyed by the queue_item keys of the two API responses.  When a key occurs
# in both queues the instant-queue item wins, because it is copied last.
#
# The merged result is cached under "mixed_queue:$name" for $cache_time.
sub mixed_queues {
    my ($name) = @_;

    my $cache_key = "mixed_queue:$name";
    my $cache     = Cache::FileCache->new();

    my $cached = $cache->get($cache_key);
    return $cached if defined $cached;

    warn "Mixed_Queus $name Not Cached!";

    my $netflix = fetch($name);

    $netflix->REST->Users->Queues->Disc;
    $netflix->Get(start_index => 0, max_results => 50);
    my $disc_content = $netflix->content;

    $netflix->REST->Users->Queues->Instant;
    $netflix->Get(start_index => 0, max_results => 50);
    my $instant_content = $netflix->content;

    # Stays undef when both queues are empty, in which case the next call
    # fetches again.
    my $combined;
    for my $content ($disc_content, $instant_content) {
        for my $item_ref (keys %{$content->{queue_item}}) {
            $combined->{$item_ref} = $content->{queue_item}->{$item_ref};
        }
    }

    $cache->set($cache_key, $combined, $cache_time);
    return $combined;
}


# Map a rating onto the user's own rating distribution.
#
# $name  - user whose ratings_map() supplies the reference distribution.
# $given - the rating to normalise.
#
# Returns whatever NF::normalize() yields for the values of that map and
# $given.  Results are cached per (user, rating) under "normal:$name:$given".
sub normalized_rating {
    my ($name, $given) = @_;

    my $cache_key = "normal:$name:$given";
    my $cache     = Cache::FileCache->new();

    my $normalized = $cache->get($cache_key);

    unless (defined $normalized) {
        warn "Normalized Not Cached!";
        my $dataset = NF::ratings_map($name);
        my @ratings = values(%$dataset);
        $normalized = NF::normalize(\@ratings, $given);
        $cache->set($cache_key, $normalized, $cache_time);
    }

    return $normalized;
}



# Merge the queues of several users and compute a joint predicted rating
# for every title that appears in any of them.
#
# Arguments:
#   $users - array reference of user names.
#   $depth - optional, defaults to 10; accepted but not used by this function.
#
# Returns a two-element list ($merged, $reference):
#   $merged->{$id} holds
#       id, title, count (number of users who queued the title),
#       names->{$user} (set for each such user),
#       $user->{actual}, $user->{predicted}, $user->{predicted_normalized},
#       joint_predicted / joint_predicted_normalized (means over all users,
#       formatted with "%.5f"),
#       joint_standard_deviation / joint_standard_deviation_normalized
#       (standard deviation of this title's per-user values).
#   $reference->{$id}->{data} is the raw queue item the entry was built from.
sub merged_queues {
    my $users = shift;
    my $depth = shift || 10;
    my @users = @$users;

    my $merged;
    my $reference;

    foreach my $name (@users) {
        my $queue = mixed_queues($name) || {};
        warn "--building queues for $name\n";

        foreach my $ref (keys %{$queue}) {
            # Only use the capture when the match succeeded; otherwise $1
            # could still hold a value from an earlier match.
            next unless $ref =~ /.*\/(\d*)$/;
            my $id = $1;

            next if $merged->{$id}->{names}->{$name};

            my $item = $queue->{$ref};
            $merged->{$id}->{names}->{$name}++;
            $merged->{$id}->{count}++;
            $merged->{$id}->{id} = $id;
            $merged->{$id}->{$name}->{actual}    = $item->{actual_rating};
            $merged->{$id}->{$name}->{predicted} = $item->{predicted_rating};
            $reference->{$id}->{data} = $item;

            # The title lookup dies under strict refs when the title is not
            # a hash; dump the item and move on in that case.
            my $title = eval { $item->{title}->{regular} };
            if ($@) {
                warn Dumper($reference->{$id});
                next;
            }
            $merged->{$id}->{title} = $title;
        }
    }

    # Fill in predicted ratings for titles a user did not queue themselves.
    foreach my $name (@users) {
        my @ids = grep { !$merged->{$_}->{$name}->{predicted} } keys %$merged;
        next unless @ids;

        my $predictions = predicted_from_ids($name, \@ids) || {};
        foreach my $id (keys %$predictions) {
            $merged->{$id}->{$name}->{predicted} = $predictions->{$id};
        }
    }

    warn "--Computing Joint predicted rating \n";
    my $user_count = scalar @users;
    foreach my $id (keys %$merged) {
        my $entry = $merged->{$id};

        my $sum_predicted;
        my $sum_predicted_normalized;
        my @predicted;
        my @predicted_normalized;
        foreach my $name (@users) {
            my $predicted = $entry->{$name}->{predicted};
            $sum_predicted += $predicted;
            push @predicted, $predicted;

            my $normalized = normalized_rating($name, $predicted);
            $entry->{$name}->{predicted_normalized} = $normalized;
            $sum_predicted_normalized += $normalized;
            push @predicted_normalized, $normalized;
        }
        $entry->{joint_predicted}            = sprintf("%.5f", $sum_predicted / $user_count);
        $entry->{joint_predicted_normalized} = sprintf("%.5f", $sum_predicted_normalized / $user_count);

        # Fresh statistics objects per title: add_data() accumulates, so a
        # shared object would report the spread of every title seen so far.
        my $stats = Statistics::Descriptive::Discrete->new;
        $stats->add_data(@predicted);
        $entry->{joint_standard_deviation} = $stats->standard_deviation();

        my $stats_normalized = Statistics::Descriptive::Discrete->new;
        $stats_normalized->add_data(@predicted_normalized);
        $entry->{joint_standard_deviation_normalized} = $stats_normalized->standard_deviation();
    }

    return ($merged, $reference);
}





# Merge the recommendation lists of several users and compute a joint
# predicted rating for every recommended title.
#
# Arguments:
#   $users - array reference of user names.
#   $depth - optional, defaults to 10; number of recommendations requested
#            per user through grab_recs().
#
# Returns a two-element list ($merged, $reference):
#   $merged->{$id} holds
#       id, title, count (number of users the title was recommended to),
#       names->{$user} (set for each such user),
#       $user->{actual}, $user->{predicted}, $user->{predicted_normalized},
#       joint_predicted / joint_predicted_normalized (means over all users,
#       formatted with "%.5f"),
#       joint_standard_deviation / joint_standard_deviation_normalized
#       (standard deviation of this title's per-user values).
#   $reference->{$id}->{data} is the raw recommendation item.
sub merged_recs {
    my $users = shift;
    my $depth = shift || 10;
    my @users = @$users;

    my $merged;
    my $reference;

    foreach my $name (@users) {
        my $content = grab_recs($name, $depth);
        my $recs    = $content->{recommendation} || {};
        warn "--building recs for $name\n";

        foreach my $ref (keys %{$recs}) {
            # Only use the capture when the match succeeded; otherwise $1
            # could still hold a value from an earlier match.
            next unless $ref =~ /.*\/(\d*)$/;
            my $id = $1;

            next if $merged->{$id}->{names}->{$name};

            my $item = $recs->{$ref};
            $merged->{$id}->{names}->{$name}++;
            $merged->{$id}->{count}++;
            $merged->{$id}->{id} = $id;
            $merged->{$id}->{$name}->{actual}    = $item->{actual_rating};
            $merged->{$id}->{$name}->{predicted} = $item->{predicted_rating};
            $reference->{$id}->{data} = $item;
            $merged->{$id}->{title} = $item->{title}->{regular};
        }
    }

    # Fill in predicted ratings for titles not recommended to a given user.
    foreach my $name (@users) {
        my @ids = grep { !$merged->{$_}->{$name}->{predicted} } keys %$merged;
        next unless @ids;

        my $predictions = predicted_from_ids($name, \@ids) || {};
        foreach my $id (keys %$predictions) {
            $merged->{$id}->{$name}->{predicted} = $predictions->{$id};
        }
    }

    warn "--Computing Joint predicted rating \n";
    my $user_count = scalar @users;
    foreach my $id (keys %$merged) {
        my $entry = $merged->{$id};

        my $sum_predicted;
        my $sum_predicted_normalized;
        my @predicted;
        my @predicted_normalized;
        foreach my $name (@users) {
            my $predicted = $entry->{$name}->{predicted};
            $sum_predicted += $predicted;
            push @predicted, $predicted;

            my $normalized = normalized_rating($name, $predicted);
            $entry->{$name}->{predicted_normalized} = $normalized;
            $sum_predicted_normalized += $normalized;
            push @predicted_normalized, $normalized;
        }
        $entry->{joint_predicted}            = sprintf("%.5f", $sum_predicted / $user_count);
        $entry->{joint_predicted_normalized} = sprintf("%.5f", $sum_predicted_normalized / $user_count);

        # Fresh statistics objects per title: add_data() accumulates, so a
        # shared object would report the spread of every title seen so far.
        my $stats = Statistics::Descriptive::Discrete->new;
        $stats->add_data(@predicted);
        $entry->{joint_standard_deviation} = $stats->standard_deviation();

        my $stats_normalized = Statistics::Descriptive::Discrete->new;
        $stats_normalized->add_data(@predicted_normalized);
        $entry->{joint_standard_deviation_normalized} = $stats_normalized->standard_deviation();
    }

    return ($merged, $reference);
}


# Order merged queue entries from best to worst.
#
# Arguments:
#   $merged                - hash reference of entries (as built by
#                            merged_queues()).
#   $min_intersect         - optional, defaults to 0; not used.
#   $upper_limit_intersect - not used.
#   $sort                  - 'normalized' ranks on the *_normalized fields,
#                            anything else on the plain ones.
#
# Entries without a true joint_predicted are dropped.  The rest are sorted
# ascending by count, then joint rating, then descending standard deviation,
# and the whole list is reversed before being returned.
sub sort_predicted_queues {
    my ($merged, $min_intersect, $upper_limit_intersect, $sort) = @_;
    $min_intersect ||= 0;

    my ($score_key, $spread_key) =
        $sort eq 'normalized'
        ? ('joint_predicted_normalized', 'joint_standard_deviation_normalized')
        : ('joint_predicted',            'joint_standard_deviation');

    my @ascending =
        sort {
               $a->{count}       <=> $b->{count}
            || $a->{$score_key}  <=> $b->{$score_key}
            || $b->{$spread_key} <=> $a->{$spread_key}
        }
        grep { $_->{joint_predicted} }
        values %{$merged};

    return reverse @ascending;
}



# Order merged entries from best to worst joint rating.
#
# Arguments:
#   $merged                - hash reference of entries (as built by
#                            merged_recs() or merged_queues()).
#   $min_intersect         - optional, defaults to 0; not used.
#   $upper_limit_intersect - not used.
#   $sort                  - 'normalized' ranks on the *_normalized fields,
#                            anything else on the plain ones.
#
# Entries whose ranking field is false are dropped.  The rest are sorted
# ascending by joint rating, then descending standard deviation, and the
# whole list is reversed before being returned.
sub sort_predicted {
    my ($merged, $min_intersect, $upper_limit_intersect, $sort) = @_;
    $min_intersect ||= 0;

    my ($score_key, $spread_key) =
        $sort eq 'normalized'
        ? ('joint_predicted_normalized', 'joint_standard_deviation_normalized')
        : ('joint_predicted',            'joint_standard_deviation');

    my @ascending =
        sort {
               $a->{$score_key}  <=> $b->{$score_key}
            || $b->{$spread_key} <=> $a->{$spread_key}
        }
        grep { $_->{$score_key} }
        values %{$merged};

    return reverse @ascending;
}




# Look up a user's predicted ratings for a list of title ids.
#
# Arguments:
#   $name - user name understood by fetch().
#   $ids  - array reference of numeric title ids.
#
# Returns a hash reference mapping title id to predicted_rating.  The hash
# is empty (never undef) when no ids were given or nothing came back, so
# callers can dereference it directly.
#
# Ids are turned into catalog movie URLs and requested in batches of 25.
# A non-empty result is cached for $cache_time under a key built from the
# user name and the full id list; empty results are not cached so that a
# failed lookup is retried on the next call.
sub predicted_from_ids {
    my $name = shift;
    my $ids  = shift;
    my @ids  = @$ids;

    my $key   = "predicted:$name:" . join('^', @ids);
    my $cache = Cache::FileCache->new();

    my $struc = $cache->get($key);
    return $struc if defined $struc;

    $struc = {};
    return $struc unless @ids;

    my @refs = map { "http://api.netflix.com/catalog/titles/movies/$_" } @ids;

    my $batch_size = 25;
    my $netflix    = fetch($name);
    while (my @batch = splice(@refs, 0, $batch_size)) {
        $netflix->REST->Users->Ratings->Title->Predicted();
        $netflix->Get(title_refs => join(',', @batch));

        my $content = $netflix->content;
        my $items   = $content->{ratings_item} || {};
        foreach my $item_ref (keys %{$items}) {
            # The id is the trailing run of digits after the last slash.
            next unless $item_ref =~ /.*\/(\d*)$/;
            my $id = $1;
            $struc->{$id} = $items->{$item_ref}->{predicted_rating};
        }
    }

    $cache->set($key, $struc, $cache_time) if %$struc;
    return $struc;
}


# Look up the ratings a user has actually given to a list of title ids.
#
# Arguments:
#   $name - user name understood by fetch().
#   $ids  - array reference of numeric title ids.
#
# Returns a hash reference mapping title id to the item's user_rating.  The
# hash is empty (never undef) when no ids were given or nothing came back.
#
# Ids are turned into catalog movie URLs and requested in batches of 25.
# Results are not cached.
sub actual_from_ids {
    my $name = shift;
    my $ids  = shift;
    my @ids  = @$ids;

    my $struc = {};
    return $struc unless @ids;

    my @refs = map { "http://api.netflix.com/catalog/titles/movies/$_" } @ids;

    my $batch_size = 25;
    my $netflix    = fetch($name);
    while (my @batch = splice(@refs, 0, $batch_size)) {
        $netflix->REST->Users->Ratings->Title->Actual();
        $netflix->Get(title_refs => join(',', @batch));

        my $content = $netflix->content;
        my $items   = $content->{ratings_item} || {};
        foreach my $item_ref (keys %{$items}) {
            # The id is the trailing run of digits after the last slash.
            next unless $item_ref =~ /.*\/(\d*)$/;
            my $id = $1;
            # This is the "actual" endpoint, so read the user's own rating
            # (the field rated() sorts on) rather than predicted_rating.
            # NOTE(review): confirm the field name against a live response.
            $struc->{$id} = $items->{$item_ref}->{user_rating};
        }
    }

    return $struc;
}



# Fetch the raw recommendations response for a user.
#
# $name  - user name understood by fetch().
# $count - optional number of recommendations to request; defaults to 50.
#
# Returns the parsed response content.  It is cached per (user, count)
# under "grab_recs:$name:$count" for $cache_time.
sub grab_recs {
    my ($name, $count) = @_;
    $count ||= 50;

    my $cache     = Cache::FileCache->new();
    my $cache_key = "grab_recs:$name:$count";

    my $data = $cache->get($cache_key);
    return $data if defined $data;

    warn "GrabRecs $name Not Cached!";

    my $netflix = fetch($name);
    $netflix->REST->Users->Recommendations();
    $netflix->Get(max_results => $count);

    $data = $netflix->content;
    $cache->set($cache_key, $data, $cache_time);
    return $data;
}

# Return a user's recommendations as an array reference of summary hashes
# (position, title, average_rating, box_art, predicted_rating), highest
# predicted rating first.  Items without a true predicted_rating are
# dropped.  Uses grab_recs() with its default count.
sub recs {
    my ($name) = @_;

    my $content = grab_recs($name);

    my @ascending =
        sort { $a->{predicted_rating} <=> $b->{predicted_rating} }
        grep { $_->{predicted_rating} }
        values %{$content->{recommendation}};

    my @data = map {
        +{
            position         => $_->{position},
            title            => $_->{title}->{regular},
            average_rating   => $_->{average_rating},
            box_art          => $_->{box_art}->{medium},
            predicted_rating => $_->{predicted_rating},
        }
    } reverse @ascending;

    return \@data;
}


# Fetch up to 50 of a user's title ratings and return the parsed response
# content unchanged.
#
# An earlier revision went on to build a list of summary hashes sorted by
# user_rating, but that code sat after the return statement and never ran;
# it has been removed so the function reads as it behaves.
sub rated {
    my ($name) = @_;

    my $netflix = fetch($name);
    $netflix->REST->Users->Ratings->Title();
    $netflix->Get(max_results => 50);

    my $content = $netflix->content;
    return $content;
}



# Fetch up to 500 entries of a user's rental history and return the parsed
# response content unchanged.
sub rental_history {
    my ($name) = @_;

    my $netflix = fetch($name);
    $netflix->REST->Users->Rental_History();
    $netflix->Get(max_results => 500);

    my $content = $netflix->content;
    return $content;
}

# For a group of users, collect every title that appears in any member's
# rental history and look up each member's predicted rating for all of them.
#
# $users - array reference of user names.
#
# Returns a hash reference: user name => result of predicted_from_ids() for
# the combined id set.  As a debugging aid, a summary of each user's ratings
# (one line per entry in @stats_methods) is written with warn.
sub ratings_map_group {
    my ($users) = @_;

    # Pass 1: union of title ids across all rental histories.
    my %seen;
    for my $name (@$users) {
        my $history = rental_history($name);
        for my $ref (keys %{$history->{rental_history_item}}) {
            $ref =~ /.*\/(\d*)$/;
            $seen{$1}++ if $1;
        }
    }
    my @ids = keys %seen;

    # Pass 2: per-user predictions plus the debug report.
    my $data;
    for my $name (@$users) {
        my $predictions = predicted_from_ids($name, \@ids);
        $data->{$name} = $predictions;

        my @ratings = map { $predictions->{$_} } keys %$predictions;

        my $stats = Statistics::Descriptive::Discrete->new;
        $stats->add_data(@ratings);
        next unless @ratings;

        warn $name . ":\n ";
        for my $method (@stats_methods) {
            warn "\t$method:\t" . $stats->$method . "\n";
        }
    }

    return $data;
}


# Return a user's predicted ratings for every title in their rental history.
#
# $name - user name understood by fetch().
#
# Returns the hash reference produced by predicted_from_ids() (title id =>
# predicted rating).  The result is cached under "rm:$name" for $cache_time.
#
# A statistics dump used to follow the return statement; it could never run
# (and contained a `next` outside any loop), so it has been removed.
sub ratings_map {
    my ($name) = @_;

    my $cache = Cache::FileCache->new();
    my $key   = "rm:$name";
    my $data  = $cache->get($key);

    if (defined $data) {
        warn "Cached!";
        return $data;
    }

    warn "Rating_Map Not Cached!";

    # Collect the distinct title ids found in the rental history.
    my %ids;
    my $rh = rental_history($name);
    foreach my $ref (keys %{$rh->{rental_history_item}}) {
        # Use the capture only when the match succeeded and it is true.
        $ids{$1}++ if $ref =~ /.*\/(\d*)$/ && $1;
    }
    my @ids = keys %ids;

    $data = predicted_from_ids($name, \@ids);
    $cache->set($key, $data, $cache_time);

    return $data;
}



# Request a user's predicted ratings for the given title references and
# return the parsed response content.
#
# $name - user name understood by fetch().
# @refs - title reference strings; they are joined with commas and sent as
#         the title_refs parameter.
sub rating_for {
    my ($name, @refs) = @_;

    my $title_refs = join(',', @refs);

    my $netflix = fetch($name);
    $netflix->REST->Users->Ratings->Title->Predicted();
    $netflix->Get(title_refs => $title_refs);

    return $netflix->content;
}



# Build a synthetic list of star ratings that follows a fixed percentage
# distribution (5 stars 26%, 4 stars 33%, 3 stars 28%, 2 stars 9%,
# 1 star 4%).
#
# $total - nominal size of the set.  Each rating appears
#          int(percentage / 100 * $total) times, so the list can be
#          slightly shorter than $total.
#
# Returns an array reference.  Ratings are grouped by value, but the order
# of the groups follows hash iteration order and is not defined.
sub netflix_normal_set {
    my ($total) = @_;

    # star rating => percentage of the set
    my %percent_for = (
        5 => 26,
        4 => 33,
        3 => 28,
        2 => 9,
        1 => 4,
    );

    my @holder;
    for my $rating (keys %percent_for) {
        my $count = int($percent_for{$rating}/100 * $total);
        push @holder, ($rating) x $count;
    }

    return \@holder;
}

# Express a rating as a relative position within a user's rating
# distribution.
#
# $dataset - array reference of the user's ratings.
# $given   - the rating to place.
#
# The user's ratings are padded with netflix_normal_set(50) and sorted
# numerically.  Two 1-based ranks are then located: the first element that
# is >= $given and the first element that is > $given (each falls back to
# the list size when no such element exists).  The result is the midpoint
# of those two ranks divided by the list size.
#
# Debug output (the padding set, the sorted list, and one line per scanned
# element) is written with warn.
sub normalize {
    my ($dataset, $given) = @_;

    my @normal = @{netflix_normal_set(50)};
    my @pool   = @$dataset;

    warn Dumper(\@normal);

    # Weight the user's data with the reference distribution.
    @pool = sort { $a <=> $b } (@pool, @normal);
    warn Dumper(\@pool);

    my $size = scalar(@pool);
    warn "Size: $size\n";

    my ($pos1, $pos2);
    my $rank;
    for my $rating (@pool) {
        $rank++;

        # First element that reaches the given rating.
        $pos1 = $rank if !$pos1 && $rating >= $given;

        # First element that exceeds it ends the scan.
        if ($pos1 && $rating > $given) {
            warn 'here';
            $pos2 = $rank;
            last;
        }
        warn "$rating\t$rank\t$pos1\t$pos2\t$given";
    }
    $pos2 ||= $size;
    $pos1 ||= $size;

    # Midpoint of the two ranks, as a fraction of the list size.
    my $pos = ($pos1 + $pos2) / 2;
    return $pos / $size;
}



# Older normalisation routine based on a spline through the rating histogram.
#
# $dataset - array reference of ratings.
# $given   - the rating to place.
#
# The ratings are padded with three copies of 1..5 and counted per star
# value.  A Math::Spline through the five (star, count) points is sampled
# from 1 to 5 in steps of .05; the return value is the sum of the samples
# taken below $given divided by the sum of all samples.  Progress and the
# histogram are printed to STDOUT.
#
# NOTE(review): Math::Spline is not loaded anywhere in the visible part of
# this file - confirm that the caller loads it before using this function.
sub old_normalize {
    my ($dataset, $given) = @_;

    my @dataset = (@$dataset, (1, 2, 3, 4, 5) x 3);

    # Histogram: star value => number of occurrences.
    my $map = {};
    $map->{$_}++ for @dataset;

    my @x = qw(1 2 3 4 5);
    my @y = @{$map}{1, 2, 3, 4, 5};

    my $spline = Math::Spline->new(\@x, \@y);

    my $total;
    my $accum;
    for (my $i = 1; $i <= 5; $i += .05) {
        my $area = $spline->evaluate($i);
        $accum += $area if $i < $given;
        $total += $area;
    }

    print "\nAccum: $accum,  total: $total\n\n";
    print "\nNomalized: " . $accum/$total . "\n";

    print Dumper($map);
    return $accum/$total;
}



1;