forked from azk0019/CourseProject
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecommender.py
More file actions
67 lines (53 loc) · 1.8 KB
/
recommender.py
File metadata and controls
67 lines (53 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Creates measures of similarity between books. Work in progress (compute time).
"""
from google.colab import drive
import pickle
import pandas as pd
from collections import OrderedDict
from scipy import stats
# Mount Google Drive at /content/drive so the pickles under MyDrive can be
# opened like ordinary local files. Must run before the open() calls below.
drive.mount('/content/drive')

# Load the two pickled inputs. The rest of this script indexes `books` by
# book id and reads a 'reviews' entry from each value; each `reviews` value
# is read for 'book_id', 'user_id' and 'rating'.
# NOTE(review): pickle.load can execute arbitrary code stored in the file --
# only load pickles that were produced by a trusted source.
with open('/content/drive/MyDrive/books.pickle', 'rb') as f:
    books = pickle.load(f)
with open('/content/drive/MyDrive/reviews.pickle', 'rb') as f:
    reviews = pickle.load(f)
# Rank every book by how many reviews it has, most-reviewed first.
# Each entry is a (review_count, book_id) tuple; tuples with equal counts are
# ordered by comparing the book ids themselves.
num_reviews = sorted(
    ((len(books[book]['reviews']), book) for book in books),
    reverse=True,
)

# Keep the (up to) 1000 most-reviewed books. `top1k` is used purely for
# membership tests, so every value is a placeholder 0.
# Slicing instead of indexing range(1000) avoids an IndexError when fewer
# than 1000 books were loaded.
top1k = {}
for _count, book in num_reviews[:1000]:
    if book == 2657:
        # NOTE(review): looks like a leftover spot-check that book id 2657
        # made the cut -- confirm before removing.
        print("yes!")
    top1k[book] = 0
# Restrict the book table to the selected top books. Each record is
# shallow-copied so keys added to it later do not touch the entry in `books`.
top_books = {
    book: books[book].copy()
    for book in books
    if book in top1k
}

# Keep only the reviews written about one of those top books (again as
# shallow copies of the original review records).
top_reviews = {
    rev: reviews[rev].copy()
    for rev in reviews
    if reviews[rev]['book_id'] in top_books
}
# Template rating table: one entry per user who reviewed any top book, in
# order of first appearance in `top_reviews`, each initialised to 0.
user_ratings = OrderedDict.fromkeys(
    (top_reviews[rev]['user_id'] for rev in top_reviews),
    0,
)
# Attach the two per-book working tables in a single pass:
#   'user_ratings' -- this book's own copy of the zero-filled user template,
#                     so every book has an entry for every user, in the same
#                     user order;
#   'book_corrs'   -- empty dict, filled later with one entry per other book.
for book in top_books:
    top_books[book]['user_ratings'] = user_ratings.copy()
    top_books[book]['book_corrs'] = {}
# Overwrite the 0 placeholders with the ratings users actually gave: each
# review writes its rating into the reviewed book's table under the
# reviewer's user id.
for review in top_reviews.values():
    ratings_for_book = top_books[review['book_id']]['user_ratings']
    ratings_for_book[review['user_id']] = review['rating']
# Flatten each book's per-user ratings into a plain list once, up front,
# instead of rebuilding the same list inside the inner loop for every pair.
# Every book's 'user_ratings' is a copy of the same template, so all vectors
# have the same length and the same user at each position.
rating_vectors = {
    book: list(top_books[book]['user_ratings'].values())
    for book in top_books
}

# Pearson correlation between the rating vectors of every ordered pair of
# top books (including each book with itself). The value stored under
# top_books[book_i]['book_corrs'][book_j] is whatever stats.pearsonr returns
# (correlation coefficient together with its p-value).
for book_i, i_ratings in rating_vectors.items():
    corrs_for_i = top_books[book_i]['book_corrs']
    for book_j, j_ratings in rating_vectors.items():
        # j_ratings must come from book_j; reading book_i's ratings here
        # would correlate every book with itself.
        corrs_for_i[book_j] = stats.pearsonr(i_ratings, j_ratings)