Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F102769334
ex1a.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, Feb 24, 00:30
Size
1 KB
Mime Type
text/x-c
Expires
Wed, Feb 26, 00:30 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
24419601
Attached To
R6092 InfoSec
ex1a.py
View Options
def main(dedis_path='dedis-1.csv', imdb_path='imdb-1.csv',
         email='dennis.gankin@epfl.ch'):
    """Link one user's rows in the two rating dumps and print movie candidates.

    Both CSV files are read without a header row as the columns
    ``email, movie, date, rating``.  The two tables are inner-joined on
    ``(date, rating)``; the first joined row whose IMDb-side email equals
    *email* supplies that user's identifier in the dedis file (the original
    comments refer to the dedis email and movie values as hashes).  For
    every dedis row carrying that identifier, ``find_same_movies`` is called
    to print the IMDb titles that co-occur with the row's movie value.

    Args:
        dedis_path: Path of the dedis CSV file.
        imdb_path: Path of the IMDb CSV file.
        email: Email address to look up on the IMDb side of the join.

    Raises:
        IndexError: If no joined row has *email* on the IMDb side.  The
            exception type matches what the former ``list(...)[0]`` lookup
            raised, but the message now names the missing email.
    """
    # Kept as a function-scope import: the file has no module-level imports.
    import pandas

    columns = ['email', 'movie', 'date', 'rating']

    # Loading the data.
    # Preprocess the .csv files before loading and eliminate all commas in
    # movie titles!
    dedis = pandas.read_csv(dedis_path, names=columns, quotechar='"')
    imdb = pandas.read_csv(imdb_path, names=columns, quotechar='"')

    # Inner join on same date and rating.  Overlapping column names get the
    # '_imdb' / '_dedis' suffixes (email_imdb, movie_dedis, ...).
    merged = pandas.merge(
        imdb,
        dedis,
        how='inner',
        on=['date', 'rating'],
        suffixes=('_imdb', '_dedis'),
    )

    # dedis-side identifiers of every joined row that belongs to `email`.
    own_ids = merged.loc[merged['email_imdb'] == email, 'email_dedis']
    if own_ids.empty:
        raise IndexError(
            "no joined (date, rating) row found for email %r" % (email,)
        )
    my_hash = own_ids.iloc[0]

    # Get all my movie hashes.
    movies = dedis[dedis['email'] == my_hash]

    # Find corresponding movie names for all hashes in my movies.
    print("Watched movies, mapping. Pick the most frequent for each hash:")
    # A plain loop: find_same_movies is called only for its printed output.
    for movie_hash in movies['movie']:
        find_same_movies(movie_hash, merged)
def find_same_movies(movie_hash, merged):
    """Print *movie_hash*, then the IMDb titles joined to it in *merged*.

    Args:
        movie_hash: Value to look up in the ``movie_dedis`` column.
        merged: DataFrame with at least the columns ``movie_dedis`` and
            ``movie_imdb``.

    Returns:
        None.  The hash and the matching ``movie_imdb`` Series are printed.
    """
    print(movie_hash)

    # For a given movie_hash that I have watched, show every IMDb movie it is
    # linked to; per the original note, the majority will be the right movie.
    is_same_hash = merged['movie_dedis'] == movie_hash
    linked_titles = merged.loc[is_same_hash, 'movie_imdb']
    print(linked_titles)
# Script entry point: run the analysis only when executed directly,
# not when this file is imported as a module.
if __name__ == "__main__":
    main()
Event Timeline
Log In to Comment