Page MenuHomec4science

ex1a.py
No OneTemporary

File Metadata

Created
Mon, Feb 24, 00:30
def main(email='dennis.gankin@epfl.ch', dedis_path='dedis-1.csv', imdb_path='imdb-1.csv'):
    """Link an IMDb account to its identifier in the dedis file and list its movies.

    Both CSV files are read without a header row as the columns
    ``email, movie, date, rating``.  The two tables are inner-joined on
    ``(date, rating)``; the dedis identifier joined to ``email`` is then used
    to select that user's rows in the dedis table, and for every movie value
    in those rows the joined IMDb titles are printed via
    ``find_same_movies``.

    NOTE: the CSV files must be pre-processed before loading so that movie
    titles contain no commas.

    Args:
        email: Value of the IMDb ``email`` column identifying the account.
        dedis_path: Path of the dedis CSV file.
        imdb_path: Path of the IMDb CSV file.

    Raises:
        IndexError: If no joined row exists for ``email``.
    """
    import pandas

    columns = ['email', 'movie', 'date', 'rating']
    dedis = pandas.read_csv(dedis_path, names=columns, quotechar='"')
    imdb = pandas.read_csv(imdb_path, names=columns, quotechar='"')

    # Inner join on identical date and rating; clashing column names get the
    # suffixes _imdb / _dedis.
    merged = pandas.merge(
        imdb,
        dedis,
        how='inner',
        on=['date', 'rating'],
        suffixes=('_imdb', '_dedis'),
    )

    candidates = merged.loc[merged['email_imdb'] == email, 'email_dedis']
    if candidates.empty:
        raise IndexError(
            f"no (date, rating) match found for {email!r}; cannot determine its dedis identifier"
        )
    # NOTE(review): this takes the first joined row. If other dedis users share
    # a (date, rating) pair with this account, the first row is not necessarily
    # the right identifier -- consider picking the most frequent one instead.
    my_hash = candidates.iloc[0]

    # All dedis rows that belong to the identified user.
    movies = dedis[dedis['email'] == my_hash]

    print("Watched movies, mapping. Pick the most frequent for each hash:")
    for movie_hash in movies['movie']:
        find_same_movies(movie_hash, merged)
def find_same_movies(movie_hash, merged):
    """Print ``movie_hash`` followed by every IMDb title joined to it.

    Args:
        movie_hash: Value to look up in the ``movie_dedis`` column.
        merged: DataFrame with at least the columns ``movie_dedis`` and
            ``movie_imdb`` (the joined IMDb/dedis table).

    Returns:
        None. The result is only printed; per the original author's note, the
        title that appears most often in the output is expected to be the
        right one.
    """
    print(movie_hash)
    # Select the IMDb titles of all joined rows whose dedis movie equals the hash.
    linked_titles = merged.loc[merged['movie_dedis'] == movie_hash, 'movie_imdb']
    print(linked_titles)
# Run the analysis only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Event Timeline