ex1a.py
No OneTemporary
Actions

Subscribers

None

File Metadata


	def main(email='dennis.gankin@epfl.ch'):
	    """Link one IMDb user to their entry in the hashed dataset and list their movies.

	    Reads ``dedis-1.csv`` and ``imdb-1.csv`` from the current working
	    directory (both without a header row, columns: email, movie, date,
	    rating), inner-joins them on identical ``date`` and ``rating``, and uses
	    the join to work out which ``email`` value in the dedis file corresponds
	    to ``email`` in the IMDb file. For every movie value that this dedis user
	    rated, the candidate IMDb titles are printed via ``find_same_movies``.

	    Args:
	        email: E-mail address, as it appears in ``imdb-1.csv``, of the user
	            to look up. Defaults to the address the script was written for.

	    Raises:
	        IndexError: If no joined row belongs to ``email``.
	    """
	    import collections

	    import pandas

	    column_names = ['email', 'movie', 'date', 'rating']

	    # Loading the data.
	    # NOTE: preprocess the .csv files before loading and eliminate all commas
	    # in movie titles, otherwise the rows do not split into four columns.
	    dedis = pandas.read_csv('dedis-1.csv', names=column_names, quotechar='"')
	    imdb = pandas.read_csv('imdb-1.csv', names=column_names, quotechar='"')

	    # Inner join on same date and rating. The join is not unique: unrelated
	    # users who rated something on the same day with the same score also match.
	    merged = pandas.merge(imdb, dedis, how='inner', on=['date', 'rating'],
	                          suffixes=('_imdb', '_dedis'))

	    candidate_hashes = merged.loc[merged['email_imdb'] == email, 'email_dedis']
	    if candidate_hashes.empty:
	        raise IndexError(
	            'no (date, rating) match found for %r in the merged data' % (email,))

	    # Take the dedis e-mail that matches most often instead of whichever row
	    # happens to come first: a single coincidental collision must not win.
	    # Counter.most_common keeps first-seen order on ties, so when all
	    # candidates are equally frequent the first row is still chosen.
	    my_hash = collections.Counter(candidate_hashes).most_common(1)[0][0]

	    # Get all my movie hashes.
	    movies = dedis[dedis['email'] == my_hash]

	    # Find corresponding movie names for all hashes in my movies.
	    print ("Watched movies, mapping. Pick the most frequent for each hash:")
	    for movie_hash in movies['movie']:
	        find_same_movies(movie_hash, merged)


	def find_same_movies(movie_hash, merged):
	    """Print the IMDb titles that the join pairs with one dedis movie value.

	    Prints ``movie_hash`` on its own line, followed by the ``movie_imdb``
	    column of every row in ``merged`` whose ``movie_dedis`` equals
	    ``movie_hash``. Because the join is on date and rating only, several
	    different titles can show up; the majority will be the right movie.

	    Args:
	        movie_hash: Value from the dedis ``movie`` column to look up.
	        merged: DataFrame with at least the columns ``movie_dedis`` and
	            ``movie_imdb``.

	    Returns:
	        None. The result is only printed.
	    """
	    print(movie_hash)
	    # Single .loc selection (row mask + column) instead of chained indexing.
	    linked_titles = merged.loc[merged['movie_dedis'] == movie_hash, 'movie_imdb']
	    print(linked_titles)



	# Run the analysis only when this file is executed as a script, not when
	# it is imported as a module.
	if __name__ == "__main__":
	    main()