find_experience.py
No OneTemporary
Actions

Subscribers

None

File Metadata

	import sys
	import pandas as pd
	import editdistance

	# Module-level configuration and shared state.
	# Largest edit distance at which compare_names still treats two name
	# strings as the same name.
	max_distance = 1
	# Maps each unique name seen so far to a counter maintained by
	# get_experience: set to 1 on first sight, incremented on every later match.
	names = {} # contains all the unique names and their experience

	def compare_names(name1, name2):
		"""Decide whether two name strings should be treated as the same name.

		Two names match when any of the following holds:

		* one of them is a prefix of the other,
		* they have the same length and the same set of whitespace-separated
		  parts (e.g. first name and last name swapped),
		* their edit distance is at most the module-level ``max_distance``.

		An empty string only matches another empty string. Without this guard
		it would count as a prefix of every name and therefore match all of them.

		Args:
			name1: First name string.
			name2: Second name string.

		Returns:
			True if the two names are considered the same, False otherwise.
		"""
		# "" is a prefix of every string, so it must be handled before the
		# prefix test. Compared against "" explicitly (not by truthiness) so
		# that non-string values still fail loudly below, as they did before.
		if name1 == "" or name2 == "":
			return name1 == name2

		# case: one starts with the other
		if name2.startswith(name1) or name1.startswith(name2):
			return True

		# case: first name and last name inversed -> same set of names
		if len(name1) == len(name2) and set(name1.split()) == set(name2.split()):
			return True

		# case: small typo -> at most max_distance edits apart
		return editdistance.eval(name1, name2) <= max_distance


	def get_experience(name):
		"""Return how many times a matching name was recorded before this call.

		Iterates over the keys of the module-level ``names`` dictionary and stops
		at the first key that ``compare_names`` accepts for ``name``. That key's
		counter is incremented and its value from before the increment is
		returned. If no key matches, ``name`` is stored with a counter of 1 and
		0 is returned.
		"""
		for known_name in names:
			if not compare_names(name, known_name):
				continue
			# Hand back the count as it was before this occurrence.
			previous_count = names[known_name]
			names[known_name] = previous_count + 1
			return previous_count

		# First time this name (or anything matching it) shows up.
		names[name] = 1
		return 0


	# Load the full participant dataset.
	complete_data = pd.read_csv("../results/complete_dataset.csv",
		encoding="utf-8-sig")

	# Columns expected in the output, in a fixed order. This is a list rather
	# than a set: a set has no defined order and recent pandas versions reject
	# it as a ``columns`` argument.
	output_columns = [
		"meeting",
		"name",
		"gender",
		"has_title",
		"affiliation",
		"affiliation_category",
		"role",
		"description",
		"experience",
	]

	metadata = pd.read_csv("../data/meetings_metadata.csv")

	# Process the meetings in the order they appear in the metadata file.
	# get_experience counts earlier occurrences of a name, so the order matters.
	meeting_frames = []
	for label in metadata["label"]:
		print(label)
		# Copy the slice so that adding a column does not write into a view of
		# complete_data (avoids pandas' SettingWithCopyWarning).
		data = complete_data.loc[complete_data.meeting == label].copy()

		data["experience"] = data["name"].apply(get_experience)

		# Collect the per-meeting frames and concatenate once at the end;
		# DataFrame.append was removed in pandas 2.0 and copied the whole
		# frame on every iteration.
		if not data.empty:
			meeting_frames.append(data)

	if meeting_frames:
		combined = pd.concat(meeting_frames, ignore_index=True)
		# Expected columns first (created as empty if missing), followed by any
		# additional columns found in the input file.
		extra_columns = [col for col in combined.columns if col not in output_columns]
		complete_data_with_experience = combined.reindex(
			columns=output_columns + extra_columns)
	else:
		# No meeting had any rows: keep an empty frame with the expected columns.
		complete_data_with_experience = pd.DataFrame(columns=output_columns)


	# Write the dataset, including the "experience" column, to a CSV file
	# without the DataFrame index.
	output_path = "../results/complete_dataset_experience-2.csv"
	complete_data_with_experience.to_csv(output_path, index=False, encoding="utf-8-sig")

	# Report how many entries the names dictionary holds.
	print(len(names))

	# Print the names dictionary and dump its string representation to a text file.
	print(names)
	# The encoding belongs on open(); file.write() accepts no keyword arguments
	# and raised a TypeError here. The with-block closes the file even if the
	# write fails.
	with open("experience_dict.txt", "w", encoding="utf-8-sig") as f:
		f.write(str(names))