Page MenuHomec4science

find_experience.py
No OneTemporary

File Metadata

Created
Mon, Aug 26, 23:40

find_experience.py

import sys
import pandas as pd
import editdistance
# constants
# Largest edit distance at which compare_names still treats two names as equal.
max_distance = 1
# Module-level registry, read and updated by get_experience: maps each unique
# name to the number of times it (or a name matching it) has been seen so far.
names = {} # contains all the unique names and their experience
def compare_names(name1, name2):
    """Return True when two names are considered to denote the same person.

    Two names match when any of the following holds:

    * one is a prefix of the other,
    * they have the same length and consist of the same set of
      whitespace-separated words (e.g. first and last name swapped),
    * their edit distance is at most the module-level ``max_distance``.

    Non-string values never match anything, and an empty string only matches
    another empty string.

    Args:
        name1: First name to compare.
        name2: Second name to compare.

    Returns:
        bool: True if the names match under the rules above, False otherwise.
    """
    # Non-string input (e.g. the NaN pandas uses for an empty cell) has no
    # string methods; treat it as "no match" instead of crashing.
    if not isinstance(name1, str) or not isinstance(name2, str):
        return False
    # An empty string is a prefix of every string, so without this guard a
    # blank name would match every other name.
    if not name1 or not name2:
        return name1 == name2
    # case: one starts with the other
    if name2.startswith(name1) or name1.startswith(name2):
        return True
    # case: first name and last name inversed -> same set of names
    if len(name1) == len(name2) and set(name2.split()) == set(name1.split()):
        return True
    # case: small typo -> names within the allowed edit distance
    return editdistance.eval(name1, name2) <= max_distance
def get_experience(name):
    """Return how often ``name`` was seen before, and record this sighting.

    The module-level ``names`` registry is scanned in insertion order for the
    first key that ``compare_names`` accepts as a match.  If one is found, its
    counter is incremented and the value it had *before* the increment is
    returned.  Otherwise ``name`` is registered with a count of 1 and 0 is
    returned.

    Args:
        name: The name to look up.

    Returns:
        int: Number of earlier sightings of a matching name (0 if none).
    """
    for known in names:
        if compare_names(name, known):
            # First match wins; leave the loop with ``known`` bound to it.
            break
    else:
        # Loop finished without a match: this is a new person.
        names[name] = 1
        return 0

    previous = names[known]
    names[known] = previous + 1
    return previous
# Load the full participant dataset.
complete_data = pd.read_csv("../results/complete_dataset.csv",
                            encoding="utf-8-sig")
# Columns of the output file, in the order they are written.  This is a list
# rather than a set so the column order is deterministic (newer pandas
# versions also refuse a set for ``columns``).
output_columns = [
    "meeting",
    "name",
    "gender",
    "has_title",
    "affiliation",
    "affiliation_category",
    "role",
    "description",
    "experience",
]
metadata = pd.read_csv("../data/meetings_metadata.csv")

# Process the meetings in the order listed in the metadata file.  The order
# matters: get_experience accumulates counts across calls.
per_meeting_frames = []
for label in metadata["label"]:
    print(label)
    # .copy() so the new column is added to an independent frame and not to a
    # slice of complete_data.
    data = complete_data.loc[complete_data.meeting == label].copy()
    if data.empty:
        continue
    data["experience"] = data["name"].apply(get_experience)
    per_meeting_frames.append(data)

# Concatenate once at the end instead of growing a frame inside the loop
# (DataFrame.append was removed in pandas 2.0).
if per_meeting_frames:
    complete_data_with_experience = pd.concat(per_meeting_frames,
                                              ignore_index=True)
else:
    complete_data_with_experience = pd.DataFrame(columns=output_columns)

# Declared columns come first (created empty if the input lacks them); any
# additional input columns are kept after them.
extra_columns = [column for column in complete_data_with_experience.columns
                 if column not in output_columns]
complete_data_with_experience = complete_data_with_experience.reindex(
    columns=output_columns + extra_columns)

# generate the output file
complete_data_with_experience.to_csv("../results/complete_dataset_experience-2.csv",
                                     encoding="utf-8-sig", index=False)
# Report how many unique names were found.
print(len(names))
# print the dictionary to a text file
print(names)
# The encoding is a property of the file handle, not of write(); passing it to
# write() raises TypeError.  The with-block closes the file even on error.
with open("experience_dict.txt", "w", encoding="utf-8-sig") as f:
    f.write(str(names))

Event Timeline