File Metadata

Created: Wed, Jul 10, 13:06

find_experience.py
View Options

	import sys
	import pandas as pd
	import editdistance
	import json

	# Module-level configuration and shared state.
	# Largest edit distance (and largest length difference) at which compare_names
	# still treats two names as the same person.
	max_distance = 1
	# Both names must be at least this long before compare_names applies its
	# "one name is a prefix of the other" rule (part of a name on the next line).
	min_length_for_linebreak = 15
	# Registry filled by get_experience: maps the first spelling seen for a person
	# to the list of that person's participations, each stored as a tuple
	# (meeting, name, affiliation, affiliation_category).
	names = dict()


	def compare_names(name1, name2):
	    """Tell whether two name strings are considered to denote the same person.

	    Three rules are tried in order; the first one that applies decides:

	    1. Both names have at least ``min_length_for_linebreak`` characters, one
	       is a prefix of the other and the words of one are a subset of the
	       words of the other (part of the name ended up on the next line).
	    2. Both names have the same length and consist of the same set of words
	       (e.g. first name and last name swapped).
	    3. The lengths differ by at most ``max_distance`` and the edit distance
	       between the names is at most ``max_distance``.

	    Args:
	        name1: First name to compare.
	        name2: Second name to compare.

	    Returns:
	        bool: True if one of the rules matches, False otherwise.
	    """
	    len1, len2 = len(name1), len(name2)

	    # Rule 1: truncated name (remaining words were on the next line).
	    both_long = (len1 >= min_length_for_linebreak
	                 and len2 >= min_length_for_linebreak)
	    if both_long and (name2.startswith(name1) or name1.startswith(name2)):
	        words1 = set(name1.split())
	        words2 = set(name2.split())
	        if words1 <= words2 or words2 <= words1:
	            return True

	    # Rule 2: same words in a different order.
	    if len1 == len2 and set(name2.split()) == set(name1.split()):
	        return True

	    # Rule 3: Levenshtein distance, only worth computing when the lengths are
	    # close enough for a distance <= max_distance to be possible.
	    if abs(len1 - len2) <= max_distance:
	        return editdistance.eval(name1, name2) <= max_distance
	    return False


	def get_experience(name, meeting, affiliation, affiliation_category):
	    """Return a participant's prior experience and record this participation.

	    The global ``names`` registry is searched for the first key that
	    ``compare_names`` matches against ``name``. On a match, the participations
	    stored so far are counted and the current one is appended to that entry.
	    Without a match, a new entry keyed by ``name`` is created.

	    Args:
	        name: Name of the participant.
	        meeting: Label of the current meeting; stored labels starting with
	            "cop" or "sb" are counted as COP / SB experience.
	        affiliation: Affiliation of the participant at this meeting.
	        affiliation_category: Category of the affiliation; the value
	            "parties" is counted as party experience.

	    Returns:
	        tuple[int, int, int, int, int]: ``(cop_exp, sb_exp, party_exp,
	        not_party_exp, exp_err_poss)``. The first four count earlier
	        participations (the current one is excluded). ``exp_err_poss`` is 1
	        when, including the current participation, some meeting label occurs
	        more than once for this person, else 0.
	    """
	    record = (meeting, name, affiliation, affiliation_category)

	    for known_name in names:
	        if not compare_names(name, known_name):
	            continue

	        history = names[known_name]
	        # Count before appending so the current meeting is not included.
	        cop_count = sum(1 for entry in history if entry[0].startswith("cop"))
	        sb_count = sum(1 for entry in history if entry[0].startswith("sb"))
	        party_count = sum(1 for entry in history if entry[3] == "parties")
	        other_count = sum(1 for entry in history if entry[3] != "parties")

	        history.append(record)
	        # A person listed twice for the same meeting hints at a wrong match:
	        # then there are fewer distinct meetings than stored participations.
	        distinct_meetings = {entry[0] for entry in history}
	        possible_error = len(distinct_meetings) != len(history)
	        return cop_count, sb_count, party_count, other_count, int(possible_error)

	    # First time this person is seen: no experience yet.
	    names[name] = [record]
	    return 0, 0, 0, 0, 0

	if __name__ == "__main__":
	    # Script entry point: reads the complete participant dataset, walks through
	    # the meetings in the order given by the metadata file, attaches the
	    # experience values computed by get_experience to every row and writes the
	    # enriched dataset plus a JSON dump of the name registry.

	    # Columns filled from get_experience, in the order of its return tuple.
	    experience_columns = [
	        "experience cop",
	        "experience sb",
	        "experience party",
	        "experience not_party",
	        "experience possible error",
	    ]
	    # A list rather than a set, so the column order of the output file is
	    # deterministic.
	    output_columns = [
	        "meeting",
	        "name",
	        "gender",
	        "has_title",
	        "affiliation",
	        "affiliation_category",
	        "role",
	        "description",
	    ] + experience_columns

	    complete_data = pd.read_csv("../results/complete_dataset.csv",
	                                encoding="utf-8-sig")
	    metadata = pd.read_csv("../data/meetings_metadata.csv")

	    frames = []
	    for label in metadata["label"]:
	        print(label)
	        # Copy the selection so that adding columns does not write into a
	        # slice of complete_data.
	        data = complete_data.loc[complete_data.meeting == label].copy()
	        if data.empty:
	            # No participants recorded for this meeting: nothing to add.
	            continue

	        # get_experience is stateful (it updates the names registry), so call
	        # it exactly once per row, in row order, with an explicit loop.
	        experience_rows = [
	            get_experience(row_name, row_meeting, row_affiliation, row_category)
	            for row_name, row_meeting, row_affiliation, row_category in zip(
	                data["name"],
	                data["meeting"],
	                data["affiliation"],
	                data["affiliation_category"],
	            )
	        ]
	        data[experience_columns] = pd.DataFrame(
	            experience_rows, columns=experience_columns, index=data.index)
	        frames.append(data)

	    # Concatenate once at the end instead of growing a frame inside the loop
	    # (DataFrame.append no longer exists in pandas 2.x).
	    if frames:
	        complete_data_with_experience = pd.concat(frames, ignore_index=True)
	        extra_columns = [column
	                         for column in complete_data_with_experience.columns
	                         if column not in output_columns]
	        # Listed columns first (created empty if missing), then any others.
	        complete_data_with_experience = complete_data_with_experience.reindex(
	            columns=output_columns + extra_columns)
	    else:
	        complete_data_with_experience = pd.DataFrame(columns=output_columns)

	    # generate the output file
	    complete_data_with_experience.to_csv(
	        "../results/complete_dataset_experience-def.csv",
	        encoding="utf-8-sig", index=False)
	    print(len(names))

	    # write the name registry to a text file (closed even if dumping fails)
	    with open("experience_dict_def.txt", "w", encoding="utf-8") as f:
	        json.dump(names, f)

	def get_experience_score(delegates_experience, top_n=10):
	    """Compute the experience score of an affiliation.

	    The score is the average experience of the ``top_n`` most experienced
	    delegates. If there are at most ``top_n`` delegates, it is the average
	    over all of them.

	    Args:
	        delegates_experience (list[int]): The experiences of all the delegates
	            of a party. The sequence is not modified.
	        top_n (int): Number of most experienced delegates taken into account.
	            Defaults to 10.

	    Returns:
	        The average of the selected experience values.

	    Raises:
	        ValueError: If ``top_n`` is smaller than 1.
	        ZeroDivisionError: If ``delegates_experience`` is empty (raised by
	            ``average``).
	    """
	    if top_n < 1:
	        raise ValueError("top_n must be at least 1")
	    if len(delegates_experience) <= top_n:
	        return average(delegates_experience)
	    # sorted() builds a new list, so the caller's data keeps its order.
	    most_experienced = sorted(delegates_experience, reverse=True)[:top_n]
	    return average(most_experienced)


	def average(numbers):
	    """Return the arithmetic mean of ``numbers``.

	    Args:
	        numbers: A sized collection of numbers (``len()`` is applied to it).

	    Returns:
	        The sum of the values divided by their count (true division).

	    Raises:
	        ZeroDivisionError: If ``numbers`` is empty.
	    """
	    # Use the built-in sum() instead of a local accumulator that shadows it.
	    return sum(numbers) / len(numbers)

find_experience.py
No OneTemporary
Actions

File Metadata

find_experience.py
View Options

Event Timeline

find_experience.pyNo OneTemporaryActions

File Metadata

find_experience.pyView Options

Event Timeline

find_experience.py
No OneTemporary
Actions

find_experience.py
View Options