diff --git a/code/scripts/find_most_common_word_nokeyword.py b/code/scripts/find_most_common_word_nokeyword.py
new file mode 100644
index 0000000..3a0f3aa
--- /dev/null
+++ b/code/scripts/find_most_common_word_nokeyword.py
@@ -0,0 +1,30 @@
+"""Print the most frequent words and full descriptions among party
+participants whose role is "no keyword found".
+"""
+from collections import Counter
+
+import pandas as pd
+
+complete_data = pd.read_csv("../results/complete_dataset.csv",
+                            encoding="utf-8-sig")
+
+# restrict to rows of category "parties" whose role is "no keyword found"
+no_keyword_participants = complete_data.loc[
+    (complete_data["role"] == "no keyword found")
+    & (complete_data["affiliation_category"] == "parties")]
+
+word_counts = Counter()
+line_counts = Counter()
+
+# dropna is defensive: a missing description is NaN (a float without .replace)
+for description in no_keyword_participants["description"].dropna():
+    description = str(description).replace(";", " ")
+    line_counts[description] += 1
+    # split() without an argument drops the empty strings that split(" ")
+    # produces for consecutive blanks (e.g. "a; b" -> "a  b")
+    word_counts.update(description.split())
+
+print("Most common words:")
+print(word_counts.most_common(100))
+print("Most common lines:")
+print(line_counts.most_common(100))
diff --git a/code/scripts/generate_complete_dataset.py b/code/scripts/generate_complete_dataset.py
index 3b6fa3a..b9c677e 100644
--- a/code/scripts/generate_complete_dataset.py
+++ b/code/scripts/generate_complete_dataset.py
@@ -1,203 +1,203 @@
 import pandas as pd
 import country_converter as coco
 import editdistance
 import os
 import re
 
 import partlistproc.MeetingAnalyzer as Ana
 import partlistproc.MeetingAnalyzerFactory as AnaFac
 
 """ This script generates one csv file containing all the participants of all
     the available meetings (information taken from metadata file)
 """
 
 
 def is_male(name):
     return any(title in name for title in Ana.MeetingAnalyzer.masculine_salutory_addresses)
 
 
 def is_female(name):
     return any(title in name for title in Ana.MeetingAnalyzer.feminine_salutory_addresses)
 
 
 def has_title(name):
     return any(title in name for title in Ana.MeetingAnalyzer.titles)
 
 
 def has_no_title(name):
     return not has_title(name)
 
 
 def get_role(description):
-    # EDIT: I had to redesign this to make keywords of several words possible (28.11.20)
+    """Return the role of the first roles_dict keyword (one or more words) found in description."""
     description = str(description)
     if description == "nan":
         return "no description"
     splitted = re.split('[; ]{1}', description)
     for key_line in roles_dict.keys():
         keywords = re.split(" ", key_line)
         if(str(keywords[0]) in splitted or (str(keywords[0])).lower() in
            splitted):
             if(len(keywords) == 1):
                 return roles_dict[key_line]
             else:
                 # keyword is more than one word: check the others
                 found_word = str(keywords[0])
                 if found_word not in splitted:
                     found_word = found_word.lower()
                 index = splitted.index(found_word)
-                size = len(splitted)
                 for i in range(1, len(keywords)):
-                    if index + i >= size or str(keywords[i]).lower() != str(splitted[index + i]).lower():
-                        return "no keyword found"
-                return roles_dict[key_line]
+                    if index + i >= len(splitted) or str(keywords[i]).lower() != str(splitted[index + i]).lower():
+                        break  # mismatch: give up on this keyword only
+                else:  # no break: every remaining word matched
+                    return roles_dict[key_line]
     return "no keyword found"
 
 
 def clear_name(name):
     """removes all salutory addresses and titles from a given name
 
     Args:
         name (str): the name to be cleared
     """
     cleared_name = name
     while cleared_name.startswith(Ana.MeetingAnalyzer.salutory_addresses):
         startindex = cleared_name.find(" ")
         if startindex == -1:
             return cleared_name
         startindex += 1
         cleared_name = cleared_name[startindex:]
 
     return cleared_name.lower()
 
 
 short_country_names = {}
 def simplify_country_name(affiliation):
     if affiliation in short_country_names:
         return short_country_names[affiliation]
     else:
         # None for not found makes that it returns the input value
         converted = (coco.convert(names=[affiliation], to="name_short",
                                   not_found=None))
         if isinstance(converted, list):
             converted = converted[0]
         short_country_names[affiliation] = converted
         return converted
 
 
 def is_fossil_fuel_associated(words):
     """checks if the given string contains a fossil fuel industry keyword
 
     Args:
         words (str): the string to be tested for keywords
     """
     splitted = re.split('[; ]{1}', (str(words)).lower())
     for keyword in fossil_fuel_keywords:
         if keyword in splitted:
             return True
     return False
 
 
 # pre-processing
 
 # extract the list of roles
 roles_dict = {}
 roles_file = open("../data/dictionaries/role_keywords.txt", "r", encoding="utf-8")
 role_lines = roles_file.readlines()
 current_role = ""
 for line in role_lines:
     if "\n" in line:
         line = line[:line.index("\n")]
     if line.startswith("["):
         if not line.endswith("]"):
             raise KeyError("Format on line {} was incorrect".format(line))
         current_role = line[1:len(line) - 1]
     else:
         if line != "":
             roles_dict[line] = current_role
 
 # extract the list of fossil fuel industry keywords
 fossil_fuel_keywords = []
 ff_file = open("../data/dictionaries/fossil_fuel_industry_keywords.txt", "r",
                encoding="utf-8")
 ff_lines = ff_file.readlines()
 for line in ff_lines:
     if "\n" in line:
         line = line[:line.index("\n")]
     if line != "":
         fossil_fuel_keywords.append(line.lower())
 
 # begin with the real processing
 complete_data = pd.DataFrame(columns={
             "meeting",
             "name",
             "gender",
             "has_title",
             "affiliation",
             "affiliation_category",
             "role",
             "fossil_fuel_industry",
             "description"})
 
 metadata = pd.read_csv("../data/meetings_metadata.csv")
 
 for label in metadata["label"]:
     datafile_name = "../results/participants-csv/participants_" + label + ".csv"
     if label in AnaFac.MeetingAnalyzerFactory.french_meetings:
         datafile_name = "../results/participants-csv/participants_" + label + "-en.csv"
         if not os.path.isfile(datafile_name):
             os.system("python extract_participants.py " + label)
             os.system("python translate_list_fr_en.py " + label)
     # generate the data if not yet available
     if not os.path.isfile(datafile_name):
         os.system("python extract_participants.py " + label)
 
     # open the data from this cop
     cop_data = pd.read_csv(datafile_name, encoding="utf-8-sig")
 
     # add its data to the complete dataframe
     cop_data["meeting"] = label
     # determine gender
     cop_data.loc[cop_data.name.apply(is_male), "gender"] = "m"
     cop_data.loc[cop_data.name.apply(is_female), "gender"] = "f"
     # determine title (if any)
     cop_data.loc[cop_data.name.apply(has_title), "has_title"] = 1
     cop_data.loc[cop_data.name.apply(has_no_title), "has_title"] = 0
 
     # define the role
     cop_data["role"] = cop_data["description"].apply(get_role)
 
     # define the association to fossil fuel industry
     cop_data["fossil_fuel_industry"] = 0
     cop_data.loc[cop_data.description.apply(is_fossil_fuel_associated), "fossil_fuel_industry"] = 1
     cop_data.loc[cop_data.affiliation.apply(is_fossil_fuel_associated), "fossil_fuel_industry"] = 1
 
     # clear up the name
     cop_data["name"] = cop_data["name"].apply(clear_name)
 
     # unify the country names
     cop_data.loc[cop_data.affiliation_category.apply(lambda p: p == "parties"), "affiliation"] = cop_data.loc[cop_data.affiliation_category.apply(lambda p: p == "parties"), "affiliation"].apply(simplify_country_name)
 
     print(label)
     print(cop_data[:5])
     complete_data = complete_data.append(cop_data, ignore_index=True)
 
 # only for a short time
 grouped_by_role = complete_data.groupby("role")
 for role, rest in grouped_by_role:
     print(f"{role}: {len(rest)} participants found")
 
 print(f"Country names map of length {len(short_country_names)}")
 print(short_country_names)
 short_country_names_cleaned = {k: v for (k, v) in short_country_names.items() if k != v}
 country_set = set(short_country_names_cleaned.values())
 print(f"Set of length {len(country_set)}")
 print(country_set)
 f = open("../data/dictionaries/valid_countries.txt", "w")
 for country in country_set:
     f.write(str(country) + "\n")
 f.close()
 
 # generate the output file
 complete_data.to_csv("../results/complete_dataset.csv", encoding="utf-8-sig",
                      index=False)
diff --git a/code/scripts/generate_plots.py b/code/scripts/generate_plots.py
index a02e777..4d923cd 100644
--- a/code/scripts/generate_plots.py
+++ b/code/scripts/generate_plots.py
@@ -1,34 +1,34 @@
 import os
 
 import matplotlib.pyplot as plt
 import pandas as pd
 
 import plots.plot_fossil_fuel_industry as plot_fossil_fuel_industry
 import plots.plot_government as plot_government
 import plots.plot_experience as plot_experience
 import plots.plot_categories as plot_categories
 import plots.plot_missing_participants as plot_missing_participants
 import plots.plot_gender_rate as plot_gender_rate
 import plots.plot_delegation_sizes as plot_delegation_sizes
 import plots.plot_overall_experience_distr as plot_overall_experience_distr
 import plots.plot_delegation_exp as plot_delegation_exp
 import plots.plot_intervention_distr as plot_intervention_distr
 
-plot_intervention_distr.plot("../data/data_regression/dataset_interventions.csv")
 plot_government.plot("../results/complete_dataset.csv")
+plot_intervention_distr.plot("../data/data_regression/dataset_interventions.csv")
 plot_experience.plot("../results/complete_dataset_experience-2.csv")
 plot_fossil_fuel_industry.plot("../results/complete_dataset.csv")
 plot_missing_participants.plot("../results/participants-csv/participants_cop")
 plot_delegation_exp.plot("../results/complete_dataset_experience-2.csv")
 
 plot_overall_experience_distr.plot("../results/complete_dataset_experience-2.csv")
 
 
 
 
 plot_categories.plot("../results/participants-csv/participants_cop")
 
 
 plot_gender_rate.plot("../results/complete_dataset.csv")
 
 plot_delegation_sizes.plot("../results/complete_dataset.csv")
diff --git a/code/scripts/plots/plot_government.py b/code/scripts/plots/plot_government.py
index 3358cbc..723c334 100644
--- a/code/scripts/plots/plot_government.py
+++ b/code/scripts/plots/plot_government.py
@@ -1,90 +1,94 @@
 import pandas as pd
 import matplotlib.pyplot as plt
 
 def get_roles(people):
     counter = len(people)
     gov = len(people.loc[people["role"] == "government"]) / counter
     dipl = len(people.loc[people["role"] == "diplomacy"]) / counter
     sec = len(people.loc[people["role"] == "security"]) / counter
     press = len(people.loc[people["role"] == "press"]) / counter
+    uni = len(people.loc[people["role"] == "universities"]) / counter
     nodescr = len(people.loc[people["role"] == "no description"]) / counter
     nokeyword = len(people.loc[people["role"] == "no keyword found"]) / counter
-    return gov, dipl, sec, press, nodescr, nokeyword
+    return gov, dipl, sec, press, uni, nodescr, nokeyword
 
 
 def plot(path):
     complete_data = pd.read_csv(path,
                                 encoding="utf-8-sig")
     parties = complete_data.loc[complete_data["affiliation_category"] == "parties"]
 
     metadata = pd.read_csv("../data/meetings_metadata.csv")
 
     result_df = pd.DataFrame(columns={"label", "government", "diplomacy",
-                                      "security", "press", "no description",
-                                      "no keyword found"})
+                                      "security", "press", "universities",
+                                      "no description", "no keyword found"})
 
     for label in metadata["label"]:
         this_meeting = parties.loc[parties["meeting"] == label]
 
         by_party = this_meeting.groupby("affiliation")
         gov_acc = 0
         dipl_acc = 0
         sec_acc = 0
         press_acc = 0
+        uni_acc = 0
         nodescr_acc = 0
         nokeyword_acc = 0
         for aff, people in by_party:
-            gov, dipl, sec, press, nodescr, nokeyword = get_roles(people)
+            gov, dipl, sec, press, uni, nodescr, nokeyword = get_roles(people)
             gov_acc += gov
             dipl_acc += dipl
             sec_acc += sec
             press_acc += press
+            uni_acc += uni
             nodescr_acc += nodescr
             nokeyword_acc += nokeyword
 
         n_parties = len(by_party)
         result_df = result_df.append({
             "label": label,
             "government": gov_acc / n_parties,
             "diplomacy": dipl_acc / n_parties,
             "security": sec_acc / n_parties,
             "press": press_acc / n_parties,
+            "universities": uni_acc / n_parties,
             "no description": nodescr_acc / n_parties,
             "no keyword found": nokeyword_acc / n_parties
         }, ignore_index=True)
 
     # First, plot for COPs
 
-    ordered_categories = ["government", "diplomacy", "security", "press", "no keyword found", "no description"]
+    ordered_categories = ["government", "diplomacy", "security", "press", "universities", "no keyword found", "no description"]
     colors = {"no description": "C5", "government": "C1", "diplomacy": "C2",
-            "security": "C6", "press": "C4", "no keyword found": "C0"}
+            "security": "C6", "press": "C4", "universities": "C8", "no keyword found": "C0"}
 
     plot_data = result_df.loc[result_df.label.apply(lambda l: l.startswith("cop"))]
     plot_data = plot_data.set_index("label")
 
     plot_data = pd.DataFrame(plot_data)
     plot_data.columns = pd.CategoricalIndex(plot_data.columns.values,
                                             ordered=True,
                                             categories=ordered_categories)
 
     # Sort the columns (axis=1) by the new categorical ordering
     plot_data = plot_data.sort_index(axis=1)
 
     plot_data.plot(kind="area", xlabel="Meeting", ylabel="Avg rate of roles per party",
                    title="Roles in parties (COP)", stacked=True, color=colors, ylim=[0,1])
 
     # Second, plot for SBs
     plot_data2 = result_df.loc[result_df.label.apply(lambda l: l.startswith("sb"))]
     plot_data2 = plot_data2.set_index("label")
 
     #plot_data2 = plot_data2.unstack(level=0)
     plot_data2 = pd.DataFrame(plot_data2)
     plot_data2.columns = pd.CategoricalIndex(plot_data2.columns.values,
                                              ordered=True,
                                              categories=ordered_categories)
     plot_data2 = plot_data2.sort_index(axis=1)
 
     plot_data2.plot(kind="area", xlabel="Meeting", ylabel="Avg rate of roles per party",
                 title="Roles in parties (SB)", stacked=True, color=colors, ylim=[0,1])
 
     plt.show()