diff --git a/code/lib/partlistproc/DigitalPdfExtractor.py b/code/lib/partlistproc/DigitalPdfExtractor.py
index b422025..050e2e1 100644
--- a/code/lib/partlistproc/DigitalPdfExtractor.py
+++ b/code/lib/partlistproc/DigitalPdfExtractor.py
@@ -1,190 +1,210 @@
 import functools
+import pandas as pd
 from pdfminer.layout import LAParams
 from pdfminer.pdfdocument import PDFDocument
 from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
 from pdfminer.pdfpage import PDFPage
 from pdfminer.pdfparser import PDFParser
 import partlistproc.MeetingAnalyzer as Ana
 from partlistproc import PDFPageDetailedAggregator
 from partlistproc.PdfExtractor import PdfExtractor
 
 # what is inserted in the txt to mark a new page
 new_page_marker = ''
 
 IDX_PAGENR = 0
 IDX_X0 = 1
 IDX_Y0 = 2
 IDX_X1 = 3
 IDX_Y1 = 4
 IDX_ELEM = 5
 
 
 class DigitalPdfExtractor(PdfExtractor):
     """
     Class to represent an extractor that converts a normal pdf
     participant list into a .txt file
     """
 
     def __init__(self, data_file, output_file, start_page, column_tolerance=5,
-                 linebreak_tolerance=5, sameline_tolerance=0.5, list_parts=1):
+                 linebreak_tolerance=5, sameline_tolerance=0.5, list_parts=1,
+                 valid_affiliation_names_path=None):
         """
         Constructor of the class
 
         Args:
             data_file (string): the PDF file to extract the text from
             output_file (string): the file to put the output text in, usually .txt
             start_page (int): the first page of the pdf to extract
             column_tolerance (int, optional): if two elements are more than this
                 x0 distance apart, they're not in the same column. Defaults to 5.
             linebreak_tolerance (int, optional): if 2 lines are further apart than
                 this, insert a double line break between them. Defaults to 5.
             sameline_tolerance (float, optional): the y0 difference that still
                 accepts two elements as being on the same line. Defaults to 0.5.
             list_parts (int, optional): how many pdfs the list is split into.
                 Defaults to 1.
+            valid_affiliation_names_path (str, optional): path to a csv file that
+                contains a dataframe with valid country names. Defaults to None.
""" self.data_file = data_file self.output_file = output_file self.start_page = start_page self.column_tolerance = column_tolerance self.linebreak_tolerance = linebreak_tolerance self.sameline_tolerance = sameline_tolerance self.list_parts = list_parts + self.valid_affiliation_names_path = valid_affiliation_names_path # this dictionnary contains (page -> y0) for every affiliation category title self.category_dict = {} def extract_text(self): if self.list_parts == 1: # everything normal self.extract_text_of_pdf() else: base_name = self.data_file[:self.data_file.index(".pdf")] for i in range(1, self.list_parts + 1): print("Part " + str(i)) self.data_file = base_name + "-" + str(i) + ".pdf" self.extract_text_of_pdf() def extract_text_of_pdf(self): """ Overriding abstract method """ print("Extract the text from the pdf list using pdfminer.six") fp = open(self.data_file, 'rb') parser = PDFParser(fp) rsrcmgr = PDFResourceManager() laparams = LAParams() device = PDFPageDetailedAggregator(rsrcmgr, laparams=laparams) interpreter = PDFPageInterpreter(rsrcmgr, device) for page in PDFPage.get_pages(fp): interpreter.process_page(page) # receive the LTPage object for this page device.get_result() fp.close() # find the pages of the affiliation category titles with y-position rows = device.rows for row in rows: if (row[IDX_ELEM].lower()).startswith(Ana.MeetingAnalyzer.affiliation_categories): # this is a new title -> check if the first one on its page page = row[IDX_PAGENR] if page in self.category_dict: (self.category_dict[page]).append(row[IDX_Y0]) else: self.category_dict[page] = [row[IDX_Y0]] # device.rows now contain the text containers with location sorted_rows = sorted(rows, key=functools.cmp_to_key(self.make_comparator(self.lineComesFirst)), reverse=False) - # print(sorted_rows[:1000]) + + # To prevent bug that doesn't recognize affiliations at some points, + # import a list of valid country names + valid_affiliation_names = set() + if self.valid_affiliation_names_path is not None: + valid_affiliation_names = set((pd.read_csv(self.valid_affiliation_names_path, encoding="utf-8-sig"))["valid_affiliations"].values.tolist()) # insert line breaks as empty elements sorted_rows_with_linebreaks = [] prev = '' size = len(sorted_rows) for i, row in enumerate(sorted_rows): # first of all, ignore all the pages before the start page and the page numbers if not PdfExtractor.is_pagenumber(row[IDX_ELEM]) and row[IDX_PAGENR] >= self.start_page - 1: # normal case: in the same column - if (prev != '' and prev[IDX_PAGENR] == row[IDX_PAGENR] and abs(prev[IDX_X0] - row[IDX_X0]) < self.column_tolerance): + if (prev != '' and prev[IDX_PAGENR] == row[IDX_PAGENR] and + abs(prev[IDX_X0] - row[IDX_X0]) < self.column_tolerance): # compare the y values if prev[IDX_Y0] - row[IDX_Y1] > self.linebreak_tolerance: sorted_rows_with_linebreaks.append((0, 0, 0, 0, 0, '')) # affiliation category titles elif (prev != '' and prev[IDX_ELEM].lower().startswith(Ana.MeetingAnalyzer.affiliation_categories)): sorted_rows_with_linebreaks.append((0, 0, 0, 0, 0, '')) # also mark new pages elif (prev != '' and prev[IDX_PAGENR] < row[IDX_PAGENR]): sorted_rows_with_linebreaks.append((0, 0, 0, 0, 0, new_page_marker)) # new column: only mark when a new affiliation follows (no continuation of the person) elif (prev != '' and abs(prev[IDX_X0] - row[IDX_X0]) - >= self.column_tolerance - and i + 1 < size - and row[IDX_Y0] - sorted_rows[i + 1][IDX_Y1] - > self.linebreak_tolerance): - sorted_rows_with_linebreaks.append((0, 0, 0, 0, 0, '')) + >= 
+                        >= self.column_tolerance):
+                    # case 1: only one line at the top of the new column -> new affiliation
+                    if (i + 1 < size
+                            and row[IDX_Y0] - sorted_rows[i + 1][IDX_Y1]
+                            > self.linebreak_tolerance):
+                        sorted_rows_with_linebreaks.append((0, 0, 0, 0, 0, ''))
+                    # case 2: the block of text matches a valid affiliation name
+                    block_of_text = row[IDX_ELEM]
+                    j = i
+                    while (j + 1 < size and
+                            sorted_rows[j][IDX_Y0] - sorted_rows[j + 1][IDX_Y1]
+                            < self.linebreak_tolerance):
+                        block_of_text += " " + sorted_rows[j + 1][IDX_ELEM]
+                        j += 1
+                    if block_of_text.lower() in valid_affiliation_names:
+                        sorted_rows_with_linebreaks.append((0, 0, 0, 0, 0, ''))
                 sorted_rows_with_linebreaks.append(row)
                 prev = row
 
         sorted_elems = [el[IDX_ELEM] for el in sorted_rows_with_linebreaks]
 
         # print the result to the txt file
         with open(self.output_file, "a", encoding="utf-8") as f:
             for row in sorted_elems:
                 f.write("%s\n" % row)
         # clean the category dictionary
         self.category_dict = {}
 
-
-    # my method
     def lineComesFirst(self, el1, el2):
         """
         returns True if el1 comes before el2 in the document
         """
         # compare pages
         if el1[IDX_PAGENR] == el2[IDX_PAGENR]:
             # if there is a category title on that page
             if el1[IDX_PAGENR] in self.category_dict:
                 # check if one element is a new category -> compare only y0
                 if ((el1[IDX_ELEM].lower()).startswith(Ana.MeetingAnalyzer.affiliation_categories)
                         or (el2[IDX_ELEM].lower()).startswith(Ana.MeetingAnalyzer.affiliation_categories)):
                     # if they're on the same line, sort the category title first
                     if abs(el1[IDX_Y0] - el2[IDX_Y0]) < self.sameline_tolerance:
                         return (el1[IDX_ELEM].lower()).startswith(Ana.MeetingAnalyzer.affiliation_categories)
                     else:
                         return el1[IDX_Y0] > el2[IDX_Y0]
                 # check if they're in different categories -> compare only y0
                 borders = list(self.category_dict[el1[IDX_PAGENR]])
                 borders.append(el1[IDX_Y0])
                 borders.append(el2[IDX_Y0])
                 sorted_borders_and_els = sorted(borders)
                 idx_diff = abs(sorted_borders_and_els.index(el1[IDX_Y0])
                                - sorted_borders_and_els.index(el2[IDX_Y0]))
                 if idx_diff > 1:
                     return el1[IDX_Y0] > el2[IDX_Y0]
             # otherwise, do the normal column check
             # check if they're in the same column (x0 similar)
             if abs(el1[IDX_X0] - el2[IDX_X0]) <= self.column_tolerance:
                 # same column -> y0 is decisive (grows from the bottom of the page)
                 return el1[IDX_Y0] > el2[IDX_Y0]
             else:
                 # different columns
                 return el1[IDX_X0] < el2[IDX_X0]
         else:
             return el1[IDX_PAGENR] < el2[IDX_PAGENR]
 
     def make_comparator(self, less_than):
         def compare(x, y):
             if less_than(x, y):
                 return -1
             elif less_than(y, x):
                 return 1
             else:
                 return 0
         return compare
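
Note: the new valid_affiliation_names lookup assumes a csv file with a single
"valid_affiliations" column whose entries are lowercase, since the extractor
compares block_of_text.lower() against the set. A minimal sketch of how such a
file could be produced (the two country names are illustrative placeholders,
not taken from the repository):

    import pandas as pd

    # hypothetical example entries; the real file lives at
    # ../data/dictionaries/valid_affiliation_names.csv
    names = pd.DataFrame({"valid_affiliations": ["france", "morocco"]})
    # utf-8-sig matches the encoding the extractor reads the file with
    names.to_csv("valid_affiliation_names.csv", index=False, encoding="utf-8-sig")
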
"sb48b": 2, "sb50": 2} # same for the end page default_endpage = 0 custom_default_endpage = {"cop1": 126} - def __init__(self, label, output_file): + def __init__(self, label, output_file, valid_affiliation_names_path=None): """ Constructor of this class Args: label (str): label of the meeting to process - output_file (string): name of the file to put the text in + output_file (str): name of the file to put the text in + valid_affiliation_names_path (str, optional): path to a csv file that contains + a dataframe with valid country names. Defaults to None. """ self.label = label # check if we have list for this label if not (os.path.isfile(DigitalPdfExtractor.getPDFpath(label)) or os.path.isfile(DigitalPdfExtractor.getPDFpath(label, 1))): raise ValueError("For this meeting, no PDF list is located in the data folder.") self.output_file = output_file + self.valid_affiliation_names_path = valid_affiliation_names_path def createPdfExtractor(self): """ returns the accurate PdfExtractor that is correctly initialized """ startpage = self.custom_default_startpage.get(self.label, self.default_startpage) if self.label in self.meetings_that_need_ocr: # Use OCR endpage = self.custom_default_endpage.get(self.label, self.default_endpage) return OcrExtractor(self.label, self.output_file, startpage, endpage) else: # Use PDF to txt if self.label in self.meetings_with_several_pdfs: # Pdf is splitted i = 1 path = DigitalPdfExtractor.getPDFpath(self.label, i) parts = 0 while os.path.isfile(path): parts += 1 i += 1 path = DigitalPdfExtractor.getPDFpath(self.label, i) return DigitalPdfExtractor( DigitalPdfExtractor.getPDFpath(self.label), self.output_file, startpage, - list_parts=parts) + list_parts=parts, + valid_affiliation_names_path=self.valid_affiliation_names_path) else: # cop5 has a special structure if self.label == "cop5": return DigitalPdfExtractor( DigitalPdfExtractor.getPDFpath(self.label), self.output_file, startpage, - column_tolerance=50) - extr.extract_text() + column_tolerance=50, + valid_affiliation_names_path=self.valid_affiliation_names_path) else: # normal case: just one pdf return DigitalPdfExtractor( DigitalPdfExtractor.getPDFpath(self.label), self.output_file, - startpage) - extr.extract_text() + startpage, + valid_affiliation_names_path=self.valid_affiliation_names_path) diff --git a/code/scripts/extract_participants.py b/code/scripts/extract_participants.py index 38624b9..ed43420 100644 --- a/code/scripts/extract_participants.py +++ b/code/scripts/extract_participants.py @@ -1,41 +1,42 @@ """ The main script of the cop participants extraction. Takes as an argument the number of the cop to process. 
""" import os import sys import partlistproc from partlistproc.MeetingAnalyzerFactory import MeetingAnalyzerFactory from partlistproc.PdfExtractorFactory import PdfExtractorFactory txt_prefix = "../results/participants-txt/" csv_prefix = "../results/participants-csv/" default_intermediate_name = txt_prefix + "raw_X.txt" default_output_name = csv_prefix + "participants_X.csv" +valid_affiliation_names_path = "../data/dictionaries/valid_affiliation_names.csv" # format: # extract_participants_xopX.py # # the last option is given if the OCR has already been done (for cop 1 - 4) # parse arguments arguments = sys.argv label = arguments[1] intermediate_name = default_intermediate_name.replace("X", label) output_name = default_output_name.replace("X", label) if(len(arguments) > 2): intermediate_name = txt_prefix + arguments[2] output_name = csv_prefix + arguments[3] # First, extract the text from the pdf if not already done if not os.path.isfile(intermediate_name): - extr_factory = PdfExtractorFactory(label, intermediate_name) + extr_factory = PdfExtractorFactory(label, intermediate_name, valid_affiliation_names_path) extr = extr_factory.createPdfExtractor() extr.extract_text() # Second, extract the data from the text ana_factory = MeetingAnalyzerFactory(label, intermediate_name) ana = ana_factory.get_analyzer() ana.get_data(output_name) diff --git a/code/scripts/plots/plot_participant_graph.py b/code/scripts/plots/plot_participant_graph.py index af7f395..50172c8 100644 --- a/code/scripts/plots/plot_participant_graph.py +++ b/code/scripts/plots/plot_participant_graph.py @@ -1,106 +1,108 @@ import pandas as pd import matplotlib.pyplot as plt import json import networkx as nx def find_largest_parties(): country_file = open("../data/dictionaries/valid_countries.txt", "r") countries = country_file.readlines() countries = [c.replace("\n", "") for c in countries] complete_data = pd.read_csv("../results/complete_dataset.csv", encoding="utf-8-sig") parties = complete_data.loc[complete_data["affiliation_category"] == "parties"] parties = parties.loc[parties["affiliation"].apply(lambda x: x in countries)] total_nb_participants_per_country = dict() grouped_parties = parties.groupby("affiliation") for aff, people in grouped_parties: total_nb_participants_per_country[aff] = len(people) sorted_c = sorted(total_nb_participants_per_country.items(), key=lambda x: x[1], reverse=True) print(sorted_c) return [x[0] for x in sorted_c] def plot(path): LABEL_IDX = 0 NAME_IDX = 1 AFFILIATION_IDX = 2 CATEGORY_IDX = 3 + considered_meetings = ["cop10", "sb22", "cop11", "sb24", "cop12", "sb26", "cop13", "sb28", "cop14", "sb30", "cop15", "sb32", + "cop16", "sb34", "cop17", "sb36", "cop18", "sb38", "cop19", "sb40", "cop20", "sb42", "cop21", "sb44", + "cop22", "sb46", "cop23", "sb48", "sb48b", "cop24", "sb50", "cop25"] f = open(path, "r", encoding="utf-8") text = f.read() names = json.loads(text) # exclude the names that have an error (two names in the same meeting) names = {n: l for n, l in names.items() if len(set([m[0] for m in l])) == len(l)} country_file = open("../data/dictionaries/valid_countries.txt", "r") countries = country_file.readlines() countries = [c.replace("\n", "") for c in countries] max_set_n = len(countries) biggest_countries = find_largest_parties()[:max_set_n] # biggest_countries.append("european union") G = nx.Graph() G.clear() affiliations = set(biggest_countries) # TODO could just do it for the 40 countries that have the most participants G.add_nodes_from(biggest_countries) # TODO maybe add NGO's for 
diff --git a/code/scripts/plots/plot_participant_graph.py b/code/scripts/plots/plot_participant_graph.py
index af7f395..50172c8 100644
--- a/code/scripts/plots/plot_participant_graph.py
+++ b/code/scripts/plots/plot_participant_graph.py
@@ -1,106 +1,108 @@
 import pandas as pd
 import matplotlib.pyplot as plt
 import json
 import networkx as nx
 
 
 def find_largest_parties():
     country_file = open("../data/dictionaries/valid_countries.txt", "r")
     countries = country_file.readlines()
     countries = [c.replace("\n", "") for c in countries]
     complete_data = pd.read_csv("../results/complete_dataset.csv", encoding="utf-8-sig")
     parties = complete_data.loc[complete_data["affiliation_category"] == "parties"]
     parties = parties.loc[parties["affiliation"].apply(lambda x: x in countries)]
     total_nb_participants_per_country = dict()
     grouped_parties = parties.groupby("affiliation")
     for aff, people in grouped_parties:
         total_nb_participants_per_country[aff] = len(people)
     sorted_c = sorted(total_nb_participants_per_country.items(), key=lambda x: x[1], reverse=True)
     print(sorted_c)
     return [x[0] for x in sorted_c]
 
 
 def plot(path):
     LABEL_IDX = 0
     NAME_IDX = 1
     AFFILIATION_IDX = 2
     CATEGORY_IDX = 3
+    considered_meetings = ["cop10", "sb22", "cop11", "sb24", "cop12", "sb26",
+                           "cop13", "sb28", "cop14", "sb30", "cop15", "sb32",
+                           "cop16", "sb34", "cop17", "sb36", "cop18", "sb38",
+                           "cop19", "sb40", "cop20", "sb42", "cop21", "sb44",
+                           "cop22", "sb46", "cop23", "sb48", "sb48b", "cop24",
+                           "sb50", "cop25"]
     f = open(path, "r", encoding="utf-8")
     text = f.read()
     names = json.loads(text)
     # exclude the names that have an error (the same name twice in the same meeting)
     names = {n: l for n, l in names.items() if len(set([m[0] for m in l])) == len(l)}
     country_file = open("../data/dictionaries/valid_countries.txt", "r")
     countries = country_file.readlines()
     countries = [c.replace("\n", "") for c in countries]
     max_set_n = len(countries)
     biggest_countries = find_largest_parties()[:max_set_n]
     # biggest_countries.append("european union")
     G = nx.Graph()
     G.clear()
     affiliations = set(biggest_countries)
     # TODO could just do it for the 40 countries that have the most participants
     G.add_nodes_from(biggest_countries)
     # TODO maybe add NGO's
     for name, list in names.items():
         previous_affiliation = ""
         current_affiliation = ""
         for participation in list:
-            if participation[AFFILIATION_IDX] in countries:
+            if participation[LABEL_IDX] in considered_meetings:
                 previous_affiliation = current_affiliation
                 current_affiliation = participation[AFFILIATION_IDX]
-                if current_affiliation not in affiliations and len(affiliations) < max_set_n:
-                    print(current_affiliation)
+                if current_affiliation not in affiliations:
                     G.add_node(current_affiliation)
                     affiliations.add(current_affiliation)
                 if previous_affiliation in affiliations and current_affiliation in affiliations and previous_affiliation != "" and previous_affiliation != current_affiliation:
                     if (previous_affiliation, current_affiliation) in G.edges:
                         # increase weight
                         if G[previous_affiliation][current_affiliation]["weight"] > 20:
                             print(name)
                             print(list)
                         G[previous_affiliation][current_affiliation]["weight"] += 1
                     else:
                         G.add_edge(previous_affiliation, current_affiliation, weight=1)
 
     # nodes
     print("Sorted nodes")
     highest_nodes = [x[0] for x in sorted(G.degree(weight='weight'), key=lambda x: x[1], reverse=True)]
     print(highest_nodes[:40])
     G = G.subgraph(highest_nodes[:40])
 
     # find the largest weight for the resizing of the edges
     sorted_edges = sorted(G.edges(data=True), key=lambda x: x[2]['weight'], reverse=True)
     print("biggest edges")
     print(sorted_edges[:20])
     max_weight = sorted_edges[0][2]["weight"]
     print(max_weight)
 
     pos = nx.circular_layout(G)
     nx.draw_networkx_nodes(
         G,
         pos,
         node_size=2,
     )
-    nx.draw_networkx_labels(G, pos, font_color="black", font_weight="bold")
+    nx.draw_networkx_labels(G, pos, font_color="black", font_size=8, font_weight="bold")
     for edge in G.edges(data='weight'):
         nx.draw_networkx_edges(G, pos, edgelist=[edge],
                                edge_color=(0/256.0, 162/256.0, 240/256.0, 1),
                                width=edge[2]/max_weight*10)
     """plt.subplot(122)
     nx.draw_shell(G, nlist=[range(5, 10), range(5)], with_labels=True, font_weight='bold')"""
     """print(G.nodes())
     nx.draw(G)"""
-    """plt.xlim(-0.05, 1.05)
-    plt.ylim(-0.05, 1.05)"""
+    plt.xlim(-1.25, 1.25)
+    plt.ylim(-1.1, 1.1)
     plt.axis("off")
     plt.show()
\ No newline at end of file
diff --git a/report/conclusion.tex b/report/conclusion.tex
index 2ab31aa..ed86ca8 100644
--- a/report/conclusion.tex
+++ b/report/conclusion.tex
@@ -1,5 +1,5 @@
 \section{Conclusion}
 
 \subsection{Criticism of the methodology}
 
-% mention errors of OCR, for example cop 7 (marocco in france)
\ No newline at end of file
+% mention errors of OCR, for example cop 7 (Morocco in France)
\ No newline at end of file
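
A closing note on the edge widths in plot_participant_graph.py above:
width=edge[2]/max_weight*10 normalizes by the heaviest edge, so that edge is
drawn at the maximum width of 10 and all others scale linearly below it. A
tiny worked example (the weights are invented):

    max_weight = 25                       # hypothetical heaviest co-occurrence count
    for weight in (25, 10, 1):
        print(weight / max_weight * 10)   # -> 10.0, 4.0, 0.4
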