diff --git a/code/COP_Analyzer.py b/code/COP_Analyzer.py
new file mode 100644
index 0000000..3d0ca7e
--- /dev/null
+++ b/code/COP_Analyzer.py
@@ -0,0 +1,10 @@
+
+
+class COP_Analyzer():
+    """Stores a cop number together with an intermediate and an output name."""
+    # TODO maybe make this an abstract class
+
+    def __init__(self, copN, intermediate_name, output_name):
+        """Remembers the three given values as attributes of the same name."""
+        (self.copN, self.intermediate_name,
+         self.output_name) = (copN, intermediate_name, output_name)
\ No newline at end of file
diff --git a/code/COP_Extractor.py b/code/COP_Extractor.py
new file mode 100644
index 0000000..0a4fb28
--- /dev/null
+++ b/code/COP_Extractor.py
@@ -0,0 +1,100 @@
+
+
+class COP_TextExtractor():
+    """Extracts the text from a PDF participant list.
+
+    NOTE(review): nothing in this file imports PyPDF2, pytesseract, textract
+    or convert_from_path, provides box_pages / default_startpage (defined in
+    extract_participants_copX.py) or implements __getPDFpath; all of these
+    must be supplied before the methods below can run.
+    """
+
+    def __init__(self, copN, output_file):
+        """Constructor of this class.
+
+        Args:
+            copN (int): number of the edition of cop to process
+            output_file (string): name of the file to put the text in
+        """
+        self.copN = copN
+        # This used to read an undefined name, which raised a NameError
+        self.intermediate_name = output_file
+
+    def doOCR(self, startpage, endpage):
+        """Performs OCR with tesseract on a PDF file.
+
+        Args:
+            startpage (int): first page to process, counted from 1
+            endpage (int): last page to process, counted from 1 (inclusive).
+                Pass 0 for both to use the default range of this cop.
+
+        Returns:
+            boolean: True if the extraction was successful, False if the
+                given page range is invalid
+        """
+        import os  # local import: this file has no import section yet
+
+        temporary_filename = "temp.pdf"
+        try:
+            # insert boxes that help for the OCR on the critical pages
+            print("Insert boxes where necessary")
+            # joined with os.path so that the path is not tied to Windows
+            box = os.path.join("files", "column_block.pdf")
+            with open(box, "rb") as f_box, \
+                    open(self.__getPDFpath(), "rb") as file:
+                pdf_box = PyPDF2.PdfFileReader(f_box)
+                file_read = PyPDF2.PdfFileReader(file)
+                output = PyPDF2.PdfFileWriter()
+                for i in range(file_read.getNumPages()):
+                    page = file_read.getPage(i)
+                    if i in box_pages[self.copN - 1]:
+                        page.mergePage(pdf_box.getPage(0))
+                    output.addPage(page)
+                # written while both source files are still open
+                with open(temporary_filename, "wb") as boxed_file:
+                    output.write(boxed_file)
+
+            # start of the OCR procedure
+            print("Create images from the pdf file")
+            pages = convert_from_path(temporary_filename, dpi=200)
+
+            # check indexes
+            if startpage == 0 and endpage == 0:
+                # set to default page space; same index as for box_pages,
+                # as copN itself is out of range of the table for cop 4
+                startpage = default_startpage[self.copN - 1]
+                endpage = len(pages)
+            elif len(pages) < endpage or endpage < startpage or startpage <= 0:
+                return False
+
+            # Open the file in write mode so that
+            # all contents of all pages are added to the same file
+            with open(self.intermediate_name, "w") as f:
+                # startpage - 1 makes the pages count from 0; the end of a
+                # range is exclusive, so page endpage is still processed
+                for i in range(startpage - 1, endpage):
+                    print("Reading page " + str(i + 1))
+                    # Recognize the text as string in image using pytesseract
+                    f.write(str(pytesseract.image_to_string(
+                        pages[i], config="--psm 3 --oem 1")))
+            return True
+        finally:
+            # also clean up when the range is rejected or a step fails
+            if os.path.exists(temporary_filename):
+                os.remove(temporary_filename)
+
+    def __doPDFtoTxt(self):
+        """Extracts the text with textract and appends it to the text file.
+
+        Returns:
+            boolean: True once the text has been written
+        """
+        PDF_file = self.__getPDFpath()
+        text = textract.process(PDF_file, method='tesseract')
+        print(text)
+        # str() on a bytes object would write its "b'...'" representation
+        if isinstance(text, bytes):
+            text = text.decode("utf-8")
+        with open(self.intermediate_name, "a") as f:
+            f.write(str(text))
+        return True
\ No newline at end of file
diff --git a/code/extract_participants_copX.py b/code/extract_participants_copX.py
new file mode 100644
index 0000000..a43e0f2
--- /dev/null
+++ b/code/extract_participants_copX.py
@@ -0,0 +1,41 @@
+""" The main script of the cop participants extraction.
+Takes as an argument the number of the cop to process.
+"""
+
+import sys
+
+# Constants
+# This is used to tell names apart from abbreviations
+uppercase_abbrev = ["US", "USA", "AO", "UK", "WWF-US", "WWF-UK", "EPFL"]
+# Must be a tuple so that it can be passed to str.startswith
+salutory_addresses = ("Mr", "Ms", "Sr", "Sra", "H.E.", "S.E.", "M.",
+                      "Mme", "Dr.", "Drs.")
+default_startpage = [126, 2, 3, 3]  # TODO add for copN > 4
+# Where to add boxes for ocr; COP_TextExtractor looks this up with copN - 1
+box_pages = [[], range(47, 60), [], []]
+
+seperator = "#"
+
+# format:
+# extract_participants_copX.py <numberOfCop> <intermediateFilename>
+#   <outputFilename> (<startpage> <endpage>)
+# the last option is given if the OCR has already been done (for cop 1 - 4)
+# parse arguments
+arguments = sys.argv
+if len(arguments) < 4:
+    sys.exit("expected: <numberOfCop> <intermediateFilename> <outputFilename>")
+copNumber = int(arguments[1])
+intermediateFilename = arguments[2]
+outputFilename = arguments[3]
+startpage, endpage = 0, 0
+if len(arguments) == 6:
+    startpage, endpage = int(arguments[4]), int(arguments[5])
+
+# TODO replace; COP_Processor is neither defined nor imported in this file
+proc = COP_Processor(copNumber, intermediateFilename, outputFilename)
+success = proc.pdfToData(startpage, endpage)
+
+if success:
+    print("The data has successfully been extracted")
+else:
+    print("The data couldn't be extracted correctly. Maybe this cop is not implemented yet.")
\ No newline at end of file