File Metadata

Created: Sun, Sep 1, 20:22

DigitalMeetingAnalyzer.py
View Options

	import pandas as pd
	import re

	from partlistproc.MeetingAnalyzer import MeetingAnalyzer
	from partlistproc.PdfExtractor import PdfExtractor


	class DigitalMeetingAnalyzer(MeetingAnalyzer):
	    """Analyzer for meetings with a digitally generated participant list pdf.

	    The analyzer reads the intermediate text file, walks through it line by
	    line and produces one CSV row per participant.

	    NOTE(review): ``affiliation_categories``, ``salutory_addresses`` and
	    ``description_splitter`` are read from ``self`` but are not defined in
	    this class - presumably they are provided by ``MeetingAnalyzer``. The
	    first two are passed to ``str.startswith`` and therefore have to be a
	    ``str`` or a tuple of ``str``.
	    """

	    # Fixed column order of the generated CSV file.
	    _COLUMNS = ["name", "affiliation", "affiliation_category", "description"]

	    def __init__(self, intermediate_name):
	        """
	        Args:
	            intermediate_name (str): name of the txt file that contains the
	                text of the participant list
	        """
	        self.intermediate_name = intermediate_name

	    def get_data(self, output_name):
	        """Overriding abstract method.

	        Parses the intermediate text file into one row per participant,
	        writes the rows to ``output_name`` as CSV (utf-8 with BOM, no index
	        column) and prints a small analysis of the result.

	        Args:
	            output_name (str): path of the csv file to write
	        """
	        print("Analyze the .txt file to generate the participant data")

	        lines = self._load_lines()
	        rows = self._parse_rows(lines)

	        # Build the frame once from all rows: DataFrame.append no longer
	        # exists in pandas >= 2.0 and copied the whole frame on every call.
	        data = pd.DataFrame(rows, columns=self._COLUMNS)

	        # generate the output file
	        data.to_csv(output_name, encoding="utf-8-sig", mode="w", index=False)

	        MeetingAnalyzer.print_small_analysis(self, data)

	    def _load_lines(self):
	        """Read the intermediate file and return its cleaned lines.

	        Returns:
	            list[str]: the lines of the file without "(continued)" markers,
	            without leading/trailing spaces and without page number /
	            document header lines. Empty lines are kept because they
	            separate the entries.
	        """
	        # the context manager closes the file even if reading fails
	        with open(self.intermediate_name, "r", encoding="utf-8") as file:
	            entire_text = file.read()

	        # remove the "(continued)" marker from affiliations
	        entire_text = entire_text.replace(' (continued)', '').replace('(continued)', '')

	        # ADDED FOR COP12: delete the spaces at the beginning and at the end
	        # of each line (only the space character, as before)
	        lines = [line.strip(' ') for line in entire_text.split('\n')]

	        # remove page numbers and document header
	        return [line for line in lines
	                if not line.startswith(("Page ", "FCCC"))]

	    def _parse_rows(self, lines):
	        """Turn the cleaned lines into participant records.

	        Args:
	            lines (list[str]): output of ``_load_lines``

	        Returns:
	            list[dict]: one dict per participant with the keys listed in
	            ``_COLUMNS``. Empty if ``lines`` contains no participants.
	        """
	        size = len(lines)
	        rows = []
	        affiliation_cat = ""

	        # go to first country (bounded, so empty input yields no rows)
	        i = self._skip_blank(lines, 0)

	        while i < size:
	            # check if we're in new affiliation category
	            if lines[i].lower().startswith(self.affiliation_categories):
	                affiliation_cat, i = self._read_paragraph(lines, i)
	                i = self._skip_blank(lines, i)
	                if i >= size:
	                    # category heading was the last thing in the file
	                    break

	            # new affiliation, possibly spread over several lines
	            affiliation, i = self._read_paragraph(lines, i)
	            i = self._skip_blank(lines, i)

	            # extract all the names of this affiliation
	            while i < size and lines[i].startswith(self.salutory_addresses):
	                name = lines[i]
	                i += 1

	                # the rest is description; it ends at an empty line or at
	                # the next name. The very last line of the file is included.
	                description_parts = []
	                while (i < size and lines[i] != ""
	                       and not lines[i].startswith(self.salutory_addresses)):
	                    description_parts.append(
	                        lines[i] + self.description_splitter)
	                    i += 1

	                rows.append({'name': name,
	                             'affiliation': affiliation,
	                             'affiliation_category': affiliation_cat,
	                             'description': "".join(description_parts)})

	                # a space followed by no name means a new affiliation is
	                # next, so skip all the empty slots first
	                i = self._skip_blank(lines, i)

	        return rows

	    @staticmethod
	    def _skip_blank(lines, i):
	        """Return the index of the first non-empty line at or after ``i``.

	        Returns ``len(lines)`` if only empty lines are left.
	        """
	        size = len(lines)
	        while i < size and lines[i] == "":
	            i += 1
	        return i

	    @staticmethod
	    def _read_paragraph(lines, i):
	        """Read a lower-cased entry that may span several lines.

	        The line at ``i`` is always taken; the following lines are added,
	        separated by a single space, until an empty line or the end of
	        ``lines`` is reached.

	        Returns:
	            tuple[str, int]: the joined text and the index after the entry
	        """
	        size = len(lines)
	        parts = [lines[i].lower()]
	        i += 1
	        while i < size and lines[i] != "":
	            parts.append(lines[i].lower())
	            i += 1
	        return " ".join(parts), i

DigitalMeetingAnalyzer.py
No OneTemporary
Actions

File Metadata

DigitalMeetingAnalyzer.py
View Options

Event Timeline

DigitalMeetingAnalyzer.pyNo OneTemporaryActions

File Metadata

DigitalMeetingAnalyzer.pyView Options

Event Timeline

DigitalMeetingAnalyzer.py
No OneTemporary
Actions

DigitalMeetingAnalyzer.py
View Options