File Metadata

Created: Wed, Jul 17, 02:25

UppercaseAffiliationMeetingAnalyzer.py
View Options

	import pandas as pd
	import re

	from partlistproc.MeetingAnalyzer import MeetingAnalyzer


	class UppercaseAffiliationMeetingAnalyzer(MeetingAnalyzer):
	    """Analyzer for participant lists that mark a new affiliation with
	    capital letters.

	    The analyzer reads ``self.uppercase_abbrev``,
	    ``self.affiliation_categories``, ``self.salutory_addresses`` and
	    ``self.description_splitter``; none of them is set in this class.
	    NOTE(review): presumably they are provided by ``MeetingAnalyzer`` --
	    confirm.
	    """

	    def __init__(self, intermediate_name, encoding=None):
	        """
	        Args:
	            intermediate_name (str): name of the txt file that contains the
	                text of the participant list
	            encoding (str, optional): how the intermediate file is encoded.
	                Defaults to None (which can be used for results of
	                tesseract).
	        """
	        self.intermediate_name = intermediate_name
	        self.encoding = encoding

	    def get_data(self, output_name):
	        """Overriding abstract method.

	        Reads the intermediate text file, extracts one row per participant
	        and writes the rows as CSV (encoded as utf-8-sig) to ``output_name``.

	        Args:
	            output_name (str): path of the CSV file to write
	        """
	        print("Analyze the .txt file to generate the participant data with ")
	        # The context manager closes the file even if reading fails.
	        with open(self.intermediate_name, "r", encoding=self.encoding) as file:
	            entire_text = file.read()

	        # NOTE(review): the first replace used to target the empty string,
	        # which inserts a newline between every single character. A form
	        # feed (the page separator emitted by tesseract) is presumably what
	        # was meant -- confirm against the original file.
	        entire_text = (entire_text
	                       .replace("\f", "\n")
	                       .replace(",", ".")
	                       .replace("Continued", ""))

	        # split it to a list of lines
	        content_list = entire_text.split("\n")

	        # delete all the page numbers (lines of the form "-12-")
	        content_list = [
	            el for el in content_list
	            if not (el.startswith("-") and el.endswith("-"))
	        ]

	        rows = self._parse_rows(content_list)

	        # Build the frame once from all rows; an explicit list keeps the
	        # column order of the output file stable.
	        data = pd.DataFrame(
	            rows,
	            columns=["name", "affiliation", "affiliation_category",
	                     "description"])

	        # generate the output file
	        data.to_csv(output_name, encoding="utf-8-sig", index=False)

	        MeetingAnalyzer.print_small_analysis(self, data)

	    def _parse_rows(self, content_list):
	        """Turn the cleaned text lines into one dict per participant.

	        Args:
	            content_list (list of str): lines of the participant list

	        Returns:
	            list of dict: one entry per person with the keys "name",
	            "affiliation", "affiliation_category" and "description"
	        """
	        rows = []
	        name = ""
	        description = ""
	        affiliation = ""
	        affiliation_cat = "parties"

	        def store_person():
	            # Store the person collected so far (if there is one).
	            if name != "":
	                rows.append({
	                    "name": name,
	                    "affiliation": affiliation,
	                    "affiliation_category": affiliation_cat,
	                    "description": description,
	                })

	        list_size = len(content_list)
	        i = 0
	        while i < list_size:
	            elem = content_list[i]
	            if (elem.isupper()
	                    and elem not in self.uppercase_abbrev
	                    and elem[:3].isalpha()):
	                # An upper-case heading closes the current person.
	                store_person()
	                name = ""
	                description = ""

	                lowered = elem.lower()
	                if lowered.startswith(self.affiliation_categories):
	                    # a new affiliation category
	                    affiliation_cat = lowered
	                    # the heading may continue over several lines
	                    while (i + 1 < list_size
	                           and content_list[i + 1].isupper()):
	                        affiliation_cat += " " + content_list[i + 1].lower()
	                        i += 1
	                else:
	                    # a new affiliation
	                    affiliation = lowered
	                    # the heading may continue over several lines; empty
	                    # lines in between are skipped
	                    while i + 1 < list_size and (
	                            content_list[i + 1].isupper()
	                            or not content_list[i + 1]):
	                        if content_list[i + 1]:
	                            affiliation += " " + content_list[i + 1].lower()
	                        i += 1
	            elif elem.startswith(self.salutory_addresses):
	                # A salutation starts a new person: store the previous one.
	                store_person()
	                name = elem
	                description = ""
	            elif elem != "":
	                # any other non-empty line belongs to the description
	                description += elem + self.description_splitter

	            i += 1

	        # The last person is followed by neither a heading nor another
	        # person, so it has to be stored explicitly.
	        store_person()
	        return rows

UppercaseAffiliationMeetingAnalyzer.py
No OneTemporary
Actions

File Metadata

UppercaseAffiliationMeetingAnalyzer.py
View Options

Event Timeline

UppercaseAffiliationMeetingAnalyzer.pyNo OneTemporaryActions

File Metadata

UppercaseAffiliationMeetingAnalyzer.pyView Options

Event Timeline

UppercaseAffiliationMeetingAnalyzer.py
No OneTemporary
Actions

UppercaseAffiliationMeetingAnalyzer.py
View Options