"""Plot descriptive statistics of the COP participant lists.

The script reads the per-meeting participant CSVs and the combined dataset
below ``../results`` and shows five figures:

1. participants per COP, stacked by affiliation category,
2. stated minus extracted participant count per COP,
3. proportion of female participants per COP,
4. delegation size of selected countries per COP,
5. number of participants by number of meetings attended.

All table rows are collected in plain dicts and converted to a DataFrame
once per figure; ``DataFrame.append`` (removed in pandas 2.0) is not used.
"""
from collections import Counter

import pandas as pd

# Lower-cased prefixes of the raw ``affiliation_category`` values, grouped by
# the category they are counted under.  Every entry is a real tuple so it can
# be passed to ``str.startswith`` uniformly.
parties = ("parties",)
observer_states = (
    "observer states",
    "entities having received a standing invitation",
)
UN_units = (
    "united nations secretariat units and",
    "representatives of united nations secretariat units and bodies",
    "representatives of united nations",
)
specialized_agencies = (
    "specialized agencies and related organizations",
    "representatives of specialized agencies and",
)
intergovernmental = ("intergovernmental organizations",)
nongovernmental = ("non-government",)

# (column label, prefixes) pairs.  The order is both the matching order and
# the column order of the stacked bar chart, so the plot is deterministic.
CATEGORY_PREFIXES = (
    ("parties", parties),
    ("observer states", observer_states),
    ("UN units", UN_units),
    ("spec. agencies and rel. org.", specialized_agencies),
    ("IGOs", intergovernmental),
    ("NGOs", nongovernmental),
)
CATEGORY_LABELS = [label for label, _ in CATEGORY_PREFIXES]

# Participant totals stated for COP 1..25 (index = COP number - 1).
# A negative value means that no stated total is available for that COP.
stated_number_of_participants = [
    -1, -1, 6138, 4058, -1, 6050, 4001, 3557, 4645, 5366, 8657, 5285,
    9330, 8430, 24073, 10578, 11224, 8321, 7717, 10281, 27574, 21360,
    14745, 17294, 20189,
]

# Plot column label -> value of the ``affiliation`` column in the dataset.
COUNTRY_AFFILIATIONS = {
    "USA": "United States",
    "China": "China",
    "Germany": "Germany",
    "Saudi Arabia": "Saudi Arabia",
    "Brazil": "Brazil",
    "Switzerland": "Switzerland",
}

COP_NUMBERS = range(1, 26)
PARTICIPANTS_CSV = "../results/participants-csv/participants_cop{}.csv"
COMPLETE_DATASET_CSV = "../results/complete_dataset.csv"


def classify_category(category):
    """Return the column label for a raw affiliation category.

    The comparison is case-insensitive and prefix based.  Returns ``None``
    when *category* starts with none of the known prefixes.
    """
    lowered = category.lower()
    for label, prefixes in CATEGORY_PREFIXES:
        if lowered.startswith(prefixes):
            return label
    return None


def count_by_category(categories):
    """Count the entries of *categories* per affiliation category label.

    *categories* is a Series of raw ``affiliation_category`` values; missing
    values are ignored.  Returns a dict containing every label of
    ``CATEGORY_LABELS`` (zero when absent).  Values that match no known
    prefix are reported on stdout and left out of the counts.
    """
    counts = dict.fromkeys(CATEGORY_LABELS, 0)
    # sort_index() visits the distinct values in sorted order, so the
    # diagnostics below are printed in a stable order.
    distinct = categories.value_counts(sort=False).sort_index()
    for raw_category, size in distinct.items():
        label = classify_category(raw_category)
        if label is None:
            print("error !!!!!!!!!")
            print(raw_category)
            continue
        counts[label] += int(size)
    return counts


def female_proportion(genders):
    """Return the share of "f" among the "f"/"m" entries of *genders*.

    Entries with any other value are ignored.  Returns NaN instead of
    raising when there is no "f" or "m" entry at all, so the affected
    meeting shows up as a gap in the line plot.
    """
    women = int((genders == "f").sum())
    men = int((genders == "m").sum())
    total = women + men
    return women / total if total else float("nan")


def delegation_sizes(affiliations):
    """Count the rows of *affiliations* for each tracked country.

    Each country is counted on its own, so one absent country yields a zero
    for that country only and leaves the other counts intact.
    """
    return {
        label: int((affiliations == country).sum())
        for label, country in COUNTRY_AFFILIATIONS.items()
    }


def tally_meetings(meetings_per_name):
    """Map "number of meetings attended" to the number of participants.

    *meetings_per_name* holds one attendance count per participant.  Keys
    appear in the order in which the counts are first encountered.
    """
    return dict(Counter(int(count) for count in meetings_per_name))


def frame_from_rows(rows, index_name, columns):
    """Build a DataFrame from ``{index value: {column: value}}``.

    The index is named *index_name* and the columns follow the order given
    in *columns*.
    """
    return pd.DataFrame(
        list(rows.values()),
        index=pd.Index(list(rows), name=index_name),
        columns=columns,
    )


def main():
    """Read the result files and show all five figures."""
    # Imported here so the data helpers above stay importable without a
    # plotting backend.
    import matplotlib.pyplot as plt

    # Each per-COP file is read once and feeds the first two figures.
    category_rows = {}
    missing_rows = {}
    for cop in COP_NUMBERS:
        data = pd.read_csv(PARTICIPANTS_CSV.format(cop), encoding="utf-8-sig")
        category_rows[cop] = count_by_category(data["affiliation_category"])
        expected = stated_number_of_participants[cop - 1]
        if expected >= 0:
            missing_rows[cop] = {"missing": expected - len(data)}

    plot_data = frame_from_rows(category_rows, "copN", CATEGORY_LABELS)
    plot_data.plot.bar(xlabel="Nb. of COP", ylabel="Nb. participants",
                       title="Participants per COP", stacked=True)

    # NEXT PLOT: the inconsistency with the data
    plot_data2 = frame_from_rows(missing_rows, "copN", ["missing"])
    plot_data2.plot.bar(xlabel="Nb. of COP", ylabel="Missing Participants",
                        title="Undetected participants per COP")

    # The combined dataset feeds the remaining three figures.
    complete_data = pd.read_csv(COMPLETE_DATASET_CSV, encoding="utf-8-sig")

    gender_rows = {}
    country_rows = {}
    for cop in COP_NUMBERS:
        meeting = "cop" + str(cop)
        data = complete_data[complete_data["meeting"] == meeting]
        gender_rows[meeting] = {
            "proportion of female participants":
                female_proportion(data["gender"]),
        }
        sizes = delegation_sizes(data["affiliation"])
        if 0 in sizes.values():
            # At least one tracked country has no delegates at this meeting.
            print(cop)
            print(f"{sizes['Germany']} DE and {sizes['USA']} US and "
                  f"{sizes['China']} CN and {sizes['Saudi Arabia']} SA and "
                  f"{sizes['Brazil']} BR and {sizes['Switzerland']} CH")
        country_rows[meeting] = sizes

    # NEXT PLOT: percentage of women for cops
    plot_data3 = frame_from_rows(gender_rows, "meeting",
                                 ["proportion of female participants"])
    plot_data3.plot(kind="line", xlabel="Meeting",
                    ylabel="Proportion of female participants",
                    title="Proportion of female participants on COPs",
                    ylim=(0, 1), yticks=[0, 0.25, 0.5, 0.75, 1])

    # NEXT PLOT: evolution of the delegation size of the tracked countries
    plot_data4 = frame_from_rows(country_rows, "meeting",
                                 list(COUNTRY_AFFILIATIONS))
    plot_data4.plot(kind="line", xlabel="Meeting", ylabel="Delegation size",
                    title="Delegation size", ylim=(0, 600))

    # NEXT PLOT: number of meetings a participant attended (plain-text
    # comparison of the "name" column)
    meetings_per_name = complete_data.groupby("name").size()
    print("Total nb of participants = " + str(len(meetings_per_name)))
    # NOTE(review): names occurring more than 26 times are printed together
    # with their rows; presumably this flags name collisions - confirm.
    for name in meetings_per_name[meetings_per_name > 26].index:
        print(name)
        print(complete_data[complete_data["name"] == name])

    year_dict = tally_meetings(meetings_per_name)
    # Sorted so the bars run in increasing number of meetings.
    experience_rows = {
        meetings: {"nb of participants": participants}
        for meetings, participants in sorted(year_dict.items())
    }
    plot_data5 = frame_from_rows(experience_rows, "Nb of meetings",
                                 ["nb of participants"])
    plot_data5.plot(kind="bar", xlabel="Nb. of years",
                    ylabel="Nb. of participants",
                    title="Years of participation", ylim=(0, 20000))
    print(year_dict)

    plt.show()


if __name__ == "__main__":
    main()