import matplotlib.pyplot as plt
import pandas as pd


# Lower-case prefixes used to recognise each participant category in the
# ``affiliation_category`` column of the per-COP CSV files.
# ``str.startswith`` accepts a tuple of prefixes; the single-prefix categories
# carry a trailing comma so that they are real tuples and not plain strings.
parties = ("parties",)
observer_states = (
    "observer states",
    "entities having received a standing invitation",
)
UN_units = (
    "united nations secretariat units and",
    "representatives of united nations secretariat units and bodies",
    "representatives of united nations",
)
specialized_agencies = (
    "specialized agencies and related organizations",
    "representatives of specialized agencies and",
)
intergovernmental = ("intergovernmental organizations",)
nongovernmental = ("non-government",)  # for cop2

# Output column -> prefixes. The insertion order is both the order in which
# the categories are tested and the column/legend order of the plot.
category_prefixes = {
    "parties": parties,
    "observer states": observer_states,
    "UN units": UN_units,
    "spec. agencies and rel. org.": specialized_agencies,
    "IGOs": intergovernmental,
    "NGOs": nongovernmental,
}

# get overview over all the participants
# format: one row per COP -> [copN, <one participant count per category>]
# Rows are collected in a list and turned into a DataFrame once:
# ``DataFrame.append`` no longer exists in pandas >= 2.0 and copied the whole
# frame on every call.
cop_rows = []
for i in range(1, 26):
    data = pd.read_csv(f"../results/participants-csv/participants_cop{i}.csv",
                       encoding="utf-8-sig")
    # Every category starts at 0, so each COP yields exactly one complete row.
    counts = dict.fromkeys(category_prefixes, 0)
    for cat, people in data.groupby("affiliation_category"):
        lowered = cat.lower()
        column = next(
            (name for name, prefixes in category_prefixes.items()
             if lowered.startswith(prefixes)),
            None,
        )
        if column is None:
            # Unknown category: report it and leave it out of the counts.
            print("error !!!!!!!!!")
            print(cat)
            continue
        # Several raw categories may map onto the same column.
        counts[column] += len(people)
    cop_rows.append({"copN": i, **counts})

# An explicit column list keeps the column order deterministic.
participants_per_cop = pd.DataFrame(cop_rows,
                                    columns=["copN", *category_prefixes])
plot_data = participants_per_cop.set_index("copN")

plot_data.plot.bar(xlabel="Nb. of COP", ylabel="Nb. participants", title="Participants per COP", stacked=True)


# NEXT PLOT: the inconsistency with the data
# Stated number of participants for COP 1..25 (list position 0 is COP 1).
# Negative entries are placeholders; those COPs are left out of the plot.
stated_number_of_participants = [-1,-1,6138,4058,-1,6050,4001,3557,4645,5366,8657,5285,
            9330,8430,24073,10578,11224,8321,7717,10281,27574,21360,14745,17294,20189]


# Rows are collected in a list and turned into a DataFrame once:
# ``DataFrame.append`` no longer exists in pandas >= 2.0.
missing_rows = []
for i, expected in enumerate(stated_number_of_participants, start=1):
    if expected < 0:
        # Nothing to compare against, so the CSV is not read at all.
        continue
    data = pd.read_csv(f"../results/participants-csv/participants_cop{i}.csv",
                       encoding="utf-8-sig")
    # One CSV row per detected participant; the difference to the stated
    # total is the number of participants that are missing from the CSV.
    missing_rows.append({"copN": i, "missing": expected - len(data)})

# An explicit column list keeps the column order deterministic.
missing_participants = pd.DataFrame(missing_rows, columns=["copN", "missing"])
plot_data2 = missing_participants.set_index("copN")

plot_data2.plot.bar(xlabel="Nb. of COP", ylabel="Missing Participants", title="Undetected participants per COP")


# NEXT PLOT: percentage of women for cops
complete_data = pd.read_csv("../results/complete_dataset.csv",
                       encoding="utf-8-sig")

# Rows are collected in a list and turned into a DataFrame once:
# ``DataFrame.append`` no longer exists in pandas >= 2.0.
gender_rows = []
for i in range(1, 26):
    meeting = f"cop{i}"
    genders = complete_data.loc[complete_data["meeting"] == meeting, "gender"]

    # Counting with boolean masks yields 0 for an absent gender, whereas
    # ``groupby(...).get_group`` raises KeyError in that case.
    total_women = int((genders == "f").sum())
    total_men = int((genders == "m").sum())
    total_known = total_men + total_women
    # Only rows marked "f" or "m" enter the ratio. A meeting without any such
    # row gets NaN, which shows up as a gap in the line instead of crashing.
    if total_known:
        proportion_of_women = total_women / total_known
    else:
        proportion_of_women = float("nan")

    gender_rows.append({
        "meeting": meeting,
        "proportion of female participants": proportion_of_women,
    })

# An explicit column list keeps the column order deterministic.
gender_data = pd.DataFrame(
    gender_rows, columns=["meeting", "proportion of female participants"])
plot_data3 = gender_data.set_index("meeting")

plot_data3.plot(kind="line", xlabel="Meeting", ylabel="Proportion of female participants",
                title="Proportion of female participants on COPs", ylim=(0, 1), yticks=[0, 0.25, 0.5, 0.75, 1])


# NEXT PLOT: evolution of delegation size of CHINA, USA, DE, SAUDI ARABIA, BRAZIL
# and SWITZERLAND over time
# Plot label -> value looked up in the "affiliation" column. The insertion
# order is also the column/legend order of the plot.
delegation_affiliations = {
    "USA": "United States",
    "China": "China",
    "Germany": "Germany",
    "Saudi Arabia": "Saudi Arabia",
    "Brazil": "Brazil",
    "Switzerland": "Switzerland",
}
complete_data = pd.read_csv("../results/complete_dataset.csv",
                       encoding="utf-8-sig")

# Rows are collected in a list and turned into a DataFrame once:
# ``DataFrame.append`` no longer exists in pandas >= 2.0.
country_rows = []
for i in range(1, 26):
    meeting = f"cop{i}"
    affiliations = complete_data.loc[complete_data["meeting"] == meeting,
                                     "affiliation"]

    # Each country is counted independently, so a country that is absent from
    # a meeting gets 0 without wiping out the counts of the other countries.
    sizes = {
        label: int((affiliations == affiliation).sum())
        for label, affiliation in delegation_affiliations.items()
    }

    absent = [label for label, size in sizes.items() if size == 0]
    if absent:
        # Diagnostic only: name the delegations with no rows for this meeting.
        print(f"{meeting}: no participants found for {', '.join(absent)}")

    country_rows.append({"meeting": meeting, **sizes})

# An explicit column list keeps the column order deterministic.
country_data = pd.DataFrame(country_rows,
                            columns=["meeting", *delegation_affiliations])
plot_data4 = country_data.set_index("meeting")

plot_data4.plot(kind="line", xlabel="Meeting", ylabel="Delegation size",
                title="Delegation size", ylim=(0, 600))


## NEXT PLOT: number years a participant was there (plaintext comparison)
complete_data = pd.read_csv("../results/complete_dataset.csv",
                            encoding="utf-8-sig")

# Number of rows per distinct name, i.e. how many times each name appears in
# the dataset. Computed in one vectorised pass instead of a Python loop over
# every group.
meetings_per_participant = complete_data.groupby("name").size()
print("Total nb of participants = " + str(len(meetings_per_participant)))

# Debug output: print every name that occurs more than 26 times, together
# with all of its rows.
for name in meetings_per_participant[meetings_per_participant > 26].index:
    print(name)
    print(complete_data[complete_data["name"] == name])

# {number of meetings: number of names with exactly that many rows}, sorted by
# number of meetings so that the bars appear in ascending order.
meeting_counts = meetings_per_participant.value_counts().sort_index()
year_dict = {int(meetings): int(count) for meetings, count in meeting_counts.items()}

# Built in one go: ``DataFrame.append`` no longer exists in pandas >= 2.0.
experience_data = pd.DataFrame({
    "Nb of meetings": list(year_dict.keys()),
    "nb of participants": list(year_dict.values()),
})
plot_data5 = experience_data.set_index("Nb of meetings")

plot_data5.plot(kind="bar", xlabel="Nb. of years", ylabel="Nb. of participants",
                title="Years of participation", ylim=(0, 20000))
print(year_dict)

# Display every figure created by this script.
plt.show()