"""Translate the affiliations of a COP participant CSV from French to English.

Usage (run from a directory where the relative ``../data`` and ``../results``
paths resolve)::

    python translate_list_fr_en.py <list-id>

Reads ``../results/participants-csv/participants_<list-id>.csv`` and writes
the translated rows next to it as ``participants_<list-id>-en.csv``.
"""
import sys

import pandas as pd

_COUNTRY_DICTIONARY_PATH = "../data/dictionaries/countries_french.csv"
_PARTICIPANTS_PREFIX = "../results/participants-csv/participants_"
_OUTPUT_COLUMNS = ["name", "affiliation", "affiliation_category", "description"]

# Substrings removed from the lower-cased French name, in this order.
_ARTICLES = ("le ", "la ", "les ", "l'")
# The only accented characters that are folded to their ASCII base letter.
_ACCENT_TABLE = str.maketrans("éèêïô", "eeeio")

# Normalized French country name -> lower-cased English name.
# Filled in place by main() before any row is translated.
country_translations = {}


def translate_affiliation_and_category(row):
    """Return the four output columns of *row* with the affiliation translated.

    The affiliation is looked up in the module-level ``country_translations``
    mapping; when it has no entry there, the original value is kept.
    Despite the function name, ``affiliation_category`` is passed through
    unchanged.

    Args:
        row: A mapping-like row (e.g. a ``pandas.Series``) providing the keys
            ``name``, ``affiliation``, ``affiliation_category`` and
            ``description``.

    Returns:
        A ``pandas.Series`` indexed by those four keys, in that order.
    """
    affiliation = row["affiliation"]
    translated = country_translations.get(affiliation, affiliation)
    return pd.Series(
        [row["name"], translated, row["affiliation_category"], row["description"]],
        index=_OUTPUT_COLUMNS,
    )


def _normalize_country_name(french_name):
    """Lower-case a French country name and strip articles, accents and any
    trailing parenthetical, producing the key used in the translation table."""
    cleaned = french_name.lower()
    # NOTE(review): the articles are removed wherever they occur, not only as
    # a leading word (e.g. the "le " inside "nouvelle zelande" is removed as
    # well). Kept as-is because the keys must match however the affiliation
    # column of the participant CSVs was normalized -- confirm before changing.
    for article in _ARTICLES:
        cleaned = cleaned.replace(article, "")
    cleaned = cleaned.translate(_ACCENT_TABLE)
    # Drop a parenthetical such as "iran (republique islamique d')".
    # partition + rstrip does not assume exactly one space before "(", so a
    # name written without that space no longer loses its last letter.
    head, paren, _ = cleaned.partition("(")
    if paren:
        cleaned = head.rstrip()
    return cleaned


def _load_country_translations(path):
    """Read the ``fr``/``en`` columns of the CSV at *path* and return a dict
    mapping each normalized French name to its lower-cased English name."""
    dictionary = pd.read_csv(path)
    return {
        _normalize_country_name(french): english.lower()
        for french, english in zip(dictionary["fr"], dictionary["en"])
    }


def main(args):
    """Translate the participant list named by ``args[1]`` and write the result.

    Args:
        args: The full argument vector (``sys.argv``); it must hold exactly
            one argument besides the program name.

    Raises:
        SystemExit: If the number of arguments is wrong.
    """
    if len(args) != 2:
        sys.exit(
            "Please provide one argument that contains the path "
            "of the csv participant list to translate"
        )

    filename = _PARTICIPANTS_PREFIX + args[1] + ".csv"

    # Get the translations for the countries. The shared dict is updated in
    # place so translate_affiliation_and_category sees the entries.
    country_translations.update(_load_country_translations(_COUNTRY_DICTIONARY_PATH))
    print(country_translations)

    cop_data = pd.read_csv(filename, encoding="utf-8-sig")
    cop_data = cop_data.apply(translate_affiliation_and_category, axis=1)

    # Same path with "-en" inserted before the (first) ".csv".
    output_filename = filename[:filename.index(".csv")] + "-en.csv"
    # NOTE(review): the DataFrame index is written as an extra first column
    # (no index=False); kept unchanged in case downstream readers rely on it.
    cop_data.to_csv(output_filename, encoding="utf-8-sig")


if __name__ == "__main__":
    main(sys.argv)