diff --git a/expand_dataset.py b/expand_dataset.py
deleted file mode 100644
index 24e7a02..0000000
--- a/expand_dataset.py
+++ /dev/null
@@ -1,261 +0,0 @@
-# Script by Jasmin, Jan 2022, Eawag
-# Expand compound entries with all half-lives and associated data
-# Goal: create full data set
-import pandas as pd
-import numpy as np
-import sys
-import re
-sys.path.insert(0, 'C:\\Users\\leetseng\\enviPath-python\\') #/Users/jasmin/enviPath-python/enviPath_python/
-from enviPath_python.enviPath import *
-from enviPath_python.objects import *
-file_location = '/Halflife_modeling/'
-
-# Define the instance to use
-INSTANCE_HOST = 'https://envipath.org'
-username = 'leetseng'
-password = 'Joan1388' #password = getpass.getpass()
-eP = enviPath(INSTANCE_HOST)
-eP.login(username, password)
-
-# files
-input_file_path = file_location + 'input/soil_compounds_final.txt'
-output_file_path_full = file_location + 'output/full_dataset_half-lives.txt'
-
-
-# todo:
-# 1. create file similar to tomu's data set - can we improve model just by re-obtaining all the data?
-# 2. try to improve model
-
-# try to switch to the sludge dataset
-# try to collect the information and fill into the dictionary
-# transfer the function to extract all compounds
-
-
-def __main__():
-    # data frames
-    data = pd.read_csv(input_file_path, sep='\t')
-    # available headers: Index Name ID Inchikey_full Inchikey_first_14 Full_SMILES Canonical_SMIES Is_composite
-    # Cropped_canonical_SMILES Cropped_canonical_SMILES_no_stereo SMILES_pH_7 Number_halflives Comment Additional_HL_source
-    # Mol_weigth pKa pKa_5to8 charge_pH_7 log(Koc) Koc_source
-
-    D = {'index': [], 'compound_id': [], 'smiles': [], 'reduced_smiles': [], 'halflife': [], 'scenario_id': [], 'study_name': [],
-         'halflife_model': [], 'halflife_comment': [], 'spike_compound': [], 'acidity': [], 'CEC': [], 'OC': [],
-         'biomass_start': [], 'biomass_end': [], 'biomass': [], 'temperature': [], 'wst_value': [],
-         'wst_type': [], 'humidity': [], 'humidity_conditions': [], 'soil_texture': [], 'sand': [], 'silt': [], 'clay': [], 'log(Koc)': [], 'Koc_source': []}
-
-    for index, row in data.iterrows():
-        compound_id = row['ID']
-        print("COMPOUND: {}\n".format(compound_id))
-        D = add_halflives(D, compound_id, row)
-        if type(row['Additional_HL_source']) == str:
-            D = add_halflives(D, row['Additional_HL_source'], row)
-
-    hl_data = pd.DataFrame.from_dict(D) # convert dict into DF
-    hl_data.to_csv(output_file_path_full, sep='\t')
-
-
-def fetch_acidity(info):
-    try:
-        raw_pH = info.get_acidity().get_value()
-    except:
-        return np.NaN
-    else:
-        if ';' in raw_pH:
-            if '-' in raw_pH.split(';')[0]:
-                pH = range_to_average(raw_pH.split(';')[0])
-            else:
-                pH = float(raw_pH.split(';')[0])
-        elif '-' in raw_pH: # if range, get mean value
-            pH = range_to_average(raw_pH)
-        else:
-            pH = float(raw_pH)
-        return np.round(pH, 1)
-
-def range_to_average(input_string):
-    min = float(input_string.split(' - ')[0])
-    max = float(input_string.split(' - ')[1])
-    avg = np.average([min, max])
-    return avg
-
-def fetch_cec(info):
-    try:
-        cec = info.get_cec().get_value()
-    except:
-        return np.NaN
-    else:
-        return cec
-
-def fetch_organic_content(info):
-    try:
-        raw = info.get_omcontent().get_value()
-    except:
-        return np.NaN
-    else:
-        raw_list = raw.split(';')
-        oc = np.NaN
-        for i in raw_list:
-            if i == 'OC':
-                oc = val
-            elif i == 'OM':
-                oc = val / 1.7 # OC = OM / 1.7, source: Schwarzenbach
-            else:
-                if '<' in i:
-                    val = float(i[1:])
-                    print("Warning: {} was converted to {}".format(i, val))
-                elif i == '' or i == '-':
-                    val = np.NaN
-                else:
-                    val = float(i)
-        return oc
-
-def fetch_biomass(info):
-    try:
-        raw = info.get_biomass().get_value()
-    except:
-        return np.NaN, np.NaN
-    else:
-        l = raw.split(' - ')
-        return float(l[0]), float(l[1])
-
-def fetch_temperature(info):
-    try:
-        raw = info.get_temperature().get_value()
-    except:
-        return np.NaN
-    else:
-        min = float(raw.split(';')[0])
-        max = float(raw.split(';')[1])
-        return np.round(np.average([min, max]), 0)
-
-def fetch_wst(info):
-    try:
-        raw = info.get_waterstoragecapacity().get_value()
-    except:
-        return np.NaN, ''
-    else:
-        raw_list = raw.replace(" ", "").split('-')
-        if len(raw_list) < 4:
-            value = float(raw_list[0])
-            type = raw_list[1]
-        else:
-            value = np.NaN
-            type = raw_list[2]
-        return value, type
-
-def fetch_humidity(info):
-    try:
-        raw = info.get_humidity().get_value()
-    except:
-        return np.NaN, ''
-    else:
-        if type(raw) == float:
-            return raw, ''
-        else:
-            l = raw.split(' - ')
-            return float(l[0]), l[1]
-
-def fetch_soiltexture1(info):
-    try:
-        raw = info.get_soiltexture1().get_value()
-    except:
-        return ''
-    else:
-        return raw
-
-def fetch_spikecompound(info):
-    try:
-        raw = info.get_spikecompound().get_compoundLink()
-        spike_cpd = CompoundStructure(eP.requester, id=raw)
-        spike_smiles = spike_cpd.get_smiles()
-    except:
-        return ''
-    else:
-        return spike_smiles
-
-def fetch_soiltexture2(info):
-    try:
-        raw = info.get_soiltexture2().get_value()
-    except:
-        return np.NaN, np.NaN, np.NaN
-    else:
-        values = re.findall(r'\s([\d.]+)%', raw) # extract numeric percentages from the texture string
-        if values == []:
-            return np.NaN, np.NaN, np.NaN
-        return get_float_or_nan(values[0]), get_float_or_nan(values[1]), get_float_or_nan(values[2]) # sand, silt, clay
-
-def fetch_halflife_model(info):
-    try:
-        raw = info.get_halflife().get_value()
-    except:
-        return ''
-    else:
-        return raw.split(';')[0]
-
-def fetch_halflife_comment(info):
-    try:
-        raw = info.get_halflife().get_value()
-    except:
-        return ''
-    else:
-        return raw.split(';')[2]
-
-def get_float_or_nan(x):
-    try:
-        return float(x)
-    except:
-        return np.NaN
-
-def add_halflives(D, compound_id, row):
-    compound_index = row['Index']
-    reduced_smiles = row['Cropped_canonical_SMILES_no_stereo']
-    compound_structure = CompoundStructure(eP.requester, id=compound_id)
-    compound = Compound(eP.requester, id=compound_id)
-    print(compound_id)
-    halflives = compound_structure.get_halflifes()
-    smiles = compound.get_smiles()
-    for hl in halflives:
-        # compound info
-        D['index'].append(compound_index)
-        D['compound_id'].append(compound_id)
-        D['smiles'].append(smiles)
-        D['reduced_smiles'].append(reduced_smiles) # cropped_canonical_smiles_no_stereo
-        D['halflife'].append(float(hl.hl))
-        D['scenario_id'].append(hl.scenarioId)
-        D['log(Koc)'].append(row['log(Koc)'])
-        D['Koc_source'].append(row['Koc_source'])
-        print(hl.scenarioId)
-        # fetch data structures
-        scenario = Scenario(eP.requester, id=hl.scenarioId)
-        add_info = scenario.get_additional_information()
-        # add halflife details
-        D['halflife_model'].append(fetch_halflife_model(add_info))
-        D['halflife_comment'].append(fetch_halflife_comment(add_info))
-        D['study_name'].append(scenario.get_name().split(' - ')[0])
-        D['spike_compound'].append(fetch_spikecompound(add_info))
-        # fetch data points
-        D['acidity'].append(fetch_acidity(add_info))
-        D['CEC'].append(fetch_cec(add_info)) # cation exchange capacity
-        D['OC'].append(fetch_organic_content(add_info)) # organic content as organic carbon (oc)
-        start, end = fetch_biomass(add_info)
-        D['biomass_start'].append(start)
-        D['biomass_end'].append(end)
-        D['biomass'].append(np.round(np.average([start, end]), 2))
-        D['temperature'].append(fetch_temperature(add_info))
-        wst_value, wst_type = fetch_wst(add_info) # water storage capacity
-        D['wst_value'].append(wst_value)
-        D['wst_type'].append(wst_type)
-        hum, hum_cond = fetch_humidity(add_info)
-        D['humidity'].append(hum)
-        D['humidity_conditions'].append(hum_cond)
-        D['soil_texture'].append(fetch_soiltexture1(add_info))
-        _sand, _silt, _clay = fetch_soiltexture2(add_info)
-        D['sand'].append(_sand)
-        D['silt'].append(_silt)
-        D['clay'].append(_clay)
-    return D
-
-
-__main__()