diff --git a/1-SQL_basics/SQL_introduction.py b/1-SQL_basics/SQL_introduction.py index 40ac2d6..5d16ba8 100644 --- a/1-SQL_basics/SQL_introduction.py +++ b/1-SQL_basics/SQL_introduction.py @@ -1,199 +1,241 @@ import sqlite3 import pandas import hashlib import pickle from pandas._testing import assert_frame_equal """ Utility functions """ def run_query(q): con = sqlite3.connect("stork-mooc.sqlite") return pandas.read_sql_query(q, con) -def generate_hash(dataframe): - return hashlib.md5(pickle.dumps(dataframe)).hexdigest() +def generate_hash(something): + # Old method: + #return hashlib.md5(pickle.dumps(dataframe)).hexdigest() + # New method: + if isinstance(something, pandas.core.frame.DataFrame): + to_hash = "" + # Get hash of column names, in order + for col in something.columns: + to_hash += generate_hash(col) + # Add hash of the dataframe content + to_hash += str(pandas.util.hash_pandas_object(something).sum()) + # Calculate hash of the hashes + return(generate_hash(to_hash)) + #return pandas.util.hash_pandas_object(something).sum() + elif isinstance(something, str): + return hashlib.md5(something.encode('utf-8')).hexdigest() + else: + return -1 + def check_hash(dataframe, dataframehash): return generate_hash(dataframe) == dataframehash - """ Validation function for Question 1 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question1_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '42d655bac29b2d16b27e41a6c10ad713' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'a426a8dc5d720da2b262e13b5d8aef79' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - - print("Your query has passed the validation tests and is evaluated as correct, well done!") + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 2 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question2_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '76f2c51487cb966bd3c554c1a96957cb' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '973280172e84229a6c263b7aa1de7620' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - - """ +""" Validation function for Question 3 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question3_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'd54eca32b2124f5a6583bf8048aa48e1' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'ae3a604315626162fb364bbc896b8c9f' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - - """ +""" Validation function for Question 4 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question4_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '63f4af4d374da63e8f5293c7ae9027c0' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'da9e0b566a6923628201f23c09f9a7a4' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 5 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question5_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '126e62188b4a0e56dc933a66637d6310' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'd0b6b7d298e70eb5392a888a21c7bedf' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - + return \ No newline at end of file diff --git a/2-WHERE_clause/SQL_conditions.py b/2-WHERE_clause/SQL_conditions.py index ddc6f9f..8210a46 100644 --- a/2-WHERE_clause/SQL_conditions.py +++ b/2-WHERE_clause/SQL_conditions.py @@ -1,306 +1,363 @@ import sqlite3 import pandas import hashlib import pickle from pandas._testing import assert_frame_equal """ Utility functions """ def run_query(q): con = sqlite3.connect("stork-mooc.sqlite") return pandas.read_sql_query(q, con) -def generate_hash(dataframe): - return hashlib.md5(pickle.dumps(dataframe)).hexdigest() +def generate_hash(something): + # Old method: + #return hashlib.md5(pickle.dumps(dataframe)).hexdigest() + # New method: + if isinstance(something, pandas.core.frame.DataFrame): + to_hash = "" + # Get hash of column names, in order + for col in something.columns: + to_hash += generate_hash(col) + # Add hash of the dataframe content + to_hash += str(pandas.util.hash_pandas_object(something).sum()) + # Calculate hash of the hashes + return(generate_hash(to_hash)) + #return pandas.util.hash_pandas_object(something).sum() + elif isinstance(something, str): + return hashlib.md5(something.encode('utf-8')).hexdigest() + else: + return -1 def check_hash(dataframe, dataframehash): return generate_hash(dataframe) == dataframehash """ Validation function for Question 1 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question1_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '8869f41a629d6097c2160161c908052d' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'caa05cd19629f8dda404fcb7d0522553' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - - print("Your query has passed the validation tests and is evaluated as correct, well done!") + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + print("Your query has passed the validation tests and is evaluated as correct, well done!") + return """ Validation function for Question 2 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question2_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'd618a65ae888ae46407b46961ed5cb98' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '84c7e8e933439f68a92be5ac2649acf0' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ Validation function for Question 3 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question3_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '627c11065f3552ddcdb15d9d8c038106' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '5d6dbd822e808c053055767a00656b86' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ Validation function for Question 4 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question4_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '72f6637a62c1d94a14f55cc988c85e31' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '19a8471fb69680b018027034b86ddcea' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return """ Validation function for Question 5 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question5_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'cfd14c89d1c4c443b1525d6a0b5325b7' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'e03218e8316cdbe283cd93d2e3874019' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - - + return + """ Validation function for Question 6 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question6_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '6627ab377e443da6509aaf65bd50c645' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '13a126fdcf382abe91b9609569eafc20' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - - + return + """ Validation function for Question 7 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question7_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '6627ab377e443da6509aaf65bd50c645' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '13a126fdcf382abe91b9609569eafc20' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - - + return + """ Validation function for Question 8 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question8_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'e7cc677dbf21b7f5728e423a1bf36ce3' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'fc2162c1eb859e9c3e4e5491c441f43c' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - \ No newline at end of file + return diff --git a/3-Aggregation_sorting/SQL_aggregation.py b/3-Aggregation_sorting/SQL_aggregation.py index ce532e2..fb6afca 100644 --- a/3-Aggregation_sorting/SQL_aggregation.py +++ b/3-Aggregation_sorting/SQL_aggregation.py @@ -1,234 +1,283 @@ import sqlite3 import pandas import hashlib import pickle from pandas._testing import assert_frame_equal """ Utility functions """ def run_query(q): con = sqlite3.connect("stork-mooc.sqlite") return pandas.read_sql_query(q, con) -def generate_hash(dataframe): - return hashlib.md5(pickle.dumps(dataframe)).hexdigest() +def generate_hash(something): + # Old method: + #return hashlib.md5(pickle.dumps(dataframe)).hexdigest() + # New method: + if isinstance(something, pandas.core.frame.DataFrame): + to_hash = "" + # Get hash of column names, in order + for col in something.columns: + to_hash += generate_hash(col) + # Add hash of the dataframe content + to_hash += str(pandas.util.hash_pandas_object(something).sum()) + # Calculate hash of the hashes + return(generate_hash(to_hash)) + #return pandas.util.hash_pandas_object(something).sum() + elif isinstance(something, str): + return hashlib.md5(something.encode('utf-8')).hexdigest() + else: + return -1 def check_hash(dataframe, dataframehash): return generate_hash(dataframe) == dataframehash """ Validation function for Question 1 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question1_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '7ad8b2587b715b5a77ee027b1750a48b' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '1e4eb8d349e31c66dc9c2b0572c5065b' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - - print("Your query has passed the validation tests and is evaluated as correct, well done!") + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 2 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question2_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '8dc1ad6015d07cef58ab22328a96e3f8' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'b72293088ed90148b5db49b4816ae846' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 3 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question3_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '82f3e4184eadcb8464904c72c6f6ce4f' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'e5d47c35768763cda30dc6f92a8439b9' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 4 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question4_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '1240afe0bff475eb73537854acaf0093' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '25b61a0bfacb7317dad287b70d32ba28' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 5 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question5_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '1bd88d8d51206e4969048c9a8ce7dbae' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '9e521a5294de5ef59d9714da2193f78c' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - - - """ -Validation function for Question 5 + return + +""" +Validation function for Question 6 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question6_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '7a63ba00c472fdc6142848ce885dac13' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'dbda7e8c843c7d87dc68bc398243e8f4' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - \ No newline at end of file + return diff --git a/4-Compound_nested_queries/SQL_fusion.py b/4-Compound_nested_queries/SQL_fusion.py index 710970a..1575286 100644 --- a/4-Compound_nested_queries/SQL_fusion.py +++ b/4-Compound_nested_queries/SQL_fusion.py @@ -1,199 +1,240 @@ import sqlite3 import pandas import hashlib import pickle from pandas._testing import assert_frame_equal """ Utility functions """ def run_query(q): con = sqlite3.connect("stork-mooc.sqlite") return pandas.read_sql_query(q, con) -def generate_hash(dataframe): - return hashlib.md5(pickle.dumps(dataframe)).hexdigest() +def generate_hash(something): + # Old method: + #return hashlib.md5(pickle.dumps(dataframe)).hexdigest() + # New method: + if isinstance(something, pandas.core.frame.DataFrame): + to_hash = "" + # Get hash of column names, in order + for col in something.columns: + to_hash += generate_hash(col) + # Add hash of the dataframe content + to_hash += str(pandas.util.hash_pandas_object(something).sum()) + # Calculate hash of the hashes + return(generate_hash(to_hash)) + #return pandas.util.hash_pandas_object(something).sum() + elif isinstance(something, str): + return hashlib.md5(something.encode('utf-8')).hexdigest() + else: + return -1 def check_hash(dataframe, dataframehash): return generate_hash(dataframe) == dataframehash """ Validation function for Question 1 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question1_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'b27b60409c967bcb8a93dbbc75ba33cd' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '7ce81fa0b3c121f853ca818d4c02136e' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - - print("Your query has passed the validation tests and is evaluated as correct, well done!") + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 2 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question2_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '41af0d89e1ead68b5ed15fa9013a3493' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '2805924da422785a82bf9c6d3bbff6bd' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - - """ +""" Validation function for Question 3 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question3_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'b27b60409c967bcb8a93dbbc75ba33cd' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '7ce81fa0b3c121f853ca818d4c02136e' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - - """ +""" Validation function for Question 4 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question4_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '54caca6dd78ac6240be397455424bbda' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '4cc68008bfa3927f27cffee947d6c616' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 5 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question5_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '54caca6dd78ac6240be397455424bbda' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '4cc68008bfa3927f27cffee947d6c616' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - - \ No newline at end of file + return \ No newline at end of file diff --git a/5-DDL_DML_views/SQL_DML.ipynb b/5-DDL_DML_views/SQL_DML.ipynb index a085655..61fd4e4 100644 --- a/5-DDL_DML_views/SQL_DML.ipynb +++ b/5-DDL_DML_views/SQL_DML.ipynb @@ -1,625 +1,628 @@ { "cells": [ { "cell_type": "markdown", "id": "3f467ad7-4bdd-4ce9-86f6-738f90481072", "metadata": {}, "source": [ "# Exercice SQL – Data definition and manipulation\n", "\n", "The questions in this exercise are about the syntax of SQL queries.\n", "\n", "We will use:\n", "* the QSpatiaLite stork database that we used earlier;\n", "* SQL to write queries on this database;\n", "* the programming language Python to query this database (execute the queries).\n", "\n", "
\n", "
\n", " How to use this notebook?
\n", " This notebook consists of text cells and code cells (Python). The code cells must be executed in order to see the output of the program.
To execute a cell, select it and then simply click on the \"play\" button () in the toolbar at the top of the notebook, or type shift + enter.
It is important to run the code cells in the same order as they appear in the notebook.\n", "
\n" ] }, { "cell_type": "markdown", "id": "b4e21f2d-b7bb-4e6e-bd05-a158f2524056", "metadata": {}, "source": [ "---\n", "---" ] }, { "cell_type": "markdown", "id": "fa2e6b4a-e0dc-48b7-a032-4c8da2184fa4", "metadata": { "tags": [] }, "source": [ "# Accessing the database with Python\n", "\n", "In this first part, we show you with an example how to write and execute your queries on the stork database with Python." ] }, { "cell_type": "markdown", "id": "6ff3944a-2984-4854-91f7-1b566d3553bb", "metadata": { "tags": [] }, "source": [ "## Importing Python libraries\n", "To manipulate a database with Python we need the `sqlite3` and `pandas` libraries:" ] }, { "cell_type": "code", "execution_count": null, "id": "083f4802-f93c-472d-b68e-887cc7798a78", "metadata": {}, "outputs": [], "source": [ "# Importing libraries\n", "import sqlite3\n", - "import pandas" + "import pandas\n", + "\n", + "# Librairie permettant d'afficher la table \"observation\"\n", + "from SQL_DML import *" ] }, { "cell_type": "markdown", "id": "9a95b8ef-a5e5-42bc-a1ee-991c444ea2aa", "metadata": {}, "source": [ "## Connection to the database \n", "First, a connection to the database is established:" ] }, { "cell_type": "code", "execution_count": null, "id": "affdc6db-df17-4a0a-989b-bc470aa2e8f7", "metadata": {}, "outputs": [], "source": [ "# Establish the connection to the database:\n", - "con = sqlite3.connect(\"stork-mooc.sqlite\")" + "conn = sqlite3.connect(\"stork-mooc.sqlite\")" ] }, { "cell_type": "markdown", "id": "0c36f96f-9c67-441d-a8c7-d24cf10c9945", "metadata": {}, "source": [ "---\n", "---" ] }, { "cell_type": "markdown", "id": "8116c37c-3db2-4d92-bf3a-faf066969321", "metadata": { "tags": [] }, "source": [ "# Exercises with SQL\n", "\n", "For each of the following questions, **give the SQL syntax for the requested queries** and **test your queries.** \n", "\n", "As a reminder, here is the UML diagram of our stork database. \n", "\n", "\"UML" ] }, { "cell_type": "markdown", "id": "9a49ab22-6ef6-4c0a-98e1-a089f7d1bb63", "metadata": { "tags": [] }, "source": [ "---\n", "\n", "# Special feature of this exercise\n", "\n", "This exercise involves queries allowing to **modify the stork database**. Thus, the following questions will allow you to create a new table named \"observations\", to modify it and to delete it. \n", "\n", "Since your queries will allow you to modify the original database, a copy of the database named **\"stork-mooc-BACKUP.sqlite \"** is available in the folder of this exercise if a problem with the database occurs during the exercise. In this case, delete the current version of your database \"stork-mooc.sqlite\", create a copy of the file \"stork-mooc-BACKUP.sqlite\" and rename this copy \"stork-mooc.sqlite\". **Always keep an unmodified copy of \"stork-mooc-BACKUP.sqlite\"!" ] }, { "cell_type": "markdown", "id": "03a31944-09b6-429c-86ea-0cb1307680bf", "metadata": { "tags": [] }, "source": [ "## Let's look at the current state of the \"observations\" table\n", "In principle, at the beginning of this exercise the \"observations\" table does not yet exist and the following command should return a message indicating it indeed does not exist.\n", "\n", "**If it does not the case, you need to start from scratch by deleting the existing table.** To do this, run the very last cell containing code at the bottom of this file (this executes a DROP TABLE query which allows you to delete a table) and then re-execute the cell below ('display_observations_table()') to ensure that the 'observations' table no longer exists. " ] }, { "cell_type": "code", "execution_count": null, "id": "9912fe28-bec7-447c-a37c-2ce2af9a122d", "metadata": { "tags": [] }, "outputs": [], "source": [ "display_observations_table()" ] }, { "cell_type": "markdown", "id": "f46cdd5c-c09e-4adf-a88b-c4cf02ace948", "metadata": { "tags": [] }, "source": [ "## Let's get started!\n", "You can now start answering the following questions. The first question will allow you to create the 'observations' table. The other questions will allow you to modify it. Finally, you will be able to delete it. **If necessary, feel free to run the last cell of code to delete the 'observations' table and then start again from question 1.**" ] }, { "cell_type": "markdown", "id": "cdef6666-e9c7-4614-8d89-95ea6629d98d", "metadata": {}, "source": [ "---\n", "\n", "## Question 1\n", "**Question** : Enter the query that will create a new \"observations\" table to store the observations on storks. In the table include the date (\"day\" should be \"date\" format) and the name (\"storkname\" should be \"text\" format)." ] }, { "cell_type": "code", "execution_count": null, "id": "08fc5f61-202a-4e10-a0f6-c06333274b9b", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query\n", "conn.cursor().execute(query)\n", "conn.commit()\n", "\n", "# Display the result table\n", "display_observations_table()" ] }, { "cell_type": "markdown", "id": "b0d93855-76b2-403f-b945-b9a9a67f8451", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "839e19f7-bdc8-494d-9fae-b41e3035e4a1", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_DML import *\n", "question1_validation()" ] }, { "cell_type": "markdown", "id": "1c40c206-4e86-4abb-9bd4-38b8bbb1584c", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "3248b00b-42fc-4043-849d-ce206ef3d94e", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`CREATE TABLE observations ('date' date, 'storkname' text)`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "ce69bcad-8b2c-43ed-bac2-f2d1feaa636a", "metadata": {}, "source": [ "---\n", "\n", "## Question 2\n", "**Question** : The new \"observations\" table is missing a field for the observer. Write the query to add a new \"member\" attribute to the table. " ] }, { "cell_type": "code", "execution_count": null, "id": "96467df3-bb81-44d1-9d8c-c4bab83f0854", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query\n", "conn.cursor().execute(query)\n", "conn.commit()\n", "\n", "# Display the result table\n", "display_observations_table()" ] }, { "cell_type": "markdown", "id": "6af457ad-0a71-4147-a094-7521e4e7e98a", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "fcf954a0-e697-4b20-bb98-764b66cbf127", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_DML import *\n", "question2_validation()" ] }, { "cell_type": "markdown", "id": "40c1cb56-0899-41da-939b-ccc40c2ef149", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "c0d3979d-5605-41bf-8d2c-8f04bce172c5", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`ALTER TABLE observations ADD member text`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "08372c86-215b-4aa6-81f1-74b86bf391fd", "metadata": {}, "source": [ "---\n", "\n", "## Question 3\n", "**Question** : Enter the query to enter the following tuples into the new \"observations\" table: 2016-08-12 / Annelise / Arthur, 2016-08-13 / Felix / Arthur, 2016-08-15 / Max / Jeanne" ] }, { "cell_type": "code", "execution_count": null, "id": "851477b4-bb70-4eaf-a8ad-719fa316f5e1", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query\n", "conn.cursor().execute(query)\n", "conn.commit()\n", "\n", "# Display the result table\n", "display_observations_table()" ] }, { "cell_type": "markdown", "id": "42c6eec1-eca5-4c84-a72e-46ff301b6243", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "98845eb1-4056-4ab9-ba52-470f37044990", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_DML import *\n", "question3_validation()" ] }, { "cell_type": "markdown", "id": "105a1701-50ab-4963-9a5b-a6386dbe1b92", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "e659c9aa-60d4-4bdf-b217-2ee096880499", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`insert into 'observations' values('2016-08-12' ,'Annelise','Arthur'),( '2016-08-13','Felix','Arthur'),('2016-08-15' ,'Max','Jeanne')`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "b7cd601b-409d-45a6-93ca-3cb44b69ce76", "metadata": {}, "source": [ "---\n", "\n", "## Question 4\n", "**Question** : Enter the query to correct the second observation by changing Felix to Johann. \n" ] }, { "cell_type": "code", "execution_count": null, "id": "1b49f899-2985-42ec-a6ed-4a89878c8fc1", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query\n", "conn.cursor().execute(query)\n", "conn.commit()\n", "\n", "# Display the result table\n", "display_observations_table()" ] }, { "cell_type": "markdown", "id": "2a18c2b5-cce3-4542-a285-e3081a77e21b", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "6cc04231-b3f8-454e-a57d-5c765ca7a547", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_DML import *\n", "question4_validation()" ] }, { "cell_type": "markdown", "id": "f022181f-a1f6-4ef8-8990-89bc7edcd52f", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "a3450ff9-11d3-4a93-81a2-f7ce9b538897", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`update 'observations' set storkname ='Johann' where storkname ='Felix'`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "1623f395-b41c-4a20-8560-5fcb78c188aa", "metadata": {}, "source": [ "---\n", "\n", "## Question 5\n", "**Question** : Enter the query to delete the observation made by Jeanne from the \"observations\" table. " ] }, { "cell_type": "code", "execution_count": null, "id": "c81307a5-2eea-4e68-914f-d33ab6f3ae53", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query\n", "conn.cursor().execute(query)\n", "conn.commit()\n", "\n", "# Display the result table\n", "display_observations_table()" ] }, { "cell_type": "markdown", "id": "bfe3aa7d-f619-45e4-9de8-bde91446ae34", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "6698085a-0690-46ed-a30b-0b106b3f1f5a", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_DML import *\n", "question5_validation()" ] }, { "cell_type": "markdown", "id": "0f8c0e39-3791-487a-9723-fa932cc483ad", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "3eea65e0-2bac-42de-856a-14180deec02f", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`delete from 'observations' where member='Jeanne'`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "9ac7dba0-2ed1-4a43-a7fd-47130b0e8a0c", "metadata": { "tags": [] }, "source": [ "---\n", "## Deleting the table\n", "Execute the following code cell to delete the 'observations' table. " ] }, { "cell_type": "code", "execution_count": null, "id": "2f977221-06fa-4098-a627-931b16d0b5cd", "metadata": { "tags": [] }, "outputs": [], "source": [ "query = \"DROP TABLE 'observations'\"\n", "\n", "# Execute the query\n", "conn.cursor().execute(query)\n", "conn.commit()" ] } ], "metadata": { "kernelspec": { "display_name": "Python", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 } diff --git a/5-DDL_DML_views/SQL_DML.py b/5-DDL_DML_views/SQL_DML.py index de378f5..8f64bca 100644 --- a/5-DDL_DML_views/SQL_DML.py +++ b/5-DDL_DML_views/SQL_DML.py @@ -1,141 +1,217 @@ import sqlite3 import pandas import hashlib import pickle from sqlalchemy import create_engine from pandas.io.sql import DatabaseError """ Utility functions """ -def generate_hash(dataframe): - return hashlib.md5(pickle.dumps(dataframe)).hexdigest() +def generate_hash(something): + # Old method: + #return hashlib.md5(pickle.dumps(dataframe)).hexdigest() + # New method: + if isinstance(something, pandas.core.frame.DataFrame): + to_hash = "" + # Get hash of column names, in order + for col in something.columns: + to_hash += generate_hash(col) + # Add hash of the dataframe content + to_hash += str(pandas.util.hash_pandas_object(something).sum()) + # Calculate hash of the hashes + return(generate_hash(to_hash)) + #return pandas.util.hash_pandas_object(something).sum() + elif isinstance(something, str): + return hashlib.md5(something.encode('utf-8')).hexdigest() + else: + return -1 def run_query(q): conn = sqlite3.connect("stork-mooc.sqlite") return pandas.read_sql_query(q, conn) def display_observations_table(): query = "select * from observations" table = pandas.DataFrame() try: table = run_query(query) except DatabaseError as e: print(f"Looks like the 'observations' table does not exist...\n") return table """ HOW TO GENERATE THE CORRECT HASH (to be done in a separate notebook) correctresult = pandas.DataFrame(data=[["2021-10-14", "stork1"],["2021-11-19", "stork2"]], columns=['date', 'storkname']) display(correctresult) correctresponse_hash = generate_hash(correctresult) correctresponse_hash """ + """ Validation function for Question 1 """ def question1_validation(): - correctresponse_hash = 'ceb5fcf0c091dcbf3d0d29a8b1dac802' + correctresponse_hash = '354457a15f87002c212b4f230550efba' + selectresult = display_observations_table() "display(selectresult)" studentresult_hash = generate_hash(selectresult) "studentresult_hash" try: - assert studentresult_hash == correctresponse_hash, "Incorrect query." + message = "Incorrect query." + assert studentresult_hash == correctresponse_hash except AssertionError as ae: - print("Your query didn't pass the validation tests.") - raise ae + print("Your query didn't pass the validation tests:") + print(message) + return print("Your query has passed the validation tests and is evaluated as correct, well done!") + return + + #try: + # assert studentresult_hash == correctresponse_hash, "Incorrect query." + + #except AssertionError as ae: + # print("Your query didn't pass the validation tests.") + # raise ae + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") """ Validation function for Question 2 """ def question2_validation(): - correctresponse_hash = '035dfc2eebd673a8a5a2e492e6d7e7eb' + correctresponse_hash = 'ff6fcf9d9395ea04e209dd3b012aa26e' selectresult = display_observations_table() "display(selectresult)" studentresult_hash = generate_hash(selectresult) "studentresult_hash" try: - assert studentresult_hash == correctresponse_hash, "Incorrect query." + message = "Incorrect query." + assert studentresult_hash == correctresponse_hash except AssertionError as ae: - print("Your query didn't pass the validation tests.") - raise ae + print("Your query didn't pass the validation tests:") + print(message) + return print("Your query has passed the validation tests and is evaluated as correct, well done!") + return + #try: + # assert studentresult_hash == correctresponse_hash, "Incorrect query." + #except AssertionError as ae: + # print("Your query didn't pass the validation tests.") + # raise ae + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + """ Validation function for Question 3 """ def question3_validation(): - correctresponse_hash = '62dcf7b4d9643d755fdd9c68a8ddea9d' + correctresponse_hash = '664b73c3d54992595a313e19cca4cdfd' selectresult = display_observations_table() "display(selectresult)" studentresult_hash = generate_hash(selectresult) "studentresult_hash" try: - assert studentresult_hash == correctresponse_hash, "Incorrect query." + message = "Incorrect query." + assert studentresult_hash == correctresponse_hash except AssertionError as ae: - print("Your query didn't pass the validation tests.") - raise ae + print("Your query didn't pass the validation tests:") + print(message) + return print("Your query has passed the validation tests and is evaluated as correct, well done!") + return + + #try: + # assert studentresult_hash == correctresponse_hash, "Incorrect query." + + #except AssertionError as ae: + # print("Your query didn't pass the validation tests.") + # raise ae + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") -""" + """ Validation function for Question 4 """ def question4_validation(): - correctresponse_hash = 'e5c660b69dfb8c640fbd9ce541f0df8c' + correctresponse_hash = 'f5a014d652984f3efb396fa29c3473bc' selectresult = display_observations_table() "display(selectresult)" studentresult_hash = generate_hash(selectresult) "studentresult_hash" try: - assert studentresult_hash == correctresponse_hash, "Incorrect query." + message = "Incorrect query." + assert studentresult_hash == correctresponse_hash except AssertionError as ae: - print("Your query didn't pass the validation tests.") - raise ae + print("Your query didn't pass the validation tests:") + print(message) + return print("Your query has passed the validation tests and is evaluated as correct, well done!") + return -""" + #try: + # assert studentresult_hash == correctresponse_hash, "Incorrect query." + + #except AssertionError as ae: + # print("Your query didn't pass the validation tests.") + # raise ae + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + + """ Validation function for Question 5 """ def question5_validation(): - correctresponse_hash = 'b75771bbba787b972d33b95146c23a60' + correctresponse_hash = '1ccb35f50108eecdbc8f672682f68d30' selectresult = display_observations_table() "display(selectresult)" studentresult_hash = generate_hash(selectresult) "studentresult_hash" try: - assert studentresult_hash == correctresponse_hash, "Incorrect query." + message = "Incorrect query." + assert studentresult_hash == correctresponse_hash except AssertionError as ae: - print("Your query didn't pass the validation tests.") - raise ae + print("Your query didn't pass the validation tests:") + print(message) + return print("Your query has passed the validation tests and is evaluated as correct, well done!") - \ No newline at end of file + return + + #try: + # assert studentresult_hash == correctresponse_hash, "Incorrect query." + + #except AssertionError as ae: + # print("Your query didn't pass the validation tests.") + # raise ae + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") \ No newline at end of file diff --git a/5-DDL_DML_views/stork-mooc.sqlite b/5-DDL_DML_views/stork-mooc.sqlite index 29872f2..bf025d9 100644 Binary files a/5-DDL_DML_views/stork-mooc.sqlite and b/5-DDL_DML_views/stork-mooc.sqlite differ diff --git a/6-Geometrical_spatial_queries/SQL_geometrical_queries.ipynb b/6-Geometrical_spatial_queries/SQL_geometrical_queries.ipynb index 4d2df27..a0d16a3 100644 --- a/6-Geometrical_spatial_queries/SQL_geometrical_queries.ipynb +++ b/6-Geometrical_spatial_queries/SQL_geometrical_queries.ipynb @@ -1,787 +1,787 @@ { "cells": [ { "cell_type": "markdown", "id": "3f467ad7-4bdd-4ce9-86f6-738f90481072", "metadata": { "tags": [] }, "source": [ "# Exercice SQL – Geometrical spatial queries\n", "\n", "The questions in this exercise are about the syntax of SQL queries.\n", "\n", "We will use:\n", "* a QSpatiaLite database with agricultural data for a region in India, including the layers \"Cultivated_Fields\", \"Roads\", \"Streams\", \"Village\" and \"Wells\";\n", "* SQL to write queries on this database;\n", "* the programming language Python to query this database (execute the queries).\n", "\n", "
\n", "
\n", " How to use this notebook?
\n", " This notebook consists of text cells and code cells (Python). The code cells must be executed in order to see the output of the program.
To execute a cell, select it and then simply click on the \"play\" button () in the toolbar at the top of the notebook, or type shift + enter.
It is important to run the code cells in the same order as they appear in the notebook.\n", "
" ] }, { "cell_type": "markdown", "id": "a08280d1-df23-4e8a-ae6f-64b55897c079", "metadata": {}, "source": [ "---\n", "---" ] }, { "cell_type": "markdown", "id": "70ad2bc7-4005-4396-a79b-e0b68a22c255", "metadata": { "tags": [] }, "source": [ "# Accessing the database with Python\n", "\n", "In this first part, we show you with an example how to write and execute your queries on the stork database with Python." ] }, { "cell_type": "markdown", "id": "796db845-18a1-4d53-960b-62cf7075c172", "metadata": { "tags": [] }, "source": [ "## Importing Python libraries\n", "To manipulate a database with Python we need the `sqlite3` and `pandas` libraries:" ] }, { "cell_type": "code", "execution_count": null, "id": "b4afb6f6-cfdd-41d8-b0a8-7c38e17b9a1a", "metadata": {}, "outputs": [], "source": [ "# Importing libraries\n", "import sqlite3\n", "import pandas" ] }, { "cell_type": "markdown", "id": "c3936e46-a816-435a-b46b-4064e77d1a81", "metadata": {}, "source": [ "## Connection to the database \n", "First, a connection to the database is established:" ] }, { "cell_type": "code", "execution_count": null, "id": "2ae2db69-8429-4dea-949f-8457ccf0b6e1", "metadata": {}, "outputs": [], "source": [ "# Establish the connection to the database\n", "con = sqlite3.connect(\"india.sqlite\")" ] }, { "cell_type": "code", "execution_count": null, "id": "3b7fba08-8930-4bd0-8ebd-767560d268ae", "metadata": {}, "outputs": [], "source": [ "# Load spatialite extension\n", "con.enable_load_extension(True)\n", "con.load_extension(\"mod_spatialite\")" ] }, { "cell_type": "markdown", "id": "df01de98-e114-4b75-8ae6-5ecf76a7c688", "metadata": {}, "source": [ "## Executing SQL queries\n", "\n", "To execute a SQL query on our database with Python, we use the `read_sql_query` function from the `pandas` library which :\n", "* executes the query\n", "* returns the result table\n", "To display this table, you just have to do a `display`.\n", "\n", "Here is an example, which executes the query `select * from wells` and displays the result:" ] }, { "cell_type": "code", "execution_count": null, "id": "4b307566-457f-4237-8985-9ebae37a1f9c", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\"\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "5a44ce0f-6e14-43a3-876a-495e538b2978", "metadata": {}, "source": [ "
\n", "\n", "**Question** : modify the above query to retrieve the contents of the `streams` table." ] }, { "cell_type": "markdown", "id": "bb688437-6065-4cf6-91ea-18b26c8e684b", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "704b31ba-5571-4520-a087-300d905ae1b6", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`select * from streams`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "cdef6666-e9c7-4614-8d89-95ea6629d98d", "metadata": {}, "source": [ "---\n", "\n", "## Question 1\n", "**Question** : Enter the query that returns the village's geometry in a column named \"geometry_wkt\"\n", "\n", "Here is how to test your query: \n", "1. Edit the query in the cell below and then execute the cell to see the result. \n", "You can edit and re-execute the cell as many times as necessary. \n", "2. Once you are satisfied with your query, you can execute the validation cell below to check if your query is correct or not. \n", "3. Finally, you can compare your query with the solution." ] }, { "cell_type": "code", "execution_count": null, "id": "b6a88264-70ad-4c4d-aea5-0d8661f52a21", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "3f34149a-c207-4813-bfa5-a5127ab5c9f9", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "839e19f7-bdc8-494d-9fae-b41e3035e4a1", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_geometrical_queries import *\n", "question1_validation(query)" ] }, { "cell_type": "markdown", "id": "49f371da-2975-4c5a-b8c3-d62256303315", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "3248b00b-42fc-4043-849d-ce206ef3d94e", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`select AsText(geometry) as geometry_wkt from Village`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "5ded8691-5cc7-4ffd-8e7a-8b9e68ec9b28", "metadata": {}, "source": [ "---\n", "\n", "## Question 2\n", "\n", "**Question** : Enter the query that returns the village's geometry type (Point, Polyline, Polygon, etc.) in a column named geometry_type.\n" ] }, { "cell_type": "code", "execution_count": null, "id": "93d90836-43e4-4a5d-ae9e-b0a32df31d14", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "939cb098-8bd4-4779-89ff-fa89c9b956c8", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "b60cc09c-cbe5-4055-bdbf-dff6ef205faa", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_geometrical_queries import *\n", "question2_validation(query)" ] }, { "cell_type": "markdown", "id": "2e5faadb-458b-41d3-a926-e63b39f81748", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "74758991-7c8a-49be-8c23-d99f3505b03b", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`select GeometryType(geometry) as geometry_type from Village`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "baf8ea9d-52c7-4f09-b2da-f7a2350edb84", "metadata": {}, "source": [ "---\n", "\n", "## Question 3\n", "\n", "**Question** : Enter the query that returns the list of agricultural fields larger than 4 hectares" ] }, { "cell_type": "code", "execution_count": null, "id": "bf93c34f-30c6-492d-b464-3773c6a188fe", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "4db09dc0-9958-4c78-bddc-d8b30762bc3e", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "edd89993-4524-4260-ae45-c03dcef23817", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_geometrical_queries import *\n", "question3_validation(query)" ] }, { "cell_type": "markdown", "id": "e10b54c4-4a7b-44d8-b307-ef4aaa14750f", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "e4b270be-9464-4b53-8dc9-ae0b6d45f30f", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`select * from Cultivated_Fields where Area(geometry) > 40000`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "2181501d-9609-45db-add3-11fef5627712", "metadata": { "tags": [] }, "source": [ "---\n", "\n", "## Question 4\n", "\n", "**Question** : Enter the query that returns the viilage's centroid (in a column named geom_centr) and display this point in QGIS" ] }, { "cell_type": "code", "execution_count": null, "id": "2b7497b2-d1ba-4040-b939-2d118330025f", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "49c3f7a9-e080-4944-bf63-a589fd8cddbc", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "6f6fa35d-2308-4d9f-91ad-3b849f122479", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_geometrical_queries import *\n", "question4_validation(query)" ] }, { "cell_type": "markdown", "id": "fcb72363-5b18-4cd1-a2d1-4d659e85511c", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "4b0e5608-1b34-47ca-9e4e-eac6f9fb944b", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`select Centroid(geometry) as geom_centr from Village`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "43863949-1e0d-4dee-9f08-f560700677a5", "metadata": { "tags": [] }, "source": [ "---\n", "\n", "## Question 5\n", "\n", "**Question** : Enter the query that returns the total length of the secondary roads (of type \"simple\") in a column named \"length\"" ] }, { "cell_type": "code", "execution_count": null, "id": "87d476b6-d562-425b-9e16-6e9cea306092", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "0dff4adf-dff5-4458-9e9f-9a303fe6dc62", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "4aaf1326-f94a-4527-b334-266e5989238f", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_geometrical_queries import *\n", "question5_validation(query)" ] }, { "cell_type": "markdown", "id": "1fc6941d-ec9f-4f95-88cb-feb3cbe33957", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "e3030f35-7b38-4be0-852b-b1dff7635117", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`Select sum(GLength(Geometry)) as longueur From Roads Where Type like 'simple'`\n", "\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "8db43a73-7383-4f1c-a875-434742cb7001", "metadata": { "tags": [] }, "source": [ "---\n", "\n", "## Question 6\n", "\n", - "**Question** : Enter the query that returns the number of wells located south-west of the village's centroid (use the metacharacter in the aggregation function)" + "**Question** : Enter the query that returns the number of wells located south-west of the village's centroid." ] }, { "cell_type": "code", "execution_count": null, "id": "a8e90b3e-0970-4486-9c80-b042d194ca45", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "65ef6110-3ea2-4467-ba16-5ddcc00c788c", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "cf911d59-4dfd-45e4-a780-27ef0d62c0ba", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_geometrical_queries import *\n", "question6_validation(query)" ] }, { "cell_type": "markdown", "id": "07f02dc6-36ec-48f2-ba39-eaf5d1667148", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "ea940598-7455-4e12-9164-25393a14c08f", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`SELECT COUNT(*) AS num_SW FROM Wells, Village WHERE X(Wells.Geometry)\n" ] }, { "cell_type": "markdown", "id": "a09fd9b4-7dc2-440a-8114-fe4113ecc51b", "metadata": { "tags": [] }, "source": [ "---\n", "\n", "## Question 7\n", "\n", "**Question** : Enter the query that allows to check, in a column named \"EPSG\", that all features in the \"Wells\" layer do share the same projection system\n" ] }, { "cell_type": "code", "execution_count": null, "id": "c3302661-2578-4978-8c42-d55d84713b64", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "8162aa85-1b2b-43e6-8a23-564fb8144ce9", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "5562a720-c7f2-43af-b711-666afbe530c9", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_geometrical_queries import *\n", "question7_validation(query)" ] }, { "cell_type": "markdown", "id": "93513f04-fac1-4ab6-92e1-72505d7f4e3f", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "941f4861-f978-4538-a9e4-828730f58bce", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`Select SRID(Geometry) as EPSG From Wells`\n", "\n", " \n", "
\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 } diff --git a/6-Geometrical_spatial_queries/SQL_geometrical_queries.py b/6-Geometrical_spatial_queries/SQL_geometrical_queries.py index b4d8822..622be6c 100644 --- a/6-Geometrical_spatial_queries/SQL_geometrical_queries.py +++ b/6-Geometrical_spatial_queries/SQL_geometrical_queries.py @@ -1,271 +1,324 @@ import sqlite3 import pandas import hashlib import pickle from pandas._testing import assert_frame_equal """ Utility functions """ def run_query(q): con = sqlite3.connect("india.sqlite") con.enable_load_extension(True) con.load_extension("mod_spatialite") return pandas.read_sql_query(q, con) -def generate_hash(dataframe): - return hashlib.md5(pickle.dumps(dataframe)).hexdigest() +def generate_hash(something): + # Old method: + #return hashlib.md5(pickle.dumps(dataframe)).hexdigest() + # New method: + if isinstance(something, pandas.core.frame.DataFrame): + to_hash = "" + # Get hash of column names, in order + for col in something.columns: + to_hash += generate_hash(col) + # Add hash of the dataframe content + to_hash += str(pandas.util.hash_pandas_object(something).sum()) + # Calculate hash of the hashes + return(generate_hash(to_hash)) + #return pandas.util.hash_pandas_object(something).sum() + elif isinstance(something, str): + return hashlib.md5(something.encode('utf-8')).hexdigest() + else: + return -1 def check_hash(dataframe, dataframehash): return generate_hash(dataframe) == dataframehash """ Validation function for Question 1 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question1_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'e9615a5915bb7d1b32f4028c5e4cd977' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '2c35771572b931662737eca58d64671c' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - - print("Your query has passed the validation tests and is evaluated as correct, well done!") + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 2 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question2_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'db93725980882289f5860ab67d8e834f' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '7a9d0f6358dc6b20574573b5eba3daff' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - - """ +""" Validation function for Question 3 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question3_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'ec83fa5ea746982b4527ef311471e862' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '7ab0edb51bfb33aac2100caf334f0014' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - - """ +""" Validation function for Question 4 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question4_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '78b62222924bd032a5336a349e1db8a6' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'bbf25e2e93f636e94727f410ccbf6569' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 5 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question5_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '265f265a18c2cb6bfd9c1c81c1dac805' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '00bb75d340e2a5752650bd933f2a7aff' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - + return + """ Validation function for Question 6 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question6_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '63a6dc8c30c7e8213f63b240ef9b49b2' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '864921c271f65adb88d3fb965bec3c83' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - + return + """ Validation function for Question 7 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question7_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '1fabe794369f17e66458156b5f71e12b' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'cad419c804eeba32a4f89b3ed8a8cac9' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") - - \ No newline at end of file + return \ No newline at end of file diff --git a/7-Topological_spatial_queries/SQL_topological_queries.ipynb b/7-Topological_spatial_queries/SQL_topological_queries.ipynb index ceb0838..381b248 100644 --- a/7-Topological_spatial_queries/SQL_topological_queries.ipynb +++ b/7-Topological_spatial_queries/SQL_topological_queries.ipynb @@ -1,532 +1,532 @@ { "cells": [ { "cell_type": "markdown", "id": "3f467ad7-4bdd-4ce9-86f6-738f90481072", "metadata": {}, "source": [ "# Exercice SQL – Topological spatial queries\n", "\n", "The questions in this exercise are about the syntax of SQL queries.\n", "\n", "We will use:\n", "* a QSpatiaLite database with agricultural data for a region in India, including the layers \"Cultivated_Fields\", \"Roads\", \"Streams\", \"Village\" and \"Wells\";\n", "* SQL to write queries on this database;\n", "* the programming language Python to query this database (execute the queries).\n", "\n", "
\n", "
\n", " How to use this notebook?
\n", " This notebook consists of text cells and code cells (Python). The code cells must be executed in order to see the output of the program.
To execute a cell, select it and then simply click on the \"play\" button () in the toolbar at the top of the notebook, or type shift + enter.
It is important to run the code cells in the same order as they appear in the notebook.\n", "
" ] }, { "cell_type": "markdown", "id": "a08280d1-df23-4e8a-ae6f-64b55897c079", "metadata": {}, "source": [ "---\n", "---" ] }, { "cell_type": "markdown", "id": "87e805db-56a7-4961-88ac-188f654c49bb", "metadata": { "tags": [] }, "source": [ "# Accessing the database with Python\n", "\n", "In this first part, we show you with an example how to write and execute your queries on the stork database with Python." ] }, { "cell_type": "markdown", "id": "ebd61a3d-fadf-4b0a-aaea-749f36a4f65b", "metadata": { "tags": [] }, "source": [ "## Importing Python libraries\n", "To manipulate a database with Python we need the `sqlite3` and `pandas` libraries:" ] }, { "cell_type": "code", "execution_count": null, "id": "7a9341b1-f624-4d3e-9c28-eaf5e1dc1cdd", "metadata": {}, "outputs": [], "source": [ "# Importing libraries\n", "import sqlite3\n", "import pandas" ] }, { "cell_type": "markdown", "id": "207f63fb-4a3c-448c-b8f4-d576ee14f934", "metadata": {}, "source": [ "## Connection to the database \n", "First, a connection to the database is established:" ] }, { "cell_type": "code", "execution_count": null, "id": "2ae2db69-8429-4dea-949f-8457ccf0b6e1", "metadata": {}, "outputs": [], "source": [ "# Establish the connection to the database:\n", "con = sqlite3.connect(\"india.sqlite\")" ] }, { "cell_type": "code", "execution_count": null, "id": "3b7fba08-8930-4bd0-8ebd-767560d268ae", "metadata": {}, "outputs": [], "source": [ "# Load spatialite extension\n", "con.enable_load_extension(True)\n", "con.load_extension(\"mod_spatialite\")" ] }, { "cell_type": "markdown", "id": "61d93076-5f19-4368-b9bb-857509c4a1eb", "metadata": {}, "source": [ "## Executing SQL queries\n", "\n", "To execute a SQL query on our database with Python, we use the `read_sql_query` function from the `pandas` library which :\n", "* executes the query\n", "* returns the result table\n", "To display this table, you just have to do a `display`.\n", "\n", "Here is an example, which executes the query `select * from wells` and displays the result:" ] }, { "cell_type": "code", "execution_count": null, "id": "0349a05d-05a0-4475-95e5-29b6c40187b1", "metadata": {}, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\"\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "1e67177d-5d7c-4c7c-87dc-411c813d283a", "metadata": {}, "source": [ "
\n", "\n", "**Question** : modify the above query to retrieve the contents of the `streams` table." ] }, { "cell_type": "markdown", "id": "ccc0e712-9c7b-436a-8c39-30a203e7bda9", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "de659433-88aa-4037-8e22-5733cfcd889d", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`select * from streams`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "cdef6666-e9c7-4614-8d89-95ea6629d98d", "metadata": {}, "source": [ "---\n", "\n", "## Question 1\n", "**Question** : Enter the query that returns the list of the id's of the wells that completely inside Mr. Vajubhai Madha's fields\n", "\n", "Here is how to test your query: \n", "1. Edit the query in the cell below and then execute the cell to see the result. \n", "You can edit and re-execute the cell as many times as necessary. \n", "2. Once you are satisfied with your query, you can execute the validation cell below to check if your query is correct or not. \n", "3. Finally, you can compare your query with the solution." ] }, { "cell_type": "code", "execution_count": null, "id": "a3e9aa00-348b-4318-b189-6198e1d76828", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "cd727d93-26be-4065-8240-b602a58feb2f", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "839e19f7-bdc8-494d-9fae-b41e3035e4a1", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_topological_queries import *\n", "question1_validation(query)" ] }, { "cell_type": "markdown", "id": "b5484b5a-a09d-40d3-b6e9-9b291f6de612", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "3248b00b-42fc-4043-849d-ce206ef3d94e", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", - "`SELECT DISTINCT Cultivated_Fields.ID FROM Wells, Cultivated_Fields WHERE intersects(Wells.Geometry, Cultivated_Fields.Geometry) AND Cultivated_Fields.owner like 'Bhikhabhai Lakhman'`\n", + "`select Wells.idwells from Cultivated_Fields, Wells where within(Wells.Geometry, Cultivated_Fields.Geometry) and Cultivated_Fields.owner like 'Vajubhai Madha'`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "5ded8691-5cc7-4ffd-8e7a-8b9e68ec9b28", "metadata": {}, "source": [ "---\n", "\n", "## Question 2\n", "\n", "**Question** : Enter the query that allows to create the QGIS layer hosting the agricultural fields that are in contact with a river while making certain that each selected features only appears once (use a grouping clause).\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "2abb3246-aa76-4bea-bca9-d7210aa84f01", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "fd52bda4-1f3a-44c3-bc39-d2b4abfcd7c6", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "b60cc09c-cbe5-4055-bdbf-dff6ef205faa", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_topological_queries import *\n", "question2_validation(query)" ] }, { "cell_type": "markdown", "id": "7308e5d5-dd56-4544-acf7-adb12ac238d5", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "74758991-7c8a-49be-8c23-d99f3505b03b", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`select Cultivated_Fields.Geometry from Cultivated_Fields , Streams where intersects(Streams.Geometry, Cultivated_Fields.Geometry) group by Cultivated_Fields.Geometry`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "baf8ea9d-52c7-4f09-b2da-f7a2350edb84", "metadata": { "tags": [] }, "source": [ "---\n", "\n", "## Question 3\n", "\n", "**Question** : Enter the query that returns the list of features located at less than 10 meters from a primary road (of type \"double\") without using a GROUP BY clause." ] }, { "cell_type": "code", "execution_count": null, "id": "72269faf-b502-4ded-b36d-c2f1a8b238a2", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "8a4b8988-8304-45cd-9d33-5a121da3db8e", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "edd89993-4524-4260-ae45-c03dcef23817", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_topological_queries import *\n", "question3_validation(query)" ] }, { "cell_type": "markdown", "id": "b3a41747-a321-40dd-b6c2-5501eeb0d7f4", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "e4b270be-9464-4b53-8dc9-ae0b6d45f30f", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`SELECT DISTINCT Cultivated_Fields.ID FROM Cultivated_Fields, Roads WHERE Distance(Cultivated_Fields.Geometry, Roads.Geometry) <10 and Roads.Type='double'`\n", " \n", "
\n" ] }, { "cell_type": "markdown", "id": "2181501d-9609-45db-add3-11fef5627712", "metadata": { "tags": [] }, "source": [ "---\n", "\n", "## Question 4\n", "\n", "**Question** : Enter the query that creates a 20m wide buffer around roads (name this column geom_buffer) and display it in QGIS." ] }, { "cell_type": "code", "execution_count": null, "id": "a7586c84-a4a6-45b4-bc46-dc47edf04e08", "metadata": { "tags": [] }, "outputs": [], "source": [ "# Give the query to execute\n", "query = \"select * from wells\" # TODO: MODIFY THE QUERY HERE\n", "\n", "# Execute the query and get the result table\n", "resulttable = pandas.read_sql_query(query, con)\n", "\n", "# Display the result table\n", "display(resulttable)" ] }, { "cell_type": "markdown", "id": "bbcda836-4f22-4401-b2e4-eae1b2f94289", "metadata": {}, "source": [ "**Validation** : Execute the following cell to see if your query is correct or not. \n", "Read carefully the error messages to correct your query." ] }, { "cell_type": "code", "execution_count": null, "id": "6f6fa35d-2308-4d9f-91ad-3b849f122479", "metadata": { "tags": [] }, "outputs": [], "source": [ "from SQL_topological_queries import *\n", "question4_validation(query)" ] }, { "cell_type": "markdown", "id": "1141a355-935e-4b08-9728-f999e0a587e2", "metadata": { "tags": [] }, "source": [ "**Solution** : Click on the three small dots \"...\" below." ] }, { "cell_type": "markdown", "id": "4b0e5608-1b34-47ca-9e4e-eac6f9fb944b", "metadata": { "jupyter": { "source_hidden": true }, "tags": [] }, "source": [ "
\n", "\n", "Here is the correct query:\n", " \n", "`select Buffer(geometry,20) as geom_buffer from Streams`\n", " \n", "
\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.10" } }, "nbformat": 4, "nbformat_minor": 5 } diff --git a/7-Topological_spatial_queries/SQL_topological_queries.py b/7-Topological_spatial_queries/SQL_topological_queries.py index bf1fb2e..6885d70 100644 --- a/7-Topological_spatial_queries/SQL_topological_queries.py +++ b/7-Topological_spatial_queries/SQL_topological_queries.py @@ -1,165 +1,202 @@ import sqlite3 import pandas import hashlib import pickle from pandas._testing import assert_frame_equal """ Utility functions """ def run_query(q): con = sqlite3.connect("india.sqlite") con.enable_load_extension(True) con.load_extension("mod_spatialite") return pandas.read_sql_query(q, con) -def generate_hash(dataframe): - return hashlib.md5(pickle.dumps(dataframe)).hexdigest() +def generate_hash(something): + # Old method: + #return hashlib.md5(pickle.dumps(dataframe)).hexdigest() + # New method: + if isinstance(something, pandas.core.frame.DataFrame): + to_hash = "" + # Get hash of column names, in order + for col in something.columns: + to_hash += generate_hash(col) + # Add hash of the dataframe content + to_hash += str(pandas.util.hash_pandas_object(something).sum()) + # Calculate hash of the hashes + return(generate_hash(to_hash)) + #return pandas.util.hash_pandas_object(something).sum() + elif isinstance(something, str): + return hashlib.md5(something.encode('utf-8')).hexdigest() + else: + return -1 + def check_hash(dataframe, dataframehash): return generate_hash(dataframe) == dataframehash """ Validation function for Question 1 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question1_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = 'd166c8873454a2de98a40adf81b53d76' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '6378aefaf2ab1d24d85443200d7e9daf' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - - print("Your query has passed the validation tests and is evaluated as correct, well done!") + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - """ +""" Validation function for Question 2 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question2_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '8530058d9604c8cedc237700a53b6bd9' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'c3889fb606a0c9d5e06c0a5bd0d02a50' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - - """ +""" Validation function for Question 3 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question3_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '093be6b2f31ffe3bae30e5abea2959a7' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = 'fa3c7ad4a555eb18fd340465e63ba693' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + + #print("Your query has passed the validation tests and is evaluated as correct, well done!") print("Your query has passed the validation tests and is evaluated as correct, well done!") + return - - """ +""" Validation function for Question 4 => We compare with the possible mistakes made by students => Then we finally compare with the correct answer # Possible mistakes: we check that the result obtained by the student is different from: # * the union of 2001 and 2003 """ def question4_validation(studentquery): # Misconception: use of UNION #query = "select name from migration where departureyear=2001 union select name from migration where departureyear=2003" #if run_query(studentquery).equals(run_query(query)): # print('You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.' # +'\nYou should get only the storks who departed BOTH in 2001 and in 2003.' # +'\nHave you drawn a diagram on paper to determine which logical operator to use?') # Default case that catches all misconceptions (including the mistakes flagged above) # => we compare with the correct answer try: # Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table # (this also means that the error message cannot help students correct their answer) # NB: the hash of the correct result table has to be generated beforehand (using the utility function) # generate_hash(run_query(correctquery)) - response_hash = '6ef0fa33660f3f1c0b9446931c9a2d80' - assert check_hash(run_query(studentquery), response_hash), "Incorrect query." + response_hash = '045104975f8d478422f35d50ca52d3b8' + message = "Incorrect query." + assert check_hash(run_query(studentquery), response_hash) except AssertionError as ae: - print("Your query didn't pass the validation tests.") + #print("Your query didn't pass the validation tests.") # We raise the exception here to stop execution and prevent the success message to be printed - raise ae - - print("Your query has passed the validation tests and is evaluated as correct, well done!") + #raise ae + print("Your query didn't pass the validation tests:") + print(message) + return + #print("Your query has passed the validation tests and is evaluated as correct, well done!") + print("Your query has passed the validation tests and is evaluated as correct, well done!") + return \ No newline at end of file