Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F59789263
SQLFusionEmboitement_v1.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Thu, Apr 25, 05:42
Size
5 KB
Mime Type
text/x-python
Expires
Sat, Apr 27, 05:42 (2 d)
Engine
blob
Format
Raw Data
Handle
17229883
Attached To
rSQLVAL SQLValidation
SQLFusionEmboitement_v1.py
View Options
import
sqlite3
import
pandas
import
hashlib
import
pickle
from
pandas._testing
import
assert_frame_equal
"""
Utility functions
"""
def
run_query
(
q
):
con
=
sqlite3
.
connect
(
"stork-mooc.sqlite"
)
return
pandas
.
read_sql_query
(
q
,
con
)
def
generate_hash
(
dataframe
):
return
hashlib
.
md5
(
pickle
.
dumps
(
dataframe
))
.
hexdigest
()
def
check_hash
(
dataframe
,
dataframehash
):
return
generate_hash
(
dataframe
)
==
dataframehash
"""
Validation function 1
=> We compare with the correct answer:
select name from migration where departureyear=2001
intersect
select name from migration where departureyear=2003
"""
def
question1_validation_correct
(
studentquery
):
try
:
# *** Option 1 ***
# Compare with result of the correct query - but gives the correct query to the student
correctquery
=
"select name from migration where departureyear=2001 intersect select name from migration where departureyear=2003"
assert_frame_equal
(
run_query
(
studentquery
),
run_query
(
correctquery
))
# *** Option 2 ***
# Compare with result table - but gives the correct result table to the student (who still has to figure out the query)
# (can also be quite cumbersome to write when the correct result table becomes large)
correctresult
=
pandas
.
DataFrame
(
data
=
[
"Annamarie"
,
"Felix"
,
"Jonas"
,
"Prinzesschen"
],
columns
=
[
"name"
])
assert_frame_equal
(
run_query
(
studentquery
),
correctresult
)
# *** Option 3 ***
# Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table
# (this also means that the error message cannot help students correct their answer)
# NB: the hash of the correct result table has to be generated beforehand (using the utility function)
# generate_hash(run_query(correctquery)
response_hash
=
'b27b60409c967bcb8a93dbbc75ba33cd'
assert
check_hash
(
run_query
(
studentquery
),
response_hash
),
"Incorrect query."
except
AssertionError
as
ae
:
print
(
"Your query didn't pass the validation tests."
)
# We raise the exception here to stop execution and prevent the success message to be printed
raise
ae
print
(
"Your query has passed the validation tests and is evaluated as correct, well done!"
)
return
True
"""
Validation function 2
=> We compare with the possible mistakes made by students
Therefore we check that the result obtained by the student is different from:
* the union of 2001 and 2003
* 2001 except 2003
* 2003 except 2001
* 2001 only
* 2003 only
"""
def
question1_validation
(
studentquery
):
# Misconception: use of UNION
query
=
"select name from migration where departureyear=2001 union select name from migration where departureyear=2003"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.'
+
'
\n
You should get only the storks who departed BOTH in 2001 and in 2003.'
+
'
\n
Have you drawn a diagram on paper to determine which logical operator to use?'
)
# Misconception: use of EXCEPT
query
=
"select name from migration where departureyear=2001 except select name from migration where departureyear=2003"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected the storks who departed in 2001 and REMOVED the storks who also departed in 2003.'
+
'
\n
You should get those who departed BOTH in 2001 AND 2003.'
+
'
\n
Have you drawn a diagram on paper to determine which logical operator to use?'
)
# Misconception: use of EXCEPT (opposite direction)
query
=
"select name from migration where departureyear=2003 except select name from migration where departureyear=2001"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected the storks who departed in 2003 and REMOVED the storks who also departed in 2001.'
+
'
\n
You should get those who departed BOTH in 2001 AND 2003.'
+
'
\n
Have you drawn a diagram on paper to determine which logical operator to use?'
)
# Misconception: partial query
query
=
"select name from migration where departureyear=2001"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected only the storks who departed in 2001.'
+
'
\n
This is a good way to start writing this type of query but you are only halfway to the solution.'
+
'
\n
How could you get the storks who departed BOTH in 2001 AND 2003?'
+
'
\n
Maybe draw a diagram to determine which logical operator to use?'
)
# Misconception: partial query (other side)
query
=
"select name from migration where departureyear=2003"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected only the storks who departed in 2003.'
+
'
\n
This is a good way to start writing this type of query but you are only halfway to the solution.'
+
'
\n
How could you get the storks who departed BOTH in 2001 AND 2003?'
+
'
\n
Maybe draw a diagram to determine which logical operator to use?'
)
# Default case for all other misconceptions
# => we fall back on the general validation function
assert
question1_validation_correct
(
studentquery
)
Event Timeline
Log In to Comment