Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F61399058
SQLFusionEmboitement.py
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Mon, May 6, 10:26
Size
5 KB
Mime Type
text/x-python
Expires
Wed, May 8, 10:26 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
17506665
Attached To
rSQLVAL SQLValidation
SQLFusionEmboitement.py
View Options
import
sqlite3
import
pandas
import
hashlib
import
pickle
from
pandas._testing
import
assert_frame_equal
"""
Utility functions
"""
def
run_query
(
q
):
con
=
sqlite3
.
connect
(
"stork-mooc.sqlite"
)
return
pandas
.
read_sql_query
(
q
,
con
)
def
generate_hash
(
dataframe
):
return
hashlib
.
md5
(
pickle
.
dumps
(
dataframe
))
.
hexdigest
()
def
check_hash
(
dataframe
,
dataframehash
):
return
generate_hash
(
dataframe
)
==
dataframehash
"""
Validation function 1
=> We compare with the correct answer:
select name from migration where departureyear=2001
intersect
select name from migration where departureyear=2003
"""
def
question1_validation_correct
(
studentquery
):
try
:
# *** Option 1 ***
# Compare with result of the correct query - but gives the correct query to the student
correctquery
=
"select name from migration where departureyear=2001 intersect select name from migration where departureyear=2003"
assert_frame_equal
(
run_query
(
studentquery
),
run_query
(
correctquery
))
# *** Option 2 ***
# Compare with result table - but gives the correct result table to the student (who still has to figure out the query)
# (can also be quite cumbersome to write when the correct result table becomes large)
correctresult
=
pandas
.
DataFrame
(
data
=
[
"Annamarie"
,
"Felix"
,
"Jonas"
,
"Prinzesschen"
],
columns
=
[
"name"
])
assert_frame_equal
(
run_query
(
studentquery
),
correctresult
)
# *** Option 3 ***
# Compare with the MD5 hash of the result table, which means the student can neither know the correct query nor the correct result table
# (this also means that the error message cannot help students correct their answer)
# NB: the hash of the correct result table has to be generated beforehand (using the utility function)
# generate_hash(run_query(correctquery)
response_hash
=
'b27b60409c967bcb8a93dbbc75ba33cd'
assert
check_hash
(
run_query
(
studentquery
),
response_hash
),
"Incorrect query."
except
AssertionError
as
ae
:
print
(
"Your query didn't pass the validation tests."
)
# We raise the exception here to stop execution and prevent the success message to be printed
raise
ae
print
(
"Your query has passed the validation tests and is evaluated as correct, well done!"
)
return
True
"""
Validation function 2
=> We compare with the possible mistakes made by students
Therefore we check that the result obtained by the student is different from:
* the union of 2001 and 2003
* 2001 except 2003
* 2003 except 2001
* 2001 only
* 2003 only
"""
def
question1_validation
(
studentquery
):
# Misconception: use of UNION
query
=
"select name from migration where departureyear=2001 union select name from migration where departureyear=2003"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected the storks who departed in 2001 AS WELL AS the storks who departed in 2003.'
+
'
\n
You should get only the storks who departed BOTH in 2001 and in 2003.'
+
'
\n
Have you drawn a diagram on paper to determine which logical operator to use?'
)
# Misconception: use of EXCEPT
query
=
"select name from migration where departureyear=2001 except select name from migration where departureyear=2003"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected the storks who departed in 2001 and REMOVED the storks who also departed in 2003.'
+
'
\n
You should get those who departed BOTH in 2001 AND 2003.'
+
'
\n
Have you drawn a diagram on paper to determine which logical operator to use?'
)
# Misconception: use of EXCEPT (opposite direction)
query
=
"select name from migration where departureyear=2003 except select name from migration where departureyear=2001"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected the storks who departed in 2003 and REMOVED the storks who also departed in 2001.'
+
'
\n
You should get those who departed BOTH in 2001 AND 2003.'
+
'
\n
Have you drawn a diagram on paper to determine which logical operator to use?'
)
# Misconception: partial query
query
=
"select name from migration where departureyear=2001"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected only the storks who departed in 2001.'
+
'
\n
This is a good way to start writing this type of query but you are only halfway to the solution.'
+
'
\n
How could you get the storks who departed BOTH in 2001 AND 2003?'
+
'
\n
Maybe draw a diagram to determine which logical operator to use?'
)
# Misconception: partial query (other side)
query
=
"select name from migration where departureyear=2003"
if
run_query
(
studentquery
)
.
equals
(
run_query
(
query
)):
print
(
'You selected only the storks who departed in 2003.'
+
'
\n
This is a good way to start writing this type of query but you are only halfway to the solution.'
+
'
\n
How could you get the storks who departed BOTH in 2001 AND 2003?'
+
'
\n
Maybe draw a diagram to determine which logical operator to use?'
)
# Default case for all other misconceptions
# => we fall back on the general validation function
assert
question1_validation_correct
(
studentquery
)
Event Timeline
Log In to Comment