diff --git a/local/stats/amcstats b/local/stats/amcstats index 16d046d..7bf8ebe 100755 --- a/local/stats/amcstats +++ b/local/stats/amcstats @@ -1,59 +1,165 @@ #!/usr/bin/env php addFile($name, $filename, $total); + if (in_array($filename, $loaded)) { + echo "Trying to load $filename a second time...\n"; + exit; + } + $Global->addFile($name, $filename, $total); + $loaded[] = $filename; } -$sorted_data = $EC->sortByTotalPoints(false); -print_r($EC->getTeachers()); -print_r($EC->getSections()); -$EC->doStatsOnCommonItems(false); -$EC->doStats(); +foreach ($types as $type) { + + // Build samples + foreach ($samples as $sample) { + // Print header + $sample_header = strtoupper($sample); + switch ($type) { + case 'average': + printAverage($sample_header); + break; + case 'dist': + case 'dist_percentage': + printDistribution($sample_header); + break; + case 'marks': + break; + } + + // Build samples + $Samples = array(); + switch ($sample) { + case 'global': + $Samples['global'] = new ExamCalcs($Global->getDataSet()); + break; + case 'sections': + foreach ($Global->getSections() as $section) { + $tmpSample = new ExamCalcs($Global->getDataSet()); + $tmpSample->filterBySections($section); + $Samples[$section] = $tmpSample; + } + break; + case 'profs': + foreach ($Global->getTeachers() as $prof) { + $tmpSample = new ExamCalcs($Global->getDataSet()); + $tmpSample->filterByTeachers($prof); + $Samples[$prof] = $tmpSample; + } + break; + } + + // Print stats + foreach ($Samples as $name => $S) { + $stats = $S->getStats(); + switch ($type) { + case 'average': + printAverage($name, $stats); + break; + case 'dist_percentage': + printDistribution($name, $stats, true); + break; + case 'dist': + printDistribution($name, $stats); + break; + case 'questions': + echo "\"STATS ON QUESTIONS: ".strtoupper($name)."\"\n"; + try { + $S->printStatsOnCommonItems(); + } catch (Exception $e) { + echo "\"Not enough data\"\n"; + } + break; + case 'marks': + $marks = $S->getMarks(); + foreach ($marks as $student) { + echo implode(';', $student)."\n"; + } + break; + } + } + echo "\n"; + } +} + +exit; + +function printDistribution($line_id = null, $stats = null, $percentage = false) { + if (is_null($line_id)) $line_id = "N/A"; + if (is_null($stats)) { + echo "\"$line_id\",1,1.25,1.5,1.75,2,2.25,2.5,2.75,3,3.25,3.5,3.75,4,4.25,4.5,4.75,5,5.25,5.5,5.75,6\n"; + } else { + echo "\"$line_id\""; + if (!$percentage) { + foreach ($stats['distribution'] as $n) echo ",$n"; + } else { + foreach ($stats['distribution_percentage'] as $n) echo ",$n"; + } + echo "\n"; + } +} + + +function printAverage($line_id = null, $stats = null) { + if (is_null($line_id)) $line_id = "N/A"; + if (is_null($stats)) { + echo "\"$line_id\",\"n\",\"tot\",\"average\",\"stddev\",\"median\"\n"; + } else { + echo "\"$line_id\",\"{$stats['quarter_mark6']['n']}\",\"{$stats['quarter_mark6']['tot']}\",\"{$stats['quarter_mark6']['average']}\",\"{$stats['quarter_mark6']['stddev']}\",\"{$stats['quarter_mark6']['median']}\"\n"; + } +} exit; ?> diff --git a/local/stats/lib/lib_amcstats.php b/local/stats/lib/lib_amcstats.php index 48a4879..1cff011 100755 --- a/local/stats/lib/lib_amcstats.php +++ b/local/stats/lib/lib_amcstats.php @@ -1,418 +1,532 @@ filename = $filename; $this->teacher = $teacher; + $this->exam_points = $exam_points; $raw_data = file($this->filename, FILE_IGNORE_NEW_LINES); $this->raw_data = array(); foreach($raw_data as $line) { $line = array_map("clean_array", explode(';', $line)); $line = array_map("decimal_conversion", $line); $this->raw_data[] = $line; } $this->parseHeader(); $this->parseStudents(); } public function getStudents() { return $this->students; } protected function parseHeader() { foreach($this->raw_data[0] as $col_id => $value) { // Analyse header from CSV file, based on content $item = array(); switch ($value) { case "ID": case "NAME": case "EMAIL": case "SECTION": case "Mark": $item['name'] = $value; $item['type'] = "info"; break; case "SCIPER": $item['name'] = $value; $item['type'] = "unique_id"; break; default: $item['name'] = $value; if (preg_match('/^TICKED:/', $value)) { $item['name'] = preg_replace('/^TICKED:/', '', $value); $item['type'] = "ticked"; } else { $item['type'] = "question"; $item['subtype'] = $this->guessSubtype($col_id); } } // Stats will be computed at a later stage $item['stats'] = null; $this->columns[] = $item; } } protected function guessSubtype($col_id) { $subtype = null; //$ticked_col = $this->getColIdsByType('ticked', $this->getQuestionNameByColId($col_id)); $min_points = 0; $max_points = 0; foreach($this->raw_data as $line) { if (preg_match('/\./', $line[$col_id])) return 'open'; if ($line[$col_id] > $max_points) $max_points = $line[$col_id]; } if ($max_points == 3) return 'mc'; if ($max_points == 1) return 'tf'; return 'unknown'; } protected function getColIdByName($name) { foreach ($this->columns as $id => $col) if ($col['name'] == $name) return $id; throw new Exception('Column not found: '.$name); } protected function getColIdsByType($type, $name = null) { $ids = array(); if (is_null($name)) { foreach ($this->columns as $id => $col) if ($col['type'] == $type) $ids[] = $id; } else { foreach ($this->columns as $id => $col) if ($col['type'] == $type and $col['name'] == $name) $ids[] = $id; } if (count($ids) == 0) { if (is_null($name)) throw new Exception('Column type not found: '.$type); throw new Exception('Column type not found: '.$type.'/'.$name); } if (count($ids) == 1) return $ids[0]; return $ids; } protected function getQuestionNameByColId($id) { if (array_key_exists($id, $this->columns) and $this->columns[$id]['type'] == 'question') return $this->columns[$id]['name']; throw new Exception('Column not found, or is not a question: '.$id); } protected function parseStudents() { foreach($this->raw_data as $line => $student) { if ($line == 0) continue; // skip header $data = array('teacher' => $this->teacher); foreach(array('ID', 'SCIPER', 'NAME', 'EMAIL', 'SECTION') as $key) { $data[$key] = $student[$this->getColIdByName($key)]; } // Get points $points = array(); $data['items'] = array(); foreach($this->getColIdsByType('question') as $col) { $item = array(); $item['name'] = $this->getQuestionNameByColId($col); $item['points'] = (float)$student[$col]; $item['right'] = (int)($item['points']>0); $points[] = $item['points']; $item['ticked'] = $student[$this->getColIdsByType('ticked', $item['name'])]; $item['type'] = $this->columns[$col]['type']; $item['subtype'] = $this->columns[$col]['subtype']; $data['items'][] = $item; } $data['total'] = array_sum($points); $data['present'] = (int)(array_sum(array_map("square", $points))>0); + $data['exam_points'] = $this->exam_points; + + // Compute marks + if ($data['present']) { + $data['positive_total'] = (float)max($data['total'], 0.0); + $data['mark6'] = (float)min($data['positive_total']/($this->exam_points)*5.0+1, 6.0); + $data['quarter_mark6'] = (float)round($data['mark6']*4.0, 0)/4.0; + } else { + $data['positive_total'] = 'n/a'; + $data['mark6'] = 'abs'; + $data['quarter_mark6'] = 'abs'; + } + if (preg_match('/^FAKE/', $data['SCIPER'])) { if ($data['present']) $data['type'] = 'unregistered'; else $data['type'] = 'unused'; } else $data['type'] = 'student'; $this->students[] = $data; } } } // Compare students on total (higher to lower) function cmp_total($a, $b) { if ($a['total'] == $b['total']) { return 0; } return ($a['total'] < $b['total']) ? 1 : -1; } class ExamCalcs { protected $dataset = null; protected $tmp_dataset = null; public function __construct($dataset = null) { $this->dataset = array(); if (!is_null($dataset)) $this->dataset = $dataset; } public function addFile($teacher, $teacher_file, $max_points) { - echo "Adding $teacher ($teacher_file) to the dataset ($max_points points).\n"; + #echo "Adding $teacher ($teacher_file) to the dataset ($max_points points).\n"; $AR = new AmcReader($teacher_file, $teacher, $max_points); $this->addDataSet($AR->getStudents()); } public function addDataSet($data) { foreach ($data as $student) $this->dataset[] = $student; } + public function filterBySections($sections, $update = true) { + $dataset = array(); + foreach ($this->dataset as $student) { + if (is_array($sections)) { + if (in_array($student['SECTION'], $sections)) + $dataset[] = $student; + } else { + if ($student['SECTION'] == $sections) + $dataset[] = $student; + } + } + if ($update) $this->dataset = $dataset; + return $dataset; + } + public function filterByTeachers($teachers, $update = true) { $dataset = array(); foreach ($this->dataset as $student) { if (is_array($teachers)) { if (in_array($student['teacher'], $teachers)) $dataset[] = $student; } else { if ($student['teacher'] == $teachers) $dataset[] = $student; } } if ($update) $this->dataset = $dataset; return $dataset; } public function getTeachers() { $teachers = array(); foreach ($this->dataset as $student) { if (!in_array($student['teacher'], $teachers)) $teachers[] = $student['teacher']; } return $teachers; } public function getSections() { $sections = array(); foreach ($this->dataset as $student) { $section = $student['SECTION']; if (!in_array($section, $sections) and $section != 'XXX') $sections[] = $section; } return $sections; } - public function doStatsOnCommonItems() { + public function printStatsOnCommonItems() { // Sort dataset by points $dataset = $this->sortByTotalPoints(false); if (count($dataset) < 3) throw new Exception('Dataset is too small.'); // Get items from the first student $items = array(); foreach ($this->dataset[0]['items'] as $item) $items[] = $item['name']; foreach ($dataset as $student) { // Get items for current student $tmp_items = array(); foreach ($student['items'] as $item) $tmp_items[] = $item['name']; // Keep only items in both '$items' AND '$tmp_items' $items = array_intersect($items, $tmp_items); } // Now, filter items in the dataset $filtered_dataset = array(); foreach ($dataset as $student) { if (!$student['present']) continue; $filtered_items = array(); foreach ($student['items'] as $item) { if (in_array($item['name'], $items)) $filtered_items[] = $item; } if (count($filtered_items)) { $student['items'] = $filtered_items; $filtered_dataset[] = $student; } } $dataset = $filtered_dataset; if (count($dataset) < 3) throw new Exception('Dataset is too small.'); - // Computed limits + // Compute limits $nb_students = count($dataset); $twenty_seven = (int)($nb_students*27.0/100); - $upper_start = 0; - $lower_stop = $nb_students-1; $upper_stop = $twenty_seven-1; - $lower_start = $lower_stop-$twenty_seven+1; - - echo "$nb_students / $twenty_seven / $upper_start -> $upper_stop / $lower_start -> $lower_stop \n"; - - $stats = array(); - foreach ($items as $item_name) { - echo "$item_name\n"; - $stats[$item_name] = array(); - $stats[$item_name]['27%'] = $twenty_seven; - $stats[$item_name]['upper'] = 0; - $stats[$item_name]['lower'] = 0; - - // Stats for upper 27% - for ($i = $upper_start ; $i <= $upper_stop; $i++) { - foreach($dataset[$i]['items'] as $cur_item) { - if ($cur_item['name'] == $item_name) { - if ($cur_item['right']) $stats[$item_name]['upper']++; - } - } - } - // Stats for lower 27% - for ($i = $lower_start ; $i <= $lower_stop; $i++) { - foreach($dataset[$i]['items'] as $cur_item) { - if ($cur_item['name'] == $item_name) { - if ($cur_item['right']) $stats[$item_name]['lower']++; - } - } - } - $stats[$item_name]['DI'] = (float)(($stats[$item_name]['upper']-$stats[$item_name]['lower'])/$twenty_seven); - } + $lower_start = $nb_students-$twenty_seven+1; + #echo "$nb_students / $twenty_seven / 0 -> $upper_stop / $lower_start -> $nb_students \n"; $stats = array(); foreach ($dataset as $i => $student) { foreach ($student['items'] as $item) { $name = $item['name']; if (!array_key_exists($name, $stats)) - $stats[$name] = array( '27%' => $twenty_seven, - 'upper' => 0, - 'lower' => 0, - 'valid' => '?', - 'ticked'=> array(), + $stats[$name] = array( '27%' => $twenty_seven, + 'upper' => 0, + 'lower' => 0, + 'valid' => null, + 'ticked'=> null, 'ticked_count'=> 0, 'empty_count'=> 0, - 'type' => null, - 'subtype' => null, + 'type' => null, + 'subtype' => null, ); $stats[$name]['type'] = $item['type']; $stats[$name]['subtype'] = $item['subtype']; + + // Initialise 'ticked' table + if (is_null($stats[$name]['ticked'])) { + switch ($stats[$name]['subtype']) { + case 'mc': + $stats[$name]['ticked'] = array( 'A' => 0, 'B' => 0, 'C' => 0, 'D' => 0, 'multiple' => 0); + break; + case 'tf': + $stats[$name]['ticked'] = array( 'TRUE' => 0, 'FALSE' => 0, 'multiple' => 0); + break; + default: + $stats[$name]['ticked'] = array(); + break; + } + } + + // Count right answers if ($item['right']) { - if ($stats[$name]['valid'] == '?') - if ($item['subtype'] = 'tf') { - if ($item['ticked'] == 'A') $stats[$name]['valid'] = 'TRUE'; else $stats[$name]['valid'] = 'FALSE'; - } else { - $stats[$name]['valid'] = $item['ticked']; + // Save valid answer + if (is_null($stats[$name]['valid'])) { + switch ($stats[$name]['subtype']) { + case 'tf': + if ($item['ticked'] == 'A') + $stats[$name]['valid'] = 'TRUE'; + else + $stats[$name]['valid'] = 'FALSE'; + break; + case 'mc': + $stats[$name]['valid'] = $item['ticked']; + break; + default: + $stats[$name]['valid'] = 'n/a'; + break; } - if ($i <= $upper_stop) { - $stats[$name]['upper']++; - } - if ($i >= $lower_start) { - $stats[$name]['lower']++; } + + // 'upper 27%' and 'lower 27%' counters + if ($i <= $upper_stop) $stats[$name]['upper']++; + if ($i >= $lower_start) $stats[$name]['lower']++; } + + // Count empty answers if (empty($item['ticked'])) { $stats[$name]['empty_count']++; } else { + // Stats on non-empty answers $stats[$name]['ticked_count']++; if (strlen($item['ticked']) > 1) { $stats[$name]['ticked'] = $this->createAndIncrement($stats[$name]['ticked'], 'multiple'); } else { - if ($item['subtype'] = 'tf') { - if ($item['ticked'] == 'A') $field = 'TRUE'; - if ($item['ticked'] == 'B') $field = 'FALSE'; - $stats[$name]['ticked'] = $this->createAndIncrement($stats[$name]['ticked'], $field); - } else { - $stats[$name]['ticked'] = $this->createAndIncrement($stats[$name]['ticked'], $item['ticked']); + switch ($item['subtype']) { + case 'tf': + if ($item['ticked'] == 'A') $field = 'TRUE'; + if ($item['ticked'] == 'B') $field = 'FALSE'; + $stats[$name]['ticked'] = $this->createAndIncrement($stats[$name]['ticked'], $field); + break; + case 'mc': + $stats[$name]['ticked'] = $this->createAndIncrement($stats[$name]['ticked'], $item['ticked']); + break; + default: + $stats[$name]['ticked'] = $this->createAndIncrement($stats[$name]['ticked'], $item['ticked']); + break; } } } } } - // Compute more stats... + // Compute more stats $tmp = array(); foreach ($stats as $name => $stat) { // Discrimination index $stat['DI'] = ($stat['upper']-$stat['lower'])/(1.0*$stat['27%']); + + // Calculate percentages + $ticked_percentage = array(); + foreach ($stat['ticked'] as $t => $n) { + $ticked_percentage[$t] = array( 'n' => $n, '%' => (float)(100.0*$n/$stat['ticked_count']), 'valid' => (int)($t == $stat['valid'])); + } + $stat['ticked'] = $ticked_percentage; $tmp[$name] = $stat; } $stats = $tmp; - print_r($stats); + // Print CSV + $previous_subtype = null; + $header = '"question_id","subtype","27 %","upper","lower","DI","count","valid"'; + foreach ($stats as $name => $stat) { + if ($stat['subtype'] != $previous_subtype) { + #if (!is_null($previous_subtype)) echo "\n"; + echo $header; + foreach ($stat['ticked'] as $answer => $data) echo ",\"[$answer] count\""; + foreach ($stat['ticked'] as $answer => $data) echo ",\"[$answer] %\""; + echo "\n"; + $previous_subtype = $stat['subtype']; + } + echo "$name,{$stat['subtype']},{$stat['27%']},{$stat['upper']},{$stat['lower']},{$stat['DI']},{$stat['ticked_count']}"; + foreach ($stat['ticked'] as $answer => $data) if ($data['valid'] == 1) echo ",\"$answer\""; + foreach ($stat['ticked'] as $answer => $data) echo ",{$data['n']}"; + foreach ($stat['ticked'] as $answer => $data) echo ",{$data['%']}"; + echo "\n"; + } + } public function sortByTotalPoints($update = true) { if ($update) { usort($this->dataset, "cmp_total"); return $this->dataset; } else { $dataset = $this->dataset; usort($dataset, "cmp_total"); return $dataset; } } public function getDataSet() { return $this->dataset; } protected function createAndIncrement($table, $field, $increment = 1) { if (!array_key_exists($field, $table)) { $table[$field] = 0; } $table[$field] += $increment; return $table; } - public function doStats() { + public function getMarks() { + $marks = array(); + foreach($this->dataset as $student) { + $tmp = array(); + + $tmp['teacher'] = $student['teacher']; + $tmp['ID'] = $student['ID']; + $tmp['SECTION'] = $student['SECTION']; + $tmp['exam_points'] = $student['exam_points']; + $tmp['total'] = $student['total']; + $tmp['present'] = $student['present']; + $tmp['SCIPER'] = $student['SCIPER']; + $tmp['quarter_mark6'] = $student['quarter_mark6']; + $marks[] = $tmp; + } + return $marks; + } + + public function getStats() { $stats = array(); # Presence $stats['presence'] = array(); + $marks = array(); + $stats['quarter_mark6'] = array( 'n' => 0, 'tot' => 0, 'average' => null, 'stddev' => null, 'median' => null); foreach($this->dataset as $student) { - // Create category if need be + + // Presence $stats['presence'] = $this->createAndIncrement($stats['presence'], 'total'); switch ($student['type']) { case 'student': if ($student['present']) { $stats['presence'] = $this->createAndIncrement($stats['presence'], 'present'); } else { $stats['presence'] = $this->createAndIncrement($stats['presence'], 'absent'); } break; case 'unused': $stats['presence'] = $this->createAndIncrement($stats['presence'], 'unsused'); break; default: $stats['presence'] = $this->createAndIncrement($stats['presence'], 'unknown'); } + + // Average + if ($student['present']) $marks[] = $student['quarter_mark6']; } - print_r($stats); + $stats['quarter_mark6']['n'] = count($marks); + if ($stats['quarter_mark6']['n'] > 0) { + $stats['quarter_mark6']['tot'] = array_sum($marks); + $stats['quarter_mark6']['average'] = $stats['quarter_mark6']['tot']/$stats['quarter_mark6']['n']; + $stats['quarter_mark6']['stddev'] = stats_standard_deviation($marks); + sort($marks); + $stats['quarter_mark6']['median'] = $marks[round(count($marks)/2)]; + } else { + $stats['quarter_mark6']['tot'] = 0; + $stats['quarter_mark6']['average'] = 0; + $stats['quarter_mark6']['stddev'] = 0; + $stats['quarter_mark6']['median'] = 0; + } + + // Distribution (of marks) + $distribution = array(); + for ($m = 1.0 ; $m <= 6.0 ; $m += 0.25) $distribution[(string)$m] = 0; + $stats['distribution_total'] = 0; + foreach ($marks as $mark) { + $distribution[(string)$mark]++; + $stats['distribution_total']++; + } + $stats['distribution'] = $distribution; + $stats['distribution_percentage'] = array(); + if ($stats['quarter_mark6']['n']) { + foreach ($stats['distribution'] as $mark => $count) $stats['distribution_percentage'][$mark] = $count*100.0/$stats['distribution_total']; + } else { + $stats['distribution_percentage'] = $stats['distribution']; + } + + return($stats); } } ?>