Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F92320137
OptimizeResearchDataManagement.tex
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Nov 19, 09:28
Size
47 KB
Mime Type
text/x-tex
Expires
Thu, Nov 21, 09:28 (2 d)
Engine
blob
Format
Raw Data
Handle
22422683
Attached To
rOPTIMIZINGRDM Optimizing your research data management
OptimizeResearchDataManagement.tex
View Options
% Default to the notebook output style
% Inherit from the specified cell style.
\documentclass
[11pt]
{
article
}
\usepackage
[T1]
{
fontenc
}
% Nicer default font than Computer Modern for most use cases
\usepackage
{
palatino
}
% Basic figure setup, for now with no caption control since it's done
% automatically by Pandoc (which extracts ![](path) syntax from Markdown).
\usepackage
{
graphicx
}
% We will generate all images so they have a width \maxwidth. This means
% that they will get their normal width if they fit onto the page, but
% are scaled down if they would overflow the margins.
\makeatletter
% \maxwidth yields the natural width of the image being included, capped at
% \linewidth. It reads graphicx's internal \Gin@nat@width (hence the
% \makeatletter guard), so it is only meaningful inside an
% \includegraphics option list.
\def\maxwidth{%
  \ifdim\Gin@nat@width>\linewidth
    \linewidth
  \else
    \Gin@nat@width
  \fi
}
\makeatother
% Keep a handle on the original command before overriding it.
\let\Oldincludegraphics\includegraphics
% Set max figure width to be 80% of \maxwidth, for now hardcoded.
% The override accepts the usual optional key list and forwards it after the
% default width, so \includegraphics{file} behaves as before while
% \includegraphics[width=...]{file} can override the default.
\renewcommand{\includegraphics}[2][]{%
  \Oldincludegraphics[width=.8\maxwidth,#1]{#2}%
}
% Ensure that by default, figures have no caption (until we provide a
% proper Figure object with a Caption API and a way to capture that
% in the conversion process - todo).
\usepackage
{
caption
}
% Caption label format that prints nothing, made the default so that
% captions appear without a "Figure N:" prefix.
\DeclareCaptionLabelFormat{nolabel}{}
\captionsetup{labelformat=nolabel}
\usepackage
{
adjustbox
}
% Used to constrain images to a maximum size
\usepackage
{
xcolor
}
% Allow colors to be defined
\usepackage
{
enumerate
}
% Needed for markdown enumerations to work
\usepackage
{
geometry
}
% Used to adjust the document margins
\usepackage
{
amsmath
}
% Equations
\usepackage
{
amssymb
}
% Equations
\usepackage
{
textcomp
}
% defines textquotesingle
% Hack from http://tex.stackexchange.com/a/47451/13684:
\AtBeginDocument{%
  % Upright quotes in Pygmentized code: map \PYZsq to \textquotesingle
  % (textcomp). Deferred to \begin{document} so that it replaces the
  % \char-based \PYZsq defined together with the Pygments macros.
  \def\PYZsq{\textquotesingle}%
}
\usepackage
{
upquote
}
% Upright quotes for verbatim code
\usepackage
{
eurosym
}
% defines \euro
\usepackage
[mathletters]
{
ucs
}
% Extended unicode (utf-8) support
\usepackage
[utf8x]
{
inputenc
}
% Allow utf-8 characters in the tex document
\usepackage
{
fancyvrb
}
% verbatim replacement that allows latex
\usepackage
{
grffile
}
% extends the file name processing of package graphics
% to support a larger range
% The hyperref package gives us a pdf with properly built
% internal navigation ('pdf bookmarks' for the table of contents,
% internal cross-reference links, web links for URLs, etc.)
\usepackage
{
hyperref
}
\usepackage
{
longtable
}
% longtable support required by pandoc >1.10
\usepackage
{
booktabs
}
% table support for pandoc > 1.12.2
\usepackage
[normalem]
{
ulem
}
% ulem is needed to support strikethroughs (\sout)
% normalem makes italics be italics, not underlines
% Colors for the hyperref package
% Link colours referenced by name in \hypersetup (urlcolor, linkcolor,
% citecolor). Names and colour model are written without surrounding
% whitespace so the later lookups by name match.
\definecolor{urlcolor}{rgb}{0,.145,.698}
\definecolor{linkcolor}{rgb}{.71,0.21,0.01}
\definecolor{citecolor}{rgb}{.12,.54,.11}
% ANSI colors
% Sixteen-colour ANSI palette: eight base colours, each with an "-intense"
% variant, given as HTML hex triplets.
\definecolor{ansi-black}{HTML}{3E424D}
\definecolor{ansi-black-intense}{HTML}{282C36}
\definecolor{ansi-red}{HTML}{E75C58}
\definecolor{ansi-red-intense}{HTML}{B22B31}
\definecolor{ansi-green}{HTML}{00A250}
\definecolor{ansi-green-intense}{HTML}{007427}
\definecolor{ansi-yellow}{HTML}{DDB62B}
\definecolor{ansi-yellow-intense}{HTML}{B27D12}
\definecolor{ansi-blue}{HTML}{208FFB}
\definecolor{ansi-blue-intense}{HTML}{0065CA}
\definecolor{ansi-magenta}{HTML}{D160C4}
\definecolor{ansi-magenta-intense}{HTML}{A03196}
\definecolor{ansi-cyan}{HTML}{60C6C8}
\definecolor{ansi-cyan-intense}{HTML}{258F8F}
\definecolor{ansi-white}{HTML}{C5C1B4}
\definecolor{ansi-white-intense}{HTML}{A1A6B2}
% commands and environments needed by pandoc snippets
% extracted from the output of `pandoc -s`
% \tightlist is emitted at the top of compact lists: it removes the extra
% vertical space between items and between paragraphs inside the list.
% \providecommand keeps any definition that already exists.
\providecommand{\tightlist}{%
  \setlength{\itemsep}{0pt}%
  \setlength{\parskip}{0pt}%
}
% Verbatim environment for highlighted code; backslash and braces stay
% active inside it so that the colouring macros are executed.
\DefineVerbatimEnvironment{Highlighting}{Verbatim}{commandchars=\\\{\}}
% Add ',fontsize=\small' for more characters per line
% Wrapper environment around highlighted code blocks; it adds nothing here.
\newenvironment{Shaded}{}{}
% Pandoc syntax-highlighting macros: one command per token class, each
% taking the token text as its only argument. Bodies are written without
% internal whitespace because they are used inside a verbatim environment,
% where every space would be printed.
\newcommand{\KeywordTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\DataTypeTok}[1]{\textcolor[rgb]{0.56,0.13,0.00}{{#1}}}
\newcommand{\DecValTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\BaseNTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\FloatTok}[1]{\textcolor[rgb]{0.25,0.63,0.44}{{#1}}}
\newcommand{\CharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\StringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\CommentTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textit{{#1}}}}
\newcommand{\OtherTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{{#1}}}
\newcommand{\AlertTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\FunctionTok}[1]{\textcolor[rgb]{0.02,0.16,0.49}{{#1}}}
\newcommand{\RegionMarkerTok}[1]{{#1}}
\newcommand{\ErrorTok}[1]{\textcolor[rgb]{1.00,0.00,0.00}{\textbf{{#1}}}}
\newcommand{\NormalTok}[1]{{#1}}
% Additional commands for more recent versions of Pandoc
% Token classes emitted by more recent Pandoc versions. Same conventions as
% the macros above: one argument (the token text), no whitespace inside the
% bodies because they are typeset in a verbatim environment.
\newcommand{\ConstantTok}[1]{\textcolor[rgb]{0.53,0.00,0.00}{{#1}}}
\newcommand{\SpecialCharTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\VerbatimStringTok}[1]{\textcolor[rgb]{0.25,0.44,0.63}{{#1}}}
\newcommand{\SpecialStringTok}[1]{\textcolor[rgb]{0.73,0.40,0.53}{{#1}}}
\newcommand{\ImportTok}[1]{{#1}}
\newcommand{\DocumentationTok}[1]{\textcolor[rgb]{0.73,0.13,0.13}{\textit{{#1}}}}
\newcommand{\AnnotationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\CommentVarTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\VariableTok}[1]{\textcolor[rgb]{0.10,0.09,0.49}{{#1}}}
\newcommand{\ControlFlowTok}[1]{\textcolor[rgb]{0.00,0.44,0.13}{\textbf{{#1}}}}
\newcommand{\OperatorTok}[1]{\textcolor[rgb]{0.40,0.40,0.40}{{#1}}}
\newcommand{\BuiltInTok}[1]{{#1}}
\newcommand{\ExtensionTok}[1]{{#1}}
\newcommand{\PreprocessorTok}[1]{\textcolor[rgb]{0.74,0.48,0.00}{{#1}}}
\newcommand{\AttributeTok}[1]{\textcolor[rgb]{0.49,0.56,0.16}{{#1}}}
\newcommand{\InformationTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
\newcommand{\WarningTok}[1]{\textcolor[rgb]{0.38,0.63,0.69}{\textbf{\textit{{#1}}}}}
% Define a nice break command that doesn't care if a line doesn't already
% exist.
% \br: fill the rest of the current line, then break it with \\* (the
% starred form forbids a page break at that point). The \hspace* fill gives
% \\ a line to end even where none has been started.
\def\br{\hspace*{\fill} \\* }
% MathJax compatibility definitions: \gt and \lt stand for > and <.
\def\gt{>}
\def\lt{<}
% Document parameters
\title
{
OptimizeResearchDataManagement
}
% Pygments definitions
\makeatletter
% NOTE: @ stays a letter from here on; the matching \makeatother comes
% after the \PYZ.. escape macros at the end of the Pygments definitions.
%
% Reset the six style hooks \PY@it, \PY@bf, \PY@ul, \PY@tc, \PY@bc and
% \PY@ff to \relax so that no style leaks from one token to the next.
\def\PY@reset{%
  \let\PY@it=\relax \let\PY@bf=\relax
  \let\PY@ul=\relax \let\PY@tc=\relax
  \let\PY@bc=\relax \let\PY@ff=\relax
}
% Run the style macro registered for one token class: \PY@tok@<class>.
\def\PY@tok#1{\csname PY@tok@#1\endcsname}
% Walk a `+'-separated list of token classes and apply each one in turn;
% the list is terminated by an entry consisting of \relax.
\def\PY@toks#1+{%
  \ifx\relax#1\empty
  \else
    \PY@tok{#1}%
    \expandafter\PY@toks
  \fi
}
% Wrap the text in the six hooks, outermost first: bc, tc, ul, it, bf, ff.
\def\PY@do#1{\PY@bc{\PY@tc{\PY@ul{\PY@it{\PY@bf{\PY@ff{#1}}}}}}}
% \PY{<classes>}{<text>}: reset the hooks, load the style of every class in
% the `+'-separated list, then typeset the text through the hooks.
\def\PY#1#2{\PY@reset\PY@toks#1+\relax+\PY@do{#2}}
% Per-token-class styles looked up by \PY@tok as \PY@tok@<class>. Each one
% sets some of the style hooks: \PY@tc (text colour), \PY@it (italic),
% \PY@bf (bold) or \PY@bc (box). These definitions need @ to be a letter,
% i.e. they must stay inside the \makeatletter region opened above.
% The name between \csname and \endcsname must contain no whitespace.
\expandafter\def\csname PY@tok@nv\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@gd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@ch\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@ge\endcsname{\let\PY@it=\textit}
\expandafter\def\csname PY@tok@vi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@cm\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@c\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@ow\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PY@tok@cp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.74,0.48,0.00}{##1}}}
\expandafter\def\csname PY@tok@c1\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@kp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kd\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@cpf\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@sd\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@gt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.27,0.87}{##1}}}
\expandafter\def\csname PY@tok@cs\endcsname{\let\PY@it=\textit\def\PY@tc##1{\textcolor[rgb]{0.25,0.50,0.50}{##1}}}
\expandafter\def\csname PY@tok@gi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.63,0.00}{##1}}}
\expandafter\def\csname PY@tok@gp\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@sh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@m\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
% Error tokens are framed in red on a white background instead of coloured.
\expandafter\def\csname PY@tok@err\endcsname{\def\PY@bc##1{\setlength{\fboxsep}{0pt}\fcolorbox[rgb]{1.00,0.00,0.00}{1,1,1}{\strut ##1}}}
\expandafter\def\csname PY@tok@gu\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.50,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@sx\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@mb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@kc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@ss\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@mo\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@nn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@gs\endcsname{\let\PY@bf=\textbf}
\expandafter\def\csname PY@tok@si\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PY@tok@o\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@gr\endcsname{\def\PY@tc##1{\textcolor[rgb]{1.00,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@mh\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@na\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.49,0.56,0.16}{##1}}}
\expandafter\def\csname PY@tok@sc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@ni\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.60,0.60,0.60}{##1}}}
\expandafter\def\csname PY@tok@nf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@kn\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@ne\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.82,0.25,0.23}{##1}}}
\expandafter\def\csname PY@tok@kr\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@mi\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@s2\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@nc\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,1.00}{##1}}}
\expandafter\def\csname PY@tok@s\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@k\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@nd\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.67,0.13,1.00}{##1}}}
\expandafter\def\csname PY@tok@il\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@sr\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.53}{##1}}}
\expandafter\def\csname PY@tok@w\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.73,0.73}{##1}}}
\expandafter\def\csname PY@tok@gh\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.00,0.50}{##1}}}
\expandafter\def\csname PY@tok@sb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@vc\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
\expandafter\def\csname PY@tok@go\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.53,0.53}{##1}}}
\expandafter\def\csname PY@tok@no\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.53,0.00,0.00}{##1}}}
\expandafter\def\csname PY@tok@bp\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@kt\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.69,0.00,0.25}{##1}}}
\expandafter\def\csname PY@tok@s1\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.73,0.13,0.13}{##1}}}
\expandafter\def\csname PY@tok@se\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.73,0.40,0.13}{##1}}}
\expandafter\def\csname PY@tok@nl\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.63,0.63,0.00}{##1}}}
\expandafter\def\csname PY@tok@mf\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.40,0.40,0.40}{##1}}}
\expandafter\def\csname PY@tok@nt\endcsname{\let\PY@bf=\textbf\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@nb\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.00,0.50,0.00}{##1}}}
\expandafter\def\csname PY@tok@vg\endcsname{\def\PY@tc##1{\textcolor[rgb]{0.10,0.09,0.49}{##1}}}
% Escape macros for characters that are special to TeX. Each expands to
% \char`\<c>, i.e. the glyph at that character's code in the current font.
% The escaped character must follow the backquote immediately: a space or
% line end in between would be taken as the character instead.
\def\PYZbs{\char`\\}
\def\PYZus{\char`\_}
\def\PYZob{\char`\{}
\def\PYZcb{\char`\}}
\def\PYZca{\char`\^}
\def\PYZam{\char`\&}
\def\PYZlt{\char`\<}
\def\PYZgt{\char`\>}
\def\PYZsh{\char`\#}
\def\PYZpc{\char`\%}
\def\PYZdl{\char`\$}
\def\PYZhy{\char`\-}
\def\PYZsq{\char`\'}
\def\PYZdq{\char`\"}
\def\PYZti{\char`\~}
% for compatibility with earlier versions
\def\PYZat{@}
\def\PYZlb{[}
\def\PYZrb{]}
% Closes the \makeatletter region opened before the Pygments definitions.
\makeatother
% Exact colors from NB
% Named colours "incolor" (dark blue) and "outcolor" (dark red), given as
% rgb triplets; names are written without surrounding whitespace.
\definecolor{incolor}{rgb}{0.0, 0.0, 0.5}
\definecolor{outcolor}{rgb}{0.545, 0.0, 0.0}
% Prevent overflowing lines due to hard-to-break entities
\sloppy
% Setup hyperref package
% Hyperlink appearance. The three colour names on the right-hand side are
% the ones defined with \definecolor earlier in this preamble.
\hypersetup{%
  breaklinks=true, % so long urls are correctly broken across lines
  colorlinks=true,
  urlcolor=urlcolor,
  linkcolor=linkcolor,
  citecolor=citecolor,
}
% Slightly bigger margins than the latex defaults
% One-inch margin on every side; "verbose" makes geometry report the
% resulting layout in the log.
\geometry{%
  verbose,
  tmargin=1in,
  bmargin=1in,
  lmargin=1in,
  rmargin=1in,
}
\begin
{
document
}
\maketitle
\section*
{
Optimizing Research Data
Management
}
\label
{
optimizing-research-data-management
}
\subsection*
{
University of Basel
}
\label
{
university-of-basel
}
\subsubsection*
{
Wednesday March 22 and Thursday May 11,
2017
}
\label
{
wednesday-march-22-and-thursday-may-11-2017
}
\paragraph{Aude Dieudé, Jan Krause, Lorenza Salvatori (EPFL) \& Silke Bellanger (UNIBAS)}
\label
{
aude-dieuduxe9-jan-krause-lorenza-salvatori-epfl-silke-bellanger-unibas
}
Contact: silke.bellanger@unibas.ch
\&
researchdata@epfl.ch
\begin
{
figure
}
[htbp]
\centering
\includegraphics{./Images/CC-By-NC-SA_88x31.png}
\caption
{
.
}
\end
{
figure
}
\section*
{
Part 1
}
\label
{
part-1
}
\subsection*
{
1.1 - Introduction to RDM
}
\label
{
introduction-to-rdm
}
\subsubsection*
{
Definition, context and best
practices
}
\label
{
definition-context-and-best-practices
}
\begin
{
itemize
}
\tightlist
\item
Introduction:
\href
{
https://www.youtube.com/watch?v=N2zK3sAtr-4
}{
video
}
,
\end
{
itemize
}
Embedded video: \url{https://www.youtube.com/embed/N2zK3sAtr-4?start=20&autoplay=0}
\subsubsection*
{
Definition : Research
data
}
\label
{
definition-research-data
}
\begin
{
itemize
}
\tightlist
\item
The definition of research data is not fixed or rigid: several
definitions are possible based on specific fields, institutions, and
organizations.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
For the Organisation for Economic Co-operation and Development
(\href{http://www.oecd.org/fr/sti/sci-tech/38500823.pdf}{OECD}),
research data are defined as factual recordings (numbers, texts,
images and sounds), which are used as principal sources for scientific
research and which are often recognized by the scientific community as
being necessary to validate research results.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Key elements to take into consideration during research data
management are the legal, ethical and political aspects, which depend
on the sensitivity of the data.
\end
{
itemize
}
\subsubsection*
{
Research Data Lifecycle
}
\label
{
research-data-lifecycle
}
\href
{
https://drive.google.com/file/d/0BxKZLWq08xX-TW5VOEUtd2FSRE0/view?pref=2
\&
pli=1
}{
Source:
Formation URFIST, Rennes, 2016
}
\subsection*
{
1.2 - Actors and Skills
}
\label
{
actors-and-skills
}
\subsubsection*
{
Actors
}
\label
{
actors
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/Actors2.png
}
\caption
{
Actors
}
\end
{
figure
}
\subsubsection*
{
Skills
}
\label
{
skills
}
\subsubsection*
{
Requirements regarding research data
management
}
\label
{
requirements-regarding-research-data-management
}
\subsubsection*
{
Publishers
}
\label
{
publishers
}
Many publishers and scientific journals require, under specific
conditions, the publication of the data used to obtain the research
results (permanent archiving, standardized formats, etc.). This is the
case, for instance, with PLoS and Nature Publishing Group. A list of
editorial policies is available online on this
\href{http://wiki.datadryad.org/Journal_instructions}{Dryad website}.
Note: This page seems to be a one-off publication and is not
exhaustive.
\subsubsection*
{
Funders
}
\label
{
funders
}
Examples of funders which require DMPs or equivalent:
\subsubsection*
{
Funding agency and DMP : Horizon
2020
}
\label
{
funding-agency-and-dmp-horizon-2020
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/H2020.png
}
\caption
{
.
}
\end
{
figure
}
\begin
{
itemize
}
\tightlist
\item
\href
{
https://ec.europa.eu/programmes/horizon2020/
}{
Horizon 2020
}
: is
the biggest funding agency from the European Commission with nearly
\euro
{}
80 billion of funding available over 7 years from 2014 to 2020.
Its main objective is to promote and support excellence in the
scientific field.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Horizon 2020 requires for some research projects the preparation of a
\href
{
http://ec.europa.eu/programmes/horizon2020/en/what-horizon-2020
}{
data
management plan
}
, which is mandatory in order to receive research
funding.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
\href
{
https://ec.europa.eu/digital-single-market/en/news/communication-european-cloud-initiative-building-competitive-data-and-knowledge-economy-europe
}{
As
of 2017
}
, the Commission will make
\textbf
{
open research data the
default option
}
, while ensuring opt-outs, for all new projects of the
Horizon 2020 program.
\end
{
itemize
}
\subsubsection*
{
Funding agency and DMP :
SNSF
}
\label
{
funding-agency-and-dmp-snsf
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/SNSF.png
}
\caption
{
.
}
\end
{
figure
}
Submission of
\textbf
{
data management plans
}
with the grant application
will be
\textbf
{
mandatory as of October 2017
}
. See the
\href
{
http://www.snf.ch/en/researchinFocus/newsroom/Pages/news-170306-towards-open-research-data.aspx
}{
communication
}
.
\subsection*
{
1.3 - Data Management Plan
}
\label
{
data-management-plan
}
\subsubsection*
{
Definition : Data Management
Plan
}
\label
{
definition-data-management-plan
}
\begin
{
itemize
}
\tightlist
\item
Data Management Plan (DMP) refers to the strategies put into place to
create, store, share, maintain, archive and preserve research data
throughout their life cycle.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
The DMP describes which data are going to be produced and how each
type of data will be organized, classified, archived, shared,
distributed, secured and preserved in a secure way.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Here is a
\href
{
https://www.youtube.com/watch?v=gYDb-GP1CA4
}{
video
}
,
which illustrates how the DMP works concretely:
\end
{
itemize
}
Embedded video: \url{https://www.youtube.com/embed/gYDb-GP1CA4?start=20&autoplay=0}
\subsection*
{
1.4 - DMP best practices
}
\label
{
dmp-best-practices
}
\subsection*
{
Best practices examples: DMPonline
(UK)
}
\label
{
best-practices-examples-dmponline-uk
}
http://dmponline.dcc.ac.uk
\subsubsection*
{
Best practices examples:
Switzerland
}
\label
{
best-practices-examples-switzerland
}
To provide guidance in preparing a DMP, the
\textbf{\href{http://library.epfl.ch/files/content/sites/library3/files/research-data/dmp/Data_management_plan_checklist_EPFL_2016.pdf}{EPFL-ETHZ checklist}}
includes four categories to cover questions related to:
\begin{itemize}
\tightlist
\item Research Data Acquisition: type, quantity, license, etc.
\item Research Data Format: format, metadata, identification, etc.
\item Research Data Sharing: embargo, intellectual property, etc.
\item Data Preservation: storage, sensitivity of the data, archiving, etc.
\end{itemize}
\subsubsection*
{
Guidelines and Policies, University
Basel
}
\label
{
guidelines-and-policies-university-basel
}
Research data policy is in preparation.

Guidelines regarding good scientific practice:
\url{https://www.unibas.ch/en/Research/Research-in-Basel/Values-and-Principles.html}

Information regarding general data and IT guidelines:
\url{https://its.unibas.ch/content.cfm?content=586}
\section*
{
Part 2 - Practical issues
}
\label
{
part-2---practical-issues
}
\begin
{
itemize
}
\tightlist
\item
Ethics, legal aspects, anonymization
\item
Collaborative coding and writing
\item
(Meta)data formats
\item
Publication and long term preservation
\end
{
itemize
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/tools.jpg
}
\caption
{
.
}
\end
{
figure
}
\subsection*
{
2.1 - Ethics
}
\label
{
ethics
}
\subsubsection*
{
2.1.1. When human beings are
involved
\ldots
{}}
\label
{
when-human-beings-are-involved
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/humanbeing.png
}
\caption
{
Human Beings
}
\end
{
figure
}
\textbf
{
Ethics issues arise in many areas of research
}
.
Research involving the voluntary participation of research subjects and
the collection of
\textbf
{
data that might be considered as personal
}
.
You must protect your
\textbf
{
volunteers, yourself and your researcher
colleagues
}
.
\href
{
http://ec.europa.eu/research/participants/data/ref/h2020/grants
_
manual/hi/ethics/h2020
_
hi
_
ethics-self-assess
_
en.pdf
}{
H2020
Programme Guidance How to complete your ethics self-assessment, p.1, 12
July 2016
}
\paragraph{Ethics Commission of both Basel cantons}
\label
{
ethical-comission-of-both-basel-cantons
}
\begin
{
itemize
}
\tightlist
\item
Web site: http://eknz.ch
\item
Contact: http://eknz.ch/kontakt/
\end
{
itemize
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/question.png
}
\caption
{
Questions
}
\end
{
figure
}
\begin
{
itemize
}
\tightlist
\item
Does your research practice involve collecting, processing and storing
information on persons?
\item
\ldots
{}
identifiable persons ?
\item
\ldots
{}
vulnerable persons ?
\item
\ldots
{}
children ?
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
How do you inform persons/subjects on what you will be doing ?
\end
{
itemize
}
\paragraph
{
Collecting consent
}
\label
{
collecting-consent
}
``Research involving human beings may only be carried out if,
{
[
}
\ldots
{}{
]
}
, the persons concerned have given their informed consent
or, after being duly informed, have not exercised their right to
dissent.
{
[
}
\ldots
{}{
]
}
The persons concerned may withhold or revoke
their consent at any time, without stating their reasons.''
\emph
{
Human
Research Act (HRA), article 7.
}
The consent must be:
\begin{itemize}
\tightlist
\item simple and understandable,
\item adapted to the subject (child, teenager\ldots{}) (HRA Art. 21--22).
\end{itemize}
\textbf
{
Sources:
}
\begin
{
itemize
}
\tightlist
\item
\href
{
http://ec.europa.eu/research/participants/data/ref/h2020/grants
_
manual/hi/ethics/h2020
_
hi
_
ethics-self-assess
_
en.pdf
}{
H2020
Programme Guidance : How to complete your ethics self assessment
}
,
12th July 2016. Page 1.
\item
Swiss Academy of Medical Sciences (SAMS) (2015). ``Research with human
subjects. A manual for practitioners.'' 2nd edition,
\url{http://swissethics.ch/doc/swissethics/manual_research_nov2015_e.pdf}
\item
Federal Act on Research involving Human Beings (Human Research Act,
HRA) of 30 September 2011 (Status as of 1 January 2014).
https://www.admin.ch/opc/en/classified-compilation/20061313/index.html
\end
{
itemize
}
\subsubsection*
{
2.1.2. Data ? What data ? Personal data ? Sensitive data
?
}
\label
{
data-what-data-personal-data-sensitive-data
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/personaldata.png
}
\caption
{}
\end
{
figure
}
\textbf
{
personal data
}
\begin
{
itemize
}
\tightlist
\item
all information relating to an identified or identifiable person
(Swiss FADP, article 3 a.)
\item
examples: name, address, identification number, e-mail, phone number,
medical records
\ldots
{}
There are various potential identifiers,
including full name, pseudonyms, occupation, address or any
combination of these.
\end
{
itemize
}
\textbf
{
sensitive personal data
}
According to the Swiss FADP (article 3 c.), data on:
\begin
{
enumerate
}
\def\labelenumi
{
\arabic
{
enumi
}
.
}
\tightlist
\item
religious, ideological, political or trade union-related views or
activities,
\item
\textbf
{
health, the intimate sphere or the racial origin
}
,
\item
social security measures,
\item
administrative or criminal proceedings and sanctions;
\end
{
enumerate
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/question.png
}
\caption
{
Questions
}
\end
{
figure
}
\begin
{
itemize
}
\tightlist
\item
What data do you typically use (collect, process, store) in the course
of a research project ?
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Among these data which ones are
\textbf
{
personal
}
?
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Among these data which ones are
\textbf
{
sensitive
}
?
\end
{
itemize
}
If you work with personal or sensitive data,
you should check with your faculty / institution.
\subsubsection*
{
2.1.3 Doing what with data ?
}
\label
{
doing-what-with-data
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/dataanalysis.png
}
\caption
{}
\end
{
figure
}
\subparagraph
{
Personal or sensitive data
processing
}
\label
{
personal-or-sensitive-data-processing
}
\textbf
{
Swiss
\href
{
https://www.admin.ch/opc/en/classified-compilation/19920153/index.html
}{
Federal
Act on Data Protection
}
(FADP) (or Loi sur la Protection des Données
LPD), article 3 e.
}
: any operation with personal data, irrespective of
the means applied and the procedure, and in particular the
collection, storage, use, revision, disclosure, archiving or
destruction of data.
\textbf
{
Make sure that
\ldots
{}}
\begin
{
itemize
}
\tightlist
\item
processing data is carried out in good faith and only for the purpose
indicated at the time of collection {[}\ldots{}{]} (FADP article 4),
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
consent is valid (given voluntarily on the provision of adequate
information) (FADP article 4),
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
data is correct (FADP article 5),
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
participants can be informed about their personal data (FADP article 8;
HRA, article 8),
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
data are rendered anonymous, as soon as the purpose of the processing
permits (FADP article 22, Processing for research).
\end
{
itemize
}
\subsubsection*
{
2.1.4. Protecting and disclosing personal
data
}
\label
{
protecting-and-disclosing-personal-data
}
\paragraph
{
Protection
}
\label
{
protection
}
Personal data must be protected against unauthorised processing through
adequate technical and organisational measures (Swiss FADP, article 7).
\textbf
{
Disclosure
}
Making personal data accessible, for example by permitting access,
transmission or publication (Swiss FADP, article 3 f.).
\textbf
{
Cross-border disclosure
}
Personal data may not be disclosed abroad if the privacy of the data
subjects would be seriously endangered thereby, in particular due to the
absence of legislation that guarantees adequate protection.
Cross-border disclosure of personal data must be protected against
unauthorised processing through adequate technical and organisational
measures.
(FADP Art. 6)
\paragraph{Ethics Commission of both Basel cantons}
\label
{
ethical-comission-of-both-basel-cantons
}
\begin
{
itemize
}
\tightlist
\item
Web site: http://eknz.ch
\item
Contact: http://eknz.ch/kontakt/
\end
{
itemize
}
\textbf
{
References
}
\begin
{
itemize
}
\item
Federal Act on Data Protection (FADP) of 19 June 1992 (status as of 1
January 2014), Federal law on data protection (235.1).
\item
Directive 95/46/EC of the European Parliament
\&
of the Council, of 24
October 1995 on the protection of individuals with regard to the
processing of personal data and on the free movement of such data (OJ
L 281, 23.11.1995, p.~31).
\item
\href
{
http://eur-lex.europa.eu/legal-content/EN/TXT/?uri=URISERV
\%
3Al14012
}{
Directive
95/46/EC
}
\item
As of 2018:
\href
{
http://eur-lex.europa.eu/legal-content/de/TXT/?uri=CELEX
\%
3A32016R0679
}{
REGULATION
(EU) 2016/679 repealing Directive 95/46/EC
}
\item
\href
{
http://ec.europa.eu/research/participants/data/ref/h2020/grants
_
manual/hi/ethics/h2020
_
hi
_
ethics-self-assess
_
en.pdf
}{
H2020
Program Guidance : how to complete your ethics self assessment
}
,
12.7.2016
\end
{
itemize
}
\subsection*
{
2.2 - Anonymization methods
}
\label
{
anonymization-methods
}
Privacy protection methods, either :
\begin
{
itemize
}
\tightlist
\item
removing,
\item
generalizing or
\item
encrypting,
\end
{
itemize
}
personal information from datasets.
\textbf
{
Note:
}
Anonymization or de-identification
\begin
{
itemize
}
\tightlist
\item
\textbf
{
Anonymization
}
is irreversible.
\item
\textbf
{
De-identification
}
may include preserving identifiers that
can be re-linked by a trusted party.
\end
{
itemize
}
For legal aspects see HRA Art. 35.
In passing, there is more to this (Privacy-Preserving Data Mining
Methods / Charu Aggarwal and Philip Yu. 2008.):
\begin
{
figure
}
[htbp]
\centering
\includegraphics{Images/Privacy-Preserving_Data_Mining__Methods.png}
\caption{Privacy-preserving data mining methods}
\end
{
figure
}
\paragraph
{
k-anonymity
}
\label
{
k-anonymity
}
\subparagraph
{
Definition
}
\label
{
definition
}
``A release of data is said to have the k-anonymity property if the
information for each person contained in the release cannot be
distinguished from at least k-1 individuals whose information also
appear in the release''
(
\href
{
https://en.wikipedia.org/wiki/K-anonymity
}{
Source
}
).
\subparagraph
{
Illustration
}
\label
{
illustration
}
Example including removal and generalization (same source):
\begin
{
longtable
}
[c]
{
@
{}
llllll@
{}}
\toprule
Name
&
Age
&
Gender
&
State of domicile
&
Religion
&
Disease
\tabularnewline
\midrule
\endhead
Ramsha
&
29
&
Female
&
Tamil Nadu
&
Hindu
&
Cancer
\tabularnewline
Yadu
&
24
&
Female
&
Kerala
&
Hindu
&
Viral infection
\tabularnewline
Salima
&
28
&
Female
&
Tamil Nadu
&
Muslim
&
TB
\tabularnewline
Sunny
&
27
&
Male
&
Karnataka
&
Parsi
&
No illness
\tabularnewline
Joan
&
24
&
Female
&
Kerala
&
Christian
&
Heart-related
\tabularnewline
Bahuksana
&
23
&
Male
&
Karnataka
&
Buddhist
&
TB
\tabularnewline
Rambha
&
19
&
Male
&
Kerala
&
Hindu
&
Cancer
\tabularnewline
Kishor
&
29
&
Male
&
Karnataka
&
Hindu
&
Heart-related
\tabularnewline
Johnson
&
17
&
Male
&
Kerala
&
Christian
&
Heart-related
\tabularnewline
John
&
19
&
Male
&
Kerala
&
Christian
&
Viral infection
\tabularnewline
\bottomrule
\end
{
longtable
}
To (name and religion were removed, age was generalized):
\begin
{
longtable
}
[c]
{
@
{}
llllll@
{}}
\toprule
Name
&
Age
&
Gender
&
State of domicile
&
Religion
&
Disease
\tabularnewline
\midrule
\endhead
*
&
20
\textless
{}
Age ≤ 30
&
Female
&
Tamil Nadu
&
*
&
Cancer
\tabularnewline
*
&
20
\textless
{}
Age ≤ 30
&
Female
&
Kerala
&
*
&
Viral
infection
\tabularnewline
*
&
20
\textless
{}
Age ≤ 30
&
Female
&
Tamil Nadu
&
*
&
TB
\tabularnewline
*
&
20
\textless
{}
Age ≤ 30
&
Male
&
Karnataka
&
*
&
No
illness
\tabularnewline
*
&
20
\textless
{}
Age ≤ 30
&
Female
&
Kerala
&
*
&
Heart-related
\tabularnewline
*
&
20
\textless
{}
Age ≤ 30
&
Male
&
Karnataka
&
*
&
TB
\tabularnewline
*
&
Age ≤ 20
&
Male
&
Kerala
&
*
&
Cancer
\tabularnewline
*
&
20
\textless
{}
Age ≤ 30
&
Male
&
Karnataka
&
*
&
Heart-related
\tabularnewline
*
&
Age ≤ 20
&
Male
&
Kerala
&
*
&
Heart-related
\tabularnewline
*
&
Age ≤ 20
&
Male
&
Kerala
&
*
&
Viral infection
\tabularnewline
\bottomrule
\end
{
longtable
}
This data has 2-anonymity with respect to the attributes `Age', `Gender'
and `State of domicile' since for any combination of these attributes
found in any row of the table there are always at least 2 rows with
those exact attributes.
\paragraph
{
l-diversity - motivation
}
\label
{
l-diversity---motivation
}
An extension of k-anonymity. Why? To overcome weaknesses of that model,
notably:
\begin{itemize}
\tightlist
\item
  \textbf{homogeneity attacks}: in the case that the sensitive values
  within a group of lines are homogeneous,
\item
  \textbf{background knowledge attacks}: when knowledge about a field
  reduces the set of possible sensitive values (e.g.~knowing that heart
  attacks are not frequent in Japanese patients)
  (\href{https://en.wikipedia.org/wiki/K-anonymity}{source}).
\end{itemize}
Imagine the group, or equivalence class, (extracted from the whole
dataset)
{
[
}
table adapted from the one above
{
]
}
:
\begin
{
longtable
}
[c]
{
@
{}
llllll@
{}}
\toprule
Name
&
Age
&
Gender
&
State of domicile
&
Religion
&
Disease
\tabularnewline
\midrule
\endhead
*
&
20
\textless
{}
Age ≤ 30
&
Female
&
Tamil Nadu
&
*
&
AIDS
\tabularnewline
*
&
20
\textless
{}
Age ≤ 30
&
Female
&
Tamil Nadu
&
*
&
AIDS
\tabularnewline
*
&
20
\textless
{}
Age ≤ 30
&
Female
&
Tamil Nadu
&
*
&
AIDS
\tabularnewline
\bottomrule
\end
{
longtable
}
If it is known that Miss Smith was part of the study, is aged between
20 and 30, and lives in Tamil Nadu, then it is certain that she has AIDS,
even though we have 3-anonymity.
\subparagraph
{
l-diversity - definition
}
\label
{
l-diversity---definition
}
\textbf
{
The l-diversity Principle
}
: An equivalence class is said to
have l-diversity if there are at least l ``well-represented'' values for
the sensitive attribute. A table is said to have l-diversity if every
equivalence class of the table has l-diversity.
There are several definitions of ``well-represented''
(
\href
{
https://en.wikipedia.org/wiki/L-diversity
}{
source
}
).
By the way, l-diversity has weaknesses too; that is why people invented
\textbf
{
t-closeness
}
.
\subparagraph
{
differential privacy
}
\label
{
differential-privacy
}
\textbf
{
By linking with another database
}
: Linking the anonymized GIC
database (which retained the birthdate, sex, and ZIP code of each
patient) with voter registration records made it possible to identify the
medical record of the governor of Massachusetts.
\emph
{
Differential Privacy by Cynthia Dwork, International Colloquium on
Automata, Languages and Programming (ICALP) 2006, p.~1--12.
DOI=10.1007/11787006
\_
1
}
(
\href
{
https://en.wikipedia.org/wiki/Differential
_
privacy
}{
source
}
).
\paragraph
{
Anonymization - theory and
tools
}
\label
{
anonymization---theory-and-tools
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/sdc.jpg
}
\caption
{}
\end
{
figure
}
Statistical Disclosure Control / Hundepool,
\&
al. 2012.
\href
{
http://proquest.safaribooksonline.com/9781118348215
}{
Ebook / EPFL
library
}
.
Tools:
\begin{itemize}
\tightlist
\item
  \textbf{sdcMicro: Statistical Disclosure Control Methods for
  Anonymization of Microdata and Risk Estimation (R package)}
\item
  ARX Data Anonymization Tool (Java: library \& GUI)
\item
  μ-ARGUS (Java, GUI)
\end{itemize}
\subsection*
{
2.3 - Collaborative tools
}
\label
{
collaborative-tools
}
\subsubsection*
{
2.3.1 - File sharing
}
\label
{
file-sharing
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/owncloud.png
}
\caption
{
.
}
\end
{
figure
}
\begin
{
itemize
}
\tightlist
\item
Personal/group level: OwnCloud, free software: Mac, Windows, Linux,
iOS, Android
\ldots
{}
Web.
\item
Your own server: OwnCloud https://owncloud.org/
\item
Many plugins: contacts, calendar, collaborative writing, image
galleries, etc.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Swiss level: SwitchDrive https://drive.switch.ch/
\item
Owncloud with 50 GB per user,
\item
Restricted to Swiss universities members.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
A recent fork of ownCloud:
\href
{
https://nextcloud.com/
}{
NextCloud
}
aims for more transparent development processes.
\end
{
itemize
}
\subsubsection*
{
2.3.2 - Collaborative
writing
}
\label
{
collaborative-writing
}
\paragraph
{
File sharing is not enough
}
\label
{
file-sharing-is-not-enough
}
People often need to collaborate at a finer level. More and more.
\includegraphics
{
Images/Vandergheynst
_
Collaborative.png
}
Source: Prof.
Vandergheynst, EPFL Library
\href
{
http://library.epfl.ch/noon-talks/en
}{
Noon Talk, 25.8.2016
}
.
\includegraphics
{
Images/Vandergheynst
_
IncrCollab.png
}
Source: Prof.
Vandergheynst, EPFL Library
\href
{
http://library.epfl.ch/noon-talks/en
}{
Noon Talk, 25.8.2016
}
.
\includegraphics
{
Images/Vandergheynst
_
Versions.png
}
Source: Prof.
Vandergheynst, EPFL Library
\href
{
http://library.epfl.ch/noon-talks/en
}{
Noon Talk, 25.8.2016
}
.
\textbf{In summary}
\textbf
{
Text processing
}
comments / revision mode functionalities are
not sufficient for good collaboration.
\textbf
{
Google Documents
}
and related tools are not scientific writing
oriented, particularly regarding figures, references, citations,
bibliography management and interactive figures.
\textbf{\(\Rightarrow\) we need something else!}
\paragraph
{
Share LaTeX
}
\label
{
share-latex
}
\textbf
{
\href
{
https://de.sharelatex.com/
}{
Share LaTeX
}}
is an
alternative to Authorea: collaborative writing based on LaTeX. Suited
for LaTeX power users.
\includegraphics
{
Images/ShareLaTeX.png
}
Access provided by SWITCH, via the
\href
{
https://sandstorm.cloud.switch.ch/
}{
Sandstorm platform
}
.
Good, but only if all partners are LaTeX users.
\paragraph
{
Authorea
}
\label
{
authorea
}
\textbf
{
\href
{
https://www.authorea.com/
}{
Authorea
}}
: collaborative
writing, easy to use.
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/Authorea.png
}
\caption
{
Authorea
}
\end
{
figure
}
\begin
{
itemize
}
\tightlist
\item
Simple syntax : WYSIWYG and Markdown (lightweight text formatting
language). More complex formatting possible using LaTeX
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Enables others to make comments
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Supports interactive documents / figures (Jupyter)
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Offline synchronization on personal computer (using the Git version
control system)
\end
{
itemize
}
\subsubsection*
{
2.3.3 - Tools for coding and analyzing
data
}
\label
{
tools-for-coding-and-analyzing-data
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/git.png
}
\caption
{
.
}
\end
{
figure
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/jupyter-logo.png
}
\caption
{
.
}
\end
{
figure
}
Computational workflow (e.g.~SnakeMake)
\subsection*
{
2.4 - Data and Storage
}
\label
{
data-and-storage
}
\subsubsection*
{
2.4.1 - (Meta)data formats
}
\label
{
metadata-formats
}
\subsubsection*
{
Metadata
}
\label
{
metadata
}
\begin
{
itemize
}
\tightlist
\item
Most common generalist metadata formats:
\href
{
http://dublincore.org/documents/dces/
}{
Dublin Core (DCES)
}
,
\href
{
http://dublincore.org/documents/dcmi-terms/
}{
Dublin Core
(DCMI)
}
,
\href
{
https://schema.datacite.org/
}{
DataCite Metadata
Schema
}
.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
Numerous specialized metadata formats are available for most
disciplines, the Research Data Alliance
\href
{
http://rd-alliance.github.io/metadata-directory/
}{
Metadata
Directory
}
is a good starting point.
\end
{
itemize
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/MetadataDirectory.png
}
\caption
{
.
}
\end
{
figure
}
\subsubsection*
{
Data format
}
\label
{
data-format
}
Prefer a
\begin
{
itemize
}
\tightlist
\item
\textbf
{
standard format
}
,
\item
\textbf
{
open
}
and
\item
\textbf
{
widely used
}
\end
{
itemize
}
This way your data will not depend upon a particular software (or
company), operating system, or platform. And you will be able to:
\begin{itemize}
\tightlist
\item
  collaborate with more people (on various platforms),
\item
  avoid licensing problems,
\item
  maximize the reusability in the future.
\end{itemize}
\subsubsection*
{
Some open formats to take into
account
}
\label
{
some-open-formats-to-take-into-account
}
\begin
{
itemize
}
\tightlist
\item
Portable Document Format
\textbf
{
PDF/A, ISO standard
}
, text
{
[
}
PDF for
archiving, no ciphers, included fonts
\ldots
{}{
]
}
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
\textbf
{
Text
}
simple way to encode data. Can be read by most software.
\item
CSV tables, can be read by most software, and extended using
\href
{
https://www.w3.org/standards/techs/csv
}{
CSV on the Web
}
(metadata, datatypes, relation
\ldots
{}
)
\item
JSON: Simply structured, less bulky than XML, ideal for data exchange.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
\textbf
{
Geodata
}
\item
\href
{
http://www.iso.org/iso/catalogue
_
detail.htm?csnumber=53798
}{
ISO
19115-1:2014
}
: the norm.
\item
\href
{
http://geojson.org/
}{
GeoJson.org
}
: lighter.
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
\textbf
{
HDF5
}
, more flexible (not text, but structured and indexed,
supports arbitrary metadata, good performances).
\item
Compatible with many tools (Python, R, Matlab, Mathematica
\ldots
{}
)
\end
{
itemize
}
\begin
{
itemize
}
\tightlist
\item
\textbf
{
Databases:
}
\item
SQL:
\href
{
https://www.postgresql.org/
}{
Postgresql
}
is relational,
open and efficient
\item
BigData:
\href
{
https://www.mongodb.com/
}{
MongoDB
}
for volume,
velocity, and variety
\end
{
itemize
}
\paragraph
{
Data formats list
}
\label
{
data-formats-list
}
Sustainability of digital formats by the US Library of Congress.
\href
{
http://www.digitalpreservation.gov/formats/
}{
This list
}
is
categorized by datatypes (text, audio, image, video, geospatial,
dataset, etc.)
\subsubsection*
{
2.4.2 - Storage, publication and
preservation
}
\label
{
storage-publication-and-preservation
}
\paragraph
{
2.4.2.1 - Storage at UNIBAS
}
\label
{
storage-at-unibas
}
\textbf{Digital Humanities Lab}
General information: http://dhlab.unibas.ch/
\begin
{
itemize
}
\tightlist
\item
Organizing the national Data and Service Center for the Humanities
(DaSCH): http://dh-center.ch/
\item
Services for students and researchers at the University of Basel and
national institutes.
\item
Storage options for primary data and secondary data as databases
\item
Focus on data from the humanities, audiovisual data
\end
{
itemize
}
Contact:
\href
{
mailto:sekretariat-dhlab@unibas.ch
}{
\nolinkurl
{
sekretariat-dhlab@unibas.ch
}}
respectively
\href
{
mailto:info@dasch.swiss
}{
\nolinkurl
{
info@dasch.swiss
}}
\textbf
{
SciCore
}
General information: https://scicore.unibas.ch/
Services for students and researchers at the University of Basel,
associated institutes and the Swiss Institute of Bioinformatics.
\begin
{
itemize
}
\tightlist
\item
Providing
\textbf
{
high-performance computing resources
}
(computing
cluster with 8000 cores)
\item
Providing
\textbf
{
high-performance storage
}
for researchers with large
data sets (\textasciitilde{}1--10~TB) and/or with complex computational
requirements (e.g.~Linux workflows) and/or subject to special
requirements (e.g.~sensitive data)
\item
Providing
\textbf
{
storage for projects with large data volume
}
(over
10 TB, up to 500 TB); this requires dedicated project definition in a
discussion with the PI
\item
Providing
\textbf
{
scientific-service hosting (web sites)
}
for
resources with significant back-end requirements (storage and/or
calculation)
\item
Providing various types of
\textbf
{
consulting for data analysis and
management
}
.
\end
{
itemize
}
Contact for technical questions:
\href
{
mailto:scicore-admin@unibas.ch
}{
\nolinkurl
{
scicore-admin@unibas.ch
}}
\textbf
{
University Library Basel
}
General information: http://ub.unibas.ch/
\begin
{
itemize
}
\tightlist
\item
Support and information on using
\textbf
{
disciplinary and multi-storage
options/repositories
}
\item
Support for
\textbf
{
individual solutions
}
\item
Building up
\textbf
{
infrastructures
}
-- looking for
\textbf
{
test
cases
}
\end
{
itemize
}
Contact:
\href
{
mailto:silke.bellanger@unibas.ch
}{
\nolinkurl
{
silke.bellanger@unibas.ch
}}
\paragraph
{
2.4.2.2 - Publication and
preservation
}
\label
{
publication-and-preservation
}
\paragraph
{
Research data publication
}
\label
{
research-data-publication
}
``It is the
\textbf
{
release of research data, associated metadata,
accompanying documentation, and software code
{
[
}
\ldots
{}{
]
}
for re-use
and analysis
}
in such a manner that they can be discovered on the Web
and referred to in a unique and persistent way.
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/DataPublishing.png
}
\caption
{
Data Publishing
}
\end
{
figure
}
Data publishing occurs
\textbf
{
via dedicated data repositories and/or
(data) journals
}
which ensure that the published research objects are
well documented, curated, archived for the long term, interoperable,
citable, quality assured and discoverable -- all aspects of data
publishing that are important for future reuse of data by third party
end-users.''
Austin, C. C., Bloom, T. K., Dallmeier-Tiessen, S., Khodiyar, V.,
Murphy, F., Nurnberger, A., . . . Whyte, A. (2016).
\subsubsection*
{
Comparison: Dryad - Figshare -
Zenodo
}
\label
{
comparison-dryad---figshare---zenodo
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/Comp
_
Zenodo
_
Dryad
_
Figshare
_
1.PNG
}
\caption
{
Z-D-F
}
\end
{
figure
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/Comp
_
Zenodo
_
Dryad
_
Figshare
_
2.PNG
}
\caption
{
Z-D-F
}
\end
{
figure
}
\paragraph
{
Data repositories
}
\label
{
data-repositories
}
\begin
{
itemize
}
\tightlist
\item
\textbf
{
Zenodo
}
(hosted by CERN, free) http://zenodo.org
\end
{
itemize
}
\subsubsection*
{
Data Papers, Data
Journals
}
\label
{
data-papers-data-journals
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/DataPaper-DataJournal.PNG
}
\caption
{
DataPaper-DataJournal
}
\end
{
figure
}
\subsubsection*
{
Backup vs.~Preservation
}
\label
{
backup-vs.preservation
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/Preservervation
_
vs
_
Storage.png
}
\caption
{
Preservation vs.~Backup
}
\end
{
figure
}
\paragraph
{
Other data repositories
}
\label
{
other-data-repositories
}
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/re3datalogo
_
black.png
}
\caption
{
re3data
}
\end
{
figure
}
see
\textbf
{
\href
{
http://re3data.org
}{
re3data
}}
in which more than 1'500
data repositories are described.
\paragraph
{
Data Citation
}
\label
{
data-citation
}
\begin
{
itemize
}
\tightlist
\item
Always use persistent identifiers to avoid broken links (about 60
\%
after 10 years)
\item
The most common persistent identifier is the DOI (digital object
identifier)
\item
e.g.: http://doi.org/10.5281/zenodo.7525
\item
Zenodo, Figshare, Dryad and Infoscience can provide DOIs.
\end
{
itemize
}
\subsection*
{
2.5 - Licences
}
\label
{
licences
}
A licence allows you to define the way your data can be reused. For
instance:
Creative Commons (
\textbf
{
CC0
}
and
\textbf
{
CC-BY
}
)
http://creativecommons.org/ Since CC4.0, sui generis law protecting
database content is taken into account (in addition to the form
protected by copyright) https://wiki.creativecommons.org/wiki/Data
\begin
{
figure
}
[htbp]
\centering
\includegraphics
{
Images/CCbyncsa
_
others.png
}
\caption
{
.
}
\end
{
figure
}
You can contact us in the future here:
Contact:
\href
{
mailto:silke.bellanger@unibas.ch
}{
\nolinkurl
{
silke.bellanger@unibas.ch
}}
and
\href
{
mailto:researchdata@epfl.ch
}{
\nolinkurl
{
researchdata@epfl.ch
}}
We look forward to hearing from you!
Silke, Aude, Jan, and Lorenza
% Add a bibliography block to the postdoc
\end
{
document
}
Event Timeline
Log In to Comment