Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F60684415
GenomeRegion.cpp
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Wed, May 1, 22:52
Size
11 KB
Mime Type
text/x-c
Expires
Fri, May 3, 22:52 (2 d)
Engine
blob
Format
Raw Data
Handle
17396196
Attached To
R8820 scATAC-seq
GenomeRegion.cpp
View Options
#include <GenomeRegion.hpp>
#include <string>
#include <cmath>
// abs()
#include <stdexcept>
// std::invalid_argument
#include <seqan/bam_io.h>
/*
GenomeRegion GenomeRegion::constructRead(const seqan::BamAlignmentRecord& record,
const seqan::BamFileIn& bam_file)
{
GenomeRegion read ;
read.chromosome =
seqan::toCString(
seqan::getContigName(record, bam_file)) ;
read.chromosome_idx = record.rID ;
// read is on + strand
// |----------->
// record.beginPos
if(not seqan::hasFlagRC(record))
{ read.start = record.beginPos ;
read.length = seqan::endPosition(record.seq) ;
read.end = read.start + read.length ;
}
// read is on - strand
// <-----------|
// record.beginPos
else
{ read.length = seqan::endPosition(record.seq) ;
read.end = record.beginPos + 1 ;
read.start = read.end - read.length ;
}
if(read.start < 0 or read.end < 0)
{ char msg[4096] ;
sprintf(msg, "Error! invalide coordinate (<0) : [%s/%d %d %d)]",
read.chromosome.c_str(), read.chromosome_idx,
read.start, read.end) ;
throw std::invalid_argument(msg) ;
}
else if(read.start >= read.end)
{ char msg[4096] ;
sprintf(msg, "Error! start >= end : [%s/%d %d %d)]",
read.chromosome.c_str(), read.chromosome_idx,
read.start, read.end) ;
throw std::invalid_argument(msg) ;
}
return read ;
}
*/
// new
GenomeRegion
GenomeRegion
::
constructRead
(
const
seqan
::
BamAlignmentRecord
&
record
,
const
seqan
::
BamFileIn
&
bam_file
)
{
GenomeRegion
read
;
read
.
chromosome
=
seqan
::
toCString
(
seqan
::
getContigName
(
record
,
bam_file
))
;
read
.
chromosome_idx
=
record
.
rID
;
read
.
start
=
record
.
beginPos
;
read
.
length
=
seqan
::
endPosition
(
record
.
seq
)
;
read
.
end
=
read
.
start
+
read
.
length
;
if
(
read
.
start
<
0
or
read
.
end
<
0
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! invalide coordinate (<0) : [%s/%d %d %d)]"
,
read
.
chromosome
.
c_str
(),
read
.
chromosome_idx
,
read
.
start
,
read
.
end
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
read
.
start
>=
read
.
end
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! start >= end : [%s/%d %d %d)]"
,
read
.
chromosome
.
c_str
(),
read
.
chromosome_idx
,
read
.
start
,
read
.
end
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
return
read
;
}
/*
GenomeRegion GenomeRegion::constructReadATAC(const seqan::BamAlignmentRecord& record,
const seqan::BamFileIn& bam_file)
{ GenomeRegion read ;
read.chromosome =
seqan::toCString(
seqan::getContigName(record, bam_file)) ;
read.chromosome_idx = record.rID ;
// read is on + strand
if(not seqan::hasFlagRC(record))
{ read.start = record.beginPos + 4 ; }
// read is on - strand
else
{ read.start = record.beginPos - 5 ; }
read.end = read.start + 1 ;
read.length = 1 ;
return read ;
}
*/
// new
GenomeRegion
GenomeRegion
::
constructReadATAC
(
const
seqan
::
BamAlignmentRecord
&
record
,
const
seqan
::
BamFileIn
&
bam_file
)
{
GenomeRegion
read
=
GenomeRegion
::
constructRead
(
record
,
bam_file
);
if
(
not
seqan
::
hasFlagRC
(
record
))
{
read
.
start
+=
4
;
read
.
end
=
read
.
start
+
1
;
read
.
length
=
1
;
}
else
{
read
.
start
=
read
.
end
-
1
-
5
;
read
.
end
=
read
.
start
+
1
;
read
.
length
=
1
;
}
return
read
;
}
GenomeRegion
GenomeRegion
::
constructReadEdge
(
const
seqan
::
BamAlignmentRecord
&
record
,
const
seqan
::
BamFileIn
&
bam_file
)
{
GenomeRegion
read
=
GenomeRegion
::
constructRead
(
record
,
bam_file
);
if
(
not
seqan
::
hasFlagRC
(
record
))
{
read
.
end
=
read
.
start
+
1
;
read
.
length
=
1
;
}
else
{
read
.
start
=
read
.
end
-
1
;
read
.
length
=
1
;
}
return
read
;
}
// old
/*
GenomeRegion GenomeRegion::constructFragment(const seqan::BamAlignmentRecord& record,
const seqan::BamFileIn& bam_file)
{ GenomeRegion frag ;
frag.chromosome =
seqan::toCString(
seqan::getContigName(record, bam_file)) ;
frag.chromosome_idx = record.rID ;
// read is on + strand
// record
// |-----> <-----|
// record.beginPos
if(not seqan::hasFlagRC(record))
{ frag.start = record.beginPos ;
frag.length = record.tLen ;
frag.end = frag.start + frag.length ;
}
// read is on - strand
// record
// |-----> <-----|
// record.beginPos
else
{ // frag.end = seqan::endPosition(record.seq) + 1 ;
// frag.length = abs(record.tLen) ;
// frag.start = frag.end - frag.length ;
frag.end = record.beginPos + 1 ;
frag.length = abs(record.tLen) ;
frag.start = frag.end - frag.length ;
}
if(frag.start < 0 or frag.end < 0)
{ char msg[4096] ;
sprintf(msg, "Error! invalide coordinate (<0) : [%s/%d %d %d)]",
frag.chromosome.c_str(), frag.chromosome_idx,
frag.start, frag.end) ;
throw std::invalid_argument(msg) ;
}
else if(frag.start >= frag.end)
{ char msg[4096] ;
sprintf(msg, "Error! start >= end : [%s/%d %d %d)]",
frag.chromosome.c_str(), frag.chromosome_idx,
frag.start, frag.end) ;
throw std::invalid_argument(msg) ;
}
return frag ;
}
*/
GenomeRegion
GenomeRegion
::
constructFragment
(
const
seqan
::
BamAlignmentRecord
&
record
,
const
seqan
::
BamFileIn
&
bam_file
)
{
GenomeRegion
frag
;
frag
.
chromosome
=
seqan
::
toCString
(
seqan
::
getContigName
(
record
,
bam_file
))
;
frag
.
chromosome_idx
=
record
.
rID
;
// read is on + strand
// record
// |-----> <-----|
if
(
not
seqan
::
hasFlagRC
(
record
))
{
frag
.
start
=
record
.
beginPos
;
frag
.
length
=
record
.
tLen
;
frag
.
end
=
frag
.
start
+
frag
.
length
;
}
// read is on - strand
// record
// |-----> <-----|
else
{
// frag.end = seqan::endPosition(record.seq) + 1 ;
// frag.length = abs(record.tLen) ;
// frag.start = frag.end - frag.length ;
frag
.
length
=
abs
(
record
.
tLen
)
;
frag
.
start
=
record
.
pNext
;
frag
.
end
=
frag
.
start
+
frag
.
length
;
}
if
(
frag
.
start
<
0
or
frag
.
end
<
0
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! invalide coordinate (<0) : [%s/%d %d %d)]"
,
frag
.
chromosome
.
c_str
(),
frag
.
chromosome_idx
,
frag
.
start
,
frag
.
end
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
frag
.
start
>=
frag
.
end
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! start >= end : [%s/%d %d %d)]"
,
frag
.
chromosome
.
c_str
(),
frag
.
chromosome_idx
,
frag
.
start
,
frag
.
end
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
return
frag
;
}
GenomeRegion
GenomeRegion
::
constructFragmentCenter
(
const
seqan
::
BamAlignmentRecord
&
record
,
const
seqan
::
BamFileIn
&
bam_file
)
{
GenomeRegion
frag
=
GenomeRegion
::
constructFragment
(
record
,
bam_file
)
;
int
mid
=
frag
.
start
+
(
frag
.
length
/
2
)
;
frag
.
start
=
mid
;
frag
.
end
=
mid
+
1
;
frag
.
length
=
1
;
return
frag
;
}
GenomeRegion
::
GenomeRegion
(
const
GenomeRegion
&
other
)
:
chromosome
(
other
.
chromosome
),
chromosome_idx
(
other
.
chromosome_idx
),
start
(
other
.
start
),
end
(
other
.
end
),
length
(
other
.
length
)
{}
GenomeRegion
::
GenomeRegion
(
const
std
::
string
&
chromosome
,
int
chromosome_idx
,
int
start
,
int
end
)
:
chromosome
(
chromosome
),
chromosome_idx
(
chromosome_idx
),
start
(
start
),
end
(
end
),
length
(
end
-
start
)
{
if
(
this
->
start
<
0
or
this
->
end
<
0
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! invalide coordinate (<0) : [%s/%d %d %d)]"
,
this
->
chromosome
.
c_str
(),
this
->
chromosome_idx
,
this
->
start
,
this
->
end
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
else
if
(
start
>=
end
)
{
char
msg
[
4096
]
;
sprintf
(
msg
,
"Error! start >= end : [%s/%d %d %d)]"
,
this
->
chromosome
.
c_str
(),
this
->
chromosome_idx
,
this
->
start
,
this
->
end
)
;
throw
std
::
invalid_argument
(
msg
)
;
}
}
int
GenomeRegion
::
overlap_len
(
const
GenomeRegion
&
other
)
const
{
int
len
=
0
;
if
((
*
this
)
|
other
)
{
// this is contained in other or overlap perfectly other
if
(
this
->
start
>=
other
.
start
and
this
->
end
<=
other
.
end
)
{
len
=
this
->
length
;
}
// start of this overlaps end other
else
if
((
other
.
start
<
this
->
start
)
and
(
other
.
end
-
1
>=
this
->
start
))
{
len
=
other
.
end
-
this
->
start
;
}
// other contained in this (perect overlap is handled in first case)
else
if
(
other
.
start
>=
this
->
start
and
other
.
end
<=
this
->
end
)
{
len
=
other
.
length
;
}
// end of this overlaps start of other (only case left)
else
{
len
=
this
->
end
-
other
.
start
;
}
}
return
len
;
}
GenomeRegion
&
GenomeRegion
::
operator
=
(
const
GenomeRegion
&
rhs
)
{
if
(
this
==
&
rhs
)
{
return
*
this
;
}
this
->
start
=
rhs
.
start
;
this
->
end
=
rhs
.
end
;
this
->
length
=
rhs
.
length
;
this
->
chromosome
=
rhs
.
chromosome
;
this
->
chromosome_idx
=
rhs
.
chromosome_idx
;
return
*
this
;
}
GenomeRegion
&
GenomeRegion
::
operator
=
(
GenomeRegion
&&
rhs
)
{
if
(
this
==
&
rhs
)
{
return
*
this
;
}
this
->
start
=
rhs
.
start
;
this
->
end
=
rhs
.
end
;
this
->
length
=
rhs
.
length
;
this
->
chromosome
=
rhs
.
chromosome
;
this
->
chromosome_idx
=
rhs
.
chromosome_idx
;
return
*
this
;
}
bool
GenomeRegion
::
operator
==
(
const
GenomeRegion
&
rhs
)
const
{
if
(
this
==
&
rhs
)
{
return
true
;
}
if
(
this
->
chromosome_idx
==
rhs
.
chromosome_idx
and
this
->
start
==
rhs
.
start
and
this
->
end
==
rhs
.
end
and
this
->
length
==
rhs
.
length
)
{
return
true
;
}
return
false
;
}
bool
GenomeRegion
::
operator
|
(
const
GenomeRegion
&
rhs
)
const
{
if
((
this
->
chromosome_idx
!=
rhs
.
chromosome_idx
)
or
// on diff chromosomes
(
rhs
.
end
-
1
<
this
->
start
)
or
// rhs upstream this
(
this
->
end
-
1
<
rhs
.
start
))
// rhs downstream this
{
return
false
;
}
return
true
;
}
bool
GenomeRegion
::
operator
<
(
const
GenomeRegion
&
rhs
)
const
{
if
(
this
->
chromosome_idx
<
rhs
.
chromosome_idx
)
{
return
true
;
}
else
if
((
this
->
chromosome_idx
==
rhs
.
chromosome_idx
)
and
(
this
->
end
-
1
<
rhs
.
start
))
{
return
true
;
}
return
false
;
}
bool
GenomeRegion
::
operator
>
(
const
GenomeRegion
&
rhs
)
const
{
if
(
this
->
chromosome_idx
>
rhs
.
chromosome_idx
)
{
return
true
;
}
if
((
this
->
chromosome_idx
==
rhs
.
chromosome_idx
)
and
(
rhs
.
end
-
1
<
this
->
start
))
{
return
true
;
}
return
false
;
}
std
::
ostream
&
operator
<<
(
std
::
ostream
&
stream
,
const
GenomeRegion
&
region
)
{
stream
<<
"( "
<<
region
.
chromosome
<<
"/"
<<
region
.
chromosome_idx
<<
" "
<<
region
.
start
<<
" "
<<
region
.
end
<<
" "
<<
region
.
length
<<
" ) "
;
return
stream
;
}
Event Timeline
Log In to Comment