Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F122017630
PrefixMapping.java
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Jul 15, 08:25
Size
2 KB
Mime Type
text/x-java
Expires
Thu, Jul 17, 08:25 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
27424499
Attached To
R1473 warcbase
PrefixMapping.java
View Options
package
org.warcbase.data
;
import
java.io.FileReader
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Collections
;
import
java.util.Comparator
;
import
java.util.List
;
import
au.com.bytecode.opencsv.CSVReader
;
/**
 * Loads URL prefixes from a CSV file, pairs each prefix with the two boundary
 * values that a {@link UriMapping} reports for it, and looks up which prefix a
 * given id falls under.
 */
public class PrefixMapping {

  /**
   * One prefix entry: the id and URL read from the prefix file, plus the
   * start/end boundaries obtained for that URL.
   *
   * <p>Kept as a (non-static) inner class so that existing
   * {@code instance.new PrefixNode(...)} callers continue to compile.
   */
  public class PrefixNode {
    int id;
    String url;
    Long startPos;
    Long endPos;

    /**
     * @param id       identifier of the prefix
     * @param url      the prefix URL
     * @param startPos lower boundary associated with the prefix
     * @param endPos   upper boundary associated with the prefix
     */
    public PrefixNode(int id, String url, Long startPos, Long endPos) {
      this.id = id;
      this.url = url;
      this.startPos = startPos;
      this.endPos = endPos;
    }

    /** @return the identifier of this prefix */
    public int getId() {
      return id;
    }

    /** @return the prefix URL */
    public String getUrl() {
      return url;
    }

    /** @return the lower boundary associated with this prefix */
    public Long getStartPos() {
      return startPos;
    }

    /** @return the upper boundary associated with this prefix */
    public Long getEndPos() {
      return endPos;
    }
  }

  /**
   * Reads a comma-separated prefix file and builds the list of prefix nodes,
   * sorted by ascending {@code startPos}.
   *
   * <p>The first record of the file is discarded (presumably a header row —
   * confirm against the file format). For every following record, column 0 is
   * parsed as the integer prefix id and column 1 is taken as the prefix URL.
   * The URL is passed to {@code map.prefixSearch}, whose result is passed to
   * {@code map.getIdRange}; elements 0 and 1 of the returned array become the
   * node's {@code startPos} and {@code endPos}.
   *
   * @param prefixFile path of the CSV file to read
   * @param map        mapping used to resolve each prefix URL to its boundaries
   * @return the prefix nodes ordered by ascending {@code startPos}
   * @throws IOException           if the file cannot be opened, read or closed
   * @throws NumberFormatException if column 0 of a record is not an integer
   */
  public static ArrayList<PrefixNode> loadPrefix(String prefixFile, UriMapping map)
      throws IOException {
    // PrefixNode is an inner class, so an enclosing instance is needed to create nodes.
    PrefixMapping instance = new PrefixMapping();

    final Comparator<PrefixNode> byStartPos = new Comparator<PrefixNode>() {
      @Override
      public int compare(PrefixNode n1, PrefixNode n2) {
        // Compare by value: '==' on boxed Longs compares references and would
        // report equal positions as "less than" in both directions.
        return n1.startPos.compareTo(n2.startPos);
      }
    };

    ArrayList<PrefixNode> prefixes = new ArrayList<PrefixNode>();
    CSVReader reader = new CSVReader(new FileReader(prefixFile), ',');
    try {
      reader.readNext(); // discard the first record

      String[] record;
      while ((record = reader.readNext()) != null) {
        int id = Integer.parseInt(record[0]);
        String url = record[1];
        List<String> results = map.prefixSearch(url);
        Long[] boundary = map.getIdRange(results);
        prefixes.add(instance.new PrefixNode(id, url, boundary[0], boundary[1]));
      }
    } finally {
      // Release the file handle even when parsing or the mapping lookup fails.
      reader.close();
    }

    Collections.sort(prefixes, byStartPos);
    return prefixes;
  }

  /**
   * Binary-searches {@code prefixes} for the node whose inclusive range
   * {@code [startPos, endPos]} contains {@code id}.
   *
   * <p>The list must be sorted by ascending {@code startPos}, as
   * {@link #loadPrefix} returns it. NOTE(review): the search inspects only one
   * candidate per step, so it presumably relies on the ranges not overlapping —
   * confirm against the data.
   *
   * @param id       the value to locate
   * @param prefixes prefix nodes sorted by ascending {@code startPos}
   * @return the id of the containing prefix node, or {@code -1} if none contains {@code id}
   */
  public int getPrefixId(int id, ArrayList<PrefixNode> prefixes) {
    int start = 0;
    int end = prefixes.size() - 1;

    while (start <= end) {
      // Unsigned shift keeps the midpoint correct even if start + end overflows.
      int mid = (start + end) >>> 1;
      PrefixNode node = prefixes.get(mid);
      long nodeStart = node.getStartPos();

      if (nodeStart <= id && node.getEndPos() >= id) {
        return node.getId();
      } else if (nodeStart > id) {
        end = mid - 1;
      } else {
        start = mid + 1;
      }
    }
    return -1;
  }
}
Event Timeline
Log In to Comment