Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F69595960
FSTPrefixSearch.java
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Tue, Jul 2, 13:27
Size
2 KB
Mime Type
text/x-java
Expires
Thu, Jul 4, 13:27 (2 d)
Engine
blob
Format
Raw Data
Handle
18725523
Attached To
R1473 warcbase
FSTPrefixSearch.java
View Options
package
org.warcbase.data
;
import
java.io.IOException
;
import
java.util.ArrayList
;
import
java.util.Iterator
;
import
java.util.List
;
import
org.apache.lucene.util.ArrayUtil
;
import
org.apache.lucene.util.BytesRef
;
import
org.apache.lucene.util.fst.FST
;
import
org.apache.lucene.util.fst.FST.Arc
;
import
org.apache.lucene.util.fst.FST.BytesReader
;
import
org.apache.lucene.util.fst.Util
;
public
class
FSTPrefixSearch
{
private
FST
<
Long
>
fst
;
public
FSTPrefixSearch
(
FST
<
Long
>
fst
){
this
.
fst
=
fst
;
}
public
List
<
String
>
prefixSearch
(
String
prefix
)
throws
IOException
{
// descend to the arc of the prefix string
Arc
<
Long
>
arc
=
fst
.
getFirstArc
(
new
Arc
<
Long
>());
BytesReader
fstReader
=
fst
.
getBytesReader
();
BytesRef
bref
=
new
BytesRef
(
prefix
);
for
(
int
i
=
0
;
i
<
bref
.
length
;
i
++){
fst
.
findTargetArc
(
bref
.
bytes
[
i
+
bref
.
offset
]
&
0xFF
,
arc
,
arc
,
fstReader
);
}
// collect all substrings started from the arc of prefix string.
List
<
BytesRef
>
result
=
new
ArrayList
<
BytesRef
>();
BytesRef
newPrefixBref
=
new
BytesRef
(
prefix
.
substring
(
0
,
prefix
.
length
()-
1
));
collect
(
result
,
fstReader
,
newPrefixBref
,
arc
);
// convert BytesRef results to String results
List
<
String
>
strResults
=
new
ArrayList
<
String
>();
Iterator
<
BytesRef
>
iter
=
result
.
iterator
();
while
(
iter
.
hasNext
()){
strResults
.
add
(
iter
.
next
().
utf8ToString
());
}
return
strResults
;
}
public
Long
[]
getIdRange
(
List
<
String
>
results
){
Long
startId
=
null
,
endId
=
null
;
String
firstRes
=
results
.
get
(
0
);
String
lastRes
=
results
.
get
(
results
.
size
()-
1
);
try
{
startId
=
Util
.
get
(
fst
,
new
BytesRef
(
firstRes
));
endId
=
Util
.
get
(
fst
,
new
BytesRef
(
lastRes
));
}
catch
(
IOException
e
)
{
e
.
printStackTrace
();
}
Long
[]
idrange
=
{
startId
,
endId
};
return
idrange
;
}
private
boolean
collect
(
List
<
BytesRef
>
res
,
BytesReader
fstReader
,
BytesRef
output
,
Arc
<
Long
>
arc
)
throws
IOException
{
if
(
output
.
length
==
output
.
bytes
.
length
)
{
output
.
bytes
=
ArrayUtil
.
grow
(
output
.
bytes
);
}
assert
output
.
offset
==
0
;
output
.
bytes
[
output
.
length
++]
=
(
byte
)
arc
.
label
;
fst
.
readFirstTargetArc
(
arc
,
arc
,
fstReader
);
while
(
true
)
{
if
(
arc
.
label
==
FST
.
END_LABEL
)
{
res
.
add
(
new
BytesRef
().
deepCopyOf
(
output
));
//if (res.size() >= num) return true;
}
else
{
int
save
=
output
.
length
;
if
(
collect
(
res
,
fstReader
,
output
,
new
Arc
<
Long
>().
copyFrom
(
arc
)))
{
return
true
;
}
output
.
length
=
save
;
}
if
(
arc
.
isLast
())
{
break
;
}
fst
.
readNextArc
(
arc
,
fstReader
);
}
return
false
;
}
}
Event Timeline
Log In to Comment