Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F90651957
ExtractSiteLinks.java
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Nov 3, 14:32
Size
1 KB
Mime Type
text/x-c
Expires
Tue, Nov 5, 14:32 (2 d)
Engine
blob
Format
Raw Data
Handle
22114948
Attached To
R1473 warcbase
ExtractSiteLinks.java
View Options
package
org.warcbase.data
;
import
java.io.File
;
import
java.io.FileInputStream
;
import
java.io.FileNotFoundException
;
import
java.io.FileOutputStream
;
import
java.io.IOException
;
import
java.io.InputStream
;
import
java.util.ArrayList
;
import
java.util.List
;
import
org.apache.lucene.search.suggest.Lookup
;
import
org.apache.lucene.search.suggest.Lookup.LookupResult
;
import
org.apache.lucene.search.suggest.fst.FSTCompletion
;
import
org.apache.lucene.search.suggest.fst.FSTCompletion.Completion
;
import
org.apache.lucene.search.suggest.fst.FSTCompletionLookup
;
import
org.apache.lucene.util.BytesRef
;
import
org.apache.lucene.util.IntsRef
;
import
org.apache.lucene.util.fst.FST
;
import
org.apache.lucene.util.fst.Util
;
import
org.apache.lucene.util.fst.NoOutputs
;
import
org.apache.lucene.util.fst.Outputs
;
import
org.apache.lucene.util.fst.PositiveIntOutputs
;
/**
 * Command-line utility that loads a serialized FST from disk and prints the
 * completions found for a fixed prefix.
 */
public class ExtractSiteLinks {

  /** Path of the file the FST is read from. */
  private static final String FST_PATH = "map.txt";

  /** Prefix whose completions are looked up. */
  private static final String LOOKUP_PREFIX = "http";

  /** Count passed to the lookup call (upper bound on results requested). */
  private static final int RESULT_LIMIT = 100;

  /**
   * Reads the FST stored in {@code map.txt}, asks it for completions of the
   * prefix {@code "http"}, and writes each completion to standard output, one
   * per line.
   *
   * @param args command-line arguments; not used
   * @throws IOException if the FST file cannot be read
   */
  public static void main(String[] args) throws IOException {
    // The FST is read with the NoOutputs singleton as its outputs implementation.
    FST<Object> siteFst = FST.read(new File(FST_PATH), NoOutputs.getSingleton());

    // Wrap the loaded automaton so it can be queried by prefix.
    FSTCompletion completer = new FSTCompletion(siteFst);

    // Request the top RESULT_LIMIT matches for the prefix.
    List<Completion> completions = completer.lookup(LOOKUP_PREFIX, RESULT_LIMIT);

    for (int i = 0; i < completions.size(); i++) {
      Completion completion = completions.get(i);
      System.out.println(completion.toString());
    }
  }
}
Event Timeline
Log In to Comment