Page Menu
Home
c4science
Search
Configure Global Search
Log In
Files
F78042727
DetectMimeTypeTika.java
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Subscribers
None
File Metadata
Details
File Info
Storage
Attached
Created
Sun, Aug 18, 01:19
Size
973 B
Mime Type
text/x-java
Expires
Tue, Aug 20, 01:19 (1 d, 23 h)
Engine
blob
Format
Raw Data
Handle
19971547
Attached To
R1473 warcbase
DetectMimeTypeTika.java
View Options
package
org.warcbase.pig.piggybank
;
import
org.apache.pig.EvalFunc
;
import
org.apache.pig.data.Tuple
;
import
org.apache.tika.Tika
;
import
org.apache.tika.detect.DefaultDetector
;
import
org.apache.tika.parser.AutoDetectParser
;
import
java.io.ByteArrayInputStream
;
import
java.io.IOException
;
import
java.io.InputStream
;
/**
 * Pig UDF that reports the MIME type Apache Tika detects for a piece of content.
 *
 * <p>The first field of the input tuple is cast to {@code String}, converted to bytes and
 * handed to {@link Tika#detect(InputStream)}.
 */
public class DetectMimeTypeTika extends EvalFunc<String> {

  /**
   * Tika facade shared by all {@link #exec(Tuple)} calls on this instance.
   *
   * <p>Created on first use so that the detector and parser are not rebuilt for every
   * record. Access is not synchronized.
   * NOTE(review): assumes a single instance is not called from several threads at once - confirm.
   */
  private Tika tika;

  /**
   * Detects the MIME type of the content held in the first field of {@code input}.
   *
   * @param input tuple whose first field is the content, cast to {@code String}
   * @return {@code null} if {@code input} is {@code null}, has no fields, or its first field
   *     is {@code null}; the literal {@code "EMPTY"} if the content is the empty string;
   *     otherwise the value returned by {@link Tika#detect(InputStream)}
   * @throws IOException if reading the tuple field or the detection itself fails
   */
  @Override
  public String exec(Tuple input) throws IOException {
    if (input == null || input.size() == 0 || input.get(0) == null) {
      return null;
    }

    String content = (String) input.get(0);
    // Check for empty content before copying it into a byte array and a stream.
    if (content.isEmpty()) {
      return "EMPTY";
    }

    if (tika == null) {
      // The parser is built on the same detector instance that the facade uses.
      DefaultDetector detector = new DefaultDetector();
      tika = new Tika(detector, new AutoDetectParser(detector));
    }

    // NOTE(review): getBytes() uses the platform default charset, as before - confirm
    // whether an explicit charset is wanted here.
    InputStream is = new ByteArrayInputStream(content.getBytes());
    return tika.detect(is);
  }
}
Event Timeline
Log In to Comment