Page MenuHomec4science

ArcRecord.scala
No OneTemporary

File Metadata

Created
Wed, May 22, 10:54

ArcRecord.scala

package org.warcbase.spark.archive.io
import org.apache.spark.SerializableWritable
import org.warcbase.data.ArcRecordUtils
import org.warcbase.io.ArcRecordWritable
import org.warcbase.spark.matchbox.ExtractDate.DateComponent
import org.warcbase.spark.matchbox.{ExtractDate, ExtractTopLevelDomain}
/**
 * [[ArchiveRecord]] implementation backed by an `ArcRecordWritable`.
 *
 * Every accessor is a `val`, so each value is read from the wrapped record once, while the
 * instance is being constructed.
 *
 * @param r serializable wrapper holding the `ArcRecordWritable` to read from
 */
class ArcRecord(r: SerializableWritable[ArcRecordWritable]) extends ArchiveRecord {

  /** Date from the record metadata, passed through `ExtractDate` with `DateComponent.YYYYMMDD`. */
  val getCrawldate: String =
    ExtractDate(r.t.getRecord.getMetaData.getDate, DateComponent.YYYYMMDD)

  /** MIME type as reported by the record metadata. */
  val getMimeType: String = r.t.getRecord.getMetaData.getMimetype

  /** URL as reported by the record metadata. */
  val getUrl: String = r.t.getRecord.getMetaData.getUrl

  /** Result of applying `ExtractTopLevelDomain` to the record's URL. */
  val getDomain: String = ExtractTopLevelDomain(r.t.getRecord.getMetaData.getUrl)

  /** Body bytes of the record, as returned by `ArcRecordUtils.getBodyContent`. */
  val getContentBytes: Array[Byte] = ArcRecordUtils.getBodyContent(r.t.getRecord)

  /**
   * Body of the record decoded as text.
   *
   * The charset is given explicitly: `new String(bytes)` without one uses the JVM's
   * platform-default charset, which can differ between machines (e.g. a Spark driver and its
   * executors) and would make the decoded text environment-dependent.
   * NOTE(review): the bytes are assumed to be UTF-8; the record's declared charset is not
   * consulted here — confirm this is acceptable for callers.
   */
  val getContentString: String =
    new String(getContentBytes, java.nio.charset.StandardCharsets.UTF_8)
}

Event Timeline