diff --git a/.settings/org.eclipse.core.resources.prefs b/.settings/org.eclipse.core.resources.prefs deleted file mode 100644 index e9441bb..0000000 --- a/.settings/org.eclipse.core.resources.prefs +++ /dev/null @@ -1,3 +0,0 @@ -eclipse.preferences.version=1 -encoding//src/main/java=UTF-8 -encoding/=UTF-8 diff --git a/.settings/org.eclipse.jdt.core.prefs b/.settings/org.eclipse.jdt.core.prefs deleted file mode 100644 index 4e9e193..0000000 --- a/.settings/org.eclipse.jdt.core.prefs +++ /dev/null @@ -1,304 +0,0 @@ -#Thu Aug 14 12:00:31 EDT 2014 -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_inits=do not insert -org.eclipse.jdt.core.formatter.brace_position_for_block=end_of_line -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation_type_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_try_resources=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert -org.eclipse.jdt.core.formatter.brace_position_for_annotation_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.alignment_for_assignment=0 -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_try_resources=insert -org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_javadoc_comment=false -org.eclipse.jdt.core.formatter.never_indent_line_comments_on_first_column=false -org.eclipse.jdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do 
not insert -org.eclipse.jdt.core.formatter.indent_statements_compare_to_body=true -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_superinterfaces=insert -org.eclipse.jdt.core.formatter.insert_new_line_before_catch_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_throw=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_unary_operator=do not insert -org.eclipse.jdt.core.formatter.blank_lines_after_imports=1 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_annotation=0 -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.comment.format_source_code=true -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert -org.eclipse.jdt.core.compiler.debug.localVariable=generate -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_annotation_declaration_header=true -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_package=insert -org.eclipse.jdt.core.formatter.blank_lines_before_member_type=1 -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_enum_constant=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_try=insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_local_variable=insert -org.eclipse.jdt.core.formatter.keep_then_statement_on_same_line=false -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert -org.eclipse.jdt.core.formatter.format_line_comment_starting_on_first_column=true -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_local_declarations=insert -org.eclipse.jdt.core.formatter.blank_lines_before_new_chunk=1 -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_explicit_constructor_call=16 -org.eclipse.jdt.core.formatter.number_of_empty_lines_to_preserve=1 -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert -org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve -org.eclipse.jdt.core.formatter.brace_position_for_switch=end_of_line -org.eclipse.jdt.core.formatter.comment.format_header=false -org.eclipse.jdt.core.formatter.insert_space_before_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_before_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.alignment_for_superclass_in_type_declaration=16 -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_case=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon=do not insert -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_enum_declaration=16 -org.eclipse.jdt.core.formatter.comment.line_length=100 -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert 
-org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_cases=true -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_increments=insert -org.eclipse.jdt.core.formatter.insert_space_after_binary_operator=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_block=insert -org.eclipse.jdt.core.formatter.insert_space_after_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.brace_position_for_enum_declaration=end_of_line -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_explicitconstructorcall_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_before_at_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_assert=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_constructor_declaration_parameters=insert -org.eclipse.jdt.core.formatter.comment.format_javadoc_comments=true -org.eclipse.jdt.core.formatter.continuation_indentation=2 -org.eclipse.jdt.core.codeComplete.fieldSuffixes= -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_field=insert -org.eclipse.jdt.core.formatter.comment.insert_new_line_before_root_tags=insert -org.eclipse.jdt.core.formatter.alignment_for_selector_in_method_invocation=16 -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_parameterized_type_reference=insert -org.eclipse.jdt.core.formatter.insert_space_after_assignment_operator=insert -org.eclipse.jdt.core.formatter.blank_lines_before_method=1 -org.eclipse.jdt.core.formatter.put_empty_statement_on_new_line=true 
-org.eclipse.jdt.core.formatter.alignment_for_parameters_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.blank_lines_between_type_declarations=1 -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_multiple_field_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_paren_in_cast=insert -org.eclipse.jdt.core.formatter.brace_position_for_constructor_declaration=end_of_line -org.eclipse.jdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_annotation_type_declaration=insert -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_qualified_allocation_expression=16 -org.eclipse.jdt.core.formatter.join_lines_in_comments=true -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_type_header=true -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.blank_lines_before_first_class_body_declaration=0 -org.eclipse.jdt.core.formatter.comment.format_block_comments=true -org.eclipse.jdt.core.formatter.insert_space_before_question_in_conditional=insert -org.eclipse.jdt.core.formatter.alignment_for_multiple_fields=16 -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_for_increments=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_case=do not insert -org.eclipse.jdt.core.formatter.alignment_for_union_type_in_multicatch=16 -org.eclipse.jdt.core.formatter.comment.format_html=true -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_enum_constant=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_semicolon_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.alignment_for_parameters_in_method_declaration=16 -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_synchronized=do not insert -org.eclipse.jdt.core.compiler.problem.enumIdentifier=error -org.eclipse.jdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_after_semicolon_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_before_assignment_operator=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_superinterfaces=do not insert -org.eclipse.jdt.core.formatter.brace_position_for_method_declaration=end_of_line -org.eclipse.jdt.core.formatter.insert_new_line_before_else_in_if_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_constant_arguments=do not insert -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_allocation_expression=16 -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_annotation_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_if=insert -org.eclipse.jdt.core.formatter.comment.new_lines_at_javadoc_boundaries=true -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_member=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_bracket_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_local_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_constant_header=true -org.eclipse.jdt.core.formatter.comment.insert_new_line_for_parameter=do not insert -org.eclipse.jdt.core.codeComplete.staticFinalFieldSuffixes= -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_method_declaration=16 -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_annotation=insert -org.eclipse.jdt.core.formatter.insert_space_after_ellipsis=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_before_finally_in_try_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_at_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_enum_constant=16 -org.eclipse.jdt.core.codeComplete.fieldPrefixes= -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_switch=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.brace_position_for_enum_constant=end_of_line -org.eclipse.jdt.core.formatter.wrap_before_or_operator_multicatch=true -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert 
-org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert -org.eclipse.jdt.core.codeComplete.staticFieldSuffixes= -org.eclipse.jdt.core.formatter.insert_space_after_and_in_type_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_array_initializer=insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_annotation_type_member_declaration=do not insert -org.eclipse.jdt.core.formatter.alignment_for_enum_constants=0 -org.eclipse.jdt.core.formatter.alignment_for_method_declaration=0 -org.eclipse.jdt.core.formatter.tabulation.char=space -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_allocation_expression=insert -org.eclipse.jdt.core.formatter.wrap_before_binary_operator=true -org.eclipse.jdt.core.formatter.insert_space_after_question_in_wildcard=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_annotation=do not insert -org.eclipse.jdt.core.formatter.format_guardian_clause_on_one_line=false -org.eclipse.jdt.core.formatter.keep_else_statement_on_same_line=false -org.eclipse.jdt.core.formatter.compact_else_if=true -org.eclipse.jdt.core.codeComplete.localPrefixes= -org.eclipse.jdt.core.formatter.insert_space_before_parenthesized_expression_in_return=insert -org.eclipse.jdt.core.formatter.insert_space_between_brackets_in_array_type_reference=do not insert -org.eclipse.jdt.core.formatter.brace_position_for_anonymous_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.comment.clear_blank_lines_in_block_comment=false -org.eclipse.jdt.core.formatter.disabling_tag=@formatter\:off -org.eclipse.jdt.core.formatter.align_type_members_on_columns=false -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_multiple_field_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_type=insert 
-org.eclipse.jdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.join_wrapped_lines=true -org.eclipse.jdt.core.codeComplete.localSuffixes= -org.eclipse.jdt.core.formatter.insert_space_after_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_bracket_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.indent_body_declarations_compare_to_enum_declaration_header=true -org.eclipse.jdt.core.formatter.use_on_off_tags=false -org.eclipse.jdt.core.formatter.alignment_for_expressions_in_array_initializer=16 -org.eclipse.jdt.core.formatter.never_indent_block_comments_on_first_column=false -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_method=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_switch=insert -org.eclipse.jdt.core.formatter.comment.format_line_comments=true -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_catch=insert -org.eclipse.jdt.core.formatter.keep_empty_array_initializer_on_one_line=false -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.comment.indent_parameter_description=false -org.eclipse.jdt.core.formatter.indentation.size=2 -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_enum_constant=do not insert 
-org.eclipse.jdt.core.formatter.insert_space_before_colon_in_for=insert -org.eclipse.jdt.core.compiler.debug.sourceFile=generate -org.eclipse.jdt.core.formatter.insert_space_before_ellipsis=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_for=insert -org.eclipse.jdt.core.formatter.alignment_for_throws_clause_in_constructor_declaration=16 -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert -org.eclipse.jdt.core.formatter.comment.indent_root_tags=true -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_constant=insert -org.eclipse.jdt.core.formatter.alignment_for_binary_expression=16 -org.eclipse.jdt.core.formatter.brace_position_for_type_declaration=end_of_line -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_binary_operator=insert -org.eclipse.jdt.core.formatter.use_tabs_only_for_leading_indentations=false -org.eclipse.jdt.core.formatter.indent_breaks_compare_to_cases=true -org.eclipse.jdt.core.formatter.insert_space_between_empty_brackets_in_array_allocation_expression=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_prefix_operator=do not insert -org.eclipse.jdt.core.formatter.blank_lines_before_package=0 -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_enum_declaration=insert -org.eclipse.jdt.core.formatter.indent_empty_lines=false -org.eclipse.jdt.core.formatter.insert_space_before_closing_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.alignment_for_arguments_in_method_invocation=16 -org.eclipse.jdt.core.formatter.wrap_outer_expressions_when_nested=true 
-org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_annotation=do not insert -org.eclipse.jdt.core.formatter.insert_new_line_after_annotation_on_parameter=insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_throws=do not insert -org.eclipse.jdt.core.codeComplete.staticFinalFieldPrefixes= -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert -org.eclipse.jdt.core.formatter.number_of_blank_lines_at_beginning_of_method_body=0 -org.eclipse.jdt.core.formatter.brace_position_for_block_in_case=end_of_line -org.eclipse.jdt.core.formatter.insert_space_after_colon_in_assert=insert -org.eclipse.jdt.core.formatter.alignment_for_conditional_expression=80 -org.eclipse.jdt.core.codeComplete.argumentPrefixes= -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert -org.eclipse.jdt.core.formatter.keep_imple_if_on_one_line=false -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_declaration=insert -org.eclipse.jdt.core.formatter.indent_switchstatements_compare_to_switch=false -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_synchronized=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_for_inits=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_synchronized=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_enum_constant=insert -org.eclipse.jdt.core.codeComplete.argumentSuffixes= -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_parameters=insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_enum_constant_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_for=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.alignment_for_resources_in_try=80 
-org.eclipse.jdt.core.formatter.insert_space_before_comma_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_comma_in_type_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_while=insert -org.eclipse.jdt.core.codeComplete.staticFieldPrefixes= -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert -org.eclipse.jdt.core.compiler.debug.lineNumber=generate -org.eclipse.jdt.core.formatter.blank_lines_before_imports=1 -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert -org.eclipse.jdt.core.formatter.alignment_for_compact_if=16 -org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled -org.eclipse.jdt.core.formatter.insert_new_line_after_label=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_closing_angle_bracket_in_type_arguments=insert -org.eclipse.jdt.core.formatter.insert_space_before_unary_operator=do not insert -org.eclipse.jdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=false -org.eclipse.jdt.core.formatter.insert_space_after_closing_brace_in_block=insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_paren_in_constructor_declaration=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_try=do not insert -org.eclipse.jdt.core.formatter.continuation_indentation_for_array_initializer=2 -org.eclipse.jdt.core.formatter.lineSplit=100 -org.eclipse.jdt.core.compiler.source=1.7 -org.eclipse.jdt.core.formatter.tabulation.size=2 -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_anonymous_type_declaration=insert -org.eclipse.jdt.core.formatter.insert_new_line_in_empty_method_body=insert 
-org.eclipse.jdt.core.compiler.problem.assertIdentifier=error -org.eclipse.jdt.core.formatter.brace_position_for_array_initializer=end_of_line -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_explicitconstructorcall_arguments=do not insert -org.eclipse.jdt.core.formatter.blank_lines_between_import_groups=1 -org.eclipse.jdt.core.formatter.comment.new_lines_at_block_boundaries=true -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_constructor_declaration_parameters=do not insert -org.eclipse.jdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_question_in_conditional=insert -org.eclipse.jdt.core.formatter.insert_space_before_postfix_operator=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_colon_in_default=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_opening_brace_in_constructor_declaration=insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_angle_bracket_in_parameterized_type_reference=do not insert -org.eclipse.jdt.core.formatter.indent_statements_compare_to_block=true -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert -org.eclipse.jdt.core.formatter.insert_space_before_comma_in_type_arguments=do not insert -org.eclipse.jdt.core.formatter.insert_space_after_opening_paren_in_annotation=do not insert -eclipse.preferences.version=1 -org.eclipse.jdt.core.formatter.insert_space_before_closing_bracket_in_array_reference=do not insert -org.eclipse.jdt.core.formatter.enabling_tag=@formatter\:on -org.eclipse.jdt.core.compiler.compliance=1.7 -org.eclipse.jdt.core.formatter.blank_lines_after_package=1 -org.eclipse.jdt.core.formatter.alignment_for_superinterfaces_in_type_declaration=16 -org.eclipse.jdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert 
-org.eclipse.jdt.core.formatter.blank_lines_before_field=0 diff --git a/.settings/org.eclipse.jdt.ui.prefs b/.settings/org.eclipse.jdt.ui.prefs deleted file mode 100644 index 1976599..0000000 --- a/.settings/org.eclipse.jdt.ui.prefs +++ /dev/null @@ -1,7 +0,0 @@ -eclipse.preferences.version=1 -formatter_profile=_warcbase -formatter_settings_version=12 -org.eclipse.jdt.ui.exception.name=e -org.eclipse.jdt.ui.gettersetter.use.is=true -org.eclipse.jdt.ui.keywordthis=false -org.eclipse.jdt.ui.overrideannotation=true diff --git a/.settings/org.eclipse.m2e.core.prefs b/.settings/org.eclipse.m2e.core.prefs deleted file mode 100644 index f897a7f..0000000 --- a/.settings/org.eclipse.m2e.core.prefs +++ /dev/null @@ -1,4 +0,0 @@ -activeProfiles= -eclipse.preferences.version=1 -resolveWorkspaceProjects=true -version=1 diff --git a/.travis.yml b/.travis.yml index 86ab5a9..60e5ac7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,12 +1,12 @@ language: java sudo: false jdk: - oraclejdk7 - oraclejdk8 - openjdk7 before_install: - "echo $JAVA_OPTS" - "export JAVA_OPTS=-Xmx512m" script: - - mvn clean package appassembler:assemble + - mvn clean package diff --git a/README.md b/README.md index 20aca7b..8640115 100644 --- a/README.md +++ b/README.md @@ -1,116 +1,107 @@ Warcbase [![Build Status](https://travis-ci.org/lintool/warcbase.svg?branch=master)](https://travis-ci.org/lintool/warcbase) ======== Warcbase is an open-source platform for managing web archives built on Hadoop and HBase. The platform provides a flexible data model for storing and managing raw content as well as metadata and extracted knowledge. Tight integration with Hadoop provides powerful tools for analytics and data processing via Spark. There are two main ways of using Warcbase: -+ The first and most common is to analyze web archives using [Spark](http://spark.apache.org/). -+ The second is to take advantage of HBase to provide random access as well as analytics capabilities. 
Random access allows Warcbase to provide temporal browsing of archived content (i.e., "wayback" functionality). ++ The first and most common is to analyze web archives using [Spark](http://spark.apache.org/): these functionalities are contained in the `warcbase-core` module. ++ The second is to take advantage of HBase to provide random access as well as analytics capabilities. Random access allows Warcbase to provide temporal browsing of archived content (i.e., "wayback" functionality): these functionalities are contained in the `warcbase-hbase` module. You can use Warcbase without HBase, and since HBase requires more extensive setup, it is recommended that if you're just starting out, play with the Spark analytics and don't worry about HBase. -Warcbase is built against CDH 5.4.1: - -+ Hadoop version: 2.6.0-cdh5.4.1 -+ HBase version: 1.0.0-cdh5.4.1 -+ Spark version: 1.3.0-cdh5.4.1 - -The Hadoop ecosystem is evolving rapidly, so there may be incompatibilities with other versions. - -**Detailed documentation is available [here](http://lintool.github.io/warcbase-docs/).** - -Supporting files can be found in the [warcbase-resources repository](https://github.com/lintool/warcbase-resources). +Other helpful links: ++ Detailed documentation is available [here](http://lintool.github.io/warcbase-docs/). ++ Supporting files can be found in the [warcbase-resources repository](https://github.com/lintool/warcbase-resources). 
Getting Started --------------- Clone the repo: ``` $ git clone http://github.com/lintool/warcbase.git ``` You can then build Warcbase: ``` $ mvn clean package appassembler:assemble ``` For the impatient, to skip tests: ``` $ mvn clean package appassembler:assemble -DskipTests ``` -To generate Scaladocs: +Warcbase is built against CDH 5.7.1: -``` -$ mvn scala:doc -``` - -Generated Scaladocs will be under the `target/site` directory ++ Hadoop version: 2.6.0-cdh5.7.1 ++ Spark version: 1.6.0-cdh5.7.1 ++ HBase version: 1.2.0-cdh5.7.1 +The Hadoop ecosystem is evolving rapidly, so there may be incompatibilities with other versions. Spark Quickstart ---------------- -For the impatient, let's do a simple analysis with Spark. Within the repo there's already a sample ARC file stored at `src/test/resources/arc/example.arc.gz`. Our supporting resources repository also has [larger ARC and WARC files as real-world examples](https://github.com/lintool/warcbase-resources/tree/master/Sample-Data). +For the impatient, let's do a simple analysis with Spark. Within the repo there's already a sample ARC file stored at `warcbase-core/src/test/resources/arc/example.arc.gz`. Our supporting resources repository also has [larger ARC and WARC files as real-world examples](https://github.com/lintool/warcbase-resources/tree/master/Sample-Data). -If you need to install Spark, [we have a walkthrough here](http://lintool.github.io/warcbase-docs/Getting-Started/). This page also has instructions on how to get Spark Notebook, an interactive web-based editor, running. +If you need to install Spark, [we have a walkthrough here](http://lintool.github.io/warcbase-docs/Getting-Started/). This page also has instructions on how to install and run Spark Notebook, an interactive web-based editor. 
-Once you've got Spark installed, you can go ahead and fire up the Spark shell: +Once you've got Spark installed, go ahead and fire up the Spark shell: ``` -$ spark-shell --jars target/warcbase-0.1.0-SNAPSHOT-fatjar.jar +$ spark-shell --jars warcbase-core/target/warcbase-core-0.1.0-SNAPSHOT-fatjar.jar ``` Here's a simple script that extracts and counts the top-level domains (i.e., number of pages for each top-level domain) in the sample ARC data: ```scala import org.warcbase.spark.matchbox._ import org.warcbase.spark.rdd.RecordRDD._ -val r = RecordLoader.loadArchives("src/test/resources/arc/example.arc.gz", sc) +val r = RecordLoader.loadArchives("warcbase-core/src/test/resources/arc/example.arc.gz", sc) .keepValidPages() .map(r => ExtractDomain(r.getUrl)) .countItems() .take(10) ``` **Tip:** By default, commands in the Spark shell must be one line. To run multi-line commands, type `:paste` in Spark shell: you can then copy-paste the script above directly into Spark shell. Use Ctrl-D to finish the command. Want to learn more? Check out our [detailed documentation](http://lintool.github.io/warcbase-docs/). Visualizations -------------- The results of analyses using Warcbase can serve as input to visualizations that help scholars interactively explore the data. Examples include: + [Basic crawl statistics](http://lintool.github.io/warcbase/vis/crawl-sites/index.html) from the Canadian Political Parties and Political Interest Groups collection. + [Interactive graph visualization](http://lintool.github.io/warcbase-docs/Gephi-Converting-Site-Link-Structure-into-Dynamic-Visualization/) using Gephi. + [Named entity visualization](http://lintool.github.io/warcbase-docs/Spark-NER-Visualization/) for exploring relative frequencies of people, places, and locations. + [Shine interface](http://webarchives.ca/) for faceted full-text search. 
Next Steps ---------- + [Ingesting content into HBase](http://lintool.github.io/warcbase-docs/Ingesting-Content-into-HBase/): loading ARC and WARC data into HBase + [Warcbase/Wayback integration](http://lintool.github.io/warcbase-docs/Warcbase-Wayback-Integration/): guide to provide temporal browsing capabilities + [Warcbase Java tools](http://lintool.github.io/warcbase-docs/Warcbase-Java-Tools/): building the URL mapping, extracting the webgraph License ------- Licensed under the [Apache License, Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). Acknowledgments --------------- This work is supported in part by the U.S. National Science Foundation, the Natural Sciences and Engineering Research Council of Canada, the Social Sciences and Humanities Research Council of Canada, the Ontario Ministry of Research and Innovation's Early Researcher Award program, and the Mellon Foundation (via Columbia University). Any opinions, findings, and conclusions or recommendations expressed are those of the researchers and do not necessarily reflect the views of the sponsors. 
diff --git a/pom.xml b/pom.xml index de687f6..71f3b83 100644 --- a/pom.xml +++ b/pom.xml @@ -1,539 +1,57 @@ 4.0.0 org.warcbase warcbase - jar + pom 0.1.0-SNAPSHOT Warcbase - An open-source platform for managing web archives built on Hadoop and HBase + An open-source platform for managing and analyzing web archives http://warcbase.org/ + + UTF-8 + UTF-8 + 2.10.5 + 2.6.0-cdh5.7.1 + 1.6.0-cdh5.7.1 + 1.2.0-cdh5.7.1 + 3.4.5-cdh5.7.1 + + The Apache Software License, Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.txt repo scm:git:git@github.com:lintool/warcbase.git scm:git:git@github.com:lintool/warcbase.git git@github.com:lintool/warcbase.git - - - lintool - Jimmy Lin - jimmylin@umd.edu - - - milad621 - Milad Gholami - mgholami@cs.umd.edu - - - jeffyRao - Jinfeng Rao - jinfeng@cs.umd.edu - - - - - org.sonatype.oss - oss-parent - 7 - - - - UTF-8 - UTF-8 - 8.1.12.v20130726 - 2.6.0-cdh5.4.1 - 1.0.0-cdh5.4.1 - 3.4.5-cdh5.4.1 - 1.3.0-cdh5.4.1 - 2.10.4 - - - - - - - maven-clean-plugin - 2.6.1 - - - - src/main/solr/lib - false - - - - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.2 - - 1.7 - 1.7 - - - - org.apache.maven.plugins - maven-shade-plugin - 2.3 - - - package - - shade - - - - - - - META-INF/services/org.apache.lucene.codecs.Codec - - - - - - *:* - - META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - true - fatjar - - - org.apache.hadoop:* - - - - - - - - - org.apache.maven.plugins - maven-dependency-plugin - 2.4 - - - copy - package - - copy-dependencies - - - - src/main/solr/lib - - - - - - - - org.codehaus.mojo - appassembler-maven-plugin - 1.9 - - -Xms512M -Xmx24576M - - - org.warcbase.WarcbaseAdmin - WarcbaseAdmin - - - org.warcbase.data.UrlMappingBuilder - UrlMappingBuilder - - - org.warcbase.data.UrlMapping - UrlMapping - - - org.warcbase.data.ExtractLinks - ExtractLinks - - - org.warcbase.data.ExtractSiteLinks - ExtractSiteLinks - - - org.warcbase.ingest.IngestFiles - IngestFiles - - - org.warcbase.ingest.SearchForUrl - SearchForUrl 
- - - org.warcbase.browser.WarcBrowser - WarcBrowser - - - org.warcbase.analysis.DetectDuplicates - DetectDuplicates - - - org.warcbase.browser.SeleniumBrowser - SeleniumBrowser - - - - - - - org.scala-tools - maven-scala-plugin - 2.15.2 - - - process-resources - - add-source - compile - - - - scala-test-compile - process-test-resources - - testCompile - - - - - ${scala.version} - true - - -target:jvm-1.7 - -g:vars - -deprecation - -dependencyfile - ${project.build.directory}/.scala_dependencies - - - - - - maven http://repo.maven.apache.org/maven2/ cloudera https://repository.cloudera.com/artifactory/cloudera-repos/ internetarchive Internet Archive Maven Repository http://builds.archive.org:8080/maven2 - - - junit - junit - 4.12 - test - - - org.scalatest - scalatest_2.10 - 2.2.4 - test - - - commons-codec - commons-codec - 1.8 - - - commons-io - commons-io - 2.4 - - - org.jsoup - jsoup - 1.7.3 - - - com.google.guava - guava - 14.0.1 - - - - tl.lin - lintools-datatypes - 1.0.0 - - - - - org.apache.hbase - hbase-client - ${hbase.version} - - org.apache.hadoophadoop-core - - - - org.apache.hbase - hbase-server - ${hbase.version} - - org.apache.hadoophadoop-core - org.mortbay.jettyservlet-api-2.5 - javax.servletservlet-api - asmasm - - - - - - org.apache.hadoop - hadoop-client - ${hadoop.version} - - javax.servletservlet-api - - - - - org.apache.zookeeper - zookeeper - ${zookeeper.version} - - - - - - org.netpreserve.openwayback - openwayback-core - 2.0.0.BETA.2 - - org.apache.hadoophadoop-core - ch.qos.logbacklogback-classic - org.netpreserve.openwaybackopenwayback-cdx-server - org.netpreserve.openwaybackopenwayback-access-control-core - - it.unimi.dsidsiutils - fastutilfastutil - - - - org.netpreserve.commons - webarchive-commons - 1.1.4 - - org.apache.hadoophadoop-core - commons-langcommons-lang - fastutilfastutil - - - - - it.unimi.dsi - dsiutils - 2.2.0 - - ch.qos.logbacklogback-classic - commons-langcommons-lang - - - - it.unimi.dsi - fastutil - 6.5.15 - - 
commons-langcommons-lang - - - - - org.eclipse.jetty - jetty-server - ${jettyVersion} - - - org.eclipse.jetty - jetty-webapp - ${jettyVersion} - true - - - org.slf4j - slf4j-log4j12 - 1.6.4 - - - - org.apache.commons - commons-lang3 - 3.0 - - - commons-cli - commons-cli - 1.2 - - - - net.sf.opencsv - opencsv - 2.3 - - - - org.apache.tika - tika-core - 1.9 - - - org.apache.tika - tika-parsers - 1.9 - - - - org.antlr - antlr - 3.5.2 - - - - org.seleniumhq.selenium - selenium-java - 2.42.2 - - org.seleniumhq.seleniumselenium-htmlunit-driver - org.seleniumhq.seleniumselenium-ie-driver - org.webbitserverwebbit - - - - - org.scala-lang - scala-library - 2.10.4 - - - org.apache.spark - spark-core_2.10 - ${spark.version} - - com.typesafeconfig - org.xerial.snappysnappy-java - - - - - org.apache.spark - spark-graphx_2.10 - ${spark.version} - - - - com.chuusai - shapeless_2.10.4 - 2.0.0 - - - com.fasterxml.jackson.core - jackson-core - 2.7.2 - - - com.fasterxml.jackson.core - jackson-databind - 2.7.2 - - - org.json4s - json4s-jackson_2.10 - 3.2.10 - - - - - com.typesafe - config - 1.2.1 - - - - org.xerial.snappy - snappy-java - 1.0.5 - - - - edu.stanford.nlp - stanford-corenlp - 3.4.1 - - - - com.syncthemall - boilerpipe - 1.2.2 - - - - xerces - xercesImpl - 2.11.0 - - - - org.apache.lucene - lucene-core - 4.7.2 - - - org.apache.solr - solr-core - 4.7.2 - - slf4j-apiorg.slf4j - org.apache.hadoophadoop-annotations - org.apache.hadoophadoop-common - org.apache.hadoophadoop-hdfs - com.typesafeconfig - - - - - uk.bl.wa.discovery - warc-hadoop-indexer - 2.2.0-BETA-5 - - asmasm - com.typesafeconfig - - + + warcbase-core + warcbase-hbase + - diff --git a/warcbase-core/pom.xml b/warcbase-core/pom.xml new file mode 100644 index 0000000..317f9fd --- /dev/null +++ b/warcbase-core/pom.xml @@ -0,0 +1,222 @@ + + + + org.warcbase + warcbase + 0.1.0-SNAPSHOT + + + 4.0.0 + org.warcbase + warcbase-core + jar + 0.1.0-SNAPSHOT + Warcbase (Core) + An open-source platform for managing and 
analyzing web archives + http://warcbase.org/ + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + scm:git:git@github.com:lintool/warcbase.git + scm:git:git@github.com:lintool/warcbase.git + git@github.com:lintool/warcbase.git + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.2 + + 1.7 + 1.7 + + + + org.apache.maven.plugins + maven-shade-plugin + 2.3 + + + package + + shade + + + + + META-INF/services/org.apache.lucene.codecs.Codec + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + true + fatjar + + + org.apache.hadoop:* + org.apache.spark:* + + + + + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + ${scala.version} + true + + -target:jvm-1.7 + -g:vars + -deprecation + -dependencyfile + ${project.build.directory}/.scala_dependencies + + + + + + + + + junit + junit + 4.12 + test + + + org.scalatest + scalatest_2.10 + 2.2.5 + test + + + + org.scala-lang + scala-library + ${scala.version} + + + com.chuusai + shapeless_2.10.5 + 2.0.0 + + + + org.apache.spark + spark-core_2.10 + ${spark.version} + + + org.apache.spark + spark-graphx_2.10 + ${spark.version} + + + + com.google.guava + guava + 14.0.1 + + + + + org.xerial.snappy + snappy-java + 1.0.5 + + + + org.jsoup + jsoup + 1.7.3 + + + + org.netpreserve.openwayback + openwayback-core + 2.0.0.BETA.2 + + + org.netpreserve.commons + webarchive-commons + 1.1.4 + + + + edu.stanford.nlp + stanford-corenlp + 3.4.1 + + + + org.apache.tika + tika-core + 1.9 + + + org.apache.tika + tika-parsers + 1.9 + + + com.syncthemall + boilerpipe + 1.2.2 + + + xerces + xercesImpl + 2.11.0 + + + + tl.lin + lintools-datatypes + 1.0.0 + + + + diff --git a/src/main/java/org/warcbase/data/ArcRecordUtils.java b/warcbase-core/src/main/java/org/warcbase/data/ArcRecordUtils.java similarity index 100% rename from 
src/main/java/org/warcbase/data/ArcRecordUtils.java rename to warcbase-core/src/main/java/org/warcbase/data/ArcRecordUtils.java diff --git a/src/main/java/org/warcbase/data/WarcRecordUtils.java b/warcbase-core/src/main/java/org/warcbase/data/WarcRecordUtils.java similarity index 100% rename from src/main/java/org/warcbase/data/WarcRecordUtils.java rename to warcbase-core/src/main/java/org/warcbase/data/WarcRecordUtils.java diff --git a/src/main/java/org/warcbase/demo/WacMapReduceArcDemo.java b/warcbase-core/src/main/java/org/warcbase/demo/WacMapReduceArcDemo.java similarity index 100% rename from src/main/java/org/warcbase/demo/WacMapReduceArcDemo.java rename to warcbase-core/src/main/java/org/warcbase/demo/WacMapReduceArcDemo.java diff --git a/src/main/java/org/warcbase/io/ArcRecordWritable.java b/warcbase-core/src/main/java/org/warcbase/io/ArcRecordWritable.java similarity index 100% rename from src/main/java/org/warcbase/io/ArcRecordWritable.java rename to warcbase-core/src/main/java/org/warcbase/io/ArcRecordWritable.java diff --git a/src/main/java/org/warcbase/io/GenericArchiveRecordWritable.java b/warcbase-core/src/main/java/org/warcbase/io/GenericArchiveRecordWritable.java similarity index 100% rename from src/main/java/org/warcbase/io/GenericArchiveRecordWritable.java rename to warcbase-core/src/main/java/org/warcbase/io/GenericArchiveRecordWritable.java diff --git a/src/main/java/org/warcbase/io/WarcRecordWritable.java b/warcbase-core/src/main/java/org/warcbase/io/WarcRecordWritable.java similarity index 100% rename from src/main/java/org/warcbase/io/WarcRecordWritable.java rename to warcbase-core/src/main/java/org/warcbase/io/WarcRecordWritable.java diff --git a/src/main/java/org/warcbase/mapreduce/WacArcInputFormat.java b/warcbase-core/src/main/java/org/warcbase/mapreduce/WacArcInputFormat.java similarity index 100% rename from src/main/java/org/warcbase/mapreduce/WacArcInputFormat.java rename to 
warcbase-core/src/main/java/org/warcbase/mapreduce/WacArcInputFormat.java diff --git a/src/main/java/org/warcbase/mapreduce/WacGenericInputFormat.java b/warcbase-core/src/main/java/org/warcbase/mapreduce/WacGenericInputFormat.java similarity index 100% rename from src/main/java/org/warcbase/mapreduce/WacGenericInputFormat.java rename to warcbase-core/src/main/java/org/warcbase/mapreduce/WacGenericInputFormat.java diff --git a/src/main/java/org/warcbase/mapreduce/WacWarcInputFormat.java b/warcbase-core/src/main/java/org/warcbase/mapreduce/WacWarcInputFormat.java similarity index 100% rename from src/main/java/org/warcbase/mapreduce/WacWarcInputFormat.java rename to warcbase-core/src/main/java/org/warcbase/mapreduce/WacWarcInputFormat.java diff --git a/src/main/java/org/warcbase/wayback/WarcbaseResourceIndex.java b/warcbase-core/src/main/java/org/warcbase/wayback/WarcbaseResourceIndex.java similarity index 100% rename from src/main/java/org/warcbase/wayback/WarcbaseResourceIndex.java rename to warcbase-core/src/main/java/org/warcbase/wayback/WarcbaseResourceIndex.java diff --git a/src/main/java/org/warcbase/wayback/WarcbaseResourceStore.java b/warcbase-core/src/main/java/org/warcbase/wayback/WarcbaseResourceStore.java similarity index 100% rename from src/main/java/org/warcbase/wayback/WarcbaseResourceStore.java rename to warcbase-core/src/main/java/org/warcbase/wayback/WarcbaseResourceStore.java diff --git a/src/main/python/break-into-date-scrapes.py b/warcbase-core/src/main/python/break-into-date-scrapes.py similarity index 100% rename from src/main/python/break-into-date-scrapes.py rename to warcbase-core/src/main/python/break-into-date-scrapes.py diff --git a/src/main/python/combine-entity-results-split-by-date.py b/warcbase-core/src/main/python/combine-entity-results-split-by-date.py similarity index 100% rename from src/main/python/combine-entity-results-split-by-date.py rename to warcbase-core/src/main/python/combine-entity-results-split-by-date.py diff --git 
a/src/main/python/combine-entity-results.py b/warcbase-core/src/main/python/combine-entity-results.py similarity index 100% rename from src/main/python/combine-entity-results.py rename to warcbase-core/src/main/python/combine-entity-results.py diff --git a/src/main/python/pig2gdf.py b/warcbase-core/src/main/python/pig2gdf.py similarity index 100% rename from src/main/python/pig2gdf.py rename to warcbase-core/src/main/python/pig2gdf.py diff --git a/src/main/resources/BDBCollection.xml b/warcbase-core/src/main/resources/BDBCollection.xml similarity index 100% rename from src/main/resources/BDBCollection.xml rename to warcbase-core/src/main/resources/BDBCollection.xml diff --git a/src/main/resources/log4j.properties b/warcbase-core/src/main/resources/log4j.properties similarity index 100% rename from src/main/resources/log4j.properties rename to warcbase-core/src/main/resources/log4j.properties diff --git a/src/main/scala/org/warcbase/spark/archive/io/ArcRecord.scala b/warcbase-core/src/main/scala/org/warcbase/spark/archive/io/ArcRecord.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/archive/io/ArcRecord.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/archive/io/ArcRecord.scala diff --git a/src/main/scala/org/warcbase/spark/archive/io/ArchiveRecord.scala b/warcbase-core/src/main/scala/org/warcbase/spark/archive/io/ArchiveRecord.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/archive/io/ArchiveRecord.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/archive/io/ArchiveRecord.scala diff --git a/src/main/scala/org/warcbase/spark/archive/io/GenericArchiveRecord.scala b/warcbase-core/src/main/scala/org/warcbase/spark/archive/io/GenericArchiveRecord.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/archive/io/GenericArchiveRecord.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/archive/io/GenericArchiveRecord.scala diff --git 
a/src/main/scala/org/warcbase/spark/archive/io/WarcRecord.scala b/warcbase-core/src/main/scala/org/warcbase/spark/archive/io/WarcRecord.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/archive/io/WarcRecord.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/archive/io/WarcRecord.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/DetectLanguage.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/DetectLanguage.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/DetectLanguage.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/DetectLanguage.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/DetectMimeTypeTika.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/DetectMimeTypeTika.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/DetectMimeTypeTika.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/DetectMimeTypeTika.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractAtMentions.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractAtMentions.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractAtMentions.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractAtMentions.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractBoilerpipeText.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractBoilerpipeText.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractBoilerpipeText.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractBoilerpipeText.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractDate.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractDate.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractDate.scala 
rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractDate.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractDomain.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractDomain.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractDomain.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractDomain.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractEntities.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractEntities.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractEntities.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractEntities.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractGraph.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractGraph.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractGraph.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractGraph.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractHashtags.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractHashtags.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractHashtags.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractHashtags.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractImageLinks.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractImageLinks.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractImageLinks.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractImageLinks.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractLinks.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractLinks.scala similarity index 100% rename from 
src/main/scala/org/warcbase/spark/matchbox/ExtractLinks.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractLinks.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractTextFromPDFs.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractTextFromPDFs.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractTextFromPDFs.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractTextFromPDFs.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/ExtractUrls.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractUrls.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/ExtractUrls.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/ExtractUrls.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/NER3Classifier.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/NER3Classifier.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/NER3Classifier.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/NER3Classifier.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/NERCombinedJson.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/NERCombinedJson.scala similarity index 95% rename from src/main/scala/org/warcbase/spark/matchbox/NERCombinedJson.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/NERCombinedJson.scala index 06cc6f4..c98cc7c 100644 --- a/src/main/scala/org/warcbase/spark/matchbox/NERCombinedJson.scala +++ b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/NERCombinedJson.scala @@ -1,126 +1,125 @@ package org.warcbase.spark.matchbox import java.io.BufferedReader import java.io.BufferedWriter import java.io.InputStreamReader import java.io.OutputStreamWriter import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import 
org.apache.spark.SparkContext import org.warcbase.spark.utils.JsonUtil import scala.collection.mutable.MutableList import scala.util.Random /** * Classifies records using NER and stores results as JSON */ class NERCombinedJson extends Serializable { - case class StringList(list: List[String]) // Used to prevent unchecked type-argument error def combineKeyCountLists (l1: List[(String, Int)], l2: List[(String, Int)]): List[(String, Int)] = { (l1 ++ l2).groupBy(_._1 ).map { case (key, tuples) => (key, tuples.map( _._2).sum) }.toList } /** Combines directory of part-files containing one JSON array per line * into a single file containing a single JSON array of arrays. * * @param srcDir name of directory holding files, also name that will * be given to JSON file. */ def partDirToFile(srcDir: String): Unit = { val hadoopConfig = new Configuration() val hdfs = FileSystem.get(hadoopConfig) val rnd = new Random val srcPath = new Path(srcDir) val tmpFile = rnd.alphanumeric.take(8).mkString + ".almostjson" val tmpPath = new Path(tmpFile) // Merge part-files into single file FileUtil.copyMerge(hdfs, srcPath, hdfs, tmpPath, false, hadoopConfig, null) // Read file of JSON arrays, write into single JSON array of arrays val fsInStream = hdfs.open(tmpPath) val inFile = new BufferedReader(new InputStreamReader(fsInStream)) hdfs.delete(srcPath, true) // Don't need part-files anymore val fsOutStream = hdfs.create(srcPath, true) // path was dir of part-files, // now is a file of JSON val outFile = new BufferedWriter(new OutputStreamWriter(fsOutStream)) outFile.write("[") val line = inFile.readLine() if (line != null) outFile.write(line) Iterator.continually(inFile.readLine()).takeWhile(_ != null).foreach(s => {outFile.write(", " + s)}) outFile.write("]") outFile.close() inFile.close() hdfs.delete(tmpPath, false) } /** Do NER classification on input path, output JSON. 
* * @param iNerClassifierFile path of classifier file * @param inputFile path of file with tuples (date: String, url: String, content: String) * from which to extract entities * @param outputFile path of output file (e.g., "entities.json") * @param sc Spark context object */ def classify(iNerClassifierFile: String, inputFile: String, outputFile: String, sc: SparkContext) { val out = sc.textFile(inputFile) .mapPartitions(iter => { NER3Classifier.apply(iNerClassifierFile) iter.map(line => { val substrs = line.split(",", 3) (substrs(0), substrs(1), substrs(2)) }) .map(r => { val classifiedJson = NER3Classifier.classify(r._3) val classifiedMap = JsonUtil.fromJson(classifiedJson) val classifiedMapCountTuples: Map[String, List[(String, Int)]] = classifiedMap.map { - case (nerType, entities: StringList) => (nerType, entities.list.groupBy(identity).mapValues(_.size).toList) + case (nerType, entities: List[String @unchecked]) => (nerType, entities.groupBy(identity).mapValues(_.size).toList) } ((r._1, r._2), classifiedMapCountTuples) }) }) .reduceByKey( (a, b) => (a ++ b).keySet.map(r => (r, combineKeyCountLists(a(r), b(r)))).toMap) .mapPartitions(iter => { iter.map(r => { val nerRec = new NerRecord(r._1._1, r._1._2) r._2.foreach(entityMap => { // e.g., entityMap = "PERSON" -> List(("Jack", 1), ("Diane", 3)) val ec = new EntityCounts(entityMap._1) entityMap._2.foreach(e => { ec.entities += new Entity(e._1, e._2) }) nerRec.ner += ec }) JsonUtil.toJson(nerRec) }) }) .saveAsTextFile(outputFile) partDirToFile(outputFile) } class Entity(iEntity: String, iFreq: Int) { var entity: String = iEntity var freq: Int = iFreq } class EntityCounts(iNerType: String) { var nerType: String = iNerType var entities = MutableList[Entity]() } class NerRecord(recDate: String, recDomain: String) { var date = recDate var domain = recDomain var ner = MutableList[EntityCounts]() } } diff --git a/src/main/scala/org/warcbase/spark/matchbox/RecordLoader.scala 
b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/RecordLoader.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/RecordLoader.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/RecordLoader.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/RemoveHTML.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/RemoveHTML.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/RemoveHTML.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/RemoveHTML.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/StringUtils.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/StringUtils.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/StringUtils.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/StringUtils.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/TupleFormatter.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/TupleFormatter.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/TupleFormatter.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/TupleFormatter.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/TweetUtils.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/TweetUtils.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/TweetUtils.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/TweetUtils.scala diff --git a/src/main/scala/org/warcbase/spark/matchbox/WriteGDF.scala b/warcbase-core/src/main/scala/org/warcbase/spark/matchbox/WriteGDF.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/matchbox/WriteGDF.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/matchbox/WriteGDF.scala diff --git a/src/main/scala/org/warcbase/spark/pythonconverters/ArcRecordConverter.scala 
b/warcbase-core/src/main/scala/org/warcbase/spark/pythonconverters/ArcRecordConverter.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/pythonconverters/ArcRecordConverter.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/pythonconverters/ArcRecordConverter.scala diff --git a/src/main/scala/org/warcbase/spark/rdd/RecordRDD.scala b/warcbase-core/src/main/scala/org/warcbase/spark/rdd/RecordRDD.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/rdd/RecordRDD.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/rdd/RecordRDD.scala diff --git a/src/main/scala/org/warcbase/spark/scripts/CrawlStatistics.scala b/warcbase-core/src/main/scala/org/warcbase/spark/scripts/CrawlStatistics.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/scripts/CrawlStatistics.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/scripts/CrawlStatistics.scala diff --git a/src/main/scala/org/warcbase/spark/scripts/Filter.scala b/warcbase-core/src/main/scala/org/warcbase/spark/scripts/Filter.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/scripts/Filter.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/scripts/Filter.scala diff --git a/src/main/scala/org/warcbase/spark/scripts/SocialMediaLinks.scala b/warcbase-core/src/main/scala/org/warcbase/spark/scripts/SocialMediaLinks.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/scripts/SocialMediaLinks.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/scripts/SocialMediaLinks.scala diff --git a/src/main/scala/org/warcbase/spark/utils/JsonUtil.scala b/warcbase-core/src/main/scala/org/warcbase/spark/utils/JsonUtil.scala similarity index 100% rename from src/main/scala/org/warcbase/spark/utils/JsonUtil.scala rename to warcbase-core/src/main/scala/org/warcbase/spark/utils/JsonUtil.scala diff --git a/src/main/webapp/WEB-INF/web.xml 
b/warcbase-core/src/main/webapp/WEB-INF/web.xml similarity index 100% rename from src/main/webapp/WEB-INF/web.xml rename to warcbase-core/src/main/webapp/WEB-INF/web.xml diff --git a/src/test/java/org/warcbase/ingest/WacArcLoaderTest.java b/warcbase-core/src/test/java/org/warcbase/ingest/WacArcLoaderTest.java similarity index 100% rename from src/test/java/org/warcbase/ingest/WacArcLoaderTest.java rename to warcbase-core/src/test/java/org/warcbase/ingest/WacArcLoaderTest.java diff --git a/src/test/java/org/warcbase/ingest/WacWarcLoaderTest.java b/warcbase-core/src/test/java/org/warcbase/ingest/WacWarcLoaderTest.java similarity index 100% rename from src/test/java/org/warcbase/ingest/WacWarcLoaderTest.java rename to warcbase-core/src/test/java/org/warcbase/ingest/WacWarcLoaderTest.java diff --git a/src/test/java/org/warcbase/io/ArcRecordWritableTest.java b/warcbase-core/src/test/java/org/warcbase/io/ArcRecordWritableTest.java similarity index 100% rename from src/test/java/org/warcbase/io/ArcRecordWritableTest.java rename to warcbase-core/src/test/java/org/warcbase/io/ArcRecordWritableTest.java diff --git a/src/test/java/org/warcbase/io/GenericArchiveRecordWritableTest.java b/warcbase-core/src/test/java/org/warcbase/io/GenericArchiveRecordWritableTest.java similarity index 100% rename from src/test/java/org/warcbase/io/GenericArchiveRecordWritableTest.java rename to warcbase-core/src/test/java/org/warcbase/io/GenericArchiveRecordWritableTest.java diff --git a/src/test/java/org/warcbase/io/WarcRecordWritableTest.java b/warcbase-core/src/test/java/org/warcbase/io/WarcRecordWritableTest.java similarity index 100% rename from src/test/java/org/warcbase/io/WarcRecordWritableTest.java rename to warcbase-core/src/test/java/org/warcbase/io/WarcRecordWritableTest.java diff --git a/src/test/java/org/warcbase/mapreduce/WacArcInputFormatTest.java b/warcbase-core/src/test/java/org/warcbase/mapreduce/WacArcInputFormatTest.java similarity index 100% rename from 
src/test/java/org/warcbase/mapreduce/WacArcInputFormatTest.java rename to warcbase-core/src/test/java/org/warcbase/mapreduce/WacArcInputFormatTest.java diff --git a/src/test/java/org/warcbase/mapreduce/WacGenericInputFormatTest.java b/warcbase-core/src/test/java/org/warcbase/mapreduce/WacGenericInputFormatTest.java similarity index 100% rename from src/test/java/org/warcbase/mapreduce/WacGenericInputFormatTest.java rename to warcbase-core/src/test/java/org/warcbase/mapreduce/WacGenericInputFormatTest.java diff --git a/src/test/java/org/warcbase/mapreduce/WacWarcInputFormatTest.java b/warcbase-core/src/test/java/org/warcbase/mapreduce/WacWarcInputFormatTest.java similarity index 100% rename from src/test/java/org/warcbase/mapreduce/WacWarcInputFormatTest.java rename to warcbase-core/src/test/java/org/warcbase/mapreduce/WacWarcInputFormatTest.java diff --git a/src/test/resources/arc/example.arc.gz b/warcbase-core/src/test/resources/arc/example.arc.gz similarity index 100% rename from src/test/resources/arc/example.arc.gz rename to warcbase-core/src/test/resources/arc/example.arc.gz diff --git a/src/test/resources/ner/example.txt b/warcbase-core/src/test/resources/ner/example.txt similarity index 100% rename from src/test/resources/ner/example.txt rename to warcbase-core/src/test/resources/ner/example.txt diff --git a/src/test/resources/warc/example.warc.gz b/warcbase-core/src/test/resources/warc/example.warc.gz similarity index 100% rename from src/test/resources/warc/example.warc.gz rename to warcbase-core/src/test/resources/warc/example.warc.gz diff --git a/src/test/scala/org/warcbase/spark/ArcTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/ArcTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/ArcTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/ArcTest.scala diff --git a/src/test/scala/org/warcbase/spark/GenericArchiveRecordTest.scala 
b/warcbase-core/src/test/scala/org/warcbase/spark/GenericArchiveRecordTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/GenericArchiveRecordTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/GenericArchiveRecordTest.scala diff --git a/src/test/scala/org/warcbase/spark/WarcTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/WarcTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/WarcTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/WarcTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/ExtractAtMentionsTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractAtMentionsTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/ExtractAtMentionsTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractAtMentionsTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/ExtractDateTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractDateTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/ExtractDateTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractDateTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/ExtractDomainTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractDomainTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/ExtractDomainTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractDomainTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/ExtractEntitiesTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractEntitiesTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/ExtractEntitiesTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractEntitiesTest.scala diff --git 
a/src/test/scala/org/warcbase/spark/matchbox/ExtractHashtagsTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractHashtagsTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/ExtractHashtagsTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractHashtagsTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/ExtractImageLinksTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractImageLinksTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/ExtractImageLinksTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractImageLinksTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/ExtractLinksTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractLinksTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/ExtractLinksTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractLinksTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/ExtractUrlsTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractUrlsTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/ExtractUrlsTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/ExtractUrlsTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/StringUtilsTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/StringUtilsTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/matchbox/StringUtilsTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/StringUtilsTest.scala diff --git a/src/test/scala/org/warcbase/spark/matchbox/TupleFormatterTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/matchbox/TupleFormatterTest.scala similarity index 100% rename from 
src/test/scala/org/warcbase/spark/matchbox/TupleFormatterTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/matchbox/TupleFormatterTest.scala diff --git a/src/test/scala/org/warcbase/spark/rdd/CountableRDDTest.scala b/warcbase-core/src/test/scala/org/warcbase/spark/rdd/CountableRDDTest.scala similarity index 100% rename from src/test/scala/org/warcbase/spark/rdd/CountableRDDTest.scala rename to warcbase-core/src/test/scala/org/warcbase/spark/rdd/CountableRDDTest.scala diff --git a/warcbase-hbase/pom.xml b/warcbase-hbase/pom.xml new file mode 100644 index 0000000..b2b25e4 --- /dev/null +++ b/warcbase-hbase/pom.xml @@ -0,0 +1,250 @@ + + + + org.warcbase + warcbase + 0.1.0-SNAPSHOT + + + 4.0.0 + org.warcbase + warcbase-hbase + jar + 0.1.0-SNAPSHOT + Warcbase (HBase) + An open-source platform for managing and analyzing web archives + http://warcbase.org/ + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + scm:git:git@github.com:lintool/warcbase.git + scm:git:git@github.com:lintool/warcbase.git + git@github.com:lintool/warcbase.git + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.2 + + 1.7 + 1.7 + + + + org.apache.maven.plugins + maven-shade-plugin + 2.3 + + + package + + shade + + + + + META-INF/services/org.apache.lucene.codecs.Codec + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + true + fatjar + + + org.apache.hadoop:* + + + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + 2.4 + + + copy + package + + copy-dependencies + + + + src/main/solr/lib + + + + + + + + org.codehaus.mojo + appassembler-maven-plugin + 1.9 + + -Xms512M -Xmx24576M + + + org.warcbase.WarcbaseAdmin + WarcbaseAdmin + + + org.warcbase.data.UrlMappingBuilder + UrlMappingBuilder + + + org.warcbase.data.UrlMapping + UrlMapping + + + org.warcbase.data.ExtractLinks + ExtractLinks + + + org.warcbase.data.ExtractSiteLinks + ExtractSiteLinks + + + 
org.warcbase.ingest.IngestFiles + IngestFiles + + + org.warcbase.ingest.SearchForUrl + SearchForUrl + + + org.warcbase.browser.WarcBrowser + WarcBrowser + + + org.warcbase.analysis.DetectDuplicates + DetectDuplicates + + + org.warcbase.browser.SeleniumBrowser + SeleniumBrowser + + + + + + + org.scala-tools + maven-scala-plugin + 2.15.2 + + + process-resources + + add-source + compile + + + + scala-test-compile + process-test-resources + + testCompile + + + + + ${scala.version} + true + + -target:jvm-1.7 + -g:vars + -deprecation + -dependencyfile + ${project.build.directory}/.scala_dependencies + + + + + + + + + maven + http://repo.maven.apache.org/maven2/ + + + cloudera + https://repository.cloudera.com/artifactory/cloudera-repos/ + + + internetarchive + Internet Archive Maven Repository + http://builds.archive.org:8080/maven2 + + + + + + org.warcbase + warcbase-core + 0.1.0-SNAPSHOT + + + + org.apache.hbase + hbase-client + ${hbase.version} + + + org.apache.hbase + hbase-server + ${hbase.version} + + + + uk.bl.wa.discovery + warc-hadoop-indexer + 2.2.0-BETA-5 + + + + org.apache.lucene + lucene-core + 4.7.2 + + + org.apache.solr + solr-core + 4.7.2 + + + + org.seleniumhq.selenium + selenium-java + 2.42.2 + + + + diff --git a/src/main/java/org/warcbase/WarcbaseAdmin.java b/warcbase-hbase/src/main/java/org/warcbase/WarcbaseAdmin.java similarity index 100% rename from src/main/java/org/warcbase/WarcbaseAdmin.java rename to warcbase-hbase/src/main/java/org/warcbase/WarcbaseAdmin.java diff --git a/src/main/java/org/warcbase/analysis/FindArcUrls.java b/warcbase-hbase/src/main/java/org/warcbase/analysis/FindArcUrls.java similarity index 100% rename from src/main/java/org/warcbase/analysis/FindArcUrls.java rename to warcbase-hbase/src/main/java/org/warcbase/analysis/FindArcUrls.java diff --git a/src/main/java/org/warcbase/analysis/FindWarcUrls.java b/warcbase-hbase/src/main/java/org/warcbase/analysis/FindWarcUrls.java similarity index 100% rename from 
src/main/java/org/warcbase/analysis/FindWarcUrls.java rename to warcbase-hbase/src/main/java/org/warcbase/analysis/FindWarcUrls.java diff --git a/src/main/java/org/warcbase/analysis/graph/ExtractLinksWac.java b/warcbase-hbase/src/main/java/org/warcbase/analysis/graph/ExtractLinksWac.java similarity index 100% rename from src/main/java/org/warcbase/analysis/graph/ExtractLinksWac.java rename to warcbase-hbase/src/main/java/org/warcbase/analysis/graph/ExtractLinksWac.java diff --git a/src/main/java/org/warcbase/analysis/graph/ExtractSiteLinks.java b/warcbase-hbase/src/main/java/org/warcbase/analysis/graph/ExtractSiteLinks.java similarity index 100% rename from src/main/java/org/warcbase/analysis/graph/ExtractSiteLinks.java rename to warcbase-hbase/src/main/java/org/warcbase/analysis/graph/ExtractSiteLinks.java diff --git a/src/main/java/org/warcbase/analysis/graph/InvertAnchorText.java b/warcbase-hbase/src/main/java/org/warcbase/analysis/graph/InvertAnchorText.java similarity index 100% rename from src/main/java/org/warcbase/analysis/graph/InvertAnchorText.java rename to warcbase-hbase/src/main/java/org/warcbase/analysis/graph/InvertAnchorText.java diff --git a/src/main/java/org/warcbase/analysis/graph/PrefixMapping.java b/warcbase-hbase/src/main/java/org/warcbase/analysis/graph/PrefixMapping.java similarity index 100% rename from src/main/java/org/warcbase/analysis/graph/PrefixMapping.java rename to warcbase-hbase/src/main/java/org/warcbase/analysis/graph/PrefixMapping.java diff --git a/src/main/java/org/warcbase/browser/SeleniumBrowser.java b/warcbase-hbase/src/main/java/org/warcbase/browser/SeleniumBrowser.java similarity index 100% rename from src/main/java/org/warcbase/browser/SeleniumBrowser.java rename to warcbase-hbase/src/main/java/org/warcbase/browser/SeleniumBrowser.java diff --git a/src/main/java/org/warcbase/browser/WarcBrowser.java b/warcbase-hbase/src/main/java/org/warcbase/browser/WarcBrowser.java similarity index 100% rename from 
src/main/java/org/warcbase/browser/WarcBrowser.java rename to warcbase-hbase/src/main/java/org/warcbase/browser/WarcBrowser.java diff --git a/src/main/java/org/warcbase/browser/WarcBrowserServlet.java b/warcbase-hbase/src/main/java/org/warcbase/browser/WarcBrowserServlet.java similarity index 100% rename from src/main/java/org/warcbase/browser/WarcBrowserServlet.java rename to warcbase-hbase/src/main/java/org/warcbase/browser/WarcBrowserServlet.java diff --git a/src/main/java/org/warcbase/data/HBaseTableManager.java b/warcbase-hbase/src/main/java/org/warcbase/data/HBaseTableManager.java similarity index 100% rename from src/main/java/org/warcbase/data/HBaseTableManager.java rename to warcbase-hbase/src/main/java/org/warcbase/data/HBaseTableManager.java diff --git a/src/main/java/org/warcbase/data/UrlMapping.java b/warcbase-hbase/src/main/java/org/warcbase/data/UrlMapping.java similarity index 100% rename from src/main/java/org/warcbase/data/UrlMapping.java rename to warcbase-hbase/src/main/java/org/warcbase/data/UrlMapping.java diff --git a/src/main/java/org/warcbase/data/UrlMappingBuilder.java b/warcbase-hbase/src/main/java/org/warcbase/data/UrlMappingBuilder.java similarity index 100% rename from src/main/java/org/warcbase/data/UrlMappingBuilder.java rename to warcbase-hbase/src/main/java/org/warcbase/data/UrlMappingBuilder.java diff --git a/src/main/java/org/warcbase/data/UrlMappingMapReduceBuilder.java b/warcbase-hbase/src/main/java/org/warcbase/data/UrlMappingMapReduceBuilder.java similarity index 100% rename from src/main/java/org/warcbase/data/UrlMappingMapReduceBuilder.java rename to warcbase-hbase/src/main/java/org/warcbase/data/UrlMappingMapReduceBuilder.java diff --git a/src/main/java/org/warcbase/data/UrlUtils.java b/warcbase-hbase/src/main/java/org/warcbase/data/UrlUtils.java similarity index 100% rename from src/main/java/org/warcbase/data/UrlUtils.java rename to warcbase-hbase/src/main/java/org/warcbase/data/UrlUtils.java diff --git 
a/src/main/java/org/warcbase/demo/WacMapReduceHBaseDemo.java b/warcbase-hbase/src/main/java/org/warcbase/demo/WacMapReduceHBaseDemo.java similarity index 100% rename from src/main/java/org/warcbase/demo/WacMapReduceHBaseDemo.java rename to warcbase-hbase/src/main/java/org/warcbase/demo/WacMapReduceHBaseDemo.java diff --git a/src/main/java/org/warcbase/demo/WacMapReduceHBaseWrapperDemo.java b/warcbase-hbase/src/main/java/org/warcbase/demo/WacMapReduceHBaseWrapperDemo.java similarity index 100% rename from src/main/java/org/warcbase/demo/WacMapReduceHBaseWrapperDemo.java rename to warcbase-hbase/src/main/java/org/warcbase/demo/WacMapReduceHBaseWrapperDemo.java diff --git a/src/main/java/org/warcbase/index/IndexerMapper.java b/warcbase-hbase/src/main/java/org/warcbase/index/IndexerMapper.java similarity index 100% rename from src/main/java/org/warcbase/index/IndexerMapper.java rename to warcbase-hbase/src/main/java/org/warcbase/index/IndexerMapper.java diff --git a/src/main/java/org/warcbase/index/IndexerReducer.java b/warcbase-hbase/src/main/java/org/warcbase/index/IndexerReducer.java similarity index 100% rename from src/main/java/org/warcbase/index/IndexerReducer.java rename to warcbase-hbase/src/main/java/org/warcbase/index/IndexerReducer.java diff --git a/src/main/java/org/warcbase/index/IndexerRunner.java b/warcbase-hbase/src/main/java/org/warcbase/index/IndexerRunner.java similarity index 100% rename from src/main/java/org/warcbase/index/IndexerRunner.java rename to warcbase-hbase/src/main/java/org/warcbase/index/IndexerRunner.java diff --git a/src/main/java/org/warcbase/ingest/IngestFiles.java b/warcbase-hbase/src/main/java/org/warcbase/ingest/IngestFiles.java similarity index 100% rename from src/main/java/org/warcbase/ingest/IngestFiles.java rename to warcbase-hbase/src/main/java/org/warcbase/ingest/IngestFiles.java diff --git a/src/main/java/org/warcbase/ingest/SearchForUrl.java b/warcbase-hbase/src/main/java/org/warcbase/ingest/SearchForUrl.java similarity 
index 100% rename from src/main/java/org/warcbase/ingest/SearchForUrl.java rename to warcbase-hbase/src/main/java/org/warcbase/ingest/SearchForUrl.java diff --git a/src/main/java/org/warcbase/mapreduce/lib/Chain.java b/warcbase-hbase/src/main/java/org/warcbase/mapreduce/lib/Chain.java similarity index 100% rename from src/main/java/org/warcbase/mapreduce/lib/Chain.java rename to warcbase-hbase/src/main/java/org/warcbase/mapreduce/lib/Chain.java diff --git a/src/main/java/org/warcbase/mapreduce/lib/ChainMapContextImpl.java b/warcbase-hbase/src/main/java/org/warcbase/mapreduce/lib/ChainMapContextImpl.java similarity index 100% rename from src/main/java/org/warcbase/mapreduce/lib/ChainMapContextImpl.java rename to warcbase-hbase/src/main/java/org/warcbase/mapreduce/lib/ChainMapContextImpl.java diff --git a/src/main/java/org/warcbase/mapreduce/lib/HBaseRowToArcRecordWritableMapper.java b/warcbase-hbase/src/main/java/org/warcbase/mapreduce/lib/HBaseRowToArcRecordWritableMapper.java similarity index 100% rename from src/main/java/org/warcbase/mapreduce/lib/HBaseRowToArcRecordWritableMapper.java rename to warcbase-hbase/src/main/java/org/warcbase/mapreduce/lib/HBaseRowToArcRecordWritableMapper.java diff --git a/src/main/java/org/warcbase/mapreduce/lib/TableChainMapper.java b/warcbase-hbase/src/main/java/org/warcbase/mapreduce/lib/TableChainMapper.java similarity index 100% rename from src/main/java/org/warcbase/mapreduce/lib/TableChainMapper.java rename to warcbase-hbase/src/main/java/org/warcbase/mapreduce/lib/TableChainMapper.java diff --git a/src/main/solr/README.txt b/warcbase-hbase/src/main/solr/README.txt similarity index 100% rename from src/main/solr/README.txt rename to warcbase-hbase/src/main/solr/README.txt diff --git a/src/main/solr/WARCIndexer.conf b/warcbase-hbase/src/main/solr/WARCIndexer.conf similarity index 100% rename from src/main/solr/WARCIndexer.conf rename to warcbase-hbase/src/main/solr/WARCIndexer.conf diff --git 
a/src/main/solr/discovery/conf/currency.xml b/warcbase-hbase/src/main/solr/discovery/conf/currency.xml similarity index 100% rename from src/main/solr/discovery/conf/currency.xml rename to warcbase-hbase/src/main/solr/discovery/conf/currency.xml diff --git a/src/main/solr/discovery/conf/elevate.xml b/warcbase-hbase/src/main/solr/discovery/conf/elevate.xml similarity index 100% rename from src/main/solr/discovery/conf/elevate.xml rename to warcbase-hbase/src/main/solr/discovery/conf/elevate.xml diff --git a/src/main/solr/discovery/conf/lang/contractions_ca.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/contractions_ca.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/contractions_ca.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/contractions_ca.txt diff --git a/src/main/solr/discovery/conf/lang/contractions_fr.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/contractions_fr.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/contractions_fr.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/contractions_fr.txt diff --git a/src/main/solr/discovery/conf/lang/contractions_ga.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/contractions_ga.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/contractions_ga.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/contractions_ga.txt diff --git a/src/main/solr/discovery/conf/lang/contractions_it.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/contractions_it.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/contractions_it.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/contractions_it.txt diff --git a/src/main/solr/discovery/conf/lang/hyphenations_ga.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/hyphenations_ga.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/hyphenations_ga.txt rename to 
warcbase-hbase/src/main/solr/discovery/conf/lang/hyphenations_ga.txt diff --git a/src/main/solr/discovery/conf/lang/stemdict_nl.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stemdict_nl.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stemdict_nl.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stemdict_nl.txt diff --git a/src/main/solr/discovery/conf/lang/stoptags_ja.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stoptags_ja.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stoptags_ja.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stoptags_ja.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_ar.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ar.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_ar.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ar.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_bg.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_bg.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_bg.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_bg.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_ca.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ca.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_ca.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ca.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_cz.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_cz.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_cz.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_cz.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_da.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_da.txt similarity index 100% rename from 
src/main/solr/discovery/conf/lang/stopwords_da.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_da.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_de.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_de.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_de.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_de.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_el.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_el.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_el.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_el.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_en.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_en.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_en.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_en.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_es.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_es.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_es.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_es.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_eu.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_eu.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_eu.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_eu.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_fa.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_fa.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_fa.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_fa.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_fi.txt 
b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_fi.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_fi.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_fi.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_fr.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_fr.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_fr.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_fr.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_ga.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ga.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_ga.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ga.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_gl.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_gl.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_gl.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_gl.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_hi.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_hi.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_hi.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_hi.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_hu.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_hu.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_hu.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_hu.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_hy.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_hy.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_hy.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_hy.txt diff --git 
a/src/main/solr/discovery/conf/lang/stopwords_id.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_id.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_id.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_id.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_it.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_it.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_it.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_it.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_ja.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ja.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_ja.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ja.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_lv.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_lv.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_lv.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_lv.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_nl.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_nl.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_nl.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_nl.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_no.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_no.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_no.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_no.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_pt.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_pt.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_pt.txt rename to 
warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_pt.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_ro.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ro.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_ro.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ro.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_ru.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ru.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_ru.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_ru.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_sv.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_sv.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_sv.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_sv.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_th.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_th.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_th.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_th.txt diff --git a/src/main/solr/discovery/conf/lang/stopwords_tr.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_tr.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/stopwords_tr.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/stopwords_tr.txt diff --git a/src/main/solr/discovery/conf/lang/userdict_ja.txt b/warcbase-hbase/src/main/solr/discovery/conf/lang/userdict_ja.txt similarity index 100% rename from src/main/solr/discovery/conf/lang/userdict_ja.txt rename to warcbase-hbase/src/main/solr/discovery/conf/lang/userdict_ja.txt diff --git a/src/main/solr/discovery/conf/protwords.txt b/warcbase-hbase/src/main/solr/discovery/conf/protwords.txt similarity index 100% rename from 
src/main/solr/discovery/conf/protwords.txt rename to warcbase-hbase/src/main/solr/discovery/conf/protwords.txt diff --git a/src/main/solr/discovery/conf/schema.xml b/warcbase-hbase/src/main/solr/discovery/conf/schema.xml similarity index 100% rename from src/main/solr/discovery/conf/schema.xml rename to warcbase-hbase/src/main/solr/discovery/conf/schema.xml diff --git a/src/main/solr/discovery/conf/solrconfig-production.xml b/warcbase-hbase/src/main/solr/discovery/conf/solrconfig-production.xml similarity index 100% rename from src/main/solr/discovery/conf/solrconfig-production.xml rename to warcbase-hbase/src/main/solr/discovery/conf/solrconfig-production.xml diff --git a/src/main/solr/discovery/conf/solrconfig-server-4.10.4.xml b/warcbase-hbase/src/main/solr/discovery/conf/solrconfig-server-4.10.4.xml similarity index 100% rename from src/main/solr/discovery/conf/solrconfig-server-4.10.4.xml rename to warcbase-hbase/src/main/solr/discovery/conf/solrconfig-server-4.10.4.xml diff --git a/src/main/solr/discovery/conf/solrconfig.xml b/warcbase-hbase/src/main/solr/discovery/conf/solrconfig.xml similarity index 100% rename from src/main/solr/discovery/conf/solrconfig.xml rename to warcbase-hbase/src/main/solr/discovery/conf/solrconfig.xml diff --git a/src/main/solr/discovery/conf/solrcore.properties b/warcbase-hbase/src/main/solr/discovery/conf/solrcore.properties similarity index 100% rename from src/main/solr/discovery/conf/solrcore.properties rename to warcbase-hbase/src/main/solr/discovery/conf/solrcore.properties diff --git a/src/main/solr/discovery/conf/solrcore.properties-production b/warcbase-hbase/src/main/solr/discovery/conf/solrcore.properties-production similarity index 100% rename from src/main/solr/discovery/conf/solrcore.properties-production rename to warcbase-hbase/src/main/solr/discovery/conf/solrcore.properties-production diff --git a/src/main/solr/discovery/conf/stopwords.txt b/warcbase-hbase/src/main/solr/discovery/conf/stopwords.txt similarity 
index 100% rename from src/main/solr/discovery/conf/stopwords.txt rename to warcbase-hbase/src/main/solr/discovery/conf/stopwords.txt diff --git a/src/main/solr/discovery/conf/synonyms.txt b/warcbase-hbase/src/main/solr/discovery/conf/synonyms.txt similarity index 100% rename from src/main/solr/discovery/conf/synonyms.txt rename to warcbase-hbase/src/main/solr/discovery/conf/synonyms.txt diff --git a/src/main/solr/discovery/core.properties b/warcbase-hbase/src/main/solr/discovery/core.properties similarity index 100% rename from src/main/solr/discovery/core.properties rename to warcbase-hbase/src/main/solr/discovery/core.properties diff --git a/src/main/solr/solr.xml b/warcbase-hbase/src/main/solr/solr.xml similarity index 100% rename from src/main/solr/solr.xml rename to warcbase-hbase/src/main/solr/solr.xml diff --git a/src/main/solr/zoo.cfg b/warcbase-hbase/src/main/solr/zoo.cfg similarity index 100% rename from src/main/solr/zoo.cfg rename to warcbase-hbase/src/main/solr/zoo.cfg diff --git a/src/test/java/org/warcbase/data/UrlMappingTest.java b/warcbase-hbase/src/test/java/org/warcbase/data/UrlMappingTest.java similarity index 100% rename from src/test/java/org/warcbase/data/UrlMappingTest.java rename to warcbase-hbase/src/test/java/org/warcbase/data/UrlMappingTest.java diff --git a/src/test/java/org/warcbase/data/UrlUtilsTest.java b/warcbase-hbase/src/test/java/org/warcbase/data/UrlUtilsTest.java similarity index 100% rename from src/test/java/org/warcbase/data/UrlUtilsTest.java rename to warcbase-hbase/src/test/java/org/warcbase/data/UrlUtilsTest.java