Package org.apache.nutch.parse
Class ParseOutputFormat
- java.lang.Object
-
- org.apache.hadoop.mapreduce.OutputFormat<Text,Parse>
-
- org.apache.nutch.parse.ParseOutputFormat
-
public class ParseOutputFormat extends OutputFormat<Text,Parse>
-
-
Constructor Summary
Constructors Constructor Description ParseOutputFormat()
-
Method Summary
All Methods Static Methods Instance Methods Concrete Methods Modifier and Type Method Description void
checkOutputSpecs(JobContext context)
static String
filterNormalize(String fromUrl, String toUrl, String fromHost, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters, URLExemptionFilters exemptionFilters, URLNormalizers normalizers)
static String
filterNormalize(String fromUrl, String toUrl, String origin, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters, URLExemptionFilters exemptionFilters, URLNormalizers normalizers, String urlNormalizerScope)
OutputCommitter
getOutputCommitter(TaskAttemptContext context)
RecordWriter<Text,Parse>
getRecordWriter(TaskAttemptContext context)
String
getUniqueFile(TaskAttemptContext context, String name)
-
-
-
Method Detail
-
getOutputCommitter
public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException
- Specified by:
getOutputCommitter
in class OutputFormat<Text,Parse>
- Throws:
IOException
-
checkOutputSpecs
public void checkOutputSpecs(JobContext context) throws IOException
- Specified by:
checkOutputSpecs
in class OutputFormat<Text,Parse>
- Throws:
IOException
-
getUniqueFile
public String getUniqueFile(TaskAttemptContext context, String name)
-
getRecordWriter
public RecordWriter<Text,Parse> getRecordWriter(TaskAttemptContext context) throws IOException
- Specified by:
getRecordWriter
in class OutputFormat<Text,Parse>
- Throws:
IOException
-
filterNormalize
public static String filterNormalize(String fromUrl, String toUrl, String fromHost, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters, URLExemptionFilters exemptionFilters, URLNormalizers normalizers)
-
filterNormalize
public static String filterNormalize(String fromUrl, String toUrl, String origin, boolean ignoreInternalLinks, boolean ignoreExternalLinks, String ignoreExternalLinksMode, URLFilters filters, URLExemptionFilters exemptionFilters, URLNormalizers normalizers, String urlNormalizerScope)
-
-