Package org.apache.nutch.hostdb
Class UpdateHostDbReducer
- java.lang.Object
-
- org.apache.hadoop.mapreduce.Reducer<Text,NutchWritable,Text,HostDatum>
-
- org.apache.nutch.hostdb.UpdateHostDbReducer
-
public class UpdateHostDbReducer extends Reducer<Text,NutchWritable,Text,HostDatum>
-
-
Nested Class Summary
-
Nested classes/interfaces inherited from class org.apache.hadoop.mapreduce.Reducer
Reducer.Context
-
-
Field Summary
Fields
Modifier and Type Field Description
protected static boolean
checkAny
protected static boolean
checkFailed
protected static boolean
checkKnown
protected static boolean
checkNew
protected static CrawlDatumProcessor[]
crawlDatumProcessors
protected ThreadPoolExecutor
executor
protected static boolean
force
protected static long
now
protected static String[]
numericFields
protected static Text[]
numericFieldWritables
protected Integer
numResolverThreads
protected static int[]
percentiles
protected static Integer
purgeFailedHostsThreshold
protected BlockingQueue<Runnable>
queue
protected static Integer
recheckInterval
protected ResolverThread
resolverThread
protected static String[]
stringFields
protected static Text[]
stringFieldWritables
-
Constructor Summary
Constructors
Constructor Description
UpdateHostDbReducer()
-
Method Summary
All Methods Instance Methods Concrete Methods
Modifier and Type Method Description
void
cleanup(Reducer.Context context)
Shut down all running threads and wait for completion.
protected boolean
isEligibleForCheck(HostDatum datum)
Determines whether a record is eligible for recheck.
void
reduce(Text key, Iterable<NutchWritable> values, Reducer.Context context)
void
setup(Reducer.Context context)
Configures the thread pool and prestarts all resolver threads.
protected boolean
shouldCheck(HostDatum datum)
Determines whether a record should be checked.
-
-
-
Field Detail
-
resolverThread
protected ResolverThread resolverThread
-
numResolverThreads
protected Integer numResolverThreads
-
purgeFailedHostsThreshold
protected static Integer purgeFailedHostsThreshold
-
recheckInterval
protected static Integer recheckInterval
-
checkFailed
protected static boolean checkFailed
-
checkNew
protected static boolean checkNew
-
checkKnown
protected static boolean checkKnown
-
checkAny
protected static boolean checkAny
-
force
protected static boolean force
-
now
protected static long now
-
numericFields
protected static String[] numericFields
-
stringFields
protected static String[] stringFields
-
percentiles
protected static int[] percentiles
-
numericFieldWritables
protected static Text[] numericFieldWritables
-
stringFieldWritables
protected static Text[] stringFieldWritables
-
crawlDatumProcessors
protected static CrawlDatumProcessor[] crawlDatumProcessors
-
queue
protected BlockingQueue<Runnable> queue
-
executor
protected ThreadPoolExecutor executor
-
-
Method Detail
-
setup
public void setup(Reducer.Context context)
Configures the thread pool and prestarts all resolver threads.
-
reduce
public void reduce(Text key, Iterable<NutchWritable> values, Reducer.Context context) throws IOException, InterruptedException
- Overrides:
reduce
in class Reducer<Text,NutchWritable,Text,HostDatum>
- Throws:
IOException
InterruptedException
-
shouldCheck
protected boolean shouldCheck(HostDatum datum)
Determines whether a record should be checked.
- Parameters:
datum - a HostDatum to check for eligibility
- Returns:
true if it should be checked, false otherwise
-
isEligibleForCheck
protected boolean isEligibleForCheck(HostDatum datum)
Determines whether a record is eligible for recheck.
- Parameters:
datum - a HostDatum to check for eligibility
- Returns:
true if eligible for recheck, false otherwise
-
cleanup
public void cleanup(Reducer.Context context)
Shut down all running threads and wait for completion.
-
-