Constant Field Values
Contents
org.apache.*
-
org.apache.nutch.collection.CollectionManager Modifier and Type Constant Field Value public static final String
DEFAULT_FILE_NAME
"subcollections.xml"
-
org.apache.nutch.collection.Subcollection Modifier and Type Constant Field Value public static final String
TAG_BLACKLIST
"blacklist"
public static final String
TAG_COLLECTION
"subcollection"
public static final String
TAG_COLLECTIONS
"subcollections"
public static final String
TAG_ID
"id"
public static final String
TAG_KEY
"key"
public static final String
TAG_NAME
"name"
public static final String
TAG_WHITELIST
"whitelist"
-
org.apache.nutch.crawl.CrawlDatum Modifier and Type Constant Field Value public static final String
FETCH_DIR_NAME
"crawl_fetch"
public static final String
GENERATE_DIR_NAME
"crawl_generate"
public static final String
PARSE_DIR_NAME
"crawl_parse"
public static final byte
STATUS_DB_DUPLICATE
7
public static final byte
STATUS_DB_FETCHED
2
public static final byte
STATUS_DB_GONE
3
public static final byte
STATUS_DB_MAX
31
public static final byte
STATUS_DB_NOTMODIFIED
6
public static final byte
STATUS_DB_ORPHAN
8
public static final byte
STATUS_DB_REDIR_PERM
5
public static final byte
STATUS_DB_REDIR_TEMP
4
public static final byte
STATUS_DB_UNFETCHED
1
public static final byte
STATUS_FETCH_GONE
37
public static final byte
STATUS_FETCH_MAX
63
public static final byte
STATUS_FETCH_NOTMODIFIED
38
public static final byte
STATUS_FETCH_REDIR_PERM
36
public static final byte
STATUS_FETCH_REDIR_TEMP
35
public static final byte
STATUS_FETCH_RETRY
34
public static final byte
STATUS_FETCH_SUCCESS
33
public static final byte
STATUS_INJECTED
66
public static final byte
STATUS_LINKED
67
public static final byte
STATUS_PARSE_META
68
public static final byte
STATUS_SIGNATURE
65
-
org.apache.nutch.crawl.CrawlDb Modifier and Type Constant Field Value public static final String
CRAWLDB_ADDITIONS_ALLOWED
"db.update.additions.allowed"
public static final String
CRAWLDB_PURGE_404
"db.update.purge.404"
public static final String
CRAWLDB_PURGE_ORPHANS
"db.update.purge.orphans"
public static final String
CURRENT_NAME
"current"
public static final String
LOCK_NAME
".locked"
-
org.apache.nutch.crawl.CrawlDbFilter Modifier and Type Constant Field Value public static final String
URL_FILTERING
"crawldb.url.filters"
public static final String
URL_NORMALIZING
"crawldb.url.normalizers"
public static final String
URL_NORMALIZING_SCOPE
"crawldb.url.normalizers.scope"
-
org.apache.nutch.crawl.DeduplicationJob Modifier and Type Constant Field Value protected static final String
DEDUPLICATION_COMPARE_ORDER
"deduplication.compare.order"
protected static final String
DEDUPLICATION_GROUP_MODE
"deduplication.group.mode"
-
org.apache.nutch.crawl.FetchSchedule Modifier and Type Constant Field Value public static final int
SECONDS_PER_DAY
86400
public static final int
STATUS_MODIFIED
1
public static final int
STATUS_NOTMODIFIED
2
public static final int
STATUS_UNKNOWN
0
-
org.apache.nutch.crawl.Generator Modifier and Type Constant Field Value public static final String
GENERATE_UPDATE_CRAWLDB
"generate.update.crawldb"
public static final String
GENERATOR_COUNT_MODE
"generate.count.mode"
public static final String
GENERATOR_COUNT_VALUE_DOMAIN
"domain"
public static final String
GENERATOR_COUNT_VALUE_HOST
"host"
public static final String
GENERATOR_CUR_TIME
"generate.curTime"
public static final String
GENERATOR_DELAY
"crawl.gen.delay"
public static final String
GENERATOR_EXPR
"generate.expr"
public static final String
GENERATOR_FETCH_DELAY_EXPR
"generate.fetch.delay.expr"
public static final String
GENERATOR_FILTER
"generate.filter"
public static final String
GENERATOR_HOSTDB
"generate.hostdb"
public static final String
GENERATOR_MAX_COUNT
"generate.max.count"
public static final String
GENERATOR_MAX_COUNT_EXPR
"generate.max.count.expr"
public static final String
GENERATOR_MAX_NUM_SEGMENTS
"generate.max.num.segments"
public static final String
GENERATOR_MIN_INTERVAL
"generate.min.interval"
public static final String
GENERATOR_MIN_SCORE
"generate.min.score"
public static final String
GENERATOR_NORMALISE
"generate.normalise"
public static final String
GENERATOR_RESTRICT_STATUS
"generate.restrict.status"
public static final String
GENERATOR_TOP_N
"generate.topN"
-
org.apache.nutch.crawl.Injector Modifier and Type Constant Field Value public static final String
URL_FILTER_NORMALIZE_ALL
"crawldb.inject.filter.normalize.all"
-
org.apache.nutch.crawl.Injector.InjectMapper Modifier and Type Constant Field Value public static final String
EQUAL_CHARACTER
"="
public static final String
TAB_CHARACTER
"\t"
public static final String
URL_NORMALIZING_SCOPE
"crawldb.url.normalizers.scope"
-
org.apache.nutch.crawl.LinkDb Modifier and Type Constant Field Value public static final String
CURRENT_NAME
"current"
public static final String
IGNORE_EXTERNAL_LINKS
"linkdb.ignore.external.links"
public static final String
IGNORE_INTERNAL_LINKS
"linkdb.ignore.internal.links"
public static final String
LOCK_NAME
".locked"
-
org.apache.nutch.crawl.LinkDbFilter Modifier and Type Constant Field Value public static final String
URL_FILTERING
"linkdb.url.filters"
public static final String
URL_NORMALIZING
"linkdb.url.normalizer"
public static final String
URL_NORMALIZING_SCOPE
"linkdb.url.normalizer.scope"
-
org.apache.nutch.crawl.MimeAdaptiveFetchSchedule Modifier and Type Constant Field Value public static final String
SCHEDULE_DEC_RATE
"db.fetch.schedule.adaptive.dec_rate"
public static final String
SCHEDULE_INC_RATE
"db.fetch.schedule.adaptive.inc_rate"
public static final String
SCHEDULE_MIME_FILE
"db.fetch.schedule.mime.file"
-
org.apache.nutch.crawl.URLPartitioner Modifier and Type Constant Field Value public static final String
PARTITION_MODE_DOMAIN
"byDomain"
public static final String
PARTITION_MODE_HOST
"byHost"
public static final String
PARTITION_MODE_IP
"byIP"
public static final String
PARTITION_MODE_KEY
"partition.url.mode"
-
org.apache.nutch.fetcher.Fetcher Modifier and Type Constant Field Value public static final String
CONTENT_REDIR
"content"
public static final int
PERM_REFRESH_TIME
5
public static final String
PROTOCOL_REDIR
"protocol"
-
org.apache.nutch.fetcher.FetchItemQueues Modifier and Type Constant Field Value public static final String
DEFAULT_ID
"default"
public static final String
QUEUE_MODE_DOMAIN
"byDomain"
public static final String
QUEUE_MODE_HOST
"byHost"
public static final String
QUEUE_MODE_IP
"byIP"
-
org.apache.nutch.hostdb.ReadHostDb Modifier and Type Constant Field Value public static final String
HOSTDB_DUMP_HEADER
"hostdb.dump.field.header"
public static final String
HOSTDB_DUMP_HOMEPAGES
"hostdb.dump.homepages"
public static final String
HOSTDB_DUMP_HOSTNAMES
"hostdb.dump.hostnames"
public static final String
HOSTDB_FILTER_EXPRESSION
"hostdb.filter.expression"
-
org.apache.nutch.hostdb.UpdateHostDb Modifier and Type Constant Field Value public static final String
HOSTDB_CHECK_FAILED
"hostdb.check.failed"
public static final String
HOSTDB_CHECK_KNOWN
"hostdb.check.known"
public static final String
HOSTDB_CHECK_NEW
"hostdb.check.new"
public static final String
HOSTDB_CRAWLDATUM_PROCESSORS
"hostdb.crawldatum.processors"
public static final String
HOSTDB_FORCE_CHECK
"hostdb.force.check"
public static final String
HOSTDB_NUM_RESOLVER_THREADS
"hostdb.num.resolvers.threads"
public static final String
HOSTDB_NUMERIC_FIELDS
"hostdb.numeric.fields"
public static final String
HOSTDB_PERCENTILES
"hostdb.percentiles"
public static final String
HOSTDB_PURGE_FAILED_HOSTS_THRESHOLD
"hostdb.purge.failed.hosts.threshold"
public static final String
HOSTDB_RECHECK_INTERVAL
"hostdb.recheck.interval"
public static final String
HOSTDB_STRING_FIELDS
"hostdb.string.fields"
public static final String
HOSTDB_URL_FILTERING
"hostdb.url.filter"
public static final String
HOSTDB_URL_NORMALIZING
"hostdb.url.normalize"
public static final String
LOCK_NAME
".locked"
-
org.apache.nutch.indexer.IndexerMapReduce Modifier and Type Constant Field Value public static final String
INDEXER_BINARY_AS_BASE64
"indexer.binary.base64"
public static final String
INDEXER_DELETE
"indexer.delete"
public static final String
INDEXER_DELETE_ROBOTS_NOINDEX
"indexer.delete.robots.noindex"
public static final String
INDEXER_DELETE_SKIPPED
"indexer.delete.skipped.by.indexingfilter"
public static final String
INDEXER_NO_COMMIT
"indexer.nocommit"
public static final String
INDEXER_PARAMS
"indexer.additional.params"
public static final String
INDEXER_SKIP_NOTMODIFIED
"indexer.skip.notmodified"
public static final String
URL_FILTERING
"indexer.url.filters"
public static final String
URL_NORMALIZING
"indexer.url.normalizers"
-
org.apache.nutch.indexer.IndexingFilters Modifier and Type Constant Field Value public static final String
INDEXINGFILTER_ORDER
"indexingfilter.order"
-
org.apache.nutch.indexer.NutchDocument Modifier and Type Constant Field Value public static final byte
VERSION
2
-
org.apache.nutch.indexer.NutchIndexAction Modifier and Type Constant Field Value public static final byte
ADD
0
public static final byte
DELETE
1
public static final byte
UPDATE
2
-
org.apache.nutch.indexer.feed.FeedIndexingFilter Modifier and Type Constant Field Value public static final String
dateFormatStr
"yyyyMMddHHmm"
-
org.apache.nutch.indexer.filter.MimeTypeIndexingFilter Modifier and Type Constant Field Value public static final String
MIMEFILTER_REGEX_FILE
"mimetype.filter.file"
-
org.apache.nutch.indexer.links.LinksIndexingFilter Modifier and Type Constant Field Value public static final String
LINKS_INLINKS_HOST
"index.links.inlinks.host.ignore"
public static final String
LINKS_ONLY_HOSTS
"index.links.hosts.only"
public static final String
LINKS_OUTLINKS_HOST
"index.links.outlinks.host.ignore"
-
org.apache.nutch.indexwriter.cloudsearch.CloudSearchConstants Modifier and Type Constant Field Value public static final String
BATCH_DUMP
"batch.dump"
public static final String
ENDPOINT
"endpoint"
public static final String
MAX_DOCS_BATCH
"batch.maxSize"
public static final String
REGION
"region"
-
org.apache.nutch.indexwriter.csv.CSVConstants Modifier and Type Constant Field Value public static final String
CSV_CHARSET
"charset"
public static final String
CSV_ESCAPECHARACTER
"escapechar"
public static final String
CSV_FIELD_SEPARATOR
"separator"
public static final String
CSV_FIELDS
"fields"
public static final String
CSV_MAXFIELDLENGTH
"maxfieldlength"
public static final String
CSV_MAXFIELDVALUES
"maxfieldvalues"
public static final String
CSV_OUTPATH
"outpath"
public static final String
CSV_QUOTECHARACTER
"quotechar"
public static final String
CSV_VALUESEPARATOR
"valuesep"
public static final String
CSV_WITHHEADER
"header"
-
org.apache.nutch.indexwriter.dummy.DummyConstants Modifier and Type Constant Field Value public static final String
DELETE
"delete"
public static final String
PATH
"path"
-
org.apache.nutch.indexwriter.elastic.ElasticConstants Modifier and Type Constant Field Value public static final String
BULK_CLOSE_TIMEOUT
"bulk.close.timeout"
public static final String
EXPONENTIAL_BACKOFF_MILLIS
"exponential.backoff.millis"
public static final String
EXPONENTIAL_BACKOFF_RETRIES
"exponential.backoff.retries"
public static final String
HOSTS
"host"
public static final String
INDEX
"index"
public static final String
MAX_BULK_DOCS
"max.bulk.docs"
public static final String
MAX_BULK_LENGTH
"max.bulk.size"
public static final String
OPTIONS
"options"
public static final String
PASSWORD
"password"
public static final String
PORT
"port"
public static final String
SCHEME
"scheme"
public static final String
USE_AUTH
"auth"
public static final String
USER
"username"
-
org.apache.nutch.indexwriter.kafka.KafkaConstants Modifier and Type Constant Field Value public static final String
HOST
"host"
public static final String
KEY_SERIALIZER
"key.serializer"
public static final String
MAX_DOC_COUNT
"max.doc.count"
public static final String
PORT
"port"
public static final String
TOPIC
"topic"
public static final String
VALUE_SERIALIZER
"value.serializer"
-
org.apache.nutch.indexwriter.opensearch1x.OpenSearch1xConstants Modifier and Type Constant Field Value public static final String
BULK_CLOSE_TIMEOUT
"bulk.close.timeout"
public static final String
EXPONENTIAL_BACKOFF_MILLIS
"exponential.backoff.millis"
public static final String
EXPONENTIAL_BACKOFF_RETRIES
"exponential.backoff.retries"
public static final String
HOSTS
"host"
public static final String
INDEX
"index"
public static final String
KEY_STORE_PASSWORD
"key.store.password"
public static final String
KEY_STORE_PATH
"key.store.path"
public static final String
KEY_STORE_TYPE
"key.store.type"
public static final String
MAX_BULK_DOCS
"max.bulk.docs"
public static final String
MAX_BULK_LENGTH
"max.bulk.size"
public static final String
OPTIONS
"options"
public static final String
PASSWORD
"password"
public static final String
PORT
"port"
public static final String
SCHEME
"scheme"
public static final String
TRUST_STORE_PASSWORD
"trust.store.password"
public static final String
TRUST_STORE_PATH
"trust.store.path"
public static final String
TRUST_STORE_TYPE
"trust.store.type"
public static final String
USER
"username"
-
org.apache.nutch.indexwriter.solr.SolrConstants Modifier and Type Constant Field Value public static final String
AUTH_HEADER_NAME
"auth.header.name"
public static final String
AUTH_HEADER_VALUE
"auth.header.value"
public static final String
COLLECTION
"collection"
public static final String
COMMIT_SIZE
"commitSize"
public static final String
PASSWORD
"password"
public static final String
SERVER_TYPE
"type"
public static final String
SERVER_URLS
"url"
public static final String
USE_AUTH
"auth"
public static final String
USERNAME
"username"
public static final String
WEIGHT_FIELD
"weight.field"
-
org.apache.nutch.metadata.CreativeCommons Modifier and Type Constant Field Value public static final String
LICENSE_LOCATION
"License-Location"
public static final String
LICENSE_URL
"License-Url"
public static final String
WORK_TYPE
"Work-Type"
-
org.apache.nutch.metadata.DublinCore Modifier and Type Constant Field Value public static final String
CONTRIBUTOR
"contributor"
public static final String
COVERAGE
"coverage"
public static final String
CREATOR
"creator"
public static final String
DATE
"date"
public static final String
DESCRIPTION
"description"
public static final String
FORMAT
"format"
public static final String
IDENTIFIER
"identifier"
public static final String
LANGUAGE
"language"
public static final String
MODIFIED
"modified"
public static final String
PUBLISHER
"publisher"
public static final String
RELATION
"relation"
public static final String
RIGHTS
"rights"
public static final String
SOURCE
"source"
public static final String
SUBJECT
"subject"
public static final String
TITLE
"title"
public static final String
TYPE
"type"
-
org.apache.nutch.metadata.Feed Modifier and Type Constant Field Value public static final String
FEED
"feed"
public static final String
FEED_AUTHOR
"author"
public static final String
FEED_PUBLISHED
"published"
public static final String
FEED_TAGS
"tag"
public static final String
FEED_UPDATED
"updated"
-
org.apache.nutch.metadata.HttpHeaders Modifier and Type Constant Field Value public static final String
CLIENT_TRANSFER_ENCODING
"Client-Transfer-Encoding"
public static final String
CONTENT_DISPOSITION
"Content-Disposition"
public static final String
CONTENT_ENCODING
"Content-Encoding"
public static final String
CONTENT_LANGUAGE
"Content-Language"
public static final String
CONTENT_LENGTH
"Content-Length"
public static final String
CONTENT_LOCATION
"Content-Location"
public static final String
CONTENT_MD5
"Content-MD5"
public static final String
CONTENT_TYPE
"Content-Type"
public static final String
IF_MODIFIED_SINCE
"If-Modified-Since"
public static final String
LAST_MODIFIED
"Last-Modified"
public static final String
LOCATION
"Location"
public static final String
TRANSFER_ENCODING
"Transfer-Encoding"
public static final String
USER_AGENT
"User-Agent"
-
org.apache.nutch.metadata.Nutch Modifier and Type Constant Field Value public static final String
ARG_CRAWLDB
"crawldb"
public static final String
ARG_HOSTDB
"hostdb"
public static final String
ARG_LINKDB
"linkdb"
public static final String
ARG_SEEDDIR
"url_dir"
public static final String
ARG_SEEDNAME
"seedName"
public static final String
ARG_SEGMENTDIR
"segment_dir"
public static final String
ARG_SEGMENTS
"segment"
public static final String
CACHING_FORBIDDEN_ALL
"all"
public static final String
CACHING_FORBIDDEN_CONTENT
"content"
public static final String
CACHING_FORBIDDEN_KEY
"caching.forbidden"
public static final String
CACHING_FORBIDDEN_NONE
"none"
public static final String
CHAR_ENCODING_FOR_CONVERSION
"CharEncodingForConversion"
public static final String
CRAWL_ID_KEY
"storage.crawl.id"
public static final String
FETCH_EVENT_CONTENTLANG
"content-language"
public static final String
FETCH_EVENT_CONTENTTYPE
"content-type"
public static final String
FETCH_EVENT_FETCHTIME
"fetchTime"
public static final String
FETCH_EVENT_SCORE
"score"
public static final String
FETCH_EVENT_TITLE
"title"
public static final String
FETCH_STATUS_KEY
"_fst_"
public static final String
FETCH_TIME_KEY
"_ftk_"
public static final String
FIXED_INTERVAL_KEY
"fixedInterval"
public static final String
GENERATE_TIME_KEY
"_ngt_"
public static final String
ORIGINAL_CHAR_ENCODING
"OriginalCharEncoding"
public static final String
PROTO_STATUS_KEY
"_pst_"
public static final String
REPR_URL_KEY
"_repr_"
public static final String
ROBOTS_METATAG
"robots"
public static final String
SCORE_KEY
"nutch.crawl.score"
public static final String
SEGMENT_NAME_KEY
"nutch.segment.name"
public static final String
SIGNATURE_KEY
"nutch.content.digest"
public static final String
STAT_PROGRESS
"progress"
public static final String
VAL_RESULT
"result"
-
org.apache.nutch.microformats.reltag.RelTagParser Modifier and Type Constant Field Value public static final String
REL_TAG
"Rel-Tag"
-
org.apache.nutch.net.URLFilters Modifier and Type Constant Field Value public static final String
URLFILTER_ORDER
"urlfilter.order"
-
org.apache.nutch.net.URLNormalizers Modifier and Type Constant Field Value public static final String
SCOPE_CRAWLDB
"crawldb"
public static final String
SCOPE_DEFAULT
"default"
public static final String
SCOPE_FETCHER
"fetcher"
public static final String
SCOPE_GENERATE_HOST_COUNT
"generate_host_count"
public static final String
SCOPE_INDEXER
"indexer"
public static final String
SCOPE_INJECT
"inject"
public static final String
SCOPE_LINKDB
"linkdb"
public static final String
SCOPE_OUTLINK
"outlink"
public static final String
SCOPE_PARTITION
"partition"
-
org.apache.nutch.net.protocols.ProtocolLogUtil Modifier and Type Constant Field Value public static final String
HTTP_LOG_SUPPRESSION
"http.log.exceptions.suppress.stack"
-
org.apache.nutch.net.protocols.Response Modifier and Type Constant Field Value public static final String
FETCH_TIME
"nutch.fetch.time"
public static final String
IP_ADDRESS
"_ip_"
public static final String
REQUEST
"_request_"
public static final String
RESPONSE_HEADERS
"_response.headers_"
public static final String
TRUNCATED_CONTENT
"http.content.truncated"
public static final String
TRUNCATED_CONTENT_REASON
"http.content.truncated.reason"
-
org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer Modifier and Type Constant Field Value public static final String
NORM_HOST_IDN
"urlnormalizer.basic.host.idn"
public static final String
NORM_HOST_TRIM_TRAILING_DOT
"urlnormalizer.basic.host.trim-trailing-dot"
-
org.apache.nutch.parse.HtmlParseFilters Modifier and Type Constant Field Value public static final String
HTMLPARSEFILTER_ORDER
"htmlparsefilter.order"
-
org.apache.nutch.parse.ParseData Modifier and Type Constant Field Value public static final String
DIR_NAME
"parse_data"
-
org.apache.nutch.parse.ParserFactory Modifier and Type Constant Field Value public static final String
DEFAULT_PLUGIN
"*"
-
org.apache.nutch.parse.ParseSegment Modifier and Type Constant Field Value public static final String
SKIP_TRUNCATED
"parser.skip.truncated"
-
org.apache.nutch.parse.ParseStatus Modifier and Type Constant Field Value public static final byte
FAILED
2
public static final short
FAILED_EXCEPTION
200
public static final short
FAILED_INVALID_FORMAT
203
public static final short
FAILED_MISSING_CONTENT
205
public static final short
FAILED_MISSING_PARTS
204
public static final short
FAILED_TRUNCATED
202
public static final byte
NOTPARSED
0
public static final byte
SUCCESS
1
public static final short
SUCCESS_REDIRECT
100
-
org.apache.nutch.parse.ParseText Modifier and Type Constant Field Value public static final String
DIR_NAME
"parse_text"
-
org.apache.nutch.parse.feed.FeedParser Modifier and Type Constant Field Value public static final String
CHARSET_UTF8
"charset=UTF-8"
public static final String
TEXT_PLAIN_CONTENT_TYPE
"text/plain; charset=UTF-8"
-
org.apache.nutch.parsefilter.naivebayes.NaiveBayesParseFilter Modifier and Type Constant Field Value public static final String
DICTFILE_MODELFILTER
"parsefilter.naivebayes.wordlist"
public static final String
TRAINFILE_MODELFILTER
"parsefilter.naivebayes.trainfile"
-
org.apache.nutch.protocol.Content Modifier and Type Constant Field Value public static final String
DIR_NAME
"content"
-
org.apache.nutch.protocol.ProtocolStatus Modifier and Type Constant Field Value public static final int
ACCESS_DENIED
17
public static final int
BLOCKED
23
public static final int
EXCEPTION
16
public static final int
FAILED
2
public static final int
GONE
11
public static final int
MOVED
12
public static final int
NOTFETCHING
20
public static final int
NOTFOUND
14
public static final int
NOTMODIFIED
21
public static final int
PROTO_NOT_FOUND
10
public static final int
REDIR_EXCEEDED
19
public static final int
RETRY
15
public static final int
ROBOTS_DENIED
18
public static final int
SUCCESS
1
public static final int
TEMP_MOVED
13
public static final int
WOULDBLOCK
22
-
org.apache.nutch.protocol.http.api.HttpBase Modifier and Type Constant Field Value public static final int
BUFFER_SIZE
8192
-
org.apache.nutch.protocol.httpclient.HttpAuthenticationFactory Modifier and Type Constant Field Value public static final String
WWW_AUTHENTICATE
"WWW-Authenticate"
-
org.apache.nutch.scoring.depth.DepthScoringFilter Modifier and Type Constant Field Value public static final int
DEFAULT_MAX_DEPTH
1000
public static final String
DEPTH_KEY
"_depth_"
public static final String
MAX_DEPTH_KEY
"_maxdepth_"
-
org.apache.nutch.scoring.metadata.MetadataScoringFilter Modifier and Type Constant Field Value public static final String
METADATA_CONTENT
"scoring.content.md"
public static final String
METADATA_DATUM
"scoring.db.md"
public static final String
METADATA_PARSED
"scoring.parse.md"
-
org.apache.nutch.scoring.webgraph.LinkDatum Modifier and Type Constant Field Value public static final byte
INLINK
1
public static final byte
OUTLINK
2
-
org.apache.nutch.scoring.webgraph.LinkDumper Modifier and Type Constant Field Value public static final String
DUMP_DIR
"linkdump"
-
org.apache.nutch.scoring.webgraph.WebGraph Modifier and Type Constant Field Value public static final String
INLINK_DIR
"inlinks"
public static final String
LOCK_NAME
".locked"
public static final String
NODE_DIR
"nodes"
public static final String
OLD_OUTLINK_DIR
"outlinks/old"
public static final String
OUTLINK_DIR
"outlinks/current"
-
org.apache.nutch.scoring.webgraph.WebGraph.OutlinkDb Modifier and Type Constant Field Value public static final String
URL_FILTERING
"webgraph.url.filters"
public static final String
URL_NORMALIZING
"webgraph.url.normalizers"
-
org.apache.nutch.service.resources.ConfigResource Modifier and Type Constant Field Value public static final String
DEFAULT
"default"
-
org.apache.nutch.tools.CommonCrawlFormatWARC Modifier and Type Constant Field Value public static final String
MAX_WARC_FILE_SIZE
"warc.file.size.max"
public static final String
TEMPLATE
"${prefix}-${timestamp17}-${serialno}"
-
org.apache.nutch.tools.WARCUtils Modifier and Type Constant Field Value public static final String
COLONSP
": "
public static final String
CONFORMS_TO
"conformsTo"
public static final String
CRLF
"\r\n"
public static final String
FORMAT
"format"
public static final String
HOSTNAME
"hostname"
public static final String
HTTP_HEADER_FROM
"http-header-from"
public static final String
HTTP_HEADER_USER_AGENT
"http-header-user-agent"
public static final String
IP
"ip"
public static final String
OPERATOR
"operator"
public static final String
ROBOTS
"robots"
public static final String
SOFTWARE
"software"
protected static final String
X_HIDE_HEADER
"X-Crawler-"
-
org.apache.nutch.tools.arc.ArcSegmentCreator Modifier and Type Constant Field Value public static final String
URL_VERSION
"arc.url.version"
-
org.apache.nutch.tools.arc.ArcSegmentCreator.ArcSegmentCreatorMapper Modifier and Type Constant Field Value public static final String
URL_VERSION
"arc.url.version"
-
org.apache.nutch.urlfilter.automaton.AutomatonURLFilter Modifier and Type Constant Field Value public static final String
URLFILTER_AUTOMATON_FILE
"urlfilter.automaton.file"
public static final String
URLFILTER_AUTOMATON_RULES
"urlfilter.automaton.rules"
-
org.apache.nutch.urlfilter.fast.FastURLFilter Modifier and Type Constant Field Value public static final String
URLFILTER_FAST_FILE
"urlfilter.fast.file"
public static final String
URLFILTER_FAST_MAX_LENGTH
"urlfilter.fast.url.max.length"
public static final String
URLFILTER_FAST_PATH_MAX_LENGTH
"urlfilter.fast.url.path.max.length"
public static final String
URLFILTER_FAST_QUERY_MAX_LENGTH
"urlfilter.fast.url.query.max.length"
-
org.apache.nutch.urlfilter.ignoreexempt.ExemptionUrlFilter Modifier and Type Constant Field Value public static final String
DB_IGNORE_EXTERNAL_EXEMPTIONS_FILE
"db.ignore.external.exemptions.file"
-
org.apache.nutch.urlfilter.regex.RegexURLFilter Modifier and Type Constant Field Value public static final String
URLFILTER_REGEX_FILE
"urlfilter.regex.file"
public static final String
URLFILTER_REGEX_RULES
"urlfilter.regex.rules"
-
org.apache.nutch.util.EncodingDetector Modifier and Type Constant Field Value public static final String
MIN_CONFIDENCE_KEY
"encodingdetector.charset.min.confidence"
public static final int
NO_THRESHOLD
-1
-
org.apache.nutch.util.NutchConfiguration Modifier and Type Constant Field Value public static final String
UUID_KEY
"nutch.conf.uuid"
-
org.apache.nutch.util.SitemapProcessor Modifier and Type Constant Field Value public static final String
CURRENT_NAME
"current"
public static final String
LOCK_NAME
".locked"
public static final String
SITEMAP_ALWAYS_TRY_SITEMAPXML_ON_ROOT
"sitemap.url.default.sitemap.xml"
public static final String
SITEMAP_OVERWRITE_EXISTING
"sitemap.url.overwrite.existing"
public static final String
SITEMAP_REDIR_MAX
"sitemap.redir.max"
public static final String
SITEMAP_SIZE_MAX
"sitemap.size.max"
public static final String
SITEMAP_STRICT_PARSING
"sitemap.strict.parsing"
public static final String
SITEMAP_URL_FILTERING
"sitemap.url.filter"
public static final String
SITEMAP_URL_NORMALIZING
"sitemap.url.normalize"
-
org.apache.nutch.util.domain.DomainSuffix Modifier and Type Constant Field Value public static final float
DEFAULT_BOOST
1.0f