Monday, November 30, 2015

Flume - use case example

  1. wiselog access log
    1. log server
      1. ## agent(s): agent01
        ## Components of agent01: one channel, one source, one sink
        agent01.channels = channel01
        agent01.sources = source01
        agent01.sinks = sink01

        ## channel01 -- memory channel with a capacity of 100000
        agent01.channels.channel01.capacity = 100000
        agent01.channels.channel01.type = memory

        ## source01 -- spooling-directory source writing into channel01
        agent01.sources.source01.type = spooldir
        agent01.sources.source01.spoolDir = /data/log-collector/spool-dir
        agent01.sources.source01.channels = channel01
        agent01.sources.source01.deserializer.maxLineLength = 10240
        agent01.sources.source01.deletePolicy = immediate
        ## the file's basename is attached to each event under the header key "type"
        agent01.sources.source01.basenameHeaderKey = type
        agent01.sources.source01.basenameHeader = true
        ## single static interceptor: sets header "timestamp" to 0
        ## NOTE(review): presumably a placeholder that a downstream agent replaces -- confirm
        agent01.sources.source01.interceptors = interceptor02
        agent01.sources.source01.interceptors.interceptor02.value = 0
        agent01.sources.source01.interceptors.interceptor02.key = timestamp
        agent01.sources.source01.interceptors.interceptor02.type = static

        ## sink01 -- avro sink draining channel01; <hostname> must be replaced with the
        ## address of the receiving agent
        agent01.sinks.sink01.type = avro
        agent01.sinks.sink01.port = 4545
        agent01.sinks.sink01.hostname = <hostname>
        agent01.sinks.sink01.channel = channel01

        ## test -- uncomment to use a logger sink instead of the avro sink above
        #agent01.sinks.sink01.channel = channel01
        #agent01.sinks.sink01.type = logger
    2. HDFS server
      1. ## agent(s): agent01
        ## Components of agent01: one source, one channel, one sink
        agent01.sources = source01
        agent01.channels = channel01
        agent01.sinks = sink01

        ## channel01 -- memory channel with a capacity of 100000
        agent01.channels.channel01.type = memory
        agent01.channels.channel01.capacity = 100000

        ## source01 -- avro source listening on <hostname>:4545 (replace <hostname>
        ## with the address this agent should bind to)
        agent01.sources.source01.channels = channel01
        agent01.sources.source01.type = avro
        agent01.sources.source01.bind = <hostname>
        agent01.sources.source01.port = 4545
        ## interceptor01 (host) runs first, then interceptor02 (regex_extractor)
        agent01.sources.source01.interceptors = interceptor01 interceptor02
        agent01.sources.source01.interceptors.interceptor01.type = host
        agent01.sources.source01.interceptors.interceptor02.type = regex_extractor
        ## Matches "<ipv4> [dd/Mon/yyyy:HH:mm:ss +0900] " at the start of the line and
        ## captures the date-time. All three dots of the address are escaped so they
        ## match a literal "." only. Backslashes are doubled because this is a
        ## properties file.
        ## NOTE(review): the "+0900" offset is matched but not captured, so the
        ## serializer below never sees it -- confirm the agent's time zone is suitable.
        agent01.sources.source01.interceptors.interceptor02.regex = ^\\d+\\.\\d+\\.\\d+\\.\\d+\\s\\[(\\d{2}\\/[a-zA-Z]{3}\\/\\d{4}:\\d{2}:\\d{2}:\\d{2})\\s\\+0900\\]\\s
        ## s01 converts the captured text with the pattern below and stores the
        ## result in the "timestamp" header
        agent01.sources.source01.interceptors.interceptor02.serializers = s01
        agent01.sources.source01.interceptors.interceptor02.serializers.s01.type = org.apache.flume.interceptor.RegexExtractorInterceptorMillisSerializer
        agent01.sources.source01.interceptors.interceptor02.serializers.s01.pattern = dd/MMM/yyyy:HH:mm:ss
        agent01.sources.source01.interceptors.interceptor02.serializers.s01.name = timestamp

        ## sink01 -- HDFS sink draining channel01
        agent01.sinks.sink01.channel = channel01
        agent01.sinks.sink01.type = hdfs
        ## directory is built from the "type" header plus date (%Y%m%d) and hour (%H) escapes
        agent01.sinks.sink01.hdfs.path = /log/wiselog/access/%{type}/yyyymmdd=%Y%m%d/hh=%H
        ## file names start with the value of the "host" header
        agent01.sinks.sink01.hdfs.filePrefix = %{host}
        agent01.sinks.sink01.hdfs.inUsePrefix = .
        ## roll on time only (300); size- and count-based rolling are set to 0
        agent01.sinks.sink01.hdfs.rollInterval = 300
        agent01.sinks.sink01.hdfs.rollSize = 0
        agent01.sinks.sink01.hdfs.rollCount = 0
        agent01.sinks.sink01.hdfs.idleTimeout = 60
        ## NOTE(review): hdfs.fileType is not set here; if plain gzip text files are
        ## wanted rather than the sink's default file type, check the Flume HDFS sink
        ## documentation for hdfs.fileType -- confirm intent
        agent01.sinks.sink01.hdfs.writeFormat = Text
        agent01.sinks.sink01.hdfs.codeC = gzip

        ## test -- uncomment to use a logger sink instead of the HDFS sink above
        #agent01.sinks.sink01.channel = channel01
        #agent01.sinks.sink01.type = logger
  2. apache access log
    1. log server
      1. ## agent(s): agent01
        ## Components of agent01: one channel, two sources, one sink
        agent01.channels = channel01
        agent01.sources = source01 source02
        agent01.sinks = sink01

        ## channel01 -- memory channel with a capacity of 100000
        agent01.channels.channel01.capacity = 100000
        agent01.channels.channel01.type = memory

        ## source01 -- spooling-directory source on /home/log-collector/test
        agent01.sources.source01.type = spooldir
        agent01.sources.source01.spoolDir = /home/log-collector/test
        agent01.sources.source01.channels = channel01
        agent01.sources.source01.deserializer.maxLineLength = 204800
        agent01.sources.source01.deletePolicy = immediate
        ## two static interceptors: header "type" = test, header "timestamp" = 0
        agent01.sources.source01.interceptors = interceptor01 interceptor02
        agent01.sources.source01.interceptors.interceptor01.type = static
        agent01.sources.source01.interceptors.interceptor01.value = test
        agent01.sources.source01.interceptors.interceptor01.key = type
        agent01.sources.source01.interceptors.interceptor02.type = static
        agent01.sources.source01.interceptors.interceptor02.value = 0
        agent01.sources.source01.interceptors.interceptor02.key = timestamp

        ## source02 -- same layout as source01, but reads /home/log-collector/test2
        ## and sets header "type" = test2
        agent01.sources.source02.type = spooldir
        agent01.sources.source02.spoolDir = /home/log-collector/test2
        agent01.sources.source02.channels = channel01
        agent01.sources.source02.deserializer.maxLineLength = 204800
        agent01.sources.source02.deletePolicy = immediate
        agent01.sources.source02.interceptors = interceptor01 interceptor02
        agent01.sources.source02.interceptors.interceptor01.type = static
        agent01.sources.source02.interceptors.interceptor01.value = test2
        agent01.sources.source02.interceptors.interceptor01.key = type
        agent01.sources.source02.interceptors.interceptor02.type = static
        agent01.sources.source02.interceptors.interceptor02.value = 0
        agent01.sources.source02.interceptors.interceptor02.key = timestamp

        ## sink01 -- avro sink draining channel01 towards 10.0.2.a:4545
        ## NOTE(review): "10.0.2.a" looks like a masked address -- replace with the real host
        agent01.sinks.sink01.type = avro
        agent01.sinks.sink01.port = 4545
        agent01.sinks.sink01.hostname = 10.0.2.a
        agent01.sinks.sink01.channel = channel01

        ## test -- uncomment to use a logger sink instead of the avro sink above
        #agent01.sinks.sink01.channel = channel01
        #agent01.sinks.sink01.type = logger
    2. HDFS server
      1. ## agent(s): agent01
        ## Components of agent01: one channel, one source, one sink
        agent01.channels = channel01
        agent01.sources = source01
        agent01.sinks = sink01

        ## channel01 -- memory channel with a capacity of 100000
        agent01.channels.channel01.capacity = 100000
        agent01.channels.channel01.type = memory

        ## source01 -- avro source bound to 10.0.2.a:4545, writing into channel01
        ## NOTE(review): "10.0.2.a" looks like a masked address -- replace with the real host
        agent01.sources.source01.type = avro
        agent01.sources.source01.port = 4545
        agent01.sources.source01.bind = 10.0.2.a
        agent01.sources.source01.channels = channel01
        ## interceptor chain: host interceptor first, then a regex extractor
        agent01.sources.source01.interceptors = interceptor01 interceptor02
        agent01.sources.source01.interceptors.interceptor01.type = host
        ## the regex captures the bracketed "d/Mon/yyyy:HH:mm:ss" that precedes " +0900]"
        agent01.sources.source01.interceptors.interceptor02.regex = ^[\\s\\S]+\\[(\\d{1,2}\\/[A-Z][a-z]{2}\\/\\d{4}:\\d{2}:\\d{2}:\\d{2})\\s\\+0900\\][\\s\\S]+
        agent01.sources.source01.interceptors.interceptor02.type = regex_extractor
        ## serializer s01 converts the capture with the pattern below into the
        ## "timestamp" header
        agent01.sources.source01.interceptors.interceptor02.serializers = s01
        agent01.sources.source01.interceptors.interceptor02.serializers.s01.name = timestamp
        agent01.sources.source01.interceptors.interceptor02.serializers.s01.pattern = dd/MMM/yyyy:HH:mm:ss
        agent01.sources.source01.interceptors.interceptor02.serializers.s01.type = org.apache.flume.interceptor.RegexExtractorInterceptorMillisSerializer

        ## sink01 -- HDFS sink draining channel01
        agent01.sinks.sink01.type = hdfs
        agent01.sinks.sink01.channel = channel01
        ## output location: "type" header plus date/hour escapes; file prefix: "host" header
        agent01.sinks.sink01.hdfs.filePrefix = %{host}
        agent01.sinks.sink01.hdfs.path = /log/apache/access/%{type}/yyyymmdd=%Y%m%d/hh=%H
        agent01.sinks.sink01.hdfs.inUsePrefix = .
        ## rolling: interval 300, with size and count both set to 0; idle timeout 120
        agent01.sinks.sink01.hdfs.rollCount = 0
        agent01.sinks.sink01.hdfs.rollSize = 0
        agent01.sinks.sink01.hdfs.rollInterval = 300
        agent01.sinks.sink01.hdfs.idleTimeout = 120
        ## output encoding
        agent01.sinks.sink01.hdfs.codeC = gzip
        agent01.sinks.sink01.hdfs.writeFormat = Text

        ## test -- uncomment to use a logger sink instead of the HDFS sink above
        #agent01.sinks.sink01.channel = channel01
        #agent01.sinks.sink01.type = logger

No comments:

Post a Comment

Note: Only a member of this blog may post a comment.