- Spark + MariaDB test (a quick-check sketch follows the commands)
- SPARK_CLASSPATH=mysql-connector-java-5.1.34-bin.jar bin/pyspark
- df = sqlContext.load(source="jdbc", url="jdbc:mysql://10.0.2.a/test?user=root&password=yourpassword", dbtable="r_input_day")
- df.first()
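Beyond first(), a quick way to poke at the table is to register the DataFrame and run SQL over it. A minimal sketch under the same 1.x-era sqlContext session; no column names from r_input_day are assumed, and the temp table name r_input_day_df is just a placeholder:

# Sketch: inspect and query the JDBC-backed DataFrame (Spark 1.x API).
df.printSchema()                      # columns as pulled from MariaDB
df.registerTempTable("r_input_day_df")   # hypothetical temp table name
sqlContext.sql("SELECT COUNT(*) FROM r_input_day_df").show()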
- Spark + Elasticsearch test (sketch below)
- SPARK_CLASSPATH=/data/elasticsearch-hadoop-2.1.0.Beta4/dist/elasticsearch-hadoop-2.1.0.Beta4.jar ./bin/pyspark
- conf = {"es.resource":"sec-team/access-report"}
- rdd = sc.newAPIHadoopRDD("org.elasticsearch.hadoop.mr.EsInputFormat", "org.apache.hadoop.io.NullWritable", "org.elasticsearch.hadoop.mr.LinkedMapWritable", conf=conf)
- rdd.first()
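Each element of the RDD comes back as a key/value pair (the key class was given as NullWritable above); the value side typically arrives as a Python dict after the Writable conversion. A small sketch to look at just the documents, assuming no particular field names in sec-team/access-report:

# Sketch: keep only the document side of each (key, document) pair.
docs = rdd.map(lambda kv: kv[1])
docs.take(3)    # peek at a few documents as dicts
docs.count()    # total hits matched by es.resource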
- Spark Streaming test (a windowed variant is sketched after the run commands)
- network_wordcount.py
from __future__ import print_function
import sys
from pyspark import SparkContext
from pyspark.streaming import StreamingContext

if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("Usage: network_wordcount.py <hostname> <port>", file=sys.stderr)
        exit(-1)
    sc = SparkContext(appName="PythonStreamingNetworkWordCount")
    # 1-second batch interval
    ssc = StreamingContext(sc, 1)

    lines = ssc.socketTextStream(sys.argv[1], int(sys.argv[2]))
    counts = lines.flatMap(lambda line: line.split(" "))\
                  .map(lambda word: (word, 1))\
                  .reduceByKey(lambda a, b: a+b)
    counts.pprint()

    ssc.start()
    ssc.awaitTermination()
- nc -lk 9999
- spark-submit network_wordcount.py localhost 9999
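Words typed into the nc session show up as counts in the spark-submit console each batch. As a variant, the same pipeline can count over a sliding window instead of per 1-second batch; a sketch assuming a local checkpoint directory, which reduceByKeyAndWindow requires for its inverse-reduce path:

# Sketch: counts over the last 30 seconds, recomputed every 10.
ssc.checkpoint("checkpoint")   # hypothetical local path; needed for windowed state
counts = lines.flatMap(lambda line: line.split(" "))\
              .map(lambda word: (word, 1))\
              .reduceByKeyAndWindow(lambda a, b: a + b, lambda a, b: a - b, 30, 10)
counts.pprint()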