spark读mongodb
idea maven依赖
<dependency>
<!-- MongoDB connector for Spark. The "_2.11" suffix presumably denotes the Scala binary version
     and should match the Scala version of the Spark build in use; TODO confirm against the cluster. -->
<groupId>org.mongodb.spark</groupId>
<artifactId>mongo-spark-connector_2.11</artifactId>
<version>2.3.2</version>
</dependency>
// Names used below, imported locally so the snippet is self-contained.
import org.apache.spark.sql.SparkSession
import com.mongodb.spark.config.ReadConfig
import com.mongodb.spark.sql._ // brings the implicit `.mongo(...)` reader method into scope

// MongoDB connection string; replace the account, password and host placeholders.
// NOTE(review): credentials containing reserved URI characters presumably need
// percent-encoding; verify against the MongoDB connection-string rules.
val mongoURL = "mongodb://账号:密码@机器ip:27017"

// Session builder: Kryo serialization, compressed RDD storage, and a DFS client
// socket timeout of 300000 (presumably milliseconds; TODO confirm).
// For a local run, additionally chain `.master("local[1]")` before `getOrCreate()`.
val builder = SparkSession.builder()
  .appName(appName)
  .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .config("spark.rdd.compress", "true")
  .config("dfs.client.socket-timeout", "300000")
val spark = builder.getOrCreate()
import spark.implicits._

// Read the collection as a DataFrame and keep only column "x".
// "database" and "collection" are empty placeholders: fill them in before running.
// The original used typographic quotes for the collection value, which does not compile.
val source = spark.read
  .mongo(ReadConfig(Map(
    "uri" -> mongoURL,
    "database" -> "",
    "collection" -> "",
    "batchSize" -> "30000"
  )))
  .select("x")
// Parse: extract the string field "x" from every row.
source.map(row => row.getAs[String]("x"))