- collect the information listed below from the hdfs-site.xml of the remote HA-enabled HDFS cluster
- e.g.
<property>
  <name>dfs.nameservices</name>
  <value>hadooptest</value>
</property>
<property>
  <name>dfs.client.failover.proxy.provider.hadooptest</name>
  <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
  <name>dfs.ha.namenodes.hadooptest</name>
  <value>nn1,nn2</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.hadooptest.nn1</name>
  <value>nn1.net:8020</value>
</property>
<property>
  <name>dfs.namenode.rpc-address.hadooptest.nn2</name>
  <value>nn2.net:8020</value>
</property>
- use the information on the client side
- distcp
# copy /test/result from the local cluster to the remote HA cluster (logical nameservice "hadooptest")
hadoop distcp \
  -Ddfs.nameservices=hadooptest \
  -Ddfs.client.failover.proxy.provider.hadooptest=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider \
  -Ddfs.ha.namenodes.hadooptest=nn1,nn2 \
  -Ddfs.namenode.rpc-address.hadooptest.nn1=nn1.net:8020 \
  -Ddfs.namenode.rpc-address.hadooptest.nn2=nn2.net:8020 \
  /test/result \
  hdfs://hadooptest/tmp/test/result2
- spark
import org.apache.spark.SparkContext

val sc = new SparkContext

// apply the remote cluster's HA settings to this client's Hadoop configuration
sc.hadoopConfiguration.set("dfs.nameservices", "hadooptest")
sc.hadoopConfiguration.set("dfs.client.failover.proxy.provider.hadooptest",
  "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
sc.hadoopConfiguration.set("dfs.ha.namenodes.hadooptest", "nn1,nn2")
sc.hadoopConfiguration.set("dfs.namenode.rpc-address.hadooptest.nn1", "nn1.net:8020")
sc.hadoopConfiguration.set("dfs.namenode.rpc-address.hadooptest.nn2", "nn2.net:8020")

// dataFrame: an existing DataFrame, written to the remote cluster via the logical nameservice URI
dataFrame.write.orc("hdfs://hadooptest/dest/file.orc")
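The same settings can also be supplied up front instead of mutating sc.hadoopConfiguration after the context exists: Spark copies any property prefixed with spark.hadoop. into the Hadoop configuration, so they can live in SparkConf or be passed with spark-submit --conf. A minimal sketch reusing the values above (the app name is just a placeholder):

import org.apache.spark.{SparkConf, SparkContext}

// same remote HA settings as above, carried on SparkConf; Spark strips the
// "spark.hadoop." prefix and puts each entry into sc.hadoopConfiguration
val conf = new SparkConf()
  .setAppName("remote-ha-hdfs-example") // placeholder name
  .set("spark.hadoop.dfs.nameservices", "hadooptest")
  .set("spark.hadoop.dfs.client.failover.proxy.provider.hadooptest",
    "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider")
  .set("spark.hadoop.dfs.ha.namenodes.hadooptest", "nn1,nn2")
  .set("spark.hadoop.dfs.namenode.rpc-address.hadooptest.nn1", "nn1.net:8020")
  .set("spark.hadoop.dfs.namenode.rpc-address.hadooptest.nn2", "nn2.net:8020")
val sc = new SparkContext(conf)

The equivalent on the command line is --conf "spark.hadoop.dfs.nameservices=hadooptest" and so on for the remaining keys.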