Hadoop environment
The Hadoop version is:
$ hadoop version
Hadoop 2.10.1
Subversion https://github.com/apache/hadoop -r 1827467c9a56f133025f28557bfc2c562d78e816
Compiled by centos on 2020-09-14T13:17Z
Compiled with protoc 2.5.0
From source with checksum 3114edef868f1f3824e7d0f68be03650
Client development
- Add the dependencies (using Maven)
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.10.1</version>
</dependency>
- Write the code
package com.definesys.hadoop;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import java.io.FileInputStream;
import java.io.IOException;
/**
* @Description:
* @author: jianfeng.zheng
* @since: 2020/12/14 12:36 AM
* @history: 1.2020/12/14 created by jianfeng.zheng
*/
public class HDFS {

    public static void main(String[] cmd) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        // conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        // Access HDFS as the hadoop user instead of the local OS user
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);
        // Copy the local file /root/hello.txt to /demo/hello.txt on HDFS
        Path dst = new Path("hdfs://master:9000/demo/hello.txt");
        FSDataOutputStream os = fs.create(dst);
        FileInputStream is = new FileInputStream("/root/hello.txt");
        IOUtils.copy(is, os);
        is.close();
        os.close();
        fs.close();
    }
}
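To verify the upload, here is a minimal read-back sketch under the same assumptions (the master:9000 address and the hadoop user); the class name HDFSRead is illustrative only. It opens the file that was just written and streams it to stdout.
package com.definesys.hadoop;
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
/**
 * Read-back check for the write example above (a sketch, not part of the original project).
 */
public class HDFSRead {
    public static void main(String[] cmd) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);
        // Open the file written by the example above and stream it to stdout
        FSDataInputStream in = fs.open(new Path("/demo/hello.txt"));
        IOUtils.copy(in, System.out);
        in.close();
        fs.close();
    }
}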
- Packaging
For a web application, the project is usually packaged as a war or an ear. Either way, both formats bundle the dependency jars, so no special handling is needed. If you need to run it locally, you need the help of two plugins; copy the following configuration into pom.xml:
<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <archive>
                    <manifest>
                        <addClasspath>true</addClasspath>
                        <classpathPrefix>lib/</classpathPrefix>
                        <mainClass>com.definesys.hadoop.HDFS</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-dependency-plugin</artifactId>
            <executions>
                <execution>
                    <id>copy-dependencies</id>
                    <phase>package</phase>
                    <goals>
                        <goal>copy-dependencies</goal>
                    </goals>
                    <configuration>
                        <outputDirectory>${project.build.directory}/lib</outputDirectory>
                        <overWriteReleases>false</overWriteReleases>
                        <overWriteSnapshots>false</overWriteSnapshots>
                        <overWriteIfNewer>true</overWriteIfNewer>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
maven-jar-plugin generates the MANIFEST.MF file from this configuration. MANIFEST.MF records the main class and the dependency classpath, and looks something like this:
Manifest-Version: 1.0
Archiver-Version: Plexus Archiver
Built-By: asan
Class-Path: lib/hadoop-client-2.10.1.jar ....
Created-By: Apache Maven 3.6.3
Build-Jdk: 1.8.0_161
Main-Class: com.definesys.hadoop.HDFS
classpathPrefix specifies lib/ as the path where the dependency jars live, and the maven-dependency-plugin copies all dependencies into the specified directory, here ${project.build.directory}/lib, which matches classpathPrefix. After packaging, run the program with either of the following commands:
java -jar hadoop-hdfs-1.0.jar
# or specify the main class manually
java -cp hadoop-hdfs-1.0.jar com.definesys.hadoop.HDFS
There is another packaging plugin, maven-assembly-plugin, but it is not recommended here. The reason is that it unpacks all dependencies into a single jar, and since some Hadoop mechanisms are implemented through SPI, the unpacking causes configuration files to overwrite one another.
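If a merged jar does end up overwriting the META-INF/services descriptors, the typical symptom is an error along the lines of "No FileSystem for scheme: hdfs". A possible workaround (a sketch, not a substitute for proper packaging) is to register the implementation class explicitly, which is exactly what the commented-out fs.hdfs.impl line in the client example does:
// Workaround sketch (imports as in the client example above): register the
// HDFS implementation explicitly instead of relying on the SPI lookup that
// breaks when the service files are overwritten.
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000/");
conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
FileSystem fs = FileSystem.get(conf);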
A simple HDFS operations class
package com.definesys.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
/**
* @Description:
* @author: jianfeng.zheng
* @since: 2020/12/14 12:36 AM
* @history: 1.2020/12/14 created by jianfeng.zheng
*/
public class HDFS {

    public static void main(String[] cmd) throws IOException {
        HDFS hdfs = new HDFS();
        hdfs.mkdir("/hdfsDemo");
        hdfs.putFile("/root/hello.txt", "/hdfsDemo");
        hdfs.downloadFile("/hdfsDemo/hello.txt", "/root/hello-hdfs.txt");
        hdfs.deleteFile("/hdfsDemo");
    }

    // Create a directory on HDFS
    public boolean mkdir(String path) throws IOException {
        FileSystem fs = this.getHDFSFileSystem();
        return fs.mkdirs(new Path(path));
    }

    // Upload a local file to HDFS
    public void putFile(String localPath, String hdfsPath) throws IOException {
        this.getHDFSFileSystem().copyFromLocalFile(new Path(localPath), new Path(hdfsPath));
    }

    // Delete a file or directory on HDFS (recursively)
    public void deleteFile(String path) throws IOException {
        this.getHDFSFileSystem().delete(new Path(path), true);
    }

    // Download an HDFS file to the local file system
    public void downloadFile(String hdfsPath, String localPath) throws IOException {
        this.getHDFSFileSystem().copyToLocalFile(new Path(hdfsPath), new Path(localPath));
    }

    private FileSystem getHDFSFileSystem() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        try {
            return FileSystem.get(conf);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
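The class above covers mkdir, upload, download, and delete. As an illustration of how it could be extended, here is a sketch of a directory-listing method in the same style (the method name listFiles is hypothetical; FileSystem#listStatus is the real API, and it needs an extra import of org.apache.hadoop.fs.FileStatus):
// Possible extension of the class above (sketch): list the entries under an HDFS path.
public void listFiles(String path) throws IOException {
    FileSystem fs = this.getHDFSFileSystem();
    for (FileStatus status : fs.listStatus(new Path(path))) {
        System.out.println(status.getPermission() + "\t"
                + status.getOwner() + "\t"
                + status.getPath());
    }
}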
Problems
Permission issue
Exception in thread "main" org.apache.hadoop.security.AccessControlException: Permission denied: user=root, access=WRITE, inode="/":hadoop:supergroup:drwxr-xr-x
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:350)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189)
HDFS file permissions work much like Linux permissions: different users have different rights on each file. If the code does not set a user name, the operating-system user running the program is used, in this case root. We can check the HDFS file permissions:
$ hadoop fs -ls /
Found 5 items
drwxr-xr-x - asan supergroup 0 2020-12-16 10:07 /001
drwx-w---- - hadoop supergroup 0 2020-12-07 10:54 /tmp
drwxr-xr-x - hadoop supergroup 0 2020-12-07 11:05 /user
# permission of the root path
$ hadoop fs -ls -d /
drwxr-xr-x - hadoop supergroup 0 2020-12-18 00:42 /
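The same check can be done from Java; a minimal sketch under the same master:9000 assumption, mirroring hadoop fs -ls -d / above (imports as in the client example, plus org.apache.hadoop.fs.FileStatus):
// Sketch: inspect owner/group/permission of the HDFS root from the client side.
Configuration conf = new Configuration();
conf.set("fs.defaultFS", "hdfs://master:9000/");
FileSystem fs = FileSystem.get(conf);
FileStatus status = fs.getFileStatus(new Path("/"));
// prints something like: hadoop supergroup rwxr-xr-x
System.out.println(status.getOwner() + " " + status.getGroup() + " " + status.getPermission());
fs.close();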
There are several solutions:
- Change the permission of the root path, or of other directories, to 777
$ hadoop fs -chmod 777 /demo
$ hadoop fs -ls -d /demo
drwxrwxrwx - hadoop supergroup 0 2020-12-18 00:46 /demo
- Disable permission checking
Add the following configuration on the master node (in hdfs-site.xml):
<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>
- Set the user name in the code (recommended)
System.setProperty("HADOOP_USER_NAME", "hadoop");
This line must appear before any HDFS operation is performed.
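Besides the HADOOP_USER_NAME system property, the FileSystem API also accepts the user name directly; a minimal sketch under the same master:9000 assumption (note that this overload additionally throws InterruptedException and needs java.net.URI):
// Alternative sketch: pass the user to FileSystem.get instead of setting
// the HADOOP_USER_NAME system property.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create("hdfs://master:9000/"), conf, "hadoop");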