Hadoop Environment

The Hadoop version is:

$ hadoop version
Hadoop 2.10.1
Subversion https://github.com/apache/hadoop -r 1827467c9a56f133025f28557bfc2c562d78e816
Compiled by centos on 2020-09-14T13:17Z
Compiled with protoc 2.5.0
From source with checksum 3114edef868f1f3824e7d0f68be03650

Client Development

  • Add the dependencies (using Maven)
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.10.1</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.10.1</version>
</dependency>
  • Write the code
package com.definesys.hadoop;

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

import java.io.FileInputStream;
import java.io.IOException;

/**
 * @Description:
 * @author: jianfeng.zheng
 * @since: 2020/12/14 12:36 AM
 * @history: 1.2020/12/14 created by jianfeng.zheng
 */
public class HDFS {

    public static void main(String[] cmd) throws IOException {
        Configuration conf = new Configuration();
        // Point the client at the NameNode
        conf.set("fs.defaultFS", "hdfs://master:9000/");
//        conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        // User to access HDFS as; see the permissions section below
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);
        Path dst = new Path("hdfs://master:9000/demo/hello.txt");
        // Copy a local file into HDFS
        FSDataOutputStream os = fs.create(dst);
        FileInputStream is = new FileInputStream("/root/hello.txt");
        IOUtils.copy(is, os);
        is.close();
        os.close();
        fs.close();
    }
}
  • Packaging

If this is a web application, it is usually packaged as a war or ear. Both of these formats bundle the dependency jars, so no special handling is needed. To run it locally, however, you need two plugins; copy the following configuration into pom.xml:

<build>
    <plugins>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-jar-plugin</artifactId>
            <version>2.6</version>
            <configuration>
                <archive>
                    <manifest>
                        <addClasspath>true</addClasspath>
                        <classpathPrefix>lib/</classpathPrefix>
                        <mainClass>com.definesys.hadoop.HDFS</mainClass>
                    </manifest>
                </archive>
            </configuration>
        </plugin>
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-dependency-plugin</artifactId>
            <executions>
                <execution>
                    <id>copy-dependencies</id>
                    <phase>package</phase>
                    <goals>
                        <goal>copy-dependencies</goal>
                    </goals>
                    <configuration>
                        <outputDirectory>${project.build.directory}/lib</outputDirectory>
                        <overWriteReleases>false</overWriteReleases>
                        <overWriteSnapshots>false</overWriteSnapshots>
                        <overWriteIfNewer>true</overWriteIfNewer>
                    </configuration>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

maven-jar-plugin generates the MANIFEST.MF file from this configuration. MANIFEST.MF records the main class and the dependency classpath, similar to the following:

Manifest-Version: 1.0
Archiver-Version: Plexus Archiver
Built-By: asan
Class-Path: lib/hadoop-client-2.10.1.jar ....
Created-By: Apache Maven 3.6.3
Build-Jdk: 1.8.0_161
Main-Class: com.definesys.hadoop.HDFS

classpathPrefix specifies that the dependency jars live under lib/, and maven-dependency-plugin copies all dependencies into the specified directory, here ${project.build.directory}/lib, which matches classpathPrefix. After packaging, simply run:

java -jar hadoop-hdfs-1.0.jar
# or specify the main class manually
java -cp hadoop-hdfs-1.0.jar com.definesys.hadoop.HDFS
There is another packaging plugin, maven-assembly-plugin, but it is not recommended here: it unpacks every dependency into a single jar, and since some Hadoop mechanisms are implemented via SPI, the unpacking causes service configuration files to overwrite one another; a common symptom and a workaround are sketched below.
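A typical symptom of an overwritten SPI service file (for example META-INF/services/org.apache.hadoop.fs.FileSystem) is a "No FileSystem for scheme: hdfs" error at runtime. A hedged workaround, echoing the commented-out line in the client code above, is to map the hdfs scheme to its implementation class explicitly in the Configuration. The class name ShadedJarWorkaround below is invented for this sketch and is not part of the original article:

package com.definesys.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;

import java.io.IOException;

// Sketch only: if the META-INF/services entry for
// org.apache.hadoop.fs.FileSystem is lost during fat-jar packaging,
// the "hdfs" scheme can be mapped to its implementation class explicitly.
public class ShadedJarWorkaround {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        // Explicit scheme-to-class mapping, equivalent to the SPI entry
        conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        FileSystem fs = FileSystem.get(conf);
        System.out.println(fs.getUri());
        fs.close();
    }
}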

A simple HDFS operations class

package com.definesys.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

/**
 * @Description:
 * @author: jianfeng.zheng
 * @since: 2020/12/14 12:36 AM
 * @history: 1.2020/12/14 created by jianfeng.zheng
 */
public class HDFS {

    public static void main(String[] cmd) throws IOException {
        HDFS hdfs = new HDFS();
        hdfs.mkdir("/hdfsDemo");
        hdfs.putFile("/root/hello.txt", "/hdfsDemo");
        hdfs.downloadFile("/hdfsDemo/hello.txt", "/root/hello-hdfs.txt");
        hdfs.deleteFile("/hdfsDemo");
    }

    // Create a directory (and any missing parents) on HDFS
    public boolean mkdir(String path) throws IOException {
        FileSystem fs = this.getHDFSFileSystem();
        return fs.mkdirs(new Path(path));
    }

    // Upload a local file to HDFS
    public void putFile(String localPath, String hdfsPath) throws IOException {
        this.getHDFSFileSystem().copyFromLocalFile(new Path(localPath), new Path(hdfsPath));
    }

    // Delete a file or directory (recursively) on HDFS
    public void deleteFile(String path) throws IOException {
        this.getHDFSFileSystem().delete(new Path(path), true);
    }

    // Download an HDFS file to the local filesystem
    public void downloadFile(String hdfsPath, String localPath) throws IOException {
        this.getHDFSFileSystem().copyToLocalFile(new Path(hdfsPath), new Path(localPath));
    }

    private FileSystem getHDFSFileSystem() {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000/");
        System.setProperty("HADOOP_USER_NAME", "hadoop");
        try {
            FileSystem fs = FileSystem.get(conf);
            return fs;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
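The class above covers directory creation, upload, download, and delete. If you also need to read a file's contents directly from HDFS, a hedged sketch of an extra method is shown below; readFile is a name invented here (not part of the original class) and it reuses the getHDFSFileSystem() helper:

    // Hypothetical addition: read an HDFS file into a UTF-8 String.
    // Extra imports needed: org.apache.hadoop.fs.FSDataInputStream,
    // java.io.ByteArrayOutputStream, org.apache.commons.io.IOUtils.
    public String readFile(String hdfsPath) throws IOException {
        FileSystem fs = this.getHDFSFileSystem();
        try (FSDataInputStream is = fs.open(new Path(hdfsPath));
             ByteArrayOutputStream os = new ByteArrayOutputStream()) {
            IOUtils.copy(is, os);
            return os.toString("UTF-8");
        }
    }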

Problems

Permission errors

Exception in thread "main" org.apache.hadoop.security.AccessControlException: Permission denied: user=root, access=WRITE, inode="/":hadoop:supergroup:drwxr-xr-x
        at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:350)
        at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251)
        at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189)

HDFS file permissions are similar to Linux: different users have different permissions on a file. If the code does not specify a username, the operating-system user running the program is used, in this case root. We can check the HDFS file permissions:

$ hadoop fs -ls /
Found 5 items
drwxr-xr-x   - asan   supergroup          0 2020-12-16 10:07 /001
drwx-w----   - hadoop supergroup          0 2020-12-07 10:54 /tmp
drwxr-xr-x   - hadoop supergroup          0 2020-12-07 11:05 /user
# root path permissions
$ hadoop fs -ls -d /
drwxr-xr-x   - hadoop supergroup          0 2020-12-18 00:42 /

There are several solutions:

  • Change the permissions of the root path or other directories to 777
$ hadoop fs -chmod 777 /demo
$ hadoop fs -ls -d /demo
drwxrwxrwx   - hadoop supergroup          0 2020-12-18 00:46 /demo
  • Disable permission checking

Add the following configuration on the master node (in hdfs-site.xml) and restart HDFS for it to take effect:

<property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
</property>
  • Set the username in the code (recommended)
System.setProperty("HADOOP_USER_NAME", "hadoop");
This line must be added before any HDFS operation is executed.
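Besides the system property, a hedged alternative (if you prefer not to rely on process-wide state) is the FileSystem.get overload that takes the user name directly. A minimal sketch, with HDFSAsUser being a class name invented for this example:

package com.definesys.hadoop;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.net.URI;

// Minimal sketch: pass the HDFS user explicitly instead of setting
// the HADOOP_USER_NAME system property before the first operation.
public class HDFSAsUser {
    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();
        // FileSystem.get(URI, Configuration, String user) accesses HDFS
        // as the given user ("hadoop" here, matching the article).
        FileSystem fs = FileSystem.get(URI.create("hdfs://master:9000/"), conf, "hadoop");
        System.out.println(fs.exists(new Path("/")));
        fs.close();
    }
}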