$ wget https://dlcdn.apache.org/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz
$ tar zxvf apache-hive-3.1.3-bin.tar.gz

.zshrc 수정
# 추가
export HIVE_HOME=/path/to/apache-hive-3.1.3-bin
# 수정
export PATH=$PATH:$HIVE_HOME/bin

$ $HADOOP_HOME/sbin/start-all.sh

# Hive의 중간 데이터 결과를 저장하기 위한 /tmp 디렉토리 생성
$ hadoop fs -mkdir /tmp
$ hadoop fs -chmod g+w /tmp
# hive.metastore.warehouse.dir의 기본 설정 위치. 데이터 웨어하우스를 저장하는 기본 디렉토리
$ hadoop fs -mkdir -p /user/hive/warehouse
$ hadoop fs -chmod g+w /user/hive/warehouse

$ $HIVE_HOME/bin/schematool -dbType derby -initSchema

$ hive

dataset/employees.txt
# 샘플 : 10001,1953-09-02,Georgi,Facello,M,1986-06-26,d005
$ head dataset/employees

dataset/departments.txt
# 샘플 : d009,Customer Service
$ head dataset/departments

$ hiveserver2 --hiveconf hive.server2.thrift.port=10000 --hiveconf hive.root.logger=DEBUG,console

beeline : Hive server2에 접속하여 쿼리를 실행하기 위한 CLI 도구
$ beeline
beeline> !help
beeline> !connect jdbc:hive2://localhost:10000
# default user : scott
# default password : tiger

에러 발생
User: is not allowed to impersonate
hive-site.xml 생성 후
vim $HIVE_HOME/conf/hive-site.xml

아래의 설정 추가
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <!-- Disable user impersonation so HiveServer2 executes queries as the
       server process user. This resolves the beeline connection error
       "User: ... is not allowed to impersonate" seen above. -->
  <property>
    <name>hive.server2.enable.doAs</name>
    <value>false</value>
  </property>
</configuration>

<!-- Next step: run hiveserver2 (hiveserver2 실행) -->
$ hiveserver2 --hiveconf hive.server2.thrift.port=10000 --hiveconf hive.root.logger=DEBUG,console

beeline 실행
$ beeline
beeline> !connect jdbc:hive2://localhost:10000
# default user : scott
# default password : tiger

employees 테이블 생성
-- Managed table stored under hive.metastore.warehouse.dir.
-- Column layout matches the sample row:
--   10001,1953-09-02,Georgi,Facello,M,1986-06-26,d005
CREATE TABLE employees
(
    emp_no INT,
    birth_date DATE,
    first_name STRING,
    last_name STRING,
    gender STRING,
    hire_date DATE,
    dept_no STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n';

-- Next step: load data into the table (테이블 데이터 로드)
-- Load the local employees file into the managed table.
-- NOTE: adjust the INPATH to your environment (path는 수정 필요).
-- OVERWRITE replaces any data already in the table.
LOAD DATA LOCAL INPATH '/path/to/dataengineering/dataset/employees'
OVERWRITE INTO TABLE employees;

-- Next step: query the data (데이터 조회)
-- Sanity check: first 10 loaded rows.
SELECT * FROM employees LIMIT 10;

-- Top 10 birth dates shared by the most employees,
-- ordered by how many employees share each date.
SELECT birth_date, count(birth_date) AS count
FROM employees
GROUP BY birth_date
ORDER BY count DESC
LIMIT 10;

-- Next step: inspect table metadata (테이블 정보 확인)
0: jdbc:hive2://localhost:10000> DESCRIBE EXTENDED employees;

저장된 파일 위치 확인
# employees 디렉토리 존재
$ hadoop fs -ls /user/hive/warehouse

테이블 삭제
0: jdbc:hive2://localhost:10000> DROP TABLE employees;

삭제 후 다시 파일 위치 확인
# employees가 존재하지 않음
$ hadoop fs -ls /user/hive/warehouse

Dataset 확인
$ hadoop fs -ls /user/fastcampus/hive/dataset
# 파일이 존재하지 않으면 업로드
$ hadoop fs -mkdir -p /user/fastcampus/hive/dataset/employees
$ hadoop fs -copyFromLocal ./dataset/employees /user/fastcampus/hive/dataset/employees

External table 생성
-- EXTERNAL table: Hive only manages metadata; the data files stay
-- at the given HDFS LOCATION and survive DROP TABLE.
CREATE EXTERNAL TABLE employees
(
    emp_no INT,
    birth_date DATE,
    first_name STRING,
    last_name STRING,
    gender STRING,
    hire_date DATE,
    dept_no STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/user/fastcampus/hive/dataset/employees';

-- Next step: verify the data (데이터 확인)
-- Verify the external table reads the files at its LOCATION.
-- (A terminating ';' was missing after LIMIT 100 in the original.)
SELECT * FROM employees
LIMIT 100;

-- Dropping an EXTERNAL table removes only the metastore entry;
-- the HDFS files are left intact (checked in the next step).
DROP TABLE employees;
show tables;
HDFS에 데이터가 그대로 있는지 확인
$ hadoop fs -ls /user/fastcampus/hive/dataset/employees

# 이전에 업로드 하지 않았다면 업로드
$ hadoop fs -mkdir -p /user/fastcampus/hive/dataset/employees
$ hadoop fs -copyFromLocal ./dataset/employees /user/fastcampus/hive/dataset/employees
# departments 데이터 업로드
$ hadoop fs -mkdir -p /user/fastcampus/hive/dataset/departments
$ hadoop fs -copyFromLocal ./dataset/departments /user/fastcampus/hive/dataset/departments

employees 테이블 생성
-- Re-create the employees external table over the uploaded HDFS files.
-- Column layout matches: 10001,1953-09-02,Georgi,Facello,M,1986-06-26,d005
CREATE EXTERNAL TABLE employees
(
    emp_no INT,
    birth_date DATE,
    first_name STRING,
    last_name STRING,
    gender STRING,
    hire_date DATE,
    dept_no STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/user/fastcampus/hive/dataset/employees';

-- Next step: create the departments table (departments 테이블 생성)
-- Departments external table; rows look like: d009,Customer Service
-- dept_no is the join key shared with employees.dept_no.
CREATE EXTERNAL TABLE departments
(
    dept_no STRING,
    dept_name STRING
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/user/fastcampus/hive/dataset/departments';

-- Next step: run the join in beeline (beeline에서 join 실행)
-- Inner join: resolve each employee's department name
-- via the shared dept_no key; show the first 10 matches.
SELECT
e.emp_no,
e.first_name,
e.last_name,
d.dept_name
FROM employees e
JOIN departments d ON e.dept_no = d.dept_no
LIMIT 10;