尚硅谷大数据技术之电信客服
舟率率 10/25/2025 尚硅谷
# 项目概况
# 开发环境
centos7
# 软件版本
hadoop3.2.0、mysql8.0.41、jdk8、flume1.6.0、kafka2.8.2、hbase2.2.7
# 开发语言
Java
# 可视化图表



# 操作步骤
# 启动MySQL
# 查看mysql是否已启动(若未启动,先执行: systemctl start mysqld.service)
systemctl status mysqld.service
# 进入mysql终端
# MySQL的用户名:root 密码:123456
mysql -uroot -p123456
# 启动Hadoop
# 离开安全模式: hdfs dfsadmin -safemode leave
# 启动hadoop
bash /export/software/hadoop-3.2.0/sbin/start-hadoop.sh
# 停止hadoop
# bash /export/software/hadoop-3.2.0/sbin/stop-hadoop.sh

# 启动hbase
# 启动zookeeper
/export/software/apache-zookeeper-3.6.4-bin/bin/zkServer.sh start
# 开启hbase
sh /export/software/hbase-2.2.7/bin/start-hbase.sh
# 进入hbase shell
/export/software/hbase-2.2.7/bin/hbase shell
# 关闭hbase
# sh /export/software/hbase-2.2.7/bin/stop-hbase.sh
# 关闭zookeeper
# /export/software/apache-zookeeper-3.6.4-bin/bin/zkServer.sh stop
# 启动kafka
# 启动kafka
sh /export/software/kafka_2.12-2.8.2/bin/kafka-server-start.sh -daemon /export/software/kafka_2.12-2.8.2/config/server.properties
# 创建topic
/export/software/kafka_2.12-2.8.2/bin/kafka-topics.sh --create --topic calllog --replication-factor 1 --partitions 1 --zookeeper master:2181
# 启动消费者
/export/software/kafka_2.12-2.8.2/bin/kafka-console-consumer.sh --bootstrap-server master:9092 --topic calllog
# 关闭kafka
# sh /export/software/kafka_2.12-2.8.2/bin/kafka-server-stop.sh
# 准备目录
mkdir -p /data/jobs/project/
cd /data/jobs/project/
# 上传 "project-atguigu-telecom-customer-service" 整个文件夹 到 "/data/jobs/project/" 目录
yes | cp project-atguigu-telecom-customer-service/script/* .
# MySQL建表
cd /data/jobs/project/
mysql -u root -p < db_telecom.sql
# 程序打包迁移
cd /data/jobs/project/
# 打包
sed -i 's/\r//g' build_and_deploy.sh
bash build_and_deploy.sh
# 复制到路径
sed -i 's/\r//g' copy_file.sh
bash copy_file.sh
ls -l
# 准备hbase的协处理器
cd /data/jobs/project/
hdfs dfs -mkdir -p /hbase/coprocessor/
hdfs dfs -put -f ct_consumer-1.0-SNAPSHOT.jar /hbase/coprocessor/
hdfs dfs -ls /hbase/coprocessor/ct_consumer-1.0-SNAPSHOT.jar
# 启动flume
rm -rf /data/jobs/project/callLog.csv
touch /data/jobs/project/callLog.csv
# 另开一个终端窗口,用tail持续监控该文件的新增内容
tail -f /data/jobs/project/callLog.csv
# 启动flume
cd /export/software/apache-flume-1.6.0-bin/
bin/flume-ng agent -n a2 -c conf -f /data/jobs/project/source_file_sink_kafka.conf -Dflume.root.logger=INFO,console
# 创建hbase命名空间和表,并挂载协处理器
# 进入hbase shell
# /export/software/hbase-2.2.7/bin/hbase shell
# 查看命名空间是否存在(若返回空则不存在)
list_namespace 'ns_ct'
# 若不存在,创建命名空间
create_namespace 'ns_ct'
# 检查表是否存在
exists 'ns_ct:calllog'
# 创建表
create 'ns_ct:calllog', {NAME => 'f1'}, {NAME => 'f2'}, SPLITS => ['10000000', '20000000']
# 挂载协处理器,步骤: disable表 → alter添加coprocessor属性 → enable表
# 以下命令仍在hbase shell中执行(进入方式: /export/software/hbase-2.2.7/bin/hbase shell)
disable 'ns_ct:calllog'
alter 'ns_ct:calllog', METHOD => 'table_att', 'coprocessor' => '/hbase/coprocessor/ct_consumer-1.0-SNAPSHOT.jar|hbase.CalleeWriteObserver|100'
enable 'ns_ct:calllog'
# ct_consumer消费数据
cd /data/jobs/project/
java -cp ct_consumer.jar kafka.HBaseConsumer
# ct_producer生产数据
cd /data/jobs/project/
# 生产数据
sed -i 's/\r//g' product_log.sh
bash product_log.sh
# 验证tail命令监控窗口
# 验证kafka topic消费日志
# 验证flume日志更新
# 验证hbase
# 进入hbase shell
# /export/software/hbase-2.2.7/bin/hbase shell
# 在hbase shell中执行以下命令,查看表数据
scan 'ns_ct:calllog', {STARTROW=> '0', LIMIT => 10}
# ct_analysis分析
cd /data/jobs/project/
yes | cp /export/software/apache-hive-3.1.2-bin/lib/mysql-connector-j-8.0.33.jar /export/software/hadoop-3.2.0/share/hadoop/
java -cp ct_analysis.jar runner.CountDurationRunner
# 查看MySQL
# 请确认mysql服务已经启动了
# 进入MySQL终端 mysql -uroot -p123456
# 执行以下sql
USE db_telecom;
select * from tb_call limit 10;
select * from tb_contacts limit 10;
select * from tb_dimension_date limit 10;
# ct_web数据展示
# 解压 "project-atguigu-telecom-customer-service" 目录下的 "apache-tomcat-8.5.24_备份.7z" 压缩包文件到当前目录
# 安装 idea插件 "smart tomcat"
# 启动tomcat后,在浏览器访问: http://localhost:8080/ct_web/