https://www.jianshu.com/p/e628da68328d
Add the Tsinghua University Ceph mirror repositories on all nodes (the noarch section is what lets ceph-deploy come from this mirror instead of EPEL):
#cat >> /etc/yum.repos.d/ceph.repo << EOF
[ceph]
name=Ceph
baseurl=https://mirrors.tuna.tsinghua.edu.cn/ceph/rpm-nautilus/el7/x86_64/
enabled=1
priority=2
gpgcheck=1
[ceph-noarch]
name=Ceph noarch
baseurl=https://mirrors.tuna.tsinghua.edu.cn/ceph/rpm-nautilus/el7/noarch/
enabled=1
priority=2
gpgcheck=1
EOF
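Optionally rebuild the yum metadata cache so the new repositories are picked up right away (a minimal sketch):
#yum clean all
#yum makecache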
Import the Ceph release GPG key:
#rpm --import 'https://download.ceph.com/keys/release.asc'
Install the yum-plugin-priorities package:
#yum install yum-plugin-priorities
Confirm that the yum-plugin-priorities configuration file has enabled=1:
cat /etc/yum/pluginconf.d/priorities.conf
[main]
enabled = 1
Install the Ceph packages on the storage nodes. Take the latest ceph-deploy from the Ceph repository; the version in EPEL is outdated.
#yum -y install ceph-deploy
#yum -y install ntp ntpdate ntp-doc openssh-server snappy leveldb gdisk python-argparse gperftools-libs
#yum -y install ceph
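To confirm what was installed (the mirror above should provide ceph-deploy 2.x and a Nautilus ceph build), check the versions:
#ceph-deploy --version
#ceph --version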
Cluster layout: ceph01/ceph02/ceph03, 3 MON + 6 OSD.
On each storage node (ceph01, ceph02, ceph03), add an account used for deployment:
useradd ceph-deploy
echo ceph-deploy:ceph123 | chpasswd
echo "ceph-deploy ALL = (root) NOPASSWD:ALL" | tee /etc/sudoers.d/ceph-deploy
chmod 0440 /etc/sudoers.d/ceph-deploy
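A quick check that the NOPASSWD sudo rule works (it should print root without asking for a password):
su - ceph-deploy -c 'sudo whoami'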
On the admin machine, as the deploying user, generate an SSH key and configure passwordless login to ceph01, ceph02, and ceph03:
ssh-keygen
ssh-copy-id ceph-deploy@ceph01
ssh-copy-id ceph-deploy@ceph02
ssh-copy-id ceph-deploy@ceph03
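To avoid passing --username to every ceph-deploy command, an entry per node can be added to ~/.ssh/config on the admin machine; a sketch for ceph01 (repeat the block for ceph02 and ceph03):
cat >> ~/.ssh/config << EOF
Host ceph01
    Hostname ceph01
    User ceph-deploy
EOF
chmod 600 ~/.ssh/config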
Create a my-cluster directory and cd into it; all subsequent ceph-deploy commands must be run from this directory:
mkdir my-cluster
cd my-cluster/
Create the new cluster, bootstrap the initial monitor, and push the configuration and admin keyring to each node:
ceph-deploy --username ceph-deploy new ceph01
ceph-deploy --username ceph-deploy mon create-initial
ceph-deploy --username ceph-deploy admin ceph01 ceph02 ceph03
Deploy the Ceph daemons. If a step fails, check that the required packages are installed and that the expected directories exist:
ceph-deploy --username ceph-deploy --overwrite-conf mon create ceph01 ceph02 ceph03
ceph-deploy --username ceph-deploy --overwrite-conf mgr create ceph01 ceph02 ceph03
ceph-deploy --username ceph-deploy --overwrite-conf mds create ceph01 ceph02 ceph03
ceph-deploy --username ceph-deploy --overwrite-conf rgw create ceph01 ceph02 ceph03
To add another node (for example ceph04) for a given daemon, run the corresponding subcommand for the new host:
ceph-deploy --username ceph-deploy --overwrite-conf mon add ceph04
(for mgr, mds, and rgw, rerun the matching create subcommand with the new hostname)
Add OSDs to the cluster:
ceph-deploy --username ceph-deploy osd create --data /dev/sdb ceph01
ceph-deploy --username ceph-deploy osd create --data /dev/sdc ceph01
ceph-deploy --username ceph-deploy osd create --data /dev/sdb ceph02
ceph-deploy --username ceph-deploy osd create --data /dev/sdc ceph02
ceph-deploy --username ceph-deploy osd create --data /dev/sdb ceph03
ceph-deploy --username ceph-deploy osd create --data /dev/sdc ceph03
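If osd create fails because a disk still has old partitions or signatures, the disk can be listed and wiped first; a sketch for /dev/sdb on ceph01 (this destroys all data on the device):
ceph-deploy --username ceph-deploy disk list ceph01
ceph-deploy --username ceph-deploy disk zap ceph01 /dev/sdb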
Check the cluster status from any node:
#ceph -s
  cluster:
    id:     0cd78d03-771a-4c45-99eb-49b200ae7338
    health: HEALTH_WARN
            too few PGs per OSD (16 < min 30)

  services:
    mon: 3 daemons, quorum ceph01,ceph03,ceph02 (age 27m)
    mgr: ceph01(active, since 82m), standbys: ceph02, ceph03
    osd: 6 osds: 6 up (since 54m), 6 in (since 54m)
    rgw: 3 daemons active (ceph01, ceph02, ceph03)

  data:
    pools:   4 pools, 32 pgs
    objects: 189 objects, 2.3 KiB
    usage:   6.0 GiB used, 888 GiB / 894 GiB avail
    pgs:     32 active+clean
Create a pool pool_1 with 100 PGs (the HEALTH_WARN above comes from 32 PGs × 3 replicas / 6 OSDs ≈ 16 PGs per OSD, below the minimum of 30):
#ceph osd pool create pool_1 100
Create a pool pool_2 with 100 PGs:
#ceph osd pool create pool_2 100
The cluster now reports a healthy status:
#ceph -s
  cluster:
    id:     0cd78d03-771a-4c45-99eb-49b200ae7338
    health: HEALTH_OK

  services:
    mon: 3 daemons, quorum ceph01,ceph03,ceph02 (age 12m)
    mgr: ceph01(active, since 107m), standbys: ceph02, ceph03
    osd: 6 osds: 6 up (since 79m), 6 in (since 79m)
    rgw: 3 daemons active (ceph01, ceph02, ceph03)

  data:
    pools:   6 pools, 232 pgs
    objects: 189 objects, 2.3 KiB
    usage:   6.0 GiB used, 888 GiB / 894 GiB avail
    pgs:     232 active+clean
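To see how the 232 PGs are spread across the pools, either of these can be used:
#ceph osd pool ls detail
#ceph df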
On the deployment node ceph01, append to the configuration file to turn off RBD features that the client kernel does not support:
#cat >> /root/my-cluster/ceph.conf << EOF
rbd_default_features = 1
EOF
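rbd_default_features = 1 only affects images created after this change; for an existing image whose features the kernel client rejects, the extra features are commonly disabled per image, e.g. a sketch for lun1:
#rbd feature disable pool_1/lun1 exclusive-lock object-map fast-diff deep-flatten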
From the deployment node ceph01, push the configuration file to the storage nodes and the client:
#ceph-deploy --overwrite-conf admin ceph01 ceph02 ceph03 mgmt01
Create five RBD block devices, lun1 through lun5, in the cluster:
#rbd create pool_1/lun1 --size 10G
#rbd create pool_1/lun2 --size 20G
#rbd create pool_1/lun3 --size 10G
#rbd create pool_1/lun4 --size 10G
#rbd create pool_1/lun5 --size 10G
Check the details of each RBD image:
#rbd --image pool_1/lun1 info
rbd image 'lun1':
    size 10 GiB in 2560 objects
    order 22 (4 MiB objects)
    snapshot_count: 0
    id: 8ab460553879
    block_name_prefix: rbd_data.8ab460553879
    format: 2
    features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
    op_features:
    flags:
    create_timestamp: Mon Jun 10 11:08:16 2019
    access_timestamp: Mon Jun 10 11:08:16 2019
    modify_timestamp: Mon Jun 10 11:08:16 2019
#rbd --image pool_1/lun4 info
rbd image 'lun4':
    size 10 GiB in 2560 objects
    order 22 (4 MiB objects)
    snapshot_count: 0
    id: 652180d4d5fa
    block_name_prefix: rbd_data.652180d4d5fa
    format: 2
    features: layering, exclusive-lock, object-map, fast-diff, deep-flatten
    op_features:
    flags:
    create_timestamp: Mon Jun 10 11:08:56 2019
    access_timestamp: Mon Jun 10 11:08:56 2019
    modify_timestamp: Mon Jun 10 11:08:56 2019
List the RBD images in pool_1:
# rbd ls pool_1
lun1
lun2
lun3
lun4
lun5
Map the block device pool_1/lun1 on the client mgmt01.
On mgmt01, install the Ceph client packages and enable the rbd kernel module, then reboot (or just run modprobe rbd):
#yum install ceph-common -y
#echo 'rbd' > /etc/modules-load.d/rbd.conf
#reboot
#lsmod | grep rbd
rbd 83640 0
libceph 306625 1 rbd
Map the RBD image lun1 on the client:
# rbd map pool_1/lun1
/dev/rbd0
# fdisk -l | grep rbd
Disk /dev/rbd0: 10.7 GB, 10737418240 bytes, 20971520 sectors
# rbd device list
id pool namespace image snap device
0 pool_1 lun1 - /dev/rbd0
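When the device is no longer needed on this client, it can be unmapped:
# rbd unmap /dev/rbd0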
The admin keyring distributed by ceph-deploy --overwrite-conf admin carries far more privileges than a block-device client needs; create a dedicated client.rbd keyring for the client mgmt01 instead:
# ceph auth get-or-create client.rbd mon 'allow r' osd 'allow class-read object_prefix rbd_children, allow rwx pool=pool_1' > ceph.client.rbd.keyring
# scp ceph.client.rbd.keyring mgmt01:/etc/ceph/
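The caps granted to the new user can be reviewed from any node that has the admin keyring:
# ceph auth get client.rbd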
Run ceph -s on mgmt01 as client.rbd:
#ceph -s --name client.rbd
  cluster:
    id:     0cd78d03-771a-4c45-99eb-49b200ae7338
    health: HEALTH_WARN
            application not enabled on 1 pool(s)

  services:
    mon: 3 daemons, quorum ceph01,ceph03,ceph02 (age 21h)
    mgr: ceph01(active, since 4d), standbys: ceph02, ceph03
    osd: 6 osds: 6 up (since 4d), 6 in (since 4d)
    rgw: 3 daemons active (ceph01, ceph02, ceph03)

  data:
    pools:   6 pools, 232 pgs
    objects: 237 objects, 2.5 KiB
    usage:   6.0 GiB used, 888 GiB / 894 GiB avail
    pgs:     232 active+clean
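The remaining HEALTH_WARN means one pool has no application tag. ceph health detail names the offending pool; assuming it is one of the manually created RBD pools (pool_2 used here as an example), it can be tagged with admin privileges on a cluster node:
#ceph health detail
#ceph osd pool application enable pool_2 rbd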
Create a block device on mgmt01 as client.rbd:
# rbd create pool_1/lun193 --size 10G --name client.rbd
Map a block device on mgmt01 as client.rbd:
# rbd map pool_1/lun193 --name client.rbd
/dev/rbd0
Map the block devices and format them in the operating system:
#rbd map pool_1/lun1 --name client.rbd
#rbd map pool_1/lun2 --name client.rbd
#rbd map pool_1/lun3 --name client.rbd
#rbd map pool_1/lun4 --name client.rbd
#rbd map pool_1/lun5 --name client.rbd
#mkfs.ext4 /dev/rbd1
#mkfs.ext4 /dev/rbd2
#mkfs.ext4 /dev/rbd3
#mkfs.ext4 /dev/rbd4
#mkfs.ext4 /dev/rbd5
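To sanity-check one of the formatted devices locally (a sketch; /mnt/lun1 is a hypothetical mount point):
#mkdir -p /mnt/lun1
#mount /dev/rbd1 /mnt/lun1
#df -h /mnt/lun1
#umount /mnt/lun1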
Enable the rbd kernel module on all Kubernetes nodes and install ceph-common:
#ansible node -m shell -a 'modprobe rbd'
#ansible node -m shell -a 'yum install -y ceph-common'
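To keep the module loaded across reboots on every node, the same modules-load.d approach used on mgmt01 can be pushed with ansible (a sketch):
#ansible node -m shell -a "echo rbd > /etc/modules-load.d/rbd.conf"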
On ceph01, get the key for client.rbd and base64-encode it:
# ceph auth get-key client.rbd | base64
QVFER0hmOWN5ZVJJTkJBQUR2bHRkQzFZWkFCVkZxU0djS3NyT0E9PQ==
Using the output above, create a Secret named ceph-client-rbd:
apiVersion: v1
kind: Secret
metadata:
  name: ceph-client-rbd
type: "kubernetes.io/rbd"
data:
  key: QVFER0hmOWN5ZVJJTkJBQUR2bHRkQzFZWkFCVkZxU0djS3NyT0E9PQ==
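A sketch of applying the Secret, assuming the manifest above is saved as ceph-client-rbd-secret.yaml (hypothetical filename):
#kubectl apply -f ceph-client-rbd-secret.yaml
#kubectl get secret ceph-client-rbd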
Create the PV. Note: the user field here is rbd, not client.rbd:
kind: PersistentVolume
apiVersion: v1
metadata:
  name: ceph-pool1-lun1
spec:
  storageClassName: manual
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteOnce
  rbd:
    fsType: ext4
    image: lun1
    monitors:
      - '22.22.3.245:6789'
      - '22.22.3.246:6789'
      - '22.22.3.247:6789'
    pool: pool_1
    readOnly: false
    secretRef:
      name: ceph-client-rbd
      namespace: default
    user: rbd
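A sketch of applying the PV, assuming the manifest is saved as ceph-pool1-lun1.yaml (hypothetical filename); the other four PVs follow the same pattern with their own name, capacity, and image:
#kubectl apply -f ceph-pool1-lun1.yaml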
Create five PVs following the parameters in the table. Mismatched capacities and reuse of the same Ceph image do not raise any error:
#kubectl get pv
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
ceph-pool1-lun1 10Gi RWO Retain Available 15m
ceph-pool1-lun2 15Gi RWO Retain Available 12m
ceph-pool1-lun3 10Gi RWO Retain Available 8m21s
ceph-pool1-lun4 15Gi RWO Retain Available 8m21s
ceph-pool1-lun5 10Gi RWO Retain Available 8m20s
Create a PVC:
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: pvc1
spec:
  storageClassName: manual
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 10Gi
Check the PVC status:
# kubectl get pvc
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
pvc1 Bound ceph-pool1-lun1 10Gi RWO manual 10m
Create a Pod that mounts pvc1:
kind: Pod
apiVersion: v1
metadata:
  name: task-pv-pod
spec:
  volumes:
    - name: task-pv-volume
      persistentVolumeClaim:
        claimName: pvc1
  containers:
    - name: task-pv-container
      image: nginx
      ports:
        - containerPort: 80
          name: "http-server"
      volumeMounts:
        - mountPath: "/usr/share/nginx/html"
          name: task-pv-volume
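A sketch of creating the Pod, assuming the manifest above is saved as task-pv-pod.yaml (hypothetical filename):
#kubectl apply -f task-pv-pod.yaml
#kubectl get pod task-pv-pod -o wide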
Delete the Pod and recreate it on another node; the files on the mounted volume remain unchanged.
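A sketch of that test (testfile is a hypothetical name; pinning the Pod to a different node would need spec.nodeName or a nodeSelector, not shown here):
#kubectl exec -it task-pv-pod -- touch /usr/share/nginx/html/testfile
#kubectl delete pod task-pv-pod
#kubectl apply -f task-pv-pod.yaml
#kubectl exec -it task-pv-pod -- ls /usr/share/nginx/html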
Looking at how PVCs bind: a PVC is matched only against the capacity declared in the PV, while the link between a PV and its Ceph image is checked neither for actual size nor for reuse.
If the RBD image is already in use elsewhere, the Pod fails to start; kubectl describe pod shows:
MountVolume.WaitForAttach failed for volume "ceph-pool1-lun2" : rbd image pool_1/lun2 is still being used
Modify the Pod to use pvc2. Inside the container the filesystem is 20Gi, not the 15Gi declared in the PV and PVC; in other words, the OS sees the actual image size, and the capacity in the PV/PVC is used only to bind them to each other.
# kubectl exec -it task-pv-pod /bin/sh
# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/rbd0 20G 45M 19G 1% /usr/share/nginx/html
Create two Pods that use pvc3 and pvc4 at the same time; both are backed by lun4 on Ceph. Both Pods start normally and both can read and write. Since the filesystem is ext4 (not a cluster filesystem), this usage is not recommended.
# kubectl get pods | grep task
task-pv-pod 1/1 Running 0 32s
task-pv-pod-2 1/1 Running 0 32s