The problems could be caused by a backlog of purge operations that are still pending.
First, check which MDS instances are active:
[root@ceph-mds-01 ~]# ceph mds stat
cephfs:2 {0=ceph-mds-01=up:active,1=ceph-mds-02=up:active} 1 up:standby
Then, on each of them, check the purge queue:
[root@ceph-mds-01 ~]# ceph daemon mds.ceph-mds-01 perf dump | jq '.["purge_queue"]'
{
"pq_executing_ops": 10240,
"pq_executing_ops_high_water": 13202,
"pq_executing": 1,
"pq_executing_high_water": 16,
"pq_executed_ops": 75434534,
"pq_executed": 925172,
"pq_item_in_journal": 27476
}
[root@ceph-mds-01 ~]#
[root@ceph-mds-02 ~]# ceph daemon mds.ceph-mds-02 perf dump | jq '.["purge_queue"]'
{
"pq_executing_ops": 0,
"pq_executing_ops_high_water": 0,
"pq_executing": 0,
"pq_executing_high_water": 0,
"pq_executed_ops": 0,
"pq_executed": 0,
"pq_item_in_journal": 0
}
[root@ceph-mds-02 ~]#
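pq_item_in_journal is the number of purge queue entries that still have to be processed. In the example above, ceph-mds-01 has a backlog of 27476 entries, while ceph-mds-02 has none.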
See this thread: https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/2XJIEQ4XCGRGE4DFELXVUK7ANZEGPC4L/