View Source

The problems could be related to a backlog of purge operations that still have to be done

Check which are the active MDS instances:

[root@ceph-mds-01 ~]# ceph mds stat
cephfs:2 {0=ceph-mds-01=up:active,1=ceph-mds-02=up:active} 1 up:standby

and check the purge queue on each of them:

[root@ceph-mds-01 ~]# ceph daemon mds.ceph-mds-01 perf dump | jq '.["purge_queue"]'
{
  "pq_executing_ops": 10240,
  "pq_executing_ops_high_water": 13202,
  "pq_executing": 1,
  "pq_executing_high_water": 16,
  "pq_executed_ops": 75434534,
  "pq_executed": 925172,
  "pq_item_in_journal": 27476
}
[root@ceph-mds-01 ~]# 




[root@ceph-mds-02 ~]# ceph daemon mds.ceph-mds-02 perf dump | jq '.["purge_queue"]'
{
  "pq_executing_ops": 0,
  "pq_executing_ops_high_water": 0,
  "pq_executing": 0,
  "pq_executing_high_water": 0,
  "pq_executed_ops": 0,
  "pq_executed": 0,
  "pq_item_in_journal": 0
}
[root@ceph-mds-02 ~]#

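pq_item_in_journal is the number of purge queue entries that still have to be processed. In the example above, ceph-mds-01 has a backlog of 27476 entries, while ceph-mds-02 has none.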

See this thread: https://lists.ceph.io/hyperkitty/list/ceph-users@ceph.io/thread/2XJIEQ4XCGRGE4DFELXVUK7ANZEGPC4L/