Trovo qual è il PG problematico
[root@ceph-osd-03 ~]# ceph health detail HEALTH_ERR 1 scrub errors; Possible data damage: 1 pg inconsistent OSD_SCRUB_ERRORS 1 scrub errors PG_DAMAGED Possible data damage: 1 pg inconsistent pg 43.9 is active+clean+inconsistent, acting [73,45,15,24,97,58]
Cerco di vedere dove sta il problema
[root@ceph-osd-03 ~]# rados list-inconsistent-obj 43.9 --format=json-pretty
{
"epoch": 3272937,
"inconsistents": [
{
"object": {
"name": "rbd_data.44.4979edd3f6095c.000000000017f1f7",
"nspace": "",
"locator": "",
"snap": "head",
"version": 5320992
},
"errors": [],
"union_shard_errors": [
"read_error"
],
"selected_object_info": {
"oid": {
"oid": "rbd_data.44.4979edd3f6095c.000000000017f1f7",
"key": "",
"snapid": -2,
"hash": 788473801,
"max": 0,
"pool": 43,
"namespace": ""
},
"version": "3343372'5320992",
"prior_version": "3343372'5320991",
"last_reqid": "client.415639902.0:26811157",
"user_version": 5320992,
"size": 4194304,
"mtime": "2021-04-12 13:47:20.744809",
"local_mtime": "2021-04-12 13:47:20.756542",
"lost": 0,
"flags": [
"dirty"
],
"truncate_seq": 0,
"truncate_size": 0,
"data_digest": "0xffffffff",
"omap_digest": "0xffffffff",
"expected_object_size": 4194304,
"expected_write_size": 4194304,
"alloc_hint_flags": 0,
"manifest": {
"type": 0
},
"watchers": {}
},
"shards": [
{
"osd": 15,
"primary": false,
"shard": 2,
"errors": [],
"size": 1048576,
"omap_digest": "0xffffffff",
"data_digest": "0x00000000"
},
{
"osd": 24,
"primary": false,
"shard": 3,
"errors": [
"read_error"
],
"size": 1048576
},
{
"osd": 45,
"primary": false,
"shard": 1,
"errors": [],
"size": 1048576,
"omap_digest": "0xffffffff",
"data_digest": "0x00000000"
},
{
"osd": 58,
"primary": false,
"shard": 5,
"errors": [],
"size": 1048576,
"omap_digest": "0xffffffff",
"data_digest": "0x00000000"
},
{
"osd": 73,
"primary": true,
"shard": 0,
"errors": [],
"size": 1048576,
"omap_digest": "0xffffffff",
"data_digest": "0x00000000"
},
{
"osd": 97,
"primary": false,
"shard": 4,
"errors": [],
"size": 1048576,
"omap_digest": "0xffffffff",
"data_digest": "0x00000000"
}
]
}
]
}
[root@ceph-osd-03 ~]#
Qui si vede un read_error sull'OSD 24
read_error in genere vuol dire un problema su un disco.
Infatti c'è un problema sul disco usato da uno di quegli OSD (osd.24):
[root@ceph-osd-03 ~]# grep -i err /var/log/messages Apr 15 05:39:05 ceph-osd-03 kernel: sd 0:0:6:0: [sdg] tag#7 Sense Key : Medium Error [current] [descriptor] Apr 15 05:39:05 ceph-osd-03 kernel: sd 0:0:6:0: [sdg] tag#7 Add. Sense: Unrecovered read error Apr 15 05:39:05 ceph-osd-03 kernel: blk_update_request: critical medium error, dev sdg, sector 5536204472 Apr 15 05:39:05 ceph-osd-03 ceph-osd: 2021-04-15 05:39:05.962 7f9ebd71b700 -1 bdev(0x55b527728000 /var/lib/ceph/osd/ceph-24/block) _aio_thread got r=-5 ((5) Input/output error) Apr 15 05:39:05 ceph-osd-03 ceph-osd: 2021-04-15 05:39:05.962 7f9ebd71b700 -1 bdev(0x55b527728000 /var/lib/ceph/osd/ceph-24/block) _aio_thread translating the error to EIO for upper layer Apr 15 08:54:27 ceph-osd-03 kernel: sd 0:0:6:0: [sdg] tag#0 Sense Key : Medium Error [current] [descriptor] Apr 15 08:54:27 ceph-osd-03 kernel: sd 0:0:6:0: [sdg] tag#0 Add. Sense: Unrecovered read error Apr 15 08:54:27 ceph-osd-03 kernel: blk_update_request: critical medium error, dev sdg, sector 5536204472 Apr 15 08:54:27 ceph-osd-03 ceph-osd: 2021-04-15 08:54:27.208 7f9ebd71b700 -1 bdev(0x55b527728000 /var/lib/ceph/osd/ceph-24/block) _aio_thread got r=-5 ((5) Input/output error) Apr 15 08:54:27 ceph-osd-03 ceph-osd: 2021-04-15 08:54:27.208 7f9ebd71b700 -1 bdev(0x55b527728000 /var/lib/ceph/osd/ceph-24/block) _aio_thread translating the error to EIO for upper layer Apr 15 08:54:27 ceph-osd-03 ceph-osd: 2021-04-15 08:54:27.208 7f9ebd71b700 -1 bdev(0x55b527728000 /var/lib/ceph/osd/ceph-24/block) _aio_thread got r=-5 ((5) Input/output error) Apr 15 08:54:27 ceph-osd-03 ceph-osd: 2021-04-15 08:54:27.208 7f9ebd71b700 -1 bdev(0x55b527728000 /var/lib/ceph/osd/ceph-24/block) _aio_thread translating the error to EIO for upper layer Apr 15 08:54:27 ceph-osd-03 ceph-osd: 2021-04-15 08:54:27.575 7f9ea5eec700 -1 log_channel(cluster) log [ERR] : Error -5 reading object 43:93e4ff74:::rbd_data.44.4979edd3f6095c.000000000017f1f7:head
Il disco va sostituito
Intanto proviamo a sistemare questo PG:
[root@ceph-osd-03 ~]# ceph pg repair 43.9 instructing pg 43.9s0 on osd.73 to repair [root@ceph-osd-03 ~]#
In teoria, dopo un bel po', il PG dovrebbe sistemarsi.