Discussion:
[PATCH 2/9] btrfs: Add helper to check the btrfs header.
Goffredo Baroncelli
2018-06-19 17:39:49 UTC
Permalink
This helper s used in a few places to help the debugging. As
conservative approach the error is only logged.
This to not impact the error handling.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 24 +++++++++++++++++++++++-
1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index fbabaebbe..c1eb6c521 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -77,7 +77,8 @@ struct btrfs_header
{
grub_btrfs_checksum_t checksum;
grub_btrfs_uuid_t uuid;
- grub_uint8_t dummy[0x30];
+ grub_uint64_t bytenr;
+ grub_uint8_t dummy[0x28];
grub_uint32_t nitems;
grub_uint8_t level;
} GRUB_PACKED;
@@ -286,6 +287,25 @@ free_iterator (struct grub_btrfs_leaf_descriptor *desc)
grub_free (desc->data);
}

+static grub_err_t
+check_btrfs_header (struct grub_btrfs_data *data, struct btrfs_header *header,
+ grub_disk_addr_t addr)
+{
+ if (grub_le_to_cpu64 (header->bytenr) != addr)
+ {
+ grub_dprintf ("btrfs", "btrfs_header.bytenr is not equal node addr\n");
+ return grub_error (GRUB_ERR_BAD_FS,
+ "header bytenr is not equal node addr");
+ }
+ if (grub_memcmp (data->sblock.uuid, header->uuid, sizeof(grub_btrfs_uuid_t)))
+ {
+ grub_dprintf ("btrfs", "btrfs_header.uuid doesn't match sblock uuid\n");
+ return grub_error (GRUB_ERR_BAD_FS,
+ "header uuid doesn't match sblock uuid");
+ }
+ return GRUB_ERR_NONE;
+}
+
static grub_err_t
save_ref (struct grub_btrfs_leaf_descriptor *desc,
grub_disk_addr_t addr, unsigned i, unsigned m, int l)
@@ -341,6 +361,7 @@ next (struct grub_btrfs_data *data,

err = grub_btrfs_read_logical (data, grub_le_to_cpu64 (node.addr),
&head, sizeof (head), 0);
+ check_btrfs_header (data, &head, grub_le_to_cpu64 (node.addr));
if (err)
return -err;

@@ -402,6 +423,7 @@ lower_bound (struct grub_btrfs_data *data,
/* FIXME: preread few nodes into buffer. */
err = grub_btrfs_read_logical (data, addr, &head, sizeof (head),
recursion_depth + 1);
+ check_btrfs_header (data, &head, addr);
if (err)
return err;
addr += sizeof (head);
--
2.18.0.rc2
Goffredo Baroncelli
2018-06-19 17:39:52 UTC
Permalink
A portion of the logging code is moved outside of internal for(;;). The part
that is left inside is the one which depends on the internal for(;;) index.

This is a preparatory patch. The next one will refactor the code inside
the for(;;) into an another function.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 25 ++++++++++++++-----------
1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 70bcb0fdc..15ed59151 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -867,6 +867,18 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

for (j = 0; j < 2; j++)
{
+ grub_dprintf ("btrfs", "chunk 0x%" PRIxGRUB_UINT64_T
+ "+0x%" PRIxGRUB_UINT64_T
+ " (%d stripes (%d substripes) of %"
+ PRIxGRUB_UINT64_T ")\n",
+ grub_le_to_cpu64 (key->offset),
+ grub_le_to_cpu64 (chunk->size),
+ grub_le_to_cpu16 (chunk->nstripes),
+ grub_le_to_cpu16 (chunk->nsubstripes),
+ grub_le_to_cpu64 (chunk->stripe_length));
+ grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
+ addr);
+
for (i = 0; i < redundancy; i++)
{
struct grub_btrfs_chunk_stripe *stripe;
@@ -879,20 +891,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;

- grub_dprintf ("btrfs", "chunk 0x%" PRIxGRUB_UINT64_T
- "+0x%" PRIxGRUB_UINT64_T
- " (%d stripes (%d substripes) of %"
- PRIxGRUB_UINT64_T ") stripe %" PRIxGRUB_UINT64_T
+ grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
" maps to 0x%" PRIxGRUB_UINT64_T "\n",
- grub_le_to_cpu64 (key->offset),
- grub_le_to_cpu64 (chunk->size),
- grub_le_to_cpu16 (chunk->nstripes),
- grub_le_to_cpu16 (chunk->nsubstripes),
- grub_le_to_cpu64 (chunk->stripe_length),
stripen, stripe->offset);
grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T
- " for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
- addr);
+ "\n", paddr);

dev = find_device (data, stripe->device_id);
if (!dev)
--
2.18.0.rc2
Daniel Kiper
2018-07-12 14:06:49 UTC
Permalink
Post by Goffredo Baroncelli
A portion of the logging code is moved outside of internal for(;;). The part
that is left inside is the one which depends on the internal for(;;) index.
This is a preparatory patch. The next one will refactor the code inside
the for(;;) into an another function.
Reviewed-by: Daniel Kiper <***@oracle.com>

Daniel
Goffredo Baroncelli
2018-06-19 17:39:48 UTC
Permalink
Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 70 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..fbabaebbe 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,74 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ case GRUB_BTRFS_CHUNK_TYPE_RAID5:
+ case GRUB_BTRFS_CHUNK_TYPE_RAID6:
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * Below is an example of a RAID 6 layout and the meaning of the
+ * variables. The same applies to RAID 5. The only differences is
+ * that there is only one parity disk instead of two.
+ *
+ * A RAID 6 layout consists of several stripes spread
+ * on the disks, following a layout like the one below
+ *
+ * Disk0 Disk1 Disk2 Ddisk3
+ *
+ * A1 B1 P1 Q1
+ * Q2 A2 B2 P2
+ * P3 Q3 A3 B3
+ * [...]
+ *
+ * Note that the placement of the parities depends on row index.
+ * In the code below:
+ * - stripe_nr is the stripe number not considering the parities
+ * (A1 = 0, B1 = 1, A2 = 2, B2 = 3, ...),
+ * - high is the row number (0 for A1...Q1, 1 for Q2...P2, ...),
+ * - stripen is the column number (or disk number),
+ * - off is the logical address to read (from the beginning of
+ * the chunk space),
+ * - chunk_stripe_length is the size of a stripe (typically 64k),
+ * - nstripes is the number of disks,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the disk offset from the beginning
+ * of the disk chunk mapping start,
+ * - csize is the "potential" data to read. It will be reduced to
+ * size if the latter is smaller.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
+
+ /*
+ * stripen is evaluated without considering
+ * the parities (0 for A1, A2, A3... 1 for B1, B2...).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
+
+ /*
+ * stripen now considers also the parities (0 for A1, 1 for A2,
+ * 2 for A3....). The math is performed modulo number of disks.
+ */
+ grub_divmod64 (high + stripen, nstripes, &stripen);
+
+ stripe_offset = low + chunk_stripe_length * high;
+ csize = chunk_stripe_length - low;
+
break;
}
default:
--
2.18.0.rc2
Goffredo Baroncelli
2018-07-08 15:51:37 UTC
Permalink
A gentle ping.

BR
G.Baroncelli
Post by Goffredo Baroncelli
---
grub-core/fs/btrfs.c | 70 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..fbabaebbe 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,74 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * Below is an example of a RAID 6 layout and the meaning of the
+ * variables. The same applies to RAID 5. The only differences is
+ * that there is only one parity disk instead of two.
+ *
+ * A RAID 6 layout consists of several stripes spread
+ * on the disks, following a layout like the one below
+ *
+ * Disk0 Disk1 Disk2 Ddisk3
+ *
+ * A1 B1 P1 Q1
+ * Q2 A2 B2 P2
+ * P3 Q3 A3 B3
+ * [...]
+ *
+ * Note that the placement of the parities depends on row index.
+ * - stripe_nr is the stripe number not considering the parities
+ * (A1 = 0, B1 = 1, A2 = 2, B2 = 3, ...),
+ * - high is the row number (0 for A1...Q1, 1 for Q2...P2, ...),
+ * - stripen is the column number (or disk number),
+ * - off is the logical address to read (from the beginning of
+ * the chunk space),
+ * - chunk_stripe_length is the size of a stripe (typically 64k),
+ * - nstripes is the number of disks,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the disk offset from the beginning
+ * of the disk chunk mapping start,
+ * - csize is the "potential" data to read. It will be reduced to
+ * size if the latter is smaller.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
+
+ /*
+ * stripen is evaluated without considering
+ * the parities (0 for A1, A2, A3... 1 for B1, B2...).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
+
+ /*
+ * stripen now considers also the parities (0 for A1, 1 for A2,
+ * 2 for A3....). The math is performed modulo number of disks.
+ */
+ grub_divmod64 (high + stripen, nstripes, &stripen);
+
+ stripe_offset = low + chunk_stripe_length * high;
+ csize = chunk_stripe_length - low;
+
break;
}
--
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D 17B2 0EDA 9B37 8B82 E0B5
Daniel Kiper
2018-07-09 10:20:15 UTC
Permalink
Post by Goffredo Baroncelli
A gentle ping.
Sorry for delay. I remember about this patchset but I am swamped with other stuff.
I will take a look at it this week.

Daniel
Goffredo Baroncelli
2018-07-09 16:29:15 UTC
Permalink
Post by Daniel Kiper
Post by Goffredo Baroncelli
A gentle ping.
Sorry for delay. I remember about this patchset but I am swamped with other stuff.
I will take a look at it this week.
Daniel
Thanks for the feedback: I was worried that my emails were lost somewhere in/with the spams :-)

BR
G.Baroncelli
--
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D 17B2 0EDA 9B37 8B82 E0B5
Daniel Kiper
2018-07-12 13:46:08 UTC
Permalink
Post by Goffredo Baroncelli
---
grub-core/fs/btrfs.c | 70 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..fbabaebbe 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,74 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * Below is an example of a RAID 6 layout and the meaning of the
+ * variables. The same applies to RAID 5. The only differences is
+ * that there is only one parity disk instead of two.
+ *
+ * A RAID 6 layout consists of several stripes spread
+ * on the disks, following a layout like the one below
+ *
+ * Disk0 Disk1 Disk2 Ddisk3
s/Ddisk3/Disk3/
Post by Goffredo Baroncelli
+ *
+ * A1 B1 P1 Q1
+ * Q2 A2 B2 P2
+ * P3 Q3 A3 B3
+ * [...]
+ *
+ * Note that the placement of the parities depends on row index.
+ * - stripe_nr is the stripe number not considering the parities
+ * (A1 = 0, B1 = 1, A2 = 2, B2 = 3, ...),
+ * - high is the row number (0 for A1...Q1, 1 for Q2...P2, ...),
+ * - stripen is the column number (or disk number),
+ * - off is the logical address to read (from the beginning of
+ * the chunk space),
What do you mean by chunk?
Post by Goffredo Baroncelli
+ * - chunk_stripe_length is the size of a stripe (typically 64k),
+ * - nstripes is the number of disks,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the disk offset from the beginning
+ * of the disk chunk mapping start,
Err... I am confused here...
Post by Goffredo Baroncelli
+ * - csize is the "potential" data to read. It will be reduced to
+ * size if the latter is smaller.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
Here it seems to me that off is from the beginning of RAID volume space.
So, it does not agree with the comment above...
Post by Goffredo Baroncelli
+ /*
+ * stripen is evaluated without considering
+ * the parities (0 for A1, A2, A3... 1 for B1, B2...).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
This looks good.
Post by Goffredo Baroncelli
+ /*
+ * stripen now considers also the parities (0 for A1, 1 for A2,
+ * 2 for A3....). The math is performed modulo number of disks.
+ */
+ grub_divmod64 (high + stripen, nstripes, &stripen);
Ditto.
Post by Goffredo Baroncelli
+ stripe_offset = low + chunk_stripe_length * high;
And here I am confused. Why "* high"? If chunk_stripe_length is stripe
length then stripe_offset leads to nowhere. Am I missing something?
I have a feeling that comments does not agree with the code somehow.

Daniel
Goffredo Baroncelli
2018-07-18 06:24:50 UTC
Permalink
Post by Daniel Kiper
Post by Goffredo Baroncelli
---
grub-core/fs/btrfs.c | 70 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..fbabaebbe 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,74 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * Below is an example of a RAID 6 layout and the meaning of the
+ * variables. The same applies to RAID 5. The only differences is
+ * that there is only one parity disk instead of two.
+ *
+ * A RAID 6 layout consists of several stripes spread
+ * on the disks, following a layout like the one below
+ *
+ * Disk0 Disk1 Disk2 Ddisk3
s/Ddisk3/Disk3/
Post by Goffredo Baroncelli
+ *
+ * A1 B1 P1 Q1
+ * Q2 A2 B2 P2
+ * P3 Q3 A3 B3
+ * [...]
+ *
+ * Note that the placement of the parities depends on row index.
+ * - stripe_nr is the stripe number not considering the parities
+ * (A1 = 0, B1 = 1, A2 = 2, B2 = 3, ...),
+ * - high is the row number (0 for A1...Q1, 1 for Q2...P2, ...),
+ * - stripen is the column number (or disk number),
+ * - off is the logical address to read (from the beginning of
+ * the chunk space),
What do you mean by chunk?
Is a specific btrfs data structure (see https://btrfs.wiki.kernel.org/index.php/Manpage/mkfs.btrfs#BLOCK_GROUPS.2C_CHUNKS.2C_RAID). Most of the BTRFS internal structures (e.g. where the file data is stored) are in terms of *logical address*. At this level there is no concept of redundancy or raid nor disks. You can see the logical address as a flat space, large as the sum of the disks sizes (minus the space used by the parities or mirroring).

The "logical space" is divided in "slices" called chunk. A chunk typically has a size in terms of hundred of megabytes (IIRC up to 1GB)

A chunk (or block group) is a mapping. A chunk maps a "logical address" to a "physical address" on the disks.

You can think a chunk as a structure like:

u64 profile
u64 logical_start, logical_end
u64 disk1_id, disk1_start, disk1_end
u64 disk2_id, disk2_start, disk2_end
u64 disk3_id, disk3_start, disk3_end
[...]

In order to translate a "logical_address" to the pair of "disk" and "physical address:
- find the chunk which maps the logical address (looking to the "logical_start", "logical_end")
- then on the basis of "profile" value you can have the following cases:
profile == SINGLE:
is the simplest one: the logical address is mapped to the range
(disk1_start...disk1_end) of the disk "disk_id1"

profile == RAID1
like the previous one; however if the disk1 is not available,
you can find the data in the range
(disk2_start...disk2_end) of the disk "disk_id2". NOTE: the two ranges
must have the same sizes, but may have different starts

profile == RAID0
the data is without redundancy and it is spread on different disk each
"chunk_stripe_length" bytes; so:
off = logical_address - logical_start // this is what I call
// address from the
// beginning of the chunk






and so on....
Post by Daniel Kiper
Post by Goffredo Baroncelli
+ * - chunk_stripe_length is the size of a stripe (typically 64k),
+ * - nstripes is the number of disks,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the disk offset from the beginning
+ * of the disk chunk mapping start,
Err... I am confused here...
As described above, the chunk data structure translate from "logical address" to the pair (disk, physical address on the disk). Each chunk translate a small portion of the space (typically 1GB); so it is more correct to say that a chunk maps a range in the logical address to a range in the disks. 'off' is the offset from the beginning of the range in the logical address. 'stripe_offset' is the offset from the beginning of the range in the physical space
Post by Daniel Kiper
Post by Goffredo Baroncelli
+ * - csize is the "potential" data to read. It will be reduced to
+ * size if the latter is smaller.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
Here it seems to me that off is from the beginning of RAID volume space.
So, it does not agree with the comment above...
RAID volume space == chunk logical address space
Post by Daniel Kiper
Post by Goffredo Baroncelli
+ /*
+ * stripen is evaluated without considering
+ * the parities (0 for A1, A2, A3... 1 for B1, B2...).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
This looks good.
Post by Goffredo Baroncelli
+ /*
+ * stripen now considers also the parities (0 for A1, 1 for A2,
+ * 2 for A3....). The math is performed modulo number of disks.
+ */
+ grub_divmod64 (high + stripen, nstripes, &stripen);
Ditto.
Post by Goffredo Baroncelli
+ stripe_offset = low + chunk_stripe_length * high;
And here I am confused. Why "* high"? If chunk_stripe_length is stripe
length then stripe_offset leads to nowhere. Am I missing something?
I have a feeling that comments does not agree with the code somehow.
The "physical range" is divided in small range called(?) "stripe". These stripe are spread on the disks. 'high' is the offset from the beginning of the physical chunk range in unit of "chunk_stripe_length"

I found quite confuse the btrfs terminology around the "stripe" word. And I have to point out that these variables were named before my patch.

I am afraid that my comment doesn't help to "de-obfuscate" my code. I know that my English is far to be perfect. However this code is not so different that the previous one (i.e. handling the btrfs raid1/dup/raid0....). So I am thinking to remove all the comments because it seems to me that the my comments increase the confusion instead of reducing it.

Please, let me know your opinion
Post by Daniel Kiper
Daniel
--
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D 17B2 0EDA 9B37 8B82 E0B5
Daniel Kiper
2018-09-03 12:52:45 UTC
Permalink
Post by Goffredo Baroncelli
Post by Daniel Kiper
Post by Goffredo Baroncelli
---
grub-core/fs/btrfs.c | 70 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index be195448d..fbabaebbe 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -119,6 +119,8 @@ struct grub_btrfs_chunk_item
#define GRUB_BTRFS_CHUNK_TYPE_RAID1 0x10
#define GRUB_BTRFS_CHUNK_TYPE_DUPLICATED 0x20
#define GRUB_BTRFS_CHUNK_TYPE_RAID10 0x40
+#define GRUB_BTRFS_CHUNK_TYPE_RAID5 0x80
+#define GRUB_BTRFS_CHUNK_TYPE_RAID6 0x100
grub_uint8_t dummy2[0xc];
grub_uint16_t nstripes;
grub_uint16_t nsubstripes;
@@ -764,6 +766,74 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
stripe_offset = low + chunk_stripe_length
* high;
csize = chunk_stripe_length - low;
+ break;
+ }
+ {
+ grub_uint64_t nparities, stripe_nr, high, low;
+
+ redundancy = 1; /* no redundancy for now */
+
+ if (grub_le_to_cpu64 (chunk->type) & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ {
+ grub_dprintf ("btrfs", "RAID5\n");
+ nparities = 1;
+ }
+ else
+ {
+ grub_dprintf ("btrfs", "RAID6\n");
+ nparities = 2;
+ }
+
+ /*
+ * Below is an example of a RAID 6 layout and the meaning of the
+ * variables. The same applies to RAID 5. The only differences is
+ * that there is only one parity disk instead of two.
+ *
+ * A RAID 6 layout consists of several stripes spread
+ * on the disks, following a layout like the one below
+ *
+ * Disk0 Disk1 Disk2 Ddisk3
s/Ddisk3/Disk3/
Post by Goffredo Baroncelli
+ *
+ * A1 B1 P1 Q1
+ * Q2 A2 B2 P2
+ * P3 Q3 A3 B3
+ * [...]
+ *
+ * Note that the placement of the parities depends on row index.
+ * - stripe_nr is the stripe number not considering the parities
+ * (A1 = 0, B1 = 1, A2 = 2, B2 = 3, ...),
+ * - high is the row number (0 for A1...Q1, 1 for Q2...P2, ...),
+ * - stripen is the column number (or disk number),
+ * - off is the logical address to read (from the beginning of
+ * the chunk space),
What do you mean by chunk?
Is a specific btrfs data structure (see
https://btrfs.wiki.kernel.org/index.php/Manpage/mkfs.btrfs#BLOCK_GROUPS.2C_CHUNKS.2C_RAID).
Most of the BTRFS internal structures (e.g. where the file data is
stored) are in terms of *logical address*. At this level there is no
concept of redundancy or raid nor disks. You can see the logical
address as a flat space, large as the sum of the disks sizes (minus
the space used by the parities or mirroring).
The "logical space" is divided in "slices" called chunk. A chunk
typically has a size in terms of hundred of megabytes (IIRC up to 1GB)
A chunk (or block group) is a mapping. A chunk maps a "logical
address" to a "physical address" on the disks.
u64 profile
u64 logical_start, logical_end
u64 disk1_id, disk1_start, disk1_end
u64 disk2_id, disk2_start, disk2_end
u64 disk3_id, disk3_start, disk3_end
[...]
- find the chunk which maps the logical address (looking to the "logical_start", "logical_end")
is the simplest one: the logical address is mapped to the range
(disk1_start...disk1_end) of the disk "disk_id1"
profile == RAID1
like the previous one; however if the disk1 is not available,
you can find the data in the range
(disk2_start...disk2_end) of the disk "disk_id2". NOTE: the two ranges
must have the same sizes, but may have different starts
profile == RAID0
the data is without redundancy and it is spread on different disk each
off = logical_address - logical_start // this is what I call
// address from the
// beginning of the chunk
and so on....
Post by Daniel Kiper
Post by Goffredo Baroncelli
+ * - chunk_stripe_length is the size of a stripe (typically 64k),
+ * - nstripes is the number of disks,
+ * - low is the offset of the data inside a stripe,
+ * - stripe_offset is the disk offset from the beginning
+ * of the disk chunk mapping start,
Err... I am confused here...
As described above, the chunk data structure translate from "logical
address" to the pair (disk, physical address on the disk). Each chunk
translate a small portion of the space (typically 1GB); so it is more
correct to say that a chunk maps a range in the logical address to a
range in the disks. 'off' is the offset from the beginning of the
range in the logical address. 'stripe_offset' is the offset from the
beginning of the range in the physical space
Post by Daniel Kiper
Post by Goffredo Baroncelli
+ * - csize is the "potential" data to read. It will be reduced to
+ * size if the latter is smaller.
+ */
+ stripe_nr = grub_divmod64 (off, chunk_stripe_length, &low);
Here it seems to me that off is from the beginning of RAID volume space.
So, it does not agree with the comment above...
RAID volume space == chunk logical address space
Post by Daniel Kiper
Post by Goffredo Baroncelli
+ /*
+ * stripen is evaluated without considering
+ * the parities (0 for A1, A2, A3... 1 for B1, B2...).
+ */
+ high = grub_divmod64 (stripe_nr, nstripes - nparities, &stripen);
This looks good.
Post by Goffredo Baroncelli
+ /*
+ * stripen now considers also the parities (0 for A1, 1 for A2,
+ * 2 for A3....). The math is performed modulo number of disks.
+ */
+ grub_divmod64 (high + stripen, nstripes, &stripen);
Ditto.
Post by Goffredo Baroncelli
+ stripe_offset = low + chunk_stripe_length * high;
And here I am confused. Why "* high"? If chunk_stripe_length is stripe
length then stripe_offset leads to nowhere. Am I missing something?
I have a feeling that comments does not agree with the code somehow.
The "physical range" is divided in small range called(?) "stripe".
These stripe are spread on the disks. 'high' is the offset from the
beginning of the physical chunk range in unit of "chunk_stripe_length"
I found quite confuse the btrfs terminology around the "stripe" word.
And I have to point out that these variables were named before my
patch.
I am afraid that my comment doesn't help to "de-obfuscate" my code. I
It does. Please extend the code comment accordingly. You can add a link
to the BTRFS Wiki page. Though please try to use terms only from BRFS
specs/docs. Do not introduce new ones if it is not really needed. This
whole thing will ease reading new and old code.
Post by Goffredo Baroncelli
know that my English is far to be perfect. However this code is not so
Do not worry about that. Many people here learn English. Just keep doing it.
Post by Goffredo Baroncelli
different that the previous one (i.e. handling the btrfs
raid1/dup/raid0....). So I am thinking to remove all the comments
because it seems to me that the my comments increase the confusion
instead of reducing it.
No, please do not do that. We needed it. If you take into account my
comments then everything should be OK.

Anyway, the code starts looking good. I think that we need 1-3
iterations and I will be able to get it into GRUB2 git repo.
So, please keep working on this feature.

Daniel

Goffredo Baroncelli
2018-06-19 17:39:55 UTC
Permalink
The original code which handles the recovery of a RAID 6 disks array
assumes that all reads are multiple of 1 << GRUB_DISK_SECTOR_BITS and it
assumes that all the I/O is done via the struct grub_diskfilter_segment.
This is not true for the btrfs code. In order to reuse the native
grub_raid6_recover() code, it is modified to not call
grub_diskfilter_read_node() directly, but to call an handler passed
as an argument.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/disk/raid6_recover.c | 52 ++++++++++++++++++++++------------
include/grub/diskfilter.h | 9 ++++++
2 files changed, 43 insertions(+), 18 deletions(-)

diff --git a/grub-core/disk/raid6_recover.c b/grub-core/disk/raid6_recover.c
index aa674f6ca..0cf691ddf 100644
--- a/grub-core/disk/raid6_recover.c
+++ b/grub-core/disk/raid6_recover.c
@@ -74,14 +74,26 @@ mod_255 (unsigned x)
}

static grub_err_t
-grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
- char *buf, grub_disk_addr_t sector, grub_size_t size)
+raid6_recover_read_node (void *data, int disknr,
+ grub_uint64_t sector,
+ void *buf, grub_size_t size)
+{
+ struct grub_diskfilter_segment *array = data;
+
+ return grub_diskfilter_read_node (&array->nodes[disknr],
+ (grub_disk_addr_t)sector,
+ size >> GRUB_DISK_SECTOR_BITS, buf);
+}
+
+grub_err_t
+grub_raid6_recover_gen (void *data, grub_uint64_t nstripes, int disknr, int p,
+ char *buf, grub_uint64_t sector, grub_size_t size,
+ int layout, raid_recover_read_t read_func)
{
int i, q, pos;
int bad1 = -1, bad2 = -1;
char *pbuf = 0, *qbuf = 0;

- size <<= GRUB_DISK_SECTOR_BITS;
pbuf = grub_zalloc (size);
if (!pbuf)
goto quit;
@@ -91,17 +103,17 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
goto quit;

q = p + 1;
- if (q == (int) array->node_count)
+ if (q == (int) nstripes)
q = 0;

pos = q + 1;
- if (pos == (int) array->node_count)
+ if (pos == (int) nstripes)
pos = 0;

- for (i = 0; i < (int) array->node_count - 2; i++)
+ for (i = 0; i < (int) nstripes - 2; i++)
{
int c;
- if (array->layout & GRUB_RAID_LAYOUT_MUL_FROM_POS)
+ if (layout & GRUB_RAID_LAYOUT_MUL_FROM_POS)
c = pos;
else
c = i;
@@ -109,8 +121,7 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
bad1 = c;
else
{
- if (! grub_diskfilter_read_node (&array->nodes[pos], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (!read_func(data, pos, sector, buf, size))
{
grub_crypto_xor (pbuf, pbuf, buf, size);
grub_raid_block_mulx (c, buf, size);
@@ -128,7 +139,7 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
}

pos++;
- if (pos == (int) array->node_count)
+ if (pos == (int) nstripes)
pos = 0;
}

@@ -139,16 +150,14 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
if (bad2 < 0)
{
/* One bad device */
- if ((! grub_diskfilter_read_node (&array->nodes[p], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf)))
+ if (!read_func(data, p, sector, buf, size))
{
grub_crypto_xor (buf, buf, pbuf, size);
goto quit;
}

grub_errno = GRUB_ERR_NONE;
- if (grub_diskfilter_read_node (&array->nodes[q], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, q, sector, buf, size))
goto quit;

grub_crypto_xor (buf, buf, qbuf, size);
@@ -160,14 +169,12 @@ grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
/* Two bad devices */
unsigned c;

- if (grub_diskfilter_read_node (&array->nodes[p], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, p, sector, buf, size))
goto quit;

grub_crypto_xor (pbuf, pbuf, buf, size);

- if (grub_diskfilter_read_node (&array->nodes[q], sector,
- size >> GRUB_DISK_SECTOR_BITS, buf))
+ if (read_func(data, q, sector, buf, size))
goto quit;

grub_crypto_xor (qbuf, qbuf, buf, size);
@@ -190,6 +197,15 @@ quit:
return grub_errno;
}

+static grub_err_t
+grub_raid6_recover (struct grub_diskfilter_segment *array, int disknr, int p,
+ char *buf, grub_disk_addr_t sector, grub_size_t size)
+{
+ return grub_raid6_recover_gen (array, array->node_count, disknr, p, buf,
+ sector, size << GRUB_DISK_SECTOR_BITS,
+ array->layout, raid6_recover_read_node);
+}
+
GRUB_MOD_INIT(raid6rec)
{
grub_raid6_init_table ();
diff --git a/include/grub/diskfilter.h b/include/grub/diskfilter.h
index d89273c1b..8deb1a8c3 100644
--- a/include/grub/diskfilter.h
+++ b/include/grub/diskfilter.h
@@ -189,6 +189,15 @@ typedef grub_err_t (*grub_raid6_recover_func_t) (struct grub_diskfilter_segment
extern grub_raid5_recover_func_t grub_raid5_recover_func;
extern grub_raid6_recover_func_t grub_raid6_recover_func;

+typedef grub_err_t (* raid_recover_read_t)(void *data, int disk_nr,
+ grub_uint64_t addr, void *dest,
+ grub_size_t size);
+
+extern grub_err_t
+grub_raid6_recover_gen (void *data, grub_uint64_t nstripes, int disknr, int p,
+ char *buf, grub_uint64_t sector, grub_size_t size,
+ int layout, raid_recover_read_t read_func);
+
grub_err_t grub_diskfilter_vg_register (struct grub_diskfilter_vg *vg);

grub_err_t
--
2.18.0.rc2
Daniel Kiper
2018-07-12 14:41:37 UTC
Permalink
Post by Goffredo Baroncelli
The original code which handles the recovery of a RAID 6 disks array
assumes that all reads are multiple of 1 << GRUB_DISK_SECTOR_BITS and it
assumes that all the I/O is done via the struct grub_diskfilter_segment.
This is not true for the btrfs code. In order to reuse the native
grub_raid6_recover() code, it is modified to not call
grub_diskfilter_read_node() directly, but to call an handler passed
as an argument.
Reviewed-by: Daniel Kiper <***@oracle.com>

Daniel
Goffredo Baroncelli
2018-06-19 17:39:56 UTC
Permalink
Add the RAID 6 recovery, in order to use a RAID 6 filesystem even if some
disks (up to two) are missing. This code use the md RAID 6 code already
present in grub.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 51 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 7f01e447a..13777c868 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -30,6 +30,7 @@
#include <grub/i18n.h>
#include <grub/btrfs.h>
#include <grub/crypto.h>
+#include <grub/diskfilter.h>

GRUB_MOD_LICENSE ("GPLv3+");

@@ -705,11 +706,36 @@ rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
}
}

+static grub_err_t
+raid6_recover_read_buffer (void *data, int disk_nr,
+ grub_uint64_t addr __attribute__ ((unused)),
+ void *dest, grub_size_t size)
+{
+ struct raid56_buffer *buffers = data;
+
+ if (!buffers[disk_nr].data_is_valid)
+ return grub_errno = GRUB_ERR_READ_ERROR;
+
+ grub_memcpy(dest, buffers[disk_nr].buf, size);
+
+ return grub_errno = GRUB_ERR_NONE;
+}
+
+static void
+rebuild_raid6 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
+ grub_uint64_t csize, grub_uint64_t parities_pos, void *dest,
+ grub_uint64_t stripen)
+
+{
+ grub_raid6_recover_gen (buffers, nstripes, stripen, parities_pos,
+ dest, 0, csize, 0, raid6_recover_read_buffer);
+}
+
static grub_err_t
raid56_read_retry (struct grub_btrfs_data *data,
struct grub_btrfs_chunk_item *chunk,
- grub_uint64_t stripe_offset,
- grub_uint64_t csize, void *buf)
+ grub_uint64_t stripe_offset, grub_uint64_t stripen,
+ grub_uint64_t csize, void *buf, grub_uint64_t parities_pos)
{
struct raid56_buffer *buffers = NULL;
grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
@@ -788,6 +814,15 @@ raid56_read_retry (struct grub_btrfs_data *data,
ret = GRUB_ERR_READ_ERROR;
goto cleanup;
}
+ else if (failed_devices > 2 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID6))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for raid6: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
else
{
grub_dprintf ("btrfs",
@@ -800,7 +835,7 @@ raid56_read_retry (struct grub_btrfs_data *data,
if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
rebuild_raid5 (buf, buffers, nstripes, csize);
else
- grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+ rebuild_raid6 (buffers, nstripes, csize, parities_pos, buf, stripen);

cleanup:
if (buffers)
@@ -892,9 +927,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
unsigned redundancy = 1;
unsigned i, j;
int is_raid56;
+ grub_uint64_t parities_pos = 0;

- is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
- GRUB_BTRFS_CHUNK_TYPE_RAID5);
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ (GRUB_BTRFS_CHUNK_TYPE_RAID5 |
+ GRUB_BTRFS_CHUNK_TYPE_RAID6));

if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -1039,6 +1076,8 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
* 2 for A3....). The math is performed modulo number of disks.
*/
grub_divmod64 (high + stripen, nstripes, &stripen);
+ grub_divmod64 (high + nstripes - nparities, nstripes,
+ &parities_pos);

stripe_offset = low + chunk_stripe_length * high;
csize = chunk_stripe_length - low;
@@ -1093,7 +1132,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_errno = GRUB_ERR_NONE;
if (err != GRUB_ERR_NONE)
err = raid56_read_retry (data, chunk, stripe_offset,
- csize, buf);
+ stripen, csize, buf, parities_pos);
}
if (err == GRUB_ERR_NONE)
break;
--
2.18.0.rc2
Daniel Kiper
2018-07-12 14:47:57 UTC
Permalink
Post by Goffredo Baroncelli
Add the RAID 6 recovery, in order to use a RAID 6 filesystem even if some
disks (up to two) are missing. This code use the md RAID 6 code already
present in grub.
---
grub-core/fs/btrfs.c | 51 ++++++++++++++++++++++++++++++++++++++------
1 file changed, 45 insertions(+), 6 deletions(-)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 7f01e447a..13777c868 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -30,6 +30,7 @@
#include <grub/i18n.h>
#include <grub/btrfs.h>
#include <grub/crypto.h>
+#include <grub/diskfilter.h>
GRUB_MOD_LICENSE ("GPLv3+");
@@ -705,11 +706,36 @@ rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
}
}
+static grub_err_t
+raid6_recover_read_buffer (void *data, int disk_nr,
+ grub_uint64_t addr __attribute__ ((unused)),
+ void *dest, grub_size_t size)
+{
+ struct raid56_buffer *buffers = data;
+
+ if (!buffers[disk_nr].data_is_valid)
+ return grub_errno = GRUB_ERR_READ_ERROR;
+
+ grub_memcpy(dest, buffers[disk_nr].buf, size);
+
+ return grub_errno = GRUB_ERR_NONE;
+}
+
+static void
+rebuild_raid6 (struct raid56_buffer *buffers, grub_uint64_t nstripes,
+ grub_uint64_t csize, grub_uint64_t parities_pos, void *dest,
+ grub_uint64_t stripen)
+
+{
+ grub_raid6_recover_gen (buffers, nstripes, stripen, parities_pos,
+ dest, 0, csize, 0, raid6_recover_read_buffer);
+}
+
static grub_err_t
raid56_read_retry (struct grub_btrfs_data *data,
struct grub_btrfs_chunk_item *chunk,
- grub_uint64_t stripe_offset,
- grub_uint64_t csize, void *buf)
+ grub_uint64_t stripe_offset, grub_uint64_t stripen,
+ grub_uint64_t csize, void *buf, grub_uint64_t parities_pos)
{
struct raid56_buffer *buffers = NULL;
grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
@@ -788,6 +814,15 @@ raid56_read_retry (struct grub_btrfs_data *data,
ret = GRUB_ERR_READ_ERROR;
goto cleanup;
}
+ else if (failed_devices > 2 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID6))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for raid6: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
else
{
grub_dprintf ("btrfs",
@@ -800,7 +835,7 @@ raid56_read_retry (struct grub_btrfs_data *data,
if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
rebuild_raid5 (buf, buffers, nstripes, csize);
else
- grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+ rebuild_raid6 (buffers, nstripes, csize, parities_pos, buf, stripen);
if (buffers)
@@ -892,9 +927,11 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
unsigned redundancy = 1;
unsigned i, j;
int is_raid56;
+ grub_uint64_t parities_pos = 0;
- is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
- GRUB_BTRFS_CHUNK_TYPE_RAID5);
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ (GRUB_BTRFS_CHUNK_TYPE_RAID5 |
+ GRUB_BTRFS_CHUNK_TYPE_RAID6));
if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -1039,6 +1076,8 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
* 2 for A3....). The math is performed modulo number of disks.
*/
grub_divmod64 (high + stripen, nstripes, &stripen);
+ grub_divmod64 (high + nstripes - nparities, nstripes,
+ &parities_pos);
This change suggest that this line does not belong to patch #1.
And you have to add parities_pos to the comment above too.

Daniel
Goffredo Baroncelli
2018-06-19 17:39:50 UTC
Permalink
The caller knows better if this error is fatal or not, i.e. another disk is
available or not.

This is a preparatory patch.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index c1eb6c521..8d07e2d72 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -604,9 +604,6 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
grub_device_iterate (find_device_iter, &ctx);
if (!ctx.dev_found)
{
- grub_error (GRUB_ERR_BAD_FS,
- N_("couldn't find a necessary member device "
- "of multi-device filesystem"));
return NULL;
}
data->n_devices_attached++;
@@ -902,6 +899,9 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
dev = find_device (data, stripe->device_id, j);
if (!dev)
{
+ grub_dprintf ("btrfs",
+ "couldn't find a necessary member device "
+ "of multi-device filesystem\n");
err = grub_errno;
grub_errno = GRUB_ERR_NONE;
continue;
--
2.18.0.rc2
Daniel Kiper
2018-07-12 13:51:59 UTC
Permalink
Post by Goffredo Baroncelli
The caller knows better if this error is fatal or not, i.e. another disk is
available or not.
This is a preparatory patch.
Reviewed-by: Daniel Kiper <***@oracle.com>

Daniel
Goffredo Baroncelli
2018-06-19 17:39:53 UTC
Permalink
Move the code in charge to read the data from disk into a separate
function. This helps to separate the error handling logic (which depend on
the different raid profiles) from the read from disk logic.
Refactoring this code increases the general readability too.

This is a preparatory patch, to help the adding of the RAID 5/6 recovery
code.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 75 ++++++++++++++++++++++++++------------------
1 file changed, 44 insertions(+), 31 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 15ed59151..fa5bf56e0 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -625,6 +625,46 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id)
return ctx.dev_found;
}

+static grub_err_t
+btrfs_read_from_chunk (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripen, grub_uint64_t stripe_offset,
+ int redundancy, grub_uint64_t csize,
+ void *buf)
+{
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err;
+
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
+ /* Right now the redundancy handling is easy.
+ With RAID5-like it will be more difficult. */
+ stripe += stripen + redundancy;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+
+ grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
+ " maps to 0x%" PRIxGRUB_UINT64_T
+ ". Reading paddr 0x%" PRIxGRUB_UINT64_T "\n",
+ stripen, stripe->offset, paddr);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ grub_dprintf ("btrfs",
+ "couldn't find a necessary member device "
+ "of multi-device filesystem\n");
+ grub_errno = GRUB_ERR_NONE;
+ return GRUB_ERR_READ_ERROR;
+ }
+
+ err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buf);
+ return err;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -638,7 +678,6 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_err_t err = 0;
struct grub_btrfs_key key_out;
int challoc = 0;
- grub_device_t dev;
struct grub_btrfs_key key_in;
grub_size_t chsize;
grub_disk_addr_t chaddr;
@@ -881,36 +920,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,

for (i = 0; i < redundancy; i++)
{
- struct grub_btrfs_chunk_stripe *stripe;
- grub_disk_addr_t paddr;
-
- stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
- /* Right now the redundancy handling is easy.
- With RAID5-like it will be more difficult. */
- stripe += stripen + i;
-
- paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
-
- grub_dprintf ("btrfs", "stripe %" PRIxGRUB_UINT64_T
- " maps to 0x%" PRIxGRUB_UINT64_T "\n",
- stripen, stripe->offset);
- grub_dprintf ("btrfs", "reading paddr 0x%" PRIxGRUB_UINT64_T
- "\n", paddr);
-
- dev = find_device (data, stripe->device_id);
- if (!dev)
- {
- grub_dprintf ("btrfs",
- "couldn't find a necessary member device "
- "of multi-device filesystem\n");
- err = grub_errno;
- grub_errno = GRUB_ERR_NONE;
- continue;
- }
-
- err = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
- paddr & (GRUB_DISK_SECTOR_SIZE - 1),
- csize, buf);
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
if (!err)
break;
grub_errno = GRUB_ERR_NONE;
--
2.18.0.rc2
Daniel Kiper
2018-07-12 14:10:55 UTC
Permalink
Post by Goffredo Baroncelli
Move the code in charge to read the data from disk into a separate
function. This helps to separate the error handling logic (which depend on
s/depend/depends/
Post by Goffredo Baroncelli
the different raid profiles) from the read from disk logic.
Refactoring this code increases the general readability too.
This is a preparatory patch, to help the adding of the RAID 5/6 recovery
code.
Otherwise, Reviewed-by: Daniel Kiper <***@oracle.com>

Daniel
Goffredo Baroncelli
2018-06-19 17:39:54 UTC
Permalink
Add support for recovery for a RAID 5 btrfs profile. In addition
it is added some code as preparatory work for RAID 6 recovery code.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 177 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 172 insertions(+), 5 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index fa5bf56e0..7f01e447a 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -29,6 +29,7 @@
#include <minilzo.h>
#include <grub/i18n.h>
#include <grub/btrfs.h>
+#include <grub/crypto.h>

GRUB_MOD_LICENSE ("GPLv3+");

@@ -665,6 +666,154 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
return err;
}

+struct raid56_buffer {
+ void *buf;
+ int data_is_valid;
+};
+
+static void
+rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
+ grub_uint64_t nstripes, grub_uint64_t csize)
+{
+ grub_uint64_t i;
+ int first;
+
+ i = 0;
+ while (buffers[i].data_is_valid && i < nstripes)
+ ++i;
+
+ if (i == nstripes)
+ {
+ grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are OK\n");
+ return;
+ }
+
+ grub_dprintf ("btrfs", "rebuilding RAID 5 stripe #%" PRIuGRUB_UINT64_T "\n",
+ i);
+ first = 1;
+ for (i = 0; i < nstripes; i++)
+ {
+ if (!buffers[i].data_is_valid)
+ continue;
+
+ if (first)
+ grub_memcpy(dest, buffers[i].buf, csize);
+ else
+ grub_crypto_xor (dest, dest, buffers[i].buf, csize);
+
+ first = 0;
+ }
+}
+
+static grub_err_t
+raid56_read_retry (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripe_offset,
+ grub_uint64_t csize, void *buf)
+{
+ struct raid56_buffer *buffers = NULL;
+ grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
+ grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
+ grub_err_t ret = GRUB_ERR_NONE;
+ grub_uint64_t i, failed_devices;
+
+ buffers = grub_zalloc (sizeof(*buffers) * nstripes);
+ if (!buffers)
+ {
+ ret = GRUB_ERR_OUT_OF_MEMORY;
+ goto cleanup;
+ }
+
+ for (i = 0; i < nstripes; i++)
+ {
+ buffers[i].buf = grub_zalloc (csize);
+ if (!buffers[i].buf)
+ {
+ ret = GRUB_ERR_OUT_OF_MEMORY;
+ goto cleanup;
+ }
+ }
+
+ for (i = 0; i < nstripes; i++)
+ {
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err2;
+
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
+ stripe += i;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+ grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
+ " from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
+ stripe->device_id);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ continue;
+ }
+
+ err2 = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buffers[i].buf);
+ if (err2 == GRUB_ERR_NONE)
+ {
+ buffers[i].data_is_valid = 1;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " Ok (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ }
+ else
+ {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
+ " FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
+ stripe->device_id);
+ }
+ }
+
+ for (failed_devices = i = 0; i < nstripes; i++)
+ if (!buffers[i].data_is_valid)
+ ++failed_devices;
+ if (failed_devices > 1 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for RAID 5: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
+ else
+ {
+ grub_dprintf ("btrfs",
+ "enough disks for RAID 5 rebuilding: total %"
+ PRIuGRUB_UINT64_T ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ }
+
+ /* if these are enough, try to rebuild the data */
+ if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ rebuild_raid5 (buf, buffers, nstripes, csize);
+ else
+ grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+
+ cleanup:
+ if (buffers)
+ {
+ for (i = 0; i < nstripes; i++)
+ if (buffers[i].buf)
+ grub_free(buffers[i].buf);
+ grub_free(buffers);
+ }
+
+ return ret;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -742,6 +891,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_uint16_t nstripes;
unsigned redundancy = 1;
unsigned i, j;
+ int is_raid56;
+
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ GRUB_BTRFS_CHUNK_TYPE_RAID5);

if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -918,17 +1071,31 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
addr);

- for (i = 0; i < redundancy; i++)
+ if (!is_raid56)
+ {
+ for (i = 0; i < redundancy; i++)
+ {
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
+ if (!err)
+ break;
+ grub_errno = GRUB_ERR_NONE;
+ }
+ }
+ else
{
err = btrfs_read_from_chunk (data, chunk, stripen,
stripe_offset,
- i, /* redundancy */
+ 0, /* no mirror */
csize, buf);
- if (!err)
- break;
grub_errno = GRUB_ERR_NONE;
+ if (err != GRUB_ERR_NONE)
+ err = raid56_read_retry (data, chunk, stripe_offset,
+ csize, buf);
}
- if (i != redundancy)
+ if (err == GRUB_ERR_NONE)
break;
}
if (err)
--
2.18.0.rc2
Daniel Kiper
2018-07-12 14:35:21 UTC
Permalink
Post by Goffredo Baroncelli
Add support for recovery for a RAID 5 btrfs profile. In addition
it is added some code as preparatory work for RAID 6 recovery code.
---
grub-core/fs/btrfs.c | 177 +++++++++++++++++++++++++++++++++++++++++--
1 file changed, 172 insertions(+), 5 deletions(-)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index fa5bf56e0..7f01e447a 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -29,6 +29,7 @@
#include <minilzo.h>
#include <grub/i18n.h>
#include <grub/btrfs.h>
+#include <grub/crypto.h>
GRUB_MOD_LICENSE ("GPLv3+");
@@ -665,6 +666,154 @@ btrfs_read_from_chunk (struct grub_btrfs_data *data,
return err;
}
+struct raid56_buffer {
+ void *buf;
+ int data_is_valid;
+};
+
+static void
+rebuild_raid5 (char *dest, struct raid56_buffer *buffers,
+ grub_uint64_t nstripes, grub_uint64_t csize)
+{
+ grub_uint64_t i;
+ int first;
+
+ i = 0;
+ while (buffers[i].data_is_valid && i < nstripes)
+ ++i;
+
+ if (i == nstripes)
+ {
+ grub_dprintf ("btrfs", "called rebuild_raid5(), but all disks are OK\n");
+ return;
+ }
+
+ grub_dprintf ("btrfs", "rebuilding RAID 5 stripe #%" PRIuGRUB_UINT64_T "\n",
+ i);
+ first = 1;
+ for (i = 0; i < nstripes; i++)
for (i = 0, first = 1; i < nstripes; i++)
Post by Goffredo Baroncelli
+ {
+ if (!buffers[i].data_is_valid)
+ continue;
+
+ if (first)
+ grub_memcpy(dest, buffers[i].buf, csize);
+ else
+ grub_crypto_xor (dest, dest, buffers[i].buf, csize);
+
+ first = 0;
Oh, this seems wrong to me. I think that it should be done in that way.


if (first) {
grub_memcpy(dest, buffers[i].buf, csize);
first = 0;
} else
grub_crypto_xor (dest, dest, buffers[i].buf, csize);
Post by Goffredo Baroncelli
+ }
+}
+
+static grub_err_t
+raid56_read_retry (struct grub_btrfs_data *data,
+ struct grub_btrfs_chunk_item *chunk,
+ grub_uint64_t stripe_offset,
+ grub_uint64_t csize, void *buf)
+{
+ struct raid56_buffer *buffers = NULL;
I do not think that you need NULL assignment here.
Post by Goffredo Baroncelli
+ grub_uint64_t nstripes = grub_le_to_cpu16 (chunk->nstripes);
+ grub_uint64_t chunk_type = grub_le_to_cpu64 (chunk->type);
+ grub_err_t ret = GRUB_ERR_NONE;
+ grub_uint64_t i, failed_devices;
+
+ buffers = grub_zalloc (sizeof(*buffers) * nstripes);
+ if (!buffers)
+ {
+ ret = GRUB_ERR_OUT_OF_MEMORY;
+ goto cleanup;
+ }
+
+ for (i = 0; i < nstripes; i++)
+ {
+ buffers[i].buf = grub_zalloc (csize);
+ if (!buffers[i].buf)
+ {
+ ret = GRUB_ERR_OUT_OF_MEMORY;
+ goto cleanup;
+ }
+ }
+
+ for (i = 0; i < nstripes; i++)
+ {
+ struct grub_btrfs_chunk_stripe *stripe;
+ grub_disk_addr_t paddr;
+ grub_device_t dev;
+ grub_err_t err2;
+
+ stripe = (struct grub_btrfs_chunk_stripe *) (chunk + 1);
+ stripe += i;
+
+ paddr = grub_le_to_cpu64 (stripe->offset) + stripe_offset;
+ grub_dprintf ("btrfs", "reading paddr %" PRIxGRUB_UINT64_T
+ " from stripe ID %" PRIxGRUB_UINT64_T "\n", paddr,
+ stripe->device_id);
+
+ dev = find_device (data, stripe->device_id);
+ if (!dev)
+ {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " FAILED (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ continue;
+ }
+
+ err2 = grub_disk_read (dev->disk, paddr >> GRUB_DISK_SECTOR_BITS,
+ paddr & (GRUB_DISK_SECTOR_SIZE - 1),
+ csize, buffers[i].buf);
+ if (err2 == GRUB_ERR_NONE)
+ {
+ buffers[i].data_is_valid = 1;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T " Ok (dev ID %"
+ PRIxGRUB_UINT64_T ")\n", i, stripe->device_id);
+ }
+ else
+ {
+ buffers[i].data_is_valid = 0;
+ grub_dprintf ("btrfs", "stripe %" PRIuGRUB_UINT64_T
+ " FAILED (dev ID %" PRIxGRUB_UINT64_T ")\n", i,
+ stripe->device_id);
+ }
+ }
+
+ for (failed_devices = i = 0; i < nstripes; i++)
+ if (!buffers[i].data_is_valid)
+ ++failed_devices;
I think that you can count failed_devices in earlier loop.
Post by Goffredo Baroncelli
+ if (failed_devices > 1 && (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5))
+ {
+ grub_dprintf ("btrfs",
+ "not enough disks for RAID 5: total %" PRIuGRUB_UINT64_T
+ ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ ret = GRUB_ERR_READ_ERROR;
+ goto cleanup;
+ }
+ else
+ {
+ grub_dprintf ("btrfs",
+ "enough disks for RAID 5 rebuilding: total %"
+ PRIuGRUB_UINT64_T ", missing %" PRIuGRUB_UINT64_T "\n",
+ nstripes, failed_devices);
+ }
You do not need curly brackets after else.
Post by Goffredo Baroncelli
+ /* if these are enough, try to rebuild the data */
+ if (chunk_type & GRUB_BTRFS_CHUNK_TYPE_RAID5)
+ rebuild_raid5 (buf, buffers, nstripes, csize);
+ else
+ grub_dprintf ("btrfs", "called rebuild_raid6(), NOT IMPLEMENTED\n");
+
+ if (buffers)
+ {
+ for (i = 0; i < nstripes; i++)
+ if (buffers[i].buf)
+ grub_free(buffers[i].buf);
You can drop this if. grub_free() works with NULL without any issue.
Post by Goffredo Baroncelli
+ grub_free(buffers);
+ }
I think that this would work:

if (buffers)
for (i = 0; i < nstripes; i++)
grub_free(buffers[i].buf);

grub_free(buffers);
Post by Goffredo Baroncelli
+ return ret;
+}
+
static grub_err_t
grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
void *buf, grub_size_t size, int recursion_depth)
@@ -742,6 +891,10 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_uint16_t nstripes;
unsigned redundancy = 1;
unsigned i, j;
+ int is_raid56;
+
+ is_raid56 = !!(grub_le_to_cpu64 (chunk->type) &
+ GRUB_BTRFS_CHUNK_TYPE_RAID5);
if (grub_le_to_cpu64 (chunk->size) <= off)
{
@@ -918,17 +1071,31 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
grub_dprintf ("btrfs", "reading laddr 0x%" PRIxGRUB_UINT64_T "\n",
addr);
- for (i = 0; i < redundancy; i++)
+ if (!is_raid56)
+ {
+ for (i = 0; i < redundancy; i++)
+ {
+ err = btrfs_read_from_chunk (data, chunk, stripen,
+ stripe_offset,
+ i, /* redundancy */
+ csize, buf);
+ if (!err)
+ break;
+ grub_errno = GRUB_ERR_NONE;
+ }
+ }
You can drop these curly brackets.

Daniel
Goffredo Baroncelli
2018-06-19 18:22:19 UTC
Permalink
Forward because this patch still doesn't reach the mailing list
--------------------------------------

If a device is not found, record this failure by storing NULL in
data->devices_attached[]. This way we avoid unnecessary devices rescan,
and speedup the reads in case of a degraded array.

Signed-off-by: Goffredo Baroncelli <***@inwind.it>
---
grub-core/fs/btrfs.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 8d07e2d72..70bcb0fdc 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -588,7 +588,7 @@ find_device_iter (const char *name, void *data)
}

static grub_device_t
-find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
+find_device (struct grub_btrfs_data *data, grub_uint64_t id)
{
struct find_device_ctx ctx = {
.data = data,
@@ -600,12 +600,9 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
for (i = 0; i < data->n_devices_attached; i++)
if (id == data->devices_attached[i].id)
return data->devices_attached[i].dev;
- if (do_rescan)
- grub_device_iterate (find_device_iter, &ctx);
- if (!ctx.dev_found)
- {
- return NULL;
- }
+
+ grub_device_iterate (find_device_iter, &ctx);
+
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
{
@@ -617,7 +614,8 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
* sizeof (data->devices_attached[0]));
if (!data->devices_attached)
{
- grub_device_close (ctx.dev_found);
+ if (ctx.dev_found)
+ grub_device_close (ctx.dev_found);
data->devices_attached = tmp;
return NULL;
}
@@ -896,7 +894,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
" for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
addr);

- dev = find_device (data, stripe->device_id, j);
+ dev = find_device (data, stripe->device_id);
if (!dev)
{
grub_dprintf ("btrfs",
@@ -973,7 +971,8 @@ grub_btrfs_unmount (struct grub_btrfs_data *data)
unsigned i;
/* The device 0 is closed one layer upper. */
for (i = 1; i < data->n_devices_attached; i++)
- grub_device_close (data->devices_attached[i].dev);
+ if (data->devices_attached[i].dev)
+ grub_device_close (data->devices_attached[i].dev);
grub_free (data->devices_attached);
grub_free (data->extent);
grub_free (data);
--
2.18.0.rc2
Daniel Kiper
2018-07-12 14:02:18 UTC
Permalink
Post by Goffredo Baroncelli
Forward because this patch still doesn't reach the mailing list
Could you fix that somehow? It is confusing.
Post by Goffredo Baroncelli
--------------------------------------
If a device is not found, record this failure by storing NULL in
data->devices_attached[]. This way we avoid unnecessary devices rescan,
and speedup the reads in case of a degraded array.
---
grub-core/fs/btrfs.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 8d07e2d72..70bcb0fdc 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -588,7 +588,7 @@ find_device_iter (const char *name, void *data)
}
static grub_device_t
-find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
+find_device (struct grub_btrfs_data *data, grub_uint64_t id)
{
struct find_device_ctx ctx = {
.data = data,
@@ -600,12 +600,9 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
for (i = 0; i < data->n_devices_attached; i++)
if (id == data->devices_attached[i].id)
return data->devices_attached[i].dev;
- if (do_rescan)
- grub_device_iterate (find_device_iter, &ctx);
- if (!ctx.dev_found)
- {
- return NULL;
- }
+
+ grub_device_iterate (find_device_iter, &ctx);
+
data->n_devices_attached++;
if (data->n_devices_attached > data->n_devices_allocated)
{
@@ -617,7 +614,8 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
* sizeof (data->devices_attached[0]));
if (!data->devices_attached)
{
- grub_device_close (ctx.dev_found);
+ if (ctx.dev_found)
+ grub_device_close (ctx.dev_found);
data->devices_attached = tmp;
return NULL;
}
@@ -896,7 +894,7 @@ grub_btrfs_read_logical (struct grub_btrfs_data *data, grub_disk_addr_t addr,
" for laddr 0x%" PRIxGRUB_UINT64_T "\n", paddr,
addr);
- dev = find_device (data, stripe->device_id, j);
+ dev = find_device (data, stripe->device_id);
if (!dev)
{
grub_dprintf ("btrfs",
@@ -973,7 +971,8 @@ grub_btrfs_unmount (struct grub_btrfs_data *data)
unsigned i;
/* The device 0 is closed one layer upper. */
for (i = 1; i < data->n_devices_attached; i++)
- grub_device_close (data->devices_attached[i].dev);
+ if (data->devices_attached[i].dev)
+ grub_device_close (data->devices_attached[i].dev);
grub_free (data->devices_attached);
grub_free (data->extent);
grub_free (data);
The commit message or code is wrong. NULL is never stored into
data->devices_attached[]. Am I missing something?

Daniel
Goffredo Baroncelli
2018-07-18 18:59:17 UTC
Permalink
Resend because I forgot to put grub-devel in cc
-------------------------------------------------------------
Post by Daniel Kiper
Post by Goffredo Baroncelli
Forward because this patch still doesn't reach the mailing list
Could you fix that somehow? It is confusing.
I don't know which could be the problem. The only idea which I have, is that
the patch #4 is the only one which was never changed; gmail prevent two equal
emails to reach the inbox two times. I suspect that these two thing are related
Post by Daniel Kiper
Post by Goffredo Baroncelli
--------------------------------------
If a device is not found, record this failure by storing NULL in
data->devices_attached[]. This way we avoid unnecessary devices rescan,
and speedup the reads in case of a degraded array.
---
grub-core/fs/btrfs.c | 19 +++++++++----------
1 file changed, 9 insertions(+), 10 deletions(-)
diff --git a/grub-core/fs/btrfs.c b/grub-core/fs/btrfs.c
index 8d07e2d72..70bcb0fdc 100644
--- a/grub-core/fs/btrfs.c
+++ b/grub-core/fs/btrfs.c
@@ -588,7 +588,7 @@ find_device_iter (const char *name, void *data)
}
static grub_device_t
-find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
+find_device (struct grub_btrfs_data *data, grub_uint64_t id)
{
struct find_device_ctx ctx = {
.data = data,
@@ -600,12 +600,9 @@ find_device (struct grub_btrfs_data *data, grub_uint64_t id, int do_rescan)
for (i = 0; i < data->n_devices_attached; i++)
if (id == data->devices_attached[i].id)
return data->devices_attached[i].dev;
- if (do_rescan)
- grub_device_iterate (find_device_iter, &ctx);
- if (!ctx.dev_found)
- {
- return NULL;
- }
+
+ grub_device_iterate (find_device_iter, &ctx);
+
[...]
Post by Daniel Kiper
The commit message or code is wrong. NULL is never stored into
data->devices_attached[]. Am I missing something?
The original code searches a device (using 'id' as key) in the array
data->devices_attached[]; if the device is found, the device info are
returned.

Otherwise find_device() searches the device using grub_device_iterate().

If ctx.dev_found is NULL, the device is not found and the function returns NULL.
Otherwise find_device() stores the pair "ctx.dev_found" and "id"
in data->devices_attached[] array for further searches.
Finally find_device() returns the value found.

My patch removes the check: if the device is not found, find_devices() stores the pair "NULL" and "id". So data->devices_attached[] array acts as cache both for the "founded" and "not founded" devices.
Post by Daniel Kiper
Daniel
_______________________________________________
Grub-devel mailing list
https://lists.gnu.org/mailman/listinfo/grub-devel
--
gpg @keyserver.linux.it: Goffredo Baroncelli <kreijackATinwind.it>
Key fingerprint BBF5 1610 0B64 DAC6 5F7D 17B2 0EDA 9B37 8B82 E0B5
Daniel Kiper
2018-07-12 13:50:36 UTC
Permalink
Post by Goffredo Baroncelli
This helper s used in a few places to help the debugging. As
s/s/is/
Post by Goffredo Baroncelli
conservative approach the error is only logged.
This to not impact the error handling.
s/to/does/
Otherwise, Reviewed-by: Daniel Kiper <***@oracle.com>
You can add it to the next patch release if you do not change
anything except above listed things.

Daniel
Loading...