summaryrefslogtreecommitdiff
path: root/kernel-lib/raid56.c
blob: e3a9339e6412c9280b222ee9b06b07584e4a87be (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * Added helpers for unaligned native int access
 */

/*
 * raid6int1.c
 *
 * 1-way unrolled portable integer math RAID-6 instruction set
 *
 * This file was postprocessed using unroll.pl and then ported to userspace
 */
#include <stdint.h>
#include <unistd.h>
#include "kerncompat.h"
#include "ctree.h"
#include "disk-io.h"
#include "volumes.h"
#include "utils.h"
#include "kernel-lib/raid56.h"

/*
 * This is the C data type to use
 */

/*
 * Native word configuration, selected from BITS_PER_LONG.
 * Change this from BITS_PER_LONG if there is something better...
 *
 * NBYTES(x):  replicate the byte value x into every byte of a native word
 * NSIZE:      size of a native word in bytes
 * NSHIFT:     log2 of NSIZE
 * unative_t:  unsigned integer type that is NSIZE bytes wide
 * put_unaligned_native()/get_unaligned_native(): map to the 64-bit or
 *             32-bit unaligned access helpers matching unative_t
 */
#if BITS_PER_LONG == 64
# define NBYTES(x) ((x) * 0x0101010101010101UL)
# define NSIZE  8
# define NSHIFT 3
typedef uint64_t unative_t;
#define put_unaligned_native(val,p)	put_unaligned_64((val),(p))
#define get_unaligned_native(p)		get_unaligned_64((p))
#else
/* Any other word size falls back to 32-bit words */
# define NBYTES(x) ((x) * 0x01010101U)
# define NSIZE  4
# define NSHIFT 2
typedef uint32_t unative_t;
#define put_unaligned_native(val,p)	put_unaligned_32((val),(p))
#define get_unaligned_native(p)		get_unaligned_32((p))
#endif

/*
 * These sub-operations are separate inlines since they can sometimes be
 * specially optimized using architecture-specific hacks.
 */

/*
 * SHLBYTE() shifts every byte of the word left by one bit.  The 0xfe mask
 * clears the low bit of each byte, so the bit shifted out of one byte never
 * leaks into its neighbour.
 */
static inline __attribute_const__ unative_t SHLBYTE(unative_t v)
{
	return (v << 1) & NBYTES(0xfe);
}

/*
 * MASK() expands the high bit of every byte to the whole byte: a byte whose
 * top bit is set becomes 0xFF, any other byte becomes 0x00.
 */
static inline __attribute_const__ unative_t MASK(unative_t v)
{
	/* Keep only the top bit of each byte */
	const unative_t high_bits = v & NBYTES(0x80);

	/*
	 * Per byte: (0x80 << 1) - (0x80 >> 7) == 0x100 - 0x01 == 0xff.
	 * Overflow on the top bit of the word is OK.
	 */
	return (high_bits << 1) - (high_bits >> 7);
}


/*
 * Generate the RAID6 P (XOR parity) and Q (RS syndrome) stripes.
 *
 * @disks:	Total number of stripes in @ptrs, including P and Q
 * @bytes:	Length of every stripe in bytes
 * @ptrs:	Stripe buffers with layout:
 *		ptrs[0] .. ptrs[disks-3]:	data stripes
 *		ptrs[disks-2]:			P, written by this function
 *		ptrs[disks-1]:			Q, written by this function
 *
 * The buffers are processed one native word (NSIZE bytes) at a time using
 * unaligned accessors, so no particular buffer alignment is needed.
 *
 * NOTE(review): nothing is validated here.  The code indexes ptrs[disks-3],
 * so callers must pass disks >= 3, and the last iteration touches a full
 * word, so @bytes is expected to be a multiple of NSIZE.
 */
void raid6_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p, *q;
	size_t d;	/* byte offset; size_t to match @bytes */
	int z, z0;

	unative_t wd0, wq0, wp0, w10, w20;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0 + 1];	/* XOR parity */
	q = dptr[z0 + 2];	/* RS syndrome */

	for (d = 0; d < bytes; d += NSIZE) {
		/* Both accumulators start from the highest data disk */
		wq0 = wp0 = get_unaligned_native(&dptr[z0][d]);
		for (z = z0 - 1; z >= 0; z--) {
			wd0 = get_unaligned_native(&dptr[z][d]);
			wp0 ^= wd0;
			/*
			 * Per byte: shift the running Q left by one and XOR
			 * 0x1d into every byte whose high bit was set, then
			 * add (XOR) the data of this disk.
			 */
			w20 = MASK(wq0);
			w10 = SHLBYTE(wq0);
			w20 &= NBYTES(0x1d);
			w10 ^= w20;
			wq0 = w10 ^ wd0;
		}
		put_unaligned_native(wp0, &p[d]);
		put_unaligned_native(wq0, &q[d]);
	}
}

/*
 * XOR @size bytes of @src into @dst (dst[i] ^= src[i]).
 *
 * @dst:	Destination buffer, modified in place
 * @src:	Source buffer, only read
 * @size:	Number of bytes to process, may be 0
 *
 * Neither buffer needs any particular alignment.  The bulk of the range is
 * handled one unsigned long at a time; the words are moved with memcpy() so
 * that a misaligned @src (or any aliasing concern) cannot cause undefined
 * behavior.
 */
static void xor_range(char *dst, const char *src, size_t size)
{
	const size_t word = sizeof(unsigned long);

	/* Byte-wise until dst reaches a word boundary (mask is word - 1) */
	while (size && ((uintptr_t)dst & (word - 1))) {
		*dst++ ^= *src++;
		size--;
	}

	/* Word-sized part */
	while (size >= word) {
		unsigned long d, s;

		memcpy(&d, dst, word);
		memcpy(&s, src, word);
		d ^= s;
		memcpy(dst, &d, word);
		src += word;
		dst += word;
		size -= word;
	}

	/* Remaining tail bytes */
	while (size) {
		*dst++ ^= *src++;
		size--;
	}
}

/*
 * Generate desired data/parity stripe for RAID5
 *
 * @nr_devs:	Total number of devices, including parity
 * @stripe_len:	Stripe length, must be BTRFS_STRIPE_LEN
 * @dest:	Index in @data of the stripe to generate, must be in
 *		[0, @nr_devs)
 * @data:	Data, with special layout:
 * 		data[0]:	 Data stripe 0
 * 		data[nr_devs-2]: Last data stripe
 * 		data[nr_devs-1]: RAID5 parity
 *
 * The stripe at @dest is overwritten with the XOR of all other stripes.
 *
 * Return 0 on success, -EINVAL (after printing an error) if any parameter
 * is out of range.
 */
int raid5_gen_result(int nr_devs, size_t stripe_len, int dest, void **data)
{
	int i;
	char *buf;

	/* Validation check */
	if (stripe_len == 0 || stripe_len != BTRFS_STRIPE_LEN) {
		error("invalid parameter for %s", __func__);
		return -EINVAL;
	}

	/* Check @dest before it is ever used as an index into @data */
	if (nr_devs < 2 || dest < 0 || dest >= nr_devs) {
		error("invalid parameter for %s", __func__);
		return -EINVAL;
	}
	buf = data[dest];

	/* Shortcut for 2 devs RAID5, which is just RAID1 */
	if (nr_devs == 2) {
		memcpy(buf, data[1 - dest], stripe_len);
		return 0;
	}

	/* XOR of every stripe except the destination itself */
	memset(buf, 0, stripe_len);
	for (i = 0; i < nr_devs; i++) {
		if (i == dest)
			continue;
		xor_range(buf, data[i], stripe_len);
	}
	return 0;
}

/*
 * Raid 6 recovery code copied from kernel lib/raid6/recov.c.
 * With modifications:
 * - rename from raid6_2data_recov_intx1
 * - kfree/free modification for btrfs-progs
 *
 * Rebuild two data stripes in place, using P and Q.
 *
 * @nr_devs:	Total number of stripes in @data, including P and Q
 * @stripe_len:	Length of every stripe in bytes
 * @dest1:	Index of the first stripe to rebuild
 * @dest2:	Index of the second stripe to rebuild, must be > @dest1
 * @data:	Stripe buffers; data[nr_devs-2] is read as P and
 *		data[nr_devs-1] as Q
 *
 * The buffers behind data[dest1] and data[dest2] are overwritten with the
 * reconstructed contents.  The pointer table @data is modified temporarily
 * and restored before returning.
 *
 * Return 0 on success, -EINVAL if @dest1/@dest2 are not both data stripe
 * indexes in [0, nr_devs - 2) with dest1 < dest2, -ENOMEM if the temporary
 * zero buffers cannot be allocated.
 */
int raid6_recov_data2(int nr_devs, size_t stripe_len, int dest1, int dest2,
		      void **data)
{
	u8 *p, *q, *dp, *dq;
	u8 px, qx, db;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	char *zero_mem1, *zero_mem2;
	int ret = 0;		/* never changed below, success value */

	/* Early check */
	if (dest1 < 0 || dest1 >= nr_devs - 2 ||
	    dest2 < 0 || dest2 >= nr_devs - 2 || dest1 >= dest2)
		return -EINVAL;

	/* Zero-filled stand-ins for the two missing stripes */
	zero_mem1 = calloc(1, stripe_len);
	zero_mem2 = calloc(1, stripe_len);
	if (!zero_mem1 || !zero_mem2) {
		free(zero_mem1);
		free(zero_mem2);
		return -ENOMEM;
	}

	p = (u8 *)data[nr_devs - 2];
	q = (u8 *)data[nr_devs - 1];

	/*
	 * Compute syndrome with zero for the missing data pages.
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q: the P/Q slots of the table are pointed at
	 * the dead buffers so the syndrome is written there, not over the
	 * real P/Q.
	 */
	dp = (u8 *)data[dest1];
	data[dest1] = (void *)zero_mem1;
	data[nr_devs - 2] = dp;
	dq = (u8 *)data[dest2];
	data[dest2] = (void *)zero_mem2;
	data[nr_devs - 1] = dq;

	raid6_gen_syndrome(nr_devs, stripe_len, data);

	/* Restore pointer table */
	data[dest1]   = dp;
	data[dest2]   = dq;
	data[nr_devs - 2] = p;
	data[nr_devs - 1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_gfmul[raid6_gfexi[dest2 - dest1]];
	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]^raid6_gfexp[dest2]]];

	/* Now do it... one byte at a time, stripe_len counts down to 0 */
	while ( stripe_len-- ) {
		px    = *p ^ *dp;	/* real P xor partial P */
		qx    = qmul[*q ^ *dq];	/* scaled (real Q xor partial Q) */
		*dq++ = db = pbmul[px] ^ qx; /* Reconstructed B */
		*dp++ = db ^ px; /* Reconstructed A */
		p++; q++;
	}

	free(zero_mem1);
	free(zero_mem2);
	return ret;
}

/*
 * Raid 6 recover code copied from kernel lib/raid6/recov.c
 * - rename from raid6_datap_recov_intx1()
 * - parameter changed from faila to dest1
 *
 * Rebuild one data stripe and the P stripe in place, using Q.
 *
 * @nr_devs:	Total number of stripes in @data, including P and Q
 * @stripe_len:	Length of every stripe in bytes
 * @dest1:	Index of the data stripe to rebuild, in [0, nr_devs - 2)
 * @data:	Stripe buffers; data[nr_devs-2] is P (rewritten) and
 *		data[nr_devs-1] is Q (read only, must be valid)
 *
 * The pointer table @data is modified temporarily and restored before
 * returning.
 *
 * Return 0 on success, -EINVAL if @dest1 is not a data stripe index,
 * -ENOMEM if the temporary zero buffer cannot be allocated.
 */
int raid6_recov_datap(int nr_devs, size_t stripe_len, int dest1, void **data)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	char *zero_mem;

	/* Early check, same data stripe range as raid6_recov_data2() */
	if (dest1 < 0 || dest1 >= nr_devs - 2)
		return -EINVAL;

	p = (u8 *)data[nr_devs - 2];
	q = (u8 *)data[nr_devs - 1];

	/* Zero-filled stand-in for the missing data stripe */
	zero_mem = calloc(1, stripe_len);
	if (!zero_mem)
		return -ENOMEM;

	/*
	 * Compute syndrome with zero for the missing data page.
	 * Use the dead data page as temporary storage for delta q; P is
	 * written straight into its own buffer.
	 */
	dq = (u8 *)data[dest1];
	data[dest1] = (void *)zero_mem;
	data[nr_devs - 1] = dq;

	raid6_gen_syndrome(nr_devs, stripe_len, data);

	/* Restore pointer table */
	data[dest1]   = dq;
	data[nr_devs - 1] = q;

	/* Now, pick the proper data tables */
	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[dest1]]];

	/* Now do it... rebuild the data byte, then fold it into P */
	while (stripe_len--) {
		*p++ ^= *dq = qmul[*q ^ *dq];
		q++; dq++;
	}

	/* The zero buffer was only needed for the syndrome computation */
	free(zero_mem);
	return 0;
}

/*
 * Original raid56 recovery wrapper
 *
 * @nr_devs:	Total number of stripes in @data, including parity
 * @stripe_len:	Length of every stripe in bytes
 * @profile:	Block group profile, must have BTRFS_BLOCK_GROUP_RAID5 or
 *		BTRFS_BLOCK_GROUP_RAID6 set
 * @dest1:	Index of the first stripe to rebuild, or -1
 * @dest2:	Index of the second stripe to rebuild, or -1
 * @data:	Stripe buffers.  For RAID6, data[nr_devs-2] is P and
 *		data[nr_devs-1] is Q; for RAID5, data[nr_devs-1] is parity
 *
 * @dest1 and @dest2 may be given in any order.
 *
 * Return 0 on success (including when there is nothing to recover),
 * 1 if two stripes are requested for RAID5 (cannot be recovered),
 * <0 (-EINVAL, -ENOMEM) on error.
 */
int raid56_recov(int nr_devs, size_t stripe_len, u64 profile, int dest1,
		 int dest2, void **data)
{
	int min_devs;
	int ret;

	if (profile & BTRFS_BLOCK_GROUP_RAID5)
		min_devs = 2;
	else if (profile & BTRFS_BLOCK_GROUP_RAID6)
		min_devs = 3;
	else
		return -EINVAL;
	if (nr_devs < min_devs)
		return -EINVAL;

	/* Nothing to recover */
	if (dest1 == -1 && dest2 == -1)
		return 0;

	/* Reorder dest1/2, so only dest2 can be -1 */
	if (dest1 == -1) {
		dest1 = dest2;
		dest2 = -1;
	} else if (dest2 != -1 && dest1 > dest2) {
		/* Both set: ensure dest2 > dest1 */
		int tmp;

		tmp = dest2;
		dest2 = dest1;
		dest1 = tmp;
	}

	if (profile & BTRFS_BLOCK_GROUP_RAID5) {
		/* RAID5 cannot rebuild two stripes */
		if (dest2 != -1)
			return 1;
		return raid5_gen_result(nr_devs, stripe_len, dest1, data);
	}

	/* RAID6 one dev corrupted case */
	if (dest2 == -1) {
		/* Regenerate P/Q */
		if (dest1 == nr_devs - 1 || dest1 == nr_devs - 2) {
			raid6_gen_syndrome(nr_devs, stripe_len, data);
			return 0;
		}

		/* Regenerate data from P, Q is left out of the XOR */
		return raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
	}

	/* P/Q both corrupted */
	if (dest1 == nr_devs - 2 && dest2 == nr_devs - 1) {
		raid6_gen_syndrome(nr_devs, stripe_len, data);
		return 0;
	}

	/* 2 Data corrupted */
	if (dest2 < nr_devs - 2)
		return raid6_recov_data2(nr_devs, stripe_len, dest1, dest2,
					 data);

	/*
	 * Data and P: P lives at nr_devs - 2.  Q is intact, so rebuild both
	 * the data stripe and P from it.
	 */
	if (dest2 == nr_devs - 2)
		return raid6_recov_datap(nr_devs, stripe_len, dest1, data);

	/*
	 * Final case, Data and Q (dest2 == nr_devs - 1): P is intact, so
	 * recover data from P first, then regenerate Q
	 */
	ret = raid5_gen_result(nr_devs - 1, stripe_len, dest1, data);
	if (ret < 0)
		return ret;
	raid6_gen_syndrome(nr_devs, stripe_len, data);
	return 0;
}