/*
* Copyright (C) 2007 Oracle. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 021110-1307, USA.
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <getopt.h>
#include <uuid/uuid.h>
#include "ctree.h"
#include "volumes.h"
#include "repair.h"
#include "disk-io.h"
#include "print-tree.h"
#include "task-utils.h"
#include "transaction.h"
#include "utils.h"
#include "commands.h"
#include "free-space-cache.h"
#include "free-space-tree.h"
#include "btrfsck.h"
#include "qgroup-verify.h"
#include "rbtree-utils.h"
#include "backref.h"
#include "kernel-shared/ulist.h"
#include "hash.h"
#include "help.h"
enum task_position {
TASK_EXTENTS,
TASK_FREE_SPACE,
TASK_FS_ROOTS,
TASK_NOTHING, /* have to be the last element */
};
struct task_ctx {
int progress_enabled;
enum task_position tp;
struct task_info *info;
};
static u64 bytes_used = 0;
static u64 total_csum_bytes = 0;
static u64 total_btree_bytes = 0;
static u64 total_fs_tree_bytes = 0;
static u64 total_extent_tree_bytes = 0;
static u64 btree_space_waste = 0;
static u64 data_bytes_allocated = 0;
static u64 data_bytes_referenced = 0;
static int found_old_backref = 0;
static LIST_HEAD(duplicate_extents);
static LIST_HEAD(delete_items);
static int no_holes = 0;
static int init_extent_tree = 0;
static int check_data_csum = 0;
static struct btrfs_fs_info *global_info;
static struct task_ctx ctx = { 0 };
static struct cache_tree *roots_info_cache = NULL;
enum btrfs_check_mode {
CHECK_MODE_ORIGINAL,
CHECK_MODE_LOWMEM,
CHECK_MODE_UNKNOWN,
CHECK_MODE_DEFAULT = CHECK_MODE_ORIGINAL
};
static enum btrfs_check_mode check_mode = CHECK_MODE_DEFAULT;
struct extent_backref {
struct list_head list;
unsigned int is_data:1;
unsigned int found_extent_tree:1;
unsigned int full_backref:1;
unsigned int found_ref:1;
unsigned int broken:1;
};
static inline struct extent_backref* to_extent_backref(struct list_head *entry)
{
return list_entry(entry, struct extent_backref, list);
}
struct data_backref {
struct extent_backref node;
union {
u64 parent;
u64 root;
};
u64 owner;
u64 offset;
u64 disk_bytenr;
u64 bytes;
u64 ram_bytes;
u32 num_refs;
u32 found_ref;
};
#define ROOT_DIR_ERROR (1<<1) /* bad ROOT_DIR */
#define DIR_ITEM_MISSING (1<<2) /* DIR_ITEM not found */
#define DIR_ITEM_MISMATCH (1<<3) /* DIR_ITEM found but not match */
#define INODE_REF_MISSING (1<<4) /* INODE_REF/INODE_EXTREF not found */
#define INODE_ITEM_MISSING (1<<5) /* INODE_ITEM not found */
#define INODE_ITEM_MISMATCH (1<<6) /* INODE_ITEM found but not match */
#define FILE_EXTENT_ERROR (1<<7) /* bad FILE_EXTENT */
#define ODD_CSUM_ITEM (1<<8) /* CSUM_ITEM error */
#define CSUM_ITEM_MISSING (1<<9) /* CSUM_ITEM not found */
#define LINK_COUNT_ERROR (1<<10) /* INODE_ITEM nlink count error */
#define NBYTES_ERROR (1<<11) /* INODE_ITEM nbytes count error */
#define ISIZE_ERROR (1<<12) /* INODE_ITEM size count error */
#define ORPHAN_ITEM (1<<13) /* INODE_ITEM no reference */
#define NO_INODE_ITEM (1<<14) /* no inode_item */
#define LAST_ITEM (1<<15) /* Complete this tree traversal */
#define ROOT_REF_MISSING (1<<16) /* ROOT_REF not found */
#define ROOT_REF_MISMATCH (1<<17) /* ROOT_REF found but not match */
static inline struct data_backref* to_data_backref(struct extent_backref *back)
{
return container_of(back, struct data_backref, node);
}
/*
* Much like data_backref, just removed the undetermined members
* and change it to use list_head.
* During extent scan, it is stored in root->orphan_data_extent.
* During fs tree scan, it is then moved to inode_rec->orphan_data_extents.
*/
struct orphan_data_extent {
struct list_head list;
u64 root;
u64 objectid;
u64 offset;
u64 disk_bytenr;
u64 disk_len;
};
struct tree_backref {
struct extent_backref node;
union {
u64 parent;
u64 root;
};
};
static inline struct tree_backref* to_tree_backref(struct extent_backref *back)
{
return container_of(back, struct tree_backref, node);
}
/* Explicit initialization for extent_record::flag_block_full_backref */
enum { FLAG_UNSET = 2 };
struct extent_record {
struct list_head backrefs;
struct list_head dups;
struct list_head list;
struct cache_extent cache;
struct btrfs_disk_key parent_key;
u64 start;
u64 max_size;
u64 nr;
u64 refs;
u64 extent_item_refs;
u64 generation;
u64 parent_generation;
u64 info_objectid;
u32 num_duplicates;
u8 info_level;
unsigned int flag_block_full_backref:2;
unsigned int found_rec:1;
unsigned int content_checked:1;
unsigned int owner_ref_checked:1;
unsigned int is_root:1;
unsigned int metadata:1;
unsigned int bad_full_backref:1;
unsigned int crossing_stripes:1;
unsigned int wrong_chunk_type:1;
};
static inline struct extent_record* to_extent_record(struct list_head *entry)
{
return container_of(entry, struct extent_record, list);
}
struct inode_backref {
struct list_head list;
unsigned int found_dir_item:1;
unsigned int found_dir_index:1;
unsigned int found_inode_ref:1;
u8 filetype;
u8 ref_type;
int errors;
u64 dir;
u64 index;
u16 namelen;
char name[0];
};
static inline struct inode_backref* to_inode_backref(struct list_head *entry)
{
return list_entry(entry, struct inode_backref, list);
}
struct root_item_record {
struct list_head list;
u64 objectid;
u64 bytenr;
u64 last_snapshot;
u8 level;
u8 drop_level;
int level_size;
struct btrfs_key drop_key;
};
#define REF_ERR_NO_DIR_ITEM (1 << 0)
#define REF_ERR_NO_DIR_INDEX (1 << 1)
#define REF_ERR_NO_INODE_REF (1 << 2)
#define REF_ERR_DUP_DIR_ITEM (1 << 3)
#define REF_ERR_DUP_DIR_INDEX (1 << 4)
#define REF_ERR_DUP_INODE_REF (1 << 5)
#define REF_ERR_INDEX_UNMATCH (1 << 6)
#define REF_ERR_FILETYPE_UNMATCH (1 << 7)
#define REF_ERR_NAME_TOO_LONG (1 << 8) // 100
#define REF_ERR_NO_ROOT_REF (1 << 9)
#define REF_ERR_NO_ROOT_BACKREF (1 << 10)
#define REF_ERR_DUP_ROOT_REF (1 << 11)
#define REF_ERR_DUP_ROOT_BACKREF (1 << 12)
struct file_extent_hole {
struct rb_node node;
u64 start;
u64 len;
};
struct inode_record {
struct list_head backrefs;
unsigned int checked:1;
unsigned int merging:1;
unsigned int found_inode_item:1;
unsigned int found_dir_item:1;
unsigned int found_file_extent:1;
unsigned int found_csum_item:1;
unsigned int some_csum_missing:1;
unsigned int nodatasum:1;
int errors;
u64 ino;
u32 nlink;
u32 imode;
u64 isize;
u64 nbytes;
u32 found_link;
u64 found_size;
u64 extent_start;
u64 extent_end;
struct rb_root holes;
struct list_head orphan_extents;
u32 refs;
};
#define I_ERR_NO_INODE_ITEM (1 << 0)
#define I_ERR_NO_ORPHAN_ITEM (1 << 1)
#define I_ERR_DUP_INODE_ITEM (1 << 2)
#define I_ERR_DUP_DIR_INDEX (1 << 3)
#define I_ERR_ODD_DIR_ITEM (1 << 4)
#define I_ERR_ODD_FILE_EXTENT (1 << 5)
#define I_ERR_BAD_FILE_EXTENT (1 << 6)
#define I_ERR_FILE_EXTENT_OVERLAP (1 << 7)
#define I_ERR_FILE_EXTENT_DISCOUNT (1 << 8) // 100
#define I_ERR_DIR_ISIZE_WRONG (1 << 9)
#define I_ERR_FILE_NBYTES_WRONG (1 << 10) // 400
#define I_ERR_ODD_CSUM_ITEM (1 << 11)
#define I_ERR_SOME_CSUM_MISSING (1 << 12)
#define I_ERR_LINK_COUNT_WRONG (1 << 13)
#define I_ERR_FILE_EXTENT_ORPHAN (1 << 14)
struct root_backref {
struct list_head list;
unsigned int found_dir_item:1;
unsigned int found_dir_index:1;
unsigned int found_back_ref:1;
unsigned int found_forward_ref:1;
unsigned int reachable:1;
int errors;
u64 ref_root;
u64 dir;
u64 index;
u16 namelen;
char name[0];
};
static inline struct root_backref* to_root_backref(struct list_head *entry)
{
return list_entry(entry, struct root_backref, list);
}
struct root_record {
struct list_head backrefs;
struct cache_extent cache;
unsigned int found_root_item:1;
u64 objectid;
u32 found_ref;
};
struct ptr_node {
struct cache_extent cache;
void *data;
};
struct shared_node {
struct cache_extent cache;
struct cache_tree root_cache;
struct cache_tree inode_cache;
struct inode_record *current;
u32 refs;
};
struct block_info {
u64 start;
u32 size;
};
struct walk_control {
struct cache_tree shared;
struct shared_node *nodes[BTRFS_MAX_LEVEL];
int active_node;
int root_level;
};
struct bad_item {
struct btrfs_key key;
u64 root_id;
struct list_head list;
};
struct extent_entry {
u64 bytenr;
u64 bytes;
int count;
int broken;
struct list_head list;
};
struct root_item_info {
/* level of the root */
u8 level;
/* number of nodes at this level, must be 1 for a root */
int node_count;
u64 bytenr;
u64 gen;
struct cache_extent cache_extent;
};
/*
* Error bit for low memory mode check.
*
* Currently no caller cares about it yet. Just internal use for error
* classification.
*/
#define BACKREF_MISSING (1 << 0) /* Backref missing in extent tree */
#define BACKREF_MISMATCH (1 << 1) /* Backref exists but does not match */
#define BYTES_UNALIGNED (1 << 2) /* Some bytes are not aligned */
#define REFERENCER_MISSING (1 << 3) /* Referencer not found */
#define REFERENCER_MISMATCH (1 << 4) /* Referenceer found but does not match */
#define CROSSING_STRIPE_BOUNDARY (1 << 4) /* For kernel scrub workaround */
#define ITEM_SIZE_MISMATCH (1 << 5) /* Bad item size */
#define UNKNOWN_TYPE (1 << 6) /* Unknown type */
#define ACCOUNTING_MISMATCH (1 << 7) /* Used space accounting error */
#define CHUNK_TYPE_MISMATCH (1 << 8)
static void *print_status_check(void *p)
{
struct task_ctx *priv = p;
const char work_indicator[] = { '.', 'o', 'O', 'o' };
uint32_t count = 0;
static char *task_position_string[] = {
"checking extents",
"checking free space cache",
"checking fs roots",
};
task_period_start(priv->info, 1000 /* 1s */);
if (priv->tp == TASK_NOTHING)
return NULL;
while (1) {
printf("%s [%c]\r", task_position_string[priv->tp],
work_indicator[count % 4]);
count++;
fflush(stdout);
task_period_wait(priv->info);
}
return NULL;
}
static int print_status_return(void *p)
{
printf("\n");
fflush(stdout);
return 0;
}
static enum btrfs_check_mode parse_check_mode(const char *str)
{
if (strcmp(str, "lowmem") == 0)
return CHECK_MODE_LOWMEM;
if (strcmp(str, "orig") == 0)
return CHECK_MODE_ORIGINAL;
if (strcmp(str, "original") == 0)
return CHECK_MODE_ORIGINAL;
return CHECK_MODE_UNKNOWN;
}
/* Compatible function to allow reuse of old codes */
static u64 first_extent_gap(struct rb_root *holes)
{
struct file_extent_hole *hole;
if (RB_EMPTY_ROOT(holes))
return (u64)-1;
hole = rb_entry(rb_first(holes), struct file_extent_hole, node);
return hole->start;
}
static int compare_hole(struct rb_node *node1, struct rb_node *node2)
{
struct file_extent_hole *hole1;
struct file_extent_hole *hole2;
hole1 = rb_entry(node1, struct file_extent_hole, node);
hole2 = rb_entry(node2, struct file_extent_hole, node);
if (hole1->start > hole2->start)
return -1;
if (hole1->start < hole2->start)
return 1;
/* Now hole1->start == hole2->start */
if (hole1->len >= hole2->len)
/*
* Hole 1 will be merge center
* Same hole will be merged later
*/
return -1;
/* Hole 2 will be merge center */
return 1;
}
/*
* Add a hole to the record
*
* This will do hole merge for copy_file_extent_holes(),
* which will ensure there won't be continuous holes.
*/
static int add_file_extent_hole(struct rb_root *holes,
u64 start, u64 len)
{
struct file_extent_hole *hole;
struct file_extent_hole *prev = NULL;
struct file_extent_hole *next = NULL;
hole = malloc(sizeof(*hole));
if (!hole)
return -ENOMEM;
hole->start = start;
hole->len = len;
/* Since compare will not return 0, no -EEXIST will happen */
rb_insert(holes, &hole->node, compare_hole);
/* simple merge with previous hole */
if (rb_prev(&hole->node))
prev = rb_entry(rb_prev(&hole->node), struct file_extent_hole,
node);
if (prev && prev->start + prev->len >= hole->start) {
hole->len = hole->start + hole->len - prev->start;
hole->start = prev->start;
rb_erase(&prev->node, holes);
free(prev);
prev = NULL;
}
/* iterate merge with next holes */
while (1) {
if (!rb_next(&hole->node))
break;
next = rb_entry(rb_next(&hole->node), struct file_extent_hole,
node);
if (hole->start + hole->len >= next->start) {
if (hole->start + hole->len <= next->start + next->len)
hole->len = next->start + next->len -
hole->start;
rb_erase(&next->node, holes);
free(next);
next = NULL;
} else
break;
}
return 0;
}
static int compare_hole_range(struct rb_node *node, void *data)
{
struct file_extent_hole *hole;
u64 start;
hole = (struct file_extent_hole *)data;
start = hole->start;
hole = rb_entry(node, struct file_extent_hole, node);
if (start < hole->start)
return -1;
if (start >= hole->start && start < hole->start + hole->len)
return 0;
return 1;
}
/*
* Delete a hole in the record
*
* This will do the hole split and is much restrict than add.
*/
static int del_file_extent_hole(struct rb_root *holes,
u64 start, u64 len)
{
struct file_extent_hole *hole;
struct file_extent_hole tmp;
u64 prev_start = 0;
u64 prev_len = 0;
u64 next_start = 0;
u64 next_len = 0;
struct rb_node *node;
int have_prev = 0;
int have_next = 0;
int ret = 0;
tmp.start = start;
tmp.len = len;
node = rb_search(holes, &tmp, compare_hole_range, NULL);
if (!node)
return -EEXIST;
hole = rb_entry(node, struct file_extent_hole, node);
if (start + len > hole->start + hole->len)
return -EEXIST;
/*
* Now there will be no overlap, delete the hole and re-add the
* split(s) if they exists.
*/
if (start > hole->start) {
prev_start = hole->start;
prev_len = start - hole->start;
have_prev = 1;
}
if (hole->start + hole->len > start + len) {
next_start = start + len;
next_len = hole->start + hole->len - start - len;
have_next = 1;
}
rb_erase(node, holes);
free(hole);
if (have_prev) {
ret = add_file_extent_hole(holes, prev_start, prev_len);
if (ret < 0)
return ret;
}
if (have_next) {
ret = add_file_extent_hole(holes, next_start, next_len);
if (ret < 0)
return ret;
}
return 0;
}
static int copy_file_extent_holes(struct rb_root *dst,
struct rb_root *src)
{
struct file_extent_hole *hole;
struct rb_node *node;
int ret = 0;
node = rb_first(src);
while (node) {
hole = rb_entry(node, struct file_extent_hole, node);
ret = add_file_extent_hole(dst, hole->start, hole->len);
if (ret)
break;
node = rb_next(node);
}
return ret;
}
static void free_file_extent_holes(struct rb_root *holes)
{
struct rb_node *node;
struct file_extent_hole *hole;
node = rb_first(holes);
while (node) {
hole = rb_entry(node, struct file_extent_hole, node);
rb_erase(node, holes);
free(hole);
node = rb_first(holes);
}
}
static void reset_cached_block_groups(struct btrfs_fs_info *fs_info);
static void record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
if (root->last_trans != trans->transid) {
root->track_dirty = 1;
root->last_trans = trans->transid;
root->commit_root = root->node;
extent_buffer_get(root->node);
}
}
static u8 imode_to_type(u32 imode)
{
#define S_SHIFT 12
static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
[S_IFREG >> S_SHIFT] = BTRFS_FT_REG_FILE,
[S_IFDIR >> S_SHIFT] = BTRFS_FT_DIR,
[S_IFCHR >> S_SHIFT] = BTRFS_FT_CHRDEV,
[S_IFBLK >> S_SHIFT] = BTRFS_FT_BLKDEV,
[S_IFIFO >> S_SHIFT] = BTRFS_FT_FIFO,
[S_IFSOCK >> S_SHIFT] = BTRFS_FT_SOCK,
[S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
};
return btrfs_type_by_mode[(imode & S_IFMT) >> S_SHIFT];
#undef S_SHIFT
}
static int device_record_compare(struct rb_node *node1, struct rb_node *node2)
{
struct device_record *rec1;
struct device_record *rec2;
rec1 = rb_entry(node1, struct device_record, node);
rec2 = rb_entry(node2, struct device_record, node);
if (rec1->devid > rec2->devid)
return -1;
else if (rec1->devid < rec2->devid)
return 1;
else
return 0;
}
static struct inode_record *clone_inode_rec(struct inode_record *orig_rec)
{
struct inode_record *rec;
struct inode_backref *backref;
struct inode_backref *orig;
struct inode_backref *tmp;
struct orphan_data_extent *src_orphan;
struct orphan_data_extent *dst_orphan;
struct rb_node *rb;
size_t size;
int ret;
rec = malloc(sizeof(*rec));
if (!rec)
return ERR_PTR(-ENOMEM);
memcpy(rec, orig_rec, sizeof(*rec));
rec->refs = 1;
INIT_LIST_HEAD(&rec->backrefs);
INIT_LIST_HEAD(&rec->orphan_extents);
rec->holes = RB_ROOT;
list_for_each_entry(orig, &orig_rec->backrefs, list) {
size = sizeof(*orig) + orig->namelen + 1;
backref = malloc(size);
if (!backref) {
ret = -ENOMEM;
goto cleanup;
}
memcpy(backref, orig, size);
list_add_tail(&backref->list, &rec->backrefs);
}
list_for_each_entry(src_orphan, &orig_rec->orphan_extents, list) {
dst_orphan = malloc(sizeof(*dst_orphan));
if (!dst_orphan) {
ret = -ENOMEM;
goto cleanup;
}
memcpy(dst_orphan, src_orphan, sizeof(*src_orphan));
list_add_tail(&dst_orphan->list, &rec->orphan_extents);
}
ret = copy_file_extent_holes(&rec->holes, &orig_rec->holes);
if (ret < 0)
goto cleanup_rb;
return rec;
cleanup_rb:
rb = rb_first(&rec->holes);
while (rb) {
struct file_extent_hole *hole;
hole = rb_entry(rb, struct file_extent_hole, node);
rb = rb_next(rb);
free(hole);
}
cleanup:
if (!list_empty(&rec->backrefs))
list_for_each_entry_safe(orig, tmp, &rec->backrefs, list) {
list_del(&orig->list);
free(orig);
}
if (!list_empty(&rec->orphan_extents))
list_for_each_entry_safe(orig, tmp, &rec->orphan_extents, list) {
list_del(&orig->list);
free(orig);
}
free(rec);
return ERR_PTR(ret);
}
static void print_orphan_data_extents(struct list_head *orphan_extents,
u64 objectid)
{
struct orphan_data_extent *orphan;
if (list_empty(orphan_extents))
return;
printf("The following data extent is lost in tree %llu:\n",
objectid);
list_for_each_entry(orphan, orphan_extents, list) {
printf("\tinode: %llu, offset:%llu, disk_bytenr: %llu, disk_len: %llu\n",
orphan->objectid, orphan->offset, orphan->disk_bytenr,
orphan->disk_len);
}
}
static void print_inode_error(struct btrfs_root *root, struct inode_record *rec)
{
u64 root_objectid = root->root_key.objectid;
int errors = rec->errors;
if (!errors)
return;
/* reloc root errors, we print its corresponding fs root objectid*/
if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) {
root_objectid = root->root_key.offset;
fprintf(stderr, "reloc");
}
fprintf(stderr, "root %llu inode %llu errors %x",
(unsigned long long) root_objectid,
(unsigned long long) rec->ino, rec->errors);
if (errors & I_ERR_NO_INODE_ITEM)
fprintf(stderr, ", no inode item");
if (errors & I_ERR_NO_ORPHAN_ITEM)
fprintf(stderr, ", no orphan item");
if (errors & I_ERR_DUP_INODE_ITEM)
fprintf(stderr, ", dup inode item");
if (errors & I_ERR_DUP_DIR_INDEX)
fprintf(stderr, ", dup dir index");
if (errors & I_ERR_ODD_DIR_ITEM)
fprintf(stderr, ", odd dir item");
if (errors & I_ERR_ODD_FILE_EXTENT)
fprintf(stderr, ", odd file extent");
if (errors & I_ERR_BAD_FILE_EXTENT)
fprintf(stderr, ", bad file extent");
if (errors & I_ERR_FILE_EXTENT_OVERLAP)
fprintf(stderr, ", file extent overlap");
if (errors & I_ERR_FILE_EXTENT_DISCOUNT)
fprintf(stderr, ", file extent discount");
if (errors & I_ERR_DIR_ISIZE_WRONG)
fprintf(stderr, ", dir isize wrong");
if (errors & I_ERR_FILE_NBYTES_WRONG)
fprintf(stderr, ", nbytes wrong");
if (errors & I_ERR_ODD_CSUM_ITEM)
fprintf(stderr, ", odd csum item");
if (errors & I_ERR_SOME_CSUM_MISSING)
fprintf(stderr, ", some csum missing");
if (errors & I_ERR_LINK_COUNT_WRONG)
fprintf(stderr, ", link count wrong");
if (errors & I_ERR_FILE_EXTENT_ORPHAN)
fprintf(stderr, ", orphan file extent");
fprintf(stderr, "\n");
/* Print the orphan extents if needed */
if (errors & I_ERR_FILE_EXTENT_ORPHAN)
print_orphan_data_extents(&rec->orphan_extents, root->objectid);
/* Print the holes if needed */
if (errors & I_ERR_FILE_EXTENT_DISCOUNT) {
struct file_extent_hole *hole;
struct rb_node *node;
int found = 0;
node = rb_first(&rec->holes);
fprintf(stderr, "Found file extent holes:\n");
while (node) {
found = 1;
hole = rb_entry(node, struct file_extent_hole, node);
fprintf(stderr, "\tstart: %llu, len: %llu\n",
hole->start, hole->len);
node = rb_next(node);
}
if (!found)
fprintf(stderr, "\tstart: 0, len: %llu\n",
round_up(rec->isize,
root->fs_info->sectorsize));
}
}
static void print_ref_error(int errors)
{
if (errors & REF_ERR_NO_DIR_ITEM)
fprintf(stderr, ", no dir item");
if (errors & REF_ERR_NO_DIR_INDEX)
fprintf(stderr, ", no dir index");
if (errors & REF_ERR_NO_INODE_REF)
fprintf(stderr, ", no inode ref");
if (errors & REF_ERR_DUP_DIR_ITEM)
fprintf(stderr, ", dup dir item");
if (errors & REF_ERR_DUP_DIR_INDEX)
fprintf(stderr, ", dup dir index");
if (errors & REF_ERR_DUP_INODE_REF)
fprintf(stderr, ", dup inode ref");
if (errors & REF_ERR_INDEX_UNMATCH)
fprintf(stderr, ", index mismatch");
if (errors & REF_ERR_FILETYPE_UNMATCH)
fprintf(stderr, ", filetype mismatch");
if (errors & REF_ERR_NAME_TOO_LONG)
fprintf(stderr, ", name too long");
if (errors & REF_ERR_NO_ROOT_REF)
fprintf(stderr, ", no root ref");
if (errors & REF_ERR_NO_ROOT_BACKREF)
fprintf(stderr, ", no root backref");
if (errors & REF_ERR_DUP_ROOT_REF)
fprintf(stderr, ", dup root ref");
if (errors & REF_ERR_DUP_ROOT_BACKREF)
fprintf(stderr, ", dup root backref");
fprintf(stderr, "\n");
}
static struct inode_record *get_inode_rec(struct cache_tree *inode_cache,
u64 ino, int mod)
{
struct ptr_node *node;
struct cache_extent *cache;
struct inode_record *rec = NULL;
int ret;
cache = lookup_cache_extent(inode_cache, ino, 1);
if (cache) {
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
if (mod && rec->refs > 1) {
node->data = clone_inode_rec(rec);
if (IS_ERR(node->data))
return node->data;
rec->refs--;
rec = node->data;
}
} else if (mod) {
rec = calloc(1, sizeof(*rec));
if (!rec)
return ERR_PTR(-ENOMEM);
rec->ino = ino;
rec->extent_start = (u64)-1;
rec->refs = 1;
INIT_LIST_HEAD(&rec->backrefs);
INIT_LIST_HEAD(&rec->orphan_extents);
rec->holes = RB_ROOT;
node = malloc(sizeof(*node));
if (!node) {
free(rec);
return ERR_PTR(-ENOMEM);
}
node->cache.start = ino;
node->cache.size = 1;
node->data = rec;
if (ino == BTRFS_FREE_INO_OBJECTID)
rec->found_link = 1;
ret = insert_cache_extent(inode_cache, &node->cache);
if (ret)
return ERR_PTR(-EEXIST);
}
return rec;
}
static void free_orphan_data_extents(struct list_head *orphan_extents)
{
struct orphan_data_extent *orphan;
while (!list_empty(orphan_extents)) {
orphan = list_entry(orphan_extents->next,
struct orphan_data_extent, list);
list_del(&orphan->list);
free(orphan);
}
}
static void free_inode_rec(struct inode_record *rec)
{
struct inode_backref *backref;
if (--rec->refs > 0)
return;
while (!list_empty(&rec->backrefs)) {
backref = to_inode_backref(rec->backrefs.next);
list_del(&backref->list);
free(backref);
}
free_orphan_data_extents(&rec->orphan_extents);
free_file_extent_holes(&rec->holes);
free(rec);
}
static int can_free_inode_rec(struct inode_record *rec)
{
if (!rec->errors && rec->checked && rec->found_inode_item &&
rec->nlink == rec->found_link && list_empty(&rec->backrefs))
return 1;
return 0;
}
static void maybe_free_inode_rec(struct cache_tree *inode_cache,
struct inode_record *rec)
{
struct cache_extent *cache;
struct inode_backref *tmp, *backref;
struct ptr_node *node;
u8 filetype;
if (!rec->found_inode_item)
return;
filetype = imode_to_type(rec->imode);
list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
if (backref->found_dir_item && backref->found_dir_index) {
if (backref->filetype != filetype)
backref->errors |= REF_ERR_FILETYPE_UNMATCH;
if (!backref->errors && backref->found_inode_ref &&
rec->nlink == rec->found_link) {
list_del(&backref->list);
free(backref);
}
}
}
if (!rec->checked || rec->merging)
return;
if (S_ISDIR(rec->imode)) {
if (rec->found_size != rec->isize)
rec->errors |= I_ERR_DIR_ISIZE_WRONG;
if (rec->found_file_extent)
rec->errors |= I_ERR_ODD_FILE_EXTENT;
} else if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
if (rec->found_dir_item)
rec->errors |= I_ERR_ODD_DIR_ITEM;
if (rec->found_size != rec->nbytes)
rec->errors |= I_ERR_FILE_NBYTES_WRONG;
if (rec->nlink > 0 && !no_holes &&
(rec->extent_end < rec->isize ||
first_extent_gap(&rec->holes) < rec->isize))
rec->errors |= I_ERR_FILE_EXTENT_DISCOUNT;
}
if (S_ISREG(rec->imode) || S_ISLNK(rec->imode)) {
if (rec->found_csum_item && rec->nodatasum)
rec->errors |= I_ERR_ODD_CSUM_ITEM;
if (rec->some_csum_missing && !rec->nodatasum)
rec->errors |= I_ERR_SOME_CSUM_MISSING;
}
BUG_ON(rec->refs != 1);
if (can_free_inode_rec(rec)) {
cache = lookup_cache_extent(inode_cache, rec->ino, 1);
node = container_of(cache, struct ptr_node, cache);
BUG_ON(node->data != rec);
remove_cache_extent(inode_cache, &node->cache);
free(node);
free_inode_rec(rec);
}
}
static int check_orphan_item(struct btrfs_root *root, u64 ino)
{
struct btrfs_path path;
struct btrfs_key key;
int ret;
key.objectid = BTRFS_ORPHAN_OBJECTID;
key.type = BTRFS_ORPHAN_ITEM_KEY;
key.offset = ino;
btrfs_init_path(&path);
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
btrfs_release_path(&path);
if (ret > 0)
ret = -ENOENT;
return ret;
}
static int process_inode_item(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
struct inode_record *rec;
struct btrfs_inode_item *item;
rec = active_node->current;
BUG_ON(rec->ino != key->objectid || rec->refs > 1);
if (rec->found_inode_item) {
rec->errors |= I_ERR_DUP_INODE_ITEM;
return 1;
}
item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
rec->nlink = btrfs_inode_nlink(eb, item);
rec->isize = btrfs_inode_size(eb, item);
rec->nbytes = btrfs_inode_nbytes(eb, item);
rec->imode = btrfs_inode_mode(eb, item);
if (btrfs_inode_flags(eb, item) & BTRFS_INODE_NODATASUM)
rec->nodatasum = 1;
rec->found_inode_item = 1;
if (rec->nlink == 0)
rec->errors |= I_ERR_NO_ORPHAN_ITEM;
maybe_free_inode_rec(&active_node->inode_cache, rec);
return 0;
}
static struct inode_backref *get_inode_backref(struct inode_record *rec,
const char *name,
int namelen, u64 dir)
{
struct inode_backref *backref;
list_for_each_entry(backref, &rec->backrefs, list) {
if (rec->ino == BTRFS_MULTIPLE_OBJECTIDS)
break;
if (backref->dir != dir || backref->namelen != namelen)
continue;
if (memcmp(name, backref->name, namelen))
continue;
return backref;
}
backref = malloc(sizeof(*backref) + namelen + 1);
if (!backref)
return NULL;
memset(backref, 0, sizeof(*backref));
backref->dir = dir;
backref->namelen = namelen;
memcpy(backref->name, name, namelen);
backref->name[namelen] = '\0';
list_add_tail(&backref->list, &rec->backrefs);
return backref;
}
static int add_inode_backref(struct cache_tree *inode_cache,
u64 ino, u64 dir, u64 index,
const char *name, int namelen,
u8 filetype, u8 itemtype, int errors)
{
struct inode_record *rec;
struct inode_backref *backref;
rec = get_inode_rec(inode_cache, ino, 1);
BUG_ON(IS_ERR(rec));
backref = get_inode_backref(rec, name, namelen, dir);
BUG_ON(!backref);
if (errors)
backref->errors |= errors;
if (itemtype == BTRFS_DIR_INDEX_KEY) {
if (backref->found_dir_index)
backref->errors |= REF_ERR_DUP_DIR_INDEX;
if (backref->found_inode_ref && backref->index != index)
backref->errors |= REF_ERR_INDEX_UNMATCH;
if (backref->found_dir_item && backref->filetype != filetype)
backref->errors |= REF_ERR_FILETYPE_UNMATCH;
backref->index = index;
backref->filetype = filetype;
backref->found_dir_index = 1;
} else if (itemtype == BTRFS_DIR_ITEM_KEY) {
rec->found_link++;
if (backref->found_dir_item)
backref->errors |= REF_ERR_DUP_DIR_ITEM;
if (backref->found_dir_index && backref->filetype != filetype)
backref->errors |= REF_ERR_FILETYPE_UNMATCH;
backref->filetype = filetype;
backref->found_dir_item = 1;
} else if ((itemtype == BTRFS_INODE_REF_KEY) ||
(itemtype == BTRFS_INODE_EXTREF_KEY)) {
if (backref->found_inode_ref)
backref->errors |= REF_ERR_DUP_INODE_REF;
if (backref->found_dir_index && backref->index != index)
backref->errors |= REF_ERR_INDEX_UNMATCH;
else
backref->index = index;
backref->ref_type = itemtype;
backref->found_inode_ref = 1;
} else {
BUG_ON(1);
}
maybe_free_inode_rec(inode_cache, rec);
return 0;
}
static int merge_inode_recs(struct inode_record *src, struct inode_record *dst,
struct cache_tree *dst_cache)
{
struct inode_backref *backref;
u32 dir_count = 0;
int ret = 0;
dst->merging = 1;
list_for_each_entry(backref, &src->backrefs, list) {
if (backref->found_dir_index) {
add_inode_backref(dst_cache, dst->ino, backref->dir,
backref->index, backref->name,
backref->namelen, backref->filetype,
BTRFS_DIR_INDEX_KEY, backref->errors);
}
if (backref->found_dir_item) {
dir_count++;
add_inode_backref(dst_cache, dst->ino,
backref->dir, 0, backref->name,
backref->namelen, backref->filetype,
BTRFS_DIR_ITEM_KEY, backref->errors);
}
if (backref->found_inode_ref) {
add_inode_backref(dst_cache, dst->ino,
backref->dir, backref->index,
backref->name, backref->namelen, 0,
backref->ref_type, backref->errors);
}
}
if (src->found_dir_item)
dst->found_dir_item = 1;
if (src->found_file_extent)
dst->found_file_extent = 1;
if (src->found_csum_item)
dst->found_csum_item = 1;
if (src->some_csum_missing)
dst->some_csum_missing = 1;
if (first_extent_gap(&dst->holes) > first_extent_gap(&src->holes)) {
ret = copy_file_extent_holes(&dst->holes, &src->holes);
if (ret < 0)
return ret;
}
BUG_ON(src->found_link < dir_count);
dst->found_link += src->found_link - dir_count;
dst->found_size += src->found_size;
if (src->extent_start != (u64)-1) {
if (dst->extent_start == (u64)-1) {
dst->extent_start = src->extent_start;
dst->extent_end = src->extent_end;
} else {
if (dst->extent_end > src->extent_start)
dst->errors |= I_ERR_FILE_EXTENT_OVERLAP;
else if (dst->extent_end < src->extent_start) {
ret = add_file_extent_hole(&dst->holes,
dst->extent_end,
src->extent_start - dst->extent_end);
}
if (dst->extent_end < src->extent_end)
dst->extent_end = src->extent_end;
}
}
dst->errors |= src->errors;
if (src->found_inode_item) {
if (!dst->found_inode_item) {
dst->nlink = src->nlink;
dst->isize = src->isize;
dst->nbytes = src->nbytes;
dst->imode = src->imode;
dst->nodatasum = src->nodatasum;
dst->found_inode_item = 1;
} else {
dst->errors |= I_ERR_DUP_INODE_ITEM;
}
}
dst->merging = 0;
return 0;
}
static int splice_shared_node(struct shared_node *src_node,
struct shared_node *dst_node)
{
struct cache_extent *cache;
struct ptr_node *node, *ins;
struct cache_tree *src, *dst;
struct inode_record *rec, *conflict;
u64 current_ino = 0;
int splice = 0;
int ret;
if (--src_node->refs == 0)
splice = 1;
if (src_node->current)
current_ino = src_node->current->ino;
src = &src_node->root_cache;
dst = &dst_node->root_cache;
again:
cache = search_cache_extent(src, 0);
while (cache) {
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
cache = next_cache_extent(cache);
if (splice) {
remove_cache_extent(src, &node->cache);
ins = node;
} else {
ins = malloc(sizeof(*ins));
BUG_ON(!ins);
ins->cache.start = node->cache.start;
ins->cache.size = node->cache.size;
ins->data = rec;
rec->refs++;
}
ret = insert_cache_extent(dst, &ins->cache);
if (ret == -EEXIST) {
conflict = get_inode_rec(dst, rec->ino, 1);
BUG_ON(IS_ERR(conflict));
merge_inode_recs(rec, conflict, dst);
if (rec->checked) {
conflict->checked = 1;
if (dst_node->current == conflict)
dst_node->current = NULL;
}
maybe_free_inode_rec(dst, conflict);
free_inode_rec(rec);
free(ins);
} else {
BUG_ON(ret);
}
}
if (src == &src_node->root_cache) {
src = &src_node->inode_cache;
dst = &dst_node->inode_cache;
goto again;
}
if (current_ino > 0 && (!dst_node->current ||
current_ino > dst_node->current->ino)) {
if (dst_node->current) {
dst_node->current->checked = 1;
maybe_free_inode_rec(dst, dst_node->current);
}
dst_node->current = get_inode_rec(dst, current_ino, 1);
BUG_ON(IS_ERR(dst_node->current));
}
return 0;
}
static void free_inode_ptr(struct cache_extent *cache)
{
struct ptr_node *node;
struct inode_record *rec;
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
free_inode_rec(rec);
free(node);
}
FREE_EXTENT_CACHE_BASED_TREE(inode_recs, free_inode_ptr);
static struct shared_node *find_shared_node(struct cache_tree *shared,
u64 bytenr)
{
struct cache_extent *cache;
struct shared_node *node;
cache = lookup_cache_extent(shared, bytenr, 1);
if (cache) {
node = container_of(cache, struct shared_node, cache);
return node;
}
return NULL;
}
static int add_shared_node(struct cache_tree *shared, u64 bytenr, u32 refs)
{
int ret;
struct shared_node *node;
node = calloc(1, sizeof(*node));
if (!node)
return -ENOMEM;
node->cache.start = bytenr;
node->cache.size = 1;
cache_tree_init(&node->root_cache);
cache_tree_init(&node->inode_cache);
node->refs = refs;
ret = insert_cache_extent(shared, &node->cache);
return ret;
}
static int enter_shared_node(struct btrfs_root *root, u64 bytenr, u32 refs,
struct walk_control *wc, int level)
{
struct shared_node *node;
struct shared_node *dest;
int ret;
if (level == wc->active_node)
return 0;
BUG_ON(wc->active_node <= level);
node = find_shared_node(&wc->shared, bytenr);
if (!node) {
ret = add_shared_node(&wc->shared, bytenr, refs);
BUG_ON(ret);
node = find_shared_node(&wc->shared, bytenr);
wc->nodes[level] = node;
wc->active_node = level;
return 0;
}
if (wc->root_level == wc->active_node &&
btrfs_root_refs(&root->root_item) == 0) {
if (--node->refs == 0) {
free_inode_recs_tree(&node->root_cache);
free_inode_recs_tree(&node->inode_cache);
remove_cache_extent(&wc->shared, &node->cache);
free(node);
}
return 1;
}
dest = wc->nodes[wc->active_node];
splice_shared_node(node, dest);
if (node->refs == 0) {
remove_cache_extent(&wc->shared, &node->cache);
free(node);
}
return 1;
}
static int leave_shared_node(struct btrfs_root *root,
struct walk_control *wc, int level)
{
struct shared_node *node;
struct shared_node *dest;
int i;
if (level == wc->root_level)
return 0;
for (i = level + 1; i < BTRFS_MAX_LEVEL; i++) {
if (wc->nodes[i])
break;
}
BUG_ON(i >= BTRFS_MAX_LEVEL);
node = wc->nodes[wc->active_node];
wc->nodes[wc->active_node] = NULL;
wc->active_node = i;
dest = wc->nodes[wc->active_node];
if (wc->active_node < wc->root_level ||
btrfs_root_refs(&root->root_item) > 0) {
BUG_ON(node->refs <= 1);
splice_shared_node(node, dest);
} else {
BUG_ON(node->refs < 2);
node->refs--;
}
return 0;
}
/*
* Returns:
* < 0 - on error
* 1 - if the root with id child_root_id is a child of root parent_root_id
* 0 - if the root child_root_id isn't a child of the root parent_root_id but
* has other root(s) as parent(s)
* 2 - if the root child_root_id doesn't have any parent roots
*/
static int is_child_root(struct btrfs_root *root, u64 parent_root_id,
u64 child_root_id)
{
struct btrfs_path path;
struct btrfs_key key;
struct extent_buffer *leaf;
int has_parent = 0;
int ret;
btrfs_init_path(&path);
key.objectid = parent_root_id;
key.type = BTRFS_ROOT_REF_KEY;
key.offset = child_root_id;
ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
0, 0);
if (ret < 0)
return ret;
btrfs_release_path(&path);
if (!ret)
return 1;
key.objectid = child_root_id;
key.type = BTRFS_ROOT_BACKREF_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, &path,
0, 0);
if (ret < 0)
goto out;
while (1) {
leaf = path.nodes[0];
if (path.slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root->fs_info->tree_root, &path);
if (ret)
break;
leaf = path.nodes[0];
}
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.objectid != child_root_id ||
key.type != BTRFS_ROOT_BACKREF_KEY)
break;
has_parent = 1;
if (key.offset == parent_root_id) {
btrfs_release_path(&path);
return 1;
}
path.slots[0]++;
}
out:
btrfs_release_path(&path);
if (ret < 0)
return ret;
return has_parent ? 0 : 2;
}
static int process_dir_item(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u32 data_len;
int error;
int nritems = 0;
u8 filetype;
struct btrfs_dir_item *di;
struct inode_record *rec;
struct cache_tree *root_cache;
struct cache_tree *inode_cache;
struct btrfs_key location;
char namebuf[BTRFS_NAME_LEN];
root_cache = &active_node->root_cache;
inode_cache = &active_node->inode_cache;
rec = active_node->current;
rec->found_dir_item = 1;
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
total = btrfs_item_size_nr(eb, slot);
while (cur < total) {
nritems++;
btrfs_dir_item_key_to_cpu(eb, di, &location);
name_len = btrfs_dir_name_len(eb, di);
data_len = btrfs_dir_data_len(eb, di);
filetype = btrfs_dir_type(eb, di);
rec->found_size += name_len;
if (cur + sizeof(*di) + name_len > total ||
name_len > BTRFS_NAME_LEN) {
error = REF_ERR_NAME_TOO_LONG;
if (cur + sizeof(*di) > total)
break;
len = min_t(u32, total - cur - sizeof(*di),
BTRFS_NAME_LEN);
} else {
len = name_len;
error = 0;
}
read_extent_buffer(eb, namebuf, (unsigned long)(di + 1), len);
if (key->type == BTRFS_DIR_ITEM_KEY &&
key->offset != btrfs_name_hash(namebuf, len)) {
rec->errors |= I_ERR_ODD_DIR_ITEM;
error("DIR_ITEM[%llu %llu] name %s namelen %u filetype %u mismatch with its hash, wanted %llu have %llu",
key->objectid, key->offset, namebuf, len, filetype,
key->offset, btrfs_name_hash(namebuf, len));
}
if (location.type == BTRFS_INODE_ITEM_KEY) {
add_inode_backref(inode_cache, location.objectid,
key->objectid, key->offset, namebuf,
len, filetype, key->type, error);
} else if (location.type == BTRFS_ROOT_ITEM_KEY) {
add_inode_backref(root_cache, location.objectid,
key->objectid, key->offset,
namebuf, len, filetype,
key->type, error);
} else {
fprintf(stderr, "invalid location in dir item %u\n",
location.type);
add_inode_backref(inode_cache, BTRFS_MULTIPLE_OBJECTIDS,
key->objectid, key->offset, namebuf,
len, filetype, key->type, error);
}
len = sizeof(*di) + name_len + data_len;
di = (struct btrfs_dir_item *)((char *)di + len);
cur += len;
}
if (key->type == BTRFS_DIR_INDEX_KEY && nritems > 1)
rec->errors |= I_ERR_DUP_DIR_INDEX;
return 0;
}
static int process_inode_ref(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u64 index;
int error;
struct cache_tree *inode_cache;
struct btrfs_inode_ref *ref;
char namebuf[BTRFS_NAME_LEN];
inode_cache = &active_node->inode_cache;
ref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref);
total = btrfs_item_size_nr(eb, slot);
while (cur < total) {
name_len = btrfs_inode_ref_name_len(eb, ref);
index = btrfs_inode_ref_index(eb, ref);
/* inode_ref + namelen should not cross item boundary */
if (cur + sizeof(*ref) + name_len > total ||
name_len > BTRFS_NAME_LEN) {
if (total < cur + sizeof(*ref))
break;
/* Still try to read out the remaining part */
len = min_t(u32, total - cur - sizeof(*ref),
BTRFS_NAME_LEN);
error = REF_ERR_NAME_TOO_LONG;
} else {
len = name_len;
error = 0;
}
read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
add_inode_backref(inode_cache, key->objectid, key->offset,
index, namebuf, len, 0, key->type, error);
len = sizeof(*ref) + name_len;
ref = (struct btrfs_inode_ref *)((char *)ref + len);
cur += len;
}
return 0;
}
static int process_inode_extref(struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
u32 total;
u32 cur = 0;
u32 len;
u32 name_len;
u64 index;
u64 parent;
int error;
struct cache_tree *inode_cache;
struct btrfs_inode_extref *extref;
char namebuf[BTRFS_NAME_LEN];
inode_cache = &active_node->inode_cache;
extref = btrfs_item_ptr(eb, slot, struct btrfs_inode_extref);
total = btrfs_item_size_nr(eb, slot);
while (cur < total) {
name_len = btrfs_inode_extref_name_len(eb, extref);
index = btrfs_inode_extref_index(eb, extref);
parent = btrfs_inode_extref_parent(eb, extref);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
error = 0;
} else {
len = BTRFS_NAME_LEN;
error = REF_ERR_NAME_TOO_LONG;
}
read_extent_buffer(eb, namebuf,
(unsigned long)(extref + 1), len);
add_inode_backref(inode_cache, key->objectid, parent,
index, namebuf, len, 0, key->type, error);
len = sizeof(*extref) + name_len;
extref = (struct btrfs_inode_extref *)((char *)extref + len);
cur += len;
}
return 0;
}
static int count_csum_range(struct btrfs_root *root, u64 start,
u64 len, u64 *found)
{
struct btrfs_key key;
struct btrfs_path path;
struct extent_buffer *leaf;
int ret;
size_t size;
*found = 0;
u64 csum_end;
u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
btrfs_init_path(&path);
key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
key.offset = start;
key.type = BTRFS_EXTENT_CSUM_KEY;
ret = btrfs_search_slot(NULL, root->fs_info->csum_root,
&key, &path, 0, 0);
if (ret < 0)
goto out;
if (ret > 0 && path.slots[0] > 0) {
leaf = path.nodes[0];
btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
key.type == BTRFS_EXTENT_CSUM_KEY)
path.slots[0]--;
}
while (len > 0) {
leaf = path.nodes[0];
if (path.slots[0] >= btrfs_header_nritems(leaf)) {
ret = btrfs_next_leaf(root->fs_info->csum_root, &path);
if (ret > 0)
break;
else if (ret < 0)
goto out;
leaf = path.nodes[0];
}
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
key.type != BTRFS_EXTENT_CSUM_KEY)
break;
btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
if (key.offset >= start + len)
break;
if (key.offset > start)
start = key.offset;
size = btrfs_item_size_nr(leaf, path.slots[0]);
csum_end = key.offset + (size / csum_size) *
root->fs_info->sectorsize;
if (csum_end > start) {
size = min(csum_end - start, len);
len -= size;
start += size;
*found += size;
}
path.slots[0]++;
}
out:
btrfs_release_path(&path);
if (ret < 0)
return ret;
return 0;
}
static int process_file_extent(struct btrfs_root *root,
struct extent_buffer *eb,
int slot, struct btrfs_key *key,
struct shared_node *active_node)
{
struct inode_record *rec;
struct btrfs_file_extent_item *fi;
u64 num_bytes = 0;
u64 disk_bytenr = 0;
u64 extent_offset = 0;
u64 mask = root->fs_info->sectorsize - 1;
int extent_type;
int ret;
rec = active_node->current;
BUG_ON(rec->ino != key->objectid || rec->refs > 1);
rec->found_file_extent = 1;
if (rec->extent_start == (u64)-1) {
rec->extent_start = key->offset;
rec->extent_end = key->offset;
}
if (rec->extent_end > key->offset)
rec->errors |= I_ERR_FILE_EXTENT_OVERLAP;
else if (rec->extent_end < key->offset) {
ret = add_file_extent_hole(&rec->holes, rec->extent_end,
key->offset - rec->extent_end);
if (ret < 0)
return ret;
}
fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item);
extent_type = btrfs_file_extent_type(eb, fi);
if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
num_bytes = btrfs_file_extent_inline_len(eb, slot, fi);
if (num_bytes == 0)
rec->errors |= I_ERR_BAD_FILE_EXTENT;
rec->found_size += num_bytes;
num_bytes = (num_bytes + mask) & ~mask;
} else if (extent_type == BTRFS_FILE_EXTENT_REG ||
extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
num_bytes = btrfs_file_extent_num_bytes(eb, fi);
disk_bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
extent_offset = btrfs_file_extent_offset(eb, fi);
if (num_bytes == 0 || (num_bytes & mask))
rec->errors |= I_ERR_BAD_FILE_EXTENT;
if (num_bytes + extent_offset >
btrfs_file_extent_ram_bytes(eb, fi))
rec->errors |= I_ERR_BAD_FILE_EXTENT;
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC &&
(btrfs_file_extent_compression(eb, fi) ||
btrfs_file_extent_encryption(eb, fi) ||
btrfs_file_extent_other_encoding(eb, fi)))
rec->errors |= I_ERR_BAD_FILE_EXTENT;
if (disk_bytenr > 0)
rec->found_size += num_bytes;
} else {
rec->errors |= I_ERR_BAD_FILE_EXTENT;
}
rec->extent_end = key->offset + num_bytes;
/*
* The data reloc tree will copy full extents into its inode and then
* copy the corresponding csums. Because the extent it copied could be
* a preallocated extent that hasn't been written to yet there may be no
* csums to copy, ergo we won't have csums for our file extent. This is
* ok so just don't bother checking csums if the inode belongs to the
* data reloc tree.
*/
if (disk_bytenr > 0 &&
btrfs_header_owner(eb) != BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 found;
if (btrfs_file_extent_compression(eb, fi))
num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
else
disk_bytenr += extent_offset;
ret = count_csum_range(root, disk_bytenr, num_bytes, &found);
if (ret < 0)
return ret;
if (extent_type == BTRFS_FILE_EXTENT_REG) {
if (found > 0)
rec->found_csum_item = 1;
if (found < num_bytes)
rec->some_csum_missing = 1;
} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
if (found > 0)
rec->errors |= I_ERR_ODD_CSUM_ITEM;
}
}
return 0;
}
static int process_one_leaf(struct btrfs_root *root, struct extent_buffer *eb,
struct walk_control *wc)
{
struct btrfs_key key;
u32 nritems;
int i;
int ret = 0;
struct cache_tree *inode_cache;
struct shared_node *active_node;
if (wc->root_level == wc->active_node &&
btrfs_root_refs(&root->root_item) == 0)
return 0;
active_node = wc->nodes[wc->active_node];
inode_cache = &active_node->inode_cache;
nritems = btrfs_header_nritems(eb);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(eb, &key, i);
if (key.objectid == BTRFS_FREE_SPACE_OBJECTID)
continue;
if (key.type == BTRFS_ORPHAN_ITEM_KEY)
continue;
if (active_node->current == NULL ||
active_node->current->ino < key.objectid) {
if (active_node->current) {
active_node->current->checked = 1;
maybe_free_inode_rec(inode_cache,
active_node->current);
}
active_node->current = get_inode_rec(inode_cache,
key.objectid, 1);
BUG_ON(IS_ERR(active_node->current));
}
switch (key.type) {
case BTRFS_DIR_ITEM_KEY:
case BTRFS_DIR_INDEX_KEY:
ret = process_dir_item(eb, i, &key, active_node);
break;
case BTRFS_INODE_REF_KEY:
ret = process_inode_ref(eb, i, &key, active_node);
break;
case BTRFS_INODE_EXTREF_KEY:
ret = process_inode_extref(eb, i, &key, active_node);
break;
case BTRFS_INODE_ITEM_KEY:
ret = process_inode_item(eb, i, &key, active_node);
break;
case BTRFS_EXTENT_DATA_KEY:
ret = process_file_extent(root, eb, i, &key,
active_node);
break;
default:
break;
};
}
return ret;
}
struct node_refs {
u64 bytenr[BTRFS_MAX_LEVEL];
u64 refs[BTRFS_MAX_LEVEL];
int need_check[BTRFS_MAX_LEVEL];
};
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
struct node_refs *nrefs, u64 level);
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
unsigned int ext_ref);
/*
* Returns >0 Found error, not fatal, should continue
* Returns <0 Fatal error, must exit the whole check
* Returns 0 No errors found
*/
static int process_one_leaf_v2(struct btrfs_root *root, struct btrfs_path *path,
struct node_refs *nrefs, int *level, int ext_ref)
{
struct extent_buffer *cur = path->nodes[0];
struct btrfs_key key;
u64 cur_bytenr;
u32 nritems;
u64 first_ino = 0;
int root_level = btrfs_header_level(root->node);
int i;
int ret = 0; /* Final return value */
int err = 0; /* Positive error bitmap */
cur_bytenr = cur->start;
/* skip to first inode item or the first inode number change */
nritems = btrfs_header_nritems(cur);
for (i = 0; i < nritems; i++) {
btrfs_item_key_to_cpu(cur, &key, i);
if (i == 0)
first_ino = key.objectid;
if (key.type == BTRFS_INODE_ITEM_KEY ||
(first_ino && first_ino != key.objectid))
break;
}
if (i == nritems) {
path->slots[0] = nritems;
return 0;
}
path->slots[0] = i;
again:
err |= check_inode_item(root, path, ext_ref);
if (err & LAST_ITEM)
goto out;
/* still have inode items in thie leaf */
if (cur->start == cur_bytenr)
goto again;
/*
* we have switched to another leaf, above nodes may
* have changed, here walk down the path, if a node
* or leaf is shared, check whether we can skip this
* node or leaf.
*/
for (i = root_level; i >= 0; i--) {
if (path->nodes[i]->start == nrefs->bytenr[i])
continue;
ret = update_nodes_refs(root,
path->nodes[i]->start,
nrefs, i);
if (ret)
goto out;
if (!nrefs->need_check[i]) {
*level += 1;
break;
}
}
for (i = 0; i < *level; i++) {
free_extent_buffer(path->nodes[i]);
path->nodes[i] = NULL;
}
out:
err &= ~LAST_ITEM;
if (err && !ret)
ret = err;
return ret;
}
static void reada_walk_down(struct btrfs_root *root,
struct extent_buffer *node, int slot)
{
struct btrfs_fs_info *fs_info = root->fs_info;
u64 bytenr;
u64 ptr_gen;
u32 nritems;
int i;
int level;
level = btrfs_header_level(node);
if (level != 1)
return;
nritems = btrfs_header_nritems(node);
for (i = slot; i < nritems; i++) {
bytenr = btrfs_node_blockptr(node, i);
ptr_gen = btrfs_node_ptr_generation(node, i);
readahead_tree_block(fs_info, bytenr, fs_info->nodesize,
ptr_gen);
}
}
/*
* Check the child node/leaf by the following condition:
* 1. the first item key of the node/leaf should be the same with the one
* in parent.
* 2. block in parent node should match the child node/leaf.
* 3. generation of parent node and child's header should be consistent.
*
* Or the child node/leaf pointed by the key in parent is not valid.
*
* We hope to check leaf owner too, but since subvol may share leaves,
* which makes leaf owner check not so strong, key check should be
* sufficient enough for that case.
*/
static int check_child_node(struct extent_buffer *parent, int slot,
struct extent_buffer *child)
{
struct btrfs_key parent_key;
struct btrfs_key child_key;
int ret = 0;
btrfs_node_key_to_cpu(parent, &parent_key, slot);
if (btrfs_header_level(child) == 0)
btrfs_item_key_to_cpu(child, &child_key, 0);
else
btrfs_node_key_to_cpu(child, &child_key, 0);
if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
ret = -EINVAL;
fprintf(stderr,
"Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
parent_key.objectid, parent_key.type, parent_key.offset,
child_key.objectid, child_key.type, child_key.offset);
}
if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
ret = -EINVAL;
fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
btrfs_node_blockptr(parent, slot),
btrfs_header_bytenr(child));
}
if (btrfs_node_ptr_generation(parent, slot) !=
btrfs_header_generation(child)) {
ret = -EINVAL;
fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
btrfs_header_generation(child),
btrfs_node_ptr_generation(parent, slot));
}
return ret;
}
/*
* for a tree node or leaf, if it's shared, indeed we don't need to iterate it
* in every fs or file tree check. Here we find its all root ids, and only check
* it in the fs or file tree which has the smallest root id.
*/
static int need_check(struct btrfs_root *root, struct ulist *roots)
{
struct rb_node *node;
struct ulist_node *u;
if (roots->nnodes == 1)
return 1;
node = rb_first(&roots->root);
u = rb_entry(node, struct ulist_node, rb_node);
/*
* current root id is not smallest, we skip it and let it be checked
* in the fs or file tree who hash the smallest root id.
*/
if (root->objectid != u->val)
return 0;
return 1;
}
/*
* for a tree node or leaf, we record its reference count, so later if we still
* process this node or leaf, don't need to compute its reference count again.
*/
static int update_nodes_refs(struct btrfs_root *root, u64 bytenr,
struct node_refs *nrefs, u64 level)
{
int check, ret;
u64 refs;
struct ulist *roots;
if (nrefs->bytenr[level] != bytenr) {
ret = btrfs_lookup_extent_info(NULL, root, bytenr,
level, 1, &refs, NULL);
if (ret < 0)
return ret;
nrefs->bytenr[level] = bytenr;
nrefs->refs[level] = refs;
if (refs > 1) {
ret = btrfs_find_all_roots(NULL, root->fs_info, bytenr,
0, &roots);
if (ret)
return -EIO;
check = need_check(root, roots);
ulist_free(roots);
nrefs->need_check[level] = check;
} else {
nrefs->need_check[level] = 1;
}
}
return 0;
}
static int walk_down_tree(struct btrfs_root *root, struct btrfs_path *path,
struct walk_control *wc, int *level,
struct node_refs *nrefs)
{
enum btrfs_tree_block_status status;
u64 bytenr;
u64 ptr_gen;
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *next;
struct extent_buffer *cur;
int ret, err = 0;
u64 refs;
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
if (path->nodes[*level]->start == nrefs->bytenr[*level]) {
refs = nrefs->refs[*level];
ret = 0;
} else {
ret = btrfs_lookup_extent_info(NULL, root,
path->nodes[*level]->start,
*level, 1, &refs, NULL);
if (ret < 0) {
err = ret;
goto out;
}
nrefs->bytenr[*level] = path->nodes[*level]->start;
nrefs->refs[*level] = refs;
}
if (refs > 1) {
ret = enter_shared_node(root, path->nodes[*level]->start,
refs, wc, *level);
if (ret > 0) {
err = ret;
goto out;
}
}
while (*level >= 0) {
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
if (btrfs_header_level(cur) != *level)
WARN_ON(1);
if (path->slots[*level] >= btrfs_header_nritems(cur))
break;
if (*level == 0) {
ret = process_one_leaf(root, cur, wc);
if (ret < 0)
err = ret;
break;
}
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
if (bytenr == nrefs->bytenr[*level - 1]) {
refs = nrefs->refs[*level - 1];
} else {
ret = btrfs_lookup_extent_info(NULL, root, bytenr,
*level - 1, 1, &refs, NULL);
if (ret < 0) {
refs = 0;
} else {
nrefs->bytenr[*level - 1] = bytenr;
nrefs->refs[*level - 1] = refs;
}
}
if (refs > 1) {
ret = enter_shared_node(root, bytenr, refs,
wc, *level - 1);
if (ret > 0) {
path->slots[*level]++;
continue;
}
}
next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
free_extent_buffer(next);
reada_walk_down(root, cur, path->slots[*level]);
next = read_tree_block(root->fs_info, bytenr, ptr_gen);
if (!extent_buffer_uptodate(next)) {
struct btrfs_key node_key;
btrfs_node_key_to_cpu(path->nodes[*level],
&node_key,
path->slots[*level]);
btrfs_add_corrupt_extent_record(root->fs_info,
&node_key,
path->nodes[*level]->start,
root->fs_info->nodesize,
*level);
err = -EIO;
goto out;
}
}
ret = check_child_node(cur, path->slots[*level], next);
if (ret) {
free_extent_buffer(next);
err = ret;
goto out;
}
if (btrfs_is_leaf(next))
status = btrfs_check_leaf(root, NULL, next);
else
status = btrfs_check_node(root, NULL, next);
if (status != BTRFS_TREE_BLOCK_CLEAN) {
free_extent_buffer(next);
err = -EIO;
goto out;
}
*level = *level - 1;
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = next;
path->slots[*level] = 0;
}
out:
path->slots[*level] = btrfs_header_nritems(path->nodes[*level]);
return err;
}
static int check_inode_item(struct btrfs_root *root, struct btrfs_path *path,
unsigned int ext_ref);
/*
* Returns >0 Found error, should continue
* Returns <0 Fatal error, must exit the whole check
* Returns 0 No errors found
*/
static int walk_down_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
int *level, struct node_refs *nrefs, int ext_ref)
{
enum btrfs_tree_block_status status;
u64 bytenr;
u64 ptr_gen;
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *next;
struct extent_buffer *cur;
int ret;
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
ret = update_nodes_refs(root, path->nodes[*level]->start,
nrefs, *level);
if (ret < 0)
return ret;
while (*level >= 0) {
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
cur = path->nodes[*level];
if (btrfs_header_level(cur) != *level)
WARN_ON(1);
if (path->slots[*level] >= btrfs_header_nritems(cur))
break;
/* Don't forgot to check leaf/node validation */
if (*level == 0) {
ret = btrfs_check_leaf(root, NULL, cur);
if (ret != BTRFS_TREE_BLOCK_CLEAN) {
ret = -EIO;
break;
}
ret = process_one_leaf_v2(root, path, nrefs,
level, ext_ref);
break;
} else {
ret = btrfs_check_node(root, NULL, cur);
if (ret != BTRFS_TREE_BLOCK_CLEAN) {
ret = -EIO;
break;
}
}
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
ret = update_nodes_refs(root, bytenr, nrefs, *level - 1);
if (ret)
break;
if (!nrefs->need_check[*level - 1]) {
path->slots[*level]++;
continue;
}
next = btrfs_find_tree_block(fs_info, bytenr, fs_info->nodesize);
if (!next || !btrfs_buffer_uptodate(next, ptr_gen)) {
free_extent_buffer(next);
reada_walk_down(root, cur, path->slots[*level]);
next = read_tree_block(fs_info, bytenr, ptr_gen);
if (!extent_buffer_uptodate(next)) {
struct btrfs_key node_key;
btrfs_node_key_to_cpu(path->nodes[*level],
&node_key,
path->slots[*level]);
btrfs_add_corrupt_extent_record(fs_info,
&node_key,
path->nodes[*level]->start,
fs_info->nodesize,
*level);
ret = -EIO;
break;
}
}
ret = check_child_node(cur, path->slots[*level], next);
if (ret < 0)
break;
if (btrfs_is_leaf(next))
status = btrfs_check_leaf(root, NULL, next);
else
status = btrfs_check_node(root, NULL, next);
if (status != BTRFS_TREE_BLOCK_CLEAN) {
free_extent_buffer(next);
ret = -EIO;
break;
}
*level = *level - 1;
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = next;
path->slots[*level] = 0;
}
return ret;
}
static int walk_up_tree(struct btrfs_root *root, struct btrfs_path *path,
struct walk_control *wc, int *level)
{
int i;
struct extent_buffer *leaf;
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
leaf = path->nodes[i];
if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
path->slots[i]++;
*level = i;
return 0;
} else {
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
BUG_ON(*level > wc->active_node);
if (*level == wc->active_node)
leave_shared_node(root, wc, *level);
*level = i + 1;
}
}
return 1;
}
static int walk_up_tree_v2(struct btrfs_root *root, struct btrfs_path *path,
int *level)
{
int i;
struct extent_buffer *leaf;
for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
leaf = path->nodes[i];
if (path->slots[i] + 1 < btrfs_header_nritems(leaf)) {
path->slots[i]++;
*level = i;
return 0;
} else {
free_extent_buffer(path->nodes[*level]);
path->nodes[*level] = NULL;
*level = i + 1;
}
}
return 1;
}
static int check_root_dir(struct inode_record *rec)
{
struct inode_backref *backref;
int ret = -1;
if (!rec->found_inode_item || rec->errors)
goto out;
if (rec->nlink != 1 || rec->found_link != 0)
goto out;
if (list_empty(&rec->backrefs))
goto out;
backref = to_inode_backref(rec->backrefs.next);
if (!backref->found_inode_ref)
goto out;
if (backref->index != 0 || backref->namelen != 2 ||
memcmp(backref->name, "..", 2))
goto out;
if (backref->found_dir_index || backref->found_dir_item)
goto out;
ret = 0;
out:
return ret;
}
static int repair_inode_isize(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct btrfs_path *path,
struct inode_record *rec)
{
struct btrfs_inode_item *ei;
struct btrfs_key key;
int ret;
key.objectid = rec->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = (u64)-1;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0)
goto out;
if (ret) {
if (!path->slots[0]) {
ret = -ENOENT;
goto out;
}
path->slots[0]--;
ret = 0;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
if (key.objectid != rec->ino) {
ret = -ENOENT;
goto out;
}
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_size(path->nodes[0], ei, rec->found_size);
btrfs_mark_buffer_dirty(path->nodes[0]);
rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
printf("reset isize for dir %Lu root %Lu\n", rec->ino,
root->root_key.objectid);
out:
btrfs_release_path(path);
return ret;
}
static int repair_inode_orphan_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
int ret;
ret = btrfs_add_orphan_item(trans, root, path, rec->ino);
btrfs_release_path(path);
if (!ret)
rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
return ret;
}
static int repair_inode_nbytes(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct btrfs_inode_item *ei;
struct btrfs_key key;
int ret = 0;
key.objectid = rec->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret) {
if (ret > 0)
ret = -ENOENT;
goto out;
}
/* Since ret == 0, no need to check anything */
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_nbytes(path->nodes[0], ei, rec->found_size);
btrfs_mark_buffer_dirty(path->nodes[0]);
rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
printf("reset nbytes for ino %llu root %llu\n",
rec->ino, root->root_key.objectid);
out:
btrfs_release_path(path);
return ret;
}
static int add_missing_dir_index(struct btrfs_root *root,
struct cache_tree *inode_cache,
struct inode_record *rec,
struct inode_backref *backref)
{
struct btrfs_path path;
struct btrfs_trans_handle *trans;
struct btrfs_dir_item *dir_item;
struct extent_buffer *leaf;
struct btrfs_key key;
struct btrfs_disk_key disk_key;
struct inode_record *dir_rec;
unsigned long name_ptr;
u32 data_size = sizeof(*dir_item) + backref->namelen;
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
fprintf(stderr, "repairing missing dir index item for inode %llu\n",
(unsigned long long)rec->ino);
btrfs_init_path(&path);
key.objectid = backref->dir;
key.type = BTRFS_DIR_INDEX_KEY;
key.offset = backref->index;
ret = btrfs_insert_empty_item(trans, root, &path, &key, data_size);
BUG_ON(ret);
leaf = path.nodes[0];
dir_item = btrfs_item_ptr(leaf, path.slots[0], struct btrfs_dir_item);
disk_key.objectid = cpu_to_le64(rec->ino);
disk_key.type = BTRFS_INODE_ITEM_KEY;
disk_key.offset = 0;
btrfs_set_dir_item_key(leaf, dir_item, &disk_key);
btrfs_set_dir_type(leaf, dir_item, imode_to_type(rec->imode));
btrfs_set_dir_data_len(leaf, dir_item, 0);
btrfs_set_dir_name_len(leaf, dir_item, backref->namelen);
name_ptr = (unsigned long)(dir_item + 1);
write_extent_buffer(leaf, backref->name, name_ptr, backref->namelen);
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(&path);
btrfs_commit_transaction(trans, root);
backref->found_dir_index = 1;
dir_rec = get_inode_rec(inode_cache, backref->dir, 0);
BUG_ON(IS_ERR(dir_rec));
if (!dir_rec)
return 0;
dir_rec->found_size += backref->namelen;
if (dir_rec->found_size == dir_rec->isize &&
(dir_rec->errors & I_ERR_DIR_ISIZE_WRONG))
dir_rec->errors &= ~I_ERR_DIR_ISIZE_WRONG;
if (dir_rec->found_size != dir_rec->isize)
dir_rec->errors |= I_ERR_DIR_ISIZE_WRONG;
return 0;
}
static int delete_dir_index(struct btrfs_root *root,
struct inode_backref *backref)
{
struct btrfs_trans_handle *trans;
struct btrfs_dir_item *di;
struct btrfs_path path;
int ret = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans))
return PTR_ERR(trans);
fprintf(stderr, "Deleting bad dir index [%llu,%u,%llu] root %llu\n",
(unsigned long long)backref->dir,
BTRFS_DIR_INDEX_KEY, (unsigned long long)backref->index,
(unsigned long long)root->objectid);
btrfs_init_path(&path);
di = btrfs_lookup_dir_index(trans, root, &path, backref->dir,
backref->name, backref->namelen,
backref->index, -1);
if (IS_ERR(di)) {
ret = PTR_ERR(di);
btrfs_release_path(&path);
btrfs_commit_transaction(trans, root);
if (ret == -ENOENT)
return 0;
return ret;
}
if (!di)
ret = btrfs_del_item(trans, root, &path);
else
ret = btrfs_delete_one_dir_name(trans, root, &path, di);
BUG_ON(ret);
btrfs_release_path(&path);
btrfs_commit_transaction(trans, root);
return ret;
}
static int create_inode_item(struct btrfs_root *root,
struct inode_record *rec,
int root_dir)
{
struct btrfs_trans_handle *trans;
struct btrfs_inode_item inode_item;
time_t now = time(NULL);
int ret;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
return ret;
}
fprintf(stderr, "root %llu inode %llu recreating inode item, this may "
"be incomplete, please check permissions and content after "
"the fsck completes.\n", (unsigned long long)root->objectid,
(unsigned long long)rec->ino);
memset(&inode_item, 0, sizeof(inode_item));
btrfs_set_stack_inode_generation(&inode_item, trans->transid);
if (root_dir)
btrfs_set_stack_inode_nlink(&inode_item, 1);
else
btrfs_set_stack_inode_nlink(&inode_item, rec->found_link);
btrfs_set_stack_inode_nbytes(&inode_item, rec->found_size);
if (rec->found_dir_item) {
if (rec->found_file_extent)
fprintf(stderr, "root %llu inode %llu has both a dir "
"item and extents, unsure if it is a dir or a "
"regular file so setting it as a directory\n",
(unsigned long long)root->objectid,
(unsigned long long)rec->ino);
btrfs_set_stack_inode_mode(&inode_item, S_IFDIR | 0755);
btrfs_set_stack_inode_size(&inode_item, rec->found_size);
} else if (!rec->found_dir_item) {
btrfs_set_stack_inode_size(&inode_item, rec->extent_end);
btrfs_set_stack_inode_mode(&inode_item, S_IFREG | 0755);
}
btrfs_set_stack_timespec_sec(&inode_item.atime, now);
btrfs_set_stack_timespec_nsec(&inode_item.atime, 0);
btrfs_set_stack_timespec_sec(&inode_item.ctime, now);
btrfs_set_stack_timespec_nsec(&inode_item.ctime, 0);
btrfs_set_stack_timespec_sec(&inode_item.mtime, now);
btrfs_set_stack_timespec_nsec(&inode_item.mtime, 0);
btrfs_set_stack_timespec_sec(&inode_item.otime, 0);
btrfs_set_stack_timespec_nsec(&inode_item.otime, 0);
ret = btrfs_insert_inode(trans, root, rec->ino, &inode_item);
BUG_ON(ret);
btrfs_commit_transaction(trans, root);
return 0;
}
static int repair_inode_backrefs(struct btrfs_root *root,
struct inode_record *rec,
struct cache_tree *inode_cache,
int delete)
{
struct inode_backref *tmp, *backref;
u64 root_dirid = btrfs_root_dirid(&root->root_item);
int ret = 0;
int repaired = 0;
list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
if (!delete && rec->ino == root_dirid) {
if (!rec->found_inode_item) {
ret = create_inode_item(root, rec, 1);
if (ret)
break;
repaired++;
}
}
/* Index 0 for root dir's are special, don't mess with it */
if (rec->ino == root_dirid && backref->index == 0)
continue;
if (delete &&
((backref->found_dir_index && !backref->found_inode_ref) ||
(backref->found_dir_index && backref->found_inode_ref &&
(backref->errors & REF_ERR_INDEX_UNMATCH)))) {
ret = delete_dir_index(root, backref);
if (ret)
break;
repaired++;
list_del(&backref->list);
free(backref);
continue;
}
if (!delete && !backref->found_dir_index &&
backref->found_dir_item && backref->found_inode_ref) {
ret = add_missing_dir_index(root, inode_cache, rec,
backref);
if (ret)
break;
repaired++;
if (backref->found_dir_item &&
backref->found_dir_index) {
if (!backref->errors &&
backref->found_inode_ref) {
list_del(&backref->list);
free(backref);
continue;
}
}
}
if (!delete && (!backref->found_dir_index &&
!backref->found_dir_item &&
backref->found_inode_ref)) {
struct btrfs_trans_handle *trans;
struct btrfs_key location;
ret = check_dir_conflict(root, backref->name,
backref->namelen,
backref->dir,
backref->index);
if (ret) {
/*
* let nlink fixing routine to handle it,
* which can do it better.
*/
ret = 0;
break;
}
location.objectid = rec->ino;
location.type = BTRFS_INODE_ITEM_KEY;
location.offset = 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
break;
}
fprintf(stderr, "adding missing dir index/item pair "
"for inode %llu\n",
(unsigned long long)rec->ino);
ret = btrfs_insert_dir_item(trans, root, backref->name,
backref->namelen,
backref->dir, &location,
imode_to_type(rec->imode),
backref->index);
BUG_ON(ret);
btrfs_commit_transaction(trans, root);
repaired++;
}
if (!delete && (backref->found_inode_ref &&
backref->found_dir_index &&
backref->found_dir_item &&
!(backref->errors & REF_ERR_INDEX_UNMATCH) &&
!rec->found_inode_item)) {
ret = create_inode_item(root, rec, 0);
if (ret)
break;
repaired++;
}
}
return ret ? ret : repaired;
}
/*
* To determine the file type for nlink/inode_item repair
*
* Return 0 if file type is found and BTRFS_FT_* is stored into type.
* Return -ENOENT if file type is not found.
*/
static int find_file_type(struct inode_record *rec, u8 *type)
{
struct inode_backref *backref;
/* For inode item recovered case */
if (rec->found_inode_item) {
*type = imode_to_type(rec->imode);
return 0;
}
list_for_each_entry(backref, &rec->backrefs, list) {
if (backref->found_dir_index || backref->found_dir_item) {
*type = backref->filetype;
return 0;
}
}
return -ENOENT;
}
/*
* To determine the file name for nlink repair
*
* Return 0 if file name is found, set name and namelen.
* Return -ENOENT if file name is not found.
*/
static int find_file_name(struct inode_record *rec,
char *name, int *namelen)
{
struct inode_backref *backref;
list_for_each_entry(backref, &rec->backrefs, list) {
if (backref->found_dir_index || backref->found_dir_item ||
backref->found_inode_ref) {
memcpy(name, backref->name, backref->namelen);
*namelen = backref->namelen;
return 0;
}
}
return -ENOENT;
}
/* Reset the nlink of the inode to the correct one */
static int reset_nlink(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct inode_backref *backref;
struct inode_backref *tmp;
struct btrfs_key key;
struct btrfs_inode_item *inode_item;
int ret = 0;
/* We don't believe this either, reset it and iterate backref */
rec->found_link = 0;
/* Remove all backref including the valid ones */
list_for_each_entry_safe(backref, tmp, &rec->backrefs, list) {
ret = btrfs_unlink(trans, root, rec->ino, backref->dir,
backref->index, backref->name,
backref->namelen, 0);
if (ret < 0)
goto out;
/* remove invalid backref, so it won't be added back */
if (!(backref->found_dir_index &&
backref->found_dir_item &&
backref->found_inode_ref)) {
list_del(&backref->list);
free(backref);
} else {
rec->found_link++;
}
}
/* Set nlink to 0 */
key.objectid = rec->ino;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
if (ret < 0)
goto out;
if (ret > 0) {
ret = -ENOENT;
goto out;
}
inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_inode_item);
btrfs_set_inode_nlink(path->nodes[0], inode_item, 0);
btrfs_mark_buffer_dirty(path->nodes[0]);
btrfs_release_path(path);
/*
* Add back valid inode_ref/dir_item/dir_index,
* add_link() will handle the nlink inc, so new nlink must be correct
*/
list_for_each_entry(backref, &rec->backrefs, list) {
ret = btrfs_add_link(trans, root, rec->ino, backref->dir,
backref->name, backref->namelen,
backref->filetype, &backref->index, 1);
if (ret < 0)
goto out;
}
out:
btrfs_release_path(path);
return ret;
}
static int get_highest_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
u64 *highest_ino)
{
struct btrfs_key key, found_key;
int ret;
btrfs_init_path(path);
key.objectid = BTRFS_LAST_FREE_OBJECTID;
key.offset = -1;
key.type = BTRFS_INODE_ITEM_KEY;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret == 1) {
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0] - 1);
*highest_ino = found_key.objectid;
ret = 0;
}
if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
ret = -EOVERFLOW;
btrfs_release_path(path);
return ret;
}
static int repair_inode_nlinks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
char *dir_name = "lost+found";
char namebuf[BTRFS_NAME_LEN] = {0};
u64 lost_found_ino;
u32 mode = 0700;
u8 type = 0;
int namelen = 0;
int name_recovered = 0;
int type_recovered = 0;
int ret = 0;
/*
* Get file name and type first before these invalid inode ref
* are deleted by remove_all_invalid_backref()
*/
name_recovered = !find_file_name(rec, namebuf, &namelen);
type_recovered = !find_file_type(rec, &type);
if (!name_recovered) {
printf("Can't get file name for inode %llu, using '%llu' as fallback\n",
rec->ino, rec->ino);
namelen = count_digits(rec->ino);
sprintf(namebuf, "%llu", rec->ino);
name_recovered = 1;
}
if (!type_recovered) {
printf("Can't get file type for inode %llu, using FILE as fallback\n",
rec->ino);
type = BTRFS_FT_REG_FILE;
type_recovered = 1;
}
ret = reset_nlink(trans, root, path, rec);
if (ret < 0) {
fprintf(stderr,
"Failed to reset nlink for inode %llu: %s\n",
rec->ino, strerror(-ret));
goto out;
}
if (rec->found_link == 0) {
ret = get_highest_inode(trans, root, path, &lost_found_ino);
if (ret < 0)
goto out;
lost_found_ino++;
ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
mode);
if (ret < 0) {
fprintf(stderr, "Failed to create '%s' dir: %s\n",
dir_name, strerror(-ret));
goto out;
}
ret = btrfs_add_link(trans, root, rec->ino, lost_found_ino,
namebuf, namelen, type, NULL, 1);
/*
* Add ".INO" suffix several times to handle case where
* "FILENAME.INO" is already taken by another file.
*/
while (ret == -EEXIST) {
/*
* Conflicting file name, add ".INO" as suffix * +1 for '.'
*/
if (namelen + count_digits(rec->ino) + 1 >
BTRFS_NAME_LEN) {
ret = -EFBIG;
goto out;
}
snprintf(namebuf + namelen, BTRFS_NAME_LEN - namelen,
".%llu", rec->ino);
namelen += count_digits(rec->ino) + 1;
ret = btrfs_add_link(trans, root, rec->ino,
lost_found_ino, namebuf,
namelen, type, NULL, 1);
}
if (ret < 0) {
fprintf(stderr,
"Failed to link the inode %llu to %s dir: %s\n",
rec->ino, dir_name, strerror(-ret));
goto out;
}
/*
* Just increase the found_link, don't actually add the
* backref. This will make things easier and this inode
* record will be freed after the repair is done.
* So fsck will not report problem about this inode.
*/
rec->found_link++;
printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
namelen, namebuf, dir_name);
}
printf("Fixed the nlink of inode %llu\n", rec->ino);
out:
/*
* Clear the flag anyway, or we will loop forever for the same inode
* as it will not be removed from the bad inode list and the dead loop
* happens.
*/
rec->errors &= ~I_ERR_LINK_COUNT_WRONG;
btrfs_release_path(path);
return ret;
}
/*
* Check if there is any normal(reg or prealloc) file extent for given
* ino.
* This is used to determine the file type when neither its dir_index/item or
* inode_item exists.
*
* This will *NOT* report error, if any error happens, just consider it does
* not have any normal file extent.
*/
static int find_normal_file_extent(struct btrfs_root *root, u64 ino)
{
struct btrfs_path path;
struct btrfs_key key;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
u8 type;
int ret = 0;
btrfs_init_path(&path);
key.objectid = ino;
key.type = BTRFS_EXTENT_DATA_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
if (ret < 0) {
ret = 0;
goto out;
}
if (ret && path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
ret = btrfs_next_leaf(root, &path);
if (ret) {
ret = 0;
goto out;
}
}
while (1) {
btrfs_item_key_to_cpu(path.nodes[0], &found_key,
path.slots[0]);
if (found_key.objectid != ino ||
found_key.type != BTRFS_EXTENT_DATA_KEY)
break;
fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path.nodes[0], fi);
if (type != BTRFS_FILE_EXTENT_INLINE) {
ret = 1;
goto out;
}
}
out:
btrfs_release_path(&path);
return ret;
}
static u32 btrfs_type_to_imode(u8 type)
{
static u32 imode_by_btrfs_type[] = {
[BTRFS_FT_REG_FILE] = S_IFREG,
[BTRFS_FT_DIR] = S_IFDIR,
[BTRFS_FT_CHRDEV] = S_IFCHR,
[BTRFS_FT_BLKDEV] = S_IFBLK,
[BTRFS_FT_FIFO] = S_IFIFO,
[BTRFS_FT_SOCK] = S_IFSOCK,
[BTRFS_FT_SYMLINK] = S_IFLNK,
};
return imode_by_btrfs_type[(type)];
}
static int repair_inode_no_item(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
u8 filetype;
u32 mode = 0700;
int type_recovered = 0;
int ret = 0;
printf("Trying to rebuild inode:%llu\n", rec->ino);
type_recovered = !find_file_type(rec, &filetype);
/*
* Try to determine inode type if type not found.
*
* For found regular file extent, it must be FILE.
* For found dir_item/index, it must be DIR.
*
* For undetermined one, use FILE as fallback.
*
* TODO:
* 1. If found backref(inode_index/item is already handled) to it,
* it must be DIR.
* Need new inode-inode ref structure to allow search for that.
*/
if (!type_recovered) {
if (rec->found_file_extent &&
find_normal_file_extent(root, rec->ino)) {
type_recovered = 1;
filetype = BTRFS_FT_REG_FILE;
} else if (rec->found_dir_item) {
type_recovered = 1;
filetype = BTRFS_FT_DIR;
} else if (!list_empty(&rec->orphan_extents)) {
type_recovered = 1;
filetype = BTRFS_FT_REG_FILE;
} else{
printf("Can't determine the filetype for inode %llu, assume it is a normal file\n",
rec->ino);
type_recovered = 1;
filetype = BTRFS_FT_REG_FILE;
}
}
ret = btrfs_new_inode(trans, root, rec->ino,
mode | btrfs_type_to_imode(filetype));
if (ret < 0)
goto out;
/*
* Here inode rebuild is done, we only rebuild the inode item,
* don't repair the nlink(like move to lost+found).
* That is the job of nlink repair.
*
* We just fill the record and return
*/
rec->found_dir_item = 1;
rec->imode = mode | btrfs_type_to_imode(filetype);
rec->nlink = 0;
rec->errors &= ~I_ERR_NO_INODE_ITEM;
/* Ensure the inode_nlinks repair function will be called */
rec->errors |= I_ERR_LINK_COUNT_WRONG;
out:
return ret;
}
static int repair_inode_orphan_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct orphan_data_extent *orphan;
struct orphan_data_extent *tmp;
int ret = 0;
list_for_each_entry_safe(orphan, tmp, &rec->orphan_extents, list) {
/*
* Check for conflicting file extents
*
* Here we don't know whether the extents is compressed or not,
* so we can only assume it not compressed nor data offset,
* and use its disk_len as extent length.
*/
ret = btrfs_get_extent(NULL, root, path, orphan->objectid,
orphan->offset, orphan->disk_len, 0);
btrfs_release_path(path);
if (ret < 0)
goto out;
if (!ret) {
fprintf(stderr,
"orphan extent (%llu, %llu) conflicts, delete the orphan\n",
orphan->disk_bytenr, orphan->disk_len);
ret = btrfs_free_extent(trans,
root->fs_info->extent_root,
orphan->disk_bytenr, orphan->disk_len,
0, root->objectid, orphan->objectid,
orphan->offset);
if (ret < 0)
goto out;
}
ret = btrfs_insert_file_extent(trans, root, orphan->objectid,
orphan->offset, orphan->disk_bytenr,
orphan->disk_len, orphan->disk_len);
if (ret < 0)
goto out;
/* Update file size info */
rec->found_size += orphan->disk_len;
if (rec->found_size == rec->nbytes)
rec->errors &= ~I_ERR_FILE_NBYTES_WRONG;
/* Update the file extent hole info too */
ret = del_file_extent_hole(&rec->holes, orphan->offset,
orphan->disk_len);
if (ret < 0)
goto out;
if (RB_EMPTY_ROOT(&rec->holes))
rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
list_del(&orphan->list);
free(orphan);
}
rec->errors &= ~I_ERR_FILE_EXTENT_ORPHAN;
out:
return ret;
}
static int repair_inode_discount_extent(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct inode_record *rec)
{
struct rb_node *node;
struct file_extent_hole *hole;
int found = 0;
int ret = 0;
node = rb_first(&rec->holes);
while (node) {
found = 1;
hole = rb_entry(node, struct file_extent_hole, node);
ret = btrfs_punch_hole(trans, root, rec->ino,
hole->start, hole->len);
if (ret < 0)
goto out;
ret = del_file_extent_hole(&rec->holes, hole->start,
hole->len);
if (ret < 0)
goto out;
if (RB_EMPTY_ROOT(&rec->holes))
rec->errors &= ~I_ERR_FILE_EXTENT_DISCOUNT;
node = rb_first(&rec->holes);
}
/* special case for a file losing all its file extent */
if (!found) {
ret = btrfs_punch_hole(trans, root, rec->ino, 0,
round_up(rec->isize,
root->fs_info->sectorsize));
if (ret < 0)
goto out;
}
printf("Fixed discount file extents for inode: %llu in root: %llu\n",
rec->ino, root->objectid);
out:
return ret;
}
static int try_repair_inode(struct btrfs_root *root, struct inode_record *rec)
{
struct btrfs_trans_handle *trans;
struct btrfs_path path;
int ret = 0;
if (!(rec->errors & (I_ERR_DIR_ISIZE_WRONG |
I_ERR_NO_ORPHAN_ITEM |
I_ERR_LINK_COUNT_WRONG |
I_ERR_NO_INODE_ITEM |
I_ERR_FILE_EXTENT_ORPHAN |
I_ERR_FILE_EXTENT_DISCOUNT|
I_ERR_FILE_NBYTES_WRONG)))
return rec->errors;
/*
* For nlink repair, it may create a dir and add link, so
* 2 for parent(256)'s dir_index and dir_item
* 2 for lost+found dir's inode_item and inode_ref
* 1 for the new inode_ref of the file
* 2 for lost+found dir's dir_index and dir_item for the file
*/
trans = btrfs_start_transaction(root, 7);
if (IS_ERR(trans))
return PTR_ERR(trans);
btrfs_init_path(&path);
if (rec->errors & I_ERR_NO_INODE_ITEM)
ret = repair_inode_no_item(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_FILE_EXTENT_ORPHAN)
ret = repair_inode_orphan_extent(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_FILE_EXTENT_DISCOUNT)
ret = repair_inode_discount_extent(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_DIR_ISIZE_WRONG)
ret = repair_inode_isize(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_NO_ORPHAN_ITEM)
ret = repair_inode_orphan_item(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_LINK_COUNT_WRONG)
ret = repair_inode_nlinks(trans, root, &path, rec);
if (!ret && rec->errors & I_ERR_FILE_NBYTES_WRONG)
ret = repair_inode_nbytes(trans, root, &path, rec);
btrfs_commit_transaction(trans, root);
btrfs_release_path(&path);
return ret;
}
static int check_inode_recs(struct btrfs_root *root,
struct cache_tree *inode_cache)
{
struct cache_extent *cache;
struct ptr_node *node;
struct inode_record *rec;
struct inode_backref *backref;
int stage = 0;
int ret = 0;
int err = 0;
u64 error = 0;
u64 root_dirid = btrfs_root_dirid(&root->root_item);
if (btrfs_root_refs(&root->root_item) == 0) {
if (!cache_tree_empty(inode_cache))
fprintf(stderr, "warning line %d\n", __LINE__);
return 0;
}
/*
* We need to repair backrefs first because we could change some of the
* errors in the inode recs.
*
* We also need to go through and delete invalid backrefs first and then
* add the correct ones second. We do this because we may get EEXIST
* when adding back the correct index because we hadn't yet deleted the
* invalid index.
*
* For example, if we were missing a dir index then the directories
* isize would be wrong, so if we fixed the isize to what we thought it
* would be and then fixed the backref we'd still have a invalid fs, so
* we need to add back the dir index and then check to see if the isize
* is still wrong.
*/
while (stage < 3) {
stage++;
if (stage == 3 && !err)
break;
cache = search_cache_extent(inode_cache, 0);
while (repair && cache) {
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
cache = next_cache_extent(cache);
/* Need to free everything up and rescan */
if (stage == 3) {
remove_cache_extent(inode_cache, &node->cache);
free(node);
free_inode_rec(rec);
continue;
}
if (list_empty(&rec->backrefs))
continue;
ret = repair_inode_backrefs(root, rec, inode_cache,
stage == 1);
if (ret < 0) {
err = ret;
stage = 2;
break;
} if (ret > 0) {
err = -EAGAIN;
}
}
}
if (err)
return err;
rec = get_inode_rec(inode_cache, root_dirid, 0);
BUG_ON(IS_ERR(rec));
if (rec) {
ret = check_root_dir(rec);
if (ret) {
fprintf(stderr, "root %llu root dir %llu error\n",
(unsigned long long)root->root_key.objectid,
(unsigned long long)root_dirid);
print_inode_error(root, rec);
error++;
}
} else {
if (repair) {
struct btrfs_trans_handle *trans;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
return err;
}
fprintf(stderr,
"root %llu missing its root dir, recreating\n",
(unsigned long long)root->objectid);
ret = btrfs_make_root_dir(trans, root, root_dirid);
BUG_ON(ret);
btrfs_commit_transaction(trans, root);
return -EAGAIN;
}
fprintf(stderr, "root %llu root dir %llu not found\n",
(unsigned long long)root->root_key.objectid,
(unsigned long long)root_dirid);
}
while (1) {
cache = search_cache_extent(inode_cache, 0);
if (!cache)
break;
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
remove_cache_extent(inode_cache, &node->cache);
free(node);
if (rec->ino == root_dirid ||
rec->ino == BTRFS_ORPHAN_OBJECTID) {
free_inode_rec(rec);
continue;
}
if (rec->errors & I_ERR_NO_ORPHAN_ITEM) {
ret = check_orphan_item(root, rec->ino);
if (ret == 0)
rec->errors &= ~I_ERR_NO_ORPHAN_ITEM;
if (can_free_inode_rec(rec)) {
free_inode_rec(rec);
continue;
}
}
if (!rec->found_inode_item)
rec->errors |= I_ERR_NO_INODE_ITEM;
if (rec->found_link != rec->nlink)
rec->errors |= I_ERR_LINK_COUNT_WRONG;
if (repair) {
ret = try_repair_inode(root, rec);
if (ret == 0 && can_free_inode_rec(rec)) {
free_inode_rec(rec);
continue;
}
ret = 0;
}
if (!(repair && ret == 0))
error++;
print_inode_error(root, rec);
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->found_dir_item)
backref->errors |= REF_ERR_NO_DIR_ITEM;
if (!backref->found_dir_index)
backref->errors |= REF_ERR_NO_DIR_INDEX;
if (!backref->found_inode_ref)
backref->errors |= REF_ERR_NO_INODE_REF;
fprintf(stderr, "\tunresolved ref dir %llu index %llu"
" namelen %u name %s filetype %d errors %x",
(unsigned long long)backref->dir,
(unsigned long long)backref->index,
backref->namelen, backref->name,
backref->filetype, backref->errors);
print_ref_error(backref->errors);
}
free_inode_rec(rec);
}
return (error > 0) ? -1 : 0;
}
static struct root_record *get_root_rec(struct cache_tree *root_cache,
u64 objectid)
{
struct cache_extent *cache;
struct root_record *rec = NULL;
int ret;
cache = lookup_cache_extent(root_cache, objectid, 1);
if (cache) {
rec = container_of(cache, struct root_record, cache);
} else {
rec = calloc(1, sizeof(*rec));
if (!rec)
return ERR_PTR(-ENOMEM);
rec->objectid = objectid;
INIT_LIST_HEAD(&rec->backrefs);
rec->cache.start = objectid;
rec->cache.size = 1;
ret = insert_cache_extent(root_cache, &rec->cache);
if (ret)
return ERR_PTR(-EEXIST);
}
return rec;
}
static struct root_backref *get_root_backref(struct root_record *rec,
u64 ref_root, u64 dir, u64 index,
const char *name, int namelen)
{
struct root_backref *backref;
list_for_each_entry(backref, &rec->backrefs, list) {
if (backref->ref_root != ref_root || backref->dir != dir ||
backref->namelen != namelen)
continue;
if (memcmp(name, backref->name, namelen))
continue;
return backref;
}
backref = calloc(1, sizeof(*backref) + namelen + 1);
if (!backref)
return NULL;
backref->ref_root = ref_root;
backref->dir = dir;
backref->index = index;
backref->namelen = namelen;
memcpy(backref->name, name, namelen);
backref->name[namelen] = '\0';
list_add_tail(&backref->list, &rec->backrefs);
return backref;
}
static void free_root_record(struct cache_extent *cache)
{
struct root_record *rec;
struct root_backref *backref;
rec = container_of(cache, struct root_record, cache);
while (!list_empty(&rec->backrefs)) {
backref = to_root_backref(rec->backrefs.next);
list_del(&backref->list);
free(backref);
}
free(rec);
}
FREE_EXTENT_CACHE_BASED_TREE(root_recs, free_root_record);
static int add_root_backref(struct cache_tree *root_cache,
u64 root_id, u64 ref_root, u64 dir, u64 index,
const char *name, int namelen,
int item_type, int errors)
{
struct root_record *rec;
struct root_backref *backref;
rec = get_root_rec(root_cache, root_id);
BUG_ON(IS_ERR(rec));
backref = get_root_backref(rec, ref_root, dir, index, name, namelen);
BUG_ON(!backref);
backref->errors |= errors;
if (item_type != BTRFS_DIR_ITEM_KEY) {
if (backref->found_dir_index || backref->found_back_ref ||
backref->found_forward_ref) {
if (backref->index != index)
backref->errors |= REF_ERR_INDEX_UNMATCH;
} else {
backref->index = index;
}
}
if (item_type == BTRFS_DIR_ITEM_KEY) {
if (backref->found_forward_ref)
rec->found_ref++;
backref->found_dir_item = 1;
} else if (item_type == BTRFS_DIR_INDEX_KEY) {
backref->found_dir_index = 1;
} else if (item_type == BTRFS_ROOT_REF_KEY) {
if (backref->found_forward_ref)
backref->errors |= REF_ERR_DUP_ROOT_REF;
else if (backref->found_dir_item)
rec->found_ref++;
backref->found_forward_ref = 1;
} else if (item_type == BTRFS_ROOT_BACKREF_KEY) {
if (backref->found_back_ref)
backref->errors |= REF_ERR_DUP_ROOT_BACKREF;
backref->found_back_ref = 1;
} else {
BUG_ON(1);
}
if (backref->found_forward_ref && backref->found_dir_item)
backref->reachable = 1;
return 0;
}
static int merge_root_recs(struct btrfs_root *root,
struct cache_tree *src_cache,
struct cache_tree *dst_cache)
{
struct cache_extent *cache;
struct ptr_node *node;
struct inode_record *rec;
struct inode_backref *backref;
int ret = 0;
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
free_inode_recs_tree(src_cache);
return 0;
}
while (1) {
cache = search_cache_extent(src_cache, 0);
if (!cache)
break;
node = container_of(cache, struct ptr_node, cache);
rec = node->data;
remove_cache_extent(src_cache, &node->cache);
free(node);
ret = is_child_root(root, root->objectid, rec->ino);
if (ret < 0)
break;
else if (ret == 0)
goto skip;
list_for_each_entry(backref, &rec->backrefs, list) {
BUG_ON(backref->found_inode_ref);
if (backref->found_dir_item)
add_root_backref(dst_cache, rec->ino,
root->root_key.objectid, backref->dir,
backref->index, backref->name,
backref->namelen, BTRFS_DIR_ITEM_KEY,
backref->errors);
if (backref->found_dir_index)
add_root_backref(dst_cache, rec->ino,
root->root_key.objectid, backref->dir,
backref->index, backref->name,
backref->namelen, BTRFS_DIR_INDEX_KEY,
backref->errors);
}
skip:
free_inode_rec(rec);
}
if (ret < 0)
return ret;
return 0;
}
static int check_root_refs(struct btrfs_root *root,
struct cache_tree *root_cache)
{
struct root_record *rec;
struct root_record *ref_root;
struct root_backref *backref;
struct cache_extent *cache;
int loop = 1;
int ret;
int error;
int errors = 0;
rec = get_root_rec(root_cache, BTRFS_FS_TREE_OBJECTID);
BUG_ON(IS_ERR(rec));
rec->found_ref = 1;
/* fixme: this can not detect circular references */
while (loop) {
loop = 0;
cache = search_cache_extent(root_cache, 0);
while (1) {
if (!cache)
break;
rec = container_of(cache, struct root_record, cache);
cache = next_cache_extent(cache);
if (rec->found_ref == 0)
continue;
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->reachable)
continue;
ref_root = get_root_rec(root_cache,
backref->ref_root);
BUG_ON(IS_ERR(ref_root));
if (ref_root->found_ref > 0)
continue;
backref->reachable = 0;
rec->found_ref--;
if (rec->found_ref == 0)
loop = 1;
}
}
}
cache = search_cache_extent(root_cache, 0);
while (1) {
if (!cache)
break;
rec = container_of(cache, struct root_record, cache);
cache = next_cache_extent(cache);
if (rec->found_ref == 0 &&
rec->objectid >= BTRFS_FIRST_FREE_OBJECTID &&
rec->objectid <= BTRFS_LAST_FREE_OBJECTID) {
ret = check_orphan_item(root->fs_info->tree_root,
rec->objectid);
if (ret == 0)
continue;
/*
* If we don't have a root item then we likely just have
* a dir item in a snapshot for this root but no actual
* ref key or anything so it's meaningless.
*/
if (!rec->found_root_item)
continue;
errors++;
fprintf(stderr, "fs tree %llu not referenced\n",
(unsigned long long)rec->objectid);
}
error = 0;
if (rec->found_ref > 0 && !rec->found_root_item)
error = 1;
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->found_dir_item)
backref->errors |= REF_ERR_NO_DIR_ITEM;
if (!backref->found_dir_index)
backref->errors |= REF_ERR_NO_DIR_INDEX;
if (!backref->found_back_ref)
backref->errors |= REF_ERR_NO_ROOT_BACKREF;
if (!backref->found_forward_ref)
backref->errors |= REF_ERR_NO_ROOT_REF;
if (backref->reachable && backref->errors)
error = 1;
}
if (!error)
continue;
errors++;
fprintf(stderr, "fs tree %llu refs %u %s\n",
(unsigned long long)rec->objectid, rec->found_ref,
rec->found_root_item ? "" : "not found");
list_for_each_entry(backref, &rec->backrefs, list) {
if (!backref->reachable)
continue;
if (!backref->errors && rec->found_root_item)
continue;
fprintf(stderr, "\tunresolved ref root %llu dir %llu"
" index %llu namelen %u name %s errors %x\n",
(unsigned long long)backref->ref_root,
(unsigned long long)backref->dir,
(unsigned long long)backref->index,
backref->namelen, backref->name,
backref->errors);
print_ref_error(backref->errors);
}
}
return errors > 0 ? 1 : 0;
}
static int process_root_ref(struct extent_buffer *eb, int slot,
struct btrfs_key *key,
struct cache_tree *root_cache)
{
u64 dirid;
u64 index;
u32 len;
u32 name_len;
struct btrfs_root_ref *ref;
char namebuf[BTRFS_NAME_LEN];
int error;
ref = btrfs_item_ptr(eb, slot, struct btrfs_root_ref);
dirid = btrfs_root_ref_dirid(eb, ref);
index = btrfs_root_ref_sequence(eb, ref);
name_len = btrfs_root_ref_name_len(eb, ref);
if (name_len <= BTRFS_NAME_LEN) {
len = name_len;
error = 0;
} else {
len = BTRFS_NAME_LEN;
error = REF_ERR_NAME_TOO_LONG;
}
read_extent_buffer(eb, namebuf, (unsigned long)(ref + 1), len);
if (key->type == BTRFS_ROOT_REF_KEY) {
add_root_backref(root_cache, key->offset, key->objectid, dirid,
index, namebuf, len, key->type, error);
} else {
add_root_backref(root_cache, key->objectid, key->offset, dirid,
index, namebuf, len, key->type, error);
}
return 0;
}
static void free_corrupt_block(struct cache_extent *cache)
{
struct btrfs_corrupt_block *corrupt;
corrupt = container_of(cache, struct btrfs_corrupt_block, cache);
free(corrupt);
}
FREE_EXTENT_CACHE_BASED_TREE(corrupt_blocks, free_corrupt_block);
/*
* Repair the btree of the given root.
*
* The fix is to remove the node key in corrupt_blocks cache_tree.
* and rebalance the tree.
* After the fix, the btree should be writeable.
*/
static int repair_btree(struct btrfs_root *root,
struct cache_tree *corrupt_blocks)
{
struct btrfs_trans_handle *trans;
struct btrfs_path path;
struct btrfs_corrupt_block *corrupt;
struct cache_extent *cache;
struct btrfs_key key;
u64 offset;
int level;
int ret = 0;
if (cache_tree_empty(corrupt_blocks))
return 0;
trans = btrfs_start_transaction(root, 1);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
fprintf(stderr, "Error starting transaction: %s\n",
strerror(-ret));
return ret;
}
btrfs_init_path(&path);
cache = first_cache_extent(corrupt_blocks);
while (cache) {
corrupt = container_of(cache, struct btrfs_corrupt_block,
cache);
level = corrupt->level;
path.lowest_level = level;
key.objectid = corrupt->key.objectid;
key.type = corrupt->key.type;
key.offset = corrupt->key.offset;
/*
* Here we don't want to do any tree balance, since it may
* cause a balance with corrupted brother leaf/node,
* so ins_len set to 0 here.
* Balance will be done after all corrupt node/leaf is deleted.
*/
ret = btrfs_search_slot(trans, root, &key, &path, 0, 1);
if (ret < 0)
goto out;
offset = btrfs_node_blockptr(path.nodes[level],
path.slots[level]);
/* Remove the ptr */
ret = btrfs_del_ptr(root, &path, level, path.slots[level]);
if (ret < 0)
goto out;
/*
* Remove the corresponding extent
* return value is not concerned.
*/
btrfs_release_path(&path);
ret = btrfs_free_extent(trans, root, offset,
root->fs_info->nodesize, 0,
root->root_key.objectid, level - 1, 0);
cache = next_cache_extent(cache);
}
/* Balance the btree using btrfs_search_slot() */
cache = first_cache_extent(corrupt_blocks);
while (cache) {
corrupt = container_of(cache, struct btrfs_corrupt_block,
cache);
memcpy(&key, &corrupt->key, sizeof(key));
ret = btrfs_search_slot(trans, root, &key, &path, -1, 1);
if (ret < 0)
goto out;
/* return will always >0 since it won't find the item */
ret = 0;
btrfs_release_path(&path);
cache = next_cache_extent(cache);
}
out<