[XFS] Avoid replaying inode buffer initialisation log items if on-disk version is newer.

SGI-PV: 969656
SGI-Modid: xfs-linux-melb:xfs-kern:29676a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index d7e1361..fa25b7d 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -52,6 +52,11 @@
 #define	XFS_BLI_UDQUOT_BUF	0x4
 #define XFS_BLI_PDQUOT_BUF	0x8
 #define	XFS_BLI_GDQUOT_BUF	0x10
+/*
+ * This flag indicates that the buffer contains newly allocated
+ * inodes.
+ */
+#define	XFS_BLI_INODE_NEW_BUF	0x20
 
 #define	XFS_BLI_CHUNK		128
 #define	XFS_BLI_SHIFT		7
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 8ae6e8e..dacb197 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1874,6 +1874,7 @@
 /*ARGSUSED*/
 STATIC void
 xlog_recover_do_reg_buffer(
+	xfs_mount_t		*mp,
 	xlog_recover_item_t	*item,
 	xfs_buf_t		*bp,
 	xfs_buf_log_format_t	*buf_f)
@@ -1884,6 +1885,50 @@
 	unsigned int		*data_map = NULL;
 	unsigned int		map_size = 0;
 	int                     error;
+	int			stale_buf = 1;
+
+	/*
+	 * Scan through the on-disk inode buffer and attempt to
+	 * determine if it has been written to since it was logged.
+	 *
+	 * - If any of the magic numbers are incorrect then the buffer is stale
+	 * - If any of the modes are non-zero then the buffer is not stale
+	 * - If all of the modes are zero and at least one of the generation
+	 *   counts is non-zero then the buffer is stale
+	 *
+	 * If the end result is a stale buffer then the log buffer is replayed;
+	 * otherwise it is skipped.
+	 *
+	 * This heuristic is not perfect.  It can be improved by scanning the
+	 * entire inode chunk for evidence that any of the inode clusters have
+	 * been updated.  To fix this problem completely we will need a major
+	 * architectural change to the logging system.
+	 */
+	if (buf_f->blf_flags & XFS_BLI_INODE_NEW_BUF) {
+		xfs_dinode_t    *dip;
+		int             inodes_per_buf;
+		int		mode_count = 0;
+		int		gen_count = 0;
+
+		stale_buf = 0;
+		inodes_per_buf = XFS_BUF_COUNT(bp) >> mp->m_sb.sb_inodelog;
+		for (i = 0; i < inodes_per_buf; i++) {
+			dip = (xfs_dinode_t *)xfs_buf_offset(bp,
+				i * mp->m_sb.sb_inodesize);
+			if (be16_to_cpu(dip->di_core.di_magic) !=
+					XFS_DINODE_MAGIC) {
+				stale_buf = 1;
+				break;
+			}
+			if (be16_to_cpu(dip->di_core.di_mode))
+				mode_count++;
+			/* di_gen is a 32-bit field; a 16-bit swap truncates it */
+			if (be32_to_cpu(dip->di_core.di_gen))
+				gen_count++;
+		}
+		if (!mode_count && gen_count)
+			stale_buf = 1;
+	}
 
 	switch (buf_f->blf_type) {
 	case XFS_LI_BUF:
@@ -1917,7 +1962,7 @@
 					       -1, 0, XFS_QMOPT_DOWARN,
 					       "dquot_buf_recover");
 		}
-		if (!error)
+		if (!error && stale_buf)
 			memcpy(xfs_buf_offset(bp,
 				(uint)bit << XFS_BLI_SHIFT),	/* dest */
 				item->ri_buf[i].i_addr,		/* source */
@@ -2089,7 +2134,7 @@
 	if (log->l_quotaoffs_flag & type)
 		return;
 
-	xlog_recover_do_reg_buffer(item, bp, buf_f);
+	xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 }
 
 /*
@@ -2190,7 +2235,7 @@
 		  (XFS_BLI_UDQUOT_BUF|XFS_BLI_PDQUOT_BUF|XFS_BLI_GDQUOT_BUF)) {
 		xlog_recover_do_dquot_buffer(mp, log, item, bp, buf_f);
 	} else {
-		xlog_recover_do_reg_buffer(item, bp, buf_f);
+		xlog_recover_do_reg_buffer(mp, item, bp, buf_f);
 	}
 	if (error)
 		return XFS_ERROR(error);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 60b6b89..95fff68 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -966,6 +966,7 @@
 	ASSERT(atomic_read(&bip->bli_refcount) > 0);
 
 	bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+	bip->bli_format.blf_flags |= XFS_BLI_INODE_NEW_BUF;
 }