xref: /OK3568_Linux_fs/kernel/fs/jfs/jfs_logmgr.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  *   Copyright (C) International Business Machines Corp., 2000-2004
4*4882a593Smuzhiyun  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
5*4882a593Smuzhiyun  */
6*4882a593Smuzhiyun 
7*4882a593Smuzhiyun /*
8*4882a593Smuzhiyun  *	jfs_logmgr.c: log manager
9*4882a593Smuzhiyun  *
10*4882a593Smuzhiyun  * for related information, see transaction manager (jfs_txnmgr.c), and
11*4882a593Smuzhiyun  * recovery manager (jfs_logredo.c).
12*4882a593Smuzhiyun  *
13*4882a593Smuzhiyun  * note: for detail, RTFS.
14*4882a593Smuzhiyun  *
15*4882a593Smuzhiyun  *	log buffer manager:
16*4882a593Smuzhiyun  * special purpose buffer manager supporting log i/o requirements.
17*4882a593Smuzhiyun  * per log serial pageout of logpage
18*4882a593Smuzhiyun  * queuing i/o requests and redrive i/o at iodone
19*4882a593Smuzhiyun  * maintain current logpage buffer
20*4882a593Smuzhiyun  * no caching since append only
21*4882a593Smuzhiyun  * appropriate jfs buffer cache buffers as needed
22*4882a593Smuzhiyun  *
23*4882a593Smuzhiyun  *	group commit:
24*4882a593Smuzhiyun  * transactions which wrote COMMIT records in the same in-memory
25*4882a593Smuzhiyun  * log page during the pageout of previous/current log page(s) are
26*4882a593Smuzhiyun  * committed together by the pageout of the page.
27*4882a593Smuzhiyun  *
28*4882a593Smuzhiyun  *	TBD lazy commit:
29*4882a593Smuzhiyun  * transactions are committed asynchronously when the log page
30*4882a593Smuzhiyun  * containing its COMMIT is paged out when it becomes full;
31*4882a593Smuzhiyun  *
32*4882a593Smuzhiyun  *	serialization:
33*4882a593Smuzhiyun  * . a per log lock serialize log write.
34*4882a593Smuzhiyun  * . a per log lock serialize group commit.
35*4882a593Smuzhiyun  * . a per log lock serialize log open/close;
36*4882a593Smuzhiyun  *
37*4882a593Smuzhiyun  *	TBD log integrity:
38*4882a593Smuzhiyun  * careful-write (ping-pong) of last logpage to recover from crash
39*4882a593Smuzhiyun  * in overwrite.
40*4882a593Smuzhiyun  * detection of split (out-of-order) write of physical sectors
41*4882a593Smuzhiyun  * of last logpage via timestamp at end of each sector
42*4882a593Smuzhiyun  * (with its mirror data array at trailer).
43*4882a593Smuzhiyun  *
44*4882a593Smuzhiyun  *	alternatives:
45*4882a593Smuzhiyun  * lsn - 64-bit monotonically increasing integer vs
46*4882a593Smuzhiyun  * 32-bit lspn and page eor.
47*4882a593Smuzhiyun  */
48*4882a593Smuzhiyun 
49*4882a593Smuzhiyun #include <linux/fs.h>
50*4882a593Smuzhiyun #include <linux/blkdev.h>
51*4882a593Smuzhiyun #include <linux/interrupt.h>
52*4882a593Smuzhiyun #include <linux/completion.h>
53*4882a593Smuzhiyun #include <linux/kthread.h>
54*4882a593Smuzhiyun #include <linux/buffer_head.h>		/* for sync_blockdev() */
55*4882a593Smuzhiyun #include <linux/bio.h>
56*4882a593Smuzhiyun #include <linux/freezer.h>
57*4882a593Smuzhiyun #include <linux/export.h>
58*4882a593Smuzhiyun #include <linux/delay.h>
59*4882a593Smuzhiyun #include <linux/mutex.h>
60*4882a593Smuzhiyun #include <linux/seq_file.h>
61*4882a593Smuzhiyun #include <linux/slab.h>
62*4882a593Smuzhiyun #include "jfs_incore.h"
63*4882a593Smuzhiyun #include "jfs_filsys.h"
64*4882a593Smuzhiyun #include "jfs_metapage.h"
65*4882a593Smuzhiyun #include "jfs_superblock.h"
66*4882a593Smuzhiyun #include "jfs_txnmgr.h"
67*4882a593Smuzhiyun #include "jfs_debug.h"
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun /*
71*4882a593Smuzhiyun  * lbuf's ready to be redriven.  Protected by log_redrive_lock (jfsIO thread)
72*4882a593Smuzhiyun  */
73*4882a593Smuzhiyun static struct lbuf *log_redrive_list;
74*4882a593Smuzhiyun static DEFINE_SPINLOCK(log_redrive_lock);
75*4882a593Smuzhiyun 
76*4882a593Smuzhiyun 
77*4882a593Smuzhiyun /*
78*4882a593Smuzhiyun  *	log read/write serialization (per log)
79*4882a593Smuzhiyun  */
80*4882a593Smuzhiyun #define LOG_LOCK_INIT(log)	mutex_init(&(log)->loglock)
81*4882a593Smuzhiyun #define LOG_LOCK(log)		mutex_lock(&((log)->loglock))
82*4882a593Smuzhiyun #define LOG_UNLOCK(log)		mutex_unlock(&((log)->loglock))
83*4882a593Smuzhiyun 
84*4882a593Smuzhiyun 
85*4882a593Smuzhiyun /*
86*4882a593Smuzhiyun  *	log group commit serialization (per log)
87*4882a593Smuzhiyun  */
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun #define LOGGC_LOCK_INIT(log)	spin_lock_init(&(log)->gclock)
90*4882a593Smuzhiyun #define LOGGC_LOCK(log)		spin_lock_irq(&(log)->gclock)
91*4882a593Smuzhiyun #define LOGGC_UNLOCK(log)	spin_unlock_irq(&(log)->gclock)
92*4882a593Smuzhiyun #define LOGGC_WAKEUP(tblk)	wake_up_all(&(tblk)->gcwait)
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun /*
95*4882a593Smuzhiyun  *	log sync serialization (per log)
96*4882a593Smuzhiyun  */
97*4882a593Smuzhiyun #define	LOGSYNC_DELTA(logsize)		min((logsize)/8, 128*LOGPSIZE)
98*4882a593Smuzhiyun #define	LOGSYNC_BARRIER(logsize)	((logsize)/4)
99*4882a593Smuzhiyun /*
100*4882a593Smuzhiyun #define	LOGSYNC_DELTA(logsize)		min((logsize)/4, 256*LOGPSIZE)
101*4882a593Smuzhiyun #define	LOGSYNC_BARRIER(logsize)	((logsize)/2)
102*4882a593Smuzhiyun */
103*4882a593Smuzhiyun 
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun /*
106*4882a593Smuzhiyun  *	log buffer cache synchronization
107*4882a593Smuzhiyun  */
108*4882a593Smuzhiyun static DEFINE_SPINLOCK(jfsLCacheLock);
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun #define	LCACHE_LOCK(flags)	spin_lock_irqsave(&jfsLCacheLock, flags)
111*4882a593Smuzhiyun #define	LCACHE_UNLOCK(flags)	spin_unlock_irqrestore(&jfsLCacheLock, flags)
112*4882a593Smuzhiyun 
113*4882a593Smuzhiyun /*
114*4882a593Smuzhiyun  * See __SLEEP_COND in jfs_locks.h
115*4882a593Smuzhiyun  */
116*4882a593Smuzhiyun #define LCACHE_SLEEP_COND(wq, cond, flags)	\
117*4882a593Smuzhiyun do {						\
118*4882a593Smuzhiyun 	if (cond)				\
119*4882a593Smuzhiyun 		break;				\
120*4882a593Smuzhiyun 	__SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \
121*4882a593Smuzhiyun } while (0)
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun #define	LCACHE_WAKEUP(event)	wake_up(event)
124*4882a593Smuzhiyun 
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun /*
127*4882a593Smuzhiyun  *	lbuf buffer cache (lCache) control
128*4882a593Smuzhiyun  */
129*4882a593Smuzhiyun /* log buffer manager pageout control (cumulative, inclusive) */
130*4882a593Smuzhiyun #define	lbmREAD		0x0001
131*4882a593Smuzhiyun #define	lbmWRITE	0x0002	/* enqueue at tail of write queue;
132*4882a593Smuzhiyun 				 * init pageout if at head of queue;
133*4882a593Smuzhiyun 				 */
134*4882a593Smuzhiyun #define	lbmRELEASE	0x0004	/* remove from write queue
135*4882a593Smuzhiyun 				 * at completion of pageout;
136*4882a593Smuzhiyun 				 * do not free/recycle it yet:
137*4882a593Smuzhiyun 				 * caller will free it;
138*4882a593Smuzhiyun 				 */
139*4882a593Smuzhiyun #define	lbmSYNC		0x0008	/* do not return to freelist
140*4882a593Smuzhiyun 				 * when removed from write queue;
141*4882a593Smuzhiyun 				 */
142*4882a593Smuzhiyun #define lbmFREE		0x0010	/* return to freelist
143*4882a593Smuzhiyun 				 * at completion of pageout;
144*4882a593Smuzhiyun 				 * the buffer may be recycled;
145*4882a593Smuzhiyun 				 */
146*4882a593Smuzhiyun #define	lbmDONE		0x0020
147*4882a593Smuzhiyun #define	lbmERROR	0x0040
148*4882a593Smuzhiyun #define lbmGC		0x0080	/* lbmIODone to perform post-GC processing
149*4882a593Smuzhiyun 				 * of log page
150*4882a593Smuzhiyun 				 */
151*4882a593Smuzhiyun #define lbmDIRECT	0x0100
152*4882a593Smuzhiyun 
153*4882a593Smuzhiyun /*
154*4882a593Smuzhiyun  * Global list of active external journals
155*4882a593Smuzhiyun  */
156*4882a593Smuzhiyun static LIST_HEAD(jfs_external_logs);
157*4882a593Smuzhiyun static struct jfs_log *dummy_log;
158*4882a593Smuzhiyun static DEFINE_MUTEX(jfs_log_mutex);
159*4882a593Smuzhiyun 
160*4882a593Smuzhiyun /*
161*4882a593Smuzhiyun  * forward references
162*4882a593Smuzhiyun  */
163*4882a593Smuzhiyun static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk,
164*4882a593Smuzhiyun 			 struct lrd * lrd, struct tlock * tlck);
165*4882a593Smuzhiyun 
166*4882a593Smuzhiyun static int lmNextPage(struct jfs_log * log);
167*4882a593Smuzhiyun static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
168*4882a593Smuzhiyun 			   int activate);
169*4882a593Smuzhiyun 
170*4882a593Smuzhiyun static int open_inline_log(struct super_block *sb);
171*4882a593Smuzhiyun static int open_dummy_log(struct super_block *sb);
172*4882a593Smuzhiyun static int lbmLogInit(struct jfs_log * log);
173*4882a593Smuzhiyun static void lbmLogShutdown(struct jfs_log * log);
174*4882a593Smuzhiyun static struct lbuf *lbmAllocate(struct jfs_log * log, int);
175*4882a593Smuzhiyun static void lbmFree(struct lbuf * bp);
176*4882a593Smuzhiyun static void lbmfree(struct lbuf * bp);
177*4882a593Smuzhiyun static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp);
178*4882a593Smuzhiyun static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block);
179*4882a593Smuzhiyun static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag);
180*4882a593Smuzhiyun static int lbmIOWait(struct lbuf * bp, int flag);
181*4882a593Smuzhiyun static bio_end_io_t lbmIODone;
182*4882a593Smuzhiyun static void lbmStartIO(struct lbuf * bp);
183*4882a593Smuzhiyun static void lmGCwrite(struct jfs_log * log, int cant_block);
184*4882a593Smuzhiyun static int lmLogSync(struct jfs_log * log, int hard_sync);
185*4882a593Smuzhiyun 
186*4882a593Smuzhiyun 
187*4882a593Smuzhiyun 
188*4882a593Smuzhiyun /*
189*4882a593Smuzhiyun  *	statistics
190*4882a593Smuzhiyun  */
191*4882a593Smuzhiyun #ifdef CONFIG_JFS_STATISTICS
192*4882a593Smuzhiyun static struct lmStat {
193*4882a593Smuzhiyun 	uint commit;		/* # of commit */
194*4882a593Smuzhiyun 	uint pagedone;		/* # of page written */
195*4882a593Smuzhiyun 	uint submitted;		/* # of pages submitted */
196*4882a593Smuzhiyun 	uint full_page;		/* # of full pages submitted */
197*4882a593Smuzhiyun 	uint partial_page;	/* # of partial pages submitted */
198*4882a593Smuzhiyun } lmStat;
199*4882a593Smuzhiyun #endif
200*4882a593Smuzhiyun 
write_special_inodes(struct jfs_log * log,int (* writer)(struct address_space *))201*4882a593Smuzhiyun static void write_special_inodes(struct jfs_log *log,
202*4882a593Smuzhiyun 				 int (*writer)(struct address_space *))
203*4882a593Smuzhiyun {
204*4882a593Smuzhiyun 	struct jfs_sb_info *sbi;
205*4882a593Smuzhiyun 
206*4882a593Smuzhiyun 	list_for_each_entry(sbi, &log->sb_list, log_list) {
207*4882a593Smuzhiyun 		writer(sbi->ipbmap->i_mapping);
208*4882a593Smuzhiyun 		writer(sbi->ipimap->i_mapping);
209*4882a593Smuzhiyun 		writer(sbi->direct_inode->i_mapping);
210*4882a593Smuzhiyun 	}
211*4882a593Smuzhiyun }
212*4882a593Smuzhiyun 
213*4882a593Smuzhiyun /*
214*4882a593Smuzhiyun  * NAME:	lmLog()
215*4882a593Smuzhiyun  *
216*4882a593Smuzhiyun  * FUNCTION:	write a log record;
217*4882a593Smuzhiyun  *
218*4882a593Smuzhiyun  * PARAMETER:
219*4882a593Smuzhiyun  *
220*4882a593Smuzhiyun  * RETURN:	lsn - offset to the next log record to write (end-of-log);
221*4882a593Smuzhiyun  *		-1  - error;
222*4882a593Smuzhiyun  *
223*4882a593Smuzhiyun  * note: todo: log error handler
224*4882a593Smuzhiyun  */
int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	  struct tlock * tlck)
{
	int lsn;		/* address returned by lmWriteRecord() */
	int diffp, difft;	/* log-wrap-aware distances (see logdiff) */
	struct metapage *mp = NULL;
	unsigned long flags;

	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
		 log, tblk, lrd, tlck);

	/* serialize all record writes on this log (per-log mutex) */
	LOG_LOCK(log);

	/* log by (out-of-transaction) JFS ? */
	if (tblk == NULL)
		goto writeRecord;

	/* log from page ? skip recovery-lsn bookkeeping for BTROOT
	 * tlocks and tlocks with no metapage attached
	 */
	if (tlck == NULL ||
	    tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL)
		goto writeRecord;

	/*
	 *	initialize/update page/transaction recovery lsn
	 */
	lsn = log->lsn;

	LOGSYNC_LOCK(log, flags);

	/*
	 * initialize page lsn if first log write of the page
	 */
	if (mp->lsn == 0) {
		mp->log = log;
		mp->lsn = lsn;
		log->count++;

		/* insert page at tail of logsynclist */
		list_add_tail(&mp->synclist, &log->synclist);
	}

	/*
	 *	initialize/update lsn of tblock of the page
	 *
	 * transaction inherits oldest lsn of pages associated
	 * with allocation/deallocation of resources (their
	 * log records are used to reconstruct allocation map
	 * at recovery time: inode for inode allocation map,
	 * B+-tree index of extent descriptors for block
	 * allocation map);
	 * allocation map pages inherit transaction lsn at
	 * commit time to allow forwarding log syncpt past log
	 * records associated with allocation/deallocation of
	 * resources only after persistent map of these map pages
	 * have been updated and propagated to home.
	 */
	/*
	 * initialize transaction lsn:
	 */
	if (tblk->lsn == 0) {
		/* inherit lsn of its first page logged */
		tblk->lsn = mp->lsn;
		log->count++;

		/* insert tblock after the page on logsynclist */
		list_add(&tblk->synclist, &mp->synclist);
	}
	/*
	 * update transaction lsn:
	 */
	else {
		/* inherit oldest/smallest lsn of page */
		logdiff(diffp, mp->lsn, log);
		logdiff(difft, tblk->lsn, log);
		if (diffp < difft) {
			/* update tblock lsn with page lsn */
			tblk->lsn = mp->lsn;

			/* move tblock after page on logsynclist */
			list_move(&tblk->synclist, &mp->synclist);
		}
	}

	LOGSYNC_UNLOCK(log, flags);

	/*
	 *	write the log record
	 */
      writeRecord:
	lsn = lmWriteRecord(log, tblk, lrd, tlck);

	/*
	 * forward log syncpt if log reached next syncpt trigger
	 */
	logdiff(diffp, lsn, log);
	if (diffp >= log->nextsync)
		lsn = lmLogSync(log, 0);

	/* update end-of-log lsn */
	log->lsn = lsn;

	LOG_UNLOCK(log);

	/* return end-of-log address */
	return lsn;
}
331*4882a593Smuzhiyun 
332*4882a593Smuzhiyun /*
333*4882a593Smuzhiyun  * NAME:	lmWriteRecord()
334*4882a593Smuzhiyun  *
335*4882a593Smuzhiyun  * FUNCTION:	move the log record to current log page
336*4882a593Smuzhiyun  *
337*4882a593Smuzhiyun  * PARAMETER:	cd	- commit descriptor
338*4882a593Smuzhiyun  *
339*4882a593Smuzhiyun  * RETURN:	end-of-log address
340*4882a593Smuzhiyun  *
341*4882a593Smuzhiyun  * serialization: LOG_LOCK() held on entry/exit
342*4882a593Smuzhiyun  */
static int
lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
	      struct tlock * tlck)
{
	int lsn = 0;		/* end-of-log address */
	struct lbuf *bp;	/* dst log page buffer */
	struct logpage *lp;	/* dst log page */
	caddr_t dst;		/* destination address in log page */
	int dstoffset;		/* end-of-log offset in log page */
	int freespace;		/* free space in log page */
	caddr_t p;		/* src meta-data page */
	caddr_t src;		/* current source address */
	int srclen;		/* bytes remaining in current source */
	int nbytes;		/* number of bytes to move */
	int i;
	int len;		/* cumulative data length for lrd->length */
	struct linelock *linelock;
	struct lv *lv;
	struct lvd *lvd;
	int l2linesize;

	len = 0;

	/* retrieve destination log page to write */
	bp = (struct lbuf *) log->bp;
	lp = (struct logpage *) bp->l_ldata;
	dstoffset = log->eor;

	/* any log data to write ? */
	if (tlck == NULL)
		goto moveLrd;

	/*
	 *	move log record data
	 */
	/* retrieve source meta-data page to log */
	if (tlck->flag & tlckPAGELOCK) {
		p = (caddr_t) (tlck->mp->data);
		linelock = (struct linelock *) & tlck->lock;
	}
	/* retrieve source in-memory inode to log */
	else if (tlck->flag & tlckINODELOCK) {
		if (tlck->type & tlckDTREE)
			p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot;
		else
			p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot;
		linelock = (struct linelock *) & tlck->lock;
	}
#ifdef	_JFS_WIP
	else if (tlck->flag & tlckINLINELOCK) {

		inlinelock = (struct inlinelock *) & tlck;
		p = (caddr_t) & inlinelock->pxd;
		linelock = (struct linelock *) & tlck;
	}
#endif				/* _JFS_WIP */
	else {
		/* unrecognized tlock type: nothing sane to log */
		jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck);
		return 0;	/* Probably should trap */
	}
	l2linesize = linelock->l2linesize;

      moveData:
	ASSERT(linelock->index <= linelock->maxcnt);

	/* copy each non-empty log vector of this linelock into the
	 * log page, crossing page boundaries as needed
	 */
	lv = linelock->lv;
	for (i = 0; i < linelock->index; i++, lv++) {
		if (lv->length == 0)
			continue;

		/* is page full ? */
		if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) {
			/* page become full: move on to next page */
			lmNextPage(log);

			bp = log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;
		}

		/*
		 * move log vector data
		 */
		src = (u8 *) p + (lv->offset << l2linesize);
		srclen = lv->length << l2linesize;
		len += srclen;
		while (srclen > 0) {
			freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
			nbytes = min(freespace, srclen);
			dst = (caddr_t) lp + dstoffset;
			memcpy(dst, src, nbytes);
			dstoffset += nbytes;

			/* is page not full ? */
			if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
				break;

			/* page become full: move on to next page */
			lmNextPage(log);

			bp = (struct lbuf *) log->bp;
			lp = (struct logpage *) bp->l_ldata;
			dstoffset = LOGPHDRSIZE;

			srclen -= nbytes;
			src += nbytes;
		}

		/*
		 * move log vector descriptor
		 */
		len += 4;
		lvd = (struct lvd *) ((caddr_t) lp + dstoffset);
		lvd->offset = cpu_to_le16(lv->offset);
		lvd->length = cpu_to_le16(lv->length);
		dstoffset += 4;
		jfs_info("lmWriteRecord: lv offset:%d length:%d",
			 lv->offset, lv->length);
	}

	/* follow chained linelocks, if any */
	if ((i = linelock->next)) {
		linelock = (struct linelock *) lid_to_tlock(i);
		goto moveData;
	}

	/*
	 *	move log record descriptor
	 */
      moveLrd:
	lrd->length = cpu_to_le16(len);

	src = (caddr_t) lrd;
	srclen = LOGRDSIZE;

	while (srclen > 0) {
		freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset;
		nbytes = min(freespace, srclen);
		dst = (caddr_t) lp + dstoffset;
		memcpy(dst, src, nbytes);

		dstoffset += nbytes;
		srclen -= nbytes;

		/* are there more to move than freespace of page ? */
		if (srclen)
			goto pageFull;

		/*
		 * end of log record descriptor
		 */

		/* update last log record eor */
		log->eor = dstoffset;
		bp->l_eor = dstoffset;
		lsn = (log->page << L2LOGPSIZE) + dstoffset;

		if (lrd->type & cpu_to_le16(LOG_COMMIT)) {
			tblk->clsn = lsn;
			jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn,
				 bp->l_eor);

			INCREMENT(lmStat.commit);	/* # of commit */

			/*
			 * enqueue tblock for group commit:
			 *
			 * enqueue tblock of non-trivial/synchronous COMMIT
			 * at tail of group commit queue
			 * (trivial/asynchronous COMMITs are ignored by
			 * group commit.)
			 */
			LOGGC_LOCK(log);

			/* init tblock gc state */
			tblk->flag = tblkGC_QUEUE;
			tblk->bp = log->bp;
			tblk->pn = log->page;
			tblk->eor = log->eor;

			/* enqueue transaction to commit queue */
			list_add_tail(&tblk->cqueue, &log->cqueue);

			LOGGC_UNLOCK(log);
		}

		jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x",
			le16_to_cpu(lrd->type), log->bp, log->page, dstoffset);

		/* page not full ? */
		if (dstoffset < LOGPSIZE - LOGPTLRSIZE)
			return lsn;

	      pageFull:
		/* page become full: move on to next page */
		lmNextPage(log);

		bp = (struct lbuf *) log->bp;
		lp = (struct logpage *) bp->l_ldata;
		dstoffset = LOGPHDRSIZE;
		src += nbytes;
	}

	return lsn;
}
547*4882a593Smuzhiyun 
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun /*
550*4882a593Smuzhiyun  * NAME:	lmNextPage()
551*4882a593Smuzhiyun  *
552*4882a593Smuzhiyun  * FUNCTION:	write current page and allocate next page.
553*4882a593Smuzhiyun  *
554*4882a593Smuzhiyun  * PARAMETER:	log
555*4882a593Smuzhiyun  *
556*4882a593Smuzhiyun  * RETURN:	0
557*4882a593Smuzhiyun  *
558*4882a593Smuzhiyun  * serialization: LOG_LOCK() held on entry/exit
559*4882a593Smuzhiyun  */
static int lmNextPage(struct jfs_log * log)
{
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	int pn;			/* current page number */
	struct lbuf *bp;	/* current (outgoing) log page buffer */
	struct lbuf *nextbp;	/* newly allocated log page buffer */
	struct tblock *tblk;	/* tail of commit queue, if any */

	/* get current log page number and log sequence page number */
	pn = log->page;
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	lspn = le32_to_cpu(lp->h.page);

	LOGGC_LOCK(log);

	/*
	 *	write or queue the full page at the tail of write queue
	 */
	/* get the tail tblk on commit queue */
	if (list_empty(&log->cqueue))
		tblk = NULL;
	else
		tblk = list_entry(log->cqueue.prev, struct tblock, cqueue);

	/* every tblk who has COMMIT record on the current page,
	 * and has not been committed, must be on commit queue
	 * since tblk is queued at commit queue at the time
	 * of writing its COMMIT record on the page before
	 * page becomes full (even though the tblk thread
	 * who wrote COMMIT record may have been suspended
	 * currently);
	 */

	/* is page bound with outstanding tail tblk ? */
	if (tblk && tblk->pn == pn) {
		/* mark tblk for end-of-page */
		tblk->flag |= tblkGC_EOP;

		if (log->cflag & logGC_PAGEOUT) {
			/* if page is not already on write queue,
			 * just enqueue (no lbmWRITE to prevent redrive)
			 * buffer to wqueue to ensure correct serial order
			 * of the pages since log pages will be added
			 * continuously
			 */
			if (bp->l_wqnext == NULL)
				lbmWrite(log, bp, 0, 0);
		} else {
			/*
			 * No current GC leader, initiate group commit
			 */
			log->cflag |= logGC_PAGEOUT;
			lmGCwrite(log, 0);
		}
	}
	/* page is not bound with outstanding tblk:
	 * init write or mark it to be redriven (lbmWRITE)
	 */
	else {
		/* finalize the page */
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0);
	}
	LOGGC_UNLOCK(log);

	/*
	 *	allocate/initialize next page
	 */
	/* if log wraps, the first data page of log is 2
	 * (0 never used, 1 is superblock).
	 */
	log->page = (pn == log->size - 1) ? 2 : pn + 1;
	log->eor = LOGPHDRSIZE;	/* ? valid page empty/full at logRedo() */

	/* allocate/initialize next log page buffer */
	nextbp = lbmAllocate(log, log->page);
	nextbp->l_eor = log->eor;
	log->bp = nextbp;

	/* initialize next log page */
	lp = (struct logpage *) nextbp->l_ldata;
	lp->h.page = lp->t.page = cpu_to_le32(lspn + 1);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

	return 0;
}
649*4882a593Smuzhiyun 
650*4882a593Smuzhiyun 
651*4882a593Smuzhiyun /*
652*4882a593Smuzhiyun  * NAME:	lmGroupCommit()
653*4882a593Smuzhiyun  *
654*4882a593Smuzhiyun  * FUNCTION:	group commit
655*4882a593Smuzhiyun  *	initiate pageout of the pages with COMMIT in the order of
656*4882a593Smuzhiyun  *	page number - redrive pageout of the page at the head of
657*4882a593Smuzhiyun  *	pageout queue until full page has been written.
658*4882a593Smuzhiyun  *
659*4882a593Smuzhiyun  * RETURN:
660*4882a593Smuzhiyun  *
661*4882a593Smuzhiyun  * NOTE:
662*4882a593Smuzhiyun  *	LOGGC_LOCK serializes log group commit queue, and
663*4882a593Smuzhiyun  *	transaction blocks on the commit queue.
664*4882a593Smuzhiyun  *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
665*4882a593Smuzhiyun  */
int lmGroupCommit(struct jfs_log * log, struct tblock * tblk)
{
	int rc = 0;

	LOGGC_LOCK(log);

	/* group committed already ? (pageout completed before we got here) */
	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}
	jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc);

	if (tblk->xflag & COMMIT_LAZY)
		tblk->flag |= tblkGC_LAZY;

	/* Become group leader and start the pageout ourselves only if:
	 * no pageout is already in progress, the commit queue is non-empty,
	 * and either this is a synchronous commit or the log is being
	 * flushed / tlocks are running low (plain lazy commits just queue).
	 */
	if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) &&
	    (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag)
	     || jfs_tlocks_low)) {
		/*
		 * No pageout in progress
		 *
		 * start group commit as its group leader.
		 */
		log->cflag |= logGC_PAGEOUT;

		lmGCwrite(log, 0);
	}

	if (tblk->xflag & COMMIT_LAZY) {
		/*
		 * Lazy transactions can leave now
		 */
		LOGGC_UNLOCK(log);
		return 0;
	}

	/* lmGCwrite gives up LOGGC_LOCK, check again */

	if (tblk->flag & tblkGC_COMMITTED) {
		if (tblk->flag & tblkGC_ERROR)
			rc = -EIO;

		LOGGC_UNLOCK(log);
		return rc;
	}

	/* upcount transaction waiting for completion
	 */
	log->gcrtc++;
	tblk->flag |= tblkGC_READY;

	/* sleep (releasing/reacquiring LOGGC_LOCK) until lmPostGC()
	 * marks this transaction committed and wakes us up
	 */
	__SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED),
		     LOGGC_LOCK(log), LOGGC_UNLOCK(log));

	/* removed from commit queue */
	if (tblk->flag & tblkGC_ERROR)
		rc = -EIO;

	LOGGC_UNLOCK(log);
	return rc;
}
731*4882a593Smuzhiyun 
732*4882a593Smuzhiyun /*
733*4882a593Smuzhiyun  * NAME:	lmGCwrite()
734*4882a593Smuzhiyun  *
735*4882a593Smuzhiyun  * FUNCTION:	group commit write
736*4882a593Smuzhiyun  *	initiate write of log page, building a group of all transactions
737*4882a593Smuzhiyun  *	with commit records on that page.
738*4882a593Smuzhiyun  *
739*4882a593Smuzhiyun  * RETURN:	None
740*4882a593Smuzhiyun  *
741*4882a593Smuzhiyun  * NOTE:
742*4882a593Smuzhiyun  *	LOGGC_LOCK must be held by caller.
743*4882a593Smuzhiyun  *	N.B. LOG_LOCK is NOT held during lmGroupCommit().
744*4882a593Smuzhiyun  */
static void lmGCwrite(struct jfs_log * log, int cant_write)
{
	struct lbuf *bp;
	struct logpage *lp;
	int gcpn;		/* group commit page number */
	struct tblock *tblk;
	struct tblock *xtblk = NULL;

	/*
	 * build the commit group of a log page
	 *
	 * scan commit queue and make a commit group of all
	 * transactions with COMMIT records on the same log page.
	 */
	/* get the head tblk on the commit queue */
	gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn;

	/* mark every queued transaction whose COMMIT record lives on
	 * that same page as part of this group
	 */
	list_for_each_entry(tblk, &log->cqueue, cqueue) {
		if (tblk->pn != gcpn)
			break;

		xtblk = tblk;

		/* state transition: (QUEUE, READY) -> COMMIT */
		tblk->flag |= tblkGC_COMMIT;
	}
	tblk = xtblk;		/* last tblk of the page */

	/*
	 * pageout to commit transactions on the log page.
	 */
	bp = (struct lbuf *) tblk->bp;
	lp = (struct logpage *) bp->l_ldata;
	/* is page already full ? */
	if (tblk->flag & tblkGC_EOP) {
		/* mark page to free at end of group commit of the page */
		tblk->flag &= ~tblkGC_EOP;
		tblk->flag |= tblkGC_FREE;
		bp->l_ceor = bp->l_eor;
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		/* write out the whole page and release the buffer */
		lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC,
			 cant_write);
		INCREMENT(lmStat.full_page);
	}
	/* page is not yet full */
	else {
		bp->l_ceor = tblk->eor;	/* ? bp->l_ceor = bp->l_eor; */
		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor);
		/* partial-page write; buffer is kept for further appends */
		lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write);
		INCREMENT(lmStat.partial_page);
	}
}
797*4882a593Smuzhiyun 
798*4882a593Smuzhiyun /*
799*4882a593Smuzhiyun  * NAME:	lmPostGC()
800*4882a593Smuzhiyun  *
801*4882a593Smuzhiyun  * FUNCTION:	group commit post-processing
802*4882a593Smuzhiyun  *	Processes transactions after their commit records have been written
803*4882a593Smuzhiyun  *	to disk, redriving log I/O if necessary.
804*4882a593Smuzhiyun  *
805*4882a593Smuzhiyun  * RETURN:	None
806*4882a593Smuzhiyun  *
807*4882a593Smuzhiyun  * NOTE:
 *	This routine is called at interrupt time by lbmIODone
809*4882a593Smuzhiyun  */
static void lmPostGC(struct lbuf * bp)
{
	unsigned long flags;
	struct jfs_log *log = bp->l_log;
	struct logpage *lp;
	struct tblock *tblk, *temp;

	//LOGGC_LOCK(log);
	/* irqsave variant: we may be called from I/O completion context */
	spin_lock_irqsave(&log->gclock, flags);
	/*
	 * current pageout of group commit completed.
	 *
	 * remove/wakeup transactions from commit queue who were
	 * group committed with the current log page
	 */
	list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) {
		if (!(tblk->flag & tblkGC_COMMIT))
			break;
		/* if transaction was marked GC_COMMIT then
		 * it has been shipped in the current pageout
		 * and made it to disk - it is committed.
		 */

		if (bp->l_flag & lbmERROR)
			tblk->flag |= tblkGC_ERROR;

		/* remove it from the commit queue */
		list_del(&tblk->cqueue);
		tblk->flag &= ~tblkGC_QUEUE;

		if (tblk == log->flush_tblk) {
			/* we can stop flushing the log now */
			clear_bit(log_FLUSH, &log->flag);
			log->flush_tblk = NULL;
		}

		jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk,
			 tblk->flag);

		if (!(tblk->xflag & COMMIT_FORCE))
			/*
			 * Hand tblk over to lazy commit thread
			 */
			txLazyUnlock(tblk);
		else {
			/* state transition: COMMIT -> COMMITTED */
			tblk->flag |= tblkGC_COMMITTED;

			if (tblk->flag & tblkGC_READY)
				log->gcrtc--;

			/* wake the transaction sleeping in lmGroupCommit() */
			LOGGC_WAKEUP(tblk);
		}

		/* was page full before pageout ?
		 * (and this is the last tblk bound with the page)
		 */
		if (tblk->flag & tblkGC_FREE)
			lbmFree(bp);
		/* did page become full after pageout ?
		 * (and this is the last tblk bound with the page)
		 */
		else if (tblk->flag & tblkGC_EOP) {
			/* finalize the page */
			lp = (struct logpage *) bp->l_ldata;
			bp->l_ceor = bp->l_eor;
			lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
			jfs_info("lmPostGC: calling lbmWrite");
			lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE,
				 1);
		}

	}

	/* are there any transactions who have entered lmGroupCommit()
	 * (whose COMMITs are after that of the last log page written),
	 * waiting for new group commit (above at (SLEEP 1)),
	 * or lazy transactions on a full (queued) log page?
	 * select the latest ready transaction as new group leader and
	 * wake her up to lead her group.
	 * (if the loop above drained the queue, list_empty short-circuits
	 * before tblk is dereferenced)
	 */
	if ((!list_empty(&log->cqueue)) &&
	    ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) ||
	     test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low))
		/*
		 * Call lmGCwrite with new group leader
		 */
		lmGCwrite(log, 1);

	/* no transaction are ready yet (transactions are only just
	 * queued (GC_QUEUE) and not entered for group commit yet).
	 * the first transaction entering group commit
	 * will elect herself as new group leader.
	 */
	else
		log->cflag &= ~logGC_PAGEOUT;

	//LOGGC_UNLOCK(log);
	spin_unlock_irqrestore(&log->gclock, flags);
	return;
}
911*4882a593Smuzhiyun 
912*4882a593Smuzhiyun /*
913*4882a593Smuzhiyun  * NAME:	lmLogSync()
914*4882a593Smuzhiyun  *
915*4882a593Smuzhiyun  * FUNCTION:	write log SYNCPT record for specified log
916*4882a593Smuzhiyun  *	if new sync address is available
917*4882a593Smuzhiyun  *	(normally the case if sync() is executed by back-ground
918*4882a593Smuzhiyun  *	process).
919*4882a593Smuzhiyun  *	calculate new value of i_nextsync which determines when
920*4882a593Smuzhiyun  *	this code is called again.
921*4882a593Smuzhiyun  *
922*4882a593Smuzhiyun  * PARAMETERS:	log	- log structure
923*4882a593Smuzhiyun  *		hard_sync - 1 to force all metadata to be written
924*4882a593Smuzhiyun  *
925*4882a593Smuzhiyun  * RETURN:	0
926*4882a593Smuzhiyun  *
927*4882a593Smuzhiyun  * serialization: LOG_LOCK() held on entry/exit
928*4882a593Smuzhiyun  */
static int lmLogSync(struct jfs_log * log, int hard_sync)
{
	int logsize;
	int written;		/* written since last syncpt */
	int free;		/* free space left available */
	int delta;		/* additional delta to write normally */
	int more;		/* additional write granted */
	struct lrd lrd;
	int lsn;
	struct logsyncblk *lp;
	unsigned long flags;

	/* push dirty metapages out to disk */
	if (hard_sync)
		write_special_inodes(log, filemap_fdatawrite);
	else
		write_special_inodes(log, filemap_flush);

	/*
	 *	forward syncpt
	 */
	/* if last sync is same as last syncpt,
	 * invoke sync point forward processing to update sync.
	 */

	if (log->sync == log->syncpt) {
		LOGSYNC_LOCK(log, flags);
		if (list_empty(&log->synclist))
			/* nothing outstanding: sync advances to current lsn */
			log->sync = log->lsn;
		else {
			/* oldest entry on the synclist bounds the new sync */
			lp = list_entry(log->synclist.next,
					struct logsyncblk, synclist);
			log->sync = lp->lsn;
		}
		LOGSYNC_UNLOCK(log, flags);

	}

	/* if sync is different from last syncpt,
	 * write a SYNCPT record with syncpt = sync.
	 * reset syncpt = sync
	 */
	if (log->sync != log->syncpt) {
		lrd.logtid = 0;
		lrd.backchain = 0;
		lrd.type = cpu_to_le16(LOG_SYNCPT);
		lrd.length = 0;
		lrd.log.syncpt.sync = cpu_to_le32(log->sync);
		lsn = lmWriteRecord(log, NULL, &lrd, NULL);

		log->syncpt = log->sync;
	} else
		lsn = log->lsn;

	/*
	 *	setup next syncpt trigger (SWAG)
	 */
	logsize = log->logsize;

	/* bytes written since the last sync point */
	logdiff(written, lsn, log);
	free = logsize - written;
	delta = LOGSYNC_DELTA(logsize);
	more = min(free / 2, delta);
	if (more < 2 * LOGPSIZE) {
		jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n");
		/*
		 *	log wrapping
		 *
		 * option 1 - panic ? No.!
		 * option 2 - shutdown file systems
		 *	      associated with log ?
		 * option 3 - extend log ?
		 * option 4 - second chance
		 *
		 * mark log wrapped, and continue.
		 * when all active transactions are completed,
		 * mark log valid for recovery.
		 * if crashed during invalid state, log state
		 * implies invalid log, forcing fsck().
		 */
		/* mark log state log wrap in log superblock */
		/* log->state = LOGWRAP; */

		/* reset sync point computation */
		log->syncpt = log->sync = lsn;
		log->nextsync = delta;
	} else
		/* next syncpt trigger = written + more */
		log->nextsync = written + more;

	/* if number of bytes written from last sync point is more
	 * than 1/4 of the log size, stop new transactions from
	 * starting until all current transactions are completed
	 * by setting syncbarrier flag.
	 */
	if (!test_bit(log_SYNCBARRIER, &log->flag) &&
	    (written > LOGSYNC_BARRIER(logsize)) && log->active) {
		set_bit(log_SYNCBARRIER, &log->flag);
		jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn,
			 log->syncpt);
		/*
		 * We may have to initiate group commit
		 */
		jfs_flush_journal(log, 0);
	}

	return lsn;
}
1037*4882a593Smuzhiyun 
1038*4882a593Smuzhiyun /*
1039*4882a593Smuzhiyun  * NAME:	jfs_syncpt
1040*4882a593Smuzhiyun  *
1041*4882a593Smuzhiyun  * FUNCTION:	write log SYNCPT record for specified log
1042*4882a593Smuzhiyun  *
1043*4882a593Smuzhiyun  * PARAMETERS:	log	  - log structure
1044*4882a593Smuzhiyun  *		hard_sync - set to 1 to force metadata to be written
1045*4882a593Smuzhiyun  */
jfs_syncpt(struct jfs_log * log,int hard_sync)1046*4882a593Smuzhiyun void jfs_syncpt(struct jfs_log *log, int hard_sync)
1047*4882a593Smuzhiyun {	LOG_LOCK(log);
1048*4882a593Smuzhiyun 	if (!test_bit(log_QUIESCE, &log->flag))
1049*4882a593Smuzhiyun 		lmLogSync(log, hard_sync);
1050*4882a593Smuzhiyun 	LOG_UNLOCK(log);
1051*4882a593Smuzhiyun }
1052*4882a593Smuzhiyun 
1053*4882a593Smuzhiyun /*
1054*4882a593Smuzhiyun  * NAME:	lmLogOpen()
1055*4882a593Smuzhiyun  *
1056*4882a593Smuzhiyun  * FUNCTION:	open the log on first open;
1057*4882a593Smuzhiyun  *	insert filesystem in the active list of the log.
1058*4882a593Smuzhiyun  *
1059*4882a593Smuzhiyun  * PARAMETER:	ipmnt	- file system mount inode
1060*4882a593Smuzhiyun  *		iplog	- log inode (out)
1061*4882a593Smuzhiyun  *
1062*4882a593Smuzhiyun  * RETURN:
1063*4882a593Smuzhiyun  *
1064*4882a593Smuzhiyun  * serialization:
1065*4882a593Smuzhiyun  */
int lmLogOpen(struct super_block *sb)
{
	int rc;
	struct block_device *bdev;
	struct jfs_log *log;
	struct jfs_sb_info *sbi = JFS_SBI(sb);

	/* no-integrity mounts journal to an in-memory dummy log */
	if (sbi->flag & JFS_NOINTEGRITY)
		return open_dummy_log(sb);

	/* inline log lives inside the filesystem's own volume */
	if (sbi->mntflag & JFS_INLINELOG)
		return open_inline_log(sb);

	/* external log: several file systems may share one journal;
	 * reuse an already-open log if its device matches
	 */
	mutex_lock(&jfs_log_mutex);
	list_for_each_entry(log, &jfs_external_logs, journal_list) {
		if (log->bdev->bd_dev == sbi->logdev) {
			if (!uuid_equal(&log->uuid, &sbi->loguuid)) {
				jfs_warn("wrong uuid on JFS journal");
				mutex_unlock(&jfs_log_mutex);
				return -EINVAL;
			}
			/*
			 * add file system to log active file system list
			 */
			if ((rc = lmLogFileSystem(log, sbi, 1))) {
				mutex_unlock(&jfs_log_mutex);
				return rc;
			}
			goto journal_found;
		}
	}

	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL))) {
		mutex_unlock(&jfs_log_mutex);
		return -ENOMEM;
	}
	INIT_LIST_HEAD(&log->sb_list);
	init_waitqueue_head(&log->syncwait);

	/*
	 *	external log as separate logical volume
	 *
	 * file systems to log may have n-to-1 relationship;
	 */

	/* open the log device exclusively, with the log as holder */
	bdev = blkdev_get_by_dev(sbi->logdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				 log);
	if (IS_ERR(bdev)) {
		rc = PTR_ERR(bdev);
		goto free;
	}

	log->bdev = bdev;
	uuid_copy(&log->uuid, &sbi->loguuid);

	/*
	 * initialize log:
	 */
	if ((rc = lmLogInit(log)))
		goto close;

	list_add(&log->journal_list, &jfs_external_logs);

	/*
	 * add file system to log active file system list
	 */
	if ((rc = lmLogFileSystem(log, sbi, 1)))
		goto shutdown;

journal_found:
	LOG_LOCK(log);
	list_add(&sbi->log_list, &log->sb_list);
	sbi->log = log;
	LOG_UNLOCK(log);

	mutex_unlock(&jfs_log_mutex);
	return 0;

	/*
	 *	unwind on error
	 *	(each label undoes one more acquisition, falling through)
	 */
      shutdown:		/* unwind lbmLogInit() */
	list_del(&log->journal_list);
	lbmLogShutdown(log);

      close:		/* close external log device */
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

      free:		/* free log descriptor */
	mutex_unlock(&jfs_log_mutex);
	kfree(log);

	jfs_warn("lmLogOpen: exit(%d)", rc);
	return rc;
}
1161*4882a593Smuzhiyun 
open_inline_log(struct super_block * sb)1162*4882a593Smuzhiyun static int open_inline_log(struct super_block *sb)
1163*4882a593Smuzhiyun {
1164*4882a593Smuzhiyun 	struct jfs_log *log;
1165*4882a593Smuzhiyun 	int rc;
1166*4882a593Smuzhiyun 
1167*4882a593Smuzhiyun 	if (!(log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL)))
1168*4882a593Smuzhiyun 		return -ENOMEM;
1169*4882a593Smuzhiyun 	INIT_LIST_HEAD(&log->sb_list);
1170*4882a593Smuzhiyun 	init_waitqueue_head(&log->syncwait);
1171*4882a593Smuzhiyun 
1172*4882a593Smuzhiyun 	set_bit(log_INLINELOG, &log->flag);
1173*4882a593Smuzhiyun 	log->bdev = sb->s_bdev;
1174*4882a593Smuzhiyun 	log->base = addressPXD(&JFS_SBI(sb)->logpxd);
1175*4882a593Smuzhiyun 	log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >>
1176*4882a593Smuzhiyun 	    (L2LOGPSIZE - sb->s_blocksize_bits);
1177*4882a593Smuzhiyun 	log->l2bsize = sb->s_blocksize_bits;
1178*4882a593Smuzhiyun 	ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits);
1179*4882a593Smuzhiyun 
1180*4882a593Smuzhiyun 	/*
1181*4882a593Smuzhiyun 	 * initialize log.
1182*4882a593Smuzhiyun 	 */
1183*4882a593Smuzhiyun 	if ((rc = lmLogInit(log))) {
1184*4882a593Smuzhiyun 		kfree(log);
1185*4882a593Smuzhiyun 		jfs_warn("lmLogOpen: exit(%d)", rc);
1186*4882a593Smuzhiyun 		return rc;
1187*4882a593Smuzhiyun 	}
1188*4882a593Smuzhiyun 
1189*4882a593Smuzhiyun 	list_add(&JFS_SBI(sb)->log_list, &log->sb_list);
1190*4882a593Smuzhiyun 	JFS_SBI(sb)->log = log;
1191*4882a593Smuzhiyun 
1192*4882a593Smuzhiyun 	return rc;
1193*4882a593Smuzhiyun }
1194*4882a593Smuzhiyun 
open_dummy_log(struct super_block * sb)1195*4882a593Smuzhiyun static int open_dummy_log(struct super_block *sb)
1196*4882a593Smuzhiyun {
1197*4882a593Smuzhiyun 	int rc;
1198*4882a593Smuzhiyun 
1199*4882a593Smuzhiyun 	mutex_lock(&jfs_log_mutex);
1200*4882a593Smuzhiyun 	if (!dummy_log) {
1201*4882a593Smuzhiyun 		dummy_log = kzalloc(sizeof(struct jfs_log), GFP_KERNEL);
1202*4882a593Smuzhiyun 		if (!dummy_log) {
1203*4882a593Smuzhiyun 			mutex_unlock(&jfs_log_mutex);
1204*4882a593Smuzhiyun 			return -ENOMEM;
1205*4882a593Smuzhiyun 		}
1206*4882a593Smuzhiyun 		INIT_LIST_HEAD(&dummy_log->sb_list);
1207*4882a593Smuzhiyun 		init_waitqueue_head(&dummy_log->syncwait);
1208*4882a593Smuzhiyun 		dummy_log->no_integrity = 1;
1209*4882a593Smuzhiyun 		/* Make up some stuff */
1210*4882a593Smuzhiyun 		dummy_log->base = 0;
1211*4882a593Smuzhiyun 		dummy_log->size = 1024;
1212*4882a593Smuzhiyun 		rc = lmLogInit(dummy_log);
1213*4882a593Smuzhiyun 		if (rc) {
1214*4882a593Smuzhiyun 			kfree(dummy_log);
1215*4882a593Smuzhiyun 			dummy_log = NULL;
1216*4882a593Smuzhiyun 			mutex_unlock(&jfs_log_mutex);
1217*4882a593Smuzhiyun 			return rc;
1218*4882a593Smuzhiyun 		}
1219*4882a593Smuzhiyun 	}
1220*4882a593Smuzhiyun 
1221*4882a593Smuzhiyun 	LOG_LOCK(dummy_log);
1222*4882a593Smuzhiyun 	list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list);
1223*4882a593Smuzhiyun 	JFS_SBI(sb)->log = dummy_log;
1224*4882a593Smuzhiyun 	LOG_UNLOCK(dummy_log);
1225*4882a593Smuzhiyun 	mutex_unlock(&jfs_log_mutex);
1226*4882a593Smuzhiyun 
1227*4882a593Smuzhiyun 	return 0;
1228*4882a593Smuzhiyun }
1229*4882a593Smuzhiyun 
1230*4882a593Smuzhiyun /*
1231*4882a593Smuzhiyun  * NAME:	lmLogInit()
1232*4882a593Smuzhiyun  *
1233*4882a593Smuzhiyun  * FUNCTION:	log initialization at first log open.
1234*4882a593Smuzhiyun  *
1235*4882a593Smuzhiyun  *	logredo() (or logformat()) should have been run previously.
1236*4882a593Smuzhiyun  *	initialize the log from log superblock.
1237*4882a593Smuzhiyun  *	set the log state in the superblock to LOGMOUNT and
1238*4882a593Smuzhiyun  *	write SYNCPT log record.
1239*4882a593Smuzhiyun  *
1240*4882a593Smuzhiyun  * PARAMETER:	log	- log structure
1241*4882a593Smuzhiyun  *
1242*4882a593Smuzhiyun  * RETURN:	0	- if ok
1243*4882a593Smuzhiyun  *		-EINVAL	- bad log magic number or superblock dirty
1244*4882a593Smuzhiyun  *		error returned from logwait()
1245*4882a593Smuzhiyun  *
1246*4882a593Smuzhiyun  * serialization: single first open thread
1247*4882a593Smuzhiyun  */
lmLogInit(struct jfs_log * log)1248*4882a593Smuzhiyun int lmLogInit(struct jfs_log * log)
1249*4882a593Smuzhiyun {
1250*4882a593Smuzhiyun 	int rc = 0;
1251*4882a593Smuzhiyun 	struct lrd lrd;
1252*4882a593Smuzhiyun 	struct logsuper *logsuper;
1253*4882a593Smuzhiyun 	struct lbuf *bpsuper;
1254*4882a593Smuzhiyun 	struct lbuf *bp;
1255*4882a593Smuzhiyun 	struct logpage *lp;
1256*4882a593Smuzhiyun 	int lsn = 0;
1257*4882a593Smuzhiyun 
1258*4882a593Smuzhiyun 	jfs_info("lmLogInit: log:0x%p", log);
1259*4882a593Smuzhiyun 
1260*4882a593Smuzhiyun 	/* initialize the group commit serialization lock */
1261*4882a593Smuzhiyun 	LOGGC_LOCK_INIT(log);
1262*4882a593Smuzhiyun 
1263*4882a593Smuzhiyun 	/* allocate/initialize the log write serialization lock */
1264*4882a593Smuzhiyun 	LOG_LOCK_INIT(log);
1265*4882a593Smuzhiyun 
1266*4882a593Smuzhiyun 	LOGSYNC_LOCK_INIT(log);
1267*4882a593Smuzhiyun 
1268*4882a593Smuzhiyun 	INIT_LIST_HEAD(&log->synclist);
1269*4882a593Smuzhiyun 
1270*4882a593Smuzhiyun 	INIT_LIST_HEAD(&log->cqueue);
1271*4882a593Smuzhiyun 	log->flush_tblk = NULL;
1272*4882a593Smuzhiyun 
1273*4882a593Smuzhiyun 	log->count = 0;
1274*4882a593Smuzhiyun 
1275*4882a593Smuzhiyun 	/*
1276*4882a593Smuzhiyun 	 * initialize log i/o
1277*4882a593Smuzhiyun 	 */
1278*4882a593Smuzhiyun 	if ((rc = lbmLogInit(log)))
1279*4882a593Smuzhiyun 		return rc;
1280*4882a593Smuzhiyun 
1281*4882a593Smuzhiyun 	if (!test_bit(log_INLINELOG, &log->flag))
1282*4882a593Smuzhiyun 		log->l2bsize = L2LOGPSIZE;
1283*4882a593Smuzhiyun 
1284*4882a593Smuzhiyun 	/* check for disabled journaling to disk */
1285*4882a593Smuzhiyun 	if (log->no_integrity) {
1286*4882a593Smuzhiyun 		/*
1287*4882a593Smuzhiyun 		 * Journal pages will still be filled.  When the time comes
1288*4882a593Smuzhiyun 		 * to actually do the I/O, the write is not done, and the
1289*4882a593Smuzhiyun 		 * endio routine is called directly.
1290*4882a593Smuzhiyun 		 */
1291*4882a593Smuzhiyun 		bp = lbmAllocate(log , 0);
1292*4882a593Smuzhiyun 		log->bp = bp;
1293*4882a593Smuzhiyun 		bp->l_pn = bp->l_eor = 0;
1294*4882a593Smuzhiyun 	} else {
1295*4882a593Smuzhiyun 		/*
1296*4882a593Smuzhiyun 		 * validate log superblock
1297*4882a593Smuzhiyun 		 */
1298*4882a593Smuzhiyun 		if ((rc = lbmRead(log, 1, &bpsuper)))
1299*4882a593Smuzhiyun 			goto errout10;
1300*4882a593Smuzhiyun 
1301*4882a593Smuzhiyun 		logsuper = (struct logsuper *) bpsuper->l_ldata;
1302*4882a593Smuzhiyun 
1303*4882a593Smuzhiyun 		if (logsuper->magic != cpu_to_le32(LOGMAGIC)) {
1304*4882a593Smuzhiyun 			jfs_warn("*** Log Format Error ! ***");
1305*4882a593Smuzhiyun 			rc = -EINVAL;
1306*4882a593Smuzhiyun 			goto errout20;
1307*4882a593Smuzhiyun 		}
1308*4882a593Smuzhiyun 
1309*4882a593Smuzhiyun 		/* logredo() should have been run successfully. */
1310*4882a593Smuzhiyun 		if (logsuper->state != cpu_to_le32(LOGREDONE)) {
1311*4882a593Smuzhiyun 			jfs_warn("*** Log Is Dirty ! ***");
1312*4882a593Smuzhiyun 			rc = -EINVAL;
1313*4882a593Smuzhiyun 			goto errout20;
1314*4882a593Smuzhiyun 		}
1315*4882a593Smuzhiyun 
1316*4882a593Smuzhiyun 		/* initialize log from log superblock */
1317*4882a593Smuzhiyun 		if (test_bit(log_INLINELOG,&log->flag)) {
1318*4882a593Smuzhiyun 			if (log->size != le32_to_cpu(logsuper->size)) {
1319*4882a593Smuzhiyun 				rc = -EINVAL;
1320*4882a593Smuzhiyun 				goto errout20;
1321*4882a593Smuzhiyun 			}
1322*4882a593Smuzhiyun 			jfs_info("lmLogInit: inline log:0x%p base:0x%Lx size:0x%x",
1323*4882a593Smuzhiyun 				 log, (unsigned long long)log->base, log->size);
1324*4882a593Smuzhiyun 		} else {
1325*4882a593Smuzhiyun 			if (!uuid_equal(&logsuper->uuid, &log->uuid)) {
1326*4882a593Smuzhiyun 				jfs_warn("wrong uuid on JFS log device");
1327*4882a593Smuzhiyun 				rc = -EINVAL;
1328*4882a593Smuzhiyun 				goto errout20;
1329*4882a593Smuzhiyun 			}
1330*4882a593Smuzhiyun 			log->size = le32_to_cpu(logsuper->size);
1331*4882a593Smuzhiyun 			log->l2bsize = le32_to_cpu(logsuper->l2bsize);
1332*4882a593Smuzhiyun 			jfs_info("lmLogInit: external log:0x%p base:0x%Lx size:0x%x",
1333*4882a593Smuzhiyun 				 log, (unsigned long long)log->base, log->size);
1334*4882a593Smuzhiyun 		}
1335*4882a593Smuzhiyun 
1336*4882a593Smuzhiyun 		log->page = le32_to_cpu(logsuper->end) / LOGPSIZE;
1337*4882a593Smuzhiyun 		log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page);
1338*4882a593Smuzhiyun 
1339*4882a593Smuzhiyun 		/*
1340*4882a593Smuzhiyun 		 * initialize for log append write mode
1341*4882a593Smuzhiyun 		 */
1342*4882a593Smuzhiyun 		/* establish current/end-of-log page/buffer */
1343*4882a593Smuzhiyun 		if ((rc = lbmRead(log, log->page, &bp)))
1344*4882a593Smuzhiyun 			goto errout20;
1345*4882a593Smuzhiyun 
1346*4882a593Smuzhiyun 		lp = (struct logpage *) bp->l_ldata;
1347*4882a593Smuzhiyun 
1348*4882a593Smuzhiyun 		jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d",
1349*4882a593Smuzhiyun 			 le32_to_cpu(logsuper->end), log->page, log->eor,
1350*4882a593Smuzhiyun 			 le16_to_cpu(lp->h.eor));
1351*4882a593Smuzhiyun 
1352*4882a593Smuzhiyun 		log->bp = bp;
1353*4882a593Smuzhiyun 		bp->l_pn = log->page;
1354*4882a593Smuzhiyun 		bp->l_eor = log->eor;
1355*4882a593Smuzhiyun 
1356*4882a593Smuzhiyun 		/* if current page is full, move on to next page */
1357*4882a593Smuzhiyun 		if (log->eor >= LOGPSIZE - LOGPTLRSIZE)
1358*4882a593Smuzhiyun 			lmNextPage(log);
1359*4882a593Smuzhiyun 
1360*4882a593Smuzhiyun 		/*
1361*4882a593Smuzhiyun 		 * initialize log syncpoint
1362*4882a593Smuzhiyun 		 */
1363*4882a593Smuzhiyun 		/*
1364*4882a593Smuzhiyun 		 * write the first SYNCPT record with syncpoint = 0
1365*4882a593Smuzhiyun 		 * (i.e., log redo up to HERE !);
1366*4882a593Smuzhiyun 		 * remove current page from lbm write queue at end of pageout
1367*4882a593Smuzhiyun 		 * (to write log superblock update), but do not release to
1368*4882a593Smuzhiyun 		 * freelist;
1369*4882a593Smuzhiyun 		 */
1370*4882a593Smuzhiyun 		lrd.logtid = 0;
1371*4882a593Smuzhiyun 		lrd.backchain = 0;
1372*4882a593Smuzhiyun 		lrd.type = cpu_to_le16(LOG_SYNCPT);
1373*4882a593Smuzhiyun 		lrd.length = 0;
1374*4882a593Smuzhiyun 		lrd.log.syncpt.sync = 0;
1375*4882a593Smuzhiyun 		lsn = lmWriteRecord(log, NULL, &lrd, NULL);
1376*4882a593Smuzhiyun 		bp = log->bp;
1377*4882a593Smuzhiyun 		bp->l_ceor = bp->l_eor;
1378*4882a593Smuzhiyun 		lp = (struct logpage *) bp->l_ldata;
1379*4882a593Smuzhiyun 		lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
1380*4882a593Smuzhiyun 		lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0);
1381*4882a593Smuzhiyun 		if ((rc = lbmIOWait(bp, 0)))
1382*4882a593Smuzhiyun 			goto errout30;
1383*4882a593Smuzhiyun 
1384*4882a593Smuzhiyun 		/*
1385*4882a593Smuzhiyun 		 * update/write superblock
1386*4882a593Smuzhiyun 		 */
1387*4882a593Smuzhiyun 		logsuper->state = cpu_to_le32(LOGMOUNT);
1388*4882a593Smuzhiyun 		log->serial = le32_to_cpu(logsuper->serial) + 1;
1389*4882a593Smuzhiyun 		logsuper->serial = cpu_to_le32(log->serial);
1390*4882a593Smuzhiyun 		lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1391*4882a593Smuzhiyun 		if ((rc = lbmIOWait(bpsuper, lbmFREE)))
1392*4882a593Smuzhiyun 			goto errout30;
1393*4882a593Smuzhiyun 	}
1394*4882a593Smuzhiyun 
1395*4882a593Smuzhiyun 	/* initialize logsync parameters */
1396*4882a593Smuzhiyun 	log->logsize = (log->size - 2) << L2LOGPSIZE;
1397*4882a593Smuzhiyun 	log->lsn = lsn;
1398*4882a593Smuzhiyun 	log->syncpt = lsn;
1399*4882a593Smuzhiyun 	log->sync = log->syncpt;
1400*4882a593Smuzhiyun 	log->nextsync = LOGSYNC_DELTA(log->logsize);
1401*4882a593Smuzhiyun 
1402*4882a593Smuzhiyun 	jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x",
1403*4882a593Smuzhiyun 		 log->lsn, log->syncpt, log->sync);
1404*4882a593Smuzhiyun 
1405*4882a593Smuzhiyun 	/*
1406*4882a593Smuzhiyun 	 * initialize for lazy/group commit
1407*4882a593Smuzhiyun 	 */
1408*4882a593Smuzhiyun 	log->clsn = lsn;
1409*4882a593Smuzhiyun 
1410*4882a593Smuzhiyun 	return 0;
1411*4882a593Smuzhiyun 
1412*4882a593Smuzhiyun 	/*
1413*4882a593Smuzhiyun 	 *	unwind on error
1414*4882a593Smuzhiyun 	 */
1415*4882a593Smuzhiyun       errout30:		/* release log page */
1416*4882a593Smuzhiyun 	log->wqueue = NULL;
1417*4882a593Smuzhiyun 	bp->l_wqnext = NULL;
1418*4882a593Smuzhiyun 	lbmFree(bp);
1419*4882a593Smuzhiyun 
1420*4882a593Smuzhiyun       errout20:		/* release log superblock */
1421*4882a593Smuzhiyun 	lbmFree(bpsuper);
1422*4882a593Smuzhiyun 
1423*4882a593Smuzhiyun       errout10:		/* unwind lbmLogInit() */
1424*4882a593Smuzhiyun 	lbmLogShutdown(log);
1425*4882a593Smuzhiyun 
1426*4882a593Smuzhiyun 	jfs_warn("lmLogInit: exit(%d)", rc);
1427*4882a593Smuzhiyun 	return rc;
1428*4882a593Smuzhiyun }
1429*4882a593Smuzhiyun 
1430*4882a593Smuzhiyun 
1431*4882a593Smuzhiyun /*
1432*4882a593Smuzhiyun  * NAME:	lmLogClose()
1433*4882a593Smuzhiyun  *
1434*4882a593Smuzhiyun  * FUNCTION:	remove file system <ipmnt> from active list of log <iplog>
1435*4882a593Smuzhiyun  *		and close it on last close.
1436*4882a593Smuzhiyun  *
1437*4882a593Smuzhiyun  * PARAMETER:	sb	- superblock
1438*4882a593Smuzhiyun  *
1439*4882a593Smuzhiyun  * RETURN:	errors from subroutines
1440*4882a593Smuzhiyun  *
1441*4882a593Smuzhiyun  * serialization:
1442*4882a593Smuzhiyun  */
int lmLogClose(struct super_block *sb)
{
	struct jfs_sb_info *sbi = JFS_SBI(sb);
	struct jfs_log *log = sbi->log;
	struct block_device *bdev;
	int rc = 0;

	jfs_info("lmLogClose: log:0x%p", log);

	/* serialize open/close against other mounts sharing this log;
	 * LOG_LOCK guards the log's list of attached superblocks */
	mutex_lock(&jfs_log_mutex);
	LOG_LOCK(log);
	list_del(&sbi->log_list);
	LOG_UNLOCK(log);
	sbi->log = NULL;

	/*
	 * We need to make sure all of the "written" metapages
	 * actually make it to disk
	 */
	sync_blockdev(sb->s_bdev);

	if (test_bit(log_INLINELOG, &log->flag)) {
		/*
		 *	in-line log in host file system
		 */
		rc = lmLogShutdown(log);
		kfree(log);
		goto out;
	}

	/* deactivate this file system's slot in the shared log's
	 * active list (skipped for the no-integrity dummy log) */
	if (!log->no_integrity)
		lmLogFileSystem(log, sbi, 0);

	/* other file systems still use this log; keep it open */
	if (!list_empty(&log->sb_list))
		goto out;

	/*
	 * TODO: ensure that the dummy_log is in a state to allow
	 * lbmLogShutdown to deallocate all the buffers and call
	 * kfree against dummy_log.  For now, leave dummy_log & its
	 * buffers in memory, and reuse if another no-integrity mount
	 * is requested.
	 */
	if (log->no_integrity)
		goto out;

	/*
	 *	external log as separate logical volume
	 */
	list_del(&log->journal_list);
	bdev = log->bdev;
	rc = lmLogShutdown(log);

	/* drop the exclusive reference taken at lmLogOpen time */
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);

	kfree(log);

      out:
	mutex_unlock(&jfs_log_mutex);
	jfs_info("lmLogClose: exit(%d)", rc);
	return rc;
}
1505*4882a593Smuzhiyun 
1506*4882a593Smuzhiyun 
1507*4882a593Smuzhiyun /*
1508*4882a593Smuzhiyun  * NAME:	jfs_flush_journal()
1509*4882a593Smuzhiyun  *
1510*4882a593Smuzhiyun  * FUNCTION:	initiate write of any outstanding transactions to the journal
1511*4882a593Smuzhiyun  *		and optionally wait until they are all written to disk
1512*4882a593Smuzhiyun  *
1513*4882a593Smuzhiyun  *		wait == 0  flush until latest txn is committed, don't wait
1514*4882a593Smuzhiyun  *		wait == 1  flush until latest txn is committed, wait
1515*4882a593Smuzhiyun  *		wait > 1   flush until all txn's are complete, wait
1516*4882a593Smuzhiyun  */
void jfs_flush_journal(struct jfs_log *log, int wait)
{
	int i;
	struct tblock *target = NULL;

	/* jfs_write_inode may call us during read-only mount */
	if (!log)
		return;

	jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait);

	LOGGC_LOCK(log);

	if (!list_empty(&log->cqueue)) {
		/*
		 * This ensures that we will keep writing to the journal as long
		 * as there are unwritten commit records
		 */
		target = list_entry(log->cqueue.prev, struct tblock, cqueue);

		if (test_bit(log_FLUSH, &log->flag)) {
			/*
			 * We're already flushing.
			 * if flush_tblk is NULL, we are flushing everything,
			 * so leave it that way.  Otherwise, update it to the
			 * latest transaction
			 */
			if (log->flush_tblk)
				log->flush_tblk = target;
		} else {
			/* Only flush until latest transaction is committed */
			log->flush_tblk = target;
			set_bit(log_FLUSH, &log->flag);

			/*
			 * Initiate I/O on outstanding transactions
			 */
			if (!(log->cflag & logGC_PAGEOUT)) {
				log->cflag |= logGC_PAGEOUT;
				lmGCwrite(log, 0);
			}
		}
	}
	if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) {
		/* Flush until all activity complete */
		set_bit(log_FLUSH, &log->flag);
		log->flush_tblk = NULL;
	}

	/* block until the group commit of <target> completes; the gc
	 * path apparently wakes us through target->gcwait — note the
	 * lock is dropped around schedule() to let gc make progress */
	if (wait && target && !(target->flag & tblkGC_COMMITTED)) {
		DECLARE_WAITQUEUE(__wait, current);

		add_wait_queue(&target->gcwait, &__wait);
		set_current_state(TASK_UNINTERRUPTIBLE);
		LOGGC_UNLOCK(log);
		schedule();
		LOGGC_LOCK(log);
		remove_wait_queue(&target->gcwait, &__wait);
	}
	LOGGC_UNLOCK(log);

	/* wait <= 1: latest commit is written; only wait > 1 requires
	 * draining all transactions below */
	if (wait < 2)
		return;

	write_special_inodes(log, filemap_fdatawrite);

	/*
	 * If there was recent activity, we may need to wait
	 * for the lazycommit thread to catch up
	 */
	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
		for (i = 0; i < 200; i++) {	/* Too much? */
			msleep(250);
			write_special_inodes(log, filemap_fdatawrite);
			if (list_empty(&log->cqueue) &&
			    list_empty(&log->synclist))
				break;
		}
	}
	assert(list_empty(&log->cqueue));

#ifdef CONFIG_JFS_DEBUG
	/* diagnostic dump of whatever is still pinned on the synclist */
	if (!list_empty(&log->synclist)) {
		struct logsyncblk *lp;

		printk(KERN_ERR "jfs_flush_journal: synclist not empty\n");
		list_for_each_entry(lp, &log->synclist, synclist) {
			if (lp->xflag & COMMIT_PAGE) {
				struct metapage *mp = (struct metapage *)lp;
				print_hex_dump(KERN_ERR, "metapage: ",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       mp, sizeof(struct metapage), 0);
				print_hex_dump(KERN_ERR, "page: ",
					       DUMP_PREFIX_ADDRESS, 16,
					       sizeof(long), mp->page,
					       sizeof(struct page), 0);
			} else
				print_hex_dump(KERN_ERR, "tblock:",
					       DUMP_PREFIX_ADDRESS, 16, 4,
					       lp, sizeof(struct tblock), 0);
		}
	}
#else
	WARN_ON(!list_empty(&log->synclist));
#endif
	clear_bit(log_FLUSH, &log->flag);
}
1624*4882a593Smuzhiyun 
1625*4882a593Smuzhiyun /*
1626*4882a593Smuzhiyun  * NAME:	lmLogShutdown()
1627*4882a593Smuzhiyun  *
1628*4882a593Smuzhiyun  * FUNCTION:	log shutdown at last LogClose().
1629*4882a593Smuzhiyun  *
1630*4882a593Smuzhiyun  *		write log syncpt record.
1631*4882a593Smuzhiyun  *		update super block to set redone flag to 0.
1632*4882a593Smuzhiyun  *
1633*4882a593Smuzhiyun  * PARAMETER:	log	- log inode
1634*4882a593Smuzhiyun  *
1635*4882a593Smuzhiyun  * RETURN:	0	- success
1636*4882a593Smuzhiyun  *
1637*4882a593Smuzhiyun  * serialization: single last close thread
1638*4882a593Smuzhiyun  */
int lmLogShutdown(struct jfs_log * log)
{
	int rc;
	struct lrd lrd;
	int lsn;
	struct logsuper *logsuper;
	struct lbuf *bpsuper;
	struct lbuf *bp;
	struct logpage *lp;

	jfs_info("lmLogShutdown: log:0x%p", log);

	/* drain all outstanding transactions before quiescing the log */
	jfs_flush_journal(log, 2);

	/*
	 * write the last SYNCPT record with syncpoint = 0
	 * (i.e., log redo up to HERE !)
	 */
	lrd.logtid = 0;
	lrd.backchain = 0;
	lrd.type = cpu_to_le16(LOG_SYNCPT);
	lrd.length = 0;
	lrd.log.syncpt.sync = 0;

	lsn = lmWriteRecord(log, NULL, &lrd, NULL);
	bp = log->bp;
	lp = (struct logpage *) bp->l_ldata;
	/* stamp both page trailers with the end-of-record offset */
	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
	/* synchronously push the final log page, then give up the
	 * current-page buffer for good */
	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
	lbmIOWait(log->bp, lbmFREE);
	log->bp = NULL;

	/*
	 * synchronous update log superblock
	 * mark log state as shutdown cleanly
	 * (i.e., Log does not need to be replayed).
	 */
	if ((rc = lbmRead(log, 1, &bpsuper)))
		goto out;

	logsuper = (struct logsuper *) bpsuper->l_ldata;
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->end = cpu_to_le32(lsn);
	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
	rc = lbmIOWait(bpsuper, lbmFREE);

	jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d",
		 lsn, log->page, log->eor);

      out:
	/*
	 * shutdown per log i/o
	 */
	lbmLogShutdown(log);

	if (rc) {
		jfs_warn("lmLogShutdown: exit(%d)", rc);
	}
	return rc;
}
1699*4882a593Smuzhiyun 
1700*4882a593Smuzhiyun 
1701*4882a593Smuzhiyun /*
1702*4882a593Smuzhiyun  * NAME:	lmLogFileSystem()
1703*4882a593Smuzhiyun  *
1704*4882a593Smuzhiyun  * FUNCTION:	insert (<activate> = true)/remove (<activate> = false)
1705*4882a593Smuzhiyun  *	file system into/from log active file system list.
1706*4882a593Smuzhiyun  *
 * PARAMETER:	log	- pointer to log's inode.
1708*4882a593Smuzhiyun  *		fsdev	- kdev_t of filesystem.
1709*4882a593Smuzhiyun  *		serial	- pointer to returned log serial number
1710*4882a593Smuzhiyun  *		activate - insert/remove device from active list.
1711*4882a593Smuzhiyun  *
1712*4882a593Smuzhiyun  * RETURN:	0	- success
1713*4882a593Smuzhiyun  *		errors returned by vms_iowait().
1714*4882a593Smuzhiyun  */
lmLogFileSystem(struct jfs_log * log,struct jfs_sb_info * sbi,int activate)1715*4882a593Smuzhiyun static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi,
1716*4882a593Smuzhiyun 			   int activate)
1717*4882a593Smuzhiyun {
1718*4882a593Smuzhiyun 	int rc = 0;
1719*4882a593Smuzhiyun 	int i;
1720*4882a593Smuzhiyun 	struct logsuper *logsuper;
1721*4882a593Smuzhiyun 	struct lbuf *bpsuper;
1722*4882a593Smuzhiyun 	uuid_t *uuid = &sbi->uuid;
1723*4882a593Smuzhiyun 
1724*4882a593Smuzhiyun 	/*
1725*4882a593Smuzhiyun 	 * insert/remove file system device to log active file system list.
1726*4882a593Smuzhiyun 	 */
1727*4882a593Smuzhiyun 	if ((rc = lbmRead(log, 1, &bpsuper)))
1728*4882a593Smuzhiyun 		return rc;
1729*4882a593Smuzhiyun 
1730*4882a593Smuzhiyun 	logsuper = (struct logsuper *) bpsuper->l_ldata;
1731*4882a593Smuzhiyun 	if (activate) {
1732*4882a593Smuzhiyun 		for (i = 0; i < MAX_ACTIVE; i++)
1733*4882a593Smuzhiyun 			if (uuid_is_null(&logsuper->active[i].uuid)) {
1734*4882a593Smuzhiyun 				uuid_copy(&logsuper->active[i].uuid, uuid);
1735*4882a593Smuzhiyun 				sbi->aggregate = i;
1736*4882a593Smuzhiyun 				break;
1737*4882a593Smuzhiyun 			}
1738*4882a593Smuzhiyun 		if (i == MAX_ACTIVE) {
1739*4882a593Smuzhiyun 			jfs_warn("Too many file systems sharing journal!");
1740*4882a593Smuzhiyun 			lbmFree(bpsuper);
1741*4882a593Smuzhiyun 			return -EMFILE;	/* Is there a better rc? */
1742*4882a593Smuzhiyun 		}
1743*4882a593Smuzhiyun 	} else {
1744*4882a593Smuzhiyun 		for (i = 0; i < MAX_ACTIVE; i++)
1745*4882a593Smuzhiyun 			if (uuid_equal(&logsuper->active[i].uuid, uuid)) {
1746*4882a593Smuzhiyun 				uuid_copy(&logsuper->active[i].uuid,
1747*4882a593Smuzhiyun 					  &uuid_null);
1748*4882a593Smuzhiyun 				break;
1749*4882a593Smuzhiyun 			}
1750*4882a593Smuzhiyun 		if (i == MAX_ACTIVE) {
1751*4882a593Smuzhiyun 			jfs_warn("Somebody stomped on the journal!");
1752*4882a593Smuzhiyun 			lbmFree(bpsuper);
1753*4882a593Smuzhiyun 			return -EIO;
1754*4882a593Smuzhiyun 		}
1755*4882a593Smuzhiyun 
1756*4882a593Smuzhiyun 	}
1757*4882a593Smuzhiyun 
1758*4882a593Smuzhiyun 	/*
1759*4882a593Smuzhiyun 	 * synchronous write log superblock:
1760*4882a593Smuzhiyun 	 *
1761*4882a593Smuzhiyun 	 * write sidestream bypassing write queue:
1762*4882a593Smuzhiyun 	 * at file system mount, log super block is updated for
1763*4882a593Smuzhiyun 	 * activation of the file system before any log record
1764*4882a593Smuzhiyun 	 * (MOUNT record) of the file system, and at file system
1765*4882a593Smuzhiyun 	 * unmount, all meta data for the file system has been
1766*4882a593Smuzhiyun 	 * flushed before log super block is updated for deactivation
1767*4882a593Smuzhiyun 	 * of the file system.
1768*4882a593Smuzhiyun 	 */
1769*4882a593Smuzhiyun 	lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC);
1770*4882a593Smuzhiyun 	rc = lbmIOWait(bpsuper, lbmFREE);
1771*4882a593Smuzhiyun 
1772*4882a593Smuzhiyun 	return rc;
1773*4882a593Smuzhiyun }
1774*4882a593Smuzhiyun 
1775*4882a593Smuzhiyun /*
1776*4882a593Smuzhiyun  *		log buffer manager (lbm)
1777*4882a593Smuzhiyun  *		------------------------
1778*4882a593Smuzhiyun  *
1779*4882a593Smuzhiyun  * special purpose buffer manager supporting log i/o requirements.
1780*4882a593Smuzhiyun  *
1781*4882a593Smuzhiyun  * per log write queue:
1782*4882a593Smuzhiyun  * log pageout occurs in serial order by fifo write queue and
 * restricting to a single i/o in progress at any one time.
1784*4882a593Smuzhiyun  * a circular singly-linked list
1785*4882a593Smuzhiyun  * (log->wrqueue points to the tail, and buffers are linked via
1786*4882a593Smuzhiyun  * bp->wrqueue field), and
 * maintains log page in pageout or waiting for pageout in serial pageout.
1788*4882a593Smuzhiyun  */
1789*4882a593Smuzhiyun 
1790*4882a593Smuzhiyun /*
1791*4882a593Smuzhiyun  *	lbmLogInit()
1792*4882a593Smuzhiyun  *
1793*4882a593Smuzhiyun  * initialize per log I/O setup at lmLogInit()
1794*4882a593Smuzhiyun  */
static int lbmLogInit(struct jfs_log * log)
{				/* log inode */
	int i;
	struct lbuf *lbuf;

	jfs_info("lbmLogInit: log:0x%p", log);

	/* initialize current buffer cursor */
	log->bp = NULL;

	/* initialize log device write queue */
	log->wqueue = NULL;

	/*
	 * Each log has its own buffer pages allocated to it.  These are
	 * not managed by the page cache.  This ensures that a transaction
	 * writing to the log does not block trying to allocate a page from
	 * the page cache (for the log).  This would be bad, since page
	 * allocation waits on the kswapd thread that may be committing inodes
	 * which would cause log activity.  Was that clear?  I'm trying to
	 * avoid deadlock here.
	 */
	init_waitqueue_head(&log->free_wait);

	log->lbuf_free = NULL;

	/* carve LOGPAGES lbufs out of whole pages, LOGPSIZE bytes each */
	for (i = 0; i < LOGPAGES;) {
		char *buffer;
		uint offset;
		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);

		if (!page)
			goto error;
		buffer = page_address(page);
		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
			if (lbuf == NULL) {
				/* no lbuf references this page yet at
				 * offset 0, so free it directly; otherwise
				 * lbmLogShutdown() below drops the refs
				 * held by the lbufs already on the list */
				if (offset == 0)
					__free_page(page);
				goto error;
			}
			if (offset) /* we already have one reference */
				get_page(page);
			lbuf->l_offset = offset;
			lbuf->l_ldata = buffer + offset;
			lbuf->l_page = page;
			lbuf->l_log = log;
			init_waitqueue_head(&lbuf->l_ioevent);

			/* push onto the per-log freelist */
			lbuf->l_freelist = log->lbuf_free;
			log->lbuf_free = lbuf;
			i++;
		}
	}

	return (0);

      error:
	/* releases every lbuf (and page reference) queued so far */
	lbmLogShutdown(log);
	return -ENOMEM;
}
1856*4882a593Smuzhiyun 
1857*4882a593Smuzhiyun 
1858*4882a593Smuzhiyun /*
1859*4882a593Smuzhiyun  *	lbmLogShutdown()
1860*4882a593Smuzhiyun  *
1861*4882a593Smuzhiyun  * finalize per log I/O setup at lmLogShutdown()
1862*4882a593Smuzhiyun  */
lbmLogShutdown(struct jfs_log * log)1863*4882a593Smuzhiyun static void lbmLogShutdown(struct jfs_log * log)
1864*4882a593Smuzhiyun {
1865*4882a593Smuzhiyun 	struct lbuf *lbuf;
1866*4882a593Smuzhiyun 
1867*4882a593Smuzhiyun 	jfs_info("lbmLogShutdown: log:0x%p", log);
1868*4882a593Smuzhiyun 
1869*4882a593Smuzhiyun 	lbuf = log->lbuf_free;
1870*4882a593Smuzhiyun 	while (lbuf) {
1871*4882a593Smuzhiyun 		struct lbuf *next = lbuf->l_freelist;
1872*4882a593Smuzhiyun 		__free_page(lbuf->l_page);
1873*4882a593Smuzhiyun 		kfree(lbuf);
1874*4882a593Smuzhiyun 		lbuf = next;
1875*4882a593Smuzhiyun 	}
1876*4882a593Smuzhiyun }
1877*4882a593Smuzhiyun 
1878*4882a593Smuzhiyun 
1879*4882a593Smuzhiyun /*
1880*4882a593Smuzhiyun  *	lbmAllocate()
1881*4882a593Smuzhiyun  *
1882*4882a593Smuzhiyun  * allocate an empty log buffer
1883*4882a593Smuzhiyun  */
lbmAllocate(struct jfs_log * log,int pn)1884*4882a593Smuzhiyun static struct lbuf *lbmAllocate(struct jfs_log * log, int pn)
1885*4882a593Smuzhiyun {
1886*4882a593Smuzhiyun 	struct lbuf *bp;
1887*4882a593Smuzhiyun 	unsigned long flags;
1888*4882a593Smuzhiyun 
1889*4882a593Smuzhiyun 	/*
1890*4882a593Smuzhiyun 	 * recycle from log buffer freelist if any
1891*4882a593Smuzhiyun 	 */
1892*4882a593Smuzhiyun 	LCACHE_LOCK(flags);
1893*4882a593Smuzhiyun 	LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags);
1894*4882a593Smuzhiyun 	log->lbuf_free = bp->l_freelist;
1895*4882a593Smuzhiyun 	LCACHE_UNLOCK(flags);
1896*4882a593Smuzhiyun 
1897*4882a593Smuzhiyun 	bp->l_flag = 0;
1898*4882a593Smuzhiyun 
1899*4882a593Smuzhiyun 	bp->l_wqnext = NULL;
1900*4882a593Smuzhiyun 	bp->l_freelist = NULL;
1901*4882a593Smuzhiyun 
1902*4882a593Smuzhiyun 	bp->l_pn = pn;
1903*4882a593Smuzhiyun 	bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize));
1904*4882a593Smuzhiyun 	bp->l_ceor = 0;
1905*4882a593Smuzhiyun 
1906*4882a593Smuzhiyun 	return bp;
1907*4882a593Smuzhiyun }
1908*4882a593Smuzhiyun 
1909*4882a593Smuzhiyun 
1910*4882a593Smuzhiyun /*
1911*4882a593Smuzhiyun  *	lbmFree()
1912*4882a593Smuzhiyun  *
1913*4882a593Smuzhiyun  * release a log buffer to freelist
1914*4882a593Smuzhiyun  */
/*
 * Return a log buffer to the freelist, taking the log cache lock
 * that lbmfree() requires to be held.
 */
static void lbmFree(struct lbuf * bp)
{
	unsigned long irqflags;

	LCACHE_LOCK(irqflags);
	lbmfree(bp);
	LCACHE_UNLOCK(irqflags);
}
1925*4882a593Smuzhiyun 
lbmfree(struct lbuf * bp)1926*4882a593Smuzhiyun static void lbmfree(struct lbuf * bp)
1927*4882a593Smuzhiyun {
1928*4882a593Smuzhiyun 	struct jfs_log *log = bp->l_log;
1929*4882a593Smuzhiyun 
1930*4882a593Smuzhiyun 	assert(bp->l_wqnext == NULL);
1931*4882a593Smuzhiyun 
1932*4882a593Smuzhiyun 	/*
1933*4882a593Smuzhiyun 	 * return the buffer to head of freelist
1934*4882a593Smuzhiyun 	 */
1935*4882a593Smuzhiyun 	bp->l_freelist = log->lbuf_free;
1936*4882a593Smuzhiyun 	log->lbuf_free = bp;
1937*4882a593Smuzhiyun 
1938*4882a593Smuzhiyun 	wake_up(&log->free_wait);
1939*4882a593Smuzhiyun 	return;
1940*4882a593Smuzhiyun }
1941*4882a593Smuzhiyun 
1942*4882a593Smuzhiyun 
1943*4882a593Smuzhiyun /*
1944*4882a593Smuzhiyun  * NAME:	lbmRedrive
1945*4882a593Smuzhiyun  *
1946*4882a593Smuzhiyun  * FUNCTION:	add a log buffer to the log redrive list
1947*4882a593Smuzhiyun  *
1948*4882a593Smuzhiyun  * PARAMETER:
1949*4882a593Smuzhiyun  *	bp	- log buffer
1950*4882a593Smuzhiyun  *
1951*4882a593Smuzhiyun  * NOTES:
1952*4882a593Smuzhiyun  *	Takes log_redrive_lock.
1953*4882a593Smuzhiyun  */
lbmRedrive(struct lbuf * bp)1954*4882a593Smuzhiyun static inline void lbmRedrive(struct lbuf *bp)
1955*4882a593Smuzhiyun {
1956*4882a593Smuzhiyun 	unsigned long flags;
1957*4882a593Smuzhiyun 
1958*4882a593Smuzhiyun 	spin_lock_irqsave(&log_redrive_lock, flags);
1959*4882a593Smuzhiyun 	bp->l_redrive_next = log_redrive_list;
1960*4882a593Smuzhiyun 	log_redrive_list = bp;
1961*4882a593Smuzhiyun 	spin_unlock_irqrestore(&log_redrive_lock, flags);
1962*4882a593Smuzhiyun 
1963*4882a593Smuzhiyun 	wake_up_process(jfsIOthread);
1964*4882a593Smuzhiyun }
1965*4882a593Smuzhiyun 
1966*4882a593Smuzhiyun 
1967*4882a593Smuzhiyun /*
1968*4882a593Smuzhiyun  *	lbmRead()
1969*4882a593Smuzhiyun  */
static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
{
	struct bio *bio;
	struct lbuf *bp;

	/*
	 * allocate a log buffer
	 */
	*bpp = bp = lbmAllocate(log, pn);
	jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn);

	bp->l_flag |= lbmREAD;

	/* build a one-segment bio targeting the buffer's LOGPSIZE slice */
	bio = bio_alloc(GFP_NOFS, 1);

	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
	bio_set_dev(bio, log->bdev);

	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);

	bio->bi_end_io = lbmIODone;
	bio->bi_private = bp;
	bio->bi_opf = REQ_OP_READ;
	/*check if journaling to disk has been disabled*/
	if (log->no_integrity) {
		/* fake an immediate, empty completion instead of doing I/O */
		bio->bi_iter.bi_size = 0;
		lbmIODone(bio);
	} else {
		submit_bio(bio);
	}

	/* lbmIODone changes l_flag when the read completes */
	wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD));

	return 0;
}
2006*4882a593Smuzhiyun 
2007*4882a593Smuzhiyun 
2008*4882a593Smuzhiyun /*
2009*4882a593Smuzhiyun  *	lbmWrite()
2010*4882a593Smuzhiyun  *
2011*4882a593Smuzhiyun  * buffer at head of pageout queue stays after completion of
2012*4882a593Smuzhiyun  * partial-page pageout and redriven by explicit initiation of
2013*4882a593Smuzhiyun  * pageout by caller until full-page pageout is completed and
2014*4882a593Smuzhiyun  * released.
2015*4882a593Smuzhiyun  *
2016*4882a593Smuzhiyun  * device driver i/o done redrives pageout of new buffer at
2017*4882a593Smuzhiyun  * head of pageout queue when current buffer at head of pageout
2018*4882a593Smuzhiyun  * queue is released at the completion of its full-page pageout.
2019*4882a593Smuzhiyun  *
2020*4882a593Smuzhiyun  * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit().
2021*4882a593Smuzhiyun  * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone()
2022*4882a593Smuzhiyun  */
static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag,
		     int cant_block)
{
	struct lbuf *tail;
	unsigned long flags;

	jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn);

	/* map the logical block address to physical block address */
	bp->l_blkno =
	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));

	LCACHE_LOCK(flags);		/* disable+lock */

	/*
	 * initialize buffer for device driver
	 */
	bp->l_flag = flag;

	/*
	 *	insert bp at tail of write queue associated with log
	 *
	 * (request is either for bp already/currently at head of queue
	 * or new bp to be inserted at tail)
	 */
	tail = log->wqueue;

	/* is buffer not already on write queue ? */
	if (bp->l_wqnext == NULL) {
		/* insert at tail of wqueue */
		if (tail == NULL) {
			/* empty queue: bp links to itself (circular list) */
			log->wqueue = bp;
			bp->l_wqnext = bp;
		} else {
			log->wqueue = bp;
			bp->l_wqnext = tail->l_wqnext;
			tail->l_wqnext = bp;
		}

		tail = bp;
	}

	/* is buffer at head of wqueue and for write ? */
	/* tail->l_wqnext is the queue head; only one pageout is in
	 * flight at a time, so a non-head buffer just waits its turn */
	if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */
		return;
	}

	LCACHE_UNLOCK(flags);	/* unlock+enable */

	if (cant_block)
		/* defer submission to the jfsIO thread */
		lbmRedrive(bp);
	else if (flag & lbmSYNC)
		lbmStartIO(bp);
	else {
		/* drop LOGGC_LOCK around the submission (per the
		 * serialization note above, caller holds it here) */
		LOGGC_UNLOCK(log);
		lbmStartIO(bp);
		LOGGC_LOCK(log);
	}
}
2083*4882a593Smuzhiyun 
2084*4882a593Smuzhiyun 
2085*4882a593Smuzhiyun /*
2086*4882a593Smuzhiyun  *	lbmDirectWrite()
2087*4882a593Smuzhiyun  *
2088*4882a593Smuzhiyun  * initiate pageout bypassing write queue for sidestream
2089*4882a593Smuzhiyun  * (e.g., log superblock) write;
2090*4882a593Smuzhiyun  */
lbmDirectWrite(struct jfs_log * log,struct lbuf * bp,int flag)2091*4882a593Smuzhiyun static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag)
2092*4882a593Smuzhiyun {
2093*4882a593Smuzhiyun 	jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x",
2094*4882a593Smuzhiyun 		 bp, flag, bp->l_pn);
2095*4882a593Smuzhiyun 
2096*4882a593Smuzhiyun 	/*
2097*4882a593Smuzhiyun 	 * initialize buffer for device driver
2098*4882a593Smuzhiyun 	 */
2099*4882a593Smuzhiyun 	bp->l_flag = flag | lbmDIRECT;
2100*4882a593Smuzhiyun 
2101*4882a593Smuzhiyun 	/* map the logical block address to physical block address */
2102*4882a593Smuzhiyun 	bp->l_blkno =
2103*4882a593Smuzhiyun 	    log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize));
2104*4882a593Smuzhiyun 
2105*4882a593Smuzhiyun 	/*
2106*4882a593Smuzhiyun 	 *	initiate pageout of the page
2107*4882a593Smuzhiyun 	 */
2108*4882a593Smuzhiyun 	lbmStartIO(bp);
2109*4882a593Smuzhiyun }
2110*4882a593Smuzhiyun 
2111*4882a593Smuzhiyun 
2112*4882a593Smuzhiyun /*
2113*4882a593Smuzhiyun  * NAME:	lbmStartIO()
2114*4882a593Smuzhiyun  *
2115*4882a593Smuzhiyun  * FUNCTION:	Interface to DD strategy routine
2116*4882a593Smuzhiyun  *
2117*4882a593Smuzhiyun  * RETURN:	none
2118*4882a593Smuzhiyun  *
2119*4882a593Smuzhiyun  * serialization: LCACHE_LOCK() is NOT held during log i/o;
2120*4882a593Smuzhiyun  */
lbmStartIO(struct lbuf * bp)2121*4882a593Smuzhiyun static void lbmStartIO(struct lbuf * bp)
2122*4882a593Smuzhiyun {
2123*4882a593Smuzhiyun 	struct bio *bio;
2124*4882a593Smuzhiyun 	struct jfs_log *log = bp->l_log;
2125*4882a593Smuzhiyun 
2126*4882a593Smuzhiyun 	jfs_info("lbmStartIO");
2127*4882a593Smuzhiyun 
2128*4882a593Smuzhiyun 	bio = bio_alloc(GFP_NOFS, 1);
2129*4882a593Smuzhiyun 	bio->bi_iter.bi_sector = bp->l_blkno << (log->l2bsize - 9);
2130*4882a593Smuzhiyun 	bio_set_dev(bio, log->bdev);
2131*4882a593Smuzhiyun 
2132*4882a593Smuzhiyun 	bio_add_page(bio, bp->l_page, LOGPSIZE, bp->l_offset);
2133*4882a593Smuzhiyun 	BUG_ON(bio->bi_iter.bi_size != LOGPSIZE);
2134*4882a593Smuzhiyun 
2135*4882a593Smuzhiyun 	bio->bi_end_io = lbmIODone;
2136*4882a593Smuzhiyun 	bio->bi_private = bp;
2137*4882a593Smuzhiyun 	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
2138*4882a593Smuzhiyun 
2139*4882a593Smuzhiyun 	/* check if journaling to disk has been disabled */
2140*4882a593Smuzhiyun 	if (log->no_integrity) {
2141*4882a593Smuzhiyun 		bio->bi_iter.bi_size = 0;
2142*4882a593Smuzhiyun 		lbmIODone(bio);
2143*4882a593Smuzhiyun 	} else {
2144*4882a593Smuzhiyun 		submit_bio(bio);
2145*4882a593Smuzhiyun 		INCREMENT(lmStat.submitted);
2146*4882a593Smuzhiyun 	}
2147*4882a593Smuzhiyun }
2148*4882a593Smuzhiyun 
2149*4882a593Smuzhiyun 
2150*4882a593Smuzhiyun /*
2151*4882a593Smuzhiyun  *	lbmIOWait()
2152*4882a593Smuzhiyun  */
lbmIOWait(struct lbuf * bp,int flag)2153*4882a593Smuzhiyun static int lbmIOWait(struct lbuf * bp, int flag)
2154*4882a593Smuzhiyun {
2155*4882a593Smuzhiyun 	unsigned long flags;
2156*4882a593Smuzhiyun 	int rc = 0;
2157*4882a593Smuzhiyun 
2158*4882a593Smuzhiyun 	jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2159*4882a593Smuzhiyun 
2160*4882a593Smuzhiyun 	LCACHE_LOCK(flags);		/* disable+lock */
2161*4882a593Smuzhiyun 
2162*4882a593Smuzhiyun 	LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags);
2163*4882a593Smuzhiyun 
2164*4882a593Smuzhiyun 	rc = (bp->l_flag & lbmERROR) ? -EIO : 0;
2165*4882a593Smuzhiyun 
2166*4882a593Smuzhiyun 	if (flag & lbmFREE)
2167*4882a593Smuzhiyun 		lbmfree(bp);
2168*4882a593Smuzhiyun 
2169*4882a593Smuzhiyun 	LCACHE_UNLOCK(flags);	/* unlock+enable */
2170*4882a593Smuzhiyun 
2171*4882a593Smuzhiyun 	jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag);
2172*4882a593Smuzhiyun 	return rc;
2173*4882a593Smuzhiyun }
2174*4882a593Smuzhiyun 
/*
 *	lbmIODone()
 *
 * executed at INTIODONE level
 *
 * bio completion handler for log page i/o; also invoked directly from
 * lbmStartIO() with a zero-sized bio when journaling is disabled
 * (log->no_integrity).
 */
static void lbmIODone(struct bio *bio)
{
	struct lbuf *bp = bio->bi_private;
	struct lbuf *nextbp, *tail;
	struct jfs_log *log;
	unsigned long flags;

	/*
	 * get back jfs buffer bound to the i/o buffer
	 */
	jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag);

	LCACHE_LOCK(flags);		/* disable+lock */

	bp->l_flag |= lbmDONE;

	if (bio->bi_status) {
		/* record the failure for lbmIOWait() to report as -EIO */
		bp->l_flag |= lbmERROR;

		jfs_err("lbmIODone: I/O error in JFS log");
	}

	/* the bio is ours; release it before any wakeups */
	bio_put(bio);

	/*
	 *	pagein completion
	 */
	if (bp->l_flag & lbmREAD) {
		bp->l_flag &= ~lbmREAD;

		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);

		return;
	}

	/*
	 *	pageout completion
	 *
	 * the bp at the head of write queue has completed pageout.
	 *
	 * if single-commit/full-page pageout, remove the current buffer
	 * from head of pageout queue, and redrive pageout with
	 * the new buffer at head of pageout queue;
	 * otherwise, the partial-page pageout buffer stays at
	 * the head of pageout queue to be redriven for pageout
	 * by lmGroupCommit() until full-page pageout is completed.
	 */
	bp->l_flag &= ~lbmWRITE;
	INCREMENT(lmStat.pagedone);

	/* update committed lsn */
	log = bp->l_log;
	log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor;

	/* sidestream (e.g. log superblock) writes bypass the write
	 * queue entirely: just wake the initiator and return
	 */
	if (bp->l_flag & lbmDIRECT) {
		LCACHE_WAKEUP(&bp->l_ioevent);
		LCACHE_UNLOCK(flags);
		return;
	}

	/* log->wqueue points at the tail of a circular list; the head
	 * (the buffer whose i/o just completed) is tail->l_wqnext
	 */
	tail = log->wqueue;

	/* single element queue */
	if (bp == tail) {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			log->wqueue = NULL;
			bp->l_wqnext = NULL;
		}
	}
	/* multi element queue */
	else {
		/* remove head buffer of full-page pageout
		 * from log device write queue
		 */
		if (bp->l_flag & lbmRELEASE) {
			/* unlink bp: the tail now points past it to the
			 * new head, nextbp
			 */
			nextbp = tail->l_wqnext = bp->l_wqnext;
			bp->l_wqnext = NULL;

			/*
			 * redrive pageout of next page at head of write queue:
			 * redrive next page without any bound tblk
			 * (i.e., page w/o any COMMIT records), or
			 * first page of new group commit which has been
			 * queued after current page (subsequent pageout
			 * is performed synchronously, except page without
			 * any COMMITs) by lmGroupCommit() as indicated
			 * by lbmWRITE flag;
			 */
			if (nextbp->l_flag & lbmWRITE) {
				/*
				 * We can't do the I/O at interrupt time.
				 * The jfsIO thread can do it
				 */
				lbmRedrive(nextbp);
			}
		}
	}

	/*
	 *	synchronous pageout:
	 *
	 * buffer has not necessarily been removed from write queue
	 * (e.g., synchronous write of partial-page with COMMIT):
	 * leave buffer for i/o initiator to dispose
	 */
	if (bp->l_flag & lbmSYNC) {
		LCACHE_UNLOCK(flags);	/* unlock+enable */

		/* wakeup I/O initiator */
		LCACHE_WAKEUP(&bp->l_ioevent);
	}

	/*
	 *	Group Commit pageout:
	 */
	else if (bp->l_flag & lbmGC) {
		LCACHE_UNLOCK(flags);
		lmPostGC(bp);
	}

	/*
	 *	asynchronous pageout:
	 *
	 * buffer must have been removed from write queue:
	 * insert buffer at head of freelist where it can be recycled
	 */
	else {
		assert(bp->l_flag & lbmRELEASE);
		assert(bp->l_flag & lbmFREE);
		lbmfree(bp);

		LCACHE_UNLOCK(flags);	/* unlock+enable */
	}
}
2320*4882a593Smuzhiyun 
/*
 *	jfsIOWait()
 *
 * Main loop of the jfsIO kernel thread: drains buffers queued on
 * log_redrive_list (by lbmRedrive()) and submits their i/o in process
 * context, then sleeps until more work arrives or the thread is
 * stopped.  Always returns 0.
 */
int jfsIOWait(void *arg)
{
	struct lbuf *bp;

	do {
		spin_lock_irq(&log_redrive_lock);
		while ((bp = log_redrive_list)) {
			/* pop the head of the redrive list */
			log_redrive_list = bp->l_redrive_next;
			bp->l_redrive_next = NULL;
			/* drop the spinlock across submission —
			 * lbmStartIO() allocates a bio with GFP_NOFS
			 * and so may sleep
			 */
			spin_unlock_irq(&log_redrive_lock);
			lbmStartIO(bp);
			spin_lock_irq(&log_redrive_lock);
		}

		if (freezing(current)) {
			/* cooperate with system suspend/hibernate */
			spin_unlock_irq(&log_redrive_lock);
			try_to_freeze();
		} else {
			/* set task state before releasing the lock so a
			 * wakeup racing with us is not lost before
			 * schedule()
			 */
			set_current_state(TASK_INTERRUPTIBLE);
			spin_unlock_irq(&log_redrive_lock);
			schedule();
		}
	} while (!kthread_should_stop());

	jfs_info("jfsIOWait being killed!");
	return 0;
}
2348*4882a593Smuzhiyun 
/*
 * NAME:	lmLogFormat()/jfs_logform()
 *
 * FUNCTION:	format file system log
 *
 * PARAMETERS:
 *	log	- volume log
 *	logAddress - start address of log space in FS block
 *	logSize	- length of log space in FS block;
 *
 * RETURN:	0	- success
 *		-EIO	- i/o error
 *
 * XXX: We're synchronously writing one page at a time.  This needs to
 *	be improved by writing multiple pages at once.
 */
int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize)
{
	int rc = -EIO;
	struct jfs_sb_info *sbi;
	struct logsuper *logsuper;
	struct logpage *lp;
	int lspn;		/* log sequence page number */
	struct lrd *lrd_ptr;
	int npages = 0;
	struct lbuf *bp;

	jfs_info("lmLogFormat: logAddress:%Ld logSize:%d",
		 (long long)logAddress, logSize);

	sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list);

	/* allocate a log buffer */
	/* NOTE(review): return value is not NULL-checked here; this
	 * presumably relies on lbmAllocate() always succeeding — verify
	 * against its definition before reusing this pattern
	 */
	bp = lbmAllocate(log, 1);

	npages = logSize >> sbi->l2nbperpage;

	/*
	 *	log space:
	 *
	 * page 0 - reserved;
	 * page 1 - log superblock;
	 * page 2 - log data page: A SYNC log record is written
	 *	    into this page at logform time;
	 * pages 3-N - log data page: set to empty log data pages;
	 */
	/*
	 *	init log superblock: log page 1
	 */
	logsuper = (struct logsuper *) bp->l_ldata;

	logsuper->magic = cpu_to_le32(LOGMAGIC);
	logsuper->version = cpu_to_le32(LOGVERSION);
	logsuper->state = cpu_to_le32(LOGREDONE);
	logsuper->flag = cpu_to_le32(sbi->mntflag);	/* ? */
	logsuper->size = cpu_to_le32(npages);
	logsuper->bsize = cpu_to_le32(sbi->bsize);
	logsuper->l2bsize = cpu_to_le32(sbi->l2bsize);
	logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE);

	/* synchronously write the superblock page, bypassing the write
	 * queue (lbmDIRECT); the same buffer is reused for every page
	 */
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	bp->l_blkno = logAddress + sbi->nbperpage;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	init pages 2 to npages-1 as log data pages:
	 *
	 * log page sequence number (lpsn) initialization:
	 *
	 * pn:   0     1     2     3                 n-1
	 *       +-----+-----+=====+=====+===.....===+=====+
	 * lspn:             N-1   0     1           N-2
	 *                   <--- N page circular file ---->
	 *
	 * the N (= npages-2) data pages of the log is maintained as
	 * a circular file for the log records;
	 * lpsn grows by 1 monotonically as each log page is written
	 * to the circular file of the log;
	 * and setLogpage() will not reset the page number even if
	 * the eor is equal to LOGPHDRSIZE. In order for binary search
	 * still work in find log end process, we have to simulate the
	 * log wrap situation at the log format time.
	 * The 1st log page written will have the highest lpsn. Then
	 * the succeeding log pages will have ascending order of
	 * the lspn starting from 0, ... (N-2)
	 */
	lp = (struct logpage *) bp->l_ldata;
	/*
	 * initialize 1st log page to be written: lpsn = N - 1,
	 * write a SYNCPT log record is written to this page
	 */
	lp->h.page = lp->t.page = cpu_to_le32(npages - 3);
	lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE);

	lrd_ptr = (struct lrd *) &lp->data;
	lrd_ptr->logtid = 0;
	lrd_ptr->backchain = 0;
	lrd_ptr->type = cpu_to_le16(LOG_SYNCPT);
	lrd_ptr->length = 0;
	lrd_ptr->log.syncpt.sync = 0;

	/* advance to log page 2 and write it synchronously */
	bp->l_blkno += sbi->nbperpage;
	bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
	lbmStartIO(bp);
	if ((rc = lbmIOWait(bp, 0)))
		goto exit;

	/*
	 *	initialize succeeding log pages: lpsn = 0, 1, ..., (N-2)
	 */
	for (lspn = 0; lspn < npages - 3; lspn++) {
		/* empty data page: header/trailer only, no records */
		lp->h.page = lp->t.page = cpu_to_le32(lspn);
		lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE);

		bp->l_blkno += sbi->nbperpage;
		bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT;
		lbmStartIO(bp);
		if ((rc = lbmIOWait(bp, 0)))
			goto exit;
	}

	rc = 0;
exit:
	/*
	 *	finalize log
	 */
	/* release the buffer */
	lbmFree(bp);

	return rc;
}
2482*4882a593Smuzhiyun 
2483*4882a593Smuzhiyun #ifdef CONFIG_JFS_STATISTICS
jfs_lmstats_proc_show(struct seq_file * m,void * v)2484*4882a593Smuzhiyun int jfs_lmstats_proc_show(struct seq_file *m, void *v)
2485*4882a593Smuzhiyun {
2486*4882a593Smuzhiyun 	seq_printf(m,
2487*4882a593Smuzhiyun 		       "JFS Logmgr stats\n"
2488*4882a593Smuzhiyun 		       "================\n"
2489*4882a593Smuzhiyun 		       "commits = %d\n"
2490*4882a593Smuzhiyun 		       "writes submitted = %d\n"
2491*4882a593Smuzhiyun 		       "writes completed = %d\n"
2492*4882a593Smuzhiyun 		       "full pages submitted = %d\n"
2493*4882a593Smuzhiyun 		       "partial pages submitted = %d\n",
2494*4882a593Smuzhiyun 		       lmStat.commit,
2495*4882a593Smuzhiyun 		       lmStat.submitted,
2496*4882a593Smuzhiyun 		       lmStat.pagedone,
2497*4882a593Smuzhiyun 		       lmStat.full_page,
2498*4882a593Smuzhiyun 		       lmStat.partial_page);
2499*4882a593Smuzhiyun 	return 0;
2500*4882a593Smuzhiyun }
2501*4882a593Smuzhiyun #endif /* CONFIG_JFS_STATISTICS */
2502