// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2019 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
#include "xfs_sysctl.h"
#include "xfs_pwork.h"
#include <linux/nmi.h>

/*
 * Parallel Work Queue
 * ===================
 *
 * Abstract away the details of running a large and "obviously" parallelizable
 * task across multiple CPUs.  Callers initialize the pwork control object with
 * a desired level of parallelization and a work function.  Next, they embed
 * struct xfs_pwork in whatever structure they use to pass work context to a
 * worker thread and queue that pwork.  The work function will be passed the
 * pwork item when it is run (from process context) and any returned error will
 * be recorded in xfs_pwork_ctl.error.  Work functions should check for errors
 * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not
 * stop workqueue item processing.
 *
 * This is the rough equivalent of the xfsprogs workqueue code, though we can't
 * reuse that name here.
 */
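
/*
 * A minimal usage sketch of this interface.  The names foo_work, foo_work_fn,
 * fw, pctl, and error below are hypothetical and only illustrate the calling
 * convention described above; they are not part of this file.  A caller
 * embeds a struct xfs_pwork in its own work item, recovers that item in the
 * work function with container_of(), and initializes, queues, and destroys
 * the control object around the queueing loop:
 *
 *	struct foo_work {
 *		struct xfs_pwork	pwork;	(embedded pwork item)
 *		xfs_agnumber_t		agno;	(caller's private context)
 *	};
 *
 *	static int
 *	foo_work_fn(
 *		struct xfs_mount	*mp,
 *		struct xfs_pwork	*pwork)
 *	{
 *		struct foo_work		*fw;
 *
 *		fw = container_of(pwork, struct foo_work, pwork);
 *		if (xfs_pwork_want_abort(pwork))
 *			return 0;	(another item already recorded an error)
 *		(do the actual work against fw->agno here)
 *		return 0;
 *	}
 *
 *	error = xfs_pwork_init(mp, &pctl, foo_work_fn, "foo_worker",
 *			xfs_pwork_guess_datadev_parallelism(mp));
 *	if (error)
 *		return error;
 *	xfs_pwork_queue(&pctl, &fw->pwork);	(once per allocated foo_work)
 *	error = xfs_pwork_destroy(&pctl);	(waits; returns the first error)
 */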

/* Invoke our caller's function. */
static void
xfs_pwork_work(
	struct work_struct	*work)
{
	struct xfs_pwork	*pwork;
	struct xfs_pwork_ctl	*pctl;
	int			error;

	pwork = container_of(work, struct xfs_pwork, work);
	pctl = pwork->pctl;
	error = pctl->work_fn(pctl->mp, pwork);
	if (error && !pctl->error)
		pctl->error = error;
	if (atomic_dec_and_test(&pctl->nr_work))
		wake_up(&pctl->poll_wait);
}

/*
 * Set up control data for parallel work.  @work_fn is the function that will
 * be called.  @tag will be written into the kernel threads.  @nr_threads is
 * the level of parallelism desired, or 0 for no limit.
 */
int
xfs_pwork_init(
	struct xfs_mount	*mp,
	struct xfs_pwork_ctl	*pctl,
	xfs_pwork_work_fn	work_fn,
	const char		*tag,
	unsigned int		nr_threads)
{
#ifdef DEBUG
	if (xfs_globals.pwork_threads >= 0)
		nr_threads = xfs_globals.pwork_threads;
#endif
	trace_xfs_pwork_init(mp, nr_threads, current->pid);

	pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag,
			current->pid);
	if (!pctl->wq)
		return -ENOMEM;
	pctl->work_fn = work_fn;
	pctl->error = 0;
	pctl->mp = mp;
	atomic_set(&pctl->nr_work, 0);
	init_waitqueue_head(&pctl->poll_wait);

	return 0;
}

/* Queue some parallel work. */
void
xfs_pwork_queue(
	struct xfs_pwork_ctl	*pctl,
	struct xfs_pwork	*pwork)
{
	INIT_WORK(&pwork->work, xfs_pwork_work);
	pwork->pctl = pctl;
	atomic_inc(&pctl->nr_work);
	queue_work(pctl->wq, &pwork->work);
}

/* Wait for the work to finish and tear down the control structure. */
int
xfs_pwork_destroy(
	struct xfs_pwork_ctl	*pctl)
{
	destroy_workqueue(pctl->wq);
	pctl->wq = NULL;
	return pctl->error;
}

/*
 * Wait for the work to finish by polling completion status and touch the soft
 * lockup watchdog.  This is for callers such as mount which hold locks.
 */
void
xfs_pwork_poll(
	struct xfs_pwork_ctl	*pctl)
{
	while (wait_event_timeout(pctl->poll_wait,
				atomic_read(&pctl->nr_work) == 0, HZ) == 0)
		touch_softlockup_watchdog();
}
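
/*
 * An illustrative calling pattern for a lock-holding caller (pctl and error
 * are hypothetical locals): queue all of the work items, poll until the count
 * of outstanding items reaches zero, then tear down the workqueue and pick up
 * the first recorded error:
 *
 *	xfs_pwork_poll(&pctl);
 *	error = xfs_pwork_destroy(&pctl);
 */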

/*
 * Return the amount of parallelism that the data device can handle, or 0 for
 * no limit.
 */
unsigned int
xfs_pwork_guess_datadev_parallelism(
	struct xfs_mount	*mp)
{
	struct xfs_buftarg	*btp = mp->m_ddev_targp;

	/*
	 * For now we'll go with the most conservative setting possible,
	 * which is two threads for an SSD and 1 thread everywhere else.
	 */
	return blk_queue_nonrot(btp->bt_bdev->bd_disk->queue) ? 2 : 1;
}
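
/*
 * Illustrative note: the value computed above is typically passed to
 * xfs_pwork_init() as @nr_threads, where alloc_workqueue() uses it as the
 * workqueue's max_active limit (0 meaning no limit, as noted above), e.g.:
 *
 *	unsigned int	nr_threads;
 *
 *	nr_threads = xfs_pwork_guess_datadev_parallelism(mp);
 *	error = xfs_pwork_init(mp, &pctl, foo_work_fn, "foo_worker", nr_threads);
 *
 * foo_work_fn, pctl, and error here are hypothetical, as in the sketch at the
 * top of this file.
 */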