481 lines
12 KiB
C
481 lines
12 KiB
C
/*
|
|
* Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2014
|
|
* The President and Fellows of Harvard College.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* SFS filesystem
|
|
*
|
|
* I/O plumbing.
|
|
*/
|
|
#include <types.h>
|
|
#include <kern/errno.h>
|
|
#include <lib.h>
|
|
#include <uio.h>
|
|
#include <vfs.h>
|
|
#include <device.h>
|
|
#include <sfs.h>
|
|
#include "sfsprivate.h"
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//
|
|
// Basic block-level I/O routines
|
|
|
|
/*
|
|
* Note: sfs_readblock is used to read the superblock
|
|
* early in mount, before sfs is fully (or even mostly)
|
|
* initialized, and so may not use anything from sfs
|
|
* except sfs_device.
|
|
*/
|
|
|
|
/*
|
|
* Read or write a block, retrying I/O errors.
|
|
*/
|
|
static
|
|
int
|
|
sfs_rwblock(struct sfs_fs *sfs, struct uio *uio)
|
|
{
|
|
int result;
|
|
int tries=0;
|
|
|
|
KASSERT(vfs_biglock_do_i_hold());
|
|
|
|
DEBUG(DB_SFS, "sfs: %s %llu\n",
|
|
uio->uio_rw == UIO_READ ? "read" : "write",
|
|
uio->uio_offset / SFS_BLOCKSIZE);
|
|
|
|
retry:
|
|
result = DEVOP_IO(sfs->sfs_device, uio);
|
|
if (result == EINVAL) {
|
|
/*
|
|
* This means the sector we requested was out of range,
|
|
* or the seek address we gave wasn't sector-aligned,
|
|
* or a couple of other things that are our fault.
|
|
*/
|
|
panic("sfs: %s: DEVOP_IO returned EINVAL\n",
|
|
sfs->sfs_sb.sb_volname);
|
|
}
|
|
if (result == EIO) {
|
|
if (tries == 0) {
|
|
tries++;
|
|
kprintf("sfs: %s: block %llu I/O error, retrying\n",
|
|
sfs->sfs_sb.sb_volname,
|
|
uio->uio_offset / SFS_BLOCKSIZE);
|
|
goto retry;
|
|
}
|
|
else if (tries < 10) {
|
|
tries++;
|
|
goto retry;
|
|
}
|
|
else {
|
|
kprintf("sfs: %s: block %llu I/O error, giving up "
|
|
"after %d retries\n",
|
|
sfs->sfs_sb.sb_volname,
|
|
uio->uio_offset / SFS_BLOCKSIZE, tries);
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Read a block.
|
|
*/
|
|
int
|
|
sfs_readblock(struct sfs_fs *sfs, daddr_t block, void *data, size_t len)
|
|
{
|
|
struct iovec iov;
|
|
struct uio ku;
|
|
|
|
KASSERT(len == SFS_BLOCKSIZE);
|
|
|
|
SFSUIO(&iov, &ku, data, block, UIO_READ);
|
|
return sfs_rwblock(sfs, &ku);
|
|
}
|
|
|
|
/*
|
|
* Write a block.
|
|
*/
|
|
int
|
|
sfs_writeblock(struct sfs_fs *sfs, daddr_t block, void *data, size_t len)
|
|
{
|
|
struct iovec iov;
|
|
struct uio ku;
|
|
|
|
KASSERT(len == SFS_BLOCKSIZE);
|
|
|
|
SFSUIO(&iov, &ku, data, block, UIO_WRITE);
|
|
return sfs_rwblock(sfs, &ku);
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
//
|
|
// File-level I/O
|
|
|
|
/*
|
|
* Do I/O to a block of a file that doesn't cover the whole block. We
|
|
* need to read in the original block first, even if we're writing, so
|
|
* we don't clobber the portion of the block we're not intending to
|
|
* write over.
|
|
*
|
|
* SKIPSTART is the number of bytes to skip past at the beginning of
|
|
* the sector; LEN is the number of bytes to actually read or write.
|
|
* UIO is the area to do the I/O into.
|
|
*/
|
|
static
|
|
int
|
|
sfs_partialio(struct sfs_vnode *sv, struct uio *uio,
|
|
uint32_t skipstart, uint32_t len)
|
|
{
|
|
/*
|
|
* I/O buffer for handling partial sectors.
|
|
*
|
|
* Note: in real life (and when you've done the fs assignment)
|
|
* you would get space from the disk buffer cache for this,
|
|
* not use a static area.
|
|
*/
|
|
static char iobuf[SFS_BLOCKSIZE];
|
|
|
|
struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data;
|
|
daddr_t diskblock;
|
|
uint32_t fileblock;
|
|
int result;
|
|
|
|
/* Allocate missing blocks if and only if we're writing */
|
|
bool doalloc = (uio->uio_rw==UIO_WRITE);
|
|
|
|
KASSERT(skipstart + len <= SFS_BLOCKSIZE);
|
|
|
|
/* We're using a global static buffer; it had better be locked */
|
|
KASSERT(vfs_biglock_do_i_hold());
|
|
|
|
/* Compute the block offset of this block in the file */
|
|
fileblock = uio->uio_offset / SFS_BLOCKSIZE;
|
|
|
|
/* Get the disk block number */
|
|
result = sfs_bmap(sv, fileblock, doalloc, &diskblock);
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
if (diskblock == 0) {
|
|
/*
|
|
* There was no block mapped at this point in the file.
|
|
* Zero the buffer.
|
|
*/
|
|
KASSERT(uio->uio_rw == UIO_READ);
|
|
bzero(iobuf, sizeof(iobuf));
|
|
}
|
|
else {
|
|
/*
|
|
* Read the block.
|
|
*/
|
|
result = sfs_readblock(sfs, diskblock, iobuf, sizeof(iobuf));
|
|
if (result) {
|
|
return result;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now perform the requested operation into/out of the buffer.
|
|
*/
|
|
result = uiomove(iobuf+skipstart, len, uio);
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* If it was a write, write back the modified block.
|
|
*/
|
|
if (uio->uio_rw == UIO_WRITE) {
|
|
result = sfs_writeblock(sfs, diskblock, iobuf, sizeof(iobuf));
|
|
if (result) {
|
|
return result;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Do I/O (either read or write) of a single whole block.
|
|
*/
|
|
static
|
|
int
|
|
sfs_blockio(struct sfs_vnode *sv, struct uio *uio)
|
|
{
|
|
struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data;
|
|
daddr_t diskblock;
|
|
uint32_t fileblock;
|
|
int result;
|
|
bool doalloc = (uio->uio_rw==UIO_WRITE);
|
|
off_t saveoff;
|
|
off_t diskoff;
|
|
off_t saveres;
|
|
off_t diskres;
|
|
|
|
/* Get the block number within the file */
|
|
fileblock = uio->uio_offset / SFS_BLOCKSIZE;
|
|
|
|
/* Look up the disk block number */
|
|
result = sfs_bmap(sv, fileblock, doalloc, &diskblock);
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
if (diskblock == 0) {
|
|
/*
|
|
* No block - fill with zeros.
|
|
*
|
|
* We must be reading, or sfs_bmap would have
|
|
* allocated a block for us.
|
|
*/
|
|
KASSERT(uio->uio_rw == UIO_READ);
|
|
return uiomovezeros(SFS_BLOCKSIZE, uio);
|
|
}
|
|
|
|
/*
|
|
* Do the I/O directly to the uio region. Save the uio_offset,
|
|
* and substitute one that makes sense to the device.
|
|
*/
|
|
saveoff = uio->uio_offset;
|
|
diskoff = diskblock * SFS_BLOCKSIZE;
|
|
uio->uio_offset = diskoff;
|
|
|
|
/*
|
|
* Temporarily set the residue to be one block size.
|
|
*/
|
|
KASSERT(uio->uio_resid >= SFS_BLOCKSIZE);
|
|
saveres = uio->uio_resid;
|
|
diskres = SFS_BLOCKSIZE;
|
|
uio->uio_resid = diskres;
|
|
|
|
result = sfs_rwblock(sfs, uio);
|
|
|
|
/*
|
|
* Now, restore the original uio_offset and uio_resid and update
|
|
* them by the amount of I/O done.
|
|
*/
|
|
uio->uio_offset = (uio->uio_offset - diskoff) + saveoff;
|
|
uio->uio_resid = (uio->uio_resid - diskres) + saveres;
|
|
|
|
return result;
|
|
}
|
|
|
|
/*
|
|
* Do I/O of a whole region of data, whether or not it's block-aligned.
|
|
*/
|
|
int
|
|
sfs_io(struct sfs_vnode *sv, struct uio *uio)
|
|
{
|
|
uint32_t blkoff;
|
|
uint32_t nblocks, i;
|
|
int result = 0;
|
|
uint32_t origresid, extraresid = 0;
|
|
|
|
origresid = uio->uio_resid;
|
|
|
|
/*
|
|
* If reading, check for EOF. If we can read a partial area,
|
|
* remember how much extra there was in EXTRARESID so we can
|
|
* add it back to uio_resid at the end.
|
|
*/
|
|
if (uio->uio_rw == UIO_READ) {
|
|
off_t size = sv->sv_i.sfi_size;
|
|
off_t endpos = uio->uio_offset + uio->uio_resid;
|
|
|
|
if (uio->uio_offset >= size) {
|
|
/* At or past EOF - just return */
|
|
return 0;
|
|
}
|
|
|
|
if (endpos > size) {
|
|
extraresid = endpos - size;
|
|
KASSERT(uio->uio_resid > extraresid);
|
|
uio->uio_resid -= extraresid;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* First, do any leading partial block.
|
|
*/
|
|
blkoff = uio->uio_offset % SFS_BLOCKSIZE;
|
|
if (blkoff != 0) {
|
|
/* Number of bytes at beginning of block to skip */
|
|
uint32_t skip = blkoff;
|
|
|
|
/* Number of bytes to read/write after that point */
|
|
uint32_t len = SFS_BLOCKSIZE - blkoff;
|
|
|
|
/* ...which might be less than the rest of the block */
|
|
if (len > uio->uio_resid) {
|
|
len = uio->uio_resid;
|
|
}
|
|
|
|
/* Call sfs_partialio() to do it. */
|
|
result = sfs_partialio(sv, uio, skip, len);
|
|
if (result) {
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
/* If we're done, quit. */
|
|
if (uio->uio_resid==0) {
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Now we should be block-aligned. Do the remaining whole blocks.
|
|
*/
|
|
KASSERT(uio->uio_offset % SFS_BLOCKSIZE == 0);
|
|
nblocks = uio->uio_resid / SFS_BLOCKSIZE;
|
|
for (i=0; i<nblocks; i++) {
|
|
result = sfs_blockio(sv, uio);
|
|
if (result) {
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Now do any remaining partial block at the end.
|
|
*/
|
|
KASSERT(uio->uio_resid < SFS_BLOCKSIZE);
|
|
|
|
if (uio->uio_resid > 0) {
|
|
result = sfs_partialio(sv, uio, 0, uio->uio_resid);
|
|
if (result) {
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
out:
|
|
|
|
/* If writing and we did anything, adjust file length */
|
|
if (uio->uio_resid != origresid &&
|
|
uio->uio_rw == UIO_WRITE &&
|
|
uio->uio_offset > (off_t)sv->sv_i.sfi_size) {
|
|
sv->sv_i.sfi_size = uio->uio_offset;
|
|
sv->sv_dirty = true;
|
|
}
|
|
|
|
/* Add in any extra amount we couldn't read because of EOF */
|
|
uio->uio_resid += extraresid;
|
|
|
|
/* Done */
|
|
return result;
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////
|
|
// Metadata I/O
|
|
|
|
/*
|
|
* This is much the same as sfs_partialio, but intended for use with
|
|
* metadata (e.g. directory entries). It assumes the objects being
|
|
* handled are smaller than whole blocks, do not cross block
|
|
* boundaries, and originate in the kernel.
|
|
*
|
|
* It is separate from sfs_partialio because, although there is no
|
|
* such code in this version of SFS, it is often desirable when doing
|
|
* more advanced things to handle metadata and user data I/O
|
|
* differently.
|
|
*/
|
|
int
|
|
sfs_metaio(struct sfs_vnode *sv, off_t actualpos, void *data, size_t len,
|
|
enum uio_rw rw)
|
|
{
|
|
struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data;
|
|
off_t endpos;
|
|
uint32_t vnblock;
|
|
uint32_t blockoffset;
|
|
daddr_t diskblock;
|
|
bool doalloc;
|
|
int result;
|
|
|
|
/*
|
|
* I/O buffer for metadata ops.
|
|
*
|
|
* Note: in real life (and when you've done the fs assignment) you
|
|
* would get space from the disk buffer cache for this, not use a
|
|
* static area.
|
|
*/
|
|
static char metaiobuf[SFS_BLOCKSIZE];
|
|
|
|
/* We're using a global static buffer; it had better be locked */
|
|
KASSERT(vfs_biglock_do_i_hold());
|
|
|
|
/* Figure out which block of the vnode (directory, whatever) this is */
|
|
vnblock = actualpos / SFS_BLOCKSIZE;
|
|
blockoffset = actualpos % SFS_BLOCKSIZE;
|
|
|
|
/* Get the disk block number */
|
|
doalloc = (rw == UIO_WRITE);
|
|
result = sfs_bmap(sv, vnblock, doalloc, &diskblock);
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
if (diskblock == 0) {
|
|
/* Should only get block 0 back if doalloc is false */
|
|
KASSERT(rw == UIO_READ);
|
|
|
|
/* Sparse file, read as zeros. */
|
|
bzero(data, len);
|
|
return 0;
|
|
}
|
|
|
|
/* Read the block */
|
|
result = sfs_readblock(sfs, diskblock, metaiobuf, sizeof(metaiobuf));
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
if (rw == UIO_READ) {
|
|
/* Copy out the selected region */
|
|
memcpy(data, metaiobuf + blockoffset, len);
|
|
}
|
|
else {
|
|
/* Update the selected region */
|
|
memcpy(metaiobuf + blockoffset, data, len);
|
|
|
|
/* Write the block back */
|
|
result = sfs_writeblock(sfs, diskblock,
|
|
metaiobuf, sizeof(metaiobuf));
|
|
if (result) {
|
|
return result;
|
|
}
|
|
|
|
/* Update the vnode size if needed */
|
|
endpos = actualpos + len;
|
|
if (endpos > (off_t)sv->sv_i.sfi_size) {
|
|
sv->sv_i.sfi_size = endpos;
|
|
sv->sv_dirty = true;
|
|
}
|
|
}
|
|
|
|
/* Done */
|
|
return 0;
|
|
}
|