os161/kern/fs/sfs/sfs_io.c
2015-12-23 00:50:04 +00:00

481 lines
12 KiB
C

/*
* Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009, 2014
* The President and Fellows of Harvard College.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* SFS filesystem
*
* I/O plumbing.
*/
#include <types.h>
#include <kern/errno.h>
#include <lib.h>
#include <uio.h>
#include <vfs.h>
#include <device.h>
#include <sfs.h>
#include "sfsprivate.h"
////////////////////////////////////////////////////////////
//
// Basic block-level I/O routines
/*
* Note: sfs_readblock is used to read the superblock
* early in mount, before sfs is fully (or even mostly)
* initialized, and so may not use anything from sfs
* except sfs_device.
*/
/*
* Read or write a block, retrying I/O errors.
*/
static
int
sfs_rwblock(struct sfs_fs *sfs, struct uio *uio)
{
int result;
int tries=0;
KASSERT(vfs_biglock_do_i_hold());
DEBUG(DB_SFS, "sfs: %s %llu\n",
uio->uio_rw == UIO_READ ? "read" : "write",
uio->uio_offset / SFS_BLOCKSIZE);
retry:
result = DEVOP_IO(sfs->sfs_device, uio);
if (result == EINVAL) {
/*
* This means the sector we requested was out of range,
* or the seek address we gave wasn't sector-aligned,
* or a couple of other things that are our fault.
*/
panic("sfs: %s: DEVOP_IO returned EINVAL\n",
sfs->sfs_sb.sb_volname);
}
if (result == EIO) {
if (tries == 0) {
tries++;
kprintf("sfs: %s: block %llu I/O error, retrying\n",
sfs->sfs_sb.sb_volname,
uio->uio_offset / SFS_BLOCKSIZE);
goto retry;
}
else if (tries < 10) {
tries++;
goto retry;
}
else {
kprintf("sfs: %s: block %llu I/O error, giving up "
"after %d retries\n",
sfs->sfs_sb.sb_volname,
uio->uio_offset / SFS_BLOCKSIZE, tries);
}
}
return result;
}
/*
 * Read disk block BLOCK into the buffer DATA.
 *
 * LEN is the size of DATA and must equal SFS_BLOCKSIZE. The transfer
 * is described with SFSUIO and handed to sfs_rwblock, whose result is
 * returned.
 */
int
sfs_readblock(struct sfs_fs *sfs, daddr_t block, void *data, size_t len)
{
	struct iovec vec;
	struct uio blockuio;
	int err;

	KASSERT(len == SFS_BLOCKSIZE);

	SFSUIO(&vec, &blockuio, data, block, UIO_READ);
	err = sfs_rwblock(sfs, &blockuio);

	return err;
}
/*
 * Write the buffer DATA to disk block BLOCK.
 *
 * LEN is the size of DATA and must equal SFS_BLOCKSIZE. The transfer
 * is described with SFSUIO and handed to sfs_rwblock, whose result is
 * returned.
 */
int
sfs_writeblock(struct sfs_fs *sfs, daddr_t block, void *data, size_t len)
{
	struct iovec vec;
	struct uio blockuio;
	int err;

	KASSERT(len == SFS_BLOCKSIZE);

	SFSUIO(&vec, &blockuio, data, block, UIO_WRITE);
	err = sfs_rwblock(sfs, &blockuio);

	return err;
}
////////////////////////////////////////////////////////////
//
// File-level I/O
/*
 * Transfer part of one file block.
 *
 * Because the transfer does not cover the whole block, the existing
 * block contents are always fetched first (or synthesized as zeros
 * when nothing is mapped there); for a write the caller's bytes are
 * then merged in and the whole block is written back, so the bytes
 * outside the requested range are preserved.
 *
 * SKIPSTART is how many bytes at the front of the block are left
 * alone; LEN is how many bytes are transferred after that point; UIO
 * supplies the direction, the file offset and the memory to move
 * to or from. SKIPSTART + LEN may not exceed SFS_BLOCKSIZE.
 *
 * Returns 0 on success, or the error from sfs_bmap, sfs_readblock,
 * uiomove or sfs_writeblock.
 */
static
int
sfs_partialio(struct sfs_vnode *sv, struct uio *uio,
	      uint32_t skipstart, uint32_t len)
{
	/*
	 * Staging area for the block being patched. It is a single
	 * static buffer shared by every call; a real implementation
	 * would borrow space from a disk buffer cache instead.
	 */
	static char blockbuf[SFS_BLOCKSIZE];

	struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data;
	/* Missing blocks get allocated only on the write path. */
	bool writing = (uio->uio_rw == UIO_WRITE);
	uint32_t fileblock;
	daddr_t diskblock;
	int err;

	KASSERT(skipstart + len <= SFS_BLOCKSIZE);

	/* The shared static buffer is only safe under the big lock. */
	KASSERT(vfs_biglock_do_i_hold());

	/* Which block of the file, and where it lives on disk. */
	fileblock = uio->uio_offset / SFS_BLOCKSIZE;
	err = sfs_bmap(sv, fileblock, writing, &diskblock);
	if (err) {
		return err;
	}

	if (diskblock != 0) {
		/* Bring in the current contents of the block. */
		err = sfs_readblock(sfs, diskblock,
				    blockbuf, sizeof(blockbuf));
		if (err) {
			return err;
		}
	}
	else {
		/*
		 * Nothing is mapped here, which can only happen when
		 * reading; such a hole reads back as zeros.
		 */
		KASSERT(uio->uio_rw == UIO_READ);
		bzero(blockbuf, sizeof(blockbuf));
	}

	/* Move the requested bytes between the buffer and the uio. */
	err = uiomove(blockbuf + skipstart, len, uio);
	if (err) {
		return err;
	}

	/* A read is finished once the bytes have been copied out. */
	if (uio->uio_rw != UIO_WRITE) {
		return 0;
	}

	/* A write has to push the patched block back to disk. */
	return sfs_writeblock(sfs, diskblock, blockbuf, sizeof(blockbuf));
}
/*
* Do I/O (either read or write) of a single whole block.
*/
static
int
sfs_blockio(struct sfs_vnode *sv, struct uio *uio)
{
struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data;
daddr_t diskblock;
uint32_t fileblock;
int result;
bool doalloc = (uio->uio_rw==UIO_WRITE);
off_t saveoff;
off_t diskoff;
off_t saveres;
off_t diskres;
/* Get the block number within the file */
fileblock = uio->uio_offset / SFS_BLOCKSIZE;
/* Look up the disk block number */
result = sfs_bmap(sv, fileblock, doalloc, &diskblock);
if (result) {
return result;
}
if (diskblock == 0) {
/*
* No block - fill with zeros.
*
* We must be reading, or sfs_bmap would have
* allocated a block for us.
*/
KASSERT(uio->uio_rw == UIO_READ);
return uiomovezeros(SFS_BLOCKSIZE, uio);
}
/*
* Do the I/O directly to the uio region. Save the uio_offset,
* and substitute one that makes sense to the device.
*/
saveoff = uio->uio_offset;
diskoff = diskblock * SFS_BLOCKSIZE;
uio->uio_offset = diskoff;
/*
* Temporarily set the residue to be one block size.
*/
KASSERT(uio->uio_resid >= SFS_BLOCKSIZE);
saveres = uio->uio_resid;
diskres = SFS_BLOCKSIZE;
uio->uio_resid = diskres;
result = sfs_rwblock(sfs, uio);
/*
* Now, restore the original uio_offset and uio_resid and update
* them by the amount of I/O done.
*/
uio->uio_offset = (uio->uio_offset - diskoff) + saveoff;
uio->uio_resid = (uio->uio_resid - diskres) + saveres;
return result;
}
/*
 * Carry out a read or write of an arbitrary byte range of a file.
 *
 * The range described by UIO is split into up to three pieces: a
 * leading fragment that finishes off a partly-covered block, a run of
 * complete blocks, and a trailing fragment. Fragments go through
 * sfs_partialio and complete blocks through sfs_blockio.
 *
 * Reads are clipped at end of file: a read starting at or beyond EOF
 * returns 0 without transferring anything, and a read that would run
 * past EOF is shortened, with the clipped amount put back into
 * uio_resid before returning. A write that ends beyond the current
 * file size grows sfi_size and marks the vnode dirty.
 *
 * Returns 0 on success or the first error from sfs_partialio or
 * sfs_blockio; the size update still happens for whatever was
 * written before the error.
 */
int
sfs_io(struct sfs_vnode *sv, struct uio *uio)
{
	uint32_t startresid;
	uint32_t eofresid = 0;
	uint32_t lead;
	uint32_t wholeblocks;
	int err = 0;

	startresid = uio->uio_resid;

	/*
	 * Clip reads at EOF. Whatever is trimmed off is remembered in
	 * eofresid and restored into uio_resid on the way out.
	 */
	if (uio->uio_rw == UIO_READ) {
		off_t filesize = sv->sv_i.sfi_size;
		off_t endpos = uio->uio_offset + uio->uio_resid;

		if (uio->uio_offset >= filesize) {
			/* Nothing to read at or beyond EOF. */
			return 0;
		}

		if (endpos > filesize) {
			eofresid = endpos - filesize;
			KASSERT(uio->uio_resid > eofresid);
			uio->uio_resid -= eofresid;
		}
	}

	/*
	 * Leading fragment: if we start partway into a block, handle
	 * the rest of that block (or less, if the request is short).
	 */
	lead = uio->uio_offset % SFS_BLOCKSIZE;
	if (lead != 0) {
		uint32_t len = SFS_BLOCKSIZE - lead;

		if (len > uio->uio_resid) {
			len = uio->uio_resid;
		}

		err = sfs_partialio(sv, uio, lead, len);
		if (err) {
			goto out;
		}
	}

	/* The leading fragment may have been the whole request. */
	if (uio->uio_resid == 0) {
		goto out;
	}

	/*
	 * From here on the offset sits on a block boundary; transfer
	 * as many complete blocks as remain.
	 */
	KASSERT(uio->uio_offset % SFS_BLOCKSIZE == 0);
	wholeblocks = uio->uio_resid / SFS_BLOCKSIZE;
	while (wholeblocks > 0) {
		err = sfs_blockio(sv, uio);
		if (err) {
			goto out;
		}
		wholeblocks--;
	}

	/*
	 * Trailing fragment: anything left is less than one block.
	 */
	KASSERT(uio->uio_resid < SFS_BLOCKSIZE);
	if (uio->uio_resid > 0) {
		err = sfs_partialio(sv, uio, 0, uio->uio_resid);
	}

 out:
	/* A write that made progress past the old end grows the file. */
	if (uio->uio_resid != startresid &&
	    uio->uio_rw == UIO_WRITE &&
	    uio->uio_offset > (off_t)sv->sv_i.sfi_size) {
		sv->sv_i.sfi_size = uio->uio_offset;
		sv->sv_dirty = true;
	}

	/* Give back the part of the request that lay beyond EOF. */
	uio->uio_resid += eofresid;

	return err;
}
////////////////////////////////////////////////////////////
// Metadata I/O
/*
 * This is much the same as sfs_partialio, but intended for use with
 * metadata (e.g. directory entries). It assumes the objects being
 * handled are smaller than whole blocks, do not cross block
 * boundaries, and originate in the kernel. The block-boundary
 * assumption is checked with assertions, because violating it would
 * overrun the static block buffer.
 *
 * It is separate from sfs_partialio because, although there is no
 * such code in this version of SFS, it is often desirable when doing
 * more advanced things to handle metadata and user data I/O
 * differently.
 *
 * SV is the vnode to operate on, ACTUALPOS the byte position within
 * it, DATA the buffer to copy to (read) or from (write), LEN the
 * number of bytes, and RW the direction.
 *
 * A read from an unmapped block fills DATA with zeros. A write that
 * ends beyond the current size grows sfi_size and marks the vnode
 * dirty.
 *
 * Returns 0 on success, or the error from sfs_bmap, sfs_readblock or
 * sfs_writeblock.
 */
int
sfs_metaio(struct sfs_vnode *sv, off_t actualpos, void *data, size_t len,
	   enum uio_rw rw)
{
	struct sfs_fs *sfs = sv->sv_absvn.vn_fs->fs_data;
	off_t endpos;
	uint32_t vnblock;
	uint32_t blockoffset;
	daddr_t diskblock;
	bool doalloc;
	int result;

	/*
	 * I/O buffer for metadata ops.
	 *
	 * Note: in real life (and when you've done the fs assignment) you
	 * would get space from the disk buffer cache for this, not use a
	 * static area.
	 */
	static char metaiobuf[SFS_BLOCKSIZE];

	/* We're using a global static buffer; it had better be locked */
	KASSERT(vfs_biglock_do_i_hold());

	/* Figure out which block of the vnode (directory, whatever) this is */
	vnblock = actualpos / SFS_BLOCKSIZE;
	blockoffset = actualpos % SFS_BLOCKSIZE;

	/*
	 * The region must lie entirely within one block, or the memcpy
	 * calls below would run off the end of metaiobuf. The first
	 * check keeps the subtraction in the second from wrapping.
	 */
	KASSERT(blockoffset < SFS_BLOCKSIZE);
	KASSERT(len <= SFS_BLOCKSIZE - blockoffset);

	/* Get the disk block number, allocating only when writing */
	doalloc = (rw == UIO_WRITE);
	result = sfs_bmap(sv, vnblock, doalloc, &diskblock);
	if (result) {
		return result;
	}

	if (diskblock == 0) {
		/* Should only get block 0 back if doalloc is false */
		KASSERT(rw == UIO_READ);
		/* Sparse file, read as zeros. */
		bzero(data, len);
		return 0;
	}

	/*
	 * Read the block. This is needed even for a write, so the
	 * bytes around the updated region are preserved.
	 */
	result = sfs_readblock(sfs, diskblock, metaiobuf, sizeof(metaiobuf));
	if (result) {
		return result;
	}

	if (rw == UIO_READ) {
		/* Copy out the selected region */
		memcpy(data, metaiobuf + blockoffset, len);
	}
	else {
		/* Update the selected region */
		memcpy(metaiobuf + blockoffset, data, len);

		/* Write the block back */
		result = sfs_writeblock(sfs, diskblock,
					metaiobuf, sizeof(metaiobuf));
		if (result) {
			return result;
		}

		/* Update the vnode size if needed */
		endpos = actualpos + len;
		if (endpos > (off_t)sv->sv_i.sfi_size) {
			sv->sv_i.sfi_size = endpos;
			sv->sv_dirty = true;
		}
	}

	/* Done */
	return 0;
}