| /* AFS volume location management |
| * |
| * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. |
| * Written by David Howells (dhowells@redhat.com) |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; either version |
| * 2 of the License, or (at your option) any later version. |
| */ |
| |
| #include <linux/kernel.h> |
| #include <linux/module.h> |
| #include <linux/init.h> |
| #include "internal.h" |
| |
/* tunables, both expressed in seconds */
unsigned afs_vlocation_timeout = 10;		/* grace period before a dead record is reaped */
unsigned afs_vlocation_update_timeout = 10 * 60; /* interval between refreshes of a record */

/* work item handlers, defined further down */
static void afs_vlocation_reaper(struct work_struct *);
static void afs_vlocation_updater(struct work_struct *);

/* records queued for periodic update, and the lock taken to modify the list */
static LIST_HEAD(afs_vlocation_updates);
static DEFINE_SPINLOCK(afs_vlocation_updates_lock);

/* unreferenced records awaiting reaping, and the lock taken to modify the list */
static LIST_HEAD(afs_vlocation_graveyard);
static DEFINE_SPINLOCK(afs_vlocation_graveyard_lock);

/* deferred work items driving the reaper and the updater */
static DECLARE_DELAYED_WORK(afs_vlocation_reap, afs_vlocation_reaper);
static DECLARE_DELAYED_WORK(afs_vlocation_update, afs_vlocation_updater);

/* dedicated workqueue on which the updater work item is queued */
static struct workqueue_struct *afs_vlocation_update_worker;
| |
/*
 * ask the cell's VL servers, one after another, for the entry matching the
 * record's volume name
 * - starts at the cell's current server index and tries each address at most
 *   once, advancing the index past every server that fails
 * - returns 0 with *vldb filled in, -ENOMEDIUM if the volume is unknown (or
 *   the cell has no VL addresses), -ENOMEM/-ENONET for local failures, or the
 *   error left over from the last server tried (unexpected errors are mapped
 *   to -EIO)
 */
static int afs_vlocation_access_vl_by_name(struct afs_vlocation *vl,
					   struct afs_cache_vlocation *vldb)
{
	struct afs_cell *cell = vl->cell;
	struct in_addr server;
	int remaining, ret;

	_enter("%s,%s", cell->name, vl->vldb.name);

	down_write(&cell->vl_sem);

	ret = -ENOMEDIUM;
	for (remaining = cell->vl_naddrs; remaining > 0; remaining--) {
		server = cell->vl_addrs[cell->vl_curr_svix];

		_debug("CellServ[%hu]: %08x", cell->vl_curr_svix, server.s_addr);

		/* attempt to access the VL server */
		ret = afs_vl_get_entry_by_name(&server, vl->vldb.name, vldb,
					       &afs_sync_call);

		/* success, a definite "no such volume" and local failures all
		 * end the search at once */
		if (ret == 0 || ret == -ENOMEDIUM ||
		    ret == -ENOMEM || ret == -ENONET)
			break;

		/* an unreachable or refusing server keeps its error code;
		 * anything else is reported as an I/O error */
		if (ret != -ENETUNREACH &&
		    ret != -EHOSTUNREACH &&
		    ret != -ECONNREFUSED)
			ret = -EIO;

		/* rotate the server records upon lookup failure */
		cell->vl_curr_svix++;
		cell->vl_curr_svix %= cell->vl_naddrs;
	}

	up_write(&cell->vl_sem);
	_leave(" = %d", ret);
	return ret;
}
| |
| /* |
| * iterate through the VL servers in a cell until one of them admits knowing |
| * about the volume in question |
| */ |
| static int afs_vlocation_access_vl_by_id(struct afs_vlocation *vl, |
| afs_volid_t volid, |
| afs_voltype_t voltype, |
| struct afs_cache_vlocation *vldb) |
| { |
| struct afs_cell *cell = vl->cell; |
| struct in_addr addr; |
| int count, ret; |
| |
| _enter("%s,%x,%d,", cell->name, volid, voltype); |
| |
| down_write(&vl->cell->vl_sem); |
| ret = -ENOMEDIUM; |
| for (count = cell->vl_naddrs; count > 0; count--) { |
| addr = cell->vl_addrs[cell->vl_curr_svix]; |
| |
| _debug("CellServ[%hu]: %08x", cell->vl_curr_svix, addr.s_addr); |
| |
| /* attempt to access the VL server */ |
| ret = afs_vl_get_entry_by_id(&addr, volid, voltype, vldb, |
| &afs_sync_call); |
| switch (ret) { |
| case 0: |
| goto out; |
| case -ENOMEM: |
| case -ENONET: |
| case -ENETUNREACH: |
| case -EHOSTUNREACH: |
| case -ECONNREFUSED: |
| if (ret == -ENOMEM || ret == -ENONET) |
| goto out; |
| goto rotate; |
| case -EBUSY: |
| vl->upd_busy_cnt++; |
| if (vl->upd_busy_cnt <= 3) { |
| if (vl->upd_busy_cnt > 1) { |
| /* second+ BUSY - sleep a little bit */ |
| set_current_state(TASK_UNINTERRUPTIBLE); |
| schedule_timeout(1); |
| __set_current_state(TASK_RUNNING); |
| } |
| continue; |
| } |
| break; |
| case -ENOMEDIUM: |
| vl->upd_rej_cnt++; |
| goto rotate; |
| default: |
| ret = -EIO; |
| goto rotate; |
| } |
| |
| /* rotate the server records upon lookup failure */ |
| rotate: |
| cell->vl_curr_svix++; |
| cell->vl_curr_svix %= cell->vl_naddrs; |
| vl->upd_busy_cnt = 0; |
| } |
| |
| out: |
| if (ret < 0 && vl->upd_rej_cnt > 0) { |
| printk(KERN_NOTICE "kAFS:" |
| " Active volume no longer valid '%s'\n", |
| vl->vldb.name); |
| vl->valid = 0; |
| ret = -ENOMEDIUM; |
| } |
| |
| up_write(&vl->cell->vl_sem); |
| _leave(" = %d", ret); |
| return ret; |
| } |
| |
| /* |
| * allocate a volume location record |
| */ |
| static struct afs_vlocation *afs_vlocation_alloc(struct afs_cell *cell, |
| const char *name, |
| size_t namesz) |
| { |
| struct afs_vlocation *vl; |
| |
| vl = kzalloc(sizeof(struct afs_vlocation), GFP_KERNEL); |
| if (vl) { |
| vl->cell = cell; |
| vl->state = AFS_VL_NEW; |
| atomic_set(&vl->usage, 1); |
| INIT_LIST_HEAD(&vl->link); |
| INIT_LIST_HEAD(&vl->grave); |
| INIT_LIST_HEAD(&vl->update); |
| init_waitqueue_head(&vl->waitq); |
| rwlock_init(&vl->lock); |
| memcpy(vl->vldb.name, name, namesz); |
| } |
| |
| _leave(" = %p", vl); |
| return vl; |
| } |
| |
| /* |
| * update record if we found it in the cache |
| */ |
| static int afs_vlocation_update_record(struct afs_vlocation *vl, |
| struct afs_cache_vlocation *vldb) |
| { |
| afs_voltype_t voltype; |
| afs_volid_t vid; |
| int ret; |
| |
| /* try to look up a cached volume in the cell VL databases by ID */ |
| _debug("Locally Cached: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", |
| vl->vldb.name, |
| vl->vldb.vidmask, |
| ntohl(vl->vldb.servers[0].s_addr), |
| vl->vldb.srvtmask[0], |
| ntohl(vl->vldb.servers[1].s_addr), |
| vl->vldb.srvtmask[1], |
| ntohl(vl->vldb.servers[2].s_addr), |
| vl->vldb.srvtmask[2]); |
| |
| _debug("Vids: %08x %08x %08x", |
| vl->vldb.vid[0], |
| vl->vldb.vid[1], |
| vl->vldb.vid[2]); |
| |
| if (vl->vldb.vidmask & AFS_VOL_VTM_RW) { |
| vid = vl->vldb.vid[0]; |
| voltype = AFSVL_RWVOL; |
| } else if (vl->vldb.vidmask & AFS_VOL_VTM_RO) { |
| vid = vl->vldb.vid[1]; |
| voltype = AFSVL_ROVOL; |
| } else if (vl->vldb.vidmask & AFS_VOL_VTM_BAK) { |
| vid = vl->vldb.vid[2]; |
| voltype = AFSVL_BACKVOL; |
| } else { |
| BUG(); |
| vid = 0; |
| voltype = 0; |
| } |
| |
| /* contact the server to make sure the volume is still available |
| * - TODO: need to handle disconnected operation here |
| */ |
| ret = afs_vlocation_access_vl_by_id(vl, vid, voltype, vldb); |
| switch (ret) { |
| /* net error */ |
| default: |
| printk(KERN_WARNING "kAFS:" |
| " failed to update volume '%s' (%x) up in '%s': %d\n", |
| vl->vldb.name, vid, vl->cell->name, ret); |
| _leave(" = %d", ret); |
| return ret; |
| |
| /* pulled from local cache into memory */ |
| case 0: |
| _leave(" = 0"); |
| return 0; |
| |
| /* uh oh... looks like the volume got deleted */ |
| case -ENOMEDIUM: |
| printk(KERN_ERR "kAFS:" |
| " volume '%s' (%x) does not exist '%s'\n", |
| vl->vldb.name, vid, vl->cell->name); |
| |
| /* TODO: make existing record unavailable */ |
| _leave(" = %d", ret); |
| return ret; |
| } |
| } |
| |
| /* |
| * apply the update to a VL record |
| */ |
| static void afs_vlocation_apply_update(struct afs_vlocation *vl, |
| struct afs_cache_vlocation *vldb) |
| { |
| _debug("Done VL Lookup: %s %02x { %08x(%x) %08x(%x) %08x(%x) }", |
| vldb->name, vldb->vidmask, |
| ntohl(vldb->servers[0].s_addr), vldb->srvtmask[0], |
| ntohl(vldb->servers[1].s_addr), vldb->srvtmask[1], |
| ntohl(vldb->servers[2].s_addr), vldb->srvtmask[2]); |
| |
| _debug("Vids: %08x %08x %08x", |
| vldb->vid[0], vldb->vid[1], vldb->vid[2]); |
| |
| if (strcmp(vldb->name, vl->vldb.name) != 0) |
| printk(KERN_NOTICE "kAFS:" |
| " name of volume '%s' changed to '%s' on server\n", |
| vl->vldb.name, vldb->name); |
| |
| vl->vldb = *vldb; |
| |
| #ifdef AFS_CACHING_SUPPORT |
| /* update volume entry in local cache */ |
| cachefs_update_cookie(vl->cache); |
| #endif |
| } |
| |
| /* |
| * fill in a volume location record, consulting the cache and the VL server |
| * both |
| */ |
| static int afs_vlocation_fill_in_record(struct afs_vlocation *vl) |
| { |
| struct afs_cache_vlocation vldb; |
| int ret; |
| |
| _enter(""); |
| |
| ASSERTCMP(vl->valid, ==, 0); |
| |
| memset(&vldb, 0, sizeof(vldb)); |
| |
| /* see if we have an in-cache copy (will set vl->valid if there is) */ |
| #ifdef AFS_CACHING_SUPPORT |
| cachefs_acquire_cookie(cell->cache, |
| &afs_volume_cache_index_def, |
| vlocation, |
| &vl->cache); |
| #endif |
| |
| if (vl->valid) { |
| /* try to update a known volume in the cell VL databases by |
| * ID as the name may have changed */ |
| _debug("found in cache"); |
| ret = afs_vlocation_update_record(vl, &vldb); |
| } else { |
| /* try to look up an unknown volume in the cell VL databases by |
| * name */ |
| ret = afs_vlocation_access_vl_by_name(vl, &vldb); |
| if (ret < 0) { |
| printk("kAFS: failed to locate '%s' in cell '%s'\n", |
| vl->vldb.name, vl->cell->name); |
| return ret; |
| } |
| } |
| |
| afs_vlocation_apply_update(vl, &vldb); |
| _leave(" = 0"); |
| return 0; |
| } |
| |
| /* |
| * queue a vlocation record for updates |
| */ |
| void afs_vlocation_queue_for_updates(struct afs_vlocation *vl) |
| { |
| struct afs_vlocation *xvl; |
| |
| /* wait at least 10 minutes before updating... */ |
| vl->update_at = get_seconds() + afs_vlocation_update_timeout; |
| |
| spin_lock(&afs_vlocation_updates_lock); |
| |
| if (!list_empty(&afs_vlocation_updates)) { |
| /* ... but wait at least 1 second more than the newest record |
| * already queued so that we don't spam the VL server suddenly |
| * with lots of requests |
| */ |
| xvl = list_entry(afs_vlocation_updates.prev, |
| struct afs_vlocation, update); |
| if (vl->update_at <= xvl->update_at) |
| vl->update_at = xvl->update_at + 1; |
| } else { |
| queue_delayed_work(afs_vlocation_update_worker, |
| &afs_vlocation_update, |
| afs_vlocation_update_timeout * HZ); |
| } |
| |
| list_add_tail(&vl->update, &afs_vlocation_updates); |
| spin_unlock(&afs_vlocation_updates_lock); |
| } |
| |
| /* |
| * lookup volume location |
| * - iterate through the VL servers in a cell until one of them admits knowing |
| * about the volume in question |
| * - lookup in the local cache if not able to find on the VL server |
| * - insert/update in the local cache if did get a VL response |
| */ |
| struct afs_vlocation *afs_vlocation_lookup(struct afs_cell *cell, |
| const char *name, |
| size_t namesz) |
| { |
| struct afs_vlocation *vl; |
| int ret; |
| |
| _enter("{%s},%*.*s,%zu", |
| cell->name, (int) namesz, (int) namesz, name, namesz); |
| |
| if (namesz > sizeof(vl->vldb.name)) { |
| _leave(" = -ENAMETOOLONG"); |
| return ERR_PTR(-ENAMETOOLONG); |
| } |
| |
| /* see if we have an in-memory copy first */ |
| down_write(&cell->vl_sem); |
| spin_lock(&cell->vl_lock); |
| list_for_each_entry(vl, &cell->vl_list, link) { |
| if (vl->vldb.name[namesz] != '\0') |
| continue; |
| if (memcmp(vl->vldb.name, name, namesz) == 0) |
| goto found_in_memory; |
| } |
| spin_unlock(&cell->vl_lock); |
| |
| /* not in the cell's in-memory lists - create a new record */ |
| vl = afs_vlocation_alloc(cell, name, namesz); |
| if (!vl) { |
| up_write(&cell->vl_sem); |
| return ERR_PTR(-ENOMEM); |
| } |
| |
| afs_get_cell(cell); |
| |
| list_add_tail(&vl->link, &cell->vl_list); |
| vl->state = AFS_VL_CREATING; |
| up_write(&cell->vl_sem); |
| |
| fill_in_record: |
| ret = afs_vlocation_fill_in_record(vl); |
| if (ret < 0) |
| goto error_abandon; |
| vl->state = AFS_VL_VALID; |
| wake_up(&vl->waitq); |
| |
| /* schedule for regular updates */ |
| afs_vlocation_queue_for_updates(vl); |
| goto success; |
| |
| found_in_memory: |
| /* found in memory */ |
| _debug("found in memory"); |
| atomic_inc(&vl->usage); |
| spin_unlock(&cell->vl_lock); |
| if (!list_empty(&vl->grave)) { |
| spin_lock(&afs_vlocation_graveyard_lock); |
| list_del_init(&vl->grave); |
| spin_unlock(&afs_vlocation_graveyard_lock); |
| } |
| up_write(&cell->vl_sem); |
| |
| /* see if it was an abandoned record that we might try filling in */ |
| while (vl->state != AFS_VL_VALID) { |
| afs_vlocation_state_t state = vl->state; |
| |
| _debug("invalid [state %d]", state); |
| |
| if ((state == AFS_VL_NEW || state == AFS_VL_NO_VOLUME)) { |
| if (cmpxchg(&vl->state, state, AFS_VL_CREATING) == |
| state) |
| goto fill_in_record; |
| continue; |
| } |
| |
| /* must now wait for creation or update by someone else to |
| * complete */ |
| _debug("wait"); |
| |
| ret = wait_event_interruptible( |
| vl->waitq, |
| vl->state == AFS_VL_NEW || |
| vl->state == AFS_VL_VALID || |
| vl->state == AFS_VL_NO_VOLUME); |
| if (ret < 0) |
| goto error; |
| } |
| |
| success: |
| _leave(" = %p",vl); |
| return vl; |
| |
| error_abandon: |
| vl->state = AFS_VL_NEW; |
| wake_up(&vl->waitq); |
| error: |
| ASSERT(vl != NULL); |
| afs_put_vlocation(vl); |
| _leave(" = %d", ret); |
| return ERR_PTR(ret); |
| } |
| |
| /* |
| * finish using a volume location record |
| */ |
| void afs_put_vlocation(struct afs_vlocation *vl) |
| { |
| if (!vl) |
| return; |
| |
| _enter("%s", vl->vldb.name); |
| |
| ASSERTCMP(atomic_read(&vl->usage), >, 0); |
| |
| if (likely(!atomic_dec_and_test(&vl->usage))) { |
| _leave(""); |
| return; |
| } |
| |
| spin_lock(&afs_vlocation_graveyard_lock); |
| if (atomic_read(&vl->usage) == 0) { |
| _debug("buried"); |
| list_move_tail(&vl->grave, &afs_vlocation_graveyard); |
| vl->time_of_death = get_seconds(); |
| schedule_delayed_work(&afs_vlocation_reap, |
| afs_vlocation_timeout * HZ); |
| |
| /* suspend updates on this record */ |
| if (!list_empty(&vl->update)) { |
| spin_lock(&afs_vlocation_updates_lock); |
| list_del_init(&vl->update); |
| spin_unlock(&afs_vlocation_updates_lock); |
| } |
| } |
| spin_unlock(&afs_vlocation_graveyard_lock); |
| _leave(" [killed?]"); |
| } |
| |
| /* |
| * destroy a dead volume location record |
| */ |
| static void afs_vlocation_destroy(struct afs_vlocation *vl) |
| { |
| _enter("%p", vl); |
| |
| #ifdef AFS_CACHING_SUPPORT |
| cachefs_relinquish_cookie(vl->cache, 0); |
| #endif |
| |
| afs_put_cell(vl->cell); |
| kfree(vl); |
| } |
| |
| /* |
| * reap dead volume location records |
| */ |
| static void afs_vlocation_reaper(struct work_struct *work) |
| { |
| LIST_HEAD(corpses); |
| struct afs_vlocation *vl; |
| unsigned long delay, expiry; |
| time_t now; |
| |
| _enter(""); |
| |
| now = get_seconds(); |
| spin_lock(&afs_vlocation_graveyard_lock); |
| |
| while (!list_empty(&afs_vlocation_graveyard)) { |
| vl = list_entry(afs_vlocation_graveyard.next, |
| struct afs_vlocation, grave); |
| |
| _debug("check %p", vl); |
| |
| /* the queue is ordered most dead first */ |
| expiry = vl->time_of_death + afs_vlocation_timeout; |
| if (expiry > now) { |
| delay = (expiry - now) * HZ; |
| _debug("delay %lu", delay); |
| if (!schedule_delayed_work(&afs_vlocation_reap, |
| delay)) { |
| cancel_delayed_work(&afs_vlocation_reap); |
| schedule_delayed_work(&afs_vlocation_reap, |
| delay); |
| } |
| break; |
| } |
| |
| spin_lock(&vl->cell->vl_lock); |
| if (atomic_read(&vl->usage) > 0) { |
| _debug("no reap"); |
| list_del_init(&vl->grave); |
| } else { |
| _debug("reap"); |
| list_move_tail(&vl->grave, &corpses); |
| list_del_init(&vl->link); |
| } |
| spin_unlock(&vl->cell->vl_lock); |
| } |
| |
| spin_unlock(&afs_vlocation_graveyard_lock); |
| |
| /* now reap the corpses we've extracted */ |
| while (!list_empty(&corpses)) { |
| vl = list_entry(corpses.next, struct afs_vlocation, grave); |
| list_del(&vl->grave); |
| afs_vlocation_destroy(vl); |
| } |
| |
| _leave(""); |
| } |
| |
| /* |
| * initialise the VL update process |
| */ |
| int __init afs_vlocation_update_init(void) |
| { |
| afs_vlocation_update_worker = |
| create_singlethread_workqueue("kafs_vlupdated"); |
| return afs_vlocation_update_worker ? 0 : -ENOMEM; |
| } |
| |
| /* |
| * discard all the volume location records for rmmod |
| */ |
| void __exit afs_vlocation_purge(void) |
| { |
| afs_vlocation_timeout = 0; |
| |
| spin_lock(&afs_vlocation_updates_lock); |
| list_del_init(&afs_vlocation_updates); |
| spin_unlock(&afs_vlocation_updates_lock); |
| cancel_delayed_work(&afs_vlocation_update); |
| queue_delayed_work(afs_vlocation_update_worker, |
| &afs_vlocation_update, 0); |
| destroy_workqueue(afs_vlocation_update_worker); |
| |
| cancel_delayed_work(&afs_vlocation_reap); |
| schedule_delayed_work(&afs_vlocation_reap, 0); |
| } |
| |
| /* |
| * update a volume location |
| */ |
| static void afs_vlocation_updater(struct work_struct *work) |
| { |
| struct afs_cache_vlocation vldb; |
| struct afs_vlocation *vl, *xvl; |
| time_t now; |
| long timeout; |
| int ret; |
| |
| _enter(""); |
| |
| now = get_seconds(); |
| |
| /* find a record to update */ |
| spin_lock(&afs_vlocation_updates_lock); |
| for (;;) { |
| if (list_empty(&afs_vlocation_updates)) { |
| spin_unlock(&afs_vlocation_updates_lock); |
| _leave(" [nothing]"); |
| return; |
| } |
| |
| vl = list_entry(afs_vlocation_updates.next, |
| struct afs_vlocation, update); |
| if (atomic_read(&vl->usage) > 0) |
| break; |
| list_del_init(&vl->update); |
| } |
| |
| timeout = vl->update_at - now; |
| if (timeout > 0) { |
| queue_delayed_work(afs_vlocation_update_worker, |
| &afs_vlocation_update, timeout * HZ); |
| spin_unlock(&afs_vlocation_updates_lock); |
| _leave(" [nothing]"); |
| return; |
| } |
| |
| list_del_init(&vl->update); |
| atomic_inc(&vl->usage); |
| spin_unlock(&afs_vlocation_updates_lock); |
| |
| /* we can now perform the update */ |
| _debug("update %s", vl->vldb.name); |
| vl->state = AFS_VL_UPDATING; |
| vl->upd_rej_cnt = 0; |
| vl->upd_busy_cnt = 0; |
| |
| ret = afs_vlocation_update_record(vl, &vldb); |
| switch (ret) { |
| case 0: |
| afs_vlocation_apply_update(vl, &vldb); |
| vl->state = AFS_VL_VALID; |
| break; |
| case -ENOMEDIUM: |
| vl->state = AFS_VL_VOLUME_DELETED; |
| break; |
| default: |
| vl->state = AFS_VL_UNCERTAIN; |
| break; |
| } |
| |
| /* and then reschedule */ |
| _debug("reschedule"); |
| vl->update_at = get_seconds() + afs_vlocation_update_timeout; |
| |
| spin_lock(&afs_vlocation_updates_lock); |
| |
| if (!list_empty(&afs_vlocation_updates)) { |
| /* next update in 10 minutes, but wait at least 1 second more |
| * than the newest record already queued so that we don't spam |
| * the VL server suddenly with lots of requests |
| */ |
| xvl = list_entry(afs_vlocation_updates.prev, |
| struct afs_vlocation, update); |
| if (vl->update_at <= xvl->update_at) |
| vl->update_at = xvl->update_at + 1; |
| xvl = list_entry(afs_vlocation_updates.next, |
| struct afs_vlocation, update); |
| timeout = xvl->update_at - now; |
| if (timeout < 0) |
| timeout = 0; |
| } else { |
| timeout = afs_vlocation_update_timeout; |
| } |
| |
| ASSERT(list_empty(&vl->update)); |
| |
| list_add_tail(&vl->update, &afs_vlocation_updates); |
| |
| _debug("timeout %ld", timeout); |
| queue_delayed_work(afs_vlocation_update_worker, |
| &afs_vlocation_update, timeout * HZ); |
| spin_unlock(&afs_vlocation_updates_lock); |
| afs_put_vlocation(vl); |
| } |