Carsten Otte wrote:
> From: Christian Borntraeger <cborntra-tA70FqPdS9bQT0dZR+***@public.gmane.org>
>
> This is the host counterpart for the virtual network device driver. This driver
> has a char device node where the hypervisor can attach. It also
> has a kind of dumb switch that passes packets between guests. Last but not least
> it contains a host network interface. Patches for attaching other host network
> devices to the switch via raw sockets, extensions to qeth or netfilter are
>
Any feel for the performance relative to the bridging code? The
bridging code is a pretty big bottleneck in guest=>guest communications
in Xen at least.
> currently tested but not ready yet. We did not use the linux bridging code to
> allow non-root users to create virtual networks between guests.
>
Is that the primary reason? If so, that seems like a rather large
hammer for something that a userspace suid wrapper could have addressed...
Regards,
Anthony Liguori
> Signed-off-by: Christian Borntraeger <cborntra-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> Signed-off-by: Carsten Otte <cotte-tA70FqPdS9bQT0dZR+***@public.gmane.org>
>
> ---
> drivers/s390/guest/Makefile | 3
> drivers/s390/guest/vnet_port_guest.c | 302 ++++++++++++
> drivers/s390/guest/vnet_port_guest.h | 21
> drivers/s390/guest/vnet_port_host.c | 418 +++++++++++++++++
> drivers/s390/guest/vnet_port_host.h | 18
> drivers/s390/guest/vnet_switch.c | 828 +++++++++++++++++++++++++++++++++++
> drivers/s390/guest/vnet_switch.h | 119 +++++
> drivers/s390/net/Kconfig | 12
> 8 files changed, 1721 insertions(+)
>
> Index: linux-2.6.21/drivers/s390/guest/vnet_port_guest.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6.21/drivers/s390/guest/vnet_port_guest.c
> @@ -0,0 +1,302 @@
> +/*
> + * Copyright (C) 2005 IBM Corporation
> + * Authors: Carsten Otte <cotte-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + * Christian Borntraeger <borntrae-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + *
> + */
> +#include <linux/etherdevice.h>
> +#include <linux/fs.h>
> +#include <linux/kernel.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/pagemap.h>
> +#include <linux/poll.h>
> +#include <linux/spinlock.h>
> +
> +#include "vnet.h"
> +#include "vnet_port_guest.h"
> +#include "vnet_switch.h"
> +
> +/* Atomically ORs the bits in @data into the port's pending_irqs mask. */
> +static void COFIXME_add_irq(struct vnet_guest_port *zgp, int data)
> +{
> +	int seen;
> +
> +	/* retry until the mask did not change between read and cmpxchg */
> +	for (;;) {
> +		seen = atomic_read(&zgp->pending_irqs);
> +		if (atomic_cmpxchg(&zgp->pending_irqs, seen, seen | data) == seen)
> +			break;
> +	}
> +}
> +
> +/* Atomically fetches the pending_irqs mask and resets it to 0.
> + * Returns the mask as it was before the reset. */
> +static int COFIXME_get_irq(struct vnet_guest_port *zgp)
> +{
> +	int seen;
> +
> +	for (;;) {
> +		seen = atomic_read(&zgp->pending_irqs);
> +		if (atomic_cmpxchg(&zgp->pending_irqs, seen, 0) == seen)
> +			return seen;
> +	}
> +}
> +
> +/* Signals an event of @type to the userspace process behind a guest port:
> + * the matching poll bit is recorded in pending_irqs and SIGIO is sent
> + * through the fasync list. Without an fasync registration the event is
> + * only logged and otherwise dropped. Any other @type is a BUG(). */
> +static void
> +vnet_guest_interrupt(struct vnet_port *port, int type)
> +{
> +	struct vnet_guest_port *priv;
> +
> +	priv = port->priv;
> +
> +	if (!priv->fasync) {
> +		/* the two literals are concatenated, hence the space
> +		 * after the comma */
> +		printk (KERN_WARNING "vnet: cannot send interrupt, "
> +			"fd not async\n");
> +		return;
> +	}
> +	switch (type) {
> +	case VNET_IRQ_START_RX:
> +		COFIXME_add_irq(priv, POLLIN);
> +		kill_fasync(&priv->fasync, SIGIO, POLL_IN);
> +		break;
> +	case VNET_IRQ_START_TX:
> +		COFIXME_add_irq(priv, POLLOUT);
> +		kill_fasync(&priv->fasync, SIGIO, POLL_OUT);
> +		break;
> +	default:
> +		BUG();
> +	}
> +}
> +
> +/* Drops the page reference of every pinned buffer page and of the
> + * control page, and clears the stored kernel addresses. */
> +static void
> +vnet_guest_release_pages(struct vnet_port *port)
> +{
> +	int slot, pg;
> +	void *addr;
> +
> +	for (slot = 0; slot < VNET_QUEUE_LEN; slot++) {
> +		for (pg = 0; pg < VNET_BUFFER_PAGES; pg++) {
> +			addr = port->s2p_data[slot][pg];
> +			if (addr) {
> +				page_cache_release(virt_to_page(addr));
> +				port->s2p_data[slot][pg] = NULL;
> +			}
> +			addr = port->p2s_data[slot][pg];
> +			if (addr) {
> +				page_cache_release(virt_to_page(addr));
> +				port->p2s_data[slot][pg] = NULL;
> +			}
> +		}
> +	}
> +	if (!port->control)
> +		return;
> +	page_cache_release(virt_to_page(port->control));
> +	port->control = NULL;
> +}
> +
> +/* open() of the switch char device: obtains a port named "guest:<pid>" on
> + * the switch selected by the minor number and allocates its guest state.
> + * Returns -ENODEV if no port could be obtained, -ENOMEM if the guest
> + * state cannot be allocated. */
> +static int
> +vnet_chr_open(struct inode *ino, struct file *filp)
> +{
> +	char name[BUS_ID_SIZE];
> +	struct vnet_port *port;
> +	struct vnet_guest_port *guest;
> +
> +	snprintf(name, BUS_ID_SIZE, "guest:%d", current->pid);
> +	port = vnet_port_get(iminor(filp->f_dentry->d_inode), name);
> +	if (!port)
> +		return -ENODEV;
> +
> +	guest = kzalloc(sizeof(struct vnet_guest_port), GFP_KERNEL);
> +	port->priv = guest;
> +	if (!guest) {
> +		vnet_port_put(port);
> +		return -ENOMEM;
> +	}
> +
> +	port->interrupt = vnet_guest_interrupt;
> +	filp->private_data = port;
> +	return nonseekable_open(ino, filp);
> +}
> +
> +/* release() of the switch char device: detaches the port from the switch,
> + * unpins the guest pages, frees the guest state allocated in
> + * vnet_chr_open() and drops the port reference. Always returns 0. */
> +static int
> +vnet_chr_release (struct inode *ino, struct file *filp)
> +{
> +	struct vnet_port *port;
> +	struct vnet_guest_port *priv;
> +
> +	port = (struct vnet_port *) filp->private_data;
> +	priv = port->priv;
> +
> +	/* take this file off the SIGIO list before its state is freed */
> +	fasync_helper(-1, filp, 0, &priv->fasync);
> +
> +//FIXME: what about open close? We unregister non existing mac addresses
> +// in vnet_port_detach!
> +	vnet_port_detach(port);
> +	vnet_guest_release_pages(port);
> +	/* the port is detached, nobody calls port->interrupt anymore */
> +	port->priv = NULL;
> +	kfree(priv);
> +	vnet_port_put(port);
> +	return 0;
> +}
> +
> +
> +/* Pins one page of user memory and returns its kernel address, or NULL on
> + * failure (misaligned address, page not writable, or pinning failed).
> + * A page pinned here must be released with page_cache_release(). */
> +static void *user_to_kernel(char __user *user)
> +{
> +	struct page *temp_page;
> +	int rc;
> +
> +	/* The address is read from the user-provided control page, so a
> +	 * misaligned value has to fail the request rather than BUG() the
> +	 * host kernel. */
> +	if (((unsigned long) user) % PAGE_SIZE)
> +		return NULL;
> +	rc = fault_in_pages_writeable(user, PAGE_SIZE);
> +	if (rc)
> +		return NULL;
> +	rc = get_user_pages(current, current->mm, (unsigned long) user,
> +			    1, 1, 1, &temp_page, NULL);
> +	if (rc != 1)
> +		return NULL;
> +	return page_address(temp_page);
> +}
> +
> +/* Pins every page of the guest's s2p and p2s buffers. The user addresses
> + * are read from the control page; the resulting kernel addresses are
> + * stored in port->s2p_data and port->p2s_data.
> + * Returns 0 on success. On failure everything pinned so far, including
> + * the control page, is released by vnet_guest_release_pages() and
> + * -ENOMEM is returned. */
> +static int
> +vnet_guest_alloc_pages(struct vnet_port *port)
> +{
> +	int i,j;
> +
> +	down_read(&current->mm->mmap_sem);
> +	for (i=0; i<VNET_QUEUE_LEN; i++)
> +		for (j=0; j<VNET_BUFFER_PAGES; j++) {
> +			port->s2p_data[i][j] = user_to_kernel(port->control->
> +						s2pbufs[i].data + j*PAGE_SIZE);
> +			if (!port->s2p_data[i][j])
> +				goto cleanup;
> +			port->p2s_data[i][j] = user_to_kernel(port->control->
> +						p2sbufs[i].data + j*PAGE_SIZE);
> +			if (!port->p2s_data[i][j])
> +				goto cleanup;
> +
> +		}
> +	up_read(&current->mm->mmap_sem);
> +	return 0;
> +cleanup:
> +	up_read(&current->mm->mmap_sem);
> +	vnet_guest_release_pages(port);
> +	return -ENOMEM;
> +}
> +
> +/* VNET_REGISTER_CTL: @user_addr points to a u64 that holds the page
> + * aligned user address of the control page. The control page and all
> + * buffers it describes are pinned, a random MAC address is assigned and
> + * written to the control page, and the port is attached to the switch.
> + * Returns 0 on success, -EBUSY if a control page is already registered,
> + * -EFAULT for a bad address, or the error of vnet_guest_alloc_pages(). */
> +static int
> +vnet_register_control(struct vnet_port *port, unsigned long user_addr)
> +{
> +	u64 uaddr;
> +	int rc;
> +	struct page *control_page;
> +
> +	/* a second registration would overwrite, and thereby leak, the
> +	 * pages pinned by the first one and attach the port twice */
> +	if (port->control)
> +		return -EBUSY;
> +	rc = copy_from_user(&uaddr, (void __user *) user_addr, sizeof(uaddr));
> +	if (rc)
> +		return -EFAULT;
> +	if (uaddr % PAGE_SIZE)
> +		return -EFAULT;
> +	down_read(&current->mm->mmap_sem);
> +	rc = get_user_pages(current, current->mm, (unsigned long)uaddr,
> +			    1, 1, 1, &control_page, NULL);
> +	up_read(&current->mm->mmap_sem);
> +	if (rc!=1)
> +		return -EFAULT;
> +	port->control = (struct vnet_control *) page_address(control_page);
> +	rc = vnet_guest_alloc_pages(port);
> +	if (rc) {
> +		/* vnet_guest_alloc_pages() already unpinned everything,
> +		 * including the control page */
> +		printk("vnet: could not get buffers\n");
> +		return rc;
> +	}
> +	random_ether_addr(port->mac);
> +	memcpy(port->control->mac, port->mac, ETH_ALEN);
> +	vnet_port_attach(port);
> +	return 0;
> +}
> +
> +/* VNET_INTERRUPT: reads the interrupt type from userspace and acts on it.
> + * VNET_IRQ_START_RX runs vnet_port_rx(), VNET_IRQ_START_TX is a no-op.
> + * Returns 0, -EFAULT if the type cannot be read, or -EINVAL for an
> + * unknown type. */
> +static int
> +vnet_interrupt(struct vnet_port *port, int __user *u_type)
> +{
> +	int type;
> +
> +	if (copy_from_user (&type, u_type, sizeof(int)))
> +		return -EFAULT;
> +
> +	switch (type) {
> +	case VNET_IRQ_START_RX:
> +		vnet_port_rx(port);
> +		return 0;
> +	case VNET_IRQ_START_TX: /* noop with current drop packet approach*/
> +		return 0;
> +	default:
> +		printk(KERN_ERR "vnet: Unknown interrupt type %d\n", type);
> +		return -EINVAL;
> +	}
> +}
> +
> +
> +
> +
> +//this is a HACK. >>COFIXME<<
> +/* Returns the poll bits collected since the last call and clears them.
> + * The poll table in @wait is not used. */
> +unsigned int
> +vnet_poll(struct file *filp, poll_table * wait)
> +{
> +	struct vnet_port *port = filp->private_data;
> +	struct vnet_guest_port *guest = port->priv;
> +
> +	return COFIXME_get_irq(guest);
> +}
> +
> +/* VNET_INFO: copies the link type of the switch and the maximum MTU to
> + * userspace. Returns 0 on success or -EFAULT if @data cannot be written. */
> +static int vnet_fill_info(struct vnet_port *zp, void __user *data)
> +{
> +	struct vnet_info info;
> +
> +	/* clear padding and any field not set below so that no kernel
> +	 * stack contents reach userspace */
> +	memset(&info, 0, sizeof(info));
> +	info.linktype = zp->zs->linktype;
> +	info.maxmtu=32768; //FIXME
> +	/* copy_to_user() returns the number of bytes left, not an errno */
> +	if (copy_to_user(data, &info, sizeof(info)))
> +		return -EFAULT;
> +	return 0;
> +}
> +/* ioctl dispatcher of the switch char device. Unknown request numbers
> + * yield -ENOTTY, everything else returns the result of its handler. */
> +long
> +vnet_ioctl(struct file *filp, unsigned int no, unsigned long data)
> +{
> +	struct vnet_port *port = (struct vnet_port *) filp->private_data;
> +
> +	switch (no) {
> +	case VNET_REGISTER_CTL:
> +		return vnet_register_control(port, data);
> +	case VNET_INTERRUPT:
> +		return vnet_interrupt(port, (int __user *) data);
> +	case VNET_INFO:
> +		return vnet_fill_info(port, (void __user *) data);
> +	default:
> +		return -ENOTTY;
> +	}
> +}
> +
> +/* fasync handler: updates the port's fasync list and, when notification
> + * is switched on, makes the calling process the owner of the file.
> + * Returns the fasync_helper() result if that is negative or @on is 0,
> + * otherwise the result of f_setown(). */
> +int vnet_fasync(int fd, struct file *filp, int on)
> +{
> +	struct vnet_port *port = filp->private_data;
> +	struct vnet_guest_port *guest = port->priv;
> +	int rc;
> +
> +	rc = fasync_helper(fd, filp, on, &guest->fasync);
> +	if (rc < 0 || !on)
> +		return rc;
> +	return f_setown(filp, current->pid, 0);
> +}
> +
> +
> +/* file operations of the switch char device a hypervisor attaches to */
> +static struct file_operations vnet_char_fops = {
> +	.owner		= THIS_MODULE,
> +	.open		= vnet_chr_open,
> +	.release	= vnet_chr_release,
> +	.poll		= vnet_poll,
> +	.fasync		= vnet_fasync,
> +	.unlocked_ioctl	= vnet_ioctl,
> +};
> +
> +
> +
> +/* Initializes @cdev with the guest port file operations (vnet_char_fops). */
> +void vnet_cdev_init(struct cdev *cdev)
> +{
> + cdev_init(cdev, &vnet_char_fops);
> +}
> Index: linux-2.6.21/drivers/s390/guest/vnet_port_guest.h
> ===================================================================
> --- /dev/null
> +++ linux-2.6.21/drivers/s390/guest/vnet_port_guest.h
> @@ -0,0 +1,21 @@
> +/*
> + * Copyright (C) 2005 IBM Corporation
> + * Authors: Carsten Otte <cotte-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + * Christian Borntraeger <cborntra-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + *
> + */
> +
> +#ifndef __VNET_PORTS_GUEST_H
> +#define __VNET_PORTS_GUEST_H
> +
> +#include <linux/fs.h>
> +#include <linux/cdev.h>
> +#include <asm/atomic.h>
> +
> +/* per-port state of a guest that is attached through the char device */
> +struct vnet_guest_port {
> + /* fasync list used to send SIGIO to the attached process */
> + struct fasync_struct *fasync;
> + /* POLLIN/POLLOUT bits that were raised but not yet fetched by poll */
> + atomic_t pending_irqs;
> +};
> +
> +extern void vnet_cdev_init(struct cdev *cdev);
> +#endif
> Index: linux-2.6.21/drivers/s390/guest/vnet_port_host.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6.21/drivers/s390/guest/vnet_port_host.c
> @@ -0,0 +1,418 @@
> +/*
> + * vnet zlswitch handling
> + *
> + * Copyright (C) 2005 IBM Corporation
> + * Authors: Carsten Otte <cotte-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + * Christian Borntraeger <borntrae-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + *
> + */
> +
> +#include <linux/etherdevice.h>
> +#include <linux/if.h>
> +#include <linux/if_ether.h>
> +#include <linux/if_arp.h>
> +#include <linux/kernel.h>
> +#include <linux/list.h>
> +#include <linux/module.h>
> +#include <linux/netdevice.h>
> +#include <linux/rtnetlink.h>
> +#include <linux/pagemap.h>
> +#include <linux/spinlock.h>
> +
> +#include "vnet.h"
> +#include "vnet_switch.h"
> +#include "vnet_port_host.h"
> +
> +/* Interrupt callback of the host port: VNET_IRQ_START_RX schedules the
> + * poll routine of the host net device, VNET_IRQ_START_TX wakes its
> + * transmit queue. Any other type is a BUG(). */
> +static void
> +vnet_host_interrupt(struct vnet_port *zp, int type)
> +{
> +	struct vnet_host_port *host = zp->priv;
> +
> +	BUG_ON(!host->netdev);
> +
> +	if (type == VNET_IRQ_START_RX)
> +		netif_rx_schedule(host->netdev);
> +	else if (type == VNET_IRQ_START_TX)
> +		netif_wake_queue(host->netdev);
> +	else
> +		BUG();
> +
> +	/* we are called via system call path. enforce softirq handling */
> +	do_softirq();
> +}
> +
> +/* Frees all buffer pages and the control structure of a host port and
> + * clears the pointers, so calling it again is harmless. */
> +static void
> +vnet_host_free(struct vnet_port *zp)
> +{
> +	int slot, pg;
> +	void *addr;
> +
> +	for (slot = 0; slot < VNET_QUEUE_LEN; slot++) {
> +		for (pg = 0; pg < VNET_BUFFER_PAGES; pg++) {
> +			addr = zp->s2p_data[slot][pg];
> +			if (addr) {
> +				free_page((unsigned long) addr);
> +				zp->s2p_data[slot][pg] = NULL;
> +			}
> +			addr = zp->p2s_data[slot][pg];
> +			if (addr) {
> +				free_page((unsigned long) addr);
> +				zp->p2s_data[slot][pg] = NULL;
> +			}
> +		}
> +	}
> +	if (!zp->control)
> +		return;
> +	kfree(zp->control);
> +	zp->control = NULL;
> +}
> +
> +/* Allocates the control structure and one page for every buffer page of
> + * the s2p and p2s queues of a host port.
> + * Returns 0 on success or -ENOMEM; on failure everything allocated so
> + * far is freed again by vnet_host_free(). */
> +static int
> +vnet_port_hostsetup(struct vnet_port *zp)
> +{
> +	int i,j;
> +
> +	zp->control = kzalloc(sizeof(*zp->control), GFP_KERNEL);
> +	if (!zp->control)
> +		return -ENOMEM;
> +	for (i=0; i<VNET_QUEUE_LEN; i++)
> +		for (j=0; j<VNET_BUFFER_PAGES; j++) {
> +			zp->s2p_data[i][j] = (void *) __get_free_pages(GFP_KERNEL,0);
> +			if (!zp->s2p_data[i][j])
> +				goto oom;
> +			zp->p2s_data[i][j] = (void *) __get_free_pages(GFP_KERNEL,0);
> +			/* s2p_data[i][j] is still recorded and gets freed by
> +			 * vnet_host_free(); freeing it here as well would
> +			 * free the page twice */
> +			if (!zp->p2s_data[i][j])
> +				goto oom;
> +		}
> +	zp->control->buffer_size = VNET_BUFFER_SIZE;
> +	return 0;
> +oom:
> +	printk(KERN_WARNING "vnet: No memory for buffer space of host device\n");
> +	vnet_host_free(zp);
> +	return -ENOMEM;
> +}
> +
> +/* host interface specific parts */
> +
> +
> +/* open handler of the host net device: resets the s2pmit queue state and
> + * starts the transmit queue. Always returns 0. */
> +static int
> +vnet_net_open(struct net_device *dev)
> +{
> +	struct vnet_port *port = dev->priv;
> +
> +	atomic_set(&port->control->s2pmit, 0);
> +	netif_start_queue(dev);
> +	return 0;
> +}
> +
> +/* stop handler of the host net device: stops the transmit queue.
> + * Always returns 0. */
> +static int
> +vnet_net_stop(struct net_device *dev)
> +{
> + netif_stop_queue(dev);
> + return 0;
> +}
> +
> +/* tx_timeout handler: logs the problem, resets both queue states of the
> + * host port, runs vnet_port_rx() and wakes the transmit queue. */
> +static void vnet_net_tx_timeout(struct net_device *dev)
> +{
> +	struct vnet_port *zhost = dev->priv;
> +	struct vnet_control *ctl = zhost->control;
> +
> +	printk(KERN_ERR "problems in xmit for device %s\n Resetting...\n",
> +	       dev->name);
> +
> +	atomic_set(&ctl->p2smit, 0);
> +	atomic_set(&ctl->s2pmit, 0);
> +
> +	vnet_port_rx(zhost);
> +	netif_wake_queue(dev);
> +}
> +
> +
> +/* hard_start_xmit handler of the host net device: copies the skb into the
> + * next free p2s buffer and calls vnet_port_rx() if the queue was empty
> + * before.
> + * Returns NETDEV_TX_LOCKED if txlock is contended, NETDEV_TX_BUSY if the
> + * p2s queue is full on entry (the skb is left untouched so the stack can
> + * retry it) and NETDEV_TX_OK once the skb has been copied and freed. */
> +static int
> +vnet_net_xmit(struct sk_buff *skb, struct net_device *dev)
> +{
> +	struct vnet_port *zhost;
> +	struct vnet_host_port *zhp;
> +	struct vnet_control *control;
> +	struct xmit_buffer *buf;
> +	int buffer_status;
> +	int pkid;
> +	int rc = NETDEV_TX_OK;
> +
> +	zhost = dev->priv;
> +	zhp = zhost->priv;
> +	control = zhost->control;
> +
> +	if (!spin_trylock(&zhost->txlock))
> +		return NETDEV_TX_LOCKED;
> +	if (vnet_q_full(atomic_read(&control->p2smit))) {
> +		netif_stop_queue(dev);
> +		/* the skb was neither sent nor freed; reporting
> +		 * NETDEV_TX_OK here would leak it */
> +		rc = NETDEV_TX_BUSY;
> +		goto full;
> +	}
> +	pkid = __nextx(atomic_read(&control->p2smit));
> +	buf = &control->p2sbufs[pkid];
> +	buf->len = skb->len;
> +	buf->proto = skb->protocol;
> +	vnet_copy_buf_to_pages(zhost->p2s_data[pkid], skb->data, skb->len);
> +	buffer_status = vnet_tx_packet(&control->p2smit);
> +	spin_unlock(&zhost->txlock);
> +	zhp->stats.tx_packets++;
> +	zhp->stats.tx_bytes += skb->len;
> +	dev_kfree_skb(skb);
> +	dev->trans_start = jiffies;
> +	if (buffer_status & QUEUE_WAS_EMPTY)
> +		vnet_port_rx(zhost);
> +	if (!(buffer_status & QUEUE_IS_FULL))
> +		return NETDEV_TX_OK;
> +	netif_stop_queue(dev);
> +	spin_lock(&zhost->txlock);
> +full:
> +	/* we might have raced against the wakeup */
> +	if (!vnet_q_full(atomic_read(&control->p2smit)))
> +		netif_start_queue(dev);
> +	spin_unlock(&zhost->txlock);
> +	return rc;
> +}
> +
> +/* dev->poll callback for layer 3 switches: copies packets from the s2p
> + * queue into newly allocated skbs and passes them to the stack, taking
> + * the protocol from the buffer descriptor. At most
> + * min(64, dev->quota, *budget) packets are delivered per call.
> + * Returns 1 if that limit was used up, 0 after the queue was seen empty
> + * and netif_rx_complete() was called. */
> +static int
> +vnet_l3_poll(struct net_device *dev, int *budget)
> +{
> + struct vnet_port *zp = dev->priv;
> + struct vnet_host_port *zhp = zp->priv;
> + struct vnet_control *control = zp->control;
> + struct xmit_buffer *buf;
> + struct sk_buff *skb;
> + int pkid, count, numpackets = min(64, min(dev->quota, *budget));
> + int buffer_status;
> +
> + if (vnet_q_empty(atomic_read(&control->s2pmit))) {
> + count = 0;
> + goto empty;
> + }
> +loop:
> + count = 0;
> + while(numpackets) {
> + pkid = __nextr(atomic_read(&control->s2pmit));
> + buf = &control->s2pbufs[pkid];
> + /* 2 extra bytes, reserved below in front of the data */
> + skb = dev_alloc_skb(buf->len + 2);
> + if (likely(skb)) {
> + skb_reserve(skb, 2);
> + vnet_copy_pages_to_buf(skb_put(skb, buf->len),
> + zp->s2p_data[pkid], buf->len);
> + skb->dev = dev;
> + skb->protocol = buf->proto;
> +// skb->ip_summed = CHECKSUM_UNNECESSARY;
> + zhp->stats.rx_packets++;
> + zhp->stats.rx_bytes += buf->len;
> + netif_receive_skb(skb);
> + numpackets--;
> + (*budget)--;
> + dev->quota--;
> + count++;
> + } else
> + zhp->stats.rx_dropped++;
> + /* called whether or not an skb could be allocated */
> + buffer_status = vnet_rx_packet(&control->s2pmit);
> + if (buffer_status & QUEUE_IS_EMPTY)
> + goto empty;
> + }
> + return 1; //please ask us again
> +empty:
> + netif_rx_complete(dev);
> + /* we might have raced against a wakeup */
> + if (!vnet_q_empty(atomic_read(&control->s2pmit))) {
> + if (netif_rx_reschedule(dev, count))
> + goto loop;
> + }
> + return 0;
> +}
> +
> +
> +/* dev->poll callback for layer 2 switches: copies packets from the s2p
> + * queue into newly allocated skbs and passes them to the stack, with the
> + * protocol determined by eth_type_trans(). At most
> + * min(64, dev->quota, *budget) packets are delivered per call.
> + * Returns 1 if that limit was used up, 0 after the queue was seen empty
> + * and netif_rx_complete() was called. */
> +static int
> +vnet_l2_poll(struct net_device *dev, int *budget)
> +{
> + struct vnet_port *zp = dev->priv;
> + struct vnet_host_port *zhp = zp->priv;
> + struct vnet_control *control = zp->control;
> + struct xmit_buffer *buf;
> + struct sk_buff *skb;
> + int pkid, count, numpackets = min(64, min(dev->quota, *budget));
> + int buffer_status;
> +
> + if (vnet_q_empty(atomic_read(&control->s2pmit))) {
> + count = 0;
> + goto empty;
> + }
> +loop:
> + count = 0;
> + while(numpackets) {
> + pkid = __nextr(atomic_read(&control->s2pmit));
> + buf = &control->s2pbufs[pkid];
> + /* 2 extra bytes, reserved below in front of the data */
> + skb = dev_alloc_skb(buf->len + 2);
> + if (likely(skb)) {
> + skb_reserve(skb, 2);
> + vnet_copy_pages_to_buf(skb_put(skb, buf->len),
> + zp->s2p_data[pkid], buf->len);
> + skb->dev = dev;
> + skb->protocol = eth_type_trans(skb, dev);
> +// skb->ip_summed = CHECKSUM_UNNECESSARY;
> + zhp->stats.rx_packets++;
> + zhp->stats.rx_bytes += buf->len;
> + netif_receive_skb(skb);
> + numpackets--;
> + (*budget)--;
> + dev->quota--;
> + count++;
> + } else
> + zhp->stats.rx_dropped++;
> + /* called whether or not an skb could be allocated */
> + buffer_status = vnet_rx_packet(&control->s2pmit);
> + if (buffer_status & QUEUE_IS_EMPTY)
> + goto empty;
> + }
> + return 1; //please ask us again
> +empty:
> + netif_rx_complete(dev);
> + /* we might have raced against a wakeup */
> + if (!vnet_q_empty(atomic_read(&control->s2pmit))) {
> + if (netif_rx_reschedule(dev, count))
> + goto loop;
> + }
> + return 0;
> +}
> +
> +/* get_stats handler: returns the counters kept in the host port state. */
> +static struct net_device_stats *
> +vnet_net_stats(struct net_device *dev)
> +{
> +	struct vnet_port *zp = dev->priv;
> +	struct vnet_host_port *host = zp->priv;
> +
> +	return &host->stats;
> +}
> +
> +/* change_mtu handler: accepts an MTU that is larger than ETH_ZLEN and
> + * not larger than VNET_BUFFER_SIZE - ETH_HLEN, otherwise -ERANGE. */
> +static int
> +vnet_net_change_mtu(struct net_device *dev, int new_mtu)
> +{
> +	if (new_mtu <= ETH_ZLEN || new_mtu > VNET_BUFFER_SIZE-ETH_HLEN)
> +		return -ERANGE;
> +
> +	dev->mtu = new_mtu;
> +	return 0;
> +}
> +
> +/* net_device setup shared by the layer 2 and layer 3 host interfaces */
> +static void
> +__vnet_common_init(struct net_device *dev)
> +{
> +	/* device callbacks */
> +	dev->open		= vnet_net_open;
> +	dev->stop		= vnet_net_stop;
> +	dev->hard_start_xmit	= vnet_net_xmit;
> +	dev->tx_timeout		= vnet_net_tx_timeout;
> +	dev->get_stats		= vnet_net_stats;
> +	dev->change_mtu		= vnet_net_change_mtu;
> +
> +	/* tunables */
> +	dev->watchdog_timeo	= VNET_TIMEOUT;
> +	dev->weight		= 64;
> +	//dev->features |= NETIF_F_NO_CSUM | NETIF_F_LLTX;
> +	dev->features		|= NETIF_F_LLTX;
> +}
> +
> +/* setup callback for the host interface of a layer 3 switch */
> +static void
> +__vnet_layer3_init(struct net_device *dev)
> +{
> +	dev->type = ARPHRD_PPP;
> +	dev->flags = IFF_BROADCAST|IFF_MULTICAST|IFF_NOARP;
> +	dev->tx_queue_len = 1000;
> +	/* the earlier ETH_DATA_LEN default was overwritten right away */
> +	dev->mtu = 1492;
> +	dev->poll = vnet_l3_poll;
> +	__vnet_common_init(dev);
> +}
> +
> +/* setup callback for the host interface of a layer 2 switch: an ethernet
> + * device with a random MAC address and an MTU of 1492 */
> +static void
> +__vnet_layer2_init(struct net_device *dev)
> +{
> + ether_setup(dev);
> + random_ether_addr(dev->dev_addr);
> + dev->mtu = 1492;
> + dev->poll = vnet_l2_poll;
> + __vnet_common_init(dev);
> +}
> +
> +/* destroy callback of a host port: detaches the port, unregisters and
> + * frees its net device, frees the buffers and the host state and drops
> + * the port reference. */
> +static void
> +vnet_host_destroy(struct vnet_port *zhost)
> +{
> +	struct vnet_host_port *host = zhost->priv;
> +
> +	vnet_port_detach(zhost);
> +
> +	unregister_netdev(host->netdev);
> +	free_netdev(host->netdev);
> +	host->netdev = NULL;
> +
> +	vnet_host_free(zhost);
> +	kfree(host);
> +	vnet_port_put(zhost);
> +}
> +
> +
> +
> +/* Creates the host port "host:<name>" on the switch called @name and
> + * registers a net device called @name for it. The net device is a layer 2
> + * or layer 3 device depending on the link type of the switch.
> + * Returns the new port, or NULL on any failure; in that case everything
> + * set up so far has been released again. */
> +struct vnet_port *
> +vnet_host_create(char *name)
> +{
> +	int rc;
> +	struct vnet_port *port;
> +	struct vnet_host_port *host;
> +	char busname[BUS_ID_SIZE];
> +	int minor;
> +
> +	snprintf(busname, BUS_ID_SIZE, "host:%s", name);
> +
> +	minor = vnet_minor_by_name(name);
> +	if (minor < 0)
> +		return NULL;
> +	port = vnet_port_get(minor, busname);
> +	if (!port)
> +		return NULL;
> +	host = kzalloc(sizeof(struct vnet_host_port), GFP_KERNEL);
> +	if (!host)
> +		/* the port is reference counted, so it is put, not kfree'd */
> +		goto out_put_port;
> +	port->priv = host;
> +	rc =vnet_port_hostsetup(port);
> +	if (rc)
> +		goto out_free_host;
> +	rtnl_lock();
> +	if (port->zs->linktype == 2)
> +		host->netdev = alloc_netdev(0, name, __vnet_layer2_init);
> +	else
> +		host->netdev = alloc_netdev(0, name, __vnet_layer3_init);
> +	if (!host->netdev)
> +		goto out_unlock;
> +	memcpy(port->mac, host->netdev->dev_addr, ETH_ALEN);
> +
> +	host->netdev->priv = port;
> +	port->interrupt = vnet_host_interrupt;
> +	port->destroy = vnet_host_destroy;
> +
> +	if (!register_netdevice(host->netdev)) {
> +		/* good case */
> +		rtnl_unlock();
> +		return port;
> +	}
> +	host->netdev->priv = NULL;
> +	free_netdev(host->netdev);
> +	host->netdev = NULL;
> +out_unlock:
> +	rtnl_unlock();
> +	vnet_host_free(port);
> +out_free_host:
> +	/* the host state was leaked on these paths before */
> +	port->priv = NULL;
> +	kfree(host);
> +out_put_port:
> +	vnet_port_put(port);
> +	return NULL;
> +}
> Index: linux-2.6.21/drivers/s390/guest/vnet_port_host.h
> ===================================================================
> --- /dev/null
> +++ linux-2.6.21/drivers/s390/guest/vnet_port_host.h
> @@ -0,0 +1,18 @@
> +/*
> + * Copyright (C) 2005 IBM Corporation
> + * Christian Borntraeger <cborntra-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + *
> + */
> +
> +#ifndef __VNET_PORTS_HOST_H
> +#define __VNET_PORTS_HOST_H
> +
> +#include <linux/netdevice.h>
> +#include "vnet_switch.h"
> +
> +/* per-port state of the host side network interface */
> +struct vnet_host_port {
> + /* interface counters */
> + struct net_device_stats stats;
> + /* the host net device that belongs to this port */
> + struct net_device *netdev;
> +};
> +extern struct vnet_port * vnet_host_create(char *name);
> +#endif
> Index: linux-2.6.21/drivers/s390/guest/vnet_switch.c
> ===================================================================
> --- /dev/null
> +++ linux-2.6.21/drivers/s390/guest/vnet_switch.c
> @@ -0,0 +1,828 @@
> +/*
> + * vnet zlswitch handling
> + *
> + * Copyright (C) 2005 IBM Corporation
> + * Author: Carsten Otte <cotte-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + * Author: Christian Borntraeger <borntrae-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + *
> + */
> +
> +#include <linux/device.h>
> +#include <linux/etherdevice.h>
> +#include <linux/fs.h>
> +#include <linux/if.h>
> +#include <linux/if_ether.h>
> +#include <linux/kernel.h>
> +#include <linux/list.h>
> +#include <linux/miscdevice.h>
> +#include <linux/module.h>
> +#include <linux/netdevice.h>
> +#include <linux/rtnetlink.h>
> +#include <linux/pagemap.h>
> +#include <linux/spinlock.h>
> +
> +#include "vnet.h"
> +#include "vnet_port_guest.h"
> +#include "vnet_port_host.h"
> +#include "vnet_switch.h"
> +
> +#define NUM_MINORS 1024
> +
> +/* devices housekeeping, creation & destruction */
> +static LIST_HEAD(vnet_switches);
> +static rwlock_t vnet_switches_lock = RW_LOCK_UNLOCKED;
> +static struct class *zwitch_class;
> +static int vnet_major;
> +static struct device *root_dev;
> +
> +
> +/* The following functions allow ports of the switch to know about
> + * the MAC addresses of other ports. This is necessary for special
> + * hardware like OSA express, which silently drops incoming packets
> + * that do not match a known MAC address and does not support
> + * promiscuous mode either. We have to register all guest MAC addresses
> + * at the OSA to make packet receive work. */
> +
> +/* Announces the MAC address of @port to every other port of the switch
> + * that has a set_mac callback. Called when a new port is added. */
> +static void vnet_switch_add_mac(struct vnet_port *port)
> +{
> +	struct vnet_port *peer;
> +
> +	read_lock(&port->zs->ports_lock);
> +	list_for_each_entry(peer, &port->zs->switch_ports, lh) {
> +		if (peer == port || !peer->set_mac)
> +			continue;
> +		peer->set_mac(peer, port->mac, 1);
> +	}
> +	read_unlock(&port->zs->ports_lock);
> +}
> +
> +/* Withdraws the MAC address of @port from every port on the switch list
> + * that has a set_mac callback. Called when a port is detached. */
> +static void vnet_switch_del_mac(struct vnet_port *port)
> +{
> +	struct vnet_port *peer;
> +
> +	read_lock(&port->zs->ports_lock);
> +	list_for_each_entry(peer, &port->zs->switch_ports, lh) {
> +		if (!peer->set_mac)
> +			continue;
> +		peer->set_mac(peer, port->mac, 0);
> +	}
> +	read_unlock(&port->zs->ports_lock);
> +}
> +
> +/* Passes the MAC address of every other port on the same zwitch to the
> + * set_mac callback of @port. Does nothing for ports without set_mac. */
> +static void __vnet_port_learn_macs(struct vnet_port *port)
> +{
> +	struct vnet_port *peer;
> +
> +	if (port->set_mac) {
> +		list_for_each_entry(peer, &port->zs->switch_ports, lh) {
> +			if (peer == port)
> +				continue;
> +			port->set_mac(port, peer->mac, 1);
> +		}
> +	}
> +}
> +
> +/* Removes the MAC address of every other port on the same zwitch through
> + * the set_mac callback of @port. Does nothing for ports without set_mac. */
> +static void __vnet_port_unlearn_macs(struct vnet_port *port)
> +{
> +	struct vnet_port *peer;
> +
> +	if (port->set_mac) {
> +		list_for_each_entry(peer, &port->zs->switch_ports, lh) {
> +			if (peer == port)
> +				continue;
> +			port->set_mac(port, peer->mac, 0);
> +		}
> +	}
> +}
> +
> +
> +/* Returns the switch whose char device has the given minor number, or
> + * NULL if there is none. Takes no lock and no reference itself. */
> +static struct vnet_switch *__vnet_switch_by_minor(int minor)
> +{
> +	struct vnet_switch *cur;
> +
> +	list_for_each_entry(cur, &vnet_switches, lh)
> +		if (MINOR(cur->cdev.dev) == minor)
> +			return cur;
> +
> +	return NULL;
> +}
> +
> +/* Returns the switch whose name matches @name in the first
> + * ZWITCH_NAME_SIZE characters, or NULL if there is none. Takes no lock
> + * and no reference itself. */
> +static struct vnet_switch *__vnet_switch_by_name(char *name)
> +{
> +	struct vnet_switch *cur;
> +
> +	list_for_each_entry(cur, &vnet_switches, lh) {
> +		if (!strncmp(cur->name, name, ZWITCH_NAME_SIZE))
> +			return cur;
> +	}
> +	return NULL;
> +}
> +
> +/* Looks up the switch with the given minor number and takes a reference
> + * on its device. Returns NULL if no such switch exists; nothing is
> + * created here. The reference is dropped with zwitch_put(). */
> +static struct vnet_switch *zwitch_get(int minor)
> +{
> +	struct vnet_switch *zs;
> +
> +	read_lock(&vnet_switches_lock);
> +	zs = __vnet_switch_by_minor(minor);
> +	if (zs)
> +		get_device(&zs->dev);
> +	read_unlock(&vnet_switches_lock);
> +	return zs;
> +}
> +
> +/* Reduces the reference count of the switch by dropping one reference on
> + * its embedded device. */
> +static void zwitch_put(struct vnet_switch * zs)
> +{
> + put_device(&zs->dev);
> +}
> +
> +/* Searches the port whose MAC address equals the address at the start of
> + * the packet data. Returns NULL for multicast (including broadcast)
> + * addresses and for addresses that no port owns. */
> +static struct vnet_port *__vnet_find_l2(struct vnet_switch *zs, char *data)
> +{
> +	//FIXME: make this a hash lookup, more macs per device?
> +	struct vnet_port *candidate;
> +
> +	if (is_multicast_ether_addr(data))
> +		return NULL;
> +
> +	list_for_each_entry(candidate, &zs->switch_ports, lh)
> +		if (!compare_ether_addr(candidate->mac, data))
> +			return candidate;
> +
> +	return NULL;
> +}
> +
> +/* searches the destination for IP only interfaces. Normally routing
> + * is the way to go, but guests should see the net transparently without
> + * a hop in between.
> + * Currently a stub: it always returns NULL. */
> +static struct vnet_port *__vnet_find_l3(struct vnet_switch *zs, char *data)
> +{
> + return NULL;
> +}
> +
> +/* Selects the destination lookup that fits the link type of the switch
> + * (2 or 3). Any other link type is a BUG(). */
> +static struct vnet_port * __vnet_find_destination(struct vnet_switch *zs,
> +						   char *data)
> +{
> +	if (zs->linktype == 2)
> +		return __vnet_find_l2(zs, data);
> +	if (zs->linktype == 3)
> +		return __vnet_find_l3(zs, data);
> +	BUG();
> +}
> +
> +/* copies len bytes of data from the memory specified by the list of
> + * pointers **from into the memory specified by the list of pointers **to
> + * with each pointer pointing to a page.
> + * A len of zero or less copies nothing. */
> +static void
> +vnet_switch_page_copy(void **to, void **from, int len)
> +{
> +	int remaining=len;
> +	int pageid = 0;
> +	int amount;
> +
> +	/* "> 0" and not "!= 0": a negative len would never reach zero and
> +	 * walk off the end of both page lists */
> +	while (remaining > 0) {
> +		amount = min((int)PAGE_SIZE, remaining);
> +		memcpy(to[pageid], from[pageid], amount);
> +		pageid++;
> +		remaining -= amount;
> +	}
> +}
> +
> +/* copies the packet data into the next free s2p buffer of destination
> + * and raises VNET_IRQ_START_RX there if the queue was empty before.
> + * returns 0 if ok, -ENOBUFS if the queue of destination is full */
> +static int
> +vnet_unicast(struct vnet_port *destination, void **from_data, int len, int proto)
> +{
> +	int pkid;
> +	int buffer_status;
> +	void **to_data;
> +	struct vnet_control *control;
> +
> +	control = destination->control;
> +	spin_lock_bh(&destination->rxlock);
> +	if (vnet_q_full(atomic_read(&control->s2pmit))) {
> +		destination->rx_dropped++;
> +		spin_unlock_bh(&destination->rxlock);
> +		return -ENOBUFS;
> +	}
> +	pkid = __nextx(atomic_read(&control->s2pmit));
> +	to_data = destination->s2p_data[pkid];
> +	vnet_switch_page_copy(to_data, from_data, len);
> +	control->s2pbufs[pkid].len = len;
> +	control->s2pbufs[pkid].proto = proto;
> +	buffer_status = vnet_tx_packet(&control->s2pmit);
> +	/* senders only hold the ports read lock, so several of them can
> +	 * deliver to the same port at once; count under rxlock just like
> +	 * rx_dropped above */
> +	destination->rx_bytes += len;
> +	destination->rx_packets++;
> +	spin_unlock_bh(&destination->rxlock);
> +	if (buffer_status & QUEUE_WAS_EMPTY)
> +		destination->interrupt(destination, VNET_IRQ_START_RX);
> +	return 0;
> +}
> +
> +/* Emulates a broadcast by sending the packet to every port of the switch
> + * except the sender. Returns the OR of all vnet_unicast() results, i.e.
> + * 0 only if every delivery worked. */
> +static int vnet_allcast(struct vnet_port *from_port, void **fromdata,
> +			 int len, int proto)
> +{
> +	struct vnet_port *peer;
> +	int failure = 0;
> +
> +	list_for_each_entry(peer, &from_port->zs->switch_ports, lh) {
> +		if (peer == from_port)
> +			continue;
> +		failure |= vnet_unicast(peer, fromdata, len, proto);
> +	}
> +	return failure;
> +}
> +
> +/* Forwards an incoming packet to the port it is addressed to, or to all
> + * other ports if no single destination is found. The sender's tx_dropped
> + * count is increased on failure, its tx_packets/tx_bytes otherwise. */
> +static void vnet_switch_packet(struct vnet_port *from_port,
> +				void **from_data, int len, int proto)
> +{
> +	struct vnet_port *target;
> +	int failure;
> +
> +	read_lock(&from_port->zs->ports_lock);
> +	target = __vnet_find_destination(from_port->zs, from_data[0]);
> +	/* we dont want to loop. FIXME: document when this can happen*/
> +	if (target == from_port) {
> +		read_unlock(&from_port->zs->ports_lock);
> +		return;
> +	}
> +	failure = target ? vnet_unicast(target, from_data, len, proto)
> +			 : vnet_allcast(from_port, from_data, len, proto);
> +	read_unlock(&from_port->zs->ports_lock);
> +
> +	if (!failure) {
> +		from_port->tx_packets++;
> +		from_port->tx_bytes += len;
> +		return;
> +	}
> +	from_port->tx_dropped++;
> +}
> +
> +/* release callback of the port device: drops the port's reference on its
> + * switch and frees the port. */
> +static void vnet_port_release(struct device *dev)
> +{
> +	struct vnet_port *port = container_of(dev, struct vnet_port, dev);
> +
> +	zwitch_put(port->zs);
> +	kfree(port);
> +}
> +
> +/* sysfs show routine for "mac": the port MAC as six colon separated
> + * upper case hex bytes */
> +static ssize_t vnet_port_read_mac(struct device *dev,
> +				   struct device_attribute *attr,
> +				   char *buf)
> +{
> +	struct vnet_port *port = container_of(dev, struct vnet_port, dev);
> +
> +	return sprintf(buf,"%02X:%02X:%02X:%02X:%02X:%02X",
> +		       port->mac[0], port->mac[1], port->mac[2],
> +		       port->mac[3], port->mac[4], port->mac[5]);
> +}
> +
> +/* sysfs show routine for "tx_bytes" */
> +static ssize_t vnet_port_read_tx_bytes(struct device *dev,
> +					struct device_attribute *attr,
> +					char *buf)
> +{
> +	struct vnet_port *port = container_of(dev, struct vnet_port, dev);
> +
> +	return sprintf(buf,"%lu", port->tx_bytes);
> +}
> +
> +/* sysfs show routine for "rx_bytes" */
> +static ssize_t vnet_port_read_rx_bytes(struct device *dev,
> +					struct device_attribute *attr,
> +					char *buf)
> +{
> +	struct vnet_port *port = container_of(dev, struct vnet_port, dev);
> +
> +	return sprintf(buf,"%lu", port->rx_bytes);
> +}
> +
> +/* sysfs show routine for "tx_packets" */
> +static ssize_t vnet_port_read_tx_packets(struct device *dev,
> +					  struct device_attribute *attr,
> +					  char *buf)
> +{
> +	struct vnet_port *port = container_of(dev, struct vnet_port, dev);
> +
> +	return sprintf(buf,"%lu", port->tx_packets);
> +}
> +
> +/* sysfs show routine for "rx_packets" */
> +static ssize_t vnet_port_read_rx_packets(struct device *dev,
> +					  struct device_attribute *attr,
> +					  char *buf)
> +{
> +	struct vnet_port *port = container_of(dev, struct vnet_port, dev);
> +
> +	return sprintf(buf,"%lu", port->rx_packets);
> +}
> +
> +/* sysfs show routine for "tx_dropped" */
> +static ssize_t vnet_port_read_tx_dropped(struct device *dev,
> +					  struct device_attribute *attr,
> +					  char *buf)
> +{
> +	struct vnet_port *port = container_of(dev, struct vnet_port, dev);
> +
> +	return sprintf(buf,"%lu", port->tx_dropped);
> +}
> +
> +/* sysfs show routine for "rx_dropped" */
> +static ssize_t vnet_port_read_rx_dropped(struct device *dev,
> +					  struct device_attribute *attr,
> +					  char *buf)
> +{
> +	struct vnet_port *port = container_of(dev, struct vnet_port, dev);
> +
> +	return sprintf(buf,"%lu", port->rx_dropped);
> +}
> +
> +/* per-port sysfs attributes: readable by the owner only (S_IRUSR), no
> + * store routine */
> +static DEVICE_ATTR(mac, S_IRUSR, vnet_port_read_mac, NULL);
> +static DEVICE_ATTR(tx_bytes, S_IRUSR, vnet_port_read_tx_bytes, NULL);
> +static DEVICE_ATTR(rx_bytes, S_IRUSR, vnet_port_read_rx_bytes, NULL);
> +static DEVICE_ATTR(tx_packets, S_IRUSR, vnet_port_read_tx_packets, NULL);
> +static DEVICE_ATTR(rx_packets, S_IRUSR, vnet_port_read_rx_packets, NULL);
> +static DEVICE_ATTR(tx_dropped, S_IRUSR, vnet_port_read_tx_dropped, NULL);
> +static DEVICE_ATTR(rx_dropped, S_IRUSR, vnet_port_read_rx_dropped, NULL);
> +
> +static int vnet_port_attributes(struct device *dev)
> +{
> + int rc;
> + rc = device_create_file(dev, &dev_attr_mac);
> + if (rc)
> + return rc;
> + rc = device_create_file(dev, &dev_attr_tx_dropped);
> + if (rc)
> + return rc;
> + rc = device_create_file(dev, &dev_attr_rx_dropped);
> + if (rc)
> + return rc;
> + rc = device_create_file(dev, &dev_attr_rx_bytes);
> + if (rc)
> + return rc;
> + rc = device_create_file(dev, &dev_attr_tx_bytes);
> + if (rc)
> + return rc;
> + rc = device_create_file(dev, &dev_attr_rx_packets);
> + if (rc)
> + return rc;
> + rc = device_create_file(dev, &dev_attr_tx_packets);
> + return rc;
> +}
> +
> +
> +//FIXME implement this
> +static int vnet_port_exists(struct vnet_switch *zs, char *name)
> +{
> + read_lock(&zs->ports_lock);
> + read_unlock(&zs->ports_lock);
> + return 0;
> +
> +}
> +
> +static struct vnet_port *vnet_port_create(struct vnet_switch *zs,
> + char *name)
> +{
> + struct vnet_port *port;
> +
> + if (vnet_port_exists(zs, name))
> + return NULL;
> +
> + port = kzalloc(sizeof(*port), GFP_KERNEL);
> + if (port) {
> + spin_lock_init(&port->rxlock);
> + spin_lock_init(&port->txlock);
> + INIT_LIST_HEAD(&port->lh);
> + port->zs = zs;
> + } else
> + return NULL;
> + port->dev.parent = &zs->dev;
> + port->dev.release = vnet_port_release;
> + strncpy(port->dev.bus_id, name, BUS_ID_SIZE);
> + if (device_register(&port->dev)) {
> + kfree(port);
> + return NULL;
> + }
> + if (vnet_port_attributes(&port->dev)) {
> + device_unregister(&port->dev);
> + kfree(port);
> + return NULL;
> + }
> + return port;
> +}
> +
> +/*------------------------ switch creation/Destruction/housekeeping---------*/
> +
> +static void zwitch_destroy_ports(struct vnet_switch *zs)
> +{
> + struct vnet_port *port, *tmp;
> +
> + list_for_each_entry_safe(port, tmp, &zs->switch_ports, lh) {
> + if (port->destroy)
> + port->destroy(port);
> + else
> + printk("No destroy function for port\n");
> + }
> +}
> +
> +
> +static void zwitch_destroy(struct vnet_switch *zs)
> +{
> + class_device_destroy(zwitch_class, zs->cdev.dev);
> + cdev_del(&zs->cdev);
> + device_unregister(&zs->dev);
> +}
> +
> +static void zwitch_release(struct device *dev)
> +{
> + struct vnet_switch *zs;
> +
> + zs = container_of(dev, struct vnet_switch, dev);
> + kfree(zs);
> +}
> +
> +static int __zwitch_get_minor(void)
> +{
> + int d, found;
> + struct vnet_switch *zs;
> +
> + for (d=0; d< NUM_MINORS; d++) {
> + found = 0;
> + list_for_each_entry(zs, &vnet_switches, lh)
> + if (MINOR(zs->cdev.dev) == d)
> + found++;
> + if (!found) break;
> + }
> + if (found) return -ENODEV;
> + return d;
> +}
> +
> +/*
> + * checks if this name already exists for a zwitch
> + */
> +static int __zwitch_check_name(char *name)
> +{
> + struct vnet_switch *zs;
> +
> + list_for_each_entry(zs, &vnet_switches, lh)
> + if (!strncmp(name, zs->name, ZWITCH_NAME_SIZE))
> + return -EEXIST;
> + return 0;
> +}
> +
> +static int zwitch_create(char *name, int linktype)
> +{
> + struct vnet_switch *zs;
> + int minor;
> + int ret;
> +
> + if ((linktype < 2) || (linktype > 3))
> + return -EINVAL;
> + zs = kzalloc(sizeof(*zs), GFP_KERNEL);
> + if (!zs) {
> + printk("Creation of %s failed: out of memory\n", name);
> + return -ENOMEM;
> + }
> + zs->linktype = linktype;
> + strncpy(zs->name, name, ZWITCH_NAME_SIZE);
> + rwlock_init(&zs->ports_lock);
> + INIT_LIST_HEAD(&zs->switch_ports);
> +
> + write_lock(&vnet_switches_lock);
> + minor = __zwitch_get_minor();
> + if (minor < 0) {
> + write_unlock(&vnet_switches_lock);
> + printk("Creation of %s failed: No free minor number\n", name);
> + kfree(zs);
> + return minor;
> + }
> + if (__zwitch_check_name(zs->name)) {
> + write_unlock(&vnet_switches_lock);
> + printk("Creation of %s failed: name exists\n", name);
> + kfree(zs);
> + return -EEXIST;
> + }
> + list_add_tail(&zs->lh, &vnet_switches);
> + write_unlock(&vnet_switches_lock);
> + strncpy(zs->dev.bus_id, name, min((int) strlen(name),
> + ZWITCH_NAME_SIZE));
> + zs->dev.parent = root_dev;
> + zs->dev.release = zwitch_release;
> + ret = device_register(&zs->dev);
> + if (ret) {
> + write_lock(&vnet_switches_lock);
> + list_del(&zs->lh);
> + write_unlock(&vnet_switches_lock);
> + printk("Creation of %s failed: no device\n",name);
> + return ret;
> + }
> + vnet_cdev_init(&zs->cdev);
> + cdev_add(&zs->cdev, MKDEV(vnet_major, minor), 1);
> + zs->class_device = class_device_create(zwitch_class, NULL,
> + zs->cdev.dev, &zs->dev, name);
> + if (IS_ERR(zs->class_device)) {
> + cdev_del(&zs->cdev);
> + write_lock(&vnet_switches_lock);
> + list_del(&zs->lh);
> + write_unlock(&vnet_switches_lock);
> + printk("Creation of %s failed: no class_device\n", name);
> + device_unregister(&zs->dev);
> + return PTR_ERR(zs->class_device);
> + }
> + return 0;
> +}
> +
> +
> +static int zwitch_delete(char *name)
> +{
> + struct vnet_switch *zs;
> +
> + write_lock(&vnet_switches_lock);
> + zs = __vnet_switch_by_name(name);
> + if (!zs) {
> + write_unlock(&vnet_switches_lock);
> + return -ENOENT;
> + }
> + list_del(&zs->lh);
> + write_unlock(&vnet_switches_lock);
> + zwitch_destroy_ports(zs);
> + zwitch_destroy(zs);
> + return 0;
> +}
> +
> +/* checks if a switch for the given minor exists
> + * if yes, create an unconnected port on this switch
> + * if no, return NULL */
> +struct vnet_port *vnet_port_get(int minor, char *port_name)
> +{
> + struct vnet_switch *zs;
> + struct vnet_port *port;
> +
> + zs = zwitch_get(minor);
> + if (!zs)
> + return NULL;
> + port = vnet_port_create(zs, port_name);
> + if (!port)
> + zwitch_put(zs);
> + return port;
> +}
> +
> +/* attaches the port to the switch. The port must be
> + * fully initialized, as it may get called immediately afterwards */
> +void vnet_port_attach(struct vnet_port *port)
> +{
> + write_lock_bh(&port->zs->ports_lock);
> + __vnet_port_learn_macs(port);
> + list_add(&port->lh, &port->zs->switch_ports);
> + write_unlock_bh(&port->zs->ports_lock);
> + vnet_switch_add_mac(port);
> + return;
> +}
> +
> +/* detaches the port from the switch. After that,
> + * no calls into the port are made */
> +void vnet_port_detach(struct vnet_port *port)
> +{
> + vnet_switch_del_mac(port);
> + write_lock_bh(&port->zs->ports_lock);
> + if (!list_empty(&port->lh))
> + list_del(&port->lh);
> + __vnet_port_unlearn_macs(port);
> + write_unlock_bh(&port->zs->ports_lock);
> +}
> +
> +/* releases all resources allocated with vnet_port_get */
> +void vnet_port_put(struct vnet_port *port)
> +{
> + BUG_ON(!list_empty(&port->lh) &&( port->lh.next != LIST_POISON1));
> + device_unregister(&port->dev);
> +}
> +
> +/* tell the switch that new data is available */
> +void vnet_port_rx(struct vnet_port *port)
> +{
> + struct vnet_control *control;
> + int pkid, rc;
> +
> + control = port->control;
> + if (vnet_q_empty(atomic_read(&control->p2smit))) {
> + printk(KERN_WARNING "vnet_switch: Empty buffer"
> + "on interrupt\n");
> + return;
> + }
> + do {
> + pkid = __nextr(atomic_read(&control->p2smit));
> + /* fire and forget. Let the switch care about lost packets*/
> + vnet_switch_packet(port, port->p2s_data[pkid],
> + control->p2sbufs[pkid].len,
> + control->p2sbufs[pkid].proto);
> + rc = vnet_rx_packet(&control->p2smit);
> + if (rc & QUEUE_WAS_FULL) {
> + port->interrupt(port, VNET_IRQ_START_TX);
> + }
> + } while (!(rc & QUEUE_IS_EMPTY));
> + return;
> +}
> +
> +/* checks if the given address is locally attached to the switch*/
> +int vnet_address_is_local(struct vnet_switch *zs, char *address)
> +{
> + struct vnet_port *port;
> +
> + read_lock(&zs->ports_lock);
> + port = __vnet_find_destination(zs, address);
> + read_unlock(&zs->ports_lock);
> + return (port != NULL);
> +}
> +
> +
> +int vnet_minor_by_name(char *name)
> +{
> + struct vnet_switch *zs;
> + int ret;
> +
> + read_lock(&vnet_switches_lock);
> + zs = __vnet_switch_by_name(name);
> + if (zs)
> + ret = MINOR(zs->cdev.dev);
> + else
> + ret = -ENODEV;
> + read_unlock(&vnet_switches_lock);
> + return ret;
> +}
> +
> +static void vnet_root_release(struct device *dev)
> +{
> + kfree(dev);
> +}
> +
> +
> +struct command {
> + char *string1;
> + char *string2;
> +};
> +
> +/*FIXME this is ugly. Don't worry: as soon as we have finalized the interface,
> + this crap is going away. Still, it works.......*/
> +static long vnet_control_ioctl(struct file *f, unsigned int command,
> + unsigned long data)
> +{
> + char string1[BUS_ID_SIZE];
> + char string2[BUS_ID_SIZE];
> + struct command com;
> + struct vnet_port *port;
> +
> + if (!capable(CAP_NET_ADMIN))
> + return -EPERM;
> + if (copy_from_user(&com, (__user struct command*) data, sizeof(struct command)))
> + return -EFAULT;
> + if (copy_from_user(string1, (__user char *) com.string1, ZWITCH_NAME_SIZE))
> + return -EFAULT;
> + if (command >=2)
> + if (copy_from_user(string2, (__user char *) com.string2, ZWITCH_NAME_SIZE))
> + return -EFAULT;
> + if (strnlen(string1, ZWITCH_NAME_SIZE) == ZWITCH_NAME_SIZE)
> + return -EINVAL;
> + switch(command) {
> + case ADD_SWITCH:
> + return zwitch_create(string1,3);
> + case DEL_SWITCH:
> + return zwitch_delete(string1);
> + case ADD_HOST:
> + port = vnet_host_create(string1);
> + if (port) {
> + vnet_port_attach(port);
> + return 0;
> + } else
> + return -ENODEV;
> + default:
> + return -EINVAL;
> + }
> + return 0;
> +}
> +
> +static int vnet_control_open(struct inode *inode, struct file *file)
> +{
> + return 0;
> +}
> +
> +static int vnet_control_release(struct inode *inode, struct file *file)
> +{
> + return 0;
> +}
> +
> +struct file_operations vnet_control_fops = {
> + .open = vnet_control_open,
> + .release = vnet_control_release,
> + .unlocked_ioctl = &vnet_control_ioctl,
> + .compat_ioctl = &vnet_control_ioctl,
> +};
> +
> +struct miscdevice vnet_control_device = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "vnet",
> + .fops = &vnet_control_fops,
> +};
> +
> +int vnet_register_control_device(void)
> +{
> + return misc_register(&vnet_control_device);
> +}
> +
> +int __init vnet_switch_init(void)
> +{
> + int ret;
> + dev_t dev;
> +
> + zwitch_class = class_create(THIS_MODULE, "vnet");
> + if (IS_ERR(zwitch_class)) {
> + printk(KERN_ERR "vnet_switch: class_create failed!\n");
> + ret = PTR_ERR(zwitch_class);
> + goto out;
> + }
> + ret = alloc_chrdev_region(&dev, 0, NUM_MINORS, "vnet");
> + if (ret) {
> + printk(KERN_ERR "vnet_switch: alloc_chrdev_region failed\n");
> + goto out_class;
> + }
> + vnet_major = MAJOR(dev);
> + root_dev = kzalloc(sizeof(*root_dev), GFP_KERNEL);
> + if (!root_dev) {
> + printk(KERN_ERR "vnet_switch:allocation of device failed\n");
> + ret = -ENOMEM;
> + goto out_chrdev;
> + }
> + strncpy(root_dev->bus_id, "vnet", 5);
> + root_dev->release = vnet_root_release;
> + ret =device_register(root_dev);
> + if (ret) {
> + printk(KERN_ERR "vnet_switch: could not register device\n");
> + kfree(root_dev);
> + goto out_chrdev;
> + }
> + ret = vnet_register_control_device();
> + if (ret) {
> + printk("vnet_switch: could not create control device\n");
> + goto out_dev;
> + }
> + printk ("vnet_switch loaded\n");
> +/* FIXME ---------- remove these static defines as soon as everyone has the
> + * user tools */
> + {
> + struct vnet_port *port;
> + zwitch_create("myswitch0",2);
> + zwitch_create("myswitch1",3);
> +
> + port = vnet_host_create("myswitch0");
> + if (port)
> + vnet_port_attach(port);
> + port = vnet_host_create("myswitch1");
> + if (port)
> + vnet_port_attach(port);
> + }
> +/*-----------------------------------------------------------*/
> + return 0;
> +out_dev:
> + device_unregister(root_dev);
> +out_chrdev:
> + unregister_chrdev_region(MKDEV(vnet_major,0), NUM_MINORS);
> +out_class:
> + class_destroy(zwitch_class);
> +out:
> + return ret;
> +}
> +
> +/* remove all existing vnet_zwitches in the system and unregister the
> + * character device from the system */
> +void vnet_switch_exit(void)
> +{
> + struct vnet_switch *zs, *tmp;
> + list_for_each_entry_safe(zs, tmp, &vnet_switches, lh) {
> + zwitch_destroy_ports(zs);
> + zwitch_destroy(zs);
> + }
> + device_unregister(root_dev);
> + misc_deregister(&vnet_control_device);
> + unregister_chrdev_region(MKDEV(vnet_major,0), NUM_MINORS);
> + class_destroy(zwitch_class);
> + printk ("vnet_switch unloaded\n");
> +}
> +
> +module_init(vnet_switch_init);
> +module_exit(vnet_switch_exit);
> +MODULE_DESCRIPTION("VNET: Virtual switch for vnet interfaces");
> +MODULE_AUTHOR("Christian Borntraeger <borntrae-tA70FqPdS9bQT0dZR+***@public.gmane.org>");
> +MODULE_LICENSE("GPL");
> Index: linux-2.6.21/drivers/s390/guest/vnet_switch.h
> ===================================================================
> --- /dev/null
> +++ linux-2.6.21/drivers/s390/guest/vnet_switch.h
> @@ -0,0 +1,119 @@
> +/*
> + * vnet_switch - zlive insular communication knack switch
> + * infrastructure for virtual switching of Linux guests running under Linux
> + *
> + * Copyright (C) 2005 IBM Corporation
> + * Author: Carsten Otte <cotte-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + * Christian Borntraeger <borntrae-tA70FqPdS9bQT0dZR+***@public.gmane.org>
> + *
> + */
> +
> +#ifndef __VNET_SWITCH_H
> +#define __VNET_SWITCH_H
> +
> +#include <linux/cdev.h>
> +#include <linux/device.h>
> +#include <linux/if_ether.h>
> +#include <linux/spinlock.h>
> +
> +#include "vnet.h"
> +
> +/* defines for IOCTLs. interface should be replaced by something better */
> +#define ADD_SWITCH 0
> +#define DEL_SWITCH 1
> +#define ADD_OSA 2
> +#define DEL_OSA 3
> +#define ADD_HOST 4
> +#define DEL_HOST 5
> +
> +/* min(IFNAMSIZ, BUS_ID_SIZE)*/
> +#define ZWITCH_NAME_SIZE 16
> +
> +/* This structure describes a virtual switch for ports to userspace network
> + * interfaces, e.g. in Linux under Linux environments*/
> +struct vnet_switch {
> + struct list_head lh;
> + char name[ZWITCH_NAME_SIZE];
> + struct list_head switch_ports; /* list of ports */
> + rwlock_t ports_lock; /* lock for switch_ports */
> + struct class_device *class_device;
> + struct cdev cdev;
> + struct device dev;
> + struct vnet_port *osa;
> + int linktype; /* 2=ethernet 3=IP */
> +};
> +
> +/* description of a port of the vnet_switch */
> +struct vnet_port {
> + struct list_head lh;
> + struct vnet_switch *zs;
> + struct vnet_control *control;
> + void *s2p_data[VNET_QUEUE_LEN][(VNET_BUFFER_SIZE>>PAGE_SHIFT)];
> + void *p2s_data[VNET_QUEUE_LEN][(VNET_BUFFER_SIZE>>PAGE_SHIFT)];
> + char mac[ETH_ALEN];
> + void *priv;
> + int (*set_mac) (struct vnet_port *port, char mac[ETH_ALEN], int add);
> + void (*interrupt) (struct vnet_port *port, int type);
> + void (*destroy) (struct vnet_port *port);
> + struct device dev;
> + unsigned long rx_packets; /* total packets received */
> + unsigned long tx_packets; /* total packets transmitted */
> + unsigned long rx_bytes; /* total bytes received */
> + unsigned long tx_bytes; /* total bytes transmitted */
> + unsigned long rx_dropped; /* no space in receive buffer */
> + unsigned long tx_dropped; /* no space in destination buffer */
> + spinlock_t rxlock;
> + spinlock_t txlock;
> +};
> +
> +
> +static inline int
> +vnet_copy_buf_to_pages(void **data, char *buf, int len)
> +{
> + int i;
> +
> + if (len == 0)
> + return 0;
> + for (i=0; i <= ((len - 1) >> PAGE_SHIFT); i++ )
> + memcpy(data[i], buf + i*PAGE_SIZE, min(PAGE_SIZE, len - i*PAGE_SIZE));
> + return len;
> +}
> +
> +static inline int
> +vnet_copy_pages_to_buf(char *buf, void **data, int len)
> +{
> + int i;
> +
> + if (len == 0)
> + return 0;
> + for (i=0; i <= ((len -1) >> PAGE_SHIFT); i++ )
> + memcpy(buf + i*PAGE_SIZE, data[i], min(PAGE_SIZE, len - i*PAGE_SIZE));
> + return len;
> +}
> +
> +
> +/* checks if a switch with the given minor exists
> + * if yes, create a named and unconnected port on
> + * this switch with the given name. if no, return NULL */
> +extern struct vnet_port *vnet_port_get(int minor, char *port_name);
> +
> +/* attaches the port to the switch. The port must be
> + * fully initialized, as it may get data immediately afterwards */
> +extern void vnet_port_attach(struct vnet_port *port);
> +
> +/* detaches the port from the switch. After that,
> + * no calls into the port are made */
> +extern void vnet_port_detach(struct vnet_port *port);
> +
> +/* releases all resources allocated with vnet_port_get */
> +extern void vnet_port_put(struct vnet_port *port);
> +
> +/* tell the switch that new data is available */
> +extern void vnet_port_rx(struct vnet_port *port);
> +
> +/* get the minor for a given name */
> +extern int vnet_minor_by_name(char *name);
> +
> +/* checks if the given address is locally attached to the switch*/
> +extern int vnet_address_is_local(struct vnet_switch *zs, char *address);
> +#endif
> Index: linux-2.6.21/drivers/s390/guest/Makefile
> ===================================================================
> --- linux-2.6.21.orig/drivers/s390/guest/Makefile
> +++ linux-2.6.21/drivers/s390/guest/Makefile
> @@ -6,3 +6,6 @@ obj-$(CONFIG_GUEST_CONSOLE) += guest_con
> obj-$(CONFIG_S390_GUEST) += vdev.o vdev_device.o
> obj-$(CONFIG_VDISK) += vdisk.o vdisk_blk.o
> obj-$(CONFIG_VNET_GUEST) += vnet_guest.o
> +vnet_host-objs := vnet_switch.o vnet_port_guest.o vnet_port_host.o
> +obj-$(CONFIG_VNET_HOST) += vnet_host.o
> +
> Index: linux-2.6.21/drivers/s390/net/Kconfig
> ===================================================================
> --- linux-2.6.21.orig/drivers/s390/net/Kconfig
> +++ linux-2.6.21/drivers/s390/net/Kconfig
> @@ -95,4 +95,16 @@ config VNET_GUEST
> connection.
> If you're not using host/guest support, say N.
>
> +config VNET_HOST
> + tristate "virtual networking support (HOST)"
> + depends on QETH && S390_HOST
> + help
> + This is the host part of the vnet guest network connection.
> + Say Y if you plan to host guests with network
> + connection. The host part consists of a virtual switch,
> + a host device, and a connection to the qeth
> + driver.
> + If you're not using this kernel for hosting guests, say N.
> +
> +
> endmenu
>
>
>
> -------------------------------------------------------------------------
> This SF.net email is sponsored by DB2 Express
> Download DB2 Express C - the FREE version of DB2 express and take
> control of your XML. No limits. Just data. Click to get it now.
> http://sourceforge.net/powerbar/db2/
> _______________________________________________
> kvm-devel mailing list
> kvm-devel-5NWGOfrQmneRv+***@public.gmane.org
> https://lists.sourceforge.net/lists/listinfo/kvm-devel
>
>
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/