blob: 0e48a172b79d88e013b6b886d9102618dc2e573d [file] [log] [blame]
diff -ur ../vger3-011229/linux/net/unix/af_unix.c linux/net/unix/af_unix.c
--- ../vger3-011229/linux/net/unix/af_unix.c Mon Dec 3 20:24:03 2001
+++ linux/net/unix/af_unix.c Sat Jan 5 04:30:19 2002
@@ -112,6 +112,7 @@
#include <asm/checksum.h>
int sysctl_unix_max_dgram_qlen = 10;
+int sysctl_unix_stream_pages = MAX_SKB_FRAGS;
unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
@@ -1123,9 +1124,6 @@
struct scm_cookie scm;
memset(&scm, 0, sizeof(scm));
unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
scm_destroy(&scm);
sock_wfree(skb);
}
@@ -1140,6 +1138,67 @@
scm->fp = NULL;
}
+int datagram_copy_fromiovec(struct iovec *iov, struct sk_buff *skb, int size)
+{
+ struct sock *sk;
+ struct sk_buff **tail, *skb1;
+ int copy = min_t(int, size, skb_tailroom(skb));
+
+ if (memcpy_fromiovec(skb_put(skb, copy), iov, copy))
+ goto do_fault;
+
+ if ((size -= copy) == 0)
+ return 0;
+
+ sk = skb->sk;
+ skb1 = skb;
+ tail = &skb_shinfo(skb)->frag_list;
+
+ do {
+ struct page *page;
+ int i = skb_shinfo(skb1)->nr_frags;
+
+ if (i == MAX_SKB_FRAGS) {
+ skb1 = alloc_skb(0, sk->allocation);
+ if (skb1 == NULL)
+ goto do_oom;
+ *tail = skb1;
+ tail = &skb1->next;
+ i = 0;
+ skb->truesize += skb1->truesize;
+ atomic_add(skb1->truesize, &sk->wmem_alloc);
+ }
+
+ page = alloc_pages(sk->allocation, 0);
+ if (page == NULL)
+ goto do_oom;
+
+ copy = min_t(int, size, PAGE_SIZE);
+ skb_shinfo(skb1)->nr_frags=i+1;
+ skb_shinfo(skb1)->frags[i].page = page;
+ skb_shinfo(skb1)->frags[i].page_offset = 0;
+ skb_shinfo(skb1)->frags[i].size = copy;
+
+ skb1->len += copy;
+ skb1->data_len += copy;
+ if (skb != skb1) {
+ skb->len += copy;
+ skb->data_len += copy;
+ }
+ skb->truesize += PAGE_SIZE;
+ atomic_add(PAGE_SIZE, &sk->wmem_alloc);
+ if (memcpy_fromiovec(page_address(page), iov, copy))
+ goto do_fault;
+ } while ((size -= copy) > 0);
+ return 0;
+
+do_oom:
+ return -ENOMEM;
+
+do_fault:
+ return -EFAULT;
+}
+
/*
* Send AF_UNIX data.
*/
@@ -1155,6 +1214,7 @@
unsigned hash;
struct sk_buff *skb;
long timeo;
+ int alloc;
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
@@ -1178,10 +1238,14 @@
goto out;
err = -EMSGSIZE;
- if ((unsigned)len > sk->sndbuf - 32)
+ if ((unsigned)len > sk->sndbuf)
goto out;
- skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
+ alloc = len;
+ if (alloc > SKB_MAX_HEAD(0))
+ alloc = SKB_MAX_HEAD(0);
+
+ skb = sock_alloc_send_skb(sk, alloc, msg->msg_flags&MSG_DONTWAIT, &err);
if (skb==NULL)
goto out;
@@ -1190,7 +1254,7 @@
unix_attach_fds(scm, skb);
skb->h.raw = skb->data;
- err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+ err = datagram_copy_fromiovec(msg->msg_iov, skb, len);
if (err)
goto out_free;
@@ -1275,74 +1339,57 @@
return err;
}
-
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
struct scm_cookie *scm)
{
struct sock *sk = sock->sk;
unix_socket *other = NULL;
- struct sockaddr_un *sunaddr=msg->msg_name;
- int err,size;
struct sk_buff *skb;
+ int err;
int sent=0;
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out_err;
- if (msg->msg_namelen) {
- err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);
+ err = -ENOTCONN;
+ other = unix_peer_get(sk);
+ if (!other)
goto out_err;
- } else {
- sunaddr = NULL;
- err = -ENOTCONN;
- other = unix_peer_get(sk);
- if (!other)
- goto out_err;
- }
if (sk->shutdown&SEND_SHUTDOWN)
goto pipe_err;
- while(sent < len)
- {
- /*
- * Optimisation for the fact that under 0.01% of X messages typically
- * need breaking up.
- */
+ while(sent < len) {
+ int size, alloc;
- size=len-sent;
+ size = len-sent;
/* Keep two messages in the pipe so it schedules better */
- if (size > sk->sndbuf/2 - 64)
- size = sk->sndbuf/2 - 64;
+ if (size > sk->sndbuf/2)
+ size = sk->sndbuf/2;
- if (size > SKB_MAX_ALLOC)
- size = SKB_MAX_ALLOC;
-
/*
* Grab a buffer
*/
-
- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+ alloc = size;
+
+ if (size > SKB_MAX_HEAD(0)) {
+ alloc = SKB_MAX_HEAD(0);
+ if (size > alloc + sysctl_unix_stream_pages*PAGE_SIZE)
+ size = alloc + sysctl_unix_stream_pages*PAGE_SIZE;
+ }
+
+ skb=sock_alloc_send_skb(sk,alloc,msg->msg_flags&MSG_DONTWAIT, &err);
if (skb==NULL)
goto out_err;
- /*
- * If you pass two values to the sock_alloc_send_skb
- * it tries to grab the large buffer with GFP_NOFS
- * (which can fail easily), and if it fails grab the
- * fallback size buffer which is under a page and will
- * succeed. [Alan]
- */
- size = min_t(int, size, skb_tailroom(skb));
-
memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
if (scm->fp)
unix_attach_fds(scm, skb);
- if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
+ if ((err = datagram_copy_fromiovec(msg->msg_iov, skb, size)) != 0) {
kfree_skb(skb);
goto out_err;
}
@@ -1418,13 +1465,10 @@
scm->creds = *UNIXCREDS(skb);
- if (!(flags & MSG_PEEK))
- {
+ if (!(flags & MSG_PEEK)) {
if (UNIXCB(skb).fp)
unix_detach_fds(scm, skb);
- }
- else
- {
+ } else {
/* It is questionable: on PEEK we could:
- do not return fds - good, but too simple 8)
- return fds, and do not return them on read (old strategy,
@@ -1483,13 +1527,10 @@
return timeo;
}
-
-
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
int flags, struct scm_cookie *scm)
{
struct sock *sk = sock->sk;
- struct sockaddr_un *sunaddr=msg->msg_name;
int copied = 0;
int check_creds = 0;
int target;
@@ -1515,21 +1556,18 @@
down(&sk->protinfo.af_unix.readsem);
- do
- {
+ do {
int chunk;
struct sk_buff *skb;
skb=skb_dequeue(&sk->receive_queue);
- if (skb==NULL)
- {
+ if (skb==NULL) {
if (copied >= target)
break;
/*
* POSIX 1003.1g mandates this order.
*/
-
if ((err = sock_error(sk)) != 0)
break;
if (sk->shutdown & RCV_SHUTDOWN)
@@ -1551,60 +1589,44 @@
if (check_creds) {
/* Never glue messages from different writers */
- if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
- skb_queue_head(&sk->receive_queue, skb);
- break;
- }
+ if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0)
+ goto out_put_back;
} else {
/* Copy credentials */
scm->creds = *UNIXCREDS(skb);
check_creds = 1;
}
- /* Copy address just once */
- if (sunaddr)
- {
- unix_copy_addr(msg, skb->sk);
- sunaddr = NULL;
- }
+ chunk = min_t(int, skb->len - sk->protinfo.af_unix.copied, size);
+ err = skb_copy_datagram_iovec(skb, sk->protinfo.af_unix.copied, msg->msg_iov, chunk);
+ if (err)
+ goto out_put_back;
- chunk = min_t(unsigned int, skb->len, size);
- if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
- skb_queue_head(&sk->receive_queue, skb);
- if (copied == 0)
- copied = -EFAULT;
- break;
- }
copied += chunk;
size -= chunk;
/* Mark read part of skb as used */
- if (!(flags & MSG_PEEK))
- {
- skb_pull(skb, chunk);
-
+ if (!(flags & MSG_PEEK)) {
if (UNIXCB(skb).fp)
unix_detach_fds(scm, skb);
/* put the skb back if we didn't use it up.. */
- if (skb->len)
- {
- skb_queue_head(&sk->receive_queue, skb);
- break;
- }
+ if ((sk->protinfo.af_unix.copied += chunk) < skb->len)
+ goto out_put_back;
+
+ sk->protinfo.af_unix.copied = 0;
kfree_skb(skb);
if (scm->fp)
break;
- }
- else
- {
+ } else {
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+out_put_back:
/* put message back and return */
skb_queue_head(&sk->receive_queue, skb);
break;
@@ -1676,10 +1698,12 @@
break;
}
+ down(&sk->protinfo.af_unix.readsem);
spin_lock(&sk->receive_queue.lock);
if((skb=skb_peek(&sk->receive_queue))!=NULL)
- amount=skb->len;
+ amount=skb->len - sk->protinfo.af_unix.copied;
spin_unlock(&sk->receive_queue.lock);
+ up(&sk->protinfo.af_unix.readsem);
err = put_user(amount, (int *)arg);
break;
}
@@ -1734,7 +1758,7 @@
int i;
unix_socket *s;
- len+= sprintf(buffer,"Num RefCount Protocol Flags Type St "
+ len+= sprintf(buffer,"Peer RcvQueue WMem Flags Type St "
"Inode Path\n");
read_lock(&unix_table_lock);
@@ -1742,10 +1766,10 @@
{
unix_state_rlock(s);
- len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld",
- s,
- atomic_read(&s->refcnt),
- 0,
+ len+=sprintf(buffer+len,"%08lX: %08X %08X %08X %04X %02X %5ld",
+ unix_peer(s) ? sock_i_ino(unix_peer(s)) : 0,
+ skb_queue_len(&s->receive_queue),
+ atomic_read(&s->wmem_alloc),
s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
s->type,
s->socket ?
diff -ur ../vger3-011229/linux/net/unix/sysctl_net_unix.c linux/net/unix/sysctl_net_unix.c
--- ../vger3-011229/linux/net/unix/sysctl_net_unix.c Tue Jan 30 21:20:16 2001
+++ linux/net/unix/sysctl_net_unix.c Sat Jan 5 04:10:58 2002
@@ -13,10 +13,14 @@
#include <linux/sysctl.h>
extern int sysctl_unix_max_dgram_qlen;
+extern int sysctl_unix_stream_pages;
ctl_table unix_table[] = {
{NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen",
&sysctl_unix_max_dgram_qlen, sizeof(int), 0600, NULL,
+ &proc_dointvec },
+ {NET_UNIX_STREAM_PAGES, "stream_pages",
+ &sysctl_unix_stream_pages, sizeof(int), 0600, NULL,
&proc_dointvec },
{0}
};