libslack(net) - network module
#define _GNU_SOURCE
#include <slack/std.h>
#include <slack/net.h>
typedef struct sockaddr sockaddr_t;
typedef unsigned short sockport_t;
typedef struct sockopt_t sockopt_t;
typedef union sockaddr_any_t sockaddr_any_t;
typedef struct sockaddr_un sockaddr_un_t;
typedef struct sockaddr_in sockaddr_in_t;
typedef struct sockaddr_in6 sockaddr_in6_t;
typedef struct net_interface_t net_interface_t;
typedef struct rudp_t rudp_t;
struct sockopt_t
{
int level;
int optname;
const void *optval;
int optlen;
};
union sockaddr_any_t
{
sockaddr_t any;
sockaddr_un_t un;
sockaddr_in_t in;
sockaddr_in6_t in6;
};
struct net_interface_t
{
char name[IFNAMSIZ];
unsigned int index;
short flags;
int mtu;
sockaddr_any_t *addr;
sockaddr_any_t *brdaddr;
sockaddr_any_t *dstaddr;
sockaddr_any_t *hwaddr;
};
int net_server(const char *interface, const char *service, sockport_t port, int rcvbufsz, int sndbufsz, sockaddr_t *addr, size_t *addrsize);
int net_client(const char *host, const char *service, sockport_t port, long timeout, int rcvbufsz, int sndbufsz, sockaddr_t *addr, size_t *addrsize);
int net_udp_server(const char *interface, const char *service, sockport_t port, int rcvbufsz, int sndbufsz, sockaddr_t *addr, size_t *addrsize);
int net_udp_client(const char *host, const char *service, sockport_t port, int rcvbufsz, int sndbufsz, sockaddr_t *addr, size_t *addrsize);
int net_create_server(const char *interface, const char *service, sockport_t port, int type, int protocol, sockopt_t *sockopts, sockaddr_t *addr, size_t *addrsize);
int net_create_client(const char *host, const char *service, sockport_t port, sockport_t localport, int type, int protocol, long timeout, sockopt_t *sockopts, sockaddr_t *addr, size_t *addrsize);
int net_multicast_sender(const char *group, const char *service, sockport_t port, sockopt_t *sockopts, sockaddr_t *addr, size_t *addrsize, const char *ifname, unsigned int ifindex, int ttl, unsigned int noloopback);
int net_multicast_receiver(const char *group, const char *service, sockport_t port, sockopt_t *sockopts, sockaddr_t *addr, size_t *addrsize, const char *ifname, unsigned int ifindex);
int net_multicast_join(int sockfd, const sockaddr_t *addr, size_t addrsize, const char *ifname, unsigned int ifindex);
int net_multicast_leave(int sockfd, const sockaddr_t *addr, size_t addrsize, const char *ifname, unsigned int ifindex);
int net_multicast_set_interface(int sockfd, const char *ifname, unsigned int ifindex);
int net_multicast_get_interface(int sockfd);
int net_multicast_set_loopback(int sockfd, unsigned int loopback);
int net_multicast_get_loopback(int sockfd);
int net_multicast_set_ttl(int sockfd, int ttl);
int net_multicast_get_ttl(int sockfd);
int net_tos_lowdelay(int sockfd);
int net_tos_throughput(int sockfd);
int net_tos_reliability(int sockfd);
int net_tos_lowcost(int sockfd);
int net_tos_normal(int sockfd);
struct hostent *net_gethostbyname(const char *name, struct hostent *hostbuf, void **buf, size_t *size, int *herrno);
struct servent *net_getservbyname(const char *name, const char *proto, struct servent *servbuf, void **buf, size_t *size);
int net_options(int sockfd, sockopt_t *sockopts);
List *net_interfaces(void);
List *net_interfaces_with_locker(Locker *locker);
List *net_interfaces_by_family(int family);
List *net_interfaces_by_family_with_locker(int family, Locker *locker);
rudp_t *rudp_create(void);
void rudp_release(rudp_t *rudp);
void *rudp_destroy(rudp_t **rudp);
ssize_t net_rudp_transact(int sockfd, rudp_t *rudp, const void *obuf, size_t osize, void *ibuf, size_t isize);
ssize_t net_rudp_transactwith(int sockfd, rudp_t *rudp, const void *obuf, size_t osize, int oflags, void *ibuf, size_t isize, int iflags, sockaddr_any_t *addr, size_t addrsize);
ssize_t net_pack(int sockfd, long timeout, int flags, const char *format, ...);
ssize_t net_vpack(int sockfd, long timeout, int flags, const char *format, va_list args);
ssize_t net_packto(int sockfd, long timeout, int flags, const sockaddr_t *to, size_t tosize, const char *format, ...);
ssize_t net_vpackto(int sockfd, long timeout, int flags, const sockaddr_t *to, size_t tosize, const char *format, va_list args);
ssize_t net_unpack(int sockfd, long timeout, int flags, const char *format, ...);
ssize_t net_vunpack(int sockfd, long timeout, int flags, const char *format, va_list args);
ssize_t net_unpackfrom(int sockfd, long timeout, int flags, sockaddr_t *from, size_t *fromsize, const char *format, ...);
ssize_t net_vunpackfrom(int sockfd, long timeout, int flags, sockaddr_t *from, size_t *fromsize, const char *format, va_list args);
ssize_t pack(void *buf, size_t size, const char *format, ...);
ssize_t vpack(void *buf, size_t size, const char *format, va_list args);
ssize_t unpack(void *buf, size_t size, const char *format, ...);
ssize_t vunpack(void *buf, size_t size, const char *format, va_list args);
ssize_t net_read(int sockfd, long timeout, char *buf, size_t count);
ssize_t net_write(int sockfd, long timeout, const char *buf, size_t count);
ssize_t net_expect(int sockfd, long timeout, const char *format, ...);
ssize_t net_vexpect(int sockfd, long timeout, const char *format, va_list args);
ssize_t net_send(int sockfd, long timeout, const char *format, ...);
ssize_t net_vsend(int sockfd, long timeout, const char *format, va_list args);
ssize_t sendfd(int sockfd, const void *buf, size_t nbytes, int flags, int fd);
ssize_t recvfd(int sockfd, void *buf, size_t nbytes, int flags, int *fd);
#ifdef SO_PASSCRED
#ifdef SCM_CREDENTIALS
ssize_t recvcred(int sockfd, void *buf, size_t nbytes, int flags, struct ucred *cred);
ssize_t recvfromcred(int sockfd, void *buf, size_t nbytes, int flags, struct sockaddr *src_addr, socklen_t *src_addrlen, struct ucred *cred);
#endif
#endif
int mail(const char *server, const char *sender, const char *recipients, const char *subject, const char *message);
This module provides functions that create client and server sockets (IPv4, IPv6, and UNIX domain sockets, stream or datagram), that expect and send text dialogues/protocols, and that pack and unpack packets according to templates. IPv4 and IPv6 multicasting is supported. Reliability over UDP is provided. There is also a function to send mail, and there are functions to send and receive open file descriptors via UNIX domain sockets from one process to another, and functions to send and receive user credentials via UNIX domain sockets (if supported by the operating system).
int net_server(const char *interface, const char *service, sockport_t port, int rcvbufsz, int sndbufsz, sockaddr_t *addr, size_t *addrsize)
Creates a TCP server socket ready to accept(2) connections on interface
(as determined by gethostbyname(3)).
If interface
is null
, connections will be accepted on all local network interfaces. Otherwise, connections will only be accepted on the specified interface (as determined by gethostbyname(3)).
If service
is non-null
, and is either numeric, or is a service name (as determined by getservbyname(3)), the specified port is used. Otherwise, port
(which must be in host byte order) is used.
If interface
is equal to "/unix"
and service
is an absolute file system path, the server socket created will be a UNIX domain stream socket. Otherwise, a TCP server socket is created. If the RES_OPTIONS
environment variable exists and contains the string "inet6"
, or the /etc/resolv.conf
file contains the inet6
option, the TCP socket will be an IPv6 socket. Otherwise, it will be an IPv4 socket.
If rcvbufsz
is non-zero, the socket's receive buffer size is set to this size. Note that you may not get the size you request. If this is important, use getsockopt(2) to obtain the actual receive buffer size.
If sndbufsz
is non-zero, the socket's send buffer size is set to this size. Note that you may not get the size you ask for. If this is important, use getsockopt(2) to obtain the actual send buffer size.
If addr
and addrsize
are not null
, the address bound to is stored in the buffer pointed to by addr
. *addrsize
specifies the size of the buffer pointed to by addr
. If there is insufficient space, the bound address is not stored in addr
. If addrsize
is not null
, the length of the address is stored there.
On success, returns the new socket descriptor. On error, returns -1
with errno
set appropriately.
int net_client(const char *host, const char *service, sockport_t port, long timeout, int rcvbufsz, int sndbufsz, sockaddr_t *addr, size_t *addrsize)
Creates a TCP client socket and connects to the server listening at host
(as determined by gethostbyname(3)) on the port number specified by service
. service
must either be numeric, or a service name as determined by getservbyname(3). Otherwise, the port number to connect to is given by port
(which must be in host byte order). If host
is null
, the client socket connects to the loopback address.
If host
is equal to "/unix"
and service
is an absolute file system path, the client socket created will be a UNIX domain stream socket. Otherwise, a TCP client socket is created. If the RES_OPTIONS
environment variable exists and contains the string "inet6"
, or the /etc/resolv.conf
file contains the inet6
option, the TCP socket will be an IPv6 socket. Otherwise, it will be an IPv4 socket.
If timeout
is non-zero, it specifies the number of seconds after which to timeout the attempt to connect to the specified server. This can be useful if the client may attempt to connect to a service that is blocked by a firewall that drops its packets or if the host that you are connecting to does not protect itself from SYN floods. The native TCP timeouts are very long (usually minutes) when faced with an unresponsive network and you may not want your programs or their users to wait that long.
If rcvbufsz
is non-zero, the socket's receive buffer size is set to this size. Note that you may not get the size you request. If this is important, use getsockopt(2) to obtain the actual receive buffer size.
If sndbufsz
is non-zero, the socket's send buffer size is set to this size. Note that you may not get the size you ask for. If this is important, use getsockopt(2) to obtain the actual send buffer size.
If addr
and addrsize
are not null
, the address of the peer is stored in the buffer pointed to by addr
. *addrsize
specifies the size of the buffer pointed to by addr
. If there is insufficient space, the peer's address is not stored in addr
. If addrsize
is not null
, the size of the peer's address is stored there.
On success, returns the new socket descriptor. On error, returns -1
with errno
set appropriately.
int net_udp_server(const char *interface, const char *service, sockport_t port, int rcvbufsz, int sndbufsz, sockaddr_t *addr, size_t *addrsize)
Equivalent to net_server(3) except that a UDP server socket is created. If interface
is equal to "/unix"
and service
is an absolute file system path, the server socket created will be a UNIX domain datagram socket. On success, returns the new socket's file descriptor. On error, returns -1
with errno
set appropriately.
int net_udp_client(const char *host, const char *service, sockport_t port, int rcvbufsz, int sndbufsz, sockaddr_t *addr, size_t *addrsize)
Equivalent to net_client(3) except that a UDP client socket is created. If host
is equal to "/unix"
and service
is an absolute file system path, the client socket created will be a UNIX domain datagram socket. On success, returns the new socket's file descriptor. On error, returns -1
with errno
set appropriately.
int net_create_server(const char *interface, const char *service, sockport_t port, int type, int protocol, sockopt_t *sockopts, sockaddr_t *addr, size_t *addrsize)
Equivalent to net_server(3) and net_udp_server(3) only more general. The type of socket is specified by type
(e.g. SOCK_STREAM
or SOCK_DGRAM
) and protocol
(usually zero). If sockopts
is not null
, the socket options specified are set before calling bind(2). On success, returns the new socket's file descriptor. On error, returns -1
with errno
set appropriately.
int net_create_client(const char *host, const char *service, sockport_t port, sockport_t localport, int type, int protocol, long timeout, sockopt_t *sockopts, sockaddr_t *addr, size_t *addrsize)
Equivalent to net_client(3) and net_udp_client(3) only more general. The type of socket is specified by type
(e.g. SOCK_STREAM
or SOCK_DGRAM
) and protocol
(usually zero). If localport
is not zero, it is the port (in host byte order) that the local endpoint binds to. If sockopts
is not null
, the socket options specified are set before calling bind(2). On success, returns the new socket's file descriptor. On error, returns -1
with errno
set appropriately.
int net_multicast_sender(const char *group, const char *service, sockport_t port, sockopt_t *sockopts, sockaddr_t *addr, size_t *addrsize, const char *ifname, unsigned int ifindex, int ttl, unsigned int noloopback)
Creates a UDP multicast sender socket. group
specifies the multicast group that packets will be sent to.
If the RES_OPTIONS
environment variable exists and contains the string "inet6"
or the /etc/resolv.conf
file contains the inet6
option, the multicast sender will be an IPv6 socket. Otherwise, it will be an IPv4 socket.
service
must specify a service name or a numeric port number to use. Otherwise, port
(which must be in host byte order) specifies the port number to use.
sockopts
may contain extra socket options to set.
If addr
and addrsize
are not null
, the multicast group's address is stored in the buffer pointed to by addr
. *addrsize
specifies the size of the buffer pointed to by addr
. If there is insufficient space, the address is not stored in addr
. If addrsize
is not null
, the size of the address is stored there.
If ifname is not null
, it specifies the name of the interface on which to send the multicast packets. Otherwise, if ifindex
is not zero, it specifies the index of the interface on which to send multicast packets. Otherwise, the kernel will choose the interface on which to send multicast packets based on the routing table (which is the default behaviour).
If ttl
is greater than 1
, it specifies the multicast packets' TTL. By default the TTL is 1
. See the Multicast-HOWTO for details on the scoping semantics of the TTL field in multicast packets.
If noloopback
is not zero, multicast loopback is disabled. This would prevent any process on the sending host from receiving the multicast packets sent via this socket. Multicast loopback is enabled by default.
The socket is connected to the specified multicast group address so that send(2) must be used to send packets, rather than sendto(2). This reduces the time spent sending packets by one third because an unconnected UDP socket is temporarily connected to the destination address by the kernel every time sendto(2) is called.
On success, returns the new socket descriptor. On error, returns -1
with errno
set appropriately.
int net_multicast_receiver(const char *group, const char *service, sockport_t port, sockopt_t *sockopts, sockaddr_t *addr, size_t *addrsize, const char *ifname, unsigned int ifindex)
Creates a UDP multicast receiver socket. group
specifies the multicast group that the socket will join.
If the RES_OPTIONS
environment variable exists and contains the string "inet6"
or the /etc/resolv.conf
file contains the inet6
option, the multicast receiver socket will be an IPv6 socket. Otherwise, it will be an IPv4 socket.
service
must specify a service name or a numeric port number to use. Otherwise, port
(which must be in host byte order) specifies the port number to use.
sockopts
may contain extra socket options to set.
If addr
and addrsize
are not null
, the multicast group's address is stored in the buffer pointed to by addr
. *addrsize
specifies the size of the buffer pointed to by addr
. If there is insufficient space, the address is not stored in addr
. If addrsize
is not null
, the size of the address is stored there.
If ifname is not null
, it specifies the name of the interface on which to receive multicast packets. Otherwise, if ifindex
is not zero, it specifies the index of the interface on which to receive multicast packets. Otherwise, the kernel will choose the interface on which to receive multicast packets based on the routing table (which is the default behaviour). The new socket may join the same group on more interfaces by subsequent calls to net_multicast_join(3).
On success, returns the new socket descriptor. On error, returns -1
with errno
set appropriately.
int net_multicast_join(int sockfd, const sockaddr_t *addr, size_t addrsize, const char *ifname, unsigned int ifindex)
Adds sockfd
's membership to the multicast group specified by addr
whose size is addrsize
. If ifname is not null
, it specifies the name of the interface on which to receive multicast packets. Otherwise, if ifindex
is not zero, it specifies the index of the interface on which to receive multicast packets. Otherwise, the kernel will choose the interface on which to receive multicast packets based on the routing table (which is the default behaviour). A multicast socket may join the same group on multiple interfaces by subsequent calls to net_multicast_join(3). Note that there is a system-imposed limit on the number of times a socket may join a multicast group (this limit can be about 20). On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_multicast_leave(int sockfd, const sockaddr_t *addr, size_t addrsize, const char *ifname, unsigned int ifindex)
Drops sockfd
's membership from the multicast group specified by addr
whose size is addrsize
. If ifname is not null
, it specifies the name of the interface on which to drop group membership. Otherwise, if ifindex
is not zero, it specifies the index of the interface on which to drop group membership. Otherwise, the interface that joined most recently will be dropped from the multicast group. On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_multicast_set_interface(int sockfd, const char *ifname, unsigned int ifindex)
Specifies the interface on which sockfd
will send multicast packets. If ifname is not null
, it specifies the name of the interface on which to send the multicast packets. Otherwise, if ifindex
is not zero, it specifies the index of the interface on which to send multicast packets. Otherwise, the kernel will choose the interface on which to send multicast packets based on the routing table (which is the default behaviour). On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_multicast_get_interface(int sockfd)
Returns the index of the interface that sockfd
sends multicast packets on. On error, returns -1
with errno
set appropriately.
int net_multicast_set_loopback(int sockfd, unsigned int loopback)
If loopback
is zero, multicast loopback is disabled for packets sent on sockfd
. This prevents any process on the sending host from receiving the multicast packets sent via this socket. If loopback
is non-zero, multicast loopback is enabled for packets sent on sockfd
(this is the default behaviour). On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_multicast_get_loopback(int sockfd)
Returns whether or not multicast packets sent on sockfd
can be received by any process on the sending host. A non-zero return value means yes. A zero return value means no. On error, returns -1
with errno
set appropriately.
int net_multicast_set_ttl(int sockfd, int ttl)
Sets the TTL for multicast packets sent on sockfd
to ttl
. The default TTL for multicast packets is 1
. See the Multicast-HOWTO for details on the scoping semantics of the TTL field in multicast packets. On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_multicast_get_ttl(int sockfd)
Returns the TTL of multicast packets sent on sockfd
. On error, returns -1
with errno
set appropriately.
int net_tos_lowdelay(int sockfd)
Sets the TOS bits of packets sent on sockfd
to request minimum delay. This is for interactive applications. This results in many small packets. Use this sparingly. On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_tos_throughput(int sockfd)
Sets the TOS bits of packets sent on sockfd
to request maximum throughput. This is for bulk data transfers. Don't forget to also specify buffer sizes that are large enough to maximise throughput. However, be warned that this might not be wise on asymmetric links, because large buffers can lead to bufferbloat. On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_tos_reliability(int sockfd)
Sets the TOS bits of packets sent on sockfd
to request maximum reliability. This should only be used for datagram-based internet management. On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_tos_lowcost(int sockfd)
Sets the TOS bits of packets sent on sockfd
to request minimum monetary cost. Probably a good default. On success, returns 0
. On error, returns -1
with errno
set appropriately.
int net_tos_normal(int sockfd)
Clears the TOS bits of packets sent on sockfd
(the default). On success, returns 0
. On error, returns -1
with errno
set appropriately.
struct hostent *net_gethostbyname(const char *name, struct hostent *hostbuf, void **buf, size_t *size, int *herrno)
A portable, reentrant gethostbyname(3) that handles its own memory allocation requirements. Looks up name. On success, returns hostbuf
with any extra data in *buf
. *size
is the length of *buf
on entry and is updated to reflect the length on exit if a larger buffer was required to perform the lookup. On error, returns null
with *herrno
set appropriately if there was a lookup failure or with errno
set appropriately if there was a memory allocation failure. It is the caller's responsibility to deallocate *buf
using free(3) when the lookup failed, or when the results of the name lookup are no longer required.
Note: If your system has any version of gethostbyname_r(3), it will be used. Otherwise, gethostbyname(3) will be used. Even this might be threadsafe if your system uses thread specific data to make it so.
struct hostent hostbuf[1], *hostent;
void *buf = NULL;
size_t size = 0;
int herrno;
if ((hostent = net_gethostbyname("hostname", hostbuf, &buf, &size, &herrno)))
{
// use hostent ...
}
free(buf);
struct servent *net_getservbyname(const char *name, const char *proto, struct servent *servbuf, void **buf, size_t *size)
A portable, reentrant getservbyname(3) that handles its own memory allocation requirements. Looks up the service name
and proto
. On success, returns servbuf
with any extra data in *buf
. *size
is the length of *buf
on entry and is updated to reflect the length on exit if a larger buffer was required to perform the lookup. On error, returns null
with errno
set appropriately. It is the caller's responsibility to deallocate *buf
using free(3) when the lookup failed, or when the results of the name lookup are no longer required.
Note: If your system has any version of getservbyname_r(3), it will be used. Otherwise, getservbyname(3) will be used. Even this might be threadsafe if your system uses thread specific data to make it so.
struct servent servbuf[1], *servent;
void *buf = NULL;
size_t size = 0;
if ((servent = net_getservbyname("service", "proto", servbuf, &buf, &size)))
{
// use servent ...
}
free(buf);
int net_options(int sockfd, sockopt_t *sockopts)
Sets an arbitrary number of socket options for the socket sockfd
. The options to set are specified by sockopts
which is an array of sockopt_t
structures. Each sockopt_t structure contains the level
, optname
, optval
and optlen
parameters to be passed to setsockopt(2). The array must end with a structure whose optval
element is null
. On success, returns 0
. On error, returns -1
with errno
set appropriately. If setsockopt(2) returns an error, net_options(3) will continue to set any further options but will ultimately return an error itself.
List *net_interfaces(void)
Returns the list of network interfaces. For each interface, calls ioctl(2) to obtain the interface's flags, hardware address, network address, broadcast address if applicable, destination address if applicable, MTU and index. On success, returns a list of net_interface_t objects. It is the caller's responsibility to deallocate the list with list_release(3). On error, returns null
with errno
set appropriately. Note that on Solaris, neither the hardware address nor the index can be returned. This function guesses the index in this case which seems to work. If the RES_OPTIONS
environment variable contains the string "inet6"
, then only IPv6 interfaces are returned. Otherwise, only IPv4 interfaces are returned.
List *net_interfaces_with_locker(Locker *locker)
Equivalent to net_interfaces(3) except that multiple threads accessing the returned list will be synchronised by locker
.
List *net_interfaces_by_family(int family)
Equivalent to net_interfaces(3) except that family
specifies the required address family.
List *net_interfaces_by_family_with_locker(int family, Locker *locker)
Equivalent to net_interfaces_with_locker(3) except that family
specifies the required address family.
rudp_t *rudp_create(void)
Allocates and initialises a retransmission timeout (RTO) estimator for providing reliability over UDP. It is the caller's responsibility to deallocate the estimator using rudp_release(3) or rudp_destroy(3). It is strongly recommended to use rudp_destroy(3), because it also sets the pointer variable to null
. Note that each retransmission timer may only be used for a single destination address. If a UDP socket communicates with multiple peers, a separate estimator must be used for each peer. On success, returns the RTO estimator. On error, returns null
with errno
set appropriately. See the EXAMPLES section.
void rudp_release(rudp_t *rudp)
Releases (deallocates) the RTO estimator, rudp
. See the EXAMPLES section.
void *rudp_destroy(rudp_t **rudp)
Destroys (deallocates and sets to null
) the RTO estimator, *rudp
. Returns null
.
ssize_t net_rudp_transact(int sockfd, rudp_t *rudp, const void *obuf, size_t osize, void *ibuf, size_t isize)
Provides reliable (not infallible) UDP transactions over sockfd
, a socket created with net_udp_client(3) or net_create_client(3). Sends osize
bytes, starting at obuf
, to the address to which sockfd
is connected. rudp
is the retransmission timeout estimator as created by rudp_create(3). The message is prepended by an 8
byte header that contains a timestamp and a sequence number. This is required to enable calculation of the RTT. The peer must expect this header and include it verbatim in its response. Note that the same retransmission timeout estimator (rudp
) should be used for all transactions. Waits for a response. If the retransmission timer expires before a response is received, the retransmission timer is updated, and the packet is retransmitted. This continues until either a response is received, or the packet has been retransmitted three times with no response. If there is a response, at most isize bytes are received in ibuf
. On success, returns the number of bytes received. On error, returns -1
with errno
set appropriately.
ssize_t net_rudp_transactwith(int sockfd, rudp_t *rudp, const void *obuf, size_t osize, int oflags, void *ibuf, size_t isize, int iflags, sockaddr_any_t *addr, size_t addrsize)
Equivalent to net_rudp_transact(3) except that sockfd
is a socket created with net_udp_server(3) or net_create_server(3). addr
is the address of the peer. addrsize
is the size of addr
. sendmsg(2) and recvmsg(2) are used instead of using writev(2) and readv(2). oflags
is passed to sendmsg(2) as the flags
argument. iflags
is passed to recvmsg(2) as the flags
argument. Note that each retransmission timer may only be used for a single destination address. If a UDP socket communicates with multiple peers, a separate estimator must be used for each peer. On success, returns the number of bytes received. On error, returns -1
with errno
set appropriately. The EXAMPLES section below contains the code for this function.
ssize_t net_pack(int sockfd, long timeout, int flags, const char *format, ...)
Creates a packet containing data packed by pack(3) as specified by format
, and sends it on the connected socket, sockfd
, with send(2). If timeout
is non-zero, it is the number of seconds to wait for the send buffer to have enough space for the new data before timing out (this only applies to TCP sockets since UDP has no send buffer). flags
is passed to send(2). This is intended for use with UDP. It can work reliably with TCP, but only when the application protocol involves each peer packing and unpacking alternately, each waiting for the other's response before making their next response. On success, returns the number of bytes packed and sent. On error, returns -1
with errno
set appropriately.
Note, the net_pack(3) functions can sometimes be inappropriate as they inherently involve copying existing data into a new buffer before writing it. It is much faster to not copy the data at all. When possible (i.e. when the data is already in network byte order), use writev(2) instead to write multiple non-contiguous buffers in a single system call.
ssize_t net_vpack(int sockfd, long timeout, int flags, const char *format, va_list args)
Equivalent to net_pack(3) with the variable argument list specified directly as for vprintf(3).
ssize_t net_packto(int sockfd, long timeout, int flags, const sockaddr_t *to, size_t tosize, const char *format, ...)
Creates a packet containing data packed by pack(3) as specified by format
, and sends it on the unconnected socket, sockfd
, to the address specified by to
with length tosize
with sendto(2). flags
is passed to sendto(2). If timeout
is non-zero, it is the number of seconds to wait for the send buffer to have enough space for the new data before timing out. This only applies to TCP sockets since UDP has no send buffer. On success, returns the number of bytes packed and sent. On error, returns -1
with errno
set appropriately.
ssize_t net_vpackto(int sockfd, long timeout, int flags, const sockaddr_t *to, size_t tosize, const char *format, va_list args)
Equivalent to net_packto(3) with the variable argument list specified directly as for vprintf(3).
ssize_t net_unpack(int sockfd, long timeout, int flags, const char *format, ...)
Receives a packet of data on the connected socket, sockfd
, with recv(2), and unpacks it with unpack(3) as specified by format
. flags
is passed to recv(2). timeout
is the number of seconds to wait before timing out. On success, returns the number of bytes received and unpacked. On error, returns -1
with errno
set appropriately.
Note, the net_unpack(3) functions can sometimes be inappropriate as they inherently involve reading data into a single buffer and then copying it into multiple target buffers. It is much faster to not copy the data at all. When possible (i.e. when network byte order and host byte order are the same, so that the data needs no conversion), use readv(2) instead to read into multiple non-contiguous buffers in a single system call.
ssize_t net_vunpack(int sockfd, long timeout, int flags, const char *format, va_list args)
Equivalent to net_unpack(3) with the variable argument list specified directly as for vprintf(3).
ssize_t net_unpackfrom(int sockfd, long timeout, int flags, sockaddr_t *from, size_t *fromsize, const char *format, ...)
Receives a packet of data on the unconnected socket, sockfd
, with recvfrom(2), and unpacks it with unpack(3) as specified by format
. If from
is non-null
, the source address of the message is stored there. fromsize
is a value-result parameter, initialized to the size of the from
buffer, and modified on return to indicate the actual size of the address stored there. flags
is passed to recvfrom(2). timeout
is the number of seconds to wait before timing out. On success, returns the number of bytes received and unpacked. On error, returns -1
with errno
set appropriately.
ssize_t net_vunpackfrom(int sockfd, long timeout, int flags, sockaddr_t *from, size_t *fromsize, const char *format, va_list args)
Equivalent to net_unpackfrom(3) with the variable argument list specified directly as for vprintf(3).
ssize_t pack(void *buf, size_t size, const char *format, ...)
Packs data into buf
as described by format
. The arguments after format
contain the data to be packed. size
is the size of buf
. Returns the number of bytes packed on success, or -1 on error with errno
set appropriately.
Note, this is based on the pack(3) function in perl(1) (in fact, the following documentation is from perlfunc(1)) except that the *
count specifier has different semantics, the ?
count specifier is new, there are no non-nul
-terminated strings or machine dependent formats or uuencoding or BER integer compression, everything is in network byte order, and floats are represented as strings so pack(3) is suitable for serialising data to be written to disk or sent across a network to other hosts. OK, v
and w
specifically aren't in network order, but sometimes that's needed too.
format
can contain the following type specifiers:
a A string with arbitrary binary data
z A nul-terminated string, will be nul-padded
b A bit-string (rounded out to the nearest byte boundary)
h A hexadecimal string (rounded out to the nearest byte boundary)
c A char (8 bits)
s A short (16 bits)
i An int (32 bits)
l A long (64 bits - only on some systems)
f A single-precision float (length byte + text + nul)
d A double-precision float (length byte + text + nul)
v A short in "VAX" (little-endian) order (16 bits)
w An int in "VAX" (little-endian) order (32 bits)
p A pointer (32 or 64 bits)
x A nul byte
X Back up a byte
@ Null fill to absolute position
The following rules apply:
Each letter may optionally be followed by a number giving a repeat count or length, or by "*"
or "?"
. A "*"
will obtain the repeat count or length from the next argument (like printf(3)). The count argument must appear before the first corresponding data argument. When unpacking "a"
, "z"
, "b"
or "h"
, a "?"
will obtain the repeat count or length from the size_t object pointed to by the next argument, and the size of the target buffer in the argument after that. These two arguments must appear before the first corresponding target buffer argument. This enables unpacking packets that contain length fields without risking target buffer overflow.
With all types except "a"
, "z"
, "b"
and "h"
the pack(3) function will gobble up that many arguments.
The "a"
and "z"
types gobble just one value, but pack it as a string of length count (specified by the corresponding number), truncating or padding with nul
bytes as necessary. It is the caller's responsibility to ensure that the data arguments point to sufficient memory. When unpacking, "z"
strips everything after the first nul
, and "a"
returns data verbatim.
Likewise, the "b"
field packs a string that many bits long.
The "h"
field packs a string that many nybbles long.
The "p"
type packs a pointer. You are responsible for ensuring the memory pointed to is not a temporary value (which can potentially get deallocated before you get around to using the packed result). A null
pointer is unpacked if the corresponding packed value for "p"
is null
. Of course, "p"
is useless if the packed data is to be sent over a network to another process.
The integer formats "c"
, "s"
, "i"
and "l"
are all in network byte order, and so can safely be packed for sending over a network to another process. However, "l"
relies on a non-ISO C 89 language feature (namely, the long long int type which is in ISO C 99), and so should not be used in portable code, even if it is supported on the local system. There is no guarantee that a long long packed on one system will be unpackable on another. At least not until C99 is more widespread. It should be OK now.
Real numbers (floats and doubles) are packed in text format. Due to the multiplicity of floating point formats around, this is done to safely transport real numbers across a network to another process.
It is the caller's responsibility to ensure that there are sufficient arguments provided to satisfy the requirements of format
.
ssize_t vpack(void *buf, size_t size, const char *format, va_list args)
Equivalent to pack(3) with the variable argument list specified directly as for vprintf(3).
ssize_t unpack(void *buf, size_t size, const char *format, ...)
Unpacks the data in buf
which was packed by pack(3). size
is the size of buf
. format
must be equivalent to the format
argument to the call to pack(3) that packed the data. The remaining arguments must be pointers to variables that will hold the unpacked data or null
. If any are null
, the corresponding data will be skipped (i.e. not unpacked). Unpacked "z"
, "b"
and "h"
strings are always nul
-terminated. It is the caller's responsibility to ensure that the pointers into which these strings are unpacked contain enough memory (count + 1 bytes). It is the caller's responsibility to ensure that the non-null
pointers into which "a"
strings are unpacked also contain enough memory (count bytes). It is the caller's responsibility to ensure that there are sufficient arguments supplied to satisfy the requirements of format
, even if they are just null
pointers. Returns the number of bytes unpacked on success, or -1 on error with errno
set appropriately.
ssize_t vunpack(void *buf, size_t size, const char *format, va_list args)
Equivalent to unpack(3) with the variable argument list specified directly as for vprintf(3).
ssize_t net_read(int sockfd, long timeout, char *buf, size_t count)
Repeatedly calls read(2) on the connection-oriented socket, sockfd
, until count
bytes have been read into buf
, or until EOF is encountered, or until it times out (after timeout
seconds). On success, returns the number of bytes read. On error, returns -1
with errno
set appropriately.
ssize_t net_write(int sockfd, long timeout, const char *buf, size_t count)
Repeatedly calls write(2) on the connection-oriented socket, sockfd
, until count
bytes from buf
have been written, or until it times out (after timeout
seconds). On success, returns the number of bytes written. On error, returns -1
with errno
set appropriately.
ssize_t net_expect(int sockfd, long timeout, const char *format, ...)
Expects and confirms a formatted text message from a remote connection on the socket, sockfd
. timeout
is the number of seconds to wait before timing out. If timeout
is 0
, times out immediately. On success, returns the number of conversions performed (see scanf(3)). When the connection closes, returns 0
. On error, returns -1
with errno
set appropriately.
Note: This is generally unreliable. When TCP segments get lost in transit, the re-sent bytes can form part of a larger segment so the "boundaries" that you may expect in your input can fail to appear. This can lead to lost data (read but not expected). This can only really be used safely when the application protocol involves each peer reading and writing alternately, each waiting for the other's response before making their next response. In short, net_expect(3) should only be used in concert with net_send(3).
ssize_t net_vexpect(int sockfd, long timeout, const char *format, va_list args)
Equivalent to net_expect(3) with the variable argument list specified directly as for vprintf(3).
ssize_t net_send(int sockfd, long timeout, const char *format, ...)
Sends a formatted string (see printf(3)) to a remote connection on the socket, sockfd
. timeout
is the number of seconds to wait before timing out. On success, returns the number of bytes written. On error, returns -1
with errno
set appropriately.
ssize_t net_vsend(int sockfd, long timeout, const char *format, va_list args)
Equivalent to net_send(3) with the variable argument list specified directly as for vprintf(3).
ssize_t sendfd(int sockfd, const void *buf, size_t nbytes, int flags, int fd)
Sends the open file descriptor, fd
, to another process (related or unrelated) on the other end of the UNIX domain socket, sockfd
. Equivalent to send(2) in all other respects. UNIX domain sockets can be created using net_client(3) or net_server(3) with a first argument of "/unix"
, or using socketpair(2) or pipe(2) (under SVR4). It is safe to close(2) (and even unlink(2)) the file descriptor after sending it. The kernel won't really close it (or delete it) until the receiving process closes the descriptor. If the sender doesn't close fd
, both processes share the same file table entry in the kernel. This means sharing file position if the descriptor refers to a regular file. If the receiver doesn't receive the file descriptor with recvfd(3) when it is sent, the descriptor will be closed (in the receiving process). A file descriptor must always be passed along with some normal data. Linux doesn't support calling recv(2) with a null
buffer or zero length. On success, returns 0
. On error, returns -1
with errno
set appropriately.
ssize_t recvfd(int sockfd, void *buf, size_t nbytes, int flags, int *fd)
Receives an open file descriptor (which will be stored in *fd
) from another process (related or unrelated) on the other end of the UNIX domain socket, sockfd
. Equivalent to recv(2) in all other respects. UNIX domain sockets can be created using net_client(3) or net_server(3) with a first argument of "/unix"
, or using socketpair(2) or pipe(2) (under SVR4). If the sender doesn't close the file descriptor, both processes share the same file table entry in the kernel. This means sharing file position if the descriptor refers to a regular file. If the sender sends the same file descriptor multiple times, all received file descriptors also share the same file table entry in the kernel. If the receiver doesn't receive the file descriptor with recvfd(3) when it is sent with sendfd(3), the descriptor will be closed (in the receiving process). A file descriptor must always be passed along with some normal data. Linux doesn't support calling recv(2) with a null
buffer or zero length. Don't set MSG_PEEK
in flags
(the results are unpredictable). On success, returns 0
. On error, returns -1
with errno
set appropriately. If the file descriptor was not passed, *fd
is set to -1
.
ssize_t recvcred(int sockfd, void *buf, size_t nbytes, int flags, struct ucred *cred)
Receives the user credentials of the process on the other end of the UNIX domain socket, sockfd
, and stores them in *cred
. Equivalent to recv(2) in all other respects. Requires that the SO_PASSCRED
socket option has been set for sockfd
in advance. On datagram sockets, user credentials accompany every datagram. On stream sockets, user credentials are sent only once, the first time data is sent. On success, returns the number of bytes received. On error, returns -1
with errno
set appropriately. If the user credentials were not provided by the kernel, cred
is filled with zero bytes (so cred[0].pid == 0
).
This function is only available on Linux.
ssize_t recvfromcred(int sockfd, void *buf, size_t nbytes, int flags, struct sockaddr *src_addr, socklen_t *src_addrlen, struct ucred *cred)
Receives the user credentials of the process on the other end of the UNIX domain socket, sockfd
, and stores them in *cred
. Equivalent to recvfrom(2) in all other respects. Requires that the SO_PASSCRED
socket option has been set for sockfd
in advance. On datagram sockets, user credentials accompany every datagram. On stream sockets, user credentials are sent only once, the first time data is sent. On success, returns the number of bytes received. On error, returns -1
with errno
set appropriately. If the user credentials were not provided by the kernel, cred
is filled with zero bytes (so cred[0].pid == 0
).
This function is only available on Linux.
int mail(const char *server, const char *sender, const char *recipients, const char *subject, const char *message)
Sends a mail message consisting of subject
and message
from sender
to the addresses in recipients
. recipients
contains mail addresses separated by sequences of comma and/or space characters. message
must not contain any lines containing only a '.'
character. On success, returns 0
. On error, returns -1
with errno
set appropriately.
Here is some vital information about socket options that never made it into the setsockopt(2) manpage (where it would be most useful). It's from "UNIX Network Programming: Networking APIs: Sockets and XTI (Volume 1)" by W. Richard Stevens.
SO_LINGER
Never set this option. There are two (bad) reasons why people set this option. The first reason is to avoid having to wait until after a socket has left the TIME_WAIT
state before restarting a server that has terminated. The TIME_WAIT
state is your friend. Do not try to avoid it. If you avoid it, you break TCP. If you break TCP, you will be punished. Set SO_REUSEADDR
instead. This is what it's for.
The other reason is to know when the peer has received all sent data. This probably doesn't work the way you want. It can only tell you when the peer TCP has acknowledged the data. It cannot tell you when the peer application has read the data. To do this, use shutdown(2) with a second argument of SHUT_WR
and then call read(2) until it returns 0
. This tells you that the peer application has read all sent data, knows that it has read all sent data (because it received your FIN) and has closed its half of the connection with either close(2) or shutdown(2) with a second argument of SHUT_WR
(because you have received the peer's FIN). Then you can close(2) the socket, safe in the knowledge that no data has been lost.
If you set SO_LINGER
with a zero timeout, the peer will think your application has crashed or aborted the connection (because it receives an RST). The only time to use SO_LINGER
is when this is the behaviour you want.
SO_REUSEADDR
Use this option for every TCP server socket. The net server functions set this option for every TCP server socket. This means that if your server dies, the new process that replaces it will be able to bind to the server's port immediately. This option is also needed when multiple copies of a multicast application need to run on the same host and SO_REUSEPORT
isn't defined. This option must be set before bind(2).
TCP_NODELAY
Avoid setting this option whenever possible (i.e. most of the time). It disables the Nagle algorithm. The Nagle algorithm is your friend. It stops you polluting the network with annoying little packets. If you must set it, please ensure that the traffic is restricted to your own network and leave the Internet alone.
Setting this option is often the wrong solution to a bad network programming practice. If an application protocol involves immediate responses to each message and exceptionally long delays are experienced, it's probably due to the message being sent with multiple small write(2)s (e.g. application header first, then data) instead of a single write(2).
If a message is sent in small write(2)s, the first write(2) will result in a small segment being sent. If the data in that segment does not contain enough information for the peer to respond immediately, the peer TCP will not ACK the segment until the ACK timer expires (50ms - 200ms). This is the delayed ACK algorithm. The sending TCP will not send the second small segment (containing the remainder of the message) until the first small segment has been acknowledged by the peer TCP. This is the Nagle Algorithm.
The solution to this problem is not to disable the Nagle algorithm, but rather to modify the application so that the message is sent in a single call to writev(2). Avoid copying separate buffers into a single buffer and then calling write(2) as it is less efficient.
This option should only be set when the peer application does not respond to each message and there can be no delay in sending the messages (e.g. real time monitoring systems) or when, even though the peer does respond to each message, the application can't hang around waiting for the response to the previous message before sending the next message (e.g. highly interactive applications like The X Window System).
SO_SNDBUF
This option specifies how much unacknowledged data you are willing to have out in the network before you stop sending data and wait for some acknowledgement. For bulk transfers, the send and receive buffer sizes need to be set to the capacity of the pipe (i.e. the bandwidth-delay product) otherwise throughput will be limited by the buffer sizes rather than by the network. The bandwidth-delay product is the bandwidth of the network multiplied by the round trip time. Here are some examples. Note that these values are for raw bandwidth, not data bandwidth. Actual values will be smaller due to packet header overhead.
Network | Bandwidth(bps) | RTT(ms) | Buffer(bytes)
--------------------------+----------------+---------+--------------
Ethernet LAN (10Mb/s) | 10,000,000 | 3 | 3,750
Ethernet LAN (100Mb/s) | 100,000,000 | 3 | 37,500
T1, transcontinental | 1,544,000 | 60 | 11,580
T1, satellite | 1,544,000 | 500 | 96,500
T3, transcontinental | 45,000,000 | 60 | 337,500
Gigabit, transcontinental | 1,000,000,000 | 60 | 7,500,000
Gigabit Satellite Network | 155,520,000 | 500 | 9,720,000
(SONET OC-3) | | |
Gigabit Satellite Network | 622,080,000 | 500 | 38,880,000
(SONET OC-12) | | |
Of course, it's generally impossible to know in advance what the bandwidth or RTT will be, and they can both change during the life of the connection. Ideally, the kernel would automatically adjust buffer sizes as needed, but don't hold your breath. Unless you know exactly what kind of network your application will be running on, it's best to set buffer sizes to values obtained from the user via a configuration file or user interface. Bear in mind that most kernels don't support buffer sizes larger than a few hundred kilobytes anyway.
Also note that TCP over satellite connections can behave very badly. Everything is fine provided that there's no congestion. However, if a single packet is lost, throughput will halve due to congestion avoidance, every segment sent since the lost packet will have to be retransmitted (that's 38MB!) and it takes five minutes to reach maximum throughput again due to the long RTT. Selective ACKs are needed in TCP to fix this. Fortunately, Linux (and probably other) systems support selective ACKs.
This option can also be used to avoid the dreaded interaction between the Nagle Algorithm and Delayed ACK algorithm during bulk data transfer. This interaction cannot occur during bulk transfer if the send buffer size is at least 3 times the Maximum Segment Size (MSS). Having a send buffer this large means that the sender is always capable of sending two full segments. If the receiver's receive buffer size isn't large enough to accept both segments, it will ACK each segment without delay (to indicate that it is running out of buffer space). If the receiver's receive buffer size is large enough to accept both segments, it will ACK every second segment without delay (so as not to disrupt your TCP's RTT calculations). The buffer size should actually be an even multiple of the MSS (i.e. at least four times the MSS). Here are some examples.
Link | MTU(bytes) | MSS(bytes) | 4*MSS(bytes)
---------+------------+------------+--------------
Ethernet | 1,500 | 1,460 | 5,840
ATM | 9,188 | 9,148 | 36,592
HIPPI | 65,535 | 65,495 | 261,980
Some TCP implementations automatically round the send and receive buffer sizes up to an even multiple of the MSS after establishing the connection. So if you set these options, do so before establishing the connection (i.e. before listen(2) or connect(2)). The net server and client functions set these options at the right time if requested.
This option, when set for UDP sockets, limits the maximum datagram size that can be sent.
SO_RCVBUF
Much of what was said about the send buffer size applies to the size of the peer's receive buffer. If your application is willing to accept large amounts of data, it needs to advertise the fact by having a large receive buffer. If the long fat pipe TCP options are required (Window Scale), they must be negotiated during connection setup (in the SYN packets) so this option must be set before listen(2) or connect(2). The net server and client functions set this option at the right time if requested.
This option, when set for UDP sockets, specifies how many received datagrams to queue before discarding datagrams.
SO_KEEPALIVE
This option causes TCP to send a probe after two hours of inactivity to check that the connection is still alive. Many people think that two hours is too long to wait so they implement application level heartbeats instead (e.g. BGP routing daemons send keepalive packets every 30 seconds). Many people think that this functionality belongs in the application anyway. The POSIX.1g standard requires the TCP_KEEPALIVE
option which lets you specify how many seconds to wait before sending the probe but this option isn't widely implemented yet. Until it is, the SO_KEEPALIVE
option is not very useful.
Here are some things to consider when designing packet headers and distributed algorithms gleaned from "Interconnections: Bridges, Routers, Switches and Internetworking Protocols" by Radia Perlman.
Simple protocols are more likely to be successfully implemented and deployed. Various factors complicate a protocol:
Design by committee (multiple ways to do the same thing).
Backwards Compatibility.
Flexibility.
Optimality.
Underspecification (leaving decisions to the implementer).
Exotic features.
Solve at least one actual problem. Do nothing that is of no use.
Calculate the overhead of algorithms and protocols. Does it scale? How far? Does it matter?
If there are assumptions about the size of the problem, either make them impossibly huge, or cope when the limit is exceeded.
Identifiers take two forms: (1) centrally administered numbers (e.g. port numbers) which are short, fixed size, fast and easy to locate, but hard to obtain, and (2) hierarchical identifiers (e.g. MIB names) with decentralised administration. These are large, variable size, slow, and hard to locate (no central authority), but easy to obtain.
If some information in a packet is rarely needed, make it an option. It is better for a few packets to be larger and slower than for all other packets to bear unused overhead.
It is better to overestimate than to underestimate. It makes protocols live longer.
Don't assume addresses are IPv4 addresses.
Spare bits must be transmitted as zero and ignored upon receipt. That way, they can later be used by future versions to encode features that can safely be ignored by earlier versions.
Version numbers can be a simple number, or split into major and minor version components. Minor version increments indicate backwards-compatible changes. Major version increments indicate incompatible changes. If a node receives a packet with a version it doesn't know about, it should drop it or respond with the version it does understand. The other node can switch to the older protocol when it receives this packet. However, nodes should occasionally forget that the other node speaks an older version of the protocol to prevent two nodes from incorrectly thinking that the other can only speak an old version of the protocol.
Avoid having version numbers wrap around, either by making the version field huge or by incrementing versions very rarely. If the version can wrap, make the highest possible version number indicate that the actual version follows in a larger field.
Another way to provide for future protocol evolutions is to allow options to be appended. Options should be encoded as <type, length, value>, and the length must be interpreted in the same way for all options. This allows unknown options to be skipped. Some options should cause the packet to be dropped. The type field can be used to specify whether the node should skip the option or drop the packet: e.g. skip options with odd numbered types and drop packets when options with even numbered types are encountered.
When migrating from one protocol to another, incompatible protocol, it's easiest to keep them separate (e.g. dual IPv4/IPv6 stacks), because migration can't be done atomically and it can be difficult to translate between two protocols.
Have parameters when there are settings that the user may want to control.
Don't have parameters just because you can't decide on the setting. Who else will?
Choose or calculate parameters when possible to reduce human involvement.
Make it possible to change parameters one at a time throughout a network without things breaking.
Nodes can report their parameters to their neighbours so they adjust their own parameters accordingly or detect misconfiguration.
Have a field that indicates the protocol type. This can allow multiplexing of mini-protocols within the application if the need ever arises.
There are three kinds of robustness. Simple robustness is when a node can cope when other nodes go down. Self stabilising robustness is when, even though a node may not cope with another node malfunctioning, it will return to correct behaviour when the malfunctioning node is fixed. Byzantine robustness is when a node behaves properly even when malicious or malfunctioning nodes are operating. In this day and age, Byzantine robustness is a necessity.
Exercise every single line of code, then torture every single line of code.
Sometimes it's better to crash than to malfunction.
Sometimes you can partition a network to contain a problem.
Test connectivity, don't assume it.
Simple checksums can be tricked. Use SHA-2/3 or public key signatures when practical. Use encryption and authentication when possible (e.g. Transport Layer Security/Secure Shell tunnels).
Process packets quickly to avoid denial of service attacks.
Elections can be deterministic (the same node wins every time it is up) or stable (once a node is elected, it stays elected until it goes down). If every node is configured with a priority, and the election winner increases its priority by N after winning an election, then you can achieve deterministic elections by configuring nodes with priorities that differ by more than N, and you can achieve stable elections by configuring nodes with the same priority.
Understand the performance requirements that define a "correct" implementation. For example, processing packets at wire speed is necessary to avoid denials of service.
These are the errors generated by the functions that return -1
on error. Additional errors may be generated and returned from the underlying system calls. See their manual pages.
ENOENT
gethostbyname(3) failed to identify the host
or interface
argument passed to one of the socket functions.
ENOSYS
gethostbyname(3) returned an address from an unsupported address family.
The "l"
format was used with pack(3) or unpack(3) when the system doesn't support it or it wasn't compiled into libslack.
EINVAL
A string argument is null
.
A pack format count is not a positive integer.
An unpack count or limit argument is not a positive integer.
An argument containing "a"
, "z"
, "b"
or "h"
data to be packed is null
.
An argument containing "b"
data to be packed contains characters outside the range [01].
An argument containing "h"
data to be packed contains characters outside the range [0-9a-fA-F].
An "X"
pack instruction is trying to go back past the start of the packet.
The count argument to an "@"
pack instruction refers to a location before that where the instruction was encountered (i.e. it's trying to pack leftwards).
The format
argument to pack(3) or unpack(3) contains an illegal character.
An unpack ?
indirect count argument is null
.
ENOSPC
A message was too large to be sent with net_send(3).
A packet was too small to store all of the data to be packed or unpacked.
An unpack ?
indirect count argument points to a number greater than the subsequent limit argument (not enough space in the target buffer).
ETIMEDOUT
net_expect(3) or net_send(3) timed out.
EPROTO
(or EPROTOTYPE
on Mac OS X)
mail(3) encountered an error in the dialogue with the SMTP server. The most likely cause of this is a missing or inadequate domain name for the sender address on systems where sendmail(8) requires a real domain name.
MT-Safe
A TCP server:
#include <slack/std.h>
#include <slack/net.h>
// Writes the fixed three-byte reply "ok\n" to fd.
// The result of write(2) is deliberately not checked in this example.
void provide_service(int fd)
{
    static const char reply[] = "ok\n";

    write(fd, reply, sizeof reply - 1);
}
// Forking TCP server: creates a listening socket with net_server(3), then
// forks one child per accepted connection. The child calls
// provide_service() and exits; the parent closes its copy of the
// connection and goes back to accept(2).
int main()
{
    int listener, conn;

    listener = net_server(NULL, "service", 30000, 0, 0, NULL, NULL);
    if (listener == -1)
        return 1;

    for (;;)
    {
        pid_t child;

        conn = accept(listener, NULL, NULL);
        if (conn == -1)
            break;

        child = fork();
        if (child == -1)
            return 1;

        if (child == 0)
        {
            // Child: serve this one client, then terminate
            provide_service(conn);
            _exit(EXIT_SUCCESS);
        }

        // Parent: the child owns the connection now
        close(conn);
    }

    return EXIT_FAILURE; // only reached when accept(2) fails
}
A TCP client:
#include <slack/std.h>
#include <slack/net.h>
// Stub: application-specific code goes here
void request_service(int fd)
{
    (void)fd; // Do something here
}
// Stub: application-specific code goes here
void process_response(int fd)
{
    (void)fd; // Do something here
}
// TCP client: connects with net_client(3), sends a request, handles the
// response and closes the connection.
int main()
{
    int sockfd = net_client("localhost", "service", 30000, 5, 0, 0, NULL, NULL);

    if (sockfd == -1)
        return EXIT_FAILURE;

    request_service(sockfd);
    process_response(sockfd);
    close(sockfd);

    return EXIT_SUCCESS;
}
A UDP server:
#include <slack/std.h>
#include <slack/net.h>
// Stub: application-specific code goes here
void provide_service(char *pkt)
{
    (void)pkt; // Do something here
}
// UDP server: creates a socket with net_udp_server(3), then loops forever
// receiving an 8-byte packet, passing it to provide_service() and sending
// the buffer back to the address the packet came from.
// Exits with EXIT_FAILURE as soon as any call fails.
int main()
{
    char pkt[8];
    sockaddr_any_t addr;
    socklen_t addrsize; // recvfrom(2) and sendto(2) use socklen_t, not size_t
    int servfd;

    if ((servfd = net_udp_server(NULL, "service", 30000, 0, 0, NULL, NULL)) == -1)
        return EXIT_FAILURE;

    for (;;)
    {
        // recvfrom(2) overwrites addrsize, so reset it for every packet
        addrsize = sizeof addr;

        if (recvfrom(servfd, pkt, 8, 0, &addr.any, &addrsize) == -1)
            return EXIT_FAILURE;

        provide_service(pkt);

        if (sendto(servfd, pkt, 8, 0, &addr.any, addrsize) == -1)
            return EXIT_FAILURE;
    }

    return EXIT_SUCCESS; // unreached
}
A UDP client:
#include <slack/std.h>
#include <slack/net.h>
// Stub: application-specific code goes here
void build_request(char *pkt)
{
    (void)pkt; // Do something here
}
// Stub: application-specific code goes here
void process_response(char *pkt)
{
    (void)pkt; // Do something here
}
// UDP client: creates a socket with net_udp_client(3), sends one 8-byte
// request and waits for one 8-byte response in the same buffer.
int main()
{
    char pkt[8];
    int sockfd;

    sockfd = net_udp_client("localhost", "service", 30000, 0, 0, NULL, NULL);
    if (sockfd == -1)
        return EXIT_FAILURE;

    build_request(pkt);

    // One request out, then one response back
    if (send(sockfd, pkt, sizeof pkt, 0) == -1 || recv(sockfd, pkt, sizeof pkt, 0) == -1)
        return EXIT_FAILURE;

    process_response(pkt);
    close(sockfd);

    return EXIT_SUCCESS;
}
A reliable UDP client:
#include <slack/std.h>
#include <slack/net.h>
// Stub: application-specific code goes here
void build_request(char *pkt)
{
    (void)pkt; // Do something here
}
// Stub: application-specific code goes here
void process_response(char *pkt)
{
    (void)pkt; // Do something here
}
// Reliable UDP client: performs a single request/response exchange with
// net_rudp_transact(3) over a socket created by net_udp_client(3), using
// an rudp_t object obtained from rudp_create().
int main()
{
    char request[8], response[8];
    rudp_t *rudp;
    int sockfd = net_udp_client("localhost", "echo", 7, 0, 0, NULL, NULL);

    if (sockfd == -1)
        return EXIT_FAILURE;

    rudp = rudp_create();
    if (rudp == NULL)
        return EXIT_FAILURE;

    build_request(request);

    if (net_rudp_transact(sockfd, rudp, request, 8, response, 8) == -1)
        return EXIT_FAILURE;

    process_response(response);

    // Release the rudp object before closing the socket
    rudp_release(rudp);
    close(sockfd);

    return EXIT_SUCCESS;
}
Expect/Send SMTP protocol:
#include <slack/std.h>
#include <slack/net.h>
// Sends one mail message through the SMTP server on localhost by strictly
// alternating net_send(3) and net_expect(3), checking the numeric reply
// code after every command.
// Returns 1 if the whole dialogue succeeded and 0 otherwise (never -1).
// The connection is closed before returning if it was opened.
int tinymail(char *sender, char *recipient, char *subject, char *message)
{
    int smtp = net_client("localhost", "smtp", 25, 5, 0, 0, NULL, NULL);
    int code;

    // The && chain stops at the first step that fails.
    // Every SMTP line, including DATA and the blank line that separates
    // the headers from the body, must be terminated by CRLF.
    int rc =
        smtp != -1 &&
        net_expect(smtp, 10, "%d", &code) == 1 && code == 220 &&
        net_send(smtp, 10, "HELO %s\r\n", "localhost") != -1 &&
        net_expect(smtp, 10, "%d", &code) == 1 && code == 250 &&
        net_send(smtp, 10, "MAIL FROM: <%s>\r\n", sender) != -1 &&
        net_expect(smtp, 10, "%d", &code) == 1 && code == 250 &&
        net_send(smtp, 10, "RCPT TO: <%s>\r\n", recipient) != -1 &&
        net_expect(smtp, 10, "%d", &code) == 1 && code == 250 &&
        net_send(smtp, 10, "DATA\r\n") != -1 &&
        net_expect(smtp, 10, "%d", &code) == 1 && code == 354 &&
        net_send(smtp, 10, "From: %s\r\n", sender) != -1 &&
        net_send(smtp, 10, "To: %s\r\n", recipient) != -1 &&
        net_send(smtp, 10, "Subject: %s\r\n", subject) != -1 &&
        net_send(smtp, 10, "\r\n%s\r\n.\r\n", message) != -1 &&
        net_expect(smtp, 10, "%d", &code) == 1 && code == 250 &&
        net_send(smtp, 10, "QUIT\r\n") != -1 &&
        net_expect(smtp, 10, "%d", &code) == 1 && code == 221;

    if (smtp != -1)
        close(smtp);

    return rc;
}
// Sends a test message with tinymail() and reports the outcome through
// the process exit status.
int main(int ac, char **av)
{
    // tinymail() returns the result of a boolean && chain: non-zero on
    // success and 0 on failure. It never returns -1.
    if (!tinymail("raf@raf.org", "raf@raf.org", "This is a test", "Are you receiving me?\n"))
        return EXIT_FAILURE;

    return EXIT_SUCCESS;
}
Unpack the size of a gif image:
unsigned short width;
unsigned short height;

// "z6" with a null target skips the first 6 bytes of the header;
// "v2" then unpacks two little-endian 16-bit values
unpack(gif, 10, "z6v2", NULL, &width, &height);
Pack and unpack a packet with a length field:
char pkt[9];
char data[5] = "4321";
int packed;
int unpacked;
size_t size;

// "i" stores the length; "a*" takes its count from the argument that
// precedes the data
packed = pack(pkt, sizeof pkt, "ia*", sizeof data, sizeof data, data);

// "a?" takes its count from the size_t that &size points to, and fails
// if that count exceeds the limit given next (sizeof data)
unpacked = unpack(pkt, packed, "ia?", &size, &size, sizeof data, data);
Pack examples from perlfunc(1):
// The trailing comment on each call shows the bytes stored in pkt.
// A letter with no count, or a repeat count on "c", takes one argument per value:
pack(pkt, 4, "cccc", 'A', 'B', 'C', 'D'); // "ABCD"
pack(pkt, 4, "c4", 'A', 'B', 'C', 'D'); // "ABCD"
// "x" inserts a nul byte and takes no argument:
pack(pkt, 6, "ccxxcc", 'A', 'B', 'C', 'D'); // "AB\0\0CD"
// Shorts are packed in network byte order:
pack(pkt, 4, "s2", 1, 2); // "\0\1\0\2"
// "a4" takes a single string argument and packs 4 bytes of it:
pack(pkt, 4, "a4", "abcd", "x", "y", "z"); // "abcd"
// Four separate "a"s each pack one byte of their own argument:
pack(pkt, 4, "aaaa", "abcd", "x", "y", "z"); // "axyz"
// "z14" pads the string with nul bytes up to 14 bytes:
pack(pkt, 14, "z14", "abcdefg"); // "abcdefg\0\0\0\0\0\0\0"
// Converts a string of up to 32 binary digits into an int.
// The string is left-padded with '0' to 32 digits, packed as a "b32"
// bit-string into 4 bytes, and those bytes are unpacked as an "i" int.
// Returns 0 if binstr is longer than 32 digits or if pack(3)/unpack(3)
// fails (e.g. when binstr contains characters other than 0 and 1).
int binary(const char *binstr)
{
    char pkt[4], data[33];
    size_t len;
    int ret = 0;

    len = strlen(binstr);

    // 32 - len would wrap around for longer input and overrun data[]
    if (len > 32)
        return 0;

    memset(data, '0', 32 - len);
    strlcpy(data + 32 - len, binstr, len + 1); // len + 1 bytes remain in data[]

    // Don't unpack, or return, bytes that were never packed
    if (pack(pkt, 4, "b32", data) == -1 || unpack(pkt, 4, "i", &ret) == -1)
        return 0;

    return ret;
}
// Converts a string of up to 8 hexadecimal digits into an int.
// The string is left-padded with '0' to 8 digits, packed as an "h8"
// hexadecimal string into 4 bytes, and those bytes are unpacked as an
// "i" int.
// Returns 0 if hexstr is longer than 8 digits or if pack(3)/unpack(3)
// fails (e.g. when hexstr contains characters outside [0-9a-fA-F]).
int hexadecimal(const char *hexstr)
{
    char pkt[4], data[9];
    size_t len;
    int ret = 0;

    len = strlen(hexstr);

    // 8 - len would wrap around for longer input and overrun data[]
    if (len > 8)
        return 0;

    memset(data, '0', 8 - len);
    strlcpy(data + 8 - len, hexstr, len + 1); // len + 1 bytes remain in data[]

    // Don't unpack, or return, bytes that were never packed
    if (pack(pkt, 4, "h8", data) == -1 || unpack(pkt, 4, "i", &ret) == -1)
        return 0;

    return ret;
}
The pack functions assume the following: There are 8 bits in a byte. A char is 1 byte. A short can be stored in 2 bytes. Integers, long integers and pointers can be stored in 4 bytes. Long long integers can be stored in 8 bytes. If these datatypes are larger on your system, only the least significant byte(s) will be packed.
Packing long long integers is not portable (in ISO C 89, anyway).
Every effort has been made to use threadsafe, reentrant host and service name lookups in the net client and server functions. If your system has any version of gethostbyname_r(3) and getservbyname_r(3), they will be used. Some systems (e.g. Digital UNIX, HP-UX, Tru64 UNIX) have a threadsafe version of gethostbyname(3) that uses thread specific data. Unfortunately, there's no way to determine whether or not your system's gethostbyname(3) and getservbyname(3) are threadsafe, so it is possible (though unlikely) that the net client and server functions are not reentrant on your system. This does not apply to Linux, Solaris, Digital UNIX, HP-UX or Tru64 UNIX (and others, no doubt) since these systems do have threadsafe versions of the host and service name lookup functions.
Note: It's possible that the underlying DNS resolver functions on your system are not threadsafe. Versions of BIND's resolver library prior to BIND 8.2 are not threadsafe. If your system uses such a version, then even gethostbyname_r(3) isn't threadsafe. Fortunately, Solaris doesn't use libresolv by default and Linux uses the BIND 8.2 version of libresolv which has a new threadsafe API and thread specific data for the old API. It is unlikely that any system that provides gethostbyname_r(3) would provide a non-threadsafe implementation.
There is a race condition that can cause a failure when creating a UNIX domain datagram client socket under Solaris and OpenBSD (but not under Linux). The problem is that UNIX domain datagram sockets must be bound to a path using bind(2) otherwise they can't receive any replies from the server (since the server has no address to send replies to). Linux lets us bind to ""
which is the AF_LOCAL
equivalent of INADDR_ANY
. This is great. No actual path is created, each client gets its own address and the client doesn't need to unlink the path when it's finished. Unfortunately, systems like Solaris and OpenBSD (and probably many others) don't support this. You have to bind to an actual file system path and bind(2) will create an inode for the socket (which the client must unlink when finished). This means there's a race condition between creating the unique path and creating the inode with bind(2). Fortunately, this isn't a security bug (correct me if I'm wrong) because bind(2) fails if the path already exists. Nor is it a denial of service, since it only affects clients. It's more of a denial of request. Also, the names used are not very predictable. The easy, elegant, portable solution is to never use UNIX domain datagram sockets. Always use UNIX domain stream sockets instead. They don't have this problem. If you must use UNIX domain datagram sockets under Solaris, you have to unlink the socket path when finished.
sockaddr_any_t addr;
size_t addrsize = sizeof addr;
if (getsockname(sockfd, (sockaddr_t *)&addr, &addrsize) != -1)
if (*addr.un.sun_path)
unlink(addr.un.sun_path);
This module provides no support for multiple simultaneous TCP connects in a single thread. Use multiple threads or processes instead.
Solaris (at least 2.6 and 2.7) returns -1
as the index for all network interfaces when ioctl(2) is called with a command argument of SIOCGIFINDEX
. net_interfaces(3) guesses the indexes when this happens. It starts at 1 for the first interface, and increments by 1 for each subsequent interface which seems to work.
Because net_interfaces(3) under Solaris 2.6 and 2.7 has to guess the indexes of all interfaces, and because it only returns IPv4 or IPv6 interfaces (but not both), the indexes will probably be wrong on these systems when there is a mix of IPv4 and IPv6 interfaces. Presumably, versions of Solaris that actually support IPv6 will have the ioctl(SIOCGIFINDEX) bug fixed.
Solaris doesn't return hardware addresses when ioctl(2) is called with a command argument of SIOCGIFHWADDR
, so the net_interface_t elements in the list returned by net_interfaces(3) always have null
hwaddr fields.
Linux 2.2 returns 0.0.0.0
as the address of the outgoing IPv4 multicast interface when getsockopt(2) is called with the IP_MULTICAST_IF
command. This means that net_multicast_get_interface(3) always returns 0
under Linux 2.2. Linux 2.4.9 does not have this bug.
The TOS functions are inherently protocol specific. They only work with IPv4 sockets.
libslack(3), socket(2), bind(2), listen(2), accept(2), connect(2), shutdown(2), select(2), read(2), write(2), readv(2), writev(2), close(2), send(2), sendto(2), recv(2), recvfrom(2), gethostbyname(3), getservbyname(3), perlfunc(1), fdopen(3), scanf(3), printf(3)
20230824 raf <raf@raf.org>