summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorWillem de Bruijn <willemb@google.com>2020-07-10 15:29:02 +0200
committerDavid S. Miller <davem@davemloft.net>2020-07-20 04:20:22 +0200
commiteba75c587e811d3249c8bd50d22bb2266ccd3c0f (patch)
tree28fe5ba40b6a9b252fa8c7e3861922c4aaaad0f3 /net
parentMerge branch 'rework-mvneta-napi_poll-loop-for-XDP-multi-buffers' (diff)
downloadlinux-eba75c587e811d3249c8bd50d22bb2266ccd3c0f.tar.xz
linux-eba75c587e811d3249c8bd50d22bb2266ccd3c0f.zip
icmp: support rfc 4884
Add setsockopt SOL_IP/IP_RECVERR_4884 to return the offset to an extension struct if present. ICMP messages may include an extension structure after the original datagram. RFC 4884 standardized this behavior. It stores the offset in words to the extension header in u8 icmphdr.un.reserved[1]. The field is valid only for ICMP types destination unreachable, time exceeded and parameter problem, if length is at least 128 bytes and entire packet does not exceed 576 bytes. Return the offset to the start of the extension struct when reading an ICMP error from the error queue, if it matches the above constraints. Do not return the raw u8 field. Return the offset from the start of the user buffer, in bytes. The kernel does not return the network and transport headers, so subtract those. Also validate the headers. Return the offset regardless of validation, as an invalid extension must still not be misinterpreted as part of the original datagram. Note that !invalid does not imply valid. If the extension version does not match, no validation can take place, for instance. For backward compatibility, make this optional, set by setsockopt SOL_IP/IP_RECVERR_RFC4884. For API example and feature test, see github.com/wdebruij/kerneltools/blob/master/tests/recv_icmp_v2.c For forward compatibility, reserve only setsockopt value 1, leaving other bits for additional icmp extensions. Changes v1->v2: - convert word offset to byte offset from start of user buffer - return in ee_data as u8 may be insufficient - define extension struct and object header structs - return len only if constraints met - if returning len, also validate Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/icmp.c71
-rw-r--r--net/ipv4/ip_sockglue.c12
2 files changed, 83 insertions, 0 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e30515f89802..793aebf07c2a 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -1116,6 +1116,77 @@ error:
goto drop;
}
+static bool ip_icmp_error_rfc4884_validate(const struct sk_buff *skb, int off)
+{
+ struct icmp_extobj_hdr *objh, _objh;
+ struct icmp_ext_hdr *exth, _exth;
+ u16 olen;
+
+ exth = skb_header_pointer(skb, off, sizeof(_exth), &_exth);
+ if (!exth)
+ return false;
+ if (exth->version != 2)
+ return true;
+
+ if (exth->checksum &&
+ csum_fold(skb_checksum(skb, off, skb->len - off, 0)))
+ return false;
+
+ off += sizeof(_exth);
+ while (off < skb->len) {
+ objh = skb_header_pointer(skb, off, sizeof(_objh), &_objh);
+ if (!objh)
+ return false;
+
+ olen = ntohs(objh->length);
+ if (olen < sizeof(_objh))
+ return false;
+
+ off += olen;
+ if (off > skb->len)
+ return false;
+ }
+
+ return true;
+}
+
+void ip_icmp_error_rfc4884(const struct sk_buff *skb,
+ struct sock_ee_data_rfc4884 *out)
+{
+ int hlen, off;
+
+ switch (icmp_hdr(skb)->type) {
+ case ICMP_DEST_UNREACH:
+ case ICMP_TIME_EXCEEDED:
+ case ICMP_PARAMETERPROB:
+ break;
+ default:
+ return;
+ }
+
+ /* outer headers up to inner iph. skb->data is at inner payload */
+ hlen = -skb_transport_offset(skb) - sizeof(struct icmphdr);
+
+ /* per rfc 791: maximum packet length of 576 bytes */
+ if (hlen + skb->len > 576)
+ return;
+
+ /* per rfc 4884: minimal datagram length of 128 bytes */
+ off = icmp_hdr(skb)->un.reserved[1] * sizeof(u32);
+ if (off < 128)
+ return;
+
+ /* kernel has stripped headers: return payload offset in bytes */
+ off -= hlen;
+ if (off + sizeof(struct icmp_ext_hdr) > skb->len)
+ return;
+
+ out->len = off;
+
+ if (!ip_icmp_error_rfc4884_validate(skb, off))
+ out->flags |= SO_EE_RFC4884_FLAG_INVALID;
+}
+
int icmp_err(struct sk_buff *skb, u32 info)
{
struct iphdr *iph = (struct iphdr *)skb->data;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 86b3b9a7cea3..a5ea02d7a183 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -411,6 +411,9 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
serr->port = port;
if (skb_pull(skb, payload - skb->data)) {
+ if (inet_sk(sk)->recverr_rfc4884)
+ ip_icmp_error_rfc4884(skb, &serr->ee.ee_rfc4884);
+
skb_reset_transport_header(skb);
if (sock_queue_err_skb(sk, skb) == 0)
return;
@@ -904,6 +907,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
case IP_RECVORIGDSTADDR:
case IP_CHECKSUM:
case IP_RECVFRAGSIZE:
+ case IP_RECVERR_RFC4884:
if (optlen >= sizeof(int)) {
if (get_user(val, (int __user *) optval))
return -EFAULT;
@@ -1063,6 +1067,11 @@ static int do_ip_setsockopt(struct sock *sk, int level,
if (!val)
skb_queue_purge(&sk->sk_error_queue);
break;
+ case IP_RECVERR_RFC4884:
+ if (val < 0 || val > 1)
+ goto e_inval;
+ inet->recverr_rfc4884 = !!val;
+ break;
case IP_MULTICAST_TTL:
if (sk->sk_type == SOCK_STREAM)
goto e_inval;
@@ -1611,6 +1620,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_RECVERR:
val = inet->recverr;
break;
+ case IP_RECVERR_RFC4884:
+ val = inet->recverr_rfc4884;
+ break;
case IP_MULTICAST_TTL:
val = inet->mc_ttl;
break;