sctp: identify chunks that need to be fragmented at IP level
[cascardo/linux.git] / net / sctp / output.c
1 /* SCTP kernel implementation
2  * (C) Copyright IBM Corp. 2001, 2004
3  * Copyright (c) 1999-2000 Cisco, Inc.
4  * Copyright (c) 1999-2001 Motorola, Inc.
5  *
6  * This file is part of the SCTP kernel implementation
7  *
8  * These functions handle output processing.
9  *
10  * This SCTP implementation is free software;
11  * you can redistribute it and/or modify it under the terms of
12  * the GNU General Public License as published by
13  * the Free Software Foundation; either version 2, or (at your option)
14  * any later version.
15  *
16  * This SCTP implementation is distributed in the hope that it
17  * will be useful, but WITHOUT ANY WARRANTY; without even the implied
18  *                 ************************
19  * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
20  * See the GNU General Public License for more details.
21  *
22  * You should have received a copy of the GNU General Public License
23  * along with GNU CC; see the file COPYING.  If not, see
24  * <http://www.gnu.org/licenses/>.
25  *
26  * Please send any bug reports or fixes you make to the
27  * email address(es):
28  *    lksctp developers <linux-sctp@vger.kernel.org>
29  *
30  * Written or modified by:
31  *    La Monte H.P. Yarroll <piggy@acm.org>
32  *    Karl Knutson          <karl@athena.chicago.il.us>
33  *    Jon Grimm             <jgrimm@austin.ibm.com>
34  *    Sridhar Samudrala     <sri@us.ibm.com>
35  */
36
37 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
38
39 #include <linux/types.h>
40 #include <linux/kernel.h>
41 #include <linux/wait.h>
42 #include <linux/time.h>
43 #include <linux/ip.h>
44 #include <linux/ipv6.h>
45 #include <linux/init.h>
46 #include <linux/slab.h>
47 #include <net/inet_ecn.h>
48 #include <net/ip.h>
49 #include <net/icmp.h>
50 #include <net/net_namespace.h>
51
52 #include <linux/socket.h> /* for sa_family_t */
53 #include <net/sock.h>
54
55 #include <net/sctp/sctp.h>
56 #include <net/sctp/sm.h>
57 #include <net/sctp/checksum.h>
58
59 /* Forward declarations for private helpers. */
60 static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
61                                               struct sctp_chunk *chunk);
62 static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
63                                            struct sctp_chunk *chunk);
64 static void sctp_packet_append_data(struct sctp_packet *packet,
65                                            struct sctp_chunk *chunk);
66 static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
67                                         struct sctp_chunk *chunk,
68                                         u16 chunk_len);
69
70 static void sctp_packet_reset(struct sctp_packet *packet)
71 {
72         packet->size = packet->overhead;
73         packet->has_cookie_echo = 0;
74         packet->has_sack = 0;
75         packet->has_data = 0;
76         packet->has_auth = 0;
77         packet->ipfragok = 0;
78         packet->auth = NULL;
79 }
80
81 /* Config a packet.
82  * This appears to be a followup set of initializations.
83  */
84 struct sctp_packet *sctp_packet_config(struct sctp_packet *packet,
85                                        __u32 vtag, int ecn_capable)
86 {
87         struct sctp_transport *tp = packet->transport;
88         struct sctp_association *asoc = tp->asoc;
89
90         pr_debug("%s: packet:%p vtag:0x%x\n", __func__, packet, vtag);
91
92         packet->vtag = vtag;
93
94         if (asoc && tp->dst) {
95                 struct sock *sk = asoc->base.sk;
96
97                 rcu_read_lock();
98                 if (__sk_dst_get(sk) != tp->dst) {
99                         dst_hold(tp->dst);
100                         sk_setup_caps(sk, tp->dst);
101                 }
102
103                 if (sk_can_gso(sk)) {
104                         struct net_device *dev = tp->dst->dev;
105
106                         packet->max_size = dev->gso_max_size;
107                 } else {
108                         packet->max_size = asoc->pathmtu;
109                 }
110                 rcu_read_unlock();
111
112         } else {
113                 packet->max_size = tp->pathmtu;
114         }
115
116         if (ecn_capable && sctp_packet_empty(packet)) {
117                 struct sctp_chunk *chunk;
118
119                 /* If there a is a prepend chunk stick it on the list before
120                  * any other chunks get appended.
121                  */
122                 chunk = sctp_get_ecne_prepend(asoc);
123                 if (chunk)
124                         sctp_packet_append_chunk(packet, chunk);
125         }
126
127         return packet;
128 }
129
130 /* Initialize the packet structure. */
131 struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
132                                      struct sctp_transport *transport,
133                                      __u16 sport, __u16 dport)
134 {
135         struct sctp_association *asoc = transport->asoc;
136         size_t overhead;
137
138         pr_debug("%s: packet:%p transport:%p\n", __func__, packet, transport);
139
140         packet->transport = transport;
141         packet->source_port = sport;
142         packet->destination_port = dport;
143         INIT_LIST_HEAD(&packet->chunk_list);
144         if (asoc) {
145                 struct sctp_sock *sp = sctp_sk(asoc->base.sk);
146                 overhead = sp->pf->af->net_header_len;
147         } else {
148                 overhead = sizeof(struct ipv6hdr);
149         }
150         overhead += sizeof(struct sctphdr);
151         packet->overhead = overhead;
152         sctp_packet_reset(packet);
153         packet->vtag = 0;
154
155         return packet;
156 }
157
158 /* Free a packet.  */
159 void sctp_packet_free(struct sctp_packet *packet)
160 {
161         struct sctp_chunk *chunk, *tmp;
162
163         pr_debug("%s: packet:%p\n", __func__, packet);
164
165         list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
166                 list_del_init(&chunk->list);
167                 sctp_chunk_free(chunk);
168         }
169 }
170
171 /* This routine tries to append the chunk to the offered packet. If adding
172  * the chunk causes the packet to exceed the path MTU and COOKIE_ECHO chunk
173  * is not present in the packet, it transmits the input packet.
174  * Data can be bundled with a packet containing a COOKIE_ECHO chunk as long
175  * as it can fit in the packet, but any more data that does not fit in this
176  * packet can be sent only after receiving the COOKIE_ACK.
177  */
178 sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet,
179                                        struct sctp_chunk *chunk,
180                                        int one_packet, gfp_t gfp)
181 {
182         sctp_xmit_t retval;
183         int error = 0;
184
185         pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__,
186                  packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1);
187
188         switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) {
189         case SCTP_XMIT_PMTU_FULL:
190                 if (!packet->has_cookie_echo) {
191                         error = sctp_packet_transmit(packet, gfp);
192                         if (error < 0)
193                                 chunk->skb->sk->sk_err = -error;
194
195                         /* If we have an empty packet, then we can NOT ever
196                          * return PMTU_FULL.
197                          */
198                         if (!one_packet)
199                                 retval = sctp_packet_append_chunk(packet,
200                                                                   chunk);
201                 }
202                 break;
203
204         case SCTP_XMIT_RWND_FULL:
205         case SCTP_XMIT_OK:
206         case SCTP_XMIT_DELAY:
207                 break;
208         }
209
210         return retval;
211 }
212
213 /* Try to bundle an auth chunk into the packet. */
214 static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt,
215                                            struct sctp_chunk *chunk)
216 {
217         struct sctp_association *asoc = pkt->transport->asoc;
218         struct sctp_chunk *auth;
219         sctp_xmit_t retval = SCTP_XMIT_OK;
220
221         /* if we don't have an association, we can't do authentication */
222         if (!asoc)
223                 return retval;
224
225         /* See if this is an auth chunk we are bundling or if
226          * auth is already bundled.
227          */
228         if (chunk->chunk_hdr->type == SCTP_CID_AUTH || pkt->has_auth)
229                 return retval;
230
231         /* if the peer did not request this chunk to be authenticated,
232          * don't do it
233          */
234         if (!chunk->auth)
235                 return retval;
236
237         auth = sctp_make_auth(asoc);
238         if (!auth)
239                 return retval;
240
241         retval = __sctp_packet_append_chunk(pkt, auth);
242
243         if (retval != SCTP_XMIT_OK)
244                 sctp_chunk_free(auth);
245
246         return retval;
247 }
248
249 /* Try to bundle a SACK with the packet. */
250 static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt,
251                                            struct sctp_chunk *chunk)
252 {
253         sctp_xmit_t retval = SCTP_XMIT_OK;
254
255         /* If sending DATA and haven't aleady bundled a SACK, try to
256          * bundle one in to the packet.
257          */
258         if (sctp_chunk_is_data(chunk) && !pkt->has_sack &&
259             !pkt->has_cookie_echo) {
260                 struct sctp_association *asoc;
261                 struct timer_list *timer;
262                 asoc = pkt->transport->asoc;
263                 timer = &asoc->timers[SCTP_EVENT_TIMEOUT_SACK];
264
265                 /* If the SACK timer is running, we have a pending SACK */
266                 if (timer_pending(timer)) {
267                         struct sctp_chunk *sack;
268
269                         if (pkt->transport->sack_generation !=
270                             pkt->transport->asoc->peer.sack_generation)
271                                 return retval;
272
273                         asoc->a_rwnd = asoc->rwnd;
274                         sack = sctp_make_sack(asoc);
275                         if (sack) {
276                                 retval = __sctp_packet_append_chunk(pkt, sack);
277                                 if (retval != SCTP_XMIT_OK) {
278                                         sctp_chunk_free(sack);
279                                         goto out;
280                                 }
281                                 asoc->peer.sack_needed = 0;
282                                 if (del_timer(timer))
283                                         sctp_association_put(asoc);
284                         }
285                 }
286         }
287 out:
288         return retval;
289 }
290
291
292 /* Append a chunk to the offered packet reporting back any inability to do
293  * so.
294  */
295 static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet,
296                                               struct sctp_chunk *chunk)
297 {
298         sctp_xmit_t retval = SCTP_XMIT_OK;
299         __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length));
300
301         /* Check to see if this chunk will fit into the packet */
302         retval = sctp_packet_will_fit(packet, chunk, chunk_len);
303         if (retval != SCTP_XMIT_OK)
304                 goto finish;
305
306         /* We believe that this chunk is OK to add to the packet */
307         switch (chunk->chunk_hdr->type) {
308         case SCTP_CID_DATA:
309                 /* Account for the data being in the packet */
310                 sctp_packet_append_data(packet, chunk);
311                 /* Disallow SACK bundling after DATA. */
312                 packet->has_sack = 1;
313                 /* Disallow AUTH bundling after DATA */
314                 packet->has_auth = 1;
315                 /* Let it be knows that packet has DATA in it */
316                 packet->has_data = 1;
317                 /* timestamp the chunk for rtx purposes */
318                 chunk->sent_at = jiffies;
319                 /* Mainly used for prsctp RTX policy */
320                 chunk->sent_count++;
321                 break;
322         case SCTP_CID_COOKIE_ECHO:
323                 packet->has_cookie_echo = 1;
324                 break;
325
326         case SCTP_CID_SACK:
327                 packet->has_sack = 1;
328                 if (chunk->asoc)
329                         chunk->asoc->stats.osacks++;
330                 break;
331
332         case SCTP_CID_AUTH:
333                 packet->has_auth = 1;
334                 packet->auth = chunk;
335                 break;
336         }
337
338         /* It is OK to send this chunk.  */
339         list_add_tail(&chunk->list, &packet->chunk_list);
340         packet->size += chunk_len;
341         chunk->transport = packet->transport;
342 finish:
343         return retval;
344 }
345
346 /* Append a chunk to the offered packet reporting back any inability to do
347  * so.
348  */
349 sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet,
350                                      struct sctp_chunk *chunk)
351 {
352         sctp_xmit_t retval = SCTP_XMIT_OK;
353
354         pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk);
355
356         /* Data chunks are special.  Before seeing what else we can
357          * bundle into this packet, check to see if we are allowed to
358          * send this DATA.
359          */
360         if (sctp_chunk_is_data(chunk)) {
361                 retval = sctp_packet_can_append_data(packet, chunk);
362                 if (retval != SCTP_XMIT_OK)
363                         goto finish;
364         }
365
366         /* Try to bundle AUTH chunk */
367         retval = sctp_packet_bundle_auth(packet, chunk);
368         if (retval != SCTP_XMIT_OK)
369                 goto finish;
370
371         /* Try to bundle SACK chunk */
372         retval = sctp_packet_bundle_sack(packet, chunk);
373         if (retval != SCTP_XMIT_OK)
374                 goto finish;
375
376         retval = __sctp_packet_append_chunk(packet, chunk);
377
378 finish:
379         return retval;
380 }
381
382 static void sctp_packet_release_owner(struct sk_buff *skb)
383 {
384         sk_free(skb->sk);
385 }
386
387 static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk)
388 {
389         skb_orphan(skb);
390         skb->sk = sk;
391         skb->destructor = sctp_packet_release_owner;
392
393         /*
394          * The data chunks have already been accounted for in sctp_sendmsg(),
395          * therefore only reserve a single byte to keep socket around until
396          * the packet has been transmitted.
397          */
398         atomic_inc(&sk->sk_wmem_alloc);
399 }
400
401 /* All packets are sent to the network through this function from
402  * sctp_outq_tail().
403  *
404  * The return value is a normal kernel error return value.
405  */
406 int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
407 {
408         struct sctp_transport *tp = packet->transport;
409         struct sctp_association *asoc = tp->asoc;
410         struct sctphdr *sh;
411         struct sk_buff *nskb = NULL, *head = NULL;
412         struct sctp_chunk *chunk, *tmp;
413         struct sock *sk;
414         int err = 0;
415         int padding;            /* How much padding do we need?  */
416         int pkt_size;
417         __u8 has_data = 0;
418         int gso = 0;
419         int pktcount = 0;
420         struct dst_entry *dst;
421         unsigned char *auth = NULL;     /* pointer to auth in skb data */
422
423         pr_debug("%s: packet:%p\n", __func__, packet);
424
425         /* Do NOT generate a chunkless packet. */
426         if (list_empty(&packet->chunk_list))
427                 return err;
428
429         /* Set up convenience variables... */
430         chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
431         sk = chunk->skb->sk;
432
433         /* Allocate the head skb, or main one if not in GSO */
434         if (packet->size > tp->pathmtu && !packet->ipfragok) {
435                 if (sk_can_gso(sk)) {
436                         gso = 1;
437                         pkt_size = packet->overhead;
438                 } else {
439                         /* If this happens, we trash this packet and try
440                          * to build a new one, hopefully correct this
441                          * time. Application may notice this error.
442                          */
443                         pr_err_once("Trying to GSO but underlying device doesn't support it.");
444                         goto nomem;
445                 }
446         } else {
447                 pkt_size = packet->size;
448         }
449         head = alloc_skb(pkt_size + MAX_HEADER, gfp);
450         if (!head)
451                 goto nomem;
452         if (gso) {
453                 NAPI_GRO_CB(head)->last = head;
454                 skb_shinfo(head)->gso_type = sk->sk_gso_type;
455         }
456
457         /* Make sure the outbound skb has enough header room reserved. */
458         skb_reserve(head, packet->overhead + MAX_HEADER);
459
460         /* Set the owning socket so that we know where to get the
461          * destination IP address.
462          */
463         sctp_packet_set_owner_w(head, sk);
464
465         if (!sctp_transport_dst_check(tp)) {
466                 sctp_transport_route(tp, NULL, sctp_sk(sk));
467                 if (asoc && (asoc->param_flags & SPP_PMTUD_ENABLE)) {
468                         sctp_assoc_sync_pmtu(sk, asoc);
469                 }
470         }
471         dst = dst_clone(tp->dst);
472         if (!dst)
473                 goto no_route;
474         skb_dst_set(head, dst);
475
476         /* Build the SCTP header.  */
477         sh = (struct sctphdr *)skb_push(head, sizeof(struct sctphdr));
478         skb_reset_transport_header(head);
479         sh->source = htons(packet->source_port);
480         sh->dest   = htons(packet->destination_port);
481
482         /* From 6.8 Adler-32 Checksum Calculation:
483          * After the packet is constructed (containing the SCTP common
484          * header and one or more control or DATA chunks), the
485          * transmitter shall:
486          *
487          * 1) Fill in the proper Verification Tag in the SCTP common
488          *    header and initialize the checksum field to 0's.
489          */
490         sh->vtag     = htonl(packet->vtag);
491         sh->checksum = 0;
492
493         pr_debug("***sctp_transmit_packet***\n");
494
495         do {
496                 /* Set up convenience variables... */
497                 chunk = list_entry(packet->chunk_list.next, struct sctp_chunk, list);
498                 pktcount++;
499
500                 /* Calculate packet size, so it fits in PMTU. Leave
501                  * other chunks for the next packets.
502                  */
503                 if (gso) {
504                         pkt_size = packet->overhead;
505                         list_for_each_entry(chunk, &packet->chunk_list, list) {
506                                 int padded = WORD_ROUND(chunk->skb->len);
507
508                                 if (pkt_size + padded > tp->pathmtu)
509                                         break;
510                                 pkt_size += padded;
511                         }
512
513                         /* Allocate a new skb. */
514                         nskb = alloc_skb(pkt_size + MAX_HEADER, gfp);
515                         if (!nskb)
516                                 goto nomem;
517
518                         /* Make sure the outbound skb has enough header
519                          * room reserved.
520                          */
521                         skb_reserve(nskb, packet->overhead + MAX_HEADER);
522                 } else {
523                         nskb = head;
524                 }
525
526                 /**
527                  * 3.2  Chunk Field Descriptions
528                  *
529                  * The total length of a chunk (including Type, Length and
530                  * Value fields) MUST be a multiple of 4 bytes.  If the length
531                  * of the chunk is not a multiple of 4 bytes, the sender MUST
532                  * pad the chunk with all zero bytes and this padding is not
533                  * included in the chunk length field.  The sender should
534                  * never pad with more than 3 bytes.
535                  *
536                  * [This whole comment explains WORD_ROUND() below.]
537                  */
538
539                 pkt_size -= packet->overhead;
540                 list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
541                         list_del_init(&chunk->list);
542                         if (sctp_chunk_is_data(chunk)) {
543                                 /* 6.3.1 C4) When data is in flight and when allowed
544                                  * by rule C5, a new RTT measurement MUST be made each
545                                  * round trip.  Furthermore, new RTT measurements
546                                  * SHOULD be made no more than once per round-trip
547                                  * for a given destination transport address.
548                                  */
549
550                                 if (!chunk->resent && !tp->rto_pending) {
551                                         chunk->rtt_in_progress = 1;
552                                         tp->rto_pending = 1;
553                                 }
554
555                                 has_data = 1;
556                         }
557
558                         padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len;
559                         if (padding)
560                                 memset(skb_put(chunk->skb, padding), 0, padding);
561
562                         /* if this is the auth chunk that we are adding,
563                          * store pointer where it will be added and put
564                          * the auth into the packet.
565                          */
566                         if (chunk == packet->auth)
567                                 auth = skb_tail_pointer(nskb);
568
569                         memcpy(skb_put(nskb, chunk->skb->len),
570                                chunk->skb->data, chunk->skb->len);
571
572                         pr_debug("*** Chunk:%p[%s] %s 0x%x, length:%d, chunk->skb->len:%d, rtt_in_progress:%d\n",
573                                  chunk,
574                                  sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)),
575                                  chunk->has_tsn ? "TSN" : "No TSN",
576                                  chunk->has_tsn ? ntohl(chunk->subh.data_hdr->tsn) : 0,
577                                  ntohs(chunk->chunk_hdr->length), chunk->skb->len,
578                                  chunk->rtt_in_progress);
579
580                         /* If this is a control chunk, this is our last
581                          * reference. Free data chunks after they've been
582                          * acknowledged or have failed.
583                          * Re-queue auth chunks if needed.
584                          */
585                         pkt_size -= WORD_ROUND(chunk->skb->len);
586
587                         if (!sctp_chunk_is_data(chunk) && chunk != packet->auth)
588                                 sctp_chunk_free(chunk);
589
590                         if (!pkt_size)
591                                 break;
592                 }
593
594                 /* SCTP-AUTH, Section 6.2
595                  *    The sender MUST calculate the MAC as described in RFC2104 [2]
596                  *    using the hash function H as described by the MAC Identifier and
597                  *    the shared association key K based on the endpoint pair shared key
598                  *    described by the shared key identifier.  The 'data' used for the
599                  *    computation of the AUTH-chunk is given by the AUTH chunk with its
600                  *    HMAC field set to zero (as shown in Figure 6) followed by all
601                  *    chunks that are placed after the AUTH chunk in the SCTP packet.
602                  */
603                 if (auth)
604                         sctp_auth_calculate_hmac(asoc, nskb,
605                                                  (struct sctp_auth_chunk *)auth,
606                                                  gfp);
607
608                 if (packet->auth) {
609                         if (!list_empty(&packet->chunk_list)) {
610                                 /* We will generate more packets, so re-queue
611                                  * auth chunk.
612                                  */
613                                 list_add(&packet->auth->list,
614                                          &packet->chunk_list);
615                         } else {
616                                 sctp_chunk_free(packet->auth);
617                                 packet->auth = NULL;
618                         }
619                 }
620
621                 if (!gso)
622                         break;
623
624                 if (skb_gro_receive(&head, nskb))
625                         goto nomem;
626                 nskb = NULL;
627                 if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >=
628                                  sk->sk_gso_max_segs))
629                         goto nomem;
630         } while (!list_empty(&packet->chunk_list));
631
632         /* 2) Calculate the Adler-32 checksum of the whole packet,
633          *    including the SCTP common header and all the
634          *    chunks.
635          *
636          * Note: Adler-32 is no longer applicable, as has been replaced
637          * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
638          *
639          * If it's a GSO packet, it's postponed to sctp_skb_segment.
640          */
641         if (!sctp_checksum_disable || gso) {
642                 if (!gso && (!(dst->dev->features & NETIF_F_SCTP_CRC) ||
643                              dst_xfrm(dst) || packet->ipfragok)) {
644                         sh->checksum = sctp_compute_cksum(head, 0);
645                 } else {
646                         /* no need to seed pseudo checksum for SCTP */
647                         head->ip_summed = CHECKSUM_PARTIAL;
648                         head->csum_start = skb_transport_header(head) - head->head;
649                         head->csum_offset = offsetof(struct sctphdr, checksum);
650                 }
651         }
652
653         /* IP layer ECN support
654          * From RFC 2481
655          *  "The ECN-Capable Transport (ECT) bit would be set by the
656          *   data sender to indicate that the end-points of the
657          *   transport protocol are ECN-capable."
658          *
659          * Now setting the ECT bit all the time, as it should not cause
660          * any problems protocol-wise even if our peer ignores it.
661          *
662          * Note: The works for IPv6 layer checks this bit too later
663          * in transmission.  See IP6_ECN_flow_xmit().
664          */
665         tp->af_specific->ecn_capable(sk);
666
667         /* Set up the IP options.  */
668         /* BUG: not implemented
669          * For v4 this all lives somewhere in sk->sk_opt...
670          */
671
672         /* Dump that on IP!  */
673         if (asoc) {
674                 asoc->stats.opackets += pktcount;
675                 if (asoc->peer.last_sent_to != tp)
676                         /* Considering the multiple CPU scenario, this is a
677                          * "correcter" place for last_sent_to.  --xguo
678                          */
679                         asoc->peer.last_sent_to = tp;
680         }
681
682         if (has_data) {
683                 struct timer_list *timer;
684                 unsigned long timeout;
685
686                 /* Restart the AUTOCLOSE timer when sending data. */
687                 if (sctp_state(asoc, ESTABLISHED) &&
688                     asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE]) {
689                         timer = &asoc->timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
690                         timeout = asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE];
691
692                         if (!mod_timer(timer, jiffies + timeout))
693                                 sctp_association_hold(asoc);
694                 }
695         }
696
697         pr_debug("***sctp_transmit_packet*** skb->len:%d\n", head->len);
698
699         if (gso) {
700                 /* Cleanup our debris for IP stacks */
701                 memset(head->cb, 0, max(sizeof(struct inet_skb_parm),
702                                         sizeof(struct inet6_skb_parm)));
703
704                 skb_shinfo(head)->gso_segs = pktcount;
705                 skb_shinfo(head)->gso_size = GSO_BY_FRAGS;
706
707                 /* We have to refresh this in case we are xmiting to
708                  * more than one transport at a time
709                  */
710                 rcu_read_lock();
711                 if (__sk_dst_get(sk) != tp->dst) {
712                         dst_hold(tp->dst);
713                         sk_setup_caps(sk, tp->dst);
714                 }
715                 rcu_read_unlock();
716         }
717         head->ignore_df = packet->ipfragok;
718         tp->af_specific->sctp_xmit(head, tp);
719
720 out:
721         sctp_packet_reset(packet);
722         return err;
723 no_route:
724         kfree_skb(head);
725         if (nskb != head)
726                 kfree_skb(nskb);
727
728         if (asoc)
729                 IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
730
731         /* FIXME: Returning the 'err' will effect all the associations
732          * associated with a socket, although only one of the paths of the
733          * association is unreachable.
734          * The real failure of a transport or association can be passed on
735          * to the user via notifications. So setting this error may not be
736          * required.
737          */
738          /* err = -EHOSTUNREACH; */
739 err:
740         /* Control chunks are unreliable so just drop them.  DATA chunks
741          * will get resent or dropped later.
742          */
743
744         list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
745                 list_del_init(&chunk->list);
746                 if (!sctp_chunk_is_data(chunk))
747                         sctp_chunk_free(chunk);
748         }
749         goto out;
750 nomem:
751         if (packet->auth && list_empty(&packet->auth->list))
752                 sctp_chunk_free(packet->auth);
753         err = -ENOMEM;
754         goto err;
755 }
756
757 /********************************************************************
758  * 2nd Level Abstractions
759  ********************************************************************/
760
761 /* This private function check to see if a chunk can be added */
762 static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
763                                            struct sctp_chunk *chunk)
764 {
765         size_t datasize, rwnd, inflight, flight_size;
766         struct sctp_transport *transport = packet->transport;
767         struct sctp_association *asoc = transport->asoc;
768         struct sctp_outq *q = &asoc->outqueue;
769
770         /* RFC 2960 6.1  Transmission of DATA Chunks
771          *
772          * A) At any given time, the data sender MUST NOT transmit new data to
773          * any destination transport address if its peer's rwnd indicates
774          * that the peer has no buffer space (i.e. rwnd is 0, see Section
775          * 6.2.1).  However, regardless of the value of rwnd (including if it
776          * is 0), the data sender can always have one DATA chunk in flight to
777          * the receiver if allowed by cwnd (see rule B below).  This rule
778          * allows the sender to probe for a change in rwnd that the sender
779          * missed due to the SACK having been lost in transit from the data
780          * receiver to the data sender.
781          */
782
783         rwnd = asoc->peer.rwnd;
784         inflight = q->outstanding_bytes;
785         flight_size = transport->flight_size;
786
787         datasize = sctp_data_size(chunk);
788
789         if (datasize > rwnd && inflight > 0)
790                 /* We have (at least) one data chunk in flight,
791                  * so we can't fall back to rule 6.1 B).
792                  */
793                 return SCTP_XMIT_RWND_FULL;
794
795         /* RFC 2960 6.1  Transmission of DATA Chunks
796          *
797          * B) At any given time, the sender MUST NOT transmit new data
798          * to a given transport address if it has cwnd or more bytes
799          * of data outstanding to that transport address.
800          */
801         /* RFC 7.2.4 & the Implementers Guide 2.8.
802          *
803          * 3) ...
804          *    When a Fast Retransmit is being performed the sender SHOULD
805          *    ignore the value of cwnd and SHOULD NOT delay retransmission.
806          */
807         if (chunk->fast_retransmit != SCTP_NEED_FRTX &&
808             flight_size >= transport->cwnd)
809                 return SCTP_XMIT_RWND_FULL;
810
811         /* Nagle's algorithm to solve small-packet problem:
812          * Inhibit the sending of new chunks when new outgoing data arrives
813          * if any previously transmitted data on the connection remains
814          * unacknowledged.
815          */
816
817         if (sctp_sk(asoc->base.sk)->nodelay)
818                 /* Nagle disabled */
819                 return SCTP_XMIT_OK;
820
821         if (!sctp_packet_empty(packet))
822                 /* Append to packet */
823                 return SCTP_XMIT_OK;
824
825         if (inflight == 0)
826                 /* Nothing unacked */
827                 return SCTP_XMIT_OK;
828
829         if (!sctp_state(asoc, ESTABLISHED))
830                 return SCTP_XMIT_OK;
831
832         /* Check whether this chunk and all the rest of pending data will fit
833          * or delay in hopes of bundling a full sized packet.
834          */
835         if (chunk->skb->len + q->out_qlen >
836                 transport->pathmtu - packet->overhead - sizeof(sctp_data_chunk_t) - 4)
837                 /* Enough data queued to fill a packet */
838                 return SCTP_XMIT_OK;
839
840         /* Don't delay large message writes that may have been fragmented */
841         if (!chunk->msg->can_delay)
842                 return SCTP_XMIT_OK;
843
844         /* Defer until all data acked or packet full */
845         return SCTP_XMIT_DELAY;
846 }
847
848 /* This private function does management things when adding DATA chunk */
849 static void sctp_packet_append_data(struct sctp_packet *packet,
850                                 struct sctp_chunk *chunk)
851 {
852         struct sctp_transport *transport = packet->transport;
853         size_t datasize = sctp_data_size(chunk);
854         struct sctp_association *asoc = transport->asoc;
855         u32 rwnd = asoc->peer.rwnd;
856
857         /* Keep track of how many bytes are in flight over this transport. */
858         transport->flight_size += datasize;
859
860         /* Keep track of how many bytes are in flight to the receiver. */
861         asoc->outqueue.outstanding_bytes += datasize;
862
863         /* Update our view of the receiver's rwnd. */
864         if (datasize < rwnd)
865                 rwnd -= datasize;
866         else
867                 rwnd = 0;
868
869         asoc->peer.rwnd = rwnd;
870         /* Has been accepted for transmission. */
871         if (!asoc->peer.prsctp_capable)
872                 chunk->msg->can_abandon = 0;
873         sctp_chunk_assign_tsn(chunk);
874         sctp_chunk_assign_ssn(chunk);
875 }
876
877 static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet,
878                                         struct sctp_chunk *chunk,
879                                         u16 chunk_len)
880 {
881         size_t psize, pmtu, maxsize;
882         sctp_xmit_t retval = SCTP_XMIT_OK;
883
884         psize = packet->size;
885         if (packet->transport->asoc)
886                 pmtu = packet->transport->asoc->pathmtu;
887         else
888                 pmtu = packet->transport->pathmtu;
889
890         /* Decide if we need to fragment or resubmit later. */
891         if (psize + chunk_len > pmtu) {
892                 /* It's OK to fragment at IP level if any one of the following
893                  * is true:
894                  *      1. The packet is empty (meaning this chunk is greater
895                  *         the MTU)
896                  *      2. The packet doesn't have any data in it yet and data
897                  *         requires authentication.
898                  */
899                 if (sctp_packet_empty(packet) ||
900                     (!packet->has_data && chunk->auth)) {
901                         /* We no longer do re-fragmentation.
902                          * Just fragment at the IP layer, if we
903                          * actually hit this condition
904                          */
905                         packet->ipfragok = 1;
906                         goto out;
907                 }
908
909                 /* Similarly, if this chunk was built before a PMTU
910                  * reduction, we have to fragment it at IP level now. So
911                  * if the packet already contains something, we need to
912                  * flush.
913                  */
914                 maxsize = pmtu - packet->overhead;
915                 if (packet->auth)
916                         maxsize -= WORD_ROUND(packet->auth->skb->len);
917                 if (chunk_len > maxsize)
918                         retval = SCTP_XMIT_PMTU_FULL;
919
920                 /* It is also okay to fragment if the chunk we are
921                  * adding is a control chunk, but only if current packet
922                  * is not a GSO one otherwise it causes fragmentation of
923                  * a large frame. So in this case we allow the
924                  * fragmentation by forcing it to be in a new packet.
925                  */
926                 if (!sctp_chunk_is_data(chunk) && packet->has_data)
927                         retval = SCTP_XMIT_PMTU_FULL;
928
929                 if (psize + chunk_len > packet->max_size)
930                         /* Hit GSO/PMTU limit, gotta flush */
931                         retval = SCTP_XMIT_PMTU_FULL;
932
933                 if (!packet->transport->burst_limited &&
934                     psize + chunk_len > (packet->transport->cwnd >> 1))
935                         /* Do not allow a single GSO packet to use more
936                          * than half of cwnd.
937                          */
938                         retval = SCTP_XMIT_PMTU_FULL;
939
940                 if (packet->transport->burst_limited &&
941                     psize + chunk_len > (packet->transport->burst_limited >> 1))
942                         /* Do not allow a single GSO packet to use more
943                          * than half of original cwnd.
944                          */
945                         retval = SCTP_XMIT_PMTU_FULL;
946                 /* Otherwise it will fit in the GSO packet */
947         }
948
949 out:
950         return retval;
951 }