summaryrefslogtreecommitdiffstats
path: root/ssl/quic/quic_ackm.c
blob: facd34176a0552a2c15c338435fc52258fec9707 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
/*
 * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include "internal/quic_ackm.h"
#include "internal/uint_set.h"
#include "internal/common.h"
#include <assert.h>

DEFINE_LIST_OF(tx_history, OSSL_ACKM_TX_PKT);

/*
 * TX Packet History
 * *****************
 *
 * The TX Packet History object tracks information about packets which have been
 * sent for which we later expect to receive an ACK. It is essentially a simple
 * database keeping a list of packet information structures in packet number
 * order which can also be looked up directly by packet number.
 *
 * We currently only allow packets to be appended to the list (i.e. the packet
 * numbers of the packets appended to the list must monotonically increase), as
 * we should not currently need more general functionality such as a sorted list
 * insert.
 */
struct tx_pkt_history_st {
    /* A linked list of all our packets. */
    OSSL_LIST(tx_history) packets;

    /*
     * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *)
     *
     * Invariant: A packet is in this map if and only if it is in the linked
     *            list.
     */
    LHASH_OF(OSSL_ACKM_TX_PKT) *map;

    /*
     * The lowest packet number which may currently be added to the history list
     * (inclusive). We do not allow packet numbers to be added to the history
     * list non-monotonically, so packet numbers must be greater than or equal
     * to this value.
     */
    uint64_t watermark;

    /*
     * Packet number of the highest packet info structure we have yet appended
     * to the list. This is usually one less than watermark, except when we have
     * not added any packet yet.
     */
    uint64_t highest_sent;
};

DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT);

static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT *pkt)
{
    /* Using low bits of the packet number as the hash should be enough */
    return (unsigned long)pkt->pkt_num;
}

static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT *a,
                               const OSSL_ACKM_TX_PKT *b)
{
    if (a->pkt_num < b->pkt_num)
        return -1;
    if (a->pkt_num > b->pkt_num)
        return 1;
    return 0;
}

static int
tx_pkt_history_init(struct tx_pkt_history_st *h)
{
    ossl_list_tx_history_init(&h->packets);
    h->watermark    = 0;
    h->highest_sent = 0;

    h->map = lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash, tx_pkt_info_compare);
    if (h->map == NULL)
        return 0;

    return 1;
}

static void
tx_pkt_history_destroy(struct tx_pkt_history_st *h)
{
    lh_OSSL_ACKM_TX_PKT_free(h->map);
    h->map = NULL;
    ossl_list_tx_history_init(&h->packets);
}

static int
tx_pkt_history_add_actual(struct tx_pkt_history_st *h,
                          OSSL_ACKM_TX_PKT *pkt)
{
    OSSL_ACKM_TX_PKT *existing;

    /*
     * There should not be any existing packet with this number
     * in our mapping.
     */
    existing = lh_OSSL_ACKM_TX_PKT_retrieve(h->map, pkt);
    if (!ossl_assert(existing == NULL))
        return 0;

    /* Should not already be in a list. */
    if (!ossl_assert(ossl_list_tx_history_next(pkt) == NULL
            && ossl_list_tx_history_prev(pkt) == NULL))
        return 0;

    lh_OSSL_ACKM_TX_PKT_insert(h->map, pkt);

    ossl_list_tx_history_insert_tail(&h->packets, pkt);
    return 1;
}

/* Adds a packet information structure to the history list. */
static int
tx_pkt_history_add(struct tx_pkt_history_st *h,
                   OSSL_ACKM_TX_PKT *pkt)
{
    if (!ossl_assert(pkt->pkt_num >= h->watermark))
        return 0;

    if (tx_pkt_history_add_actual(h, pkt) < 1)
        return 0;

    h->watermark    = pkt->pkt_num + 1;
    h->highest_sent = pkt->pkt_num;
    return 1;
}

/* Retrieve a packet information structure by packet number. */
static OSSL_ACKM_TX_PKT *
tx_pkt_history_by_pkt_num(struct tx_pkt_history_st *h, uint64_t pkt_num)
{
    OSSL_ACKM_TX_PKT key;

    key.pkt_num = pkt_num;

    return lh_OSSL_ACKM_TX_PKT_retrieve(h->map, &key);
}

/* Remove a packet information structure from the history log. */
static int
tx_pkt_history_remove(struct tx_pkt_history_st *h, uint64_t pkt_num)
{
    OSSL_ACKM_TX_PKT key, *pkt;
    key.pkt_num = pkt_num;

    pkt = tx_pkt_history_by_pkt_num(h, pkt_num);
    if (pkt == NULL)
        return 0;

    ossl_list_tx_history_remove(&h->packets, pkt);
    lh_OSSL_ACKM_TX_PKT_delete(h->map, &key);
    return 1;
}

/*
 * RX Packet Number Tracking
 * *************************
 *
 * **Background.** The RX side of the ACK manager must track packets we have
 * received for which we have to generate ACK frames. Broadly, this means we
 * store a set of packet numbers which we have received but which we do not know
 * for a fact that the transmitter knows we have received.
 *
 * This must handle various situations:
 *
 *   1. We receive a packet but have not sent an ACK yet, so the transmitter
 *      does not know whether we have received it or not yet.
 *
 *   2. We receive a packet and send an ACK which is lost. We do not
 *      immediately know that the ACK was lost and the transmitter does not know
 *      that we have received the packet.
 *
 *   3. We receive a packet and send an ACK which is received by the
 *      transmitter. The transmitter does not immediately respond with an ACK,
 *      or responds with an ACK which is lost. The transmitter knows that we
 *      have received the packet, but we do not know for sure that it knows,
 *      because the ACK we sent could have been lost.
 *
 *   4. We receive a packet and send an ACK which is received by the
 *      transmitter. The transmitter subsequently sends us an ACK which confirms
 *      its receipt of the ACK we sent, and we successfully receive that ACK, so
 *      we know that the transmitter knows, that we received the original
 *      packet.
 *
 * Only when we reach case (4) are we relieved of any need to track a given
 * packet number we have received, because only in this case do we know for sure
 * that the peer knows we have received the packet. Having reached case (4) we
 * will never again need to generate an ACK containing the PN in question, but
 * until we reach that point, we must keep track of the PN as not having been
 * provably ACKed, as we may have to keep generating ACKs for the given PN not
 * just until the transmitter receives one, but until we know that it has
 * received one. This will be referred to herein as "provably ACKed".
 *
 * **Duplicate handling.** The above discusses the case where we have received a
 * packet with a given PN but are at best unsure whether the sender knows we
 * have received it or not. However, we must also handle the case where we have
 * yet to receive a packet with a given PN in the first place. The reason for
 * this is because of the requirement expressed by RFC 9000 s. 12.3:
 *
 *   "A receiver MUST discard a newly unprotected packet unless it is certain
 *    that it has not processed another packet with the same packet number from
 *    the same packet number space."
 *
 * We must ensure we never process a duplicate PN. As such, each possible PN we
 * can receive must exist in one of the following logical states:
 *
 *   - We have never processed this PN before
 *     (so if we receive such a PN, it can be processed)
 *
 *   - We have processed this PN but it has not yet been provably ACKed
 *     (and should therefore be in any future ACK frame generated;
 *      if we receive such a PN again, it must be ignored)
 *
 *   - We have processed this PN and it has been provably ACKed
 *     (if we receive such a PN again, it must be ignored)
 *
 * However, if we were to track this state for every PN ever used in the history
 * of a connection, the amount of state required would increase unboundedly as
 * the connection goes on (for example, we would have to store a set of every PN
 * ever received.)
 *
 * RFC 9000 s. 12.3 continues:
 *
 *   "Endpoints that track all individual packets for the purposes of detecting
 *    duplicates are at risk of accumulating excessive state. The data required
 *    for detecting duplicates can be limited by maintaining a minimum packet
 *    number below which all packets are immediately dropped."
 *
 * Moreover, RFC 9000 s. 13.2.3 states that:
 *
 *   "A receiver MUST retain an ACK Range unless it can ensure that it will not
 *    subsequently accept packets with numbers in that range. Maintaining a
 *    minimum packet number that increases as ranges are discarded is one way to
 *    achieve this with minimal state."
 *
 * This touches on a subtlety of the original requirement quoted above: the
 * receiver MUST discard a packet unless it is certain that it has not processed
 * another packet with the same PN. However, this does not forbid the receiver
 * from also discarding some PNs even though it has not yet processed them. In
 * other words, implementations must be conservative and err in the direction of
 * assuming a packet is a duplicate, but it is acceptable for this to come at
 * the cost of falsely identifying some packets as duplicates.
 *
 * This allows us to bound the amount of state we must keep, and we adopt the
 * suggested strategy quoted above to do so. We define a watermark PN below
 * which all PNs are in the same state. This watermark is only ever increased.
 * Thus the PNs the state for which needs to be explicitly tracked is limited to
 * only a small number of recent PNs, and all older PNs have an assumed state.
 *
 * Any given PN thus falls into one of the following states:
 *
 *   - (A) The PN is above the watermark but we have not yet received it.
 *
 *         If we receive such a PN, we should process it and record the PN as
 *         received.
 *
 *   - (B) The PN is above the watermark and we have received it.
 *
 *         The PN should be included in any future ACK frame we generate.
 *         If we receive such a PN again, we should ignore it.
 *
 *   - (C) The PN is below the watermark.
 *
 *         We do not know whether a packet with the given PN was received or
 *         not. To be safe, if we receive such a packet, it is not processed.
 *
 * Note that state (C) corresponds to both "we have processed this PN and it has
 * been provably ACKed" logical state and a subset of the PNs in the "we have
 * never processed this PN before" logical state (namely all PNs which were lost
 * and never received, but which are not recent enough to be above the
 * watermark). The reason we can merge these states and avoid tracking states
 * for the PNs in this state is because the provably ACKed and never-received
 * states are functionally identical in terms of how we need to handle them: we
 * don't need to do anything for PNs in either of these states, so we don't have
 * to care about PNs in this state nor do we have to care about distinguishing
 * the two states for a given PN.
 *
 * Note that under this scheme provably ACKed PNs are by definition always below
 * the watermark; therefore, it follows that when a PN becomes provably ACKed,
 * the watermark must be immediately increased to exceed it (otherwise we would
 * keep reporting it in future ACK frames).
 *
 * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when
 * to advance the watermark:
 *
 *   "When a packet containing an ACK frame is sent, the Largest Acknowledged
 *    field in that frame can be saved. When a packet containing an ACK frame is
 *    acknowledged, the receiver can stop acknowledging packets less than or
 *    equal to the Largest Acknowledged field in the sent ACK frame."
 *
 * This is where our scheme's false positives arise. When a packet containing an
 * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably
 * acked, and the watermark is bumped accordingly. However, the Largest
 * Acknowledged field does not imply that all lower PNs have been received,
 * because there may be gaps expressed in the ranges of PNs expressed by that
 * and previous ACK frames. Thus, some unreceived PNs may be moved below the
 * watermark, and we may subsequently reject those PNs as possibly being
 * duplicates even though we have not actually received those PNs. Since we bump
 * the watermark when a PN becomes provably ACKed, it follows that an unreceived
 * PN falls below the watermark (and thus becomes a false positive for the
 * purposes of duplicate detection) when a higher-numbered PN becomes provably
 * ACKed.
 *
 * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0,
 * n) will no longer be processed. Although datagrams may be reordered in the
 * network, a PN we receive can only become provably ACKed after our own
 * subsequently generated ACK frame is sent in a future TX packet, and then we
 * receive another RX PN acknowledging that TX packet. This means that a given RX
 * PN can only become provably ACKed at least 1 RTT after it is received; it is
 * unlikely that any reordered datagrams will still be "in the network" (and not
 * lost) by this time. If this does occur for whatever reason and a late PN is
 * received, the packet will be discarded unprocessed and the PN is simply
 * handled as though lost (a "written off" PN).
 *
 * **Data structure.** Our state for the RX handling side of the ACK manager, as
 * discussed above, mainly comprises:
 *
 *   a) a logical set of PNs, and
 *   b) a monotonically increasing PN counter (the watermark).
 *
 * For (a), we define a data structure which stores a logical set of PNs, which
 * we use to keep track of which PNs we have received but which have not yet
 * been provably ACKed, and thus will later need to generate an ACK frame for.
 *
 * The correspondence with the logical states discussed above is as follows. A
 * PN is in state (C) if it is below the watermark; otherwise it is in state (B)
 * if it is in the logical set of PNs, and in state (A) otherwise.
 *
 * Note that PNs are only removed from the PN set (when they become provably
 * ACKed or written off) by virtue of advancement of the watermark. Removing PNs
 * from the PN set any other way would be ambiguous as it would be
 * indistinguishable from a PN we have not yet received and risk us processing a
 * duplicate packet. In other words, for a given PN:
 *
 *   - State (A) can transition to state (B) or (C)
 *   - State (B) can transition to state (C) only
 *   - State (C) is the terminal state
 *
 * We can query the logical set data structure for PNs which have been received
 * but which have not been provably ACKed when we want to generate ACK frames.
 * Since ACK frames can be lost and/or we might not know that the peer has
 * successfully received them, we might generate multiple ACK frames covering a
 * given PN until that PN becomes provably ACKed and we finally remove it from
 * our set (by bumping the watermark) as no longer being our concern.
 *
 * The data structure used is the UINT_SET structure defined in uint_set.h,
 * which is used as a PN set. We use the following operations of the structure:
 *
 *   Insert Range: Used when we receive a new PN.
 *
 *   Remove Range: Used when bumping the watermark.
 *
 *   Query:        Used to determine if a PN is in the set.
 *
 * **Possible duplicates.** A PN is considered a possible duplicate when either:
 *
 *  a) its PN is already in the PN set (i.e. has already been received), or
 *  b) its PN is below the watermark (i.e. was provably ACKed or written off).
 *
 * A packet with a given PN is considered 'processable' when that PN is not
 * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable).
 *
 * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes
 * provably ACKed. This occurs when an ACK frame is received by the TX side of
 * the ACK manager; thus, there is necessary interaction between the TX and RX
 * sides of the ACK manager.
 *
 * This is implemented as follows. When a packet is queued as sent in the TX
 * side of the ACK manager, it may optionally have a Largest Acked value set on
 * it. The user of the ACK manager should do this if the packet being
 * transmitted contains an ACK frame, by setting the field to the Largest Acked
 * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID.
 * When a TX packet is eventually acknowledged which has this field set, it is
 * used to update the state of the RX side of the ACK manager by bumping the
 * watermark accordingly.
 */
struct rx_pkt_history_st {
    UINT_SET set;

    /*
     * Invariant: PNs below this are not in the set.
     * Invariant: This is monotonic and only ever increases.
     */
    QUIC_PN watermark;
};

static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
                                         QUIC_PN watermark);

static void rx_pkt_history_init(struct rx_pkt_history_st *h)
{
    ossl_uint_set_init(&h->set);
    h->watermark = 0;
}

static void rx_pkt_history_destroy(struct rx_pkt_history_st *h)
{
    ossl_uint_set_destroy(&h->set);
}

/*
 * Limit the number of ACK ranges we store to prevent resource consumption DoS
 * attacks.
 */
#define MAX_RX_ACK_RANGES   32

static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st *h)
{
    QUIC_PN highest = QUIC_PN_INVALID;

    while (ossl_list_uint_set_num(&h->set) > MAX_RX_ACK_RANGES) {
        UINT_RANGE r = ossl_list_uint_set_head(&h->set)->range;

        highest = (highest == QUIC_PN_INVALID)
            ? r.end : ossl_quic_pn_max(highest, r.end);

        ossl_uint_set_remove(&h->set, &r);
    }

    /*
     * Bump watermark to cover all PNs we removed to avoid accidental
     * reprocessing of packets.
     */
    if (highest != QUIC_PN_INVALID)
        rx_pkt_history_bump_watermark(h, highest + 1);
}

static int rx_pkt_history_add_pn(struct rx_pkt_history_st *h,
                                 QUIC_PN pn)
{
    UINT_RANGE r;

    r.start = pn;
    r.end   = pn;

    if (pn < h->watermark)
        return 1; /* consider this a success case */

    if (ossl_uint_set_insert(&h->set, &r) != 1)
        return 0;

    rx_pkt_history_trim_range_count(h);
    return 1;
}

static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st *h,
                                         QUIC_PN watermark)
{
    UINT_RANGE r;

    if (watermark <= h->watermark)
        return 1;

    /* Remove existing PNs below the watermark. */
    r.start = 0;
    r.end   = watermark - 1;
    if (ossl_uint_set_remove(&h->set, &r) != 1)
        return 0;

    h->watermark = watermark;
    return 1;
}

/*
 * ACK Manager Implementation
 * **************************
 * Implementation of the ACK manager proper.
 */

/* Constants used by the ACK manager; see RFC 9002. */
#define K_GRANULARITY           (1 * OSSL_TIME_MS)
#define K_PKT_THRESHOLD         3
#define K_TIME_THRESHOLD_NUM    9
#define K_TIME_THRESHOLD_DEN    8

/* The maximum number of times we allow PTO to be doubled. */
#define MAX_PTO_COUNT          16

/* Default maximum amount of time to leave an ACK-eliciting packet un-ACK'd. */
#define DEFAULT_TX_MAX_ACK_DELAY       ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY)

struct ossl_ackm_st {
    /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */
    struct tx_pkt_history_st tx_history[QUIC_PN_SPACE_NUM];

    /* Our list of received PNs which are not yet provably acked. */
    struct rx_pkt_history_st rx_history[QUIC_PN_SPACE_NUM];

    /* Polymorphic dependencies that we consume. */
    OSSL_TIME             (*now)(void *arg);
    void                   *now_arg;
    OSSL_STATM             *statm;
    const OSSL_CC_METHOD   *cc_method;
    OSSL_CC_DATA           *cc_data;

    /* RFC 9002 variables. */
    uint32_t        pto_count;
    QUIC_PN         largest_acked_pkt[QUIC_PN_SPACE_NUM];
    OSSL_TIME       time_of_last_ack_eliciting_pkt[QUIC_PN_SPACE_NUM];
    OSSL_TIME       loss_time[QUIC_PN_SPACE_NUM];
    OSSL_TIME       loss_detection_deadline;

    /* Lowest PN which is still not known to be ACKed. */
    QUIC_PN         lowest_unacked_pkt[QUIC_PN_SPACE_NUM];

    /* Time at which we got our first RTT sample, or 0. */
    OSSL_TIME       first_rtt_sample;

    /*
     * A packet's num_bytes are added to this if it is inflight,
     * and removed again once ack'd/lost/discarded.
     */
    uint64_t        bytes_in_flight;

    /*
     * A packet's num_bytes are added to this if it is both inflight and
     * ack-eliciting, and removed again once ack'd/lost/discarded.
     */
    uint64_t        ack_eliciting_bytes_in_flight[QUIC_PN_SPACE_NUM];

    /* Count of ECN-CE events. */
    uint64_t        peer_ecnce[QUIC_PN_SPACE_NUM];

    /* Set to 1 when the handshake is confirmed. */
    char            handshake_confirmed;

    /* Set to 1 when the peer has completed address validation. */
    char            peer_completed_addr_validation;

    /* Set to 1 when a PN space has been discarded. */
    char            discarded[QUIC_PN_SPACE_NUM];

    /* Set to 1 when we think an ACK frame should be generated. */
    char            rx_ack_desired[QUIC_PN_SPACE_NUM];

    /* Set to 1 if an ACK frame has ever been generated. */
    char            rx_ack_generated[QUIC_PN_SPACE_NUM];

    /* Probe request counts for reporting to the user. */
    OSSL_ACKM_PROBE_INFO    pending_probe;

    /* Generated ACK frames for each PN space. */
    OSSL_QUIC_FRAME_ACK     ack[QUIC_PN_SPACE_NUM];
    OSSL_QUIC_ACK_RANGE     ack_ranges[QUIC_PN_SPACE_NUM][MAX_RX_ACK_RANGES];

    /* Other RX state. */
    /* Largest PN we have RX'd. */
    QUIC_PN         rx_largest_pn[QUIC_PN_SPACE_NUM];

    /* Time at which the PN in rx_largest_pn was RX'd. */
    OSSL_TIME       rx_largest_time[QUIC_PN_SPACE_NUM];

    /*
     * ECN event counters. Each time we receive a packet with a given ECN label,
     * the corresponding ECN counter here is incremented.
     */
    uint64_t        rx_ect0[QUIC_PN_SPACE_NUM];
    uint64_t        rx_ect1[QUIC_PN_SPACE_NUM];
    uint64_t        rx_ecnce[QUIC_PN_SPACE_NUM];

    /*
     * Number of ACK-eliciting packets since last ACK. We use this to defer
     * emitting ACK frames until a threshold number of ACK-eliciting packets
     * have been received.
     */
    uint32_t        rx_ack_eliciting_pkts_since_last_ack[QUIC_PN_SPACE_NUM];

    /*
     * The ACK frame coalescing deadline at which we should flush any unsent ACK
     * frames.
     */
    OSSL_TIME       rx_ack_flush_deadline[QUIC_PN_SPACE_NUM];

    /*
     * The RX maximum ACK delay (the maximum amount of time our peer might
     * wait to send us an ACK after receiving an ACK-eliciting packet).
     */
    OSSL_TIME       rx_max_ack_delay;

    /*
     * The TX maximum ACK delay (the maximum amount of time we allow ourselves
     * to wait before generating an ACK after receiving an ACK-eliciting
     * packet).
     */
    OSSL_TIME       tx_max_ack_delay;

    /* Callbacks for deadline updates. */
    void (*loss_detection_deadline_cb)(OSSL_TIME deadline, void *arg);
    void *loss_detection_deadline_cb_arg;

    void (*ack_deadline_cb)(OSSL_TIME deadline, int pkt_space, void *arg);
    void *ack_deadline_cb_arg;
};

static ossl_inline uint32_t min_u32(uint32_t x, uint32_t y)
{
    return x < y ? x : y;
}

/*
 * Get TX history for a given packet number space. Must not have been
 * discarded.
 */
static struct tx_pkt_history_st *get_tx_history(OSSL_ACKM *ackm, int pkt_space)
{
    assert(!ackm->discarded[pkt_space]);

    return &ackm->tx_history[pkt_space];
}

/*
 * Get RX history for a given packet number space. Must not have been
 * discarded.
 */
static struct rx_pkt_history_st *get_rx_history(OSSL_ACKM *ackm, int pkt_space)
{
    assert(!ackm->discarded[pkt_space]);

    return &ackm->rx_history[pkt_space];
}

/* Does the newly-acknowledged list contain any ack-eliciting packet? */
static int ack_includes_ack_eliciting(OSSL_ACKM_TX_PKT *pkt)
{
    for (; pkt != NULL; pkt = pkt->anext)
        if (pkt->is_ack_eliciting)
            return 1;

    return 0;
}

/* Return number of ACK-eliciting bytes in flight across all PN spaces. */
static uint64_t ackm_ack_eliciting_bytes_in_flight(OSSL_ACKM *ackm)
{
    int i;
    uint64_t total = 0;

    for (i = 0; i < QUIC_PN_SPACE_NUM; ++i)
        total += ackm->ack_eliciting_bytes_in_flight[i];

    return total;
}

/* Return 1 if the range contains the given PN. */
static int range_contains(const OSSL_QUIC_ACK_RANGE *range, QUIC_PN pn)
{
    return pn >= range->start && pn <= range->end;
}

/*
 * Given a logical representation of an ACK frame 'ack', create a singly-linked
 * list of the newly ACK'd frames; that is, of frames which are matched by the
 * list of PN ranges contained in the ACK frame. The packet structures in the
 * list returned are removed from the TX history list. Returns a pointer to the
 * list head (or NULL) if empty.
 */
static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_newly_acked_pkts(OSSL_ACKM *ackm,
                                                                 const OSSL_QUIC_FRAME_ACK *ack,
                                                                 int pkt_space)
{
    OSSL_ACKM_TX_PKT *acked_pkts = NULL, **fixup = &acked_pkts, *pkt, *pprev;
    struct tx_pkt_history_st *h;
    size_t ridx = 0;

    assert(ack->num_ack_ranges > 0);

    /*
     * Our history list is a list of packets sorted in ascending order
     * by packet number.
     *
     * ack->ack_ranges is a list of packet number ranges in descending order.
     *
     * Walk through our history list from the end in order to efficiently detect
     * membership in the specified ack ranges. As an optimization, we use our
     * hashtable to try and skip to the first matching packet. This may fail if
     * the ACK ranges given include nonexistent packets.
     */
    h = get_tx_history(ackm, pkt_space);

    pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end);
    if (pkt == NULL)
        pkt = ossl_list_tx_history_tail(&h->packets);

    for (; pkt != NULL; pkt = pprev) {
        /*
         * Save prev value as it will be zeroed if we remove the packet from the
         * history list below.
         */
        pprev = ossl_list_tx_history_prev(pkt);

        for (;; ++ridx) {
            if (ridx >= ack->num_ack_ranges) {
                /*
                 * We have exhausted all ranges so stop here, even if there are
                 * more packets to look at.
                 */
                goto stop;
            }

            if (range_contains(&ack->ack_ranges[ridx], pkt->pkt_num)) {
                /* We have matched this range. */
                tx_pkt_history_remove(h, pkt->pkt_num);

                *fixup = pkt;
                fixup = &pkt->anext;
                *fixup = NULL;
                break;
            } else if (pkt->pkt_num > ack->ack_ranges[ridx].end) {
                /*
                 * We have not reached this range yet in our list, so do not
                 * advance ridx.
                 */
                break;
            } else {
                /*
                 * We have moved beyond this range, so advance to the next range
                 * and try matching again.
                 */
                assert(pkt->pkt_num < ack->ack_ranges[ridx].start);
                continue;
            }
        }
    }
stop:

    return acked_pkts;
}

/*
 * Create a singly-linked list of newly detected-lost packets in the given
 * packet number space. Returns the head of the list or NULL if no packets were
 * detected lost. The packets in the list are removed from the TX history list.
 */
static OSSL_ACKM_TX_PKT *ackm_detect_and_remove_lost_pkts(OSSL_ACKM *ackm,
                                                          int pkt_space)
{
    OSSL_ACKM_TX_PKT *lost_pkts = NULL, **fixup = &lost_pkts, *pkt, *pnext;
    OSSL_TIME loss_delay, lost_send_time, now;
    OSSL_RTT_INFO rtt;
    struct tx_pkt_history_st *h;

    assert(ackm->largest_acked_pkt[pkt_space] != QUIC_PN_INVALID);

    ossl_statm_get_rtt_info(ackm->statm, &rtt);

    ackm->loss_time[pkt_space] = ossl_time_zero();

    loss_delay = ossl_time_multiply(ossl_time_max(rtt.latest_rtt,
                                                  rtt.smoothed_rtt),
                                    K_TIME_THRESHOLD_NUM);
    loss_delay = ossl_time_divide(loss_delay, K_TIME_THRESHOLD_DEN);

    /* Minimum time of K_GRANULARITY before packets are deemed lost. */
    loss_delay = ossl_time_max(loss_delay, ossl_ticks2time(K_GRANULARITY));

    /* Packets sent before this time are deemed lost. */
    now = ackm->now(ackm->now_arg);
    lost_send_time = ossl_time_subtract(now, loss_delay);

    h   = get_tx_history(ackm, pkt_space);
    pkt = ossl_list_tx_history_head(&h->packets);

    for (; pkt != NULL; pkt = pnext) {
        assert(pkt_space == pkt->pkt_space);

        /*
         * Save prev value as it will be zeroed if we remove the packet from the
         * history list below.
         */
        pnext = ossl_list_tx_history_next(pkt);

        if (pkt->pkt_num > ackm->largest_acked_pkt[pkt_space])
            continue;

        /*
         * Mark packet as lost, or set time when it should be marked.
         */
        if (ossl_time_compare(pkt->time, lost_send_time) <= 0
                || ackm->largest_acked_pkt[pkt_space]
                >= pkt->pkt_num + K_PKT_THRESHOLD) {
            tx_pkt_history_remove(h, pkt->pkt_num);

            *fixup = pkt;
            fixup = &pkt->lnext;
            *fixup = NULL;
        } else {
            if (ossl_time_is_zero(ackm->loss_time[pkt_space]))
                ackm->loss_time[pkt_space] =
                    ossl_time_add(pkt->time, loss_delay);
            else
                ackm->loss_time[pkt_space] =
                    ossl_time_min(ackm->loss_time[pkt_space],
                                  ossl_time_add(pkt->time, loss_delay));
        }
    }

    return lost_pkts;
}

static OSSL_TIME ackm_get_loss_time_and_space(OSSL_ACKM *ackm, int *pspace)
{
    OSSL_TIME time = ackm->loss_time[QUIC_PN_SPACE_INITIAL];
    int i, space = QUIC_PN_SPACE_INITIAL;

    for (i = space + 1; i < QUIC_PN_SPACE_NUM; ++i)
        if (ossl_time_is_zero(time)
            || ossl_time_compare(ackm->loss_time[i], time) == -1) {
            time    = ackm->loss_time[i];
            space   = i;
        }

    *pspace = space;
    return time;
}

static OSSL_TIME ackm_get_pto_time_and_space(OSSL_ACKM *ackm, int *space)
{
    OSSL_RTT_INFO rtt;
    OSSL_TIME duration;
    OSSL_TIME pto_timeout = ossl_time_infinite(), t;
    int pto_space = QUIC_PN_SPACE_INITIAL, i;

    ossl_statm_get_rtt_info(ackm->statm, &rtt);

    duration
        = ossl_time_add(rtt.smoothed_rtt,
                        ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4),
                                      ossl_ticks2time(K_GRANULARITY)));

    duration
        = ossl_time_multiply(duration,
                             (uint64_t)1 << min_u32(ackm->pto_count,
                                                    MAX_PTO_COUNT));

    /* Anti-deadlock PTO starts from the current time. */
    if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) {
        assert(!ackm->peer_completed_addr_validation);

        *space = ackm->discarded[QUIC_PN_SPACE_INITIAL]
                    ? QUIC_PN_SPACE_HANDSHAKE
                    : QUIC_PN_SPACE_INITIAL;
        return ossl_time_add(ackm->now(ackm->now_arg), duration);
    }

    for (i = QUIC_PN_SPACE_INITIAL; i < QUIC_PN_SPACE_NUM; ++i) {
        if (ackm->ack_eliciting_bytes_in_flight[i] == 0)
            continue;

        if (i == QUIC_PN_SPACE_APP) {
            /* Skip application data until handshake confirmed. */
            if (!ackm->handshake_confirmed)
                break;

            /* Include max_ack_delay and backoff for app data. */
            if (!ossl_time_is_infinite(ackm->rx_max_ack_delay)) {
                uint64_t factor
                    = (uint64_t)1 << min_u32(ackm->pto_count, MAX_PTO_COUNT);

                duration
                    = ossl_time_add(duration,
                                    ossl_time_multiply(ackm->rx_max_ack_delay,
                                                       factor));
            }
        }

        t = ossl_time_add(ackm->time_of_last_ack_eliciting_pkt[i], duration);
        if (ossl_time_compare(t, pto_timeout) < 0) {
            pto_timeout = t;
            pto_space   = i;
        }
    }

    *space = pto_space;
    return pto_timeout;
}

static void ackm_set_loss_detection_timer_actual(OSSL_ACKM *ackm,
                                                 OSSL_TIME deadline)
{
    ackm->loss_detection_deadline = deadline;

    if (ackm->loss_detection_deadline_cb != NULL)
        ackm->loss_detection_deadline_cb(deadline,
                                         ackm->loss_detection_deadline_cb_arg);
}

static int ackm_set_loss_detection_timer(OSSL_ACKM *ackm)
{
    int space;
    OSSL_TIME earliest_loss_time, timeout;

    earliest_loss_time = ackm_get_loss_time_and_space(ackm, &space);
    if (!ossl_time_is_zero(earliest_loss_time)) {
        /* Time threshold loss detection. */
        ackm_set_loss_detection_timer_actual(ackm, earliest_loss_time);
        return 1;
    }

    if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0
            && ackm->peer_completed_addr_validation) {
        /*
         * Nothing to detect lost, so no timer is set. However, the client
         * needs to arm the timer if the server might be blocked by the
         * anti-amplification limit.
         */
        ackm_set_loss_detection_timer_actual(ackm, ossl_time_zero());
        return 1;
    }

    timeout = ackm_get_pto_time_and_space(ackm, &space);
    ackm_set_loss_detection_timer_actual(ackm, timeout);
    return 1;
}

static int ackm_in_persistent_congestion(OSSL_ACKM *ackm,
                                         const OSSL_ACKM_TX_PKT *lpkt)
{
    /* TODO(QUIC): Persistent congestion not currently implemented. */
    return 0;
}

static void ackm_on_pkts_lost(OSSL_ACKM *ackm, int pkt_space,
                              const OSSL_ACKM_TX_PKT *lpkt, int pseudo)
{
    const OSSL_ACKM_TX_PKT *p, *pnext;
    OSSL_RTT_INFO rtt;
    QUIC_PN largest_pn_lost = 0;
    OSSL_CC_LOSS_INFO loss_info = {0};
    uint32_t flags = 0;

    for (p = lpkt; p != NULL; p = pnext) {
        pnext = p->lnext;

        if (p->is_inflight) {
            ackm->bytes_in_flight -= p->num_bytes;
            if (p->is_ack_eliciting)
                ackm->ack_eliciting_bytes_in_flight[p->pkt_space]
                    -= p->num_bytes;

            if (p->pkt_num > largest_pn_lost)
                largest_pn_lost = p->pkt_num;
        }

        if (!pseudo) {
            /*
             * If this is pseudo-loss (e.g. during connection retry) we do not
             * inform the CC as it is not a real loss and not reflective of
             * network conditions.
             */
            loss_info.tx_time = p->time;
            loss_info.tx_size = p->num_bytes;

            ackm->cc_method->on_data_lost(ackm->cc_data, &loss_info);
        }

        p->on_lost(p->cb_arg);
    }

    /*
     * Persistent congestion can only be considered if we have gotten at least
     * one RTT sample.
     */
    ossl_statm_get_rtt_info(ackm->statm, &rtt);
    if (!ossl_time_is_zero(ackm->first_rtt_sample)
        && ackm_in_persistent_congestion(ackm, lpkt))
        flags |= OSSL_CC_LOST_FLAG_PERSISTENT_CONGESTION;

    ackm->cc_method->on_data_lost_finished(ackm->cc_data, flags);
}

static void ackm_on_pkts_acked(OSSL_ACKM *ackm, const OSSL_ACKM_TX_PKT *apkt)
{
    const OSSL_ACKM_TX_PKT *anext;
    QUIC_PN last_pn_acked = 0;
    OSSL_CC_ACK_INFO ainfo = {0};

    for (; apkt != NULL; apkt = anext) {
        if (apkt->is_inflight) {
            ackm->bytes_in_flight -= apkt->num_bytes;
            if (apkt->is_ack_eliciting)
                ackm->ack_eliciting_bytes_in_flight[apkt->pkt_space]
                    -= apkt->num_bytes;

            if (apkt->pkt_num > last_pn_acked)
                last_pn_acked = apkt->pkt_num;

            if (apkt->largest_acked != QUIC_PN_INVALID)
                /*
                 * This can fail, but it is monotonic; worst case we try again
                 * next time.
                 */
                rx_pkt_history_bump_watermark(get_rx_history(ackm,
                                                             apkt->pkt_space),
                                              apkt->largest_acked + 1);
        }

        ainfo.tx_time = apkt->time;
        ainfo.tx_size = apkt->num_bytes;

        anext = apkt->anext;
        apkt->on_acked(apkt->cb_arg); /* may free apkt */

        ackm->cc_method->on_data_acked(ackm->cc_data, &ainfo);
    }
}

OSSL_ACKM *ossl_ackm_new(OSSL_TIME (*now)(void *arg),
                         void *now_arg,
                         OSSL_STATM *statm,
                         const OSSL_CC_METHOD *cc_method,
                         OSSL_CC_DATA *cc_data)
{
    OSSL_ACKM *ackm;
    int i;

    ackm = OPENSSL_zalloc(sizeof(OSSL_ACKM));
    if (ackm == NULL)
        return NULL;

    for (i = 0; i < (int)OSSL_NELEM(ackm->tx_history); ++i) {
        ackm->largest_acked_pkt[i] = QUIC_PN_INVALID;
        ackm->rx_ack_flush_deadline[i] = ossl_time_infinite();
        if (tx_pkt_history_init(&ackm->tx_history[i]) < 1)
            goto err;
    }

    for (i = 0; i < (int)OSSL_NELEM(ackm->rx_history); ++i)
        rx_pkt_history_init(&ackm->rx_history[i]);

    ackm->now       = now;
    ackm->now_arg   = now_arg;
    ackm->statm     = statm;
    ackm->cc_method = cc_method;
    ackm->cc_data   = cc_data;

    ackm->rx_max_ack_delay = ossl_ms2time(QUIC_DEFAULT_MAX_ACK_DELAY);
    ackm->tx_max_ack_delay = DEFAULT_TX_MAX_ACK_DELAY;

    return ackm;

err:
    while (--i >= 0)
        tx_pkt_history_destroy(&ackm->tx_history[i]);

    OPENSSL_free(ackm);
    return NULL;
}

void ossl_ackm_free(OSSL_ACKM *ackm)
{
    size_t i;

    if (ackm == NULL)
        return;

    for (i = 0; i < OSSL_NELEM(ackm->tx_history); ++i)
        if (!ackm->discarded[i]) {
            tx_pkt_history_destroy(&ackm->tx_history[i]);
            rx_pkt_history_destroy(&ackm->rx_history[i]);
        }

    OPENSSL_free(ackm);
}

int ossl_ackm_on_tx_packet(OSSL_ACKM *ackm, OSSL_ACKM_TX_PKT *pkt)
{
    struct tx_pkt_history_st *h = get_tx_history(ackm, pkt->pkt_space);

    /* Time must be set and not move backwards. */
    if (ossl_time_is_zero(pkt->time)
        || ossl_time_compare(ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space],
                             pkt->time) > 0)
        return 0;

    /* Must have non-zero number of bytes. */
    if (pkt->num_bytes == 0)
        return 0;

    /* Does not make any sense for a non-in-flight packet to be ACK-eliciting. */
    if (!pkt->is_inflight && pkt->is_ack_eliciting)
        return 0;

    if (tx_pkt_history_add(h, pkt) == 0)
        return 0;

    if (pkt->is_inflight) {
        if (pkt->is_ack_eliciting) {
            ackm->time_of_last_ack_eliciting_pkt[pkt->pkt_space] = pkt->time;
            ackm->ack_eliciting_bytes_in_flight[pkt->pkt_space]
                += pkt->num_bytes;
        }

        ackm->bytes_in_flight += pkt->num_bytes;
        ackm_set_loss_detection_timer(ackm);

        ackm->cc_method->on_data_sent(ackm->cc_data, pkt->num_bytes);
    }

    return 1;
}

int ossl_ackm_on_rx_datagram(OSSL_ACKM *ackm, size_t num_bytes)
{
    /* No-op on the client. */
    return 1;
}

static void ackm_process_ecn(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack,
                             int pkt_space)
{
    struct tx_pkt_history_st *h;
    OSSL_ACKM_TX_PKT *pkt;
    OSSL_CC_ECN_INFO ecn_info = {0};

    /*
     * If the ECN-CE counter reported by the peer has increased, this could
     * be a new congestion event.
     */
    if (ack->ecnce > ackm->peer_ecnce[pkt_space]) {
        ackm->peer_ecnce[pkt_space] = ack->ecnce;

        h = get_tx_history(ackm, pkt_space);
        pkt = tx_pkt_history_by_pkt_num(h, ack->ack_ranges[0].end);
        if (pkt == NULL)
            return;

        ecn_info.largest_acked_time = pkt->time;
        ackm->cc_method->on_ecn(ackm->cc_data, &ecn_info);
    }
}

int ossl_ackm_on_rx_ack_frame(OSSL_ACKM *ackm, const OSSL_QUIC_FRAME_ACK *ack,
                              int pkt_space, OSSL_TIME rx_time)
{
    OSSL_ACKM_TX_PKT *na_pkts, *lost_pkts;
    int must_set_timer = 0;

    if (ackm->largest_acked_pkt[pkt_space] == QUIC_PN_INVALID)
        ackm->largest_acked_pkt[pkt_space] = ack->ack_ranges[0].end;
    else
        ackm->largest_acked_pkt[pkt_space]
            = ossl_quic_pn_max(ackm->largest_acked_pkt[pkt_space],
                               ack->ack_ranges[0].end);

    /*
     * If we get an ACK in the handshake space, address validation is completed.
     * Make sure we update the timer, even if no packets were ACK'd.
     */
    if (!ackm->peer_completed_addr_validation
            && pkt_space == QUIC_PN_SPACE_HANDSHAKE) {
        ackm->peer_completed_addr_validation = 1;
        must_set_timer = 1;
    }

    /*
     * Find packets that are newly acknowledged and remove them from the list.
     */
    na_pkts = ackm_detect_and_remove_newly_acked_pkts(ackm, ack, pkt_space);
    if (na_pkts == NULL) {
        if (must_set_timer)
            ackm_set_loss_detection_timer(ackm);

        return 1;
    }

    /*
     * Update the RTT if the largest acknowledged is newly acked and at least
     * one ACK-eliciting packet was newly acked.
     *
     * First packet in the list is always the one with the largest PN.
     */
    if (na_pkts->pkt_num == ack->ack_ranges[0].end &&
        ack_includes_ack_eliciting(na_pkts)) {
        OSSL_TIME now = ackm->now(ackm->now_arg), ack_delay;
        if (ossl_time_is_zero(ackm->first_rtt_sample))
            ackm->first_rtt_sample = now;

        /* Enforce maximum ACK delay. */
        ack_delay = ack->delay_time;
        if (ackm->handshake_confirmed)
            ack_delay = ossl_time_min(ack_delay, ackm->rx_max_ack_delay);

        ossl_statm_update_rtt(ackm->statm, ack_delay,
                              ossl_time_subtract(now, na_pkts->time));
    }

    /*
     * Process ECN information if present.
     *
     * We deliberately do most ECN processing in the ACKM rather than the
     * congestion controller to avoid having to give the congestion controller
     * access to ACKM internal state.
     */
    if (ack->ecn_present)
        ackm_process_ecn(ackm, ack, pkt_space);

    /* Handle inferred loss. */
    lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space);
    if (lost_pkts != NULL)
        ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0);

    ackm_on_pkts_acked(ackm, na_pkts);

    /*
     * Reset pto_count unless the client is unsure if the server validated the
     * client's address.
     */
    if (ackm->peer_completed_addr_validation)
        ackm->pto_count = 0;

    ackm_set_loss_detection_timer(ackm);
    return 1;
}

int ossl_ackm_on_pkt_space_discarded(OSSL_ACKM *ackm, int pkt_space)
{
    OSSL_ACKM_TX_PKT *pkt, *pnext;
    uint64_t num_bytes_invalidated = 0;

    if (ackm->discarded[pkt_space])
        return 0;

    if (pkt_space == QUIC_PN_SPACE_HANDSHAKE)
        ackm->peer_completed_addr_validation = 1;

    for (pkt = ossl_list_tx_history_head(&get_tx_history(ackm, pkt_space)->packets);
            pkt != NULL; pkt = pnext) {
        pnext = ossl_list_tx_history_next(pkt);
        if (pkt->is_inflight) {
            ackm->bytes_in_flight -= pkt->num_bytes;
            num_bytes_invalidated += pkt->num_bytes;
        }

        pkt->on_discarded(pkt->cb_arg); /* may free pkt */
    }

    tx_pkt_history_destroy(&ackm->tx_history[pkt_space]);
    rx_pkt_history_destroy(&ackm->rx_history[pkt_space]);

    if (num_bytes_invalidated > 0)
        ackm->cc_method->on_data_invalidated(ackm->cc_data,
                                             num_bytes_invalidated);

    ackm->time_of_last_ack_eliciting_pkt[pkt_space] = ossl_time_zero();
    ackm->loss_time[pkt_space] = ossl_time_zero();
    ackm->pto_count = 0;
    ackm->discarded[pkt_space] = 1;
    ackm->ack_eliciting_bytes_in_flight[pkt_space] = 0;
    ackm_set_loss_detection_timer(ackm);
    return 1;
}

int ossl_ackm_on_handshake_confirmed(OSSL_ACKM *ackm)
{
    ackm->handshake_confirmed               = 1;
    ackm->peer_completed_addr_validation    = 1;
    ackm_set_loss_detection_timer(ackm);
    return 1;
}

static void ackm_queue_probe_anti_deadlock_handshake(OSSL_ACKM *ackm)
{
    ++ackm->pending_probe.anti_deadlock_handshake;
}

static void ackm_queue_probe_anti_deadlock_initial(OSSL_ACKM *ackm)
{
    ++ackm->pending_probe.anti_deadlock_initial;
}

static void ackm_queue_probe(OSSL_ACKM *ackm, int pkt_space)
{
    /*
     * TODO(QUIC): We are allowed to send either one or two probe packets here.
     * Determine a strategy for when we should send two probe packets.
     */
    ++ackm->pending_probe.pto[pkt_space];
}

int ossl_ackm_on_timeout(OSSL_ACKM *ackm)
{
    int pkt_space;
    OSSL_TIME earliest_loss_time;
    OSSL_ACKM_TX_PKT *lost_pkts;

    earliest_loss_time = ackm_get_loss_time_and_space(ackm, &pkt_space);
    if (!ossl_time_is_zero(earliest_loss_time)) {
        /* Time threshold loss detection. */
        lost_pkts = ackm_detect_and_remove_lost_pkts(ackm, pkt_space);
        assert(lost_pkts != NULL);
        ackm_on_pkts_lost(ackm, pkt_space, lost_pkts, /*pseudo=*/0);
        ackm_set_loss_detection_timer(ackm);
        return 1;
    }

    if (ackm_ack_eliciting_bytes_in_flight(ackm) == 0) {
        assert(!ackm->peer_completed_addr_validation);
        /*
         * Client sends an anti-deadlock packet: Initial is padded to earn more
         * anti-amplification credit. A handshake packet proves address
         * ownership.
         */
        if (ackm->discarded[QUIC_PN_SPACE_INITIAL])
            ackm_queue_probe_anti_deadlock_handshake(ackm);
        else
            ackm_queue_probe_anti_deadlock_initial(ackm);
    } else {
        /*
         * PTO. The user of the ACKM should send new data if available, else
         * retransmit old data, or if neither is available, send a single PING
         * frame.
         */
        ackm_get_pto_time_and_space(ackm, &pkt_space);
        ackm_queue_probe(ackm, pkt_space);
    }

    ++ackm->pto_count;
    ackm_set_loss_detection_timer(ackm);
    return 1;
}

OSSL_TIME ossl_ackm_get_loss_detection_deadline(OSSL_ACKM *ackm)
{
    return ackm->loss_detection_deadline;
}

OSSL_ACKM_PROBE_INFO *ossl_ackm_get0_probe_request(OSSL_ACKM *ackm)
{
    return &ackm->pending_probe;
}

int ossl_ackm_get_largest_unacked(OSSL_ACKM *ackm, int pkt_space, QUIC_PN *pn)
{
    struct tx_pkt_history_st *h;
    OSSL_ACKM_TX_PKT *p;

    h = get_tx_history(ackm, pkt_space);
    p = ossl_list_tx_history_tail(&h->packets);
    if (p != NULL) {
        *pn = p->pkt_num;
        return 1;
    }

    return 0;
}

/* Number of ACK-eliciting packets RX'd before we always emit an ACK. */
#define PKTS_BEFORE_ACK     2

/*
 * Return 1 if emission of an ACK frame is currently desired.
 *
 * This occurs when one or more of the following conditions occurs:
 *
 *   - We have flagged that we want to send an ACK frame
 *     (for example, due to the packet threshold count being exceeded), or
 *
 *   - We have exceeded the ACK flush deadline, meaning that
 *     we have received at least one ACK-eliciting packet, but held off on
 *     sending an ACK frame immediately in the hope that more ACK-eliciting
 *     packets might come in, but not enough did and we are now requesting
 *     transmission of an ACK frame anyway.
 *
 */
int ossl_ackm_is_ack_desired(OSSL_ACKM *ackm, int pkt_space)
{
    return ackm->rx_ack_desired[pkt_space]
        || (!ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space])
            && ossl_time_compare(ackm->now(ackm->now_arg),
                                 ackm->rx_ack_flush_deadline[pkt_space]) >= 0);
}

/*
 * Returns 1 if an ACK frame matches a given packet number.
 */
static int ack_contains(const OSSL_QUIC_FRAME_ACK *ack, QUIC_PN pkt_num)
{
    size_t i;

    for (i = 0; i < ack->num_ack_ranges; ++i)
        if (range_contains(&ack->ack_ranges[i], pkt_num))
            return 1;

    return 0;
}

/*
 * Returns 1 iff a PN (which we have just received) was previously reported as
 * implied missing (by us, in an ACK frame we previously generated).
 */
static int ackm_is_missing(OSSL_ACKM *ackm, int pkt_space, QUIC_PN pkt_num)
{
    /*
     * A PN is implied missing if it is not greater than the highest PN in our
     * generated ACK frame, but is not matched by the frame.
     */
    return ackm->ack[pkt_space].num_ack_ranges > 0
        && pkt_num <= ackm->ack[pkt_space].ack_ranges[0].end
        && !ack_contains(&ackm->ack[pkt_space], pkt_num);
}

/*
 * Returns 1 iff our RX of a PN newly establishes the implication of missing
 * packets.
 */
static int ackm_has_newly_missing(OSSL_ACKM *ackm, int pkt_space)
{
    struct rx_pkt_history_st *h;

    h = get_rx_history(ackm, pkt_space);

    if (ossl_list_uint_set_is_empty(&h->set))
        return 0;

    /*
     * The second condition here establishes that the highest PN range in our RX
     * history comprises only a single PN. If there is more than one, then this
     * function will have returned 1 during a previous call to
     * ossl_ackm_on_rx_packet assuming the third condition below was met. Thus
     * we only return 1 when the missing PN condition is newly established.
     *
     * The third condition here establishes that the highest PN range in our RX
     * history is beyond (and does not border) the highest PN we have yet
     * reported in any ACK frame. Thus there is a gap of at least one PN between
     * the PNs we have ACK'd previously and the PN we have just received.
     */
    return ackm->ack[pkt_space].num_ack_ranges > 0
        && ossl_list_uint_set_tail(&h->set)->range.start
           == ossl_list_uint_set_tail(&h->set)->range.end
        && ossl_list_uint_set_tail(&h->set)->range.start
            > ackm->ack[pkt_space].ack_ranges[0].end + 1;
}

static void ackm_set_flush_deadline(OSSL_ACKM *ackm, int pkt_space,
                                    OSSL_TIME deadline)
{
    ackm->rx_ack_flush_deadline[pkt_space] = deadline;

    if (ackm->ack_deadline_cb != NULL)
        ackm->ack_deadline_cb(ossl_ackm_get_ack_deadline(ackm, pkt_space),
                              pkt_space, ackm->ack_deadline_cb_arg);
}

/* Explicitly flags that we want to generate an ACK frame. */
static void ackm_queue_ack(OSSL_ACKM *ackm, int pkt_space)
{
    ackm->rx_ack_desired[pkt_space] = 1;

    /* Cancel deadline. */
    ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite());
}

static void ackm_on_rx_ack_eliciting(OSSL_ACKM *ackm,
                                     OSSL_TIME rx_time, int pkt_space,
                                     int was_missing)
{
    OSSL_TIME tx_max_ack_delay;

    if (ackm->rx_ack_desired[pkt_space])
        /* ACK generation already requested so nothing to do. */
        return;

    ++ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space];

    if (!ackm->rx_ack_generated[pkt_space]
            || was_missing
            || ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space]
                >= PKTS_BEFORE_ACK
            || ackm_has_newly_missing(ackm, pkt_space)) {
        /*
         * Either:
         *
         *   - We have never yet generated an ACK frame, meaning that this
         *     is the first ever packet received, which we should always
         *     acknowledge immediately, or
         *
         *   - We previously reported the PN that we have just received as
         *     missing in a previous ACK frame (meaning that we should report
         *     the fact that we now have it to the peer immediately), or
         *
         *   - We have exceeded the ACK-eliciting packet threshold count
         *     for the purposes of ACK coalescing, so request transmission
         *     of an ACK frame, or
         *
         *   - The PN we just received and added to our PN RX history
         *     newly implies one or more missing PNs, in which case we should
         *     inform the peer by sending an ACK frame immediately.
         *
         * We do not test the ACK flush deadline here because it is tested
         * separately in ossl_ackm_is_ack_desired.
         */
        ackm_queue_ack(ackm, pkt_space);
        return;
    }

    /*
     * Not emitting an ACK yet.
     *
     * Update the ACK flush deadline.
     *
     * RFC 9000 s. 13.2.1: "An endpoint MUST acknowledge all ack-eliciting
     * Initial and Handshake packets immediately"; don't delay ACK generation if
     * we are using the Initial or Handshake PN spaces.
     */
    tx_max_ack_delay = ackm->tx_max_ack_delay;
    if (pkt_space == QUIC_PN_SPACE_INITIAL
        || pkt_space == QUIC_PN_SPACE_HANDSHAKE)
        tx_max_ack_delay = ossl_time_zero();

    if (ossl_time_is_infinite(ackm->rx_ack_flush_deadline[pkt_space]))
        ackm_set_flush_deadline(ackm, pkt_space,
                                ossl_time_add(rx_time, tx_max_ack_delay));
    else
        ackm_set_flush_deadline(ackm, pkt_space,
                                ossl_time_min(ackm->rx_ack_flush_deadline[pkt_space],
                                              ossl_time_add(rx_time,
                                                            tx_max_ack_delay)));
}

int ossl_ackm_on_rx_packet(OSSL_ACKM *ackm, const OSSL_ACKM_RX_PKT *pkt)
{
    struct rx_pkt_history_st *h = get_rx_history(ackm, pkt->pkt_space);
    int was_missing;

    if (ossl_ackm_is_rx_pn_processable(ackm, pkt->pkt_num, pkt->pkt_space) != 1)
        /* PN has already been processed or written off, no-op. */
        return 1;

    /*
     * Record the largest PN we have RX'd and the time we received it.
     * We use this to calculate the ACK delay field of ACK frames.
     */
    if (pkt->pkt_num > ackm->rx_largest_pn[pkt->pkt_space]) {
        ackm->rx_largest_pn[pkt->pkt_space]   = pkt->pkt_num;
        ackm->rx_largest_time[pkt->pkt_space] = pkt->time;
    }

    /*
     * If the PN we just received was previously implied missing by virtue of
     * being omitted from a previous ACK frame generated, we skip any packet
     * count thresholds or coalescing delays and emit a new ACK frame
     * immediately.
     */
    was_missing = ackm_is_missing(ackm, pkt->pkt_space, pkt->pkt_num);

    /*
     * Add the packet number to our history list of PNs we have not yet provably
     * acked.
     */
    if (rx_pkt_history_add_pn(h, pkt->pkt_num) != 1)
        return 0;

    /*
     * Receiving this packet may or may not cause us to emit an ACK frame.
     * We may not emit an ACK frame yet if we have not yet received a threshold
     * number of packets.
     */
    if (pkt->is_ack_eliciting)
        ackm_on_rx_ack_eliciting(ackm, pkt->time, pkt->pkt_space, was_missing);

    /* Update the ECN counters according to which ECN signal we got, if any. */
    switch (pkt->ecn) {
    case OSSL_ACKM_ECN_ECT0:
        ++ackm->rx_ect0[pkt->pkt_space];
        break;
    case OSSL_ACKM_ECN_ECT1:
        ++ackm->rx_ect1[pkt->pkt_space];
        break;
    case OSSL_ACKM_ECN_ECNCE:
        ++ackm->rx_ecnce[pkt->pkt_space];
        break;
    default:
        break;
    }

    return 1;
}

static void ackm_fill_rx_ack_ranges(OSSL_ACKM *ackm, int pkt_space,
                                    OSSL_QUIC_FRAME_ACK *ack)
{
    struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space);
    UINT_SET_ITEM *x;
    size_t i = 0;

    /*
     * Copy out ranges from the PN set, starting at the end, until we reach our
     * maximum number of ranges.
     */
    for (x = ossl_list_uint_set_tail(&h->set);
         x != NULL && i < OSSL_NELEM(ackm->ack_ranges);
         x = ossl_list_uint_set_prev(x), ++i) {
        ackm->ack_ranges[pkt_space][i].start = x->range.start;
        ackm->ack_ranges[pkt_space][i].end   = x->range.end;
    }

    ack->ack_ranges     = ackm->ack_ranges[pkt_space];
    ack->num_ack_ranges = i;
}

const OSSL_QUIC_FRAME_ACK *ossl_ackm_get_ack_frame(OSSL_ACKM *ackm,
                                                   int pkt_space)
{
    OSSL_QUIC_FRAME_ACK *ack = &ackm->ack[pkt_space];
    OSSL_TIME now = ackm->now(ackm->now_arg);

    ackm_fill_rx_ack_ranges(ackm, pkt_space, ack);

    if (!ossl_time_is_zero(ackm->rx_largest_time[pkt_space])
            && ossl_time_compare(now, ackm->rx_largest_time[pkt_space]) > 0
            && pkt_space == QUIC_PN_SPACE_APP)
        ack->delay_time =
            ossl_time_subtract(now, ackm->rx_largest_time[pkt_space]);
    else
        ack->delay_time = ossl_time_zero();

    ack->ect0              = ackm->rx_ect0[pkt_space];
    ack->ect1              = ackm->rx_ect1[pkt_space];
    ack->ecnce             = ackm->rx_ecnce[pkt_space];
    ack->ecn_present       = 1;

    ackm->rx_ack_eliciting_pkts_since_last_ack[pkt_space] = 0;

    ackm->rx_ack_generated[pkt_space]       = 1;
    ackm->rx_ack_desired[pkt_space]         = 0;
    ackm_set_flush_deadline(ackm, pkt_space, ossl_time_infinite());
    return ack;
}


OSSL_TIME ossl_ackm_get_ack_deadline(OSSL_ACKM *ackm, int pkt_space)
{
    if (ackm->rx_ack_desired[pkt_space])
        /* Already desired, deadline is now. */
        return ossl_time_zero();

    return ackm->rx_ack_flush_deadline[pkt_space];
}

int ossl_ackm_is_rx_pn_processable(OSSL_ACKM *ackm, QUIC_PN pn, int pkt_space)
{
    struct rx_pkt_history_st *h = get_rx_history(ackm, pkt_space);

    return pn >= h->watermark && ossl_uint_set_query(&h->set, pn) == 0;
}

void ossl_ackm_set_loss_detection_deadline_callback(OSSL_ACKM *ackm,
                                                    void (*fn)(OSSL_TIME deadline,
                                                               void *arg),
                                                    void *arg)
{
    ackm->loss_detection_deadline_cb      = fn;
    ackm->loss_detection_deadline_cb_arg  = arg;
}

void ossl_ackm_set_ack_deadline_callback(OSSL_ACKM *ackm,
                                         void (*fn)(OSSL_TIME deadline,
                                                    int pkt_space,
                                                    void *arg),
                                         void *arg)
{
    ackm->ack_deadline_cb     = fn;
    ackm->ack_deadline_cb_arg = arg;
}

int ossl_ackm_mark_packet_pseudo_lost(OSSL_ACKM *ackm,
                                      int pkt_space, QUIC_PN pn)
{
    struct tx_pkt_history_st *h = get_tx_history(ackm, pkt_space);
    OSSL_ACKM_TX_PKT *pkt;

    pkt = tx_pkt_history_by_pkt_num(h, pn);
    if (pkt == NULL)
        return 0;

    tx_pkt_history_remove(h, pkt->pkt_num);
    pkt->lnext = NULL;
    ackm_on_pkts_lost(ackm, pkt_space, pkt, /*pseudo=*/1);
    return 1;
}

OSSL_TIME ossl_ackm_get_pto_duration(OSSL_ACKM *ackm)
{
    OSSL_TIME duration;
    OSSL_RTT_INFO rtt;

    ossl_statm_get_rtt_info(ackm->statm, &rtt);

    duration = ossl_time_add(rtt.smoothed_rtt,
                             ossl_time_max(ossl_time_multiply(rtt.rtt_variance, 4),
                                           ossl_ticks2time(K_GRANULARITY)));
    if (!ossl_time_is_infinite(ackm->rx_max_ack_delay))
        duration = ossl_time_add(duration, ackm->rx_max_ack_delay);

    return duration;
}

QUIC_PN ossl_ackm_get_largest_acked(OSSL_ACKM *ackm, int pkt_space)
{
    return ackm->largest_acked_pkt[pkt_space];
}

void ossl_ackm_set_rx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME rx_max_ack_delay)
{
    ackm->rx_max_ack_delay = rx_max_ack_delay;
}

void ossl_ackm_set_tx_max_ack_delay(OSSL_ACKM *ackm, OSSL_TIME tx_max_ack_delay)
{
    ackm->tx_max_ack_delay = tx_max_ack_delay;
}