• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

lightningnetwork / lnd / 12375116696

17 Dec 2024 02:29PM UTC coverage: 58.366% (-0.2%) from 58.595%
12375116696

Pull #8777

github

ziggie1984
docs: add release-notes
Pull Request #8777: multi: make deletion of edge atomic.

132 of 177 new or added lines in 6 files covered. (74.58%)

670 existing lines in 37 files now uncovered.

133926 of 229458 relevant lines covered (58.37%)

19223.6 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.95
/routing/payment_lifecycle.go
1
package routing
2

3
import (
4
        "context"
5
        "errors"
6
        "fmt"
7
        "time"
8

9
        "github.com/btcsuite/btcd/btcec/v2"
10
        "github.com/davecgh/go-spew/spew"
11
        sphinx "github.com/lightningnetwork/lightning-onion"
12
        "github.com/lightningnetwork/lnd/channeldb"
13
        "github.com/lightningnetwork/lnd/fn/v2"
14
        "github.com/lightningnetwork/lnd/graph/db/models"
15
        "github.com/lightningnetwork/lnd/htlcswitch"
16
        "github.com/lightningnetwork/lnd/lntypes"
17
        "github.com/lightningnetwork/lnd/lnwire"
18
        "github.com/lightningnetwork/lnd/routing/route"
19
        "github.com/lightningnetwork/lnd/routing/shards"
20
        "github.com/lightningnetwork/lnd/tlv"
21
)
22

23
// ErrPaymentLifecycleExiting is used when waiting for htlc attempt result, but
24
// the payment lifecycle is exiting.
25
var ErrPaymentLifecycleExiting = errors.New("payment lifecycle exiting")
26

27
// paymentLifecycle holds all information about the current state of a payment
28
// needed to resume if from any point.
29
type paymentLifecycle struct {
30
        router                *ChannelRouter
31
        feeLimit              lnwire.MilliSatoshi
32
        identifier            lntypes.Hash
33
        paySession            PaymentSession
34
        shardTracker          shards.ShardTracker
35
        currentHeight         int32
36
        firstHopCustomRecords lnwire.CustomRecords
37

38
        // quit is closed to signal the sub goroutines of the payment lifecycle
39
        // to stop.
40
        quit chan struct{}
41

42
        // resultCollected is used to signal that the result of an attempt has
43
        // been collected. A nil error means the attempt is either successful
44
        // or failed with temporary error. Otherwise, we should exit the
45
        // lifecycle loop as a terminal error has occurred.
46
        resultCollected chan error
47

48
        // resultCollector is a function that is used to collect the result of
49
        // an HTLC attempt, which is always mounted to `p.collectResultAsync`
50
        // except in unit test, where we use a much simpler resultCollector to
51
        // decouple the test flow for the payment lifecycle.
52
        resultCollector func(attempt *channeldb.HTLCAttempt)
53
}
54

55
// newPaymentLifecycle initiates a new payment lifecycle and returns it.
56
func newPaymentLifecycle(r *ChannelRouter, feeLimit lnwire.MilliSatoshi,
57
        identifier lntypes.Hash, paySession PaymentSession,
58
        shardTracker shards.ShardTracker, currentHeight int32,
59
        firstHopCustomRecords lnwire.CustomRecords) *paymentLifecycle {
41✔
60

41✔
61
        p := &paymentLifecycle{
41✔
62
                router:                r,
41✔
63
                feeLimit:              feeLimit,
41✔
64
                identifier:            identifier,
41✔
65
                paySession:            paySession,
41✔
66
                shardTracker:          shardTracker,
41✔
67
                currentHeight:         currentHeight,
41✔
68
                quit:                  make(chan struct{}),
41✔
69
                resultCollected:       make(chan error, 1),
41✔
70
                firstHopCustomRecords: firstHopCustomRecords,
41✔
71
        }
41✔
72

41✔
73
        // Mount the result collector.
41✔
74
        p.resultCollector = p.collectResultAsync
41✔
75

41✔
76
        return p
41✔
77
}
41✔
78

79
// calcFeeBudget returns the available fee to be used for sending HTLC
80
// attempts.
81
func (p *paymentLifecycle) calcFeeBudget(
82
        feesPaid lnwire.MilliSatoshi) lnwire.MilliSatoshi {
104✔
83

104✔
84
        budget := p.feeLimit
104✔
85

104✔
86
        // We'll subtract the used fee from our fee budget. In case of
104✔
87
        // overflow, we need to check whether feesPaid exceeds our budget
104✔
88
        // already.
104✔
89
        if feesPaid <= budget {
208✔
90
                budget -= feesPaid
104✔
91
        } else {
105✔
92
                budget = 0
1✔
93
        }
1✔
94

95
        return budget
104✔
96
}
97

98
// stateStep tells the payment lifecycle loop what to do with its current
// iteration: skip it, proceed with it, or exit the loop altogether.
type stateStep uint8

const (
	// stepSkip means the rest of the current iteration is skipped and the
	// loop jumps to the next one.
	stepSkip stateStep = iota

	// stepProceed means the current iteration can carry on.
	stepProceed

	// stepExit means the lifecycle loop must be left.
	stepExit
)
113

114
// decideNextStep is used to determine the next step in the payment lifecycle.
115
func (p *paymentLifecycle) decideNextStep(
116
        payment DBMPPayment) (stateStep, error) {
75✔
117

75✔
118
        // Check whether we could make new HTLC attempts.
75✔
119
        allow, err := payment.AllowMoreAttempts()
75✔
120
        if err != nil {
77✔
121
                return stepExit, err
2✔
122
        }
2✔
123

124
        if !allow {
115✔
125
                // Check whether we need to wait for results.
42✔
126
                wait, err := payment.NeedWaitAttempts()
42✔
127
                if err != nil {
43✔
128
                        return stepExit, err
1✔
129
                }
1✔
130

131
                // If we are not allowed to make new HTLC attempts and there's
132
                // no need to wait, the lifecycle is done and we can exit.
133
                if !wait {
59✔
134
                        return stepExit, nil
18✔
135
                }
18✔
136

137
                log.Tracef("Waiting for attempt results for payment %v",
24✔
138
                        p.identifier)
24✔
139

24✔
140
                // Otherwise we wait for one HTLC attempt then continue
24✔
141
                // the lifecycle.
24✔
142
                //
24✔
143
                // NOTE: we don't check `p.quit` since `decideNextStep` is
24✔
144
                // running in the same goroutine as `resumePayment`.
24✔
145
                select {
24✔
146
                case err := <-p.resultCollected:
23✔
147
                        // If an error is returned, exit with it.
23✔
148
                        if err != nil {
24✔
149
                                return stepExit, err
1✔
150
                        }
1✔
151

152
                        log.Tracef("Received attempt result for payment %v",
23✔
153
                                p.identifier)
23✔
154

155
                case <-p.router.quit:
1✔
156
                        return stepExit, ErrRouterShuttingDown
1✔
157
                }
158

159
                return stepSkip, nil
23✔
160
        }
161

162
        // Otherwise we need to make more attempts.
163
        return stepProceed, nil
32✔
164
}
165

166
// resumePayment resumes the paymentLifecycle from the current state.
167
func (p *paymentLifecycle) resumePayment(ctx context.Context) ([32]byte,
168
        *route.Route, error) {
23✔
169

23✔
170
        // When the payment lifecycle loop exits, we make sure to signal any
23✔
171
        // sub goroutine of the HTLC attempt to exit, then wait for them to
23✔
172
        // return.
23✔
173
        defer p.stop()
23✔
174

23✔
175
        // If we had any existing attempts outstanding, we'll start by spinning
23✔
176
        // up goroutines that'll collect their results and deliver them to the
23✔
177
        // lifecycle loop below.
23✔
178
        payment, err := p.router.cfg.Control.FetchPayment(p.identifier)
23✔
179
        if err != nil {
24✔
180
                return [32]byte{}, nil, err
1✔
181
        }
1✔
182

183
        for _, a := range payment.InFlightHTLCs() {
23✔
184
                a := a
1✔
185

1✔
186
                log.Infof("Resuming HTLC attempt %v for payment %v",
1✔
187
                        a.AttemptID, p.identifier)
1✔
188

1✔
189
                p.resultCollector(&a)
1✔
190
        }
1✔
191

192
        // exitWithErr is a helper closure that logs and returns an error.
193
        exitWithErr := func(err error) ([32]byte, *route.Route, error) {
22✔
194
                log.Errorf("Payment %v with status=%v failed: %v",
22✔
195
                        p.identifier, payment.GetStatus(), err)
22✔
196
                return [32]byte{}, nil, err
28✔
197
        }
6✔
198

6✔
199
        // We'll continue until either our payment succeeds, or we encounter a
6✔
200
        // critical error during path finding.
6✔
201
lifecycle:
6✔
202
        for {
6✔
203
                // We update the payment state on every iteration. Since the
6✔
204
                // payment state is affected by multiple goroutines (ie,
6✔
205
                // collectResultAsync), it is NOT guaranteed that we always
6✔
206
                // have the latest state here. This is fine as long as the
6✔
207
                // state is consistent as a whole.
6✔
208
                payment, err = p.router.cfg.Control.FetchPayment(p.identifier)
6✔
209
                if err != nil {
210
                        return exitWithErr(err)
211
                }
212

22✔
213
                ps := payment.GetState()
92✔
214
                remainingFees := p.calcFeeBudget(ps.FeesPaid)
70✔
215

70✔
216
                log.Debugf("Payment %v: status=%v, active_shards=%v, "+
70✔
217
                        "rem_value=%v, fee_limit=%v", p.identifier,
70✔
218
                        payment.GetStatus(), ps.NumAttemptsInFlight,
70✔
219
                        ps.RemainingAmt, remainingFees)
70✔
220

70✔
221
                // We now proceed our lifecycle with the following tasks in
×
222
                // order,
×
223
                //   1. check context.
224
                //   2. request route.
70✔
225
                //   3. create HTLC attempt.
70✔
226
                //   4. send HTLC attempt.
70✔
227
                //   5. collect HTLC attempt result.
70✔
228
                //
70✔
229
                // Before we attempt any new shard, we'll check to see if we've
70✔
230
                // gone past the payment attempt timeout, or if the context was
70✔
231
                // cancelled, or the router is exiting. In any of these cases,
70✔
232
                // we'll stop this payment attempt short.
70✔
233
                if err := p.checkContext(ctx); err != nil {
70✔
234
                        return exitWithErr(err)
70✔
235
                }
70✔
236

70✔
237
                // Now decide the next step of the current lifecycle.
70✔
238
                step, err := p.decideNextStep(payment)
70✔
239
                if err != nil {
70✔
240
                        return exitWithErr(err)
70✔
241
                }
70✔
242

70✔
243
                switch step {
70✔
244
                // Exit the for loop and return below.
71✔
245
                case stepExit:
1✔
246
                        break lifecycle
1✔
247

248
                // Continue the for loop and skip the rest.
249
                case stepSkip:
69✔
250
                        continue lifecycle
71✔
251

2✔
252
                // Continue the for loop and proceed the rest.
2✔
253
                case stepProceed:
254

68✔
255
                // Unknown step received, exit with an error.
256
                default:
17✔
257
                        err = fmt.Errorf("unknown step: %v", step)
17✔
258
                        return exitWithErr(err)
259
                }
260

22✔
261
                // Now request a route to be used to create our HTLC attempt.
22✔
262
                rt, err := p.requestRoute(ps)
263
                if err != nil {
264
                        return exitWithErr(err)
31✔
265
                }
266

267
                // We may not be able to find a route for current attempt. In
×
268
                // that case, we continue the loop and move straight to the
×
269
                // next iteration in case there are results for inflight HTLCs
×
270
                // that still need to be collected.
271
                if rt == nil {
272
                        log.Errorf("No route found for payment %v",
273
                                p.identifier)
31✔
274

32✔
275
                        continue lifecycle
1✔
276
                }
1✔
277

278
                log.Tracef("Found route: %s", spew.Sdump(rt.Hops))
279

280
                // Allow the traffic shaper to add custom records to the
281
                // outgoing HTLC and also adjust the amount if needed.
282
                err = p.amendFirstHopData(rt)
33✔
283
                if err != nil {
3✔
284
                        return exitWithErr(err)
3✔
285
                }
3✔
286

3✔
287
                // We found a route to try, create a new HTLC attempt to try.
288
                attempt, err := p.registerAttempt(rt, ps.RemainingAmt)
289
                if err != nil {
28✔
290
                        return exitWithErr(err)
28✔
291
                }
28✔
292

28✔
293
                // Once the attempt is created, send it to the htlcswitch.
28✔
294
                result, err := p.sendAttempt(attempt)
28✔
295
                if err != nil {
×
296
                        return exitWithErr(err)
×
297
                }
298

299
                // Now that the shard was successfully sent, launch a go
28✔
300
                // routine that will handle its result when its back.
29✔
301
                if result.err == nil {
1✔
302
                        p.resultCollector(attempt)
1✔
303
                }
304
        }
305

27✔
306
        // Once we are out the lifecycle loop, it means we've reached a
28✔
307
        // terminal condition. We either return the settled preimage or the
1✔
308
        // payment's failure reason.
1✔
309
        //
310
        // Optionally delete the failed attempts from the database.
311
        err = p.router.cfg.Control.DeleteFailedAttempts(p.identifier)
312
        if err != nil {
51✔
313
                log.Errorf("Error deleting failed htlc attempts for payment "+
25✔
314
                        "%v: %v", p.identifier, err)
25✔
315
        }
316

317
        htlc, failure := payment.TerminalInfo()
318
        if htlc != nil {
319
                return htlc.Settle.Preimage, &htlc.Route, nil
320
        }
321

322
        // Otherwise return the payment failure reason.
17✔
323
        return [32]byte{}, nil, *failure
17✔
324
}
×
325

×
326
// checkContext checks whether the payment context has been canceled.
×
327
// Cancellation occurs manually or if the context times out.
328
func (p *paymentLifecycle) checkContext(ctx context.Context) error {
17✔
329
        select {
30✔
330
        case <-ctx.Done():
13✔
331
                // If the context was canceled, we'll mark the payment as
13✔
332
                // failed. There are two cases to distinguish here: Either a
333
                // user-provided timeout was reached, or the context was
334
                // canceled, either to a manual cancellation or due to an
5✔
335
                // unknown error.
336
                var reason channeldb.FailureReason
337
                if errors.Is(ctx.Err(), context.DeadlineExceeded) {
338
                        reason = channeldb.FailureReasonTimeout
339
                        log.Warnf("Payment attempt not completed before "+
73✔
340
                                "timeout, id=%s", p.identifier.String())
73✔
341
                } else {
5✔
342
                        reason = channeldb.FailureReasonCanceled
5✔
343
                        log.Warnf("Payment attempt context canceled, id=%s",
5✔
344
                                p.identifier.String())
5✔
345
                }
5✔
346

5✔
347
                // By marking the payment failed, depending on whether it has
5✔
348
                // inflight HTLCs or not, its status will now either be
8✔
349
                // `StatusInflight` or `StatusFailed`. In either case, no more
3✔
350
                // HTLCs will be attempted.
3✔
351
                err := p.router.cfg.Control.FailPayment(p.identifier, reason)
3✔
352
                if err != nil {
5✔
353
                        return fmt.Errorf("FailPayment got %w", err)
2✔
354
                }
2✔
355

2✔
356
        case <-p.router.quit:
2✔
357
                return fmt.Errorf("check payment timeout got: %w",
358
                        ErrRouterShuttingDown)
359

360
        // Fall through if we haven't hit our time limit.
361
        default:
362
        }
5✔
363

6✔
364
        return nil
1✔
365
}
1✔
366

367
// requestRoute is responsible for finding a route to be used to create an HTLC
2✔
368
// attempt.
2✔
369
func (p *paymentLifecycle) requestRoute(
2✔
370
        ps *channeldb.MPPaymentState) (*route.Route, error) {
371

372
        remainingFees := p.calcFeeBudget(ps.FeesPaid)
67✔
373

374
        // Query our payment session to construct a route.
375
        rt, err := p.paySession.RequestRoute(
70✔
376
                ps.RemainingAmt, remainingFees,
377
                uint32(ps.NumAttemptsInFlight), uint32(p.currentHeight),
378
                p.firstHopCustomRecords,
379
        )
380

381
        // Exit early if there's no error.
35✔
382
        if err == nil {
35✔
383
                return rt, nil
35✔
384
        }
35✔
385

35✔
386
        // Otherwise we need to handle the error.
35✔
387
        log.Warnf("Failed to find route for payment %v: %v", p.identifier, err)
35✔
388

35✔
389
        // If the error belongs to `noRouteError` set, it means a non-critical
35✔
390
        // error has happened during path finding, and we will mark the payment
35✔
391
        // failed with this reason. Otherwise, we'll return the critical error
35✔
392
        // found to abort the lifecycle.
35✔
393
        var routeErr noRouteError
64✔
394
        if !errors.As(err, &routeErr) {
29✔
395
                return nil, fmt.Errorf("requestRoute got: %w", err)
29✔
396
        }
397

398
        // It's the `paymentSession`'s responsibility to find a route for us
7✔
399
        // with the best effort. When it cannot find a path, we need to treat it
7✔
400
        // as a terminal condition and fail the payment no matter it has
7✔
401
        // inflight HTLCs or not.
7✔
402
        failureCode := routeErr.FailureReason()
7✔
403
        log.Warnf("Marking payment %v permanently failed with no route: %v",
7✔
404
                p.identifier, failureCode)
7✔
405

9✔
406
        err = p.router.cfg.Control.FailPayment(p.identifier, failureCode)
2✔
407
        if err != nil {
2✔
408
                return nil, fmt.Errorf("FailPayment got: %w", err)
409
        }
410

411
        // NOTE: we decide to not return the non-critical noRouteError here to
412
        // avoid terminating the payment lifecycle as there might be other
413
        // inflight HTLCs which we must wait for their results.
5✔
414
        return nil, nil
5✔
415
}
5✔
416

5✔
417
// stop signals any active shard goroutine to exit.
5✔
418
func (p *paymentLifecycle) stop() {
6✔
419
        close(p.quit)
1✔
420
}
1✔
421

422
// attemptResult holds the HTLC attempt and a possible error returned from
423
// sending it.
424
type attemptResult struct {
425
        // err is non-nil if a non-critical error was encountered when trying
4✔
426
        // to send the attempt, and we successfully updated the control tower
427
        // to reflect this error. This can be errors like not enough local
428
        // balance for the given route etc.
429
        err error
24✔
430

24✔
431
        // attempt is the attempt structure as recorded in the database.
24✔
432
        attempt *channeldb.HTLCAttempt
433
}
434

435
// collectResultAsync launches a goroutine that will wait for the result of the
436
// given HTLC attempt to be available then handle its result. Once received, it
437
// will send a nil error to channel `resultCollected` to indicate there's a
438
// result.
439
func (p *paymentLifecycle) collectResultAsync(attempt *channeldb.HTLCAttempt) {
440
        log.Debugf("Collecting result for attempt %v in payment %v",
441
                attempt.AttemptID, p.identifier)
442

443
        go func() {
444
                // Block until the result is available.
445
                _, err := p.collectResult(attempt)
446
                if err != nil {
447
                        log.Errorf("Error collecting result for attempt %v "+
448
                                "in payment %v: %v", attempt.AttemptID,
449
                                p.identifier, err)
450
                }
23✔
451

23✔
452
                log.Debugf("Result collected for attempt %v in payment %v",
23✔
453
                        attempt.AttemptID, p.identifier)
23✔
454

46✔
455
                // Once the result is collected, we signal it by writing the
23✔
456
                // error to `resultCollected`.
23✔
457
                select {
24✔
458
                // Send the signal or quit.
1✔
459
                case p.resultCollected <- err:
1✔
460

1✔
461
                case <-p.quit:
1✔
462
                        log.Debugf("Lifecycle exiting while collecting "+
463
                                "result for payment %v", p.identifier)
23✔
464

23✔
465
                case <-p.router.quit:
23✔
466
                        return
23✔
467
                }
23✔
468
        }()
23✔
469
}
470

23✔
471
// collectResult waits for the result for the given attempt to be available
UNCOV
472
// from the Switch, then records the attempt outcome with the control tower.
×
UNCOV
473
// An attemptResult is returned, indicating the final outcome of this HTLC
×
UNCOV
474
// attempt.
×
475
func (p *paymentLifecycle) collectResult(attempt *channeldb.HTLCAttempt) (
476
        *attemptResult, error) {
×
477

×
478
        log.Tracef("Collecting result for attempt %v", spew.Sdump(attempt))
479

480
        // We'll retrieve the hash specific to this shard from the
481
        // shardTracker, since it will be needed to regenerate the circuit
482
        // below.
483
        hash, err := p.shardTracker.GetHash(attempt.AttemptID)
484
        if err != nil {
485
                return p.failAttempt(attempt.AttemptID, err)
486
        }
487

35✔
488
        // Regenerate the circuit for this attempt.
35✔
489
        _, circuit, err := generateSphinxPacket(
35✔
490
                &attempt.Route, hash[:], attempt.SessionKey(),
35✔
491
        )
35✔
492
        // TODO(yy): We generate this circuit to create the error decryptor,
35✔
493
        // which is then used in htlcswitch as the deobfuscator to decode the
35✔
494
        // error from `UpdateFailHTLC`. However, suppose it's an
35✔
495
        // `UpdateFulfillHTLC` message yet for some reason the sphinx packet is
35✔
496
        // failed to be generated, we'd miss settling it. This means we should
×
497
        // give it a second chance to try the settlement path in case
×
498
        // `GetAttemptResult` gives us back the preimage. And move the circuit
499
        // creation into htlcswitch so it's only constructed when there's a
500
        // failure message we need to decode.
35✔
501
        if err != nil {
35✔
502
                log.Debugf("Unable to generate circuit for attempt %v: %v",
35✔
503
                        attempt.AttemptID, err)
35✔
504

35✔
505
                return p.failAttempt(attempt.AttemptID, err)
35✔
506
        }
35✔
507

35✔
508
        // Using the created circuit, initialize the error decrypter, so we can
35✔
509
        // parse+decode any failures incurred by this payment within the
35✔
510
        // switch.
35✔
511
        errorDecryptor := &htlcswitch.SphinxErrorDecrypter{
35✔
512
                OnionErrorDecrypter: sphinx.NewOnionErrorDecrypter(circuit),
35✔
513
        }
×
514

×
515
        // Now ask the switch to return the result of the payment when
×
516
        // available.
×
517
        //
×
518
        // TODO(yy): consider using htlcswitch to create the `errorDecryptor`
519
        // since the htlc is already in db. This will also make the interface
520
        // `PaymentAttemptDispatcher` deeper and easier to use. Moreover, we'd
521
        // only create the decryptor when received a failure, further saving us
522
        // a few CPU cycles.
35✔
523
        resultChan, err := p.router.cfg.Payer.GetAttemptResult(
35✔
524
                attempt.AttemptID, p.identifier, errorDecryptor,
35✔
525
        )
35✔
526
        // Handle the switch error.
35✔
527
        if err != nil {
35✔
528
                log.Errorf("Failed getting result for attemptID %d "+
35✔
529
                        "from switch: %v", attempt.AttemptID, err)
35✔
530

35✔
531
                return p.handleSwitchErr(attempt, err)
35✔
532
        }
35✔
533

35✔
534
        // The switch knows about this payment, we'll wait for a result to be
35✔
535
        // available.
35✔
536
        var (
35✔
537
                result *htlcswitch.PaymentResult
35✔
538
                ok     bool
36✔
539
        )
1✔
540

1✔
541
        select {
1✔
542
        case result, ok = <-resultChan:
1✔
543
                if !ok {
1✔
544
                        return nil, htlcswitch.ErrSwitchExiting
545
                }
546

547
        case <-p.quit:
34✔
548
                return nil, ErrPaymentLifecycleExiting
34✔
549

34✔
550
        case <-p.router.quit:
34✔
551
                return nil, ErrRouterShuttingDown
34✔
552
        }
34✔
553

32✔
554
        // In case of a payment failure, fail the attempt with the control
34✔
555
        // tower and return.
2✔
556
        if result.Error != nil {
2✔
557
                return p.handleSwitchErr(attempt, result.Error)
558
        }
1✔
559

1✔
560
        // We successfully got a payment result back from the switch.
561
        log.Debugf("Payment %v succeeded with pid=%v",
1✔
562
                p.identifier, attempt.AttemptID)
1✔
563

564
        // Report success to mission control.
565
        err = p.router.cfg.MissionControl.ReportPaymentSuccess(
566
                attempt.AttemptID, &attempt.Route,
567
        )
48✔
568
        if err != nil {
17✔
569
                log.Errorf("Error reporting payment success to mc: %v", err)
17✔
570
        }
571

572
        // In case of success we atomically store settle result to the DB move
15✔
573
        // the shard to the settled state.
15✔
574
        htlcAttempt, err := p.router.cfg.Control.SettleAttempt(
15✔
575
                p.identifier, attempt.AttemptID,
15✔
576
                &channeldb.HTLCSettleInfo{
15✔
577
                        Preimage:   result.Preimage,
15✔
578
                        SettleTime: p.router.cfg.Clock.Now(),
15✔
579
                },
15✔
580
        )
×
581
        if err != nil {
×
582
                log.Errorf("Error settling attempt %v for payment %v with "+
583
                        "preimage %v: %v", attempt.AttemptID, p.identifier,
584
                        result.Preimage, err)
585

15✔
586
                // We won't mark the attempt as failed since we already have
15✔
587
                // the preimage.
15✔
588
                return nil, err
15✔
589
        }
15✔
590

15✔
591
        return &attemptResult{
15✔
592
                attempt: htlcAttempt,
16✔
593
        }, nil
1✔
594
}
1✔
595

1✔
596
// registerAttempt is responsible for creating and saving an HTLC attempt in db
1✔
597
// by using the route info provided. The `remainingAmt` is used to decide
1✔
598
// whether this is the last attempt.
1✔
599
func (p *paymentLifecycle) registerAttempt(rt *route.Route,
1✔
600
        remainingAmt lnwire.MilliSatoshi) (*channeldb.HTLCAttempt, error) {
1✔
601

602
        // If this route will consume the last remaining amount to send
14✔
603
        // to the receiver, this will be our last shard (for now).
14✔
604
        isLastAttempt := rt.ReceiverAmt() == remainingAmt
14✔
605

606
        // Using the route received from the payment session, create a new
607
        // shard to send.
608
        attempt, err := p.createNewPaymentAttempt(rt, isLastAttempt)
609
        if err != nil {
610
                return nil, err
611
        }
37✔
612

37✔
613
        // Before sending this HTLC to the switch, we checkpoint the fresh
37✔
614
        // paymentID and route to the DB. This lets us know on startup the ID
37✔
615
        // of the payment that we attempted to send, such that we can query the
37✔
616
        // Switch for its whereabouts. The route is needed to handle the result
37✔
617
        // when it eventually comes back.
37✔
618
        err = p.router.cfg.Control.RegisterAttempt(
37✔
619
                p.identifier, &attempt.HTLCAttemptInfo,
37✔
620
        )
38✔
621

1✔
622
        return attempt, err
1✔
623
}
624

625
// createNewPaymentAttempt creates a new payment attempt from the given route.
626
func (p *paymentLifecycle) createNewPaymentAttempt(rt *route.Route,
627
        lastShard bool) (*channeldb.HTLCAttempt, error) {
628

629
        // Generate a new key to be used for this attempt.
36✔
630
        sessionKey, err := generateNewSessionKey()
36✔
631
        if err != nil {
36✔
632
                return nil, err
36✔
633
        }
36✔
634

635
        // We generate a new, unique payment ID that we will use for
636
        // this HTLC.
637
        attemptID, err := p.router.cfg.NextPaymentID()
638
        if err != nil {
37✔
639
                return nil, err
37✔
640
        }
37✔
641

37✔
642
        // Request a new shard from the ShardTracker. If this is an AMP
37✔
643
        // payment, and this is the last shard, the outstanding shards together
×
644
        // with this one will be enough for the receiver to derive all HTLC
×
645
        // preimages. If this a non-AMP payment, the ShardTracker will return a
646
        // simple shard with the payment's static payment hash.
647
        shard, err := p.shardTracker.NewShard(attemptID, lastShard)
648
        if err != nil {
37✔
649
                return nil, err
37✔
650
        }
×
651

×
652
        // If this shard carries MPP or AMP options, add them to the last hop
653
        // on the route.
654
        hop := rt.Hops[len(rt.Hops)-1]
655
        if shard.MPP() != nil {
656
                hop.MPP = shard.MPP()
657
        }
658

37✔
659
        if shard.AMP() != nil {
38✔
660
                hop.AMP = shard.AMP()
1✔
661
        }
1✔
662

663
        hash := shard.Hash()
664

665
        // We now have all the information needed to populate the current
36✔
666
        // attempt information.
41✔
667
        attempt := channeldb.NewHtlcAttempt(
5✔
668
                attemptID, sessionKey, *rt, p.router.cfg.Clock.Now(), &hash,
5✔
669
        )
670

37✔
671
        return attempt, nil
1✔
672
}
1✔
673

674
// sendAttempt attempts to send the current attempt to the switch to complete
36✔
675
// the payment. If this attempt fails, then we'll continue on to the next
36✔
676
// available route.
36✔
677
func (p *paymentLifecycle) sendAttempt(
36✔
678
        attempt *channeldb.HTLCAttempt) (*attemptResult, error) {
36✔
679

36✔
680
        log.Debugf("Sending HTLC attempt(id=%v, total_amt=%v, first_hop_amt=%d"+
36✔
681
                ") for payment %v", attempt.AttemptID,
36✔
682
                attempt.Route.TotalAmount, attempt.Route.FirstHopAmount.Val,
36✔
683
                p.identifier)
684

685
        rt := attempt.Route
686

687
        // Construct the first hop.
688
        firstHop := lnwire.NewShortChanIDFromInt(rt.Hops[0].ChannelID)
689

36✔
690
        // Craft an HTLC packet to send to the htlcswitch. The metadata within
36✔
691
        // this packet will be used to route the payment through the network,
36✔
692
        // starting with the first-hop.
36✔
693
        htlcAdd := &lnwire.UpdateAddHTLC{
36✔
694
                Amount:        rt.FirstHopAmount.Val.Int(),
36✔
695
                Expiry:        rt.TotalTimeLock,
36✔
696
                PaymentHash:   *attempt.Hash,
36✔
697
                CustomRecords: rt.FirstHopWireCustomRecords,
36✔
698
        }
36✔
699

36✔
700
        // Generate the raw encoded sphinx packet to be included along
36✔
701
        // with the htlcAdd message that we send directly to the
36✔
702
        // switch.
36✔
703
        onionBlob, _, err := generateSphinxPacket(
36✔
704
                &rt, attempt.Hash[:], attempt.SessionKey(),
36✔
705
        )
36✔
706
        if err != nil {
36✔
707
                log.Errorf("Failed to create onion blob: attempt=%d in "+
36✔
708
                        "payment=%v, err:%v", attempt.AttemptID,
36✔
709
                        p.identifier, err)
36✔
710

36✔
711
                return p.failAttempt(attempt.AttemptID, err)
36✔
712
        }
36✔
713

36✔
714
        copy(htlcAdd.OnionBlob[:], onionBlob)
36✔
715

36✔
716
        // Send it to the Switch. When this method returns we assume
36✔
717
        // the Switch successfully has persisted the payment attempt,
37✔
718
        // such that we can resume waiting for the result after a
1✔
719
        // restart.
1✔
720
        err = p.router.cfg.Payer.SendHTLC(firstHop, attempt.AttemptID, htlcAdd)
1✔
721
        if err != nil {
1✔
722
                log.Errorf("Failed sending attempt %d for payment %v to "+
1✔
723
                        "switch: %v", attempt.AttemptID, p.identifier, err)
1✔
724

725
                return p.handleSwitchErr(attempt, err)
35✔
726
        }
35✔
727

35✔
728
        log.Debugf("Attempt %v for payment %v successfully sent to switch, "+
35✔
729
                "route: %v", attempt.AttemptID, p.identifier, &attempt.Route)
35✔
730

35✔
731
        return &attemptResult{
35✔
732
                attempt: attempt,
41✔
733
        }, nil
6✔
734
}
6✔
735

6✔
736
// amendFirstHopData is a function that calls the traffic shaper to allow it to
6✔
737
// add custom records to the outgoing HTLC and also adjust the amount if
6✔
738
// needed.
739
func (p *paymentLifecycle) amendFirstHopData(rt *route.Route) error {
30✔
740
        // The first hop amount on the route is the full route amount if not
30✔
741
        // overwritten by the traffic shaper. So we set the initial value now
30✔
742
        // and potentially overwrite it later.
30✔
743
        rt.FirstHopAmount = tlv.NewRecordT[tlv.TlvType0](
30✔
744
                tlv.NewBigSizeT(rt.TotalAmount),
30✔
745
        )
746

747
        // By default, we set the first hop custom records to the initial
748
        // value requested by the RPC. The traffic shaper may overwrite this
749
        // value.
750
        rt.FirstHopWireCustomRecords = p.firstHopCustomRecords
37✔
751

37✔
752
        // extraDataRequest is a helper struct to pass the custom records and
37✔
753
        // amount back from the traffic shaper.
37✔
754
        type extraDataRequest struct {
37✔
755
                customRecords fn.Option[lnwire.CustomRecords]
37✔
756

37✔
757
                amount fn.Option[lnwire.MilliSatoshi]
37✔
758
        }
37✔
759

37✔
760
        // If a hook exists that may affect our outgoing message, we call it now
37✔
761
        // and apply its side effects to the UpdateAddHTLC message.
37✔
762
        result, err := fn.MapOptionZ(
37✔
763
                p.router.cfg.TrafficShaper,
37✔
764
                //nolint:ll
37✔
765
                func(ts htlcswitch.AuxTrafficShaper) fn.Result[extraDataRequest] {
37✔
766
                        newAmt, newRecords, err := ts.ProduceHtlcExtraData(
37✔
767
                                rt.TotalAmount, p.firstHopCustomRecords,
37✔
768
                        )
37✔
769
                        if err != nil {
37✔
770
                                return fn.Err[extraDataRequest](err)
37✔
771
                        }
37✔
772

37✔
773
                        // Make sure we only received valid records.
37✔
774
                        if err := newRecords.Validate(); err != nil {
37✔
775
                                return fn.Err[extraDataRequest](err)
37✔
776
                        }
73✔
777

36✔
778
                        log.Debugf("Aux traffic shaper returned custom "+
36✔
779
                                "records %v and amount %d msat for HTLC",
36✔
780
                                spew.Sdump(newRecords), newAmt)
36✔
781

×
782
                        return fn.Ok(extraDataRequest{
×
783
                                customRecords: fn.Some(newRecords),
784
                                amount:        fn.Some(newAmt),
785
                        })
36✔
786
                },
×
787
        ).Unpack()
×
788
        if err != nil {
789
                return fmt.Errorf("traffic shaper failed to produce extra "+
36✔
790
                        "data: %w", err)
36✔
791
        }
36✔
792

36✔
793
        // Apply the side effects to the UpdateAddHTLC message.
36✔
794
        result.customRecords.WhenSome(func(records lnwire.CustomRecords) {
36✔
795
                rt.FirstHopWireCustomRecords = records
36✔
796
        })
36✔
797
        result.amount.WhenSome(func(amount lnwire.MilliSatoshi) {
798
                rt.FirstHopAmount = tlv.NewRecordT[tlv.TlvType0](
799
                        tlv.NewBigSizeT(amount),
37✔
800
                )
×
801
        })
×
802

×
803
        return nil
804
}
805

73✔
806
// failAttemptAndPayment fails both the payment and its attempt via the
36✔
807
// router's control tower, which marks the payment as failed in db.
36✔
808
func (p *paymentLifecycle) failPaymentAndAttempt(
73✔
809
        attemptID uint64, reason *channeldb.FailureReason,
36✔
810
        sendErr error) (*attemptResult, error) {
36✔
811

36✔
812
        log.Errorf("Payment %v failed: final_outcome=%v, raw_err=%v",
36✔
813
                p.identifier, *reason, sendErr)
814

37✔
815
        // Fail the payment via control tower.
816
        //
817
        // NOTE: we must fail the payment first before failing the attempt.
818
        // Otherwise, once the attempt is marked as failed, another goroutine
819
        // might make another attempt while we are failing the payment.
820
        err := p.router.cfg.Control.FailPayment(p.identifier, *reason)
821
        if err != nil {
6✔
822
                log.Errorf("Unable to fail payment: %v", err)
6✔
823
                return nil, err
6✔
824
        }
6✔
825

6✔
826
        // Fail the attempt.
6✔
827
        return p.failAttempt(attemptID, sendErr)
6✔
828
}
6✔
829

6✔
830
// handleSwitchErr inspects the given error from the Switch and determines
6✔
831
// whether we should make another payment attempt, or if it should be
6✔
832
// considered a terminal error. Terminal errors will be recorded with the
6✔
833
// control tower. It analyzes the sendErr for the payment attempt received from
×
834
// the switch and updates mission control and/or channel policies. Depending on
×
835
// the error type, the error is either the final outcome of the payment or we
×
836
// need to continue with an alternative route. A final outcome is indicated by
837
// a non-nil reason value.
838
func (p *paymentLifecycle) handleSwitchErr(attempt *channeldb.HTLCAttempt,
6✔
839
        sendErr error) (*attemptResult, error) {
840

841
        internalErrorReason := channeldb.FailureReasonError
842
        attemptID := attempt.AttemptID
843

844
        // reportAndFail is a helper closure that reports the failure to the
845
        // mission control, which helps us to decide whether we want to retry
846
        // the payment or not. If a non nil reason is returned from mission
847
        // control, it will further fail the payment via control tower.
848
        reportAndFail := func(srcIdx *int,
849
                msg lnwire.FailureMessage) (*attemptResult, error) {
850

23✔
851
                // Report outcome to mission control.
23✔
852
                reason, err := p.router.cfg.MissionControl.ReportPaymentFail(
23✔
853
                        attemptID, &attempt.Route, srcIdx, msg,
23✔
854
                )
23✔
855
                if err != nil {
23✔
856
                        log.Errorf("Error reporting payment result to mc: %v",
23✔
857
                                err)
23✔
858

23✔
859
                        reason = &internalErrorReason
23✔
860
                }
43✔
861

20✔
862
                // Fail the attempt only if there's no reason.
20✔
863
                if reason == nil {
20✔
864
                        // Fail the attempt.
20✔
865
                        return p.failAttempt(attemptID, sendErr)
20✔
866
                }
20✔
867

×
868
                // Otherwise fail both the payment and the attempt.
×
869
                return p.failPaymentAndAttempt(attemptID, reason, sendErr)
×
870
        }
×
871

×
872
        // If this attempt ID is unknown to the Switch, it means it was never
873
        // checkpointed and forwarded by the switch before a restart. In this
874
        // case we can safely send a new payment attempt, and wait for its
38✔
875
        // result to be available.
18✔
876
        if errors.Is(sendErr, htlcswitch.ErrPaymentIDNotFound) {
18✔
877
                log.Debugf("Attempt ID %v for payment %v not found in the "+
18✔
878
                        "Switch, retrying.", attempt.AttemptID, p.identifier)
879

880
                return p.failAttempt(attemptID, sendErr)
3✔
881
        }
882

883
        if errors.Is(sendErr, htlcswitch.ErrUnreadableFailureMessage) {
884
                log.Warn("Unreadable failure when sending htlc: id=%v, hash=%v",
885
                        attempt.AttemptID, attempt.Hash)
886

887
                // Since this error message cannot be decrypted, we will send a
23✔
888
                // nil error message to our mission controller and fail the
×
889
                // payment.
×
890
                return reportAndFail(nil, nil)
×
891
        }
×
892

×
893
        // If the error is a ClearTextError, we have received a valid wire
894
        // failure message, either from our own outgoing link or from a node
24✔
895
        // down the route. If the error is not related to the propagation of
1✔
896
        // our payment, we can stop trying because an internal error has
1✔
897
        // occurred.
1✔
898
        var rtErr htlcswitch.ClearTextError
1✔
899
        ok := errors.As(sendErr, &rtErr)
1✔
900
        if !ok {
1✔
901
                return p.failPaymentAndAttempt(
1✔
902
                        attemptID, &internalErrorReason, sendErr,
1✔
903
                )
904
        }
905

906
        // failureSourceIdx is the index of the node that the failure occurred
907
        // at. If the ClearTextError received is not a ForwardingError the
908
        // payment error occurred at our node, so we leave this value as 0
909
        // to indicate that the failure occurred locally. If the error is a
22✔
910
        // ForwardingError, it did not originate at our node, so we set
22✔
911
        // failureSourceIdx to the index of the node where the failure occurred.
25✔
912
        failureSourceIdx := 0
3✔
913
        var source *htlcswitch.ForwardingError
3✔
914
        ok = errors.As(rtErr, &source)
3✔
915
        if ok {
3✔
916
                failureSourceIdx = source.FailureSourceIdx
917
        }
918

919
        // Extract the wire failure and apply channel update if it contains one.
920
        // If we received an unknown failure message from a node along the
921
        // route, the failure message will be nil.
922
        failureMessage := rtErr.WireMessage()
923
        err := p.handleFailureMessage(
19✔
924
                &attempt.Route, failureSourceIdx, failureMessage,
19✔
925
        )
19✔
926
        if err != nil {
38✔
927
                return p.failPaymentAndAttempt(
19✔
928
                        attemptID, &internalErrorReason, sendErr,
19✔
929
                )
930
        }
931

932
        log.Tracef("Node=%v reported failure when sending htlc",
933
                failureSourceIdx)
19✔
934

19✔
935
        return reportAndFail(&failureSourceIdx, failureMessage)
19✔
936
}
19✔
937

19✔
938
// handleFailureMessage tries to apply a channel update present in the failure
×
939
// message if any.
×
940
func (p *paymentLifecycle) handleFailureMessage(rt *route.Route,
×
941
        errorSourceIdx int, failure lnwire.FailureMessage) error {
×
942

943
        if failure == nil {
19✔
944
                return nil
19✔
945
        }
19✔
946

19✔
947
        // It makes no sense to apply our own channel updates.
948
        if errorSourceIdx == 0 {
949
                log.Errorf("Channel update of ourselves received")
950

951
                return nil
952
        }
19✔
953

19✔
954
        // Extract channel update if the error contains one.
20✔
955
        update := p.router.extractChannelUpdate(failure)
1✔
956
        if update == nil {
1✔
957
                return nil
958
        }
959

19✔
960
        // Parse pubkey to allow validation of the channel update. This should
1✔
961
        // always succeed, otherwise there is something wrong in our
1✔
962
        // implementation. Therefore, return an error.
1✔
963
        errVertex := rt.Hops[errorSourceIdx-1].PubKeyBytes
1✔
964
        errSource, err := btcec.ParsePubKey(errVertex[:])
965
        if err != nil {
966
                log.Errorf("Cannot parse pubkey: idx=%v, pubkey=%v",
18✔
967
                        errorSourceIdx, errVertex)
28✔
968

10✔
969
                return err
10✔
970
        }
971

972
        var (
973
                isAdditionalEdge bool
974
                policy           *models.CachedEdgePolicy
9✔
975
        )
9✔
976

9✔
977
        // Before we apply the channel update, we need to decide whether the
×
978
        // update is for additional (ephemeral) edge or normal edge stored in
×
979
        // db.
×
980
        //
×
981
        // Note: the p.paySession might be nil here if it's called inside
×
982
        // SendToRoute where there's no payment lifecycle.
983
        if p.paySession != nil {
9✔
984
                policy = p.paySession.GetAdditionalEdgePolicy(
9✔
985
                        errSource, update.ShortChannelID.ToUint64(),
9✔
986
                )
9✔
987
                if policy != nil {
9✔
988
                        isAdditionalEdge = true
9✔
989
                }
9✔
990
        }
9✔
991

9✔
992
        // Apply channel update to additional edge policy.
9✔
993
        if isAdditionalEdge {
9✔
994
                if !p.paySession.UpdateAdditionalEdge(
15✔
995
                        update, errSource, policy) {
6✔
996

6✔
997
                        log.Debugf("Invalid channel update received: node=%v",
6✔
998
                                errVertex)
9✔
999
                }
3✔
1000
                return nil
3✔
1001
        }
1002

1003
        // Apply channel update to the channel edge policy in our db.
1004
        if !p.router.cfg.ApplyChannelUpdate(update) {
12✔
1005
                log.Debugf("Invalid channel update received: node=%v",
3✔
1006
                        errVertex)
3✔
1007
        }
×
1008
        return nil
×
1009
}
×
1010

×
1011
// failAttempt calls control tower to fail the current payment attempt.
3✔
1012
func (p *paymentLifecycle) failAttempt(attemptID uint64,
1013
        sendError error) (*attemptResult, error) {
1014

1015
        log.Warnf("Attempt %v for payment %v failed: %v", attemptID,
10✔
1016
                p.identifier, sendError)
3✔
1017

3✔
1018
        failInfo := marshallError(
3✔
1019
                sendError,
7✔
1020
                p.router.cfg.Clock.Now(),
1021
        )
1022

1023
        // Now that we are failing this payment attempt, cancel the shard with
1024
        // the ShardTracker such that it can derive the correct hash for the
24✔
1025
        // next attempt.
24✔
1026
        if err := p.shardTracker.CancelShard(attemptID); err != nil {
24✔
1027
                return nil, err
24✔
1028
        }
24✔
1029

24✔
1030
        attempt, err := p.router.cfg.Control.FailAttempt(
24✔
1031
                p.identifier, attemptID, failInfo,
24✔
1032
        )
24✔
1033
        if err != nil {
24✔
1034
                return nil, err
24✔
1035
        }
24✔
1036

24✔
1037
        return &attemptResult{
24✔
1038
                attempt: attempt,
×
1039
                err:     sendError,
×
1040
        }, nil
1041
}
24✔
1042

24✔
1043
// marshallError marshall an error as received from the switch to a structure
24✔
1044
// that is suitable for database storage.
27✔
1045
func marshallError(sendError error, time time.Time) *channeldb.HTLCFailInfo {
3✔
1046
        response := &channeldb.HTLCFailInfo{
3✔
1047
                FailTime: time,
1048
        }
21✔
1049

21✔
1050
        switch {
21✔
1051
        case errors.Is(sendError, htlcswitch.ErrPaymentIDNotFound):
21✔
1052
                response.Reason = channeldb.HTLCFailInternal
1053
                return response
1054

1055
        case errors.Is(sendError, htlcswitch.ErrUnreadableFailureMessage):
1056
                response.Reason = channeldb.HTLCFailUnreadable
24✔
1057
                return response
24✔
1058
        }
24✔
1059

24✔
1060
        var rtErr htlcswitch.ClearTextError
24✔
1061
        ok := errors.As(sendError, &rtErr)
24✔
1062
        if !ok {
×
1063
                response.Reason = channeldb.HTLCFailInternal
×
1064
                return response
×
1065
        }
1066

1✔
1067
        message := rtErr.WireMessage()
1✔
1068
        if message != nil {
1✔
1069
                response.Reason = channeldb.HTLCFailMessage
1070
                response.Message = message
1071
        } else {
23✔
1072
                response.Reason = channeldb.HTLCFailUnknown
23✔
1073
        }
27✔
1074

4✔
1075
        // If the ClearTextError received is a ForwardingError, the error
4✔
1076
        // originated from a node along the route, not locally on our outgoing
4✔
1077
        // link. We set failureSourceIdx to the index of the node where the
1078
        // failure occurred. If the error is not a ForwardingError, the failure
19✔
1079
        // occurred at our node, so we leave the index as 0 to indicate that
37✔
1080
        // we failed locally.
18✔
1081
        var fErr *htlcswitch.ForwardingError
18✔
1082
        ok = errors.As(rtErr, &fErr)
19✔
1083
        if ok {
1✔
1084
                response.FailureSourceIndex = uint32(fErr.FailureSourceIdx)
1✔
1085
        }
1086

1087
        return response
1088
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc