• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

lightningnetwork / lnd / 12012751795

25 Nov 2024 02:40PM UTC coverage: 49.835% (-9.2%) from 59.013%
12012751795

Pull #9303

github

yyforyongyu
lnwallet: add debug logs
Pull Request #9303: htlcswitch+routing: handle nil pointer dereference properly

20 of 23 new or added lines in 4 files covered. (86.96%)

25467 existing lines in 425 files now uncovered.

99835 of 200331 relevant lines covered (49.84%)

2.07 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

77.75
/routing/payment_lifecycle.go
1
package routing
2

3
import (
4
        "context"
5
        "errors"
6
        "fmt"
7
        "time"
8

9
        "github.com/btcsuite/btcd/btcec/v2"
10
        "github.com/davecgh/go-spew/spew"
11
        sphinx "github.com/lightningnetwork/lightning-onion"
12
        "github.com/lightningnetwork/lnd/channeldb"
13
        "github.com/lightningnetwork/lnd/channeldb/models"
14
        "github.com/lightningnetwork/lnd/fn"
15
        "github.com/lightningnetwork/lnd/htlcswitch"
16
        "github.com/lightningnetwork/lnd/lntypes"
17
        "github.com/lightningnetwork/lnd/lnwire"
18
        "github.com/lightningnetwork/lnd/routing/route"
19
        "github.com/lightningnetwork/lnd/routing/shards"
20
        "github.com/lightningnetwork/lnd/tlv"
21
)
22

23
// ErrPaymentLifecycleExiting is used when waiting for htlc attempt result, but
24
// the payment lifecycle is exiting.
25
var ErrPaymentLifecycleExiting = errors.New("payment lifecycle exiting")
26

27
// paymentLifecycle holds all information about the current state of a payment
// needed to resume it from any point.
type paymentLifecycle struct {
	// router is the ChannelRouter this lifecycle belongs to. Its config
	// (control tower, payer, mission control, clock) and its quit channel
	// are used by the lifecycle methods.
	router *ChannelRouter

	// feeLimit is the total fee budget of the payment. calcFeeBudget
	// subtracts the fees already paid from it.
	feeLimit lnwire.MilliSatoshi

	// identifier is the hash used to look up and update this payment in
	// the control tower.
	identifier lntypes.Hash

	// paySession is queried for a route on every new attempt.
	paySession PaymentSession

	// shardTracker creates a shard for each new attempt and returns the
	// hash of an existing attempt when its result is collected.
	shardTracker shards.ShardTracker

	// currentHeight is passed to the payment session when requesting a
	// route.
	currentHeight int32

	// firstHopCustomRecords is passed to the payment session when
	// requesting a route.
	firstHopCustomRecords lnwire.CustomRecords

	// quit is closed to signal the sub goroutines of the payment lifecycle
	// to stop.
	quit chan struct{}

	// resultCollected is used to signal that the result of an attempt has
	// been collected. A nil error means the attempt is either successful
	// or failed with temporary error. Otherwise, we should exit the
	// lifecycle loop as a terminal error has occurred.
	resultCollected chan error

	// resultCollector is a function that is used to collect the result of
	// an HTLC attempt, which is always mounted to `p.collectResultAsync`
	// except in unit test, where we use a much simpler resultCollector to
	// decouple the test flow for the payment lifecycle.
	resultCollector func(attempt *channeldb.HTLCAttempt)
}
54

55
// newPaymentLifecycle initiates a new payment lifecycle and returns it.
56
func newPaymentLifecycle(r *ChannelRouter, feeLimit lnwire.MilliSatoshi,
57
        identifier lntypes.Hash, paySession PaymentSession,
58
        shardTracker shards.ShardTracker, currentHeight int32,
59
        firstHopCustomRecords lnwire.CustomRecords) *paymentLifecycle {
4✔
60

4✔
61
        p := &paymentLifecycle{
4✔
62
                router:                r,
4✔
63
                feeLimit:              feeLimit,
4✔
64
                identifier:            identifier,
4✔
65
                paySession:            paySession,
4✔
66
                shardTracker:          shardTracker,
4✔
67
                currentHeight:         currentHeight,
4✔
68
                quit:                  make(chan struct{}),
4✔
69
                resultCollected:       make(chan error, 1),
4✔
70
                firstHopCustomRecords: firstHopCustomRecords,
4✔
71
        }
4✔
72

4✔
73
        // Mount the result collector.
4✔
74
        p.resultCollector = p.collectResultAsync
4✔
75

4✔
76
        return p
4✔
77
}
4✔
78

79
// calcFeeBudget returns the available fee to be used for sending HTLC
80
// attempts.
81
func (p *paymentLifecycle) calcFeeBudget(
82
        feesPaid lnwire.MilliSatoshi) lnwire.MilliSatoshi {
4✔
83

4✔
84
        budget := p.feeLimit
4✔
85

4✔
86
        // We'll subtract the used fee from our fee budget. In case of
4✔
87
        // overflow, we need to check whether feesPaid exceeds our budget
4✔
88
        // already.
4✔
89
        if feesPaid <= budget {
8✔
90
                budget -= feesPaid
4✔
91
        } else {
8✔
92
                budget = 0
4✔
93
        }
4✔
94

95
        return budget
4✔
96
}
97

98
// stateStep defines an action to be taken in our payment lifecycle. We either
// skip, proceed, or exit the lifecycle, see details below.
type stateStep uint8

const (
	// stepSkip is used when we need to skip the rest of the current
	// lifecycle iteration and jump to the next one.
	stepSkip stateStep = iota

	// stepProceed is used when we can proceed with the current lifecycle
	// iteration, i.e. go on to request a route and make a new attempt.
	stepProceed

	// stepExit is used when we need to quit the lifecycle loop.
	stepExit
)
113

114
// decideNextStep is used to determine the next step in the payment lifecycle.
115
func (p *paymentLifecycle) decideNextStep(
116
        payment DBMPPayment) (stateStep, error) {
4✔
117

4✔
118
        // Check whether we could make new HTLC attempts.
4✔
119
        allow, err := payment.AllowMoreAttempts()
4✔
120
        if err != nil {
4✔
UNCOV
121
                return stepExit, err
×
UNCOV
122
        }
×
123

124
        if !allow {
8✔
125
                // Check whether we need to wait for results.
4✔
126
                wait, err := payment.NeedWaitAttempts()
4✔
127
                if err != nil {
4✔
UNCOV
128
                        return stepExit, err
×
UNCOV
129
                }
×
130

131
                // If we are not allowed to make new HTLC attempts and there's
132
                // no need to wait, the lifecycle is done and we can exit.
133
                if !wait {
8✔
134
                        return stepExit, nil
4✔
135
                }
4✔
136

137
                log.Tracef("Waiting for attempt results for payment %v",
4✔
138
                        p.identifier)
4✔
139

4✔
140
                // Otherwise we wait for one HTLC attempt then continue
4✔
141
                // the lifecycle.
4✔
142
                //
4✔
143
                // NOTE: we don't check `p.quit` since `decideNextStep` is
4✔
144
                // running in the same goroutine as `resumePayment`.
4✔
145
                select {
4✔
146
                case err := <-p.resultCollected:
4✔
147
                        // If an error is returned, exit with it.
4✔
148
                        if err != nil {
8✔
149
                                return stepExit, err
4✔
150
                        }
4✔
151

152
                        log.Tracef("Received attempt result for payment %v",
4✔
153
                                p.identifier)
4✔
154

155
                case <-p.router.quit:
2✔
156
                        return stepExit, ErrRouterShuttingDown
2✔
157
                }
158

159
                return stepSkip, nil
4✔
160
        }
161

162
        // Otherwise we need to make more attempts.
163
        return stepProceed, nil
4✔
164
}
165

166
// resumePayment resumes the paymentLifecycle from the current state.
167
func (p *paymentLifecycle) resumePayment(ctx context.Context) ([32]byte,
168
        *route.Route, error) {
4✔
169

4✔
170
        // When the payment lifecycle loop exits, we make sure to signal any
4✔
171
        // sub goroutine of the HTLC attempt to exit, then wait for them to
4✔
172
        // return.
4✔
173
        defer p.stop()
4✔
174

4✔
175
        // If we had any existing attempts outstanding, we'll start by spinning
4✔
176
        // up goroutines that'll collect their results and deliver them to the
4✔
177
        // lifecycle loop below.
4✔
178
        payment, err := p.router.cfg.Control.FetchPayment(p.identifier)
4✔
179
        if err != nil {
4✔
UNCOV
180
                return [32]byte{}, nil, err
×
UNCOV
181
        }
×
182

183
        // Get the payment state.
184
        ps := payment.GetState()
4✔
185

4✔
186
        for _, a := range payment.InFlightHTLCs() {
8✔
187
                a := a
4✔
188

4✔
189
                log.Infof("Resuming HTLC attempt %v for payment %v",
4✔
190
                        a.AttemptID, p.identifier)
4✔
191

4✔
192
                p.resultCollector(&a)
4✔
193
        }
4✔
194

195
        // exitWithErr is a helper closure that logs and returns an error.
196
        exitWithErr := func(err error) ([32]byte, *route.Route, error) {
8✔
197
                log.Errorf("Payment %v with status=%v failed: %v",
4✔
198
                        p.identifier, ps, err)
4✔
199
                return [32]byte{}, nil, err
4✔
200
        }
4✔
201

202
        // We'll continue until either our payment succeeds, or we encounter a
203
        // critical error during path finding.
204
lifecycle:
4✔
205
        for {
8✔
206
                // We update the payment state on every iteration. Since the
4✔
207
                // payment state is affected by multiple goroutines (ie,
4✔
208
                // collectResultAsync), it is NOT guaranteed that we always
4✔
209
                // have the latest state here. This is fine as long as the
4✔
210
                // state is consistent as a whole.
4✔
211
                payment, err = p.router.cfg.Control.FetchPayment(p.identifier)
4✔
212
                if err != nil {
4✔
213
                        return exitWithErr(err)
×
214
                }
×
215

216
                ps = payment.GetState()
4✔
217
                remainingFees := p.calcFeeBudget(ps.FeesPaid)
4✔
218

4✔
219
                log.Debugf("Payment %v: status=%v, active_shards=%v, "+
4✔
220
                        "rem_value=%v, fee_limit=%v", p.identifier,
4✔
221
                        payment.GetStatus(), ps.NumAttemptsInFlight,
4✔
222
                        ps.RemainingAmt, remainingFees)
4✔
223

4✔
224
                // We now proceed our lifecycle with the following tasks in
4✔
225
                // order,
4✔
226
                //   1. check context.
4✔
227
                //   2. request route.
4✔
228
                //   3. create HTLC attempt.
4✔
229
                //   4. send HTLC attempt.
4✔
230
                //   5. collect HTLC attempt result.
4✔
231
                //
4✔
232
                // Before we attempt any new shard, we'll check to see if we've
4✔
233
                // gone past the payment attempt timeout, or if the context was
4✔
234
                // cancelled, or the router is exiting. In any of these cases,
4✔
235
                // we'll stop this payment attempt short.
4✔
236
                if err := p.checkContext(ctx); err != nil {
4✔
UNCOV
237
                        return exitWithErr(err)
×
UNCOV
238
                }
×
239

240
                // Now decide the next step of the current lifecycle.
241
                step, err := p.decideNextStep(payment)
4✔
242
                if err != nil {
8✔
243
                        return exitWithErr(err)
4✔
244
                }
4✔
245

246
                switch step {
4✔
247
                // Exit the for loop and return below.
248
                case stepExit:
4✔
249
                        break lifecycle
4✔
250

251
                // Continue the for loop and skip the rest.
252
                case stepSkip:
4✔
253
                        continue lifecycle
4✔
254

255
                // Continue the for loop and proceed the rest.
256
                case stepProceed:
4✔
257

258
                // Unknown step received, exit with an error.
259
                default:
×
260
                        err = fmt.Errorf("unknown step: %v", step)
×
261
                        return exitWithErr(err)
×
262
                }
263

264
                // Now request a route to be used to create our HTLC attempt.
265
                rt, err := p.requestRoute(ps)
4✔
266
                if err != nil {
4✔
UNCOV
267
                        return exitWithErr(err)
×
UNCOV
268
                }
×
269

270
                // We may not be able to find a route for current attempt. In
271
                // that case, we continue the loop and move straight to the
272
                // next iteration in case there are results for inflight HTLCs
273
                // that still need to be collected.
274
                if rt == nil {
8✔
275
                        log.Errorf("No route found for payment %v",
4✔
276
                                p.identifier)
4✔
277

4✔
278
                        continue lifecycle
4✔
279
                }
280

281
                log.Tracef("Found route: %s", spew.Sdump(rt.Hops))
4✔
282

4✔
283
                // Allow the traffic shaper to add custom records to the
4✔
284
                // outgoing HTLC and also adjust the amount if needed.
4✔
285
                err = p.amendFirstHopData(rt)
4✔
286
                if err != nil {
4✔
287
                        return exitWithErr(err)
×
288
                }
×
289

290
                // We found a route to try, create a new HTLC attempt to try.
291
                attempt, err := p.registerAttempt(rt, ps.RemainingAmt)
4✔
292
                if err != nil {
4✔
UNCOV
293
                        return exitWithErr(err)
×
UNCOV
294
                }
×
295

296
                // Once the attempt is created, send it to the htlcswitch.
297
                result, err := p.sendAttempt(attempt)
4✔
298
                if err != nil {
4✔
UNCOV
299
                        return exitWithErr(err)
×
UNCOV
300
                }
×
301

302
                // Now that the shard was successfully sent, launch a go
303
                // routine that will handle its result when its back.
304
                if result.err == nil {
8✔
305
                        p.resultCollector(attempt)
4✔
306
                }
4✔
307
        }
308

309
        // Once we are out the lifecycle loop, it means we've reached a
310
        // terminal condition. We either return the settled preimage or the
311
        // payment's failure reason.
312
        //
313
        // Optionally delete the failed attempts from the database.
314
        err = p.router.cfg.Control.DeleteFailedAttempts(p.identifier)
4✔
315
        if err != nil {
4✔
316
                log.Errorf("Error deleting failed htlc attempts for payment "+
×
317
                        "%v: %v", p.identifier, err)
×
318
        }
×
319

320
        htlc, failure := payment.TerminalInfo()
4✔
321
        if htlc != nil {
8✔
322
                return htlc.Settle.Preimage, &htlc.Route, nil
4✔
323
        }
4✔
324

325
        // Otherwise return the payment failure reason.
326
        return [32]byte{}, nil, *failure
4✔
327
}
328

329
// checkContext checks whether the payment context has been canceled.
330
// Cancellation occurs manually or if the context times out.
331
func (p *paymentLifecycle) checkContext(ctx context.Context) error {
4✔
332
        select {
4✔
333
        case <-ctx.Done():
4✔
334
                // If the context was canceled, we'll mark the payment as
4✔
335
                // failed. There are two cases to distinguish here: Either a
4✔
336
                // user-provided timeout was reached, or the context was
4✔
337
                // canceled, either to a manual cancellation or due to an
4✔
338
                // unknown error.
4✔
339
                var reason channeldb.FailureReason
4✔
340
                if errors.Is(ctx.Err(), context.DeadlineExceeded) {
4✔
UNCOV
341
                        reason = channeldb.FailureReasonTimeout
×
UNCOV
342
                        log.Warnf("Payment attempt not completed before "+
×
UNCOV
343
                                "timeout, id=%s", p.identifier.String())
×
344
                } else {
4✔
345
                        reason = channeldb.FailureReasonCanceled
4✔
346
                        log.Warnf("Payment attempt context canceled, id=%s",
4✔
347
                                p.identifier.String())
4✔
348
                }
4✔
349

350
                // By marking the payment failed, depending on whether it has
351
                // inflight HTLCs or not, its status will now either be
352
                // `StatusInflight` or `StatusFailed`. In either case, no more
353
                // HTLCs will be attempted.
354
                err := p.router.cfg.Control.FailPayment(p.identifier, reason)
4✔
355
                if err != nil {
4✔
UNCOV
356
                        return fmt.Errorf("FailPayment got %w", err)
×
UNCOV
357
                }
×
358

UNCOV
359
        case <-p.router.quit:
×
UNCOV
360
                return fmt.Errorf("check payment timeout got: %w",
×
UNCOV
361
                        ErrRouterShuttingDown)
×
362

363
        // Fall through if we haven't hit our time limit.
364
        default:
4✔
365
        }
366

367
        return nil
4✔
368
}
369

370
// requestRoute is responsible for finding a route to be used to create an HTLC
371
// attempt.
372
func (p *paymentLifecycle) requestRoute(
373
        ps *channeldb.MPPaymentState) (*route.Route, error) {
4✔
374

4✔
375
        remainingFees := p.calcFeeBudget(ps.FeesPaid)
4✔
376

4✔
377
        // Query our payment session to construct a route.
4✔
378
        rt, err := p.paySession.RequestRoute(
4✔
379
                ps.RemainingAmt, remainingFees,
4✔
380
                uint32(ps.NumAttemptsInFlight), uint32(p.currentHeight),
4✔
381
                p.firstHopCustomRecords,
4✔
382
        )
4✔
383

4✔
384
        // Exit early if there's no error.
4✔
385
        if err == nil {
8✔
386
                return rt, nil
4✔
387
        }
4✔
388

389
        // Otherwise we need to handle the error.
390
        log.Warnf("Failed to find route for payment %v: %v", p.identifier, err)
4✔
391

4✔
392
        // If the error belongs to `noRouteError` set, it means a non-critical
4✔
393
        // error has happened during path finding, and we will mark the payment
4✔
394
        // failed with this reason. Otherwise, we'll return the critical error
4✔
395
        // found to abort the lifecycle.
4✔
396
        var routeErr noRouteError
4✔
397
        if !errors.As(err, &routeErr) {
4✔
UNCOV
398
                return nil, fmt.Errorf("requestRoute got: %w", err)
×
UNCOV
399
        }
×
400

401
        // It's the `paymentSession`'s responsibility to find a route for us
402
        // with the best effort. When it cannot find a path, we need to treat it
403
        // as a terminal condition and fail the payment no matter it has
404
        // inflight HTLCs or not.
405
        failureCode := routeErr.FailureReason()
4✔
406
        log.Warnf("Marking payment %v permanently failed with no route: %v",
4✔
407
                p.identifier, failureCode)
4✔
408

4✔
409
        err = p.router.cfg.Control.FailPayment(p.identifier, failureCode)
4✔
410
        if err != nil {
4✔
UNCOV
411
                return nil, fmt.Errorf("FailPayment got: %w", err)
×
UNCOV
412
        }
×
413

414
        // NOTE: we decide to not return the non-critical noRouteError here to
415
        // avoid terminating the payment lifecycle as there might be other
416
        // inflight HTLCs which we must wait for their results.
417
        return nil, nil
4✔
418
}
419

420
// stop signals any active shard goroutine to exit by closing the quit channel.
// It only sends the signal and does not wait for the goroutines to return.
// Since the channel is closed unconditionally, stop must be called at most
// once per lifecycle.
func (p *paymentLifecycle) stop() {
	close(p.quit)
}
4✔
424

425
// attemptResult holds the HTLC attempt and a possible error returned from
// sending it.
type attemptResult struct {
	// err is non-nil if a non-critical error was encountered when trying
	// to send the attempt, and we successfully updated the control tower
	// to reflect this error. This can be errors like not enough local
	// balance for the given route etc. The lifecycle loop only mounts a
	// result collector for the attempt when err is nil.
	err error

	// attempt is the attempt structure as recorded in the database.
	attempt *channeldb.HTLCAttempt
}
437

438
// collectResultAsync launches a goroutine that will wait for the result of the
439
// given HTLC attempt to be available then handle its result. Once received, it
440
// will send a nil error to channel `resultCollected` to indicate there's a
441
// result.
442
func (p *paymentLifecycle) collectResultAsync(attempt *channeldb.HTLCAttempt) {
4✔
443
        log.Debugf("Collecting result for attempt %v in payment %v",
4✔
444
                attempt.AttemptID, p.identifier)
4✔
445

4✔
446
        go func() {
8✔
447
                // Block until the result is available.
4✔
448
                _, err := p.collectResult(attempt)
4✔
449
                if err != nil {
8✔
450
                        log.Errorf("Error collecting result for attempt %v "+
4✔
451
                                "in payment %v: %v", attempt.AttemptID,
4✔
452
                                p.identifier, err)
4✔
453
                }
4✔
454

455
                log.Debugf("Result collected for attempt %v in payment %v",
4✔
456
                        attempt.AttemptID, p.identifier)
4✔
457

4✔
458
                // Once the result is collected, we signal it by writing the
4✔
459
                // error to `resultCollected`.
4✔
460
                select {
4✔
461
                // Send the signal or quit.
462
                case p.resultCollected <- err:
4✔
463

UNCOV
464
                case <-p.quit:
×
UNCOV
465
                        log.Debugf("Lifecycle exiting while collecting "+
×
UNCOV
466
                                "result for payment %v", p.identifier)
×
467

468
                case <-p.router.quit:
2✔
469
                        return
2✔
470
                }
471
        }()
472
}
473

474
// collectResult waits for the result for the given attempt to be available
475
// from the Switch, then records the attempt outcome with the control tower.
476
// An attemptResult is returned, indicating the final outcome of this HTLC
477
// attempt.
478
func (p *paymentLifecycle) collectResult(attempt *channeldb.HTLCAttempt) (
479
        *attemptResult, error) {
4✔
480

4✔
481
        log.Tracef("Collecting result for attempt %v", spew.Sdump(attempt))
4✔
482

4✔
483
        // We'll retrieve the hash specific to this shard from the
4✔
484
        // shardTracker, since it will be needed to regenerate the circuit
4✔
485
        // below.
4✔
486
        hash, err := p.shardTracker.GetHash(attempt.AttemptID)
4✔
487
        if err != nil {
4✔
488
                return p.failAttempt(attempt.AttemptID, err)
×
489
        }
×
490

491
        // Regenerate the circuit for this attempt.
492
        _, circuit, err := generateSphinxPacket(
4✔
493
                &attempt.Route, hash[:], attempt.SessionKey(),
4✔
494
        )
4✔
495
        // TODO(yy): We generate this circuit to create the error decryptor,
4✔
496
        // which is then used in htlcswitch as the deobfuscator to decode the
4✔
497
        // error from `UpdateFailHTLC`. However, suppose it's an
4✔
498
        // `UpdateFulfillHTLC` message yet for some reason the sphinx packet is
4✔
499
        // failed to be generated, we'd miss settling it. This means we should
4✔
500
        // give it a second chance to try the settlement path in case
4✔
501
        // `GetAttemptResult` gives us back the preimage. And move the circuit
4✔
502
        // creation into htlcswitch so it's only constructed when there's a
4✔
503
        // failure message we need to decode.
4✔
504
        if err != nil {
4✔
505
                log.Debugf("Unable to generate circuit for attempt %v: %v",
×
506
                        attempt.AttemptID, err)
×
507

×
508
                return p.failAttempt(attempt.AttemptID, err)
×
509
        }
×
510

511
        // Using the created circuit, initialize the error decrypter, so we can
512
        // parse+decode any failures incurred by this payment within the
513
        // switch.
514
        errorDecryptor := &htlcswitch.SphinxErrorDecrypter{
4✔
515
                OnionErrorDecrypter: sphinx.NewOnionErrorDecrypter(circuit),
4✔
516
        }
4✔
517

4✔
518
        // Now ask the switch to return the result of the payment when
4✔
519
        // available.
4✔
520
        //
4✔
521
        // TODO(yy): consider using htlcswitch to create the `errorDecryptor`
4✔
522
        // since the htlc is already in db. This will also make the interface
4✔
523
        // `PaymentAttemptDispatcher` deeper and easier to use. Moreover, we'd
4✔
524
        // only create the decryptor when received a failure, further saving us
4✔
525
        // a few CPU cycles.
4✔
526
        resultChan, err := p.router.cfg.Payer.GetAttemptResult(
4✔
527
                attempt.AttemptID, p.identifier, errorDecryptor,
4✔
528
        )
4✔
529
        // Handle the switch error.
4✔
530
        if err != nil {
4✔
UNCOV
531
                log.Errorf("Failed getting result for attemptID %d "+
×
UNCOV
532
                        "from switch: %v", attempt.AttemptID, err)
×
UNCOV
533

×
UNCOV
534
                return p.handleSwitchErr(attempt, err)
×
UNCOV
535
        }
×
536

537
        // The switch knows about this payment, we'll wait for a result to be
538
        // available.
539
        var (
4✔
540
                result *htlcswitch.PaymentResult
4✔
541
                ok     bool
4✔
542
        )
4✔
543

4✔
544
        select {
4✔
545
        case result, ok = <-resultChan:
4✔
546
                if !ok {
8✔
547
                        return nil, htlcswitch.ErrSwitchExiting
4✔
548
                }
4✔
549

UNCOV
550
        case <-p.quit:
×
UNCOV
551
                return nil, ErrPaymentLifecycleExiting
×
552

UNCOV
553
        case <-p.router.quit:
×
UNCOV
554
                return nil, ErrRouterShuttingDown
×
555
        }
556

557
        // In case of a payment failure, fail the attempt with the control
558
        // tower and return.
559
        if result.Error != nil {
8✔
560
                return p.handleSwitchErr(attempt, result.Error)
4✔
561
        }
4✔
562

563
        // We successfully got a payment result back from the switch.
564
        log.Debugf("Payment %v succeeded with pid=%v",
4✔
565
                p.identifier, attempt.AttemptID)
4✔
566

4✔
567
        // Report success to mission control.
4✔
568
        err = p.router.cfg.MissionControl.ReportPaymentSuccess(
4✔
569
                attempt.AttemptID, &attempt.Route,
4✔
570
        )
4✔
571
        if err != nil {
4✔
572
                log.Errorf("Error reporting payment success to mc: %v", err)
×
573
        }
×
574

575
        // In case of success we atomically store settle result to the DB move
576
        // the shard to the settled state.
577
        htlcAttempt, err := p.router.cfg.Control.SettleAttempt(
4✔
578
                p.identifier, attempt.AttemptID,
4✔
579
                &channeldb.HTLCSettleInfo{
4✔
580
                        Preimage:   result.Preimage,
4✔
581
                        SettleTime: p.router.cfg.Clock.Now(),
4✔
582
                },
4✔
583
        )
4✔
584
        if err != nil {
4✔
UNCOV
585
                log.Errorf("Error settling attempt %v for payment %v with "+
×
UNCOV
586
                        "preimage %v: %v", attempt.AttemptID, p.identifier,
×
UNCOV
587
                        result.Preimage, err)
×
UNCOV
588

×
UNCOV
589
                // We won't mark the attempt as failed since we already have
×
UNCOV
590
                // the preimage.
×
UNCOV
591
                return nil, err
×
UNCOV
592
        }
×
593

594
        return &attemptResult{
4✔
595
                attempt: htlcAttempt,
4✔
596
        }, nil
4✔
597
}
598

599
// registerAttempt is responsible for creating and saving an HTLC attempt in db
600
// by using the route info provided. The `remainingAmt` is used to decide
601
// whether this is the last attempt.
602
func (p *paymentLifecycle) registerAttempt(rt *route.Route,
603
        remainingAmt lnwire.MilliSatoshi) (*channeldb.HTLCAttempt, error) {
4✔
604

4✔
605
        // If this route will consume the last remaining amount to send
4✔
606
        // to the receiver, this will be our last shard (for now).
4✔
607
        isLastAttempt := rt.ReceiverAmt() == remainingAmt
4✔
608

4✔
609
        // Using the route received from the payment session, create a new
4✔
610
        // shard to send.
4✔
611
        attempt, err := p.createNewPaymentAttempt(rt, isLastAttempt)
4✔
612
        if err != nil {
4✔
UNCOV
613
                return nil, err
×
UNCOV
614
        }
×
615

616
        // Before sending this HTLC to the switch, we checkpoint the fresh
617
        // paymentID and route to the DB. This lets us know on startup the ID
618
        // of the payment that we attempted to send, such that we can query the
619
        // Switch for its whereabouts. The route is needed to handle the result
620
        // when it eventually comes back.
621
        err = p.router.cfg.Control.RegisterAttempt(
4✔
622
                p.identifier, &attempt.HTLCAttemptInfo,
4✔
623
        )
4✔
624

4✔
625
        return attempt, err
4✔
626
}
627

628
// createNewPaymentAttempt creates a new payment attempt from the given route.
629
func (p *paymentLifecycle) createNewPaymentAttempt(rt *route.Route,
630
        lastShard bool) (*channeldb.HTLCAttempt, error) {
4✔
631

4✔
632
        // Generate a new key to be used for this attempt.
4✔
633
        sessionKey, err := generateNewSessionKey()
4✔
634
        if err != nil {
4✔
635
                return nil, err
×
636
        }
×
637

638
        // We generate a new, unique payment ID that we will use for
639
        // this HTLC.
640
        attemptID, err := p.router.cfg.NextPaymentID()
4✔
641
        if err != nil {
4✔
642
                return nil, err
×
643
        }
×
644

645
        // Request a new shard from the ShardTracker. If this is an AMP
646
        // payment, and this is the last shard, the outstanding shards together
647
        // with this one will be enough for the receiver to derive all HTLC
648
        // preimages. If this a non-AMP payment, the ShardTracker will return a
649
        // simple shard with the payment's static payment hash.
650
        shard, err := p.shardTracker.NewShard(attemptID, lastShard)
4✔
651
        if err != nil {
4✔
UNCOV
652
                return nil, err
×
UNCOV
653
        }
×
654

655
        // If this shard carries MPP or AMP options, add them to the last hop
656
        // on the route.
657
        hop := rt.Hops[len(rt.Hops)-1]
4✔
658
        if shard.MPP() != nil {
8✔
659
                hop.MPP = shard.MPP()
4✔
660
        }
4✔
661

662
        if shard.AMP() != nil {
8✔
663
                hop.AMP = shard.AMP()
4✔
664
        }
4✔
665

666
        hash := shard.Hash()
4✔
667

4✔
668
        // We now have all the information needed to populate the current
4✔
669
        // attempt information.
4✔
670
        attempt := channeldb.NewHtlcAttempt(
4✔
671
                attemptID, sessionKey, *rt, p.router.cfg.Clock.Now(), &hash,
4✔
672
        )
4✔
673

4✔
674
        return attempt, nil
4✔
675
}
676

677
// sendAttempt attempts to send the current attempt to the switch to complete
678
// the payment. If this attempt fails, then we'll continue on to the next
679
// available route.
680
func (p *paymentLifecycle) sendAttempt(
681
        attempt *channeldb.HTLCAttempt) (*attemptResult, error) {
4✔
682

4✔
683
        log.Debugf("Sending HTLC attempt(id=%v, total_amt=%v, first_hop_amt=%d"+
4✔
684
                ") for payment %v", attempt.AttemptID,
4✔
685
                attempt.Route.TotalAmount, attempt.Route.FirstHopAmount.Val,
4✔
686
                p.identifier)
4✔
687

4✔
688
        rt := attempt.Route
4✔
689

4✔
690
        // Construct the first hop.
4✔
691
        firstHop := lnwire.NewShortChanIDFromInt(rt.Hops[0].ChannelID)
4✔
692

4✔
693
        // Craft an HTLC packet to send to the htlcswitch. The metadata within
4✔
694
        // this packet will be used to route the payment through the network,
4✔
695
        // starting with the first-hop.
4✔
696
        htlcAdd := &lnwire.UpdateAddHTLC{
4✔
697
                Amount:        rt.FirstHopAmount.Val.Int(),
4✔
698
                Expiry:        rt.TotalTimeLock,
4✔
699
                PaymentHash:   *attempt.Hash,
4✔
700
                CustomRecords: rt.FirstHopWireCustomRecords,
4✔
701
        }
4✔
702

4✔
703
        // Generate the raw encoded sphinx packet to be included along
4✔
704
        // with the htlcAdd message that we send directly to the
4✔
705
        // switch.
4✔
706
        onionBlob, _, err := generateSphinxPacket(
4✔
707
                &rt, attempt.Hash[:], attempt.SessionKey(),
4✔
708
        )
4✔
709
        if err != nil {
4✔
UNCOV
710
                log.Errorf("Failed to create onion blob: attempt=%d in "+
×
UNCOV
711
                        "payment=%v, err:%v", attempt.AttemptID,
×
UNCOV
712
                        p.identifier, err)
×
UNCOV
713

×
UNCOV
714
                return p.failAttempt(attempt.AttemptID, err)
×
UNCOV
715
        }
×
716

717
        copy(htlcAdd.OnionBlob[:], onionBlob)
4✔
718

4✔
719
        // Send it to the Switch. When this method returns we assume
4✔
720
        // the Switch successfully has persisted the payment attempt,
4✔
721
        // such that we can resume waiting for the result after a
4✔
722
        // restart.
4✔
723
        err = p.router.cfg.Payer.SendHTLC(firstHop, attempt.AttemptID, htlcAdd)
4✔
724
        if err != nil {
8✔
725
                log.Errorf("Failed sending attempt %d for payment %v to "+
4✔
726
                        "switch: %v", attempt.AttemptID, p.identifier, err)
4✔
727

4✔
728
                return p.handleSwitchErr(attempt, err)
4✔
729
        }
4✔
730

731
        log.Debugf("Attempt %v for payment %v successfully sent to switch, "+
4✔
732
                "route: %v", attempt.AttemptID, p.identifier, &attempt.Route)
4✔
733

4✔
734
        return &attemptResult{
4✔
735
                attempt: attempt,
4✔
736
        }, nil
4✔
737
}
738

739
// amendFirstHopData is a function that calls the traffic shaper to allow it to
740
// add custom records to the outgoing HTLC and also adjust the amount if
741
// needed.
742
func (p *paymentLifecycle) amendFirstHopData(rt *route.Route) error {
4✔
743
        // The first hop amount on the route is the full route amount if not
4✔
744
        // overwritten by the traffic shaper. So we set the initial value now
4✔
745
        // and potentially overwrite it later.
4✔
746
        rt.FirstHopAmount = tlv.NewRecordT[tlv.TlvType0](
4✔
747
                tlv.NewBigSizeT(rt.TotalAmount),
4✔
748
        )
4✔
749

4✔
750
        // By default, we set the first hop custom records to the initial
4✔
751
        // value requested by the RPC. The traffic shaper may overwrite this
4✔
752
        // value.
4✔
753
        rt.FirstHopWireCustomRecords = p.firstHopCustomRecords
4✔
754

4✔
755
        // extraDataRequest is a helper struct to pass the custom records and
4✔
756
        // amount back from the traffic shaper.
4✔
757
        type extraDataRequest struct {
4✔
758
                customRecords fn.Option[lnwire.CustomRecords]
4✔
759

4✔
760
                amount fn.Option[lnwire.MilliSatoshi]
4✔
761
        }
4✔
762

4✔
763
        // If a hook exists that may affect our outgoing message, we call it now
4✔
764
        // and apply its side effects to the UpdateAddHTLC message.
4✔
765
        result, err := fn.MapOptionZ(
4✔
766
                p.router.cfg.TrafficShaper,
4✔
767
                func(ts TlvTrafficShaper) fn.Result[extraDataRequest] {
4✔
UNCOV
768
                        newAmt, newRecords, err := ts.ProduceHtlcExtraData(
×
UNCOV
769
                                rt.TotalAmount, p.firstHopCustomRecords,
×
UNCOV
770
                        )
×
UNCOV
771
                        if err != nil {
×
772
                                return fn.Err[extraDataRequest](err)
×
773
                        }
×
774

775
                        // Make sure we only received valid records.
UNCOV
776
                        if err := newRecords.Validate(); err != nil {
×
777
                                return fn.Err[extraDataRequest](err)
×
778
                        }
×
779

UNCOV
780
                        log.Debugf("TLV traffic shaper returned custom "+
×
UNCOV
781
                                "records %v and amount %d msat for HTLC",
×
UNCOV
782
                                spew.Sdump(newRecords), newAmt)
×
UNCOV
783

×
UNCOV
784
                        return fn.Ok(extraDataRequest{
×
UNCOV
785
                                customRecords: fn.Some(newRecords),
×
UNCOV
786
                                amount:        fn.Some(newAmt),
×
UNCOV
787
                        })
×
788
                },
789
        ).Unpack()
790
        if err != nil {
4✔
791
                return fmt.Errorf("traffic shaper failed to produce extra "+
×
792
                        "data: %w", err)
×
793
        }
×
794

795
        // Apply the side effects to the UpdateAddHTLC message.
796
        result.customRecords.WhenSome(func(records lnwire.CustomRecords) {
4✔
UNCOV
797
                rt.FirstHopWireCustomRecords = records
×
UNCOV
798
        })
×
799
        result.amount.WhenSome(func(amount lnwire.MilliSatoshi) {
4✔
UNCOV
800
                rt.FirstHopAmount = tlv.NewRecordT[tlv.TlvType0](
×
UNCOV
801
                        tlv.NewBigSizeT(amount),
×
UNCOV
802
                )
×
UNCOV
803
        })
×
804

805
        return nil
4✔
806
}
807

808
// failAttemptAndPayment fails both the payment and its attempt via the
809
// router's control tower, which marks the payment as failed in db.
810
func (p *paymentLifecycle) failPaymentAndAttempt(
811
        attemptID uint64, reason *channeldb.FailureReason,
812
        sendErr error) (*attemptResult, error) {
4✔
813

4✔
814
        log.Errorf("Payment %v failed: final_outcome=%v, raw_err=%v",
4✔
815
                p.identifier, *reason, sendErr)
4✔
816

4✔
817
        // Fail the payment via control tower.
4✔
818
        //
4✔
819
        // NOTE: we must fail the payment first before failing the attempt.
4✔
820
        // Otherwise, once the attempt is marked as failed, another goroutine
4✔
821
        // might make another attempt while we are failing the payment.
4✔
822
        err := p.router.cfg.Control.FailPayment(p.identifier, *reason)
4✔
823
        if err != nil {
4✔
824
                log.Errorf("Unable to fail payment: %v", err)
×
825
                return nil, err
×
826
        }
×
827

828
        // Fail the attempt.
829
        return p.failAttempt(attemptID, sendErr)
4✔
830
}
831

832
// handleSwitchErr inspects the given error from the Switch and determines
833
// whether we should make another payment attempt, or if it should be
834
// considered a terminal error. Terminal errors will be recorded with the
835
// control tower. It analyzes the sendErr for the payment attempt received from
836
// the switch and updates mission control and/or channel policies. Depending on
837
// the error type, the error is either the final outcome of the payment or we
838
// need to continue with an alternative route. A final outcome is indicated by
839
// a non-nil reason value.
840
func (p *paymentLifecycle) handleSwitchErr(attempt *channeldb.HTLCAttempt,
841
        sendErr error) (*attemptResult, error) {
4✔
842

4✔
843
        internalErrorReason := channeldb.FailureReasonError
4✔
844
        attemptID := attempt.AttemptID
4✔
845

4✔
846
        // reportAndFail is a helper closure that reports the failure to the
4✔
847
        // mission control, which helps us to decide whether we want to retry
4✔
848
        // the payment or not. If a non nil reason is returned from mission
4✔
849
        // control, it will further fail the payment via control tower.
4✔
850
        reportAndFail := func(srcIdx *int,
4✔
851
                msg lnwire.FailureMessage) (*attemptResult, error) {
8✔
852

4✔
853
                // Report outcome to mission control.
4✔
854
                reason, err := p.router.cfg.MissionControl.ReportPaymentFail(
4✔
855
                        attemptID, &attempt.Route, srcIdx, msg,
4✔
856
                )
4✔
857
                if err != nil {
4✔
858
                        log.Errorf("Error reporting payment result to mc: %v",
×
859
                                err)
×
860

×
861
                        reason = &internalErrorReason
×
862
                }
×
863

864
                // Fail the attempt only if there's no reason.
865
                if reason == nil {
8✔
866
                        // Fail the attempt.
4✔
867
                        return p.failAttempt(attemptID, sendErr)
4✔
868
                }
4✔
869

870
                // Otherwise fail both the payment and the attempt.
871
                return p.failPaymentAndAttempt(attemptID, reason, sendErr)
4✔
872
        }
873

874
        // If this attempt ID is unknown to the Switch, it means it was never
875
        // checkpointed and forwarded by the switch before a restart. In this
876
        // case we can safely send a new payment attempt, and wait for its
877
        // result to be available.
878
        if errors.Is(sendErr, htlcswitch.ErrPaymentIDNotFound) {
4✔
879
                log.Debugf("Attempt ID %v for payment %v not found in the "+
×
880
                        "Switch, retrying.", attempt.AttemptID, p.identifier)
×
881

×
882
                return p.failAttempt(attemptID, sendErr)
×
883
        }
×
884

885
        if errors.Is(sendErr, htlcswitch.ErrUnreadableFailureMessage) {
4✔
UNCOV
886
                log.Warn("Unreadable failure when sending htlc: id=%v, hash=%v",
×
UNCOV
887
                        attempt.AttemptID, attempt.Hash)
×
UNCOV
888

×
UNCOV
889
                // Since this error message cannot be decrypted, we will send a
×
UNCOV
890
                // nil error message to our mission controller and fail the
×
UNCOV
891
                // payment.
×
UNCOV
892
                return reportAndFail(nil, nil)
×
UNCOV
893
        }
×
894

895
        // If the error is a ClearTextError, we have received a valid wire
896
        // failure message, either from our own outgoing link or from a node
897
        // down the route. If the error is not related to the propagation of
898
        // our payment, we can stop trying because an internal error has
899
        // occurred.
900
        var rtErr htlcswitch.ClearTextError
4✔
901
        ok := errors.As(sendErr, &rtErr)
4✔
902
        if !ok {
4✔
UNCOV
903
                return p.failPaymentAndAttempt(
×
UNCOV
904
                        attemptID, &internalErrorReason, sendErr,
×
UNCOV
905
                )
×
UNCOV
906
        }
×
907

908
        // failureSourceIdx is the index of the node that the failure occurred
909
        // at. If the ClearTextError received is not a ForwardingError the
910
        // payment error occurred at our node, so we leave this value as 0
911
        // to indicate that the failure occurred locally. If the error is a
912
        // ForwardingError, it did not originate at our node, so we set
913
        // failureSourceIdx to the index of the node where the failure occurred.
914
        failureSourceIdx := 0
4✔
915
        var source *htlcswitch.ForwardingError
4✔
916
        ok = errors.As(rtErr, &source)
4✔
917
        if ok {
8✔
918
                failureSourceIdx = source.FailureSourceIdx
4✔
919
        }
4✔
920

921
        // Extract the wire failure and apply channel update if it contains one.
922
        // If we received an unknown failure message from a node along the
923
        // route, the failure message will be nil.
924
        failureMessage := rtErr.WireMessage()
4✔
925
        err := p.handleFailureMessage(
4✔
926
                &attempt.Route, failureSourceIdx, failureMessage,
4✔
927
        )
4✔
928
        if err != nil {
4✔
929
                return p.failPaymentAndAttempt(
×
930
                        attemptID, &internalErrorReason, sendErr,
×
931
                )
×
932
        }
×
933

934
        log.Tracef("Node=%v reported failure when sending htlc",
4✔
935
                failureSourceIdx)
4✔
936

4✔
937
        return reportAndFail(&failureSourceIdx, failureMessage)
4✔
938
}
939

940
// handleFailureMessage tries to apply a channel update present in the failure
941
// message if any.
942
func (p *paymentLifecycle) handleFailureMessage(rt *route.Route,
943
        errorSourceIdx int, failure lnwire.FailureMessage) error {
4✔
944

4✔
945
        if failure == nil {
4✔
UNCOV
946
                return nil
×
UNCOV
947
        }
×
948

949
        // It makes no sense to apply our own channel updates.
950
        if errorSourceIdx == 0 {
8✔
951
                log.Errorf("Channel update of ourselves received")
4✔
952

4✔
953
                return nil
4✔
954
        }
4✔
955

956
        // Extract channel update if the error contains one.
957
        update := p.router.extractChannelUpdate(failure)
4✔
958
        if update == nil {
8✔
959
                return nil
4✔
960
        }
4✔
961

962
        // Parse pubkey to allow validation of the channel update. This should
963
        // always succeed, otherwise there is something wrong in our
964
        // implementation. Therefore, return an error.
965
        errVertex := rt.Hops[errorSourceIdx-1].PubKeyBytes
4✔
966
        errSource, err := btcec.ParsePubKey(errVertex[:])
4✔
967
        if err != nil {
4✔
968
                log.Errorf("Cannot parse pubkey: idx=%v, pubkey=%v",
×
969
                        errorSourceIdx, errVertex)
×
970

×
971
                return err
×
972
        }
×
973

974
        var (
4✔
975
                isAdditionalEdge bool
4✔
976
                policy           *models.CachedEdgePolicy
4✔
977
        )
4✔
978

4✔
979
        // Before we apply the channel update, we need to decide whether the
4✔
980
        // update is for additional (ephemeral) edge or normal edge stored in
4✔
981
        // db.
4✔
982
        //
4✔
983
        // Note: the p.paySession might be nil here if it's called inside
4✔
984
        // SendToRoute where there's no payment lifecycle.
4✔
985
        if p.paySession != nil {
8✔
986
                policy = p.paySession.GetAdditionalEdgePolicy(
4✔
987
                        errSource, update.ShortChannelID.ToUint64(),
4✔
988
                )
4✔
989
                if policy != nil {
8✔
990
                        isAdditionalEdge = true
4✔
991
                }
4✔
992
        }
993

994
        // Apply channel update to additional edge policy.
995
        if isAdditionalEdge {
8✔
996
                if !p.paySession.UpdateAdditionalEdge(
4✔
997
                        update, errSource, policy) {
4✔
998

×
999
                        log.Debugf("Invalid channel update received: node=%v",
×
1000
                                errVertex)
×
1001
                }
×
1002
                return nil
4✔
1003
        }
1004

1005
        // Apply channel update to the channel edge policy in our db.
1006
        if !p.router.cfg.ApplyChannelUpdate(update) {
8✔
1007
                log.Debugf("Invalid channel update received: node=%v",
4✔
1008
                        errVertex)
4✔
1009
        }
4✔
1010
        return nil
4✔
1011
}
1012

1013
// failAttempt calls control tower to fail the current payment attempt.
1014
func (p *paymentLifecycle) failAttempt(attemptID uint64,
1015
        sendError error) (*attemptResult, error) {
4✔
1016

4✔
1017
        log.Warnf("Attempt %v for payment %v failed: %v", attemptID,
4✔
1018
                p.identifier, sendError)
4✔
1019

4✔
1020
        failInfo := marshallError(
4✔
1021
                sendError,
4✔
1022
                p.router.cfg.Clock.Now(),
4✔
1023
        )
4✔
1024

4✔
1025
        // Now that we are failing this payment attempt, cancel the shard with
4✔
1026
        // the ShardTracker such that it can derive the correct hash for the
4✔
1027
        // next attempt.
4✔
1028
        if err := p.shardTracker.CancelShard(attemptID); err != nil {
4✔
1029
                return nil, err
×
1030
        }
×
1031

1032
        attempt, err := p.router.cfg.Control.FailAttempt(
4✔
1033
                p.identifier, attemptID, failInfo,
4✔
1034
        )
4✔
1035
        if err != nil {
4✔
UNCOV
1036
                return nil, err
×
UNCOV
1037
        }
×
1038

1039
        return &attemptResult{
4✔
1040
                attempt: attempt,
4✔
1041
                err:     sendError,
4✔
1042
        }, nil
4✔
1043
}
1044

1045
// marshallError marshall an error as received from the switch to a structure
1046
// that is suitable for database storage.
1047
func marshallError(sendError error, time time.Time) *channeldb.HTLCFailInfo {
4✔
1048
        response := &channeldb.HTLCFailInfo{
4✔
1049
                FailTime: time,
4✔
1050
        }
4✔
1051

4✔
1052
        switch {
4✔
1053
        case errors.Is(sendError, htlcswitch.ErrPaymentIDNotFound):
×
1054
                response.Reason = channeldb.HTLCFailInternal
×
1055
                return response
×
1056

UNCOV
1057
        case errors.Is(sendError, htlcswitch.ErrUnreadableFailureMessage):
×
UNCOV
1058
                response.Reason = channeldb.HTLCFailUnreadable
×
UNCOV
1059
                return response
×
1060
        }
1061

1062
        var rtErr htlcswitch.ClearTextError
4✔
1063
        ok := errors.As(sendError, &rtErr)
4✔
1064
        if !ok {
4✔
UNCOV
1065
                response.Reason = channeldb.HTLCFailInternal
×
UNCOV
1066
                return response
×
UNCOV
1067
        }
×
1068

1069
        message := rtErr.WireMessage()
4✔
1070
        if message != nil {
8✔
1071
                response.Reason = channeldb.HTLCFailMessage
4✔
1072
                response.Message = message
4✔
1073
        } else {
4✔
UNCOV
1074
                response.Reason = channeldb.HTLCFailUnknown
×
UNCOV
1075
        }
×
1076

1077
        // If the ClearTextError received is a ForwardingError, the error
1078
        // originated from a node along the route, not locally on our outgoing
1079
        // link. We set failureSourceIdx to the index of the node where the
1080
        // failure occurred. If the error is not a ForwardingError, the failure
1081
        // occurred at our node, so we leave the index as 0 to indicate that
1082
        // we failed locally.
1083
        var fErr *htlcswitch.ForwardingError
4✔
1084
        ok = errors.As(rtErr, &fErr)
4✔
1085
        if ok {
8✔
1086
                response.FailureSourceIndex = uint32(fErr.FailureSourceIdx)
4✔
1087
        }
4✔
1088

1089
        return response
4✔
1090
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc