• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

lightningnetwork / lnd / 11219354629

07 Oct 2024 03:56PM UTC coverage: 58.585% (-0.2%) from 58.814%
11219354629

Pull #9147

github

ziggie1984
fixup! sqlc: migration up script for payments.
Pull Request #9147: [Part 1|3] Introduce SQL Payment schema into LND

130227 of 222287 relevant lines covered (58.59%)

29106.19 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.28
/routing/missioncontrol.go
1
package routing
2

3
import (
4
        "errors"
5
        "fmt"
6
        "sync"
7
        "time"
8

9
        "github.com/btcsuite/btcd/btcutil"
10
        "github.com/lightningnetwork/lnd/channeldb"
11
        "github.com/lightningnetwork/lnd/fn"
12
        "github.com/lightningnetwork/lnd/kvdb"
13
        "github.com/lightningnetwork/lnd/lnwire"
14
        "github.com/lightningnetwork/lnd/routing/route"
15
)
16

17
const (
	// DefaultPenaltyHalfLife is the default half-life duration. The
	// half-life duration defines after how much time a penalized node or
	// channel is back at 50% probability.
	DefaultPenaltyHalfLife = time.Hour

	// minSecondChanceInterval is the minimum time required between
	// second-chance failures.
	//
	// If nodes return a channel policy related failure, they may get a
	// second chance to forward the payment. It could be that the channel
	// policy that we are aware of is not up to date. This is especially
	// important in case of mobile apps that are mostly offline.
	//
	// However, we don't want to give nodes the option to endlessly return
	// new channel updates so that we are kept busy trying to route through
	// that node until the payment loop times out.
	//
	// Therefore we only grant a second chance to a node if the previous
	// second chance is sufficiently long ago. This is what
	// minSecondChanceInterval defines. If a second policy failure comes in
	// within that interval, we will apply a penalty.
	//
	// Second chances granted are tracked on the level of node pairs. This
	// means that if a node has multiple channels to the same peer, they
	// will only get a single second chance to route to that peer again.
	// Nodes forward non-strict, so it isn't necessary to apply a less
	// restrictive channel level tracking scheme here.
	minSecondChanceInterval = time.Minute

	// DefaultMaxMcHistory is the default maximum history size.
	DefaultMaxMcHistory = 1000

	// DefaultMcFlushInterval is the default interval we use to flush MC
	// state to the database.
	DefaultMcFlushInterval = time.Second

	// prevSuccessProbability is the assumed probability for node pairs that
	// successfully relayed the previous attempt.
	prevSuccessProbability = 0.95

	// DefaultAprioriWeight is the default a priori weight. See
	// MissionControlConfig for further explanation.
	DefaultAprioriWeight = 0.5

	// DefaultMinFailureRelaxInterval is the default minimum time that must
	// have passed since the previously recorded failure before the failure
	// amount may be raised.
	DefaultMinFailureRelaxInterval = time.Minute

	// DefaultFeeEstimationTimeout is the default value for
	// FeeEstimationTimeout. It defines the maximum duration that the
	// probing fee estimation is allowed to take.
	DefaultFeeEstimationTimeout = time.Minute
)
72

73
var (
	// ErrInvalidMcHistory is returned if we get a negative mission control
	// history count.
	ErrInvalidMcHistory = errors.New("mission control history must be " +
		">= 0")

	// ErrInvalidFailureInterval is returned if we get an invalid failure
	// interval.
	ErrInvalidFailureInterval = errors.New("failure interval must be >= 0")
)
83

84
// NodeResults contains previous results from a node to its peers.
85
type NodeResults map[route.Vertex]TimedPairResult
86

87
// MissionControl contains state which summarizes the past attempts of HTLC
88
// routing by external callers when sending payments throughout the network. It
89
// acts as a shared memory during routing attempts with the goal to optimize the
90
// payment attempt success rate.
91
//
92
// Failed payment attempts are reported to mission control. These reports are
93
// used to track the time of the last node or channel level failure. The time
94
// since the last failure is used to estimate a success probability that is fed
95
// into the path finding process for subsequent payment attempts.
96
type MissionControl struct {
97
        // state is the internal mission control state that is input for
98
        // probability estimation.
99
        state *missionControlState
100

101
        // now is expected to return the current time. It is supplied as an
102
        // external function to enable deterministic unit tests.
103
        now func() time.Time
104

105
        // selfNode is our pubkey.
106
        selfNode route.Vertex
107

108
        store *missionControlStore
109

110
        // estimator is the probability estimator that is used with the payment
111
        // results that mission control collects.
112
        estimator Estimator
113

114
        // onConfigUpdate is a function that is called whenever the
115
        // mission control state is updated.
116
        onConfigUpdate fn.Option[func(cfg *MissionControlConfig)]
117

118
        sync.Mutex
119

120
        // TODO(roasbeef): further counters, if vertex continually unavailable,
121
        // add to another generation
122

123
        // TODO(roasbeef): also add favorable metrics for nodes
124
}
125

126
// MissionControlConfig defines parameters that control mission control
127
// behaviour.
128
type MissionControlConfig struct {
129
        // Estimator gives probability estimates for node pairs.
130
        Estimator Estimator
131

132
        // OnConfigUpdate is function that is called whenever the
133
        // mission control state is updated.
134
        OnConfigUpdate fn.Option[func(cfg *MissionControlConfig)]
135

136
        // MaxMcHistory defines the maximum number of payment results that are
137
        // held on disk.
138
        MaxMcHistory int
139

140
        // McFlushInterval defines the ticker interval when we flush the
141
        // accumulated state to the DB.
142
        McFlushInterval time.Duration
143

144
        // MinFailureRelaxInterval is the minimum time that must have passed
145
        // since the previously recorded failure before the failure amount may
146
        // be raised.
147
        MinFailureRelaxInterval time.Duration
148
}
149

150
func (c *MissionControlConfig) validate() error {
151
        if c.MaxMcHistory < 0 {
152
                return ErrInvalidMcHistory
153
        }
154

155
        if c.MinFailureRelaxInterval < 0 {
36✔
156
                return ErrInvalidFailureInterval
36✔
157
        }
36✔
158

36✔
159
        return nil
36✔
160
}
71✔
161

35✔
162
// String returns a string representation of a mission control config.
35✔
163
func (c *MissionControlConfig) String() string {
164
        return fmt.Sprintf("maximum history: %v, minimum failure relax "+
1✔
165
                "interval: %v", c.MaxMcHistory, c.MinFailureRelaxInterval)
166
}
167

168
// TimedPairResult describes a timestamped pair result.
169
type TimedPairResult struct {
3✔
170
        // FailTime is the time of the last failure.
3✔
171
        FailTime time.Time
3✔
172

3✔
173
        // FailAmt is the amount of the last failure. This amount may be pushed
3✔
174
        // up if a later success is higher than the last failed amount.
8✔
175
        FailAmt lnwire.MilliSatoshi
5✔
176

5✔
177
        // SuccessTime is the time of the last success.
178
        SuccessTime time.Time
3✔
179

180
        // SuccessAmt is the highest amount that successfully forwarded. This
181
        // isn't necessarily the last success amount. The value of this field
182
        // may also be pushed down if a later failure is lower than the highest
183
        // success amount. Because of this, SuccessAmt may not match
184
        // SuccessTime.
185
        SuccessAmt lnwire.MilliSatoshi
186
}
187

188
// MissionControlSnapshot contains a snapshot of the current state of mission
189
// control.
190
type MissionControlSnapshot struct {
191
        // Pairs is a list of channels for which specific information is
192
        // logged.
193
        Pairs []MissionControlPairSnapshot
194
}
195

196
// MissionControlPairSnapshot contains a snapshot of the current node pair
197
// state in mission control.
198
type MissionControlPairSnapshot struct {
199
        // Pair is the node pair of which the state is described.
200
        Pair DirectedNodePair
201

202
        // TimedPairResult contains the data for this pair.
203
        TimedPairResult
204
}
205

33✔
206
// paymentResult is the information that becomes available when a payment
33✔
207
// attempt completes.
×
208
type paymentResult struct {
×
209
        id                 uint64
210
        timeFwd, timeReply time.Time
33✔
211
        route              *mcRoute
×
212
        success            bool
×
213
        failureSourceIdx   *int
214
        failure            lnwire.FailureMessage
33✔
215
}
216

217
// NewMissionControl returns a new instance of missionControl.
218
func NewMissionControl(db kvdb.Backend, self route.Vertex,
2✔
219
        cfg *MissionControlConfig) (*MissionControl, error) {
2✔
220

2✔
221
        log.Debugf("Instantiating mission control with config: %v, %v", cfg,
2✔
222
                cfg.Estimator)
223

224
        if err := cfg.validate(); err != nil {
225
                return nil, err
226
        }
227

228
        store, err := newMissionControlStore(
229
                db, cfg.MaxMcHistory, cfg.McFlushInterval,
230
        )
231
        if err != nil {
232
                return nil, err
233
        }
234

235
        mc := &MissionControl{
236
                state: newMissionControlState(
237
                        cfg.MinFailureRelaxInterval,
238
                ),
239
                now:            time.Now,
240
                selfNode:       self,
241
                store:          store,
242
                estimator:      cfg.Estimator,
243
                onConfigUpdate: cfg.OnConfigUpdate,
244
        }
245

246
        if err := mc.init(); err != nil {
247
                return nil, err
248
        }
249

250
        return mc, nil
251
}
252

253
// RunStoreTicker runs the mission control store's ticker.
254
func (m *MissionControl) RunStoreTicker() {
255
        m.store.run()
256
}
257

258
// StopStoreTicker stops the mission control store's ticker.
259
func (m *MissionControl) StopStoreTicker() {
260
        log.Debug("Stopping mission control store ticker")
261
        defer log.Debug("Mission control store ticker stopped")
262

263
        m.store.stop()
264
}
265

266
// init initializes mission control with historical data.
267
func (m *MissionControl) init() error {
268
        log.Debugf("Mission control state reconstruction started")
269

270
        start := time.Now()
271

272
        results, err := m.store.fetchAll()
273
        if err != nil {
274
                return err
33✔
275
        }
33✔
276

33✔
277
        for _, result := range results {
33✔
278
                m.applyPaymentResult(result)
33✔
279
        }
33✔
280

×
281
        log.Debugf("Mission control state reconstruction finished: "+
×
282
                "n=%v, time=%v", len(results), time.Since(start))
283

33✔
284
        return nil
33✔
285
}
33✔
286

33✔
287
// GetConfig returns the config that mission control is currently configured
33✔
288
// with. All fields are copied by value, so we do not need to worry about
33✔
289
// mutation.
33✔
290
func (m *MissionControl) GetConfig() *MissionControlConfig {
33✔
291
        m.Lock()
33✔
292
        defer m.Unlock()
33✔
293

33✔
294
        return &MissionControlConfig{
33✔
295
                Estimator:               m.estimator,
33✔
296
                MaxMcHistory:            m.store.maxRecords,
×
297
                McFlushInterval:         m.store.flushInterval,
×
298
                MinFailureRelaxInterval: m.state.minFailureRelaxInterval,
299
        }
67✔
300
}
34✔
301

×
302
// SetConfig validates the config provided and updates mission control's config
×
303
// if it is valid.
304
func (m *MissionControl) SetConfig(cfg *MissionControlConfig) error {
305
        if cfg == nil {
33✔
306
                return errors.New("nil mission control config")
307
        }
308

309
        if err := cfg.validate(); err != nil {
310
                return err
311
        }
312

313
        m.Lock()
33✔
314
        defer m.Unlock()
33✔
315

33✔
316
        log.Infof("Active mission control cfg: %v, estimator: %v", cfg,
33✔
317
                cfg.Estimator)
33✔
318

33✔
319
        m.store.maxRecords = cfg.MaxMcHistory
33✔
320
        m.state.minFailureRelaxInterval = cfg.MinFailureRelaxInterval
×
321
        m.estimator = cfg.Estimator
×
322

323
        // Execute the callback function if it is set.
33✔
324
        m.onConfigUpdate.WhenSome(func(f func(cfg *MissionControlConfig)) {
66✔
325
                f(cfg)
33✔
326
        })
33✔
327

×
328
        return nil
×
329
}
×
330

331
// ResetHistory resets the history of MissionControl returning it to a state as
332
// if no payment attempts have been made.
333
func (m *MissionControl) ResetHistory() error {
67✔
334
        m.Lock()
34✔
335
        defer m.Unlock()
34✔
336

67✔
337
        if err := m.store.clear(); err != nil {
33✔
338
                return err
33✔
339
        }
340

1✔
341
        m.state.resetHistory()
1✔
342

1✔
343
        log.Debugf("Mission control history cleared")
344

33✔
345
        return nil
33✔
346
}
×
347

×
348
// GetProbability is expected to return the success probability of a payment
349
// from fromNode along edge.
350
func (m *MissionControl) GetProbability(fromNode, toNode route.Vertex,
34✔
351
        amt lnwire.MilliSatoshi, capacity btcutil.Amount) float64 {
1✔
352

1✔
353
        m.Lock()
×
354
        defer m.Unlock()
×
355

356
        now := m.now()
357
        results, _ := m.state.getLastPairResult(fromNode)
33✔
358

359
        // Use a distinct probability estimation function for local channels.
360
        if fromNode == m.selfNode {
361
                return m.estimator.LocalPairProbability(now, results, toNode)
362
        }
363

364
        return m.estimator.PairProbability(
365
                now, results, toNode, amt, capacity,
35✔
366
        )
35✔
367
}
35✔
368

35✔
369
// GetHistorySnapshot takes a snapshot from the current mission control state
35✔
370
// and actual probability estimates.
×
371
func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
×
372
        m.Lock()
373
        defer m.Unlock()
35✔
374

35✔
375
        log.Debugf("Requesting history snapshot from mission control")
35✔
376

35✔
377
        return m.state.getSnapshot()
35✔
378
}
35✔
379

35✔
380
// ImportHistory imports the set of mission control results provided to our
×
381
// in-memory state. These results are not persisted, so will not survive
×
382
// restarts.
383
func (m *MissionControl) ImportHistory(history *MissionControlSnapshot,
35✔
384
        force bool) error {
35✔
385

35✔
386
        if history == nil {
35✔
387
                return errors.New("cannot import nil history")
35✔
388
        }
35✔
389

35✔
390
        m.Lock()
35✔
391
        defer m.Unlock()
35✔
392

35✔
393
        log.Infof("Importing history snapshot with %v pairs to mission control",
35✔
394
                len(history.Pairs))
35✔
395

35✔
396
        imported := m.state.importSnapshot(history, force)
35✔
397

398
        log.Infof("Imported %v results to mission control", imported)
399

400
        return nil
2✔
401
}
2✔
402

2✔
403
// GetPairHistorySnapshot returns the stored history for a given node pair.
2✔
404
func (m *MissionControl) GetPairHistorySnapshot(
4✔
405
        fromNode, toNode route.Vertex) TimedPairResult {
2✔
406

2✔
407
        m.Lock()
408
        defer m.Unlock()
409

410
        results, ok := m.state.getLastPairResult(fromNode)
2✔
411
        if !ok {
2✔
412
                return TimedPairResult{}
2✔
413
        }
2✔
414

2✔
415
        result, ok := results[toNode]
2✔
416
        if !ok {
2✔
417
                return TimedPairResult{}
4✔
418
        }
2✔
419

2✔
420
        return result
421
}
422

423
// ReportPaymentFail reports a failed payment to mission control as input for
34✔
424
// future probability estimates. The failureSourceIdx argument indicates the
34✔
425
// failure source. If it is nil, the failure source is unknown. This function
34✔
426
// returns a reason if this failure is a final failure. In that case no further
34✔
427
// payment attempts need to be made.
34✔
428
func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
34✔
429
        failureSourceIdx *int, failure lnwire.FailureMessage) (
34✔
430
        *channeldb.FailureReason, error) {
34✔
431

34✔
432
        timestamp := m.now()
34✔
433

×
434
        result := &paymentResult{
×
435
                success:          false,
436
                timeFwd:          timestamp,
40✔
437
                timeReply:        timestamp,
6✔
438
                id:               paymentID,
6✔
439
                failureSourceIdx: failureSourceIdx,
440
                failure:          failure,
34✔
441
                route:            extractMCRoute(rt),
34✔
442
        }
34✔
443

34✔
444
        return m.processPaymentResult(result)
445
}
446

447
// ReportPaymentSuccess reports a successful payment to mission control as input
448
// for future probability estimates.
449
func (m *MissionControl) ReportPaymentSuccess(paymentID uint64,
2✔
450
        rt *route.Route) error {
2✔
451

2✔
452
        timestamp := m.now()
2✔
453

2✔
454
        result := &paymentResult{
2✔
455
                timeFwd:   timestamp,
2✔
456
                timeReply: timestamp,
2✔
457
                id:        paymentID,
2✔
458
                success:   true,
2✔
459
                route:     extractMCRoute(rt),
2✔
460
        }
461

462
        _, err := m.processPaymentResult(result)
463
        return err
2✔
464
}
2✔
465

×
466
// processPaymentResult stores a payment result in the mission control store and
×
467
// updates mission control's in-memory state.
468
func (m *MissionControl) processPaymentResult(result *paymentResult) (
2✔
469
        *channeldb.FailureReason, error) {
×
470

×
471
        // Store complete result in database.
472
        m.store.AddResult(result)
2✔
473

2✔
474
        m.Lock()
2✔
475
        defer m.Unlock()
2✔
476

2✔
477
        // Apply result to update mission control state.
2✔
478
        reason := m.applyPaymentResult(result)
2✔
479

2✔
480
        return reason, nil
2✔
481
}
2✔
482

2✔
483
// applyPaymentResult applies a payment result as input for future probability
4✔
484
// estimates. It returns a bool indicating whether this error is a final error
2✔
485
// and no further payment attempts need to be made.
2✔
486
func (m *MissionControl) applyPaymentResult(
487
        result *paymentResult) *channeldb.FailureReason {
2✔
488

489
        // Interpret result.
490
        i := interpretResult(
491
                result.route, result.success, result.failureSourceIdx,
492
                result.failure,
5✔
493
        )
5✔
494

5✔
495
        if i.policyFailure != nil {
5✔
496
                if m.state.requestSecondChance(
5✔
497
                        result.timeReply,
×
498
                        i.policyFailure.From, i.policyFailure.To,
×
499
                ) {
500
                        return nil
5✔
501
                }
5✔
502
        }
5✔
503

5✔
504
        // If there is a node-level failure, record a failure for every tried
5✔
505
        // connection of that node. A node-level failure can be considered as a
506
        // failure that would have occurred with any of the node's channels.
507
        //
508
        // Ideally we'd also record the failure for the untried connections of
509
        // the node. Unfortunately this would require access to the graph and
510
        // adding this dependency and db calls does not outweigh the benefits.
664✔
511
        //
664✔
512
        // Untried connections will fall back to the node probability. After the
664✔
513
        // call to setAllPairResult below, the node probability will be equal to
664✔
514
        // the probability of the tried channels except that the a priori
664✔
515
        // probability is mixed in too. This effect is controlled by the
664✔
516
        // aprioriWeight parameter. If that parameter isn't set to an extreme
664✔
517
        // and there are a few known connections, there shouldn't be much of a
664✔
518
        // difference. The largest difference occurs when aprioriWeight is 1. In
664✔
519
        // that case, a node-level failure would not be applied to untried
705✔
520
        // channels.
41✔
521
        if i.nodeFailure != nil {
41✔
522
                log.Debugf("Reporting node failure to Mission Control: "+
523
                        "node=%v", *i.nodeFailure)
625✔
524

625✔
525
                m.state.setAllFail(*i.nodeFailure, result.timeReply)
625✔
526
        }
527

528
        for pair, pairResult := range i.pairResults {
529
                pairResult := pairResult
530

1✔
531
                if pairResult.success {
1✔
532
                        log.Debugf("Reporting pair success to Mission "+
1✔
533
                                "Control: pair=%v, amt=%v",
1✔
534
                                pair, pairResult.amt)
1✔
535
                } else {
1✔
536
                        log.Debugf("Reporting pair failure to Mission "+
1✔
537
                                "Control: pair=%v, amt=%v",
1✔
538
                                pair, pairResult.amt)
539
                }
540

541
                m.state.setLastPairResult(
542
                        pair.From, pair.To, result.timeReply, &pairResult, false,
543
                )
2✔
544
        }
2✔
545

2✔
546
        return i.finalFailureReason
×
547
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc