• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

lightningnetwork / lnd / 9915780197

13 Jul 2024 12:30AM UTC coverage: 49.268% (-9.1%) from 58.413%
9915780197

push

github

web-flow
Merge pull request #8653 from ProofOfKeags/fn-prim

DynComms [0/n]: `fn` package additions

92837 of 188433 relevant lines covered (49.27%)

1.55 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.13
/routing/missioncontrol.go
1
package routing
2

3
import (
4
        "errors"
5
        "fmt"
6
        "sync"
7
        "time"
8

9
        "github.com/btcsuite/btcd/btcutil"
10
        "github.com/lightningnetwork/lnd/channeldb"
11
        "github.com/lightningnetwork/lnd/kvdb"
12
        "github.com/lightningnetwork/lnd/lnwire"
13
        "github.com/lightningnetwork/lnd/routing/route"
14
)
15

16
// Default values and internal tuning constants used by mission control.
const (
17
        // DefaultPenaltyHalfLife is the default half-life duration. The
18
        // half-life duration defines after how much time a penalized node or
19
        // channel is back at 50% probability.
20
        DefaultPenaltyHalfLife = time.Hour
21

22
        // minSecondChanceInterval is the minimum time required between
23
        // second-chance failures.
24
        //
25
        // If nodes return a channel policy related failure, they may get a
26
        // second chance to forward the payment. It could be that the channel
27
        // policy that we are aware of is not up to date. This is especially
28
        // important in case of mobile apps that are mostly offline.
29
        //
30
        // However, we don't want to give nodes the option to endlessly return
31
        // new channel updates so that we are kept busy trying to route through
32
        // that node until the payment loop times out.
33
        //
34
        // Therefore we only grant a second chance to a node if the previous
35
        // second chance is sufficiently long ago. This is what
36
        // minSecondChanceInterval defines. If a second policy failure comes in
37
        // within that interval, we will apply a penalty.
38
        //
39
        // Second chances granted are tracked on the level of node pairs. This
40
        // means that if a node has multiple channels to the same peer, they
41
        // will only get a single second chance to route to that peer again.
42
        // Nodes forward non-strict, so it isn't necessary to apply a less
43
        // restrictive channel level tracking scheme here.
44
        minSecondChanceInterval = time.Minute
45

46
        // DefaultMaxMcHistory is the default maximum history size.
        // NOTE(review): presumably the default for
        // MissionControlConfig.MaxMcHistory, i.e. a number of payment
        // results - confirm against the caller that builds the config.
47
        DefaultMaxMcHistory = 1000
48

49
        // DefaultMcFlushInterval is the default interval we use to flush MC state
50
        // to the database.
51
        DefaultMcFlushInterval = time.Second
52

53
        // prevSuccessProbability is the assumed probability for node pairs that
54
        // successfully relayed the previous attempt.
55
        prevSuccessProbability = 0.95
56

57
        // DefaultAprioriWeight is the default a priori weight. See
58
        // MissionControlConfig for further explanation.
        // NOTE(review): MissionControlConfig as declared in this file has no
        // a priori weight field; the explanation presumably lives with the
        // estimator configuration - confirm and update the pointer.
59
        DefaultAprioriWeight = 0.5
60

61
        // DefaultMinFailureRelaxInterval is the default minimum time that must
62
        // have passed since the previously recorded failure before the failure
63
        // amount may be raised.
64
        DefaultMinFailureRelaxInterval = time.Minute
65

66
        // DefaultFeeEstimationTimeout is the default value for
67
        // FeeEstimationTimeout. It defines the maximum duration that the
68
        // probing fee estimation is allowed to take.
69
        DefaultFeeEstimationTimeout = time.Minute
70
)
71

72
// Sentinel errors returned by MissionControlConfig.validate.
var (
73
        // ErrInvalidMcHistory is returned if we get a negative mission control
74
        // history count.
75
        ErrInvalidMcHistory = errors.New("mission control history must be " +
76
                ">= 0")
77

78
        // ErrInvalidFailureInterval is returned if we get a negative minimum
79
        // failure relax interval.
80
        ErrInvalidFailureInterval = errors.New("failure interval must be >= 0")
81
)
82

83
// NodeResults contains previous results from a node to its peers. The map is
// keyed by the peer's vertex and holds the timed result for that pair.
84
type NodeResults map[route.Vertex]TimedPairResult
85

86
// MissionControl contains state which summarizes the past attempts of HTLC
87
// routing by external callers when sending payments throughout the network. It
88
// acts as a shared memory during routing attempts with the goal to optimize the
89
// payment attempt success rate.
90
//
91
// Failed payment attempts are reported to mission control. These reports are
92
// used to track the time of the last node or channel level failure. The time
93
// since the last failure is used to estimate a success probability that is fed
94
// into the path finding process for subsequent payment attempts.
95
type MissionControl struct {
96
        // state is the internal mission control state that is input for
97
        // probability estimation.
98
        state *missionControlState
99

100
        // now is expected to return the current time. It is supplied as an
101
        // external function to enable deterministic unit tests.
102
        now func() time.Time
103

104
        // selfNode is our pubkey.
105
        selfNode route.Vertex
106

107
        // store holds the payment results that are written to the database.
        // Its contents are replayed into state when mission control is
        // initialized.
        store *missionControlStore
108

109
        // estimator is the probability estimator that is used with the payment
110
        // results that mission control collects.
111
        estimator Estimator
112

113
        // Mutex is held by the methods of MissionControl while they read or
        // modify state, estimator and the store's configuration.
        sync.Mutex
114

115
        // TODO(roasbeef): further counters, if vertex continually unavailable,
116
        // add to another generation
117

118
        // TODO(roasbeef): also add favorable metrics for nodes
119
}
120

121
// MissionControlConfig defines parameters that control mission control
122
// behaviour.
123
type MissionControlConfig struct {
124
        // Estimator gives probability estimates for node pairs.
125
        Estimator Estimator
126

127
        // MaxMcHistory defines the maximum number of payment results that are
128
        // held on disk. Negative values are rejected by validate.
129
        MaxMcHistory int
130

131
        // McFlushInterval defines the ticker interval when we flush the
132
        // accumulated state to the DB.
133
        McFlushInterval time.Duration
134

135
        // MinFailureRelaxInterval is the minimum time that must have passed
136
        // since the previously recorded failure before the failure amount may
137
        // be raised. Negative values are rejected by validate.
138
        MinFailureRelaxInterval time.Duration
139
}
140

141
func (c *MissionControlConfig) validate() error {
3✔
142
        if c.MaxMcHistory < 0 {
3✔
143
                return ErrInvalidMcHistory
×
144
        }
×
145

146
        if c.MinFailureRelaxInterval < 0 {
3✔
147
                return ErrInvalidFailureInterval
×
148
        }
×
149

150
        return nil
3✔
151
}
152

153
// String returns a string representation of a mission control config.
154
func (c *MissionControlConfig) String() string {
3✔
155
        return fmt.Sprintf("maximum history: %v, minimum failure relax "+
3✔
156
                "interval: %v", c.MaxMcHistory, c.MinFailureRelaxInterval)
3✔
157
}
3✔
158

159
// TimedPairResult describes a timestamped pair result. It records the last
// failure and the last/highest success that were seen for a node pair.
160
type TimedPairResult struct {
161
        // FailTime is the time of the last failure.
162
        FailTime time.Time
163

164
        // FailAmt is the amount of the last failure. This amount may be pushed
165
        // up if a later success is higher than the last failed amount.
166
        FailAmt lnwire.MilliSatoshi
167

168
        // SuccessTime is the time of the last success.
169
        SuccessTime time.Time
170

171
        // SuccessAmt is the highest amount that successfully forwarded. This
172
        // isn't necessarily the last success amount. The value of this field
173
        // may also be pushed down if a later failure is lower than the highest
174
        // success amount. Because of this, SuccessAmt may not match
175
        // SuccessTime.
176
        SuccessAmt lnwire.MilliSatoshi
177
}
178

179
// MissionControlSnapshot contains a snapshot of the current state of mission
180
// control.
181
type MissionControlSnapshot struct {
182
        // Pairs is a list of node pairs for which specific information is
183
        // logged.
184
        Pairs []MissionControlPairSnapshot
185
}
186

187
// MissionControlPairSnapshot contains a snapshot of the current node pair
188
// state in mission control.
189
type MissionControlPairSnapshot struct {
190
        // Pair is the node pair of which the state is described.
191
        Pair DirectedNodePair
192

193
        // TimedPairResult contains the data for this pair. It is embedded, so
        // its fields are accessible directly on the snapshot.
194
        TimedPairResult
195
}
196

197
// paymentResult is the information that becomes available when a payment
198
// attempt completes.
199
type paymentResult struct {
200
        // id is the payment identifier supplied by the reporting caller.
        id                 uint64
201
        // timeFwd and timeReply are timestamps of the attempt. The Report*
        // methods in this file set both to the same value of now().
        timeFwd, timeReply time.Time
202
        // route is the route that was attempted.
        route              *route.Route
203
        // success is true if the attempt succeeded.
        success            bool
204
        // failureSourceIdx indicates the failure source; nil means the
        // failure source is unknown.
        failureSourceIdx   *int
205
        // failure is the failure message reported for a failed attempt.
        failure            lnwire.FailureMessage
206
}
207

208
// NewMissionControl returns a new instance of missionControl.
209
func NewMissionControl(db kvdb.Backend, self route.Vertex,
210
        cfg *MissionControlConfig) (*MissionControl, error) {
3✔
211

3✔
212
        log.Debugf("Instantiating mission control with config: %v, %v", cfg,
3✔
213
                cfg.Estimator)
3✔
214

3✔
215
        if err := cfg.validate(); err != nil {
3✔
216
                return nil, err
×
217
        }
×
218

219
        store, err := newMissionControlStore(
3✔
220
                db, cfg.MaxMcHistory, cfg.McFlushInterval,
3✔
221
        )
3✔
222
        if err != nil {
3✔
223
                return nil, err
×
224
        }
×
225

226
        mc := &MissionControl{
3✔
227
                state:     newMissionControlState(cfg.MinFailureRelaxInterval),
3✔
228
                now:       time.Now,
3✔
229
                selfNode:  self,
3✔
230
                store:     store,
3✔
231
                estimator: cfg.Estimator,
3✔
232
        }
3✔
233

3✔
234
        if err := mc.init(); err != nil {
3✔
235
                return nil, err
×
236
        }
×
237

238
        return mc, nil
3✔
239
}
240

241
// RunStoreTicker runs the mission control store's ticker.
242
func (m *MissionControl) RunStoreTicker() {
3✔
243
        m.store.run()
3✔
244
}
3✔
245

246
// StopStoreTicker stops the mission control store's ticker.
247
func (m *MissionControl) StopStoreTicker() {
3✔
248
        log.Debug("Stopping mission control store ticker")
3✔
249
        defer log.Debug("Mission control store ticker stopped")
3✔
250

3✔
251
        m.store.stop()
3✔
252
}
3✔
253

254
// init initializes mission control with historical data.
255
func (m *MissionControl) init() error {
3✔
256
        log.Debugf("Mission control state reconstruction started")
3✔
257

3✔
258
        start := time.Now()
3✔
259

3✔
260
        results, err := m.store.fetchAll()
3✔
261
        if err != nil {
3✔
262
                return err
×
263
        }
×
264

265
        for _, result := range results {
6✔
266
                m.applyPaymentResult(result)
3✔
267
        }
3✔
268

269
        log.Debugf("Mission control state reconstruction finished: "+
3✔
270
                "n=%v, time=%v", len(results), time.Since(start))
3✔
271

3✔
272
        return nil
3✔
273
}
274

275
// GetConfig returns the config that mission control is currently configured
276
// with. All fields are copied by value, so we do not need to worry about
277
// mutation.
278
func (m *MissionControl) GetConfig() *MissionControlConfig {
3✔
279
        m.Lock()
3✔
280
        defer m.Unlock()
3✔
281

3✔
282
        return &MissionControlConfig{
3✔
283
                Estimator:               m.estimator,
3✔
284
                MaxMcHistory:            m.store.maxRecords,
3✔
285
                McFlushInterval:         m.store.flushInterval,
3✔
286
                MinFailureRelaxInterval: m.state.minFailureRelaxInterval,
3✔
287
        }
3✔
288
}
3✔
289

290
// SetConfig validates the config provided and updates mission control's config
291
// if it is valid.
292
func (m *MissionControl) SetConfig(cfg *MissionControlConfig) error {
3✔
293
        if cfg == nil {
3✔
294
                return errors.New("nil mission control config")
×
295
        }
×
296

297
        if err := cfg.validate(); err != nil {
3✔
298
                return err
×
299
        }
×
300

301
        m.Lock()
3✔
302
        defer m.Unlock()
3✔
303

3✔
304
        log.Infof("Active mission control cfg: %v, estimator: %v", cfg,
3✔
305
                cfg.Estimator)
3✔
306

3✔
307
        m.store.maxRecords = cfg.MaxMcHistory
3✔
308
        m.state.minFailureRelaxInterval = cfg.MinFailureRelaxInterval
3✔
309
        m.estimator = cfg.Estimator
3✔
310

3✔
311
        return nil
3✔
312
}
313

314
// ResetHistory resets the history of MissionControl returning it to a state as
315
// if no payment attempts have been made.
316
func (m *MissionControl) ResetHistory() error {
3✔
317
        m.Lock()
3✔
318
        defer m.Unlock()
3✔
319

3✔
320
        if err := m.store.clear(); err != nil {
3✔
321
                return err
×
322
        }
×
323

324
        m.state.resetHistory()
3✔
325

3✔
326
        log.Debugf("Mission control history cleared")
3✔
327

3✔
328
        return nil
3✔
329
}
330

331
// GetProbability is expected to return the success probability of a payment
332
// from fromNode along edge.
333
func (m *MissionControl) GetProbability(fromNode, toNode route.Vertex,
334
        amt lnwire.MilliSatoshi, capacity btcutil.Amount) float64 {
3✔
335

3✔
336
        m.Lock()
3✔
337
        defer m.Unlock()
3✔
338

3✔
339
        now := m.now()
3✔
340
        results, _ := m.state.getLastPairResult(fromNode)
3✔
341

3✔
342
        // Use a distinct probability estimation function for local channels.
3✔
343
        if fromNode == m.selfNode {
6✔
344
                return m.estimator.LocalPairProbability(now, results, toNode)
3✔
345
        }
3✔
346

347
        return m.estimator.PairProbability(
3✔
348
                now, results, toNode, amt, capacity,
3✔
349
        )
3✔
350
}
351

352
// GetHistorySnapshot takes a snapshot from the current mission control state
353
// and actual probability estimates.
354
func (m *MissionControl) GetHistorySnapshot() *MissionControlSnapshot {
×
355
        m.Lock()
×
356
        defer m.Unlock()
×
357

×
358
        log.Debugf("Requesting history snapshot from mission control")
×
359

×
360
        return m.state.getSnapshot()
×
361
}
×
362

363
// ImportHistory imports the set of mission control results provided to our
364
// in-memory state. These results are not persisted, so will not survive
365
// restarts.
366
func (m *MissionControl) ImportHistory(history *MissionControlSnapshot,
367
        force bool) error {
3✔
368

3✔
369
        if history == nil {
3✔
370
                return errors.New("cannot import nil history")
×
371
        }
×
372

373
        m.Lock()
3✔
374
        defer m.Unlock()
3✔
375

3✔
376
        log.Infof("Importing history snapshot with %v pairs to mission control",
3✔
377
                len(history.Pairs))
3✔
378

3✔
379
        imported := m.state.importSnapshot(history, force)
3✔
380

3✔
381
        log.Infof("Imported %v results to mission control", imported)
3✔
382

3✔
383
        return nil
3✔
384
}
385

386
// GetPairHistorySnapshot returns the stored history for a given node pair.
387
func (m *MissionControl) GetPairHistorySnapshot(
388
        fromNode, toNode route.Vertex) TimedPairResult {
3✔
389

3✔
390
        m.Lock()
3✔
391
        defer m.Unlock()
3✔
392

3✔
393
        results, ok := m.state.getLastPairResult(fromNode)
3✔
394
        if !ok {
6✔
395
                return TimedPairResult{}
3✔
396
        }
3✔
397

398
        result, ok := results[toNode]
3✔
399
        if !ok {
6✔
400
                return TimedPairResult{}
3✔
401
        }
3✔
402

403
        return result
3✔
404
}
405

406
// ReportPaymentFail reports a failed payment to mission control as input for
407
// future probability estimates. The failureSourceIdx argument indicates the
408
// failure source. If it is nil, the failure source is unknown. This function
409
// returns a reason if this failure is a final failure. In that case no further
410
// payment attempts need to be made.
411
func (m *MissionControl) ReportPaymentFail(paymentID uint64, rt *route.Route,
412
        failureSourceIdx *int, failure lnwire.FailureMessage) (
413
        *channeldb.FailureReason, error) {
3✔
414

3✔
415
        timestamp := m.now()
3✔
416

3✔
417
        result := &paymentResult{
3✔
418
                success:          false,
3✔
419
                timeFwd:          timestamp,
3✔
420
                timeReply:        timestamp,
3✔
421
                id:               paymentID,
3✔
422
                failureSourceIdx: failureSourceIdx,
3✔
423
                failure:          failure,
3✔
424
                route:            rt,
3✔
425
        }
3✔
426

3✔
427
        return m.processPaymentResult(result)
3✔
428
}
3✔
429

430
// ReportPaymentSuccess reports a successful payment to mission control as input
431
// for future probability estimates.
432
func (m *MissionControl) ReportPaymentSuccess(paymentID uint64,
433
        rt *route.Route) error {
3✔
434

3✔
435
        timestamp := m.now()
3✔
436

3✔
437
        result := &paymentResult{
3✔
438
                timeFwd:   timestamp,
3✔
439
                timeReply: timestamp,
3✔
440
                id:        paymentID,
3✔
441
                success:   true,
3✔
442
                route:     rt,
3✔
443
        }
3✔
444

3✔
445
        _, err := m.processPaymentResult(result)
3✔
446
        return err
3✔
447
}
3✔
448

449
// processPaymentResult stores a payment result in the mission control store and
450
// updates mission control's in-memory state.
451
func (m *MissionControl) processPaymentResult(result *paymentResult) (
452
        *channeldb.FailureReason, error) {
3✔
453

3✔
454
        // Store complete result in database.
3✔
455
        m.store.AddResult(result)
3✔
456

3✔
457
        m.Lock()
3✔
458
        defer m.Unlock()
3✔
459

3✔
460
        // Apply result to update mission control state.
3✔
461
        reason := m.applyPaymentResult(result)
3✔
462

3✔
463
        return reason, nil
3✔
464
}
3✔
465

466
// applyPaymentResult applies a payment result as input for future probability
467
// estimates. It returns a bool indicating whether this error is a final error
468
// and no further payment attempts need to be made.
469
func (m *MissionControl) applyPaymentResult(
470
        result *paymentResult) *channeldb.FailureReason {
3✔
471

3✔
472
        // Interpret result.
3✔
473
        i := interpretResult(
3✔
474
                result.route, result.success, result.failureSourceIdx,
3✔
475
                result.failure,
3✔
476
        )
3✔
477

3✔
478
        if i.policyFailure != nil {
6✔
479
                if m.state.requestSecondChance(
3✔
480
                        result.timeReply,
3✔
481
                        i.policyFailure.From, i.policyFailure.To,
3✔
482
                ) {
6✔
483
                        return nil
3✔
484
                }
3✔
485
        }
486

487
        // If there is a node-level failure, record a failure for every tried
488
        // connection of that node. A node-level failure can be considered as a
489
        // failure that would have occurred with any of the node's channels.
490
        //
491
        // Ideally we'd also record the failure for the untried connections of
492
        // the node. Unfortunately this would require access to the graph and
493
        // adding this dependency and db calls does not outweigh the benefits.
494
        //
495
        // Untried connections will fall back to the node probability. After the
496
        // call to setAllPairResult below, the node probability will be equal to
497
        // the probability of the tried channels except that the a priori
498
        // probability is mixed in too. This effect is controlled by the
499
        // aprioriWeight parameter. If that parameter isn't set to an extreme
500
        // and there are a few known connections, there shouldn't be much of a
501
        // difference. The largest difference occurs when aprioriWeight is 1. In
502
        // that case, a node-level failure would not be applied to untried
503
        // channels.
504
        if i.nodeFailure != nil {
3✔
505
                log.Debugf("Reporting node failure to Mission Control: "+
×
506
                        "node=%v", *i.nodeFailure)
×
507

×
508
                m.state.setAllFail(*i.nodeFailure, result.timeReply)
×
509
        }
×
510

511
        for pair, pairResult := range i.pairResults {
6✔
512
                pairResult := pairResult
3✔
513

3✔
514
                if pairResult.success {
6✔
515
                        log.Debugf("Reporting pair success to Mission "+
3✔
516
                                "Control: pair=%v, amt=%v",
3✔
517
                                pair, pairResult.amt)
3✔
518
                } else {
6✔
519
                        log.Debugf("Reporting pair failure to Mission "+
3✔
520
                                "Control: pair=%v, amt=%v",
3✔
521
                                pair, pairResult.amt)
3✔
522
                }
3✔
523

524
                m.state.setLastPairResult(
3✔
525
                        pair.From, pair.To, result.timeReply, &pairResult, false,
3✔
526
                )
3✔
527
        }
528

529
        return i.finalFailureReason
3✔
530
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc